allow non-alpha passthrough

This commit is contained in:
Hamcha 2024-10-25 20:52:35 +02:00
parent fa50107a04
commit ba38b98624
Signed by: hamcha
GPG key ID: 1669C533B8CF6D89
3 changed files with 145 additions and 111 deletions

View file

@ -18,7 +18,12 @@ mod tests {
use super::*; use super::*;
#[test] #[test]
fn test_convert_to_hiragana() { fn test_convert_to_hiragana_words() {
assert_eq!(convert_to_hiragana("assolutamente"), "あっそるためんて"); assert_eq!(convert_to_hiragana("assolutamente"), "あっそるためんて");
} }
#[test]
fn test_convert_to_hiragana_symbols() {
    // A hyphen, a space, an exclamation mark and a digit are mixed into the
    // input; the expected output keeps each of them in place between the kana.
    let input = "asso-lutamente no!1";
    let expected = "あっそ-るためんて の!1";
    assert_eq!(convert_to_hiragana(input), expected);
}
} }

View file

@ -42,6 +42,8 @@ fn match3(s: &str) -> Option<Syllable> {
"zie" => Some(Zye), "zie" => Some(Zye),
"zio" => Some(Zyo), "zio" => Some(Zyo),
"ziu" => Some(Zyu), "ziu" => Some(Zyu),
"chu" => Some(Chu),
"cho" => Some(Cho),
_ => None, _ => None,
} }
} }
@ -183,12 +185,16 @@ pub(crate) fn romanize(word: &str) -> Vec<Syllable> {
.to_lowercase() .to_lowercase()
.chars() .chars()
.map(remove_accents) .map(remove_accents)
.filter(|c| c.is_alphabetic())
.collect(); .collect();
let mut remaining = lowercase.clone(); let mut remaining = lowercase.clone();
let mut syllables = Vec::new(); let mut syllables = Vec::new();
while remaining.len() > 0 { while remaining.len() > 0 {
if !remaining.as_bytes()[0].is_ascii_alphabetic() {
syllables.push(NonAlpha(remaining.as_bytes()[0] as char));
remaining = remaining[1..].to_string();
continue;
}
// Check for double consonants // Check for double consonants
if remaining.len() >= 3 && remaining.as_bytes()[0] == remaining.as_bytes()[1] { if remaining.len() >= 3 && remaining.as_bytes()[0] == remaining.as_bytes()[1] {
syllables.push(LittleTsu); syllables.push(LittleTsu);
@ -319,6 +325,26 @@ mod tests {
} }
} }
#[test]
fn symbols() {
    // Each case pairs an input with the exact syllable sequence `romanize`
    // is expected to produce. Punctuation and spaces are expected back as
    // `NonAlpha` entries at the position where they occurred in the input.
    let symbols = [
        ("x", vec![Ku, Su]),
        ("CIAO!", vec![Cha, O, NonAlpha('!')]),
        ("?", vec![NonAlpha('?')]),
        (
            "macciu-picchu",
            vec![Ma, LittleTsu, Chu, NonAlpha('-'), Pi, LittleTsu, Chu],
        ),
        ("'", vec![NonAlpha('\'')]),
        (
            "adesso té la vedi con lui!!",
            vec![
                A,
                De,
                LittleTsu,
                So,
                NonAlpha(' '),
                Te,
                NonAlpha(' '),
                Ra,
                NonAlpha(' '),
                VuE,
                De,
                I,
                NonAlpha(' '),
                Ko,
                N,
                NonAlpha(' '),
                Ru,
                I,
                NonAlpha('!'),
                NonAlpha('!'),
            ],
        ),
        (".", vec![NonAlpha('.')]),
        (",", vec![NonAlpha(',')]),
        ("(", vec![NonAlpha('(')]),
        (")", vec![NonAlpha(')')]),
    ];
    for (word, expected) in symbols {
        // Name the input in the failure message so a regression points
        // straight at the offending table entry.
        assert_eq!(romanize(word)[..], expected, "romanize({:?})", word);
    }
}
#[test] #[test]
fn test_dictionary() { fn test_dictionary() {
// Read dictionary file // Read dictionary file

View file

@ -118,119 +118,122 @@ pub(crate) enum Syllable {
Zya, Zya,
Zyu, Zyu,
Zye, Zye,
// Extras for sentence making
NonAlpha(char),
} }
pub(crate) fn to_hiragana(syllable: &Syllable) -> String { pub(crate) fn to_hiragana(syllable: &Syllable) -> String {
match syllable { match syllable {
A => "あ", A => "あ".to_string(),
I => "い", I => "い".to_string(),
U => "う", U => "う".to_string(),
E => "え", E => "え".to_string(),
O => "お", O => "お".to_string(),
Ka => "か", Ka => "か".to_string(),
Ki => "き", Ki => "き".to_string(),
Ku => "く", Ku => "く".to_string(),
Ke => "け", Ke => "け".to_string(),
Ko => "こ", Ko => "こ".to_string(),
Sa => "さ", Sa => "さ".to_string(),
Shi => "し", Shi => "し".to_string(),
Su => "す", Su => "す".to_string(),
Se => "せ", Se => "せ".to_string(),
So => "そ", So => "そ".to_string(),
Ta => "た", Ta => "た".to_string(),
Chi => "ち", Chi => "ち".to_string(),
Tsu => "つ", Tsu => "つ".to_string(),
Te => "て", Te => "て".to_string(),
To => "と", To => "と".to_string(),
Na => "な", Na => "な".to_string(),
Ni => "に", Ni => "に".to_string(),
Nu => "ぬ", Nu => "ぬ".to_string(),
Ne => "ね", Ne => "ね".to_string(),
No => "の", No => "の".to_string(),
Ha => "は", Ha => "は".to_string(),
Hi => "ひ", Hi => "ひ".to_string(),
Fu => "ふ", Fu => "ふ".to_string(),
He => "へ", He => "へ".to_string(),
Ho => "ほ", Ho => "ほ".to_string(),
Ma => "ま", Ma => "ま".to_string(),
Mi => "み", Mi => "み".to_string(),
Mu => "む", Mu => "む".to_string(),
Me => "め", Me => "め".to_string(),
Mo => "も", Mo => "も".to_string(),
Ya => "や", Ya => "や".to_string(),
Yu => "ゆ", Yu => "ゆ".to_string(),
Yo => "よ", Yo => "よ".to_string(),
Ra => "ら", Ra => "ら".to_string(),
Ri => "り", Ri => "り".to_string(),
Ru => "る", Ru => "る".to_string(),
Re => "れ", Re => "れ".to_string(),
Ro => "ろ", Ro => "ろ".to_string(),
Wa => "わ", Wa => "わ".to_string(),
N => "ん", N => "ん".to_string(),
Ga => "が", Ga => "が".to_string(),
Gi => "ぎ", Gi => "ぎ".to_string(),
Gu => "ぐ", Gu => "ぐ".to_string(),
Ge => "げ", Ge => "げ".to_string(),
Go => "ご", Go => "ご".to_string(),
Za => "ざ", Za => "ざ".to_string(),
Ji => "じ", Ji => "じ".to_string(),
Zu => "ず", Zu => "ず".to_string(),
Ze => "ぜ", Ze => "ぜ".to_string(),
Zo => "ぞ", Zo => "ぞ".to_string(),
Da => "だ", Da => "だ".to_string(),
De => "で", De => "で".to_string(),
Do => "ど", Do => "ど".to_string(),
Ba => "ば", Ba => "ば".to_string(),
Bi => "び", Bi => "び".to_string(),
Bu => "ぶ", Bu => "ぶ".to_string(),
Be => "べ", Be => "べ".to_string(),
Bo => "ぼ", Bo => "ぼ".to_string(),
Pa => "ぱ", Pa => "ぱ".to_string(),
Pi => "ぴ", Pi => "ぴ".to_string(),
Pu => "ぷ", Pu => "ぷ".to_string(),
Pe => "ぺ", Pe => "ぺ".to_string(),
Po => "ぽ", Po => "ぽ".to_string(),
Kya => "きゃ", Kya => "きゃ".to_string(),
Kyu => "きゅ", Kyu => "きゅ".to_string(),
Kyo => "きょ", Kyo => "きょ".to_string(),
Sha => "しゃ", Sha => "しゃ".to_string(),
Shu => "しゅ", Shu => "しゅ".to_string(),
Sho => "しょ", Sho => "しょ".to_string(),
Cha => "ちゃ", Cha => "ちゃ".to_string(),
Chu => "ちゅ", Chu => "ちゅ".to_string(),
Cho => "ちょ", Cho => "ちょ".to_string(),
Rya => "りゃ", Rya => "りゃ".to_string(),
Ryu => "りゅ", Ryu => "りゅ".to_string(),
Ryo => "りょ", Ryo => "りょ".to_string(),
LittleTsu => "っ", LittleTsu => "っ".to_string(),
LongVowel => "ー", LongVowel => "ー".to_string(),
ChiE => "ちぇ", ChiE => "ちぇ".to_string(),
ToU => "とぅ", ToU => "とぅ".to_string(),
TeI => "とぃ", TeI => "とぃ".to_string(),
VuA => "ゔぁ", VuA => "ゔぁ".to_string(),
VuI => "ゔぃ", VuI => "ゔぃ".to_string(),
Vu => "ゔ", Vu => "ゔ".to_string(),
VuE => "ゔぇ", VuE => "ゔぇ".to_string(),
VuO => "ゔぉ", VuO => "ゔぉ".to_string(),
Ja => "じゃ", Ja => "じゃ".to_string(),
Ju => "じゅ", Ju => "じゅ".to_string(),
Jo => "じょ", Jo => "じょ".to_string(),
Je => "じぇ", Je => "じぇ".to_string(),
Zi => "ずぃ", Zi => "ずぃ".to_string(),
Gya => "ぎゃ", Gya => "ぎゃ".to_string(),
Gyu => "ぎゅ", Gyu => "ぎゅ".to_string(),
Gyo => "ぎょ", Gyo => "ぎょ".to_string(),
Bya => "びゃ", Bya => "びゃ".to_string(),
Byu => "びゅ", Byu => "びゅ".to_string(),
Byo => "びょ", Byo => "びょ".to_string(),
Pya => "ぴゃ", Pya => "ぴゃ".to_string(),
Pyu => "ぴゅ", Pyu => "ぴゅ".to_string(),
Pyo => "ぴょ", Pyo => "ぴょ".to_string(),
Zyo => "ずょ", Zyo => "ずょ".to_string(),
Zya => "ずゃ", Zya => "ずゃ".to_string(),
Zyu => "ずゅ", Zyu => "ずゅ".to_string(),
Zye => "ずぇ", Zye => "ずぇ".to_string(),
}.to_string() NonAlpha(char) => char.to_string()
}
} }
impl Display for Syllable { impl Display for Syllable {