diff --git a/src/lib.rs b/src/lib.rs index 9591c45..76b939f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,12 @@ mod tests { use super::*; #[test] - fn test_convert_to_hiragana() { + fn test_convert_to_hiragana_words() { assert_eq!(convert_to_hiragana("assolutamente"), "あっそるためんて"); } + + #[test] + fn test_convert_to_hiragana_symbols() { + assert_eq!(convert_to_hiragana("asso-lutamente no!1"), "あっそ-るためんて の!1"); + } } \ No newline at end of file diff --git a/src/romanization.rs b/src/romanization.rs index 5c0ce2f..770246b 100644 --- a/src/romanization.rs +++ b/src/romanization.rs @@ -42,6 +42,8 @@ fn match3(s: &str) -> Option { "zie" => Some(Zye), "zio" => Some(Zyo), "ziu" => Some(Zyu), + "chu" => Some(Chu), + "cho" => Some(Cho), _ => None, } } @@ -183,12 +185,16 @@ pub(crate) fn romanize(word: &str) -> Vec { .to_lowercase() .chars() .map(remove_accents) - .filter(|c| c.is_alphabetic()) .collect(); let mut remaining = lowercase.clone(); let mut syllables = Vec::new(); while remaining.len() > 0 { + if !remaining.as_bytes()[0].is_ascii_alphabetic() { + syllables.push(NonAlpha(remaining.as_bytes()[0] as char)); + remaining = remaining[1..].to_string(); + continue; + } // Check for double consonants if remaining.len() >= 3 && remaining.as_bytes()[0] == remaining.as_bytes()[1] { syllables.push(LittleTsu); @@ -319,6 +325,26 @@ mod tests { } } + #[test] + fn symbols() { + let symbols = [ + ("x", vec![Ku, Su]), + ("CIAO!", vec![Cha, O, NonAlpha('!')]), + ("?", vec![NonAlpha('?')]), + ("macciu-picchu", vec![Ma, LittleTsu, Chu, NonAlpha('-'), Pi, LittleTsu, Chu]), + ("'", vec![NonAlpha('\'')]), + ("adesso té la vedi con lui!!", vec![A, De, LittleTsu, So, NonAlpha(' '), Te, NonAlpha(' '), Ra, NonAlpha(' '), VuE, De, I, NonAlpha(' '), Ko, N, NonAlpha(' '), Ru, I, NonAlpha('!'), NonAlpha('!')]), + (".", vec![NonAlpha('.')]), + (",", vec![NonAlpha(',')]), + ("(", vec![NonAlpha('(')]), + (")", vec![NonAlpha(')')]), + ]; + + for (word, expected) in symbols { + assert_eq!(romanize(word)[..], expected); + } + } + #[test] fn test_dictionary() { // Read dictionary file diff --git a/src/syllable.rs b/src/syllable.rs index ee029bf..d86f883 100644 --- a/src/syllable.rs +++ b/src/syllable.rs @@ -118,119 +118,122 @@ pub(crate) enum Syllable { Zya, Zyu, Zye, + // Extras for sentence making + NonAlpha(char), } pub(crate) fn to_hiragana(syllable: &Syllable) -> String { match syllable { - A => "あ", - I => "い", - U => "う", - E => "え", - O => "お", - Ka => "か", - Ki => "き", - Ku => "く", - Ke => "け", - Ko => "こ", - Sa => "さ", - Shi => "し", - Su => "す", - Se => "せ", - So => "そ", - Ta => "た", - Chi => "ち", - Tsu => "つ", - Te => "て", - To => "と", - Na => "な", - Ni => "に", - Nu => "ぬ", - Ne => "ね", - No => "の", - Ha => "は", - Hi => "ひ", - Fu => "ふ", - He => "へ", - Ho => "ほ", - Ma => "ま", - Mi => "み", - Mu => "む", - Me => "め", - Mo => "も", - Ya => "や", - Yu => "ゆ", - Yo => "よ", - Ra => "ら", - Ri => "り", - Ru => "る", - Re => "れ", - Ro => "ろ", - Wa => "わ", - N => "ん", - Ga => "が", - Gi => "ぎ", - Gu => "ぐ", - Ge => "げ", - Go => "ご", - Za => "ざ", - Ji => "じ", - Zu => "ず", - Ze => "ぜ", - Zo => "ぞ", - Da => "だ", - De => "で", - Do => "ど", - Ba => "ば", - Bi => "び", - Bu => "ぶ", - Be => "べ", - Bo => "ぼ", - Pa => "ぱ", - Pi => "ぴ", - Pu => "ぷ", - Pe => "ぺ", - Po => "ぽ", - Kya => "きゃ", - Kyu => "きゅ", - Kyo => "きょ", - Sha => "しゃ", - Shu => "しゅ", - Sho => "しょ", - Cha => "ちゃ", - Chu => "ちゅ", - Cho => "ちょ", - Rya => "りゃ", - Ryu => "りゅ", - Ryo => "りょ", - LittleTsu => "っ", - LongVowel => "ー", - ChiE => "ちぇ", - ToU => "とぅ", - TeI => "とぃ", - VuA => "ゔぁ", - VuI => "ゔぃ", - Vu => "ゔ", - VuE => "ゔぇ", - VuO => "ゔぉ", - Ja => "じゃ", - Ju => "じゅ", - Jo => "じょ", - Je => "じぇ", - Zi => "ずぃ", - Gya => "ぎゃ", - Gyu => "ぎゅ", - Gyo => "ぎょ", - Bya => "びゃ", - Byu => "びゅ", - Byo => "びょ", - Pya => "ぴゃ", - Pyu => "ぴゅ", - Pyo => "ぴょ", - Zyo => "ずょ", - Zya => "ずゃ", - Zyu => "ずゅ", - Zye => "ずぇ", - }.to_string() + A => "あ".to_string(), + I => "い".to_string(), + U => "う".to_string(), + E => "え".to_string(), + O => "お".to_string(), + Ka => "か".to_string(), + Ki => "き".to_string(), + Ku => "く".to_string(), + Ke => "け".to_string(), + Ko => "こ".to_string(), + Sa => "さ".to_string(), + Shi => "し".to_string(), + Su => "す".to_string(), + Se => "せ".to_string(), + So => "そ".to_string(), + Ta => "た".to_string(), + Chi => "ち".to_string(), + Tsu => "つ".to_string(), + Te => "て".to_string(), + To => "と".to_string(), + Na => "な".to_string(), + Ni => "に".to_string(), + Nu => "ぬ".to_string(), + Ne => "ね".to_string(), + No => "の".to_string(), + Ha => "は".to_string(), + Hi => "ひ".to_string(), + Fu => "ふ".to_string(), + He => "へ".to_string(), + Ho => "ほ".to_string(), + Ma => "ま".to_string(), + Mi => "み".to_string(), + Mu => "む".to_string(), + Me => "め".to_string(), + Mo => "も".to_string(), + Ya => "や".to_string(), + Yu => "ゆ".to_string(), + Yo => "よ".to_string(), + Ra => "ら".to_string(), + Ri => "り".to_string(), + Ru => "る".to_string(), + Re => "れ".to_string(), + Ro => "ろ".to_string(), + Wa => "わ".to_string(), + N => "ん".to_string(), + Ga => "が".to_string(), + Gi => "ぎ".to_string(), + Gu => "ぐ".to_string(), + Ge => "げ".to_string(), + Go => "ご".to_string(), + Za => "ざ".to_string(), + Ji => "じ".to_string(), + Zu => "ず".to_string(), + Ze => "ぜ".to_string(), + Zo => "ぞ".to_string(), + Da => "だ".to_string(), + De => "で".to_string(), + Do => "ど".to_string(), + Ba => "ば".to_string(), + Bi => "び".to_string(), + Bu => "ぶ".to_string(), + Be => "べ".to_string(), + Bo => "ぼ".to_string(), + Pa => "ぱ".to_string(), + Pi => "ぴ".to_string(), + Pu => "ぷ".to_string(), + Pe => "ぺ".to_string(), + Po => "ぽ".to_string(), + Kya => "きゃ".to_string(), + Kyu => "きゅ".to_string(), + Kyo => "きょ".to_string(), + Sha => "しゃ".to_string(), + Shu => "しゅ".to_string(), + Sho => "しょ".to_string(), + Cha => "ちゃ".to_string(), + Chu => "ちゅ".to_string(), + Cho => "ちょ".to_string(), + Rya => "りゃ".to_string(), + Ryu => "りゅ".to_string(), + Ryo => "りょ".to_string(), + LittleTsu => "っ".to_string(), + LongVowel => "ー".to_string(), + ChiE => "ちぇ".to_string(), + ToU => "とぅ".to_string(), + TeI => "とぃ".to_string(), + VuA => "ゔぁ".to_string(), + VuI => "ゔぃ".to_string(), + Vu => "ゔ".to_string(), + VuE => "ゔぇ".to_string(), + VuO => "ゔぉ".to_string(), + Ja => "じゃ".to_string(), + Ju => "じゅ".to_string(), + Jo => "じょ".to_string(), + Je => "じぇ".to_string(), + Zi => "ずぃ".to_string(), + Gya => "ぎゃ".to_string(), + Gyu => "ぎゅ".to_string(), + Gyo => "ぎょ".to_string(), + Bya => "びゃ".to_string(), + Byu => "びゅ".to_string(), + Byo => "びょ".to_string(), + Pya => "ぴゃ".to_string(), + Pyu => "ぴゅ".to_string(), + Pyo => "ぴょ".to_string(), + Zyo => "ずょ".to_string(), + Zya => "ずゃ".to_string(), + Zyu => "ずゅ".to_string(), + Zye => "ずぇ".to_string(), + NonAlpha(char) => char.to_string() + } } impl Display for Syllable {