allow non-alpha passthrough

This commit is contained in:
Hamcha 2024-10-25 20:52:35 +02:00
parent fa50107a04
commit ba38b98624
Signed by: hamcha
GPG key ID: 1669C533B8CF6D89
3 changed files with 145 additions and 111 deletions

View file

@ -18,7 +18,12 @@ mod tests {
use super::*;
// A word made only of letters converts syllable by syllable; the doubled
// consonant in "asso" is expected to come out as the small tsu.
#[test]
fn test_convert_to_hiragana_words() {
    assert_eq!(convert_to_hiragana("assolutamente"), "あっそるためんて");
}
// The hyphen, space, exclamation mark, and digit in the input are expected
// to appear unchanged in the converted output.
#[test]
fn test_convert_to_hiragana_symbols() {
    let converted = convert_to_hiragana("asso-lutamente no!1");
    assert_eq!(converted, "あっそ-るためんて の!1");
}
}

View file

@ -42,6 +42,8 @@ fn match3(s: &str) -> Option<Syllable> {
"zie" => Some(Zye),
"zio" => Some(Zyo),
"ziu" => Some(Zyu),
"chu" => Some(Chu),
"cho" => Some(Cho),
_ => None,
}
}
@ -183,12 +185,16 @@ pub(crate) fn romanize(word: &str) -> Vec<Syllable> {
.to_lowercase()
.chars()
.map(remove_accents)
.filter(|c| c.is_alphabetic())
.collect();
let mut remaining = lowercase.clone();
let mut syllables = Vec::new();
while remaining.len() > 0 {
if !remaining.as_bytes()[0].is_ascii_alphabetic() {
syllables.push(NonAlpha(remaining.as_bytes()[0] as char));
remaining = remaining[1..].to_string();
continue;
}
// Check for double consonants
if remaining.len() >= 3 && remaining.as_bytes()[0] == remaining.as_bytes()[1] {
syllables.push(LittleTsu);
@ -319,6 +325,26 @@ mod tests {
}
}
// Table-driven check of `romanize` on inputs that mix letters with
// punctuation and spaces: each non-letter character is expected to show up
// as its own `NonAlpha` entry, in input order.
#[test]
fn symbols() {
    let cases = [
        ("x", vec![Ku, Su]),
        ("CIAO!", vec![Cha, O, NonAlpha('!')]),
        ("?", vec![NonAlpha('?')]),
        (
            "macciu-picchu",
            vec![Ma, LittleTsu, Chu, NonAlpha('-'), Pi, LittleTsu, Chu],
        ),
        ("'", vec![NonAlpha('\'')]),
        (
            "adesso té la vedi con lui!!",
            vec![
                A,
                De,
                LittleTsu,
                So,
                NonAlpha(' '),
                Te,
                NonAlpha(' '),
                Ra,
                NonAlpha(' '),
                VuE,
                De,
                I,
                NonAlpha(' '),
                Ko,
                N,
                NonAlpha(' '),
                Ru,
                I,
                NonAlpha('!'),
                NonAlpha('!'),
            ],
        ),
        (".", vec![NonAlpha('.')]),
        (",", vec![NonAlpha(',')]),
        ("(", vec![NonAlpha('(')]),
        (")", vec![NonAlpha(')')]),
    ];
    for (input, want) in cases {
        let got = romanize(input);
        assert_eq!(got[..], want);
    }
}
#[test]
fn test_dictionary() {
// Read dictionary file

View file

@ -118,119 +118,122 @@ pub(crate) enum Syllable {
Zya,
Zyu,
Zye,
// Extras for sentence making
NonAlpha(char),
}
pub(crate) fn to_hiragana(syllable: &Syllable) -> String {
match syllable {
A => "",
I => "",
U => "",
E => "",
O => "",
Ka => "",
Ki => "",
Ku => "",
Ke => "",
Ko => "",
Sa => "",
Shi => "",
Su => "",
Se => "",
So => "",
Ta => "",
Chi => "",
Tsu => "",
Te => "",
To => "",
Na => "",
Ni => "",
Nu => "",
Ne => "",
No => "",
Ha => "",
Hi => "",
Fu => "",
He => "",
Ho => "",
Ma => "",
Mi => "",
Mu => "",
Me => "",
Mo => "",
Ya => "",
Yu => "",
Yo => "",
Ra => "",
Ri => "",
Ru => "",
Re => "",
Ro => "",
Wa => "",
N => "",
Ga => "",
Gi => "",
Gu => "",
Ge => "",
Go => "",
Za => "",
Ji => "",
Zu => "",
Ze => "",
Zo => "",
Da => "",
De => "",
Do => "",
Ba => "",
Bi => "",
Bu => "",
Be => "",
Bo => "",
Pa => "",
Pi => "",
Pu => "",
Pe => "",
Po => "",
Kya => "きゃ",
Kyu => "きゅ",
Kyo => "きょ",
Sha => "しゃ",
Shu => "しゅ",
Sho => "しょ",
Cha => "ちゃ",
Chu => "ちゅ",
Cho => "ちょ",
Rya => "りゃ",
Ryu => "りゅ",
Ryo => "りょ",
LittleTsu => "",
LongVowel => "",
ChiE => "ちぇ",
ToU => "とぅ",
TeI => "とぃ",
VuA => "ゔぁ",
VuI => "ゔぃ",
Vu => "",
VuE => "ゔぇ",
VuO => "ゔぉ",
Ja => "じゃ",
Ju => "じゅ",
Jo => "じょ",
Je => "じぇ",
Zi => "ずぃ",
Gya => "ぎゃ",
Gyu => "ぎゅ",
Gyo => "ぎょ",
Bya => "びゃ",
Byu => "びゅ",
Byo => "びょ",
Pya => "ぴゃ",
Pyu => "ぴゅ",
Pyo => "ぴょ",
Zyo => "ずょ",
Zya => "ずゃ",
Zyu => "ずゅ",
Zye => "ずぇ",
}.to_string()
A => "".to_string(),
I => "".to_string(),
U => "".to_string(),
E => "".to_string(),
O => "".to_string(),
Ka => "".to_string(),
Ki => "".to_string(),
Ku => "".to_string(),
Ke => "".to_string(),
Ko => "".to_string(),
Sa => "".to_string(),
Shi => "".to_string(),
Su => "".to_string(),
Se => "".to_string(),
So => "".to_string(),
Ta => "".to_string(),
Chi => "".to_string(),
Tsu => "".to_string(),
Te => "".to_string(),
To => "".to_string(),
Na => "".to_string(),
Ni => "".to_string(),
Nu => "".to_string(),
Ne => "".to_string(),
No => "".to_string(),
Ha => "".to_string(),
Hi => "".to_string(),
Fu => "".to_string(),
He => "".to_string(),
Ho => "".to_string(),
Ma => "".to_string(),
Mi => "".to_string(),
Mu => "".to_string(),
Me => "".to_string(),
Mo => "".to_string(),
Ya => "".to_string(),
Yu => "".to_string(),
Yo => "".to_string(),
Ra => "".to_string(),
Ri => "".to_string(),
Ru => "".to_string(),
Re => "".to_string(),
Ro => "".to_string(),
Wa => "".to_string(),
N => "".to_string(),
Ga => "".to_string(),
Gi => "".to_string(),
Gu => "".to_string(),
Ge => "".to_string(),
Go => "".to_string(),
Za => "".to_string(),
Ji => "".to_string(),
Zu => "".to_string(),
Ze => "".to_string(),
Zo => "".to_string(),
Da => "".to_string(),
De => "".to_string(),
Do => "".to_string(),
Ba => "".to_string(),
Bi => "".to_string(),
Bu => "".to_string(),
Be => "".to_string(),
Bo => "".to_string(),
Pa => "".to_string(),
Pi => "".to_string(),
Pu => "".to_string(),
Pe => "".to_string(),
Po => "".to_string(),
Kya => "きゃ".to_string(),
Kyu => "きゅ".to_string(),
Kyo => "きょ".to_string(),
Sha => "しゃ".to_string(),
Shu => "しゅ".to_string(),
Sho => "しょ".to_string(),
Cha => "ちゃ".to_string(),
Chu => "ちゅ".to_string(),
Cho => "ちょ".to_string(),
Rya => "りゃ".to_string(),
Ryu => "りゅ".to_string(),
Ryo => "りょ".to_string(),
LittleTsu => "".to_string(),
LongVowel => "".to_string(),
ChiE => "ちぇ".to_string(),
ToU => "とぅ".to_string(),
TeI => "とぃ".to_string(),
VuA => "ゔぁ".to_string(),
VuI => "ゔぃ".to_string(),
Vu => "".to_string(),
VuE => "ゔぇ".to_string(),
VuO => "ゔぉ".to_string(),
Ja => "じゃ".to_string(),
Ju => "じゅ".to_string(),
Jo => "じょ".to_string(),
Je => "じぇ".to_string(),
Zi => "ずぃ".to_string(),
Gya => "ぎゃ".to_string(),
Gyu => "ぎゅ".to_string(),
Gyo => "ぎょ".to_string(),
Bya => "びゃ".to_string(),
Byu => "びゅ".to_string(),
Byo => "びょ".to_string(),
Pya => "ぴゃ".to_string(),
Pyu => "ぴゅ".to_string(),
Pyo => "ぴょ".to_string(),
Zyo => "ずょ".to_string(),
Zya => "ずゃ".to_string(),
Zyu => "ずゅ".to_string(),
Zye => "ずぇ".to_string(),
NonAlpha(char) => char.to_string()
}
}
impl Display for Syllable {