diff --git a/Cargo.lock b/Cargo.lock index d7fbbd0..f93fda1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -235,7 +235,7 @@ dependencies = [ [[package]] name = "weeblib" -version = "0.1.1" +version = "0.1.2" dependencies = [ "console_error_panic_hook", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 8711020..3ef50ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "weeblib" -version = "0.1.1" +version = "0.1.2" edition = "2021" authors = ["Hamcha "] diff --git a/src/romanization.rs b/src/romanization.rs index 770246b..3c5e2a4 100644 --- a/src/romanization.rs +++ b/src/romanization.rs @@ -1,7 +1,7 @@ use crate::syllable::{Syllable, Syllable::*}; -fn match4(s: &str) -> Option<Syllable> { - match s { +fn match4(s: &[char]) -> Option<Syllable> { + match s.iter().collect::<String>().as_str() { "scia" => Some(Sha), "scio" => Some(Sho), "sciu" => Some(Shu), @@ -15,8 +15,8 @@ fn match4(s: &str) -> Option<Syllable> { } } -fn match3(s: &str) -> Option<Syllable> { - match s { +fn match3(s: &[char]) -> Option<Syllable> { + match s.iter().collect::<String>().as_str() { "sci" => Some(Shi), "chi" => Some(Ki), "che" => Some(Ke), @@ -48,8 +48,8 @@ fn match3(s: &str) -> Option<Syllable> { } } -fn match2(s: &str) -> Option<Syllable> { - match s { +fn match2(s: &[char]) -> Option<Syllable> { + match s.iter().collect::<String>().as_str() { "ka" => Some(Ka), "ca" => Some(Ka), "ki" => Some(Ki), @@ -133,33 +133,33 @@ fn match2(s: &str) -> Option<Syllable> { } } -fn match1(s: &str) -> Option<Syllable> { +fn match1(s: &char) -> Option<Syllable> { match s { - "a" => Some(A), - "i" => Some(I), - "u" => Some(U), - "e" => Some(E), - "o" => Some(O), - "n" => Some(N), - "s" => Some(Su), - "r" => Some(Ru), - "b" => Some(Bu), - "m" => Some(Mu), - "y" => Some(Yu), - "w" => Some(Wa), - "g" => Some(Gu), - "z" => Some(Zu), - "d" => Some(De), - "p" => Some(Pu), - "k" => Some(Ku), - "f" => Some(Fu), - "c" => Some(Ku), - "t" => Some(Tsu), - "v" => Some(Vu), - "l" => Some(Ru), - "h" => Some(LongVowel), - "j" => Some(I), - "q" => Some(Kyu), + 'a' => Some(A), + 'i' => Some(I), + 'u' => Some(U), + 'e' =>
Some(E), + 'o' => Some(O), + 'n' => Some(N), + 's' => Some(Su), + 'r' => Some(Ru), + 'b' => Some(Bu), + 'm' => Some(Mu), + 'y' => Some(Yu), + 'w' => Some(Wa), + 'g' => Some(Gu), + 'z' => Some(Zu), + 'd' => Some(De), + 'p' => Some(Pu), + 'k' => Some(Ku), + 'f' => Some(Fu), + 'c' => Some(Ku), + 't' => Some(Tsu), + 'v' => Some(Vu), + 'l' => Some(Ru), + 'h' => Some(LongVowel), + 'j' => Some(I), + 'q' => Some(Kyu), _ => None, } } @@ -186,23 +186,24 @@ pub(crate) fn romanize(word: &str) -> Vec<Syllable> { .chars() .map(remove_accents) .collect(); - let mut remaining = lowercase.clone(); + let mut remaining: Vec<char> = lowercase.clone().chars().collect(); let mut syllables = Vec::new(); while remaining.len() > 0 { - if !remaining.as_bytes()[0].is_ascii_alphabetic() { - syllables.push(NonAlpha(remaining.as_bytes()[0] as char)); - remaining = remaining[1..].to_string(); + let next = remaining[0]; + if !next.is_alphabetic() { + syllables.push(NonAlpha(next)); + remaining.remove(0); continue; } // Check for double consonants - if remaining.len() >= 3 && remaining.as_bytes()[0] == remaining.as_bytes()[1] { + if remaining.len() >= 3 && remaining[0] == remaining[1] { syllables.push(LittleTsu); - remaining = remaining[1..].to_string(); + remaining.remove(0); continue; } // Check for X - if remaining.as_bytes()[0] == b'x' { + if remaining[0] == 'x' { syllables.push(Ku); if remaining.len() < 2 { // Last letter @@ -210,36 +211,42 @@ pub(crate) fn romanize(word: &str) -> Vec<Syllable> { break; } // Replace X with S - remaining = ["s", &remaining[1..]].concat(); + remaining[0] = 's'; } // Check for 4 letter patterns, then 3, etc.
if remaining.len() >= 4 { if let Some(syllable) = match4(&remaining[..4]) { syllables.push(syllable); - remaining = remaining[4..].to_string(); + remaining.remove(0); + remaining.remove(0); + remaining.remove(0); + remaining.remove(0); continue; } } if remaining.len() >= 3 { if let Some(syllable) = match3(&remaining[..3]) { syllables.push(syllable); - remaining = remaining[3..].to_string(); + remaining.remove(0); + remaining.remove(0); + remaining.remove(0); continue; } } if remaining.len() >= 2 { if let Some(syllable) = match2(&remaining[..2]) { syllables.push(syllable); - remaining = remaining[2..].to_string(); + remaining.remove(0); + remaining.remove(0); continue; } } - if let Some(syllable) = match1(&remaining[..1]) { + if let Some(syllable) = match1(&remaining[0]) { syllables.push(syllable); - remaining = remaining[1..].to_string(); + remaining.remove(0); continue; } - panic!("No match found for {} ({})", remaining, word); + panic!("No match found for {} ({})", remaining.iter().collect::<String>(), word); } syllables @@ -345,6 +352,11 @@ mod tests { } } + #[test] + fn test_utf8_symbol() { + assert_eq!(romanize("Some — test"), vec![So, Me, NonAlpha(' '), NonAlpha('—'), NonAlpha(' '), Te, Su, Tsu]); + } + #[test] fn test_dictionary() { // Read dictionary file