fix unicode issues
This commit is contained in:
parent
e09f185bb1
commit
8e830d03cb
3 changed files with 60 additions and 48 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -235,7 +235,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "weeblib"
|
name = "weeblib"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"console_error_panic_hook",
|
"console_error_panic_hook",
|
||||||
"wasm-bindgen",
|
"wasm-bindgen",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "weeblib"
|
name = "weeblib"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Hamcha <hamcha@crunchy.rocks>"]
|
authors = ["Hamcha <hamcha@crunchy.rocks>"]
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::syllable::{Syllable, Syllable::*};
|
use crate::syllable::{Syllable, Syllable::*};
|
||||||
|
|
||||||
fn match4(s: &str) -> Option<Syllable> {
|
fn match4(s: &[char]) -> Option<Syllable> {
|
||||||
match s {
|
match s.iter().collect::<String>().as_str() {
|
||||||
"scia" => Some(Sha),
|
"scia" => Some(Sha),
|
||||||
"scio" => Some(Sho),
|
"scio" => Some(Sho),
|
||||||
"sciu" => Some(Shu),
|
"sciu" => Some(Shu),
|
||||||
|
@ -15,8 +15,8 @@ fn match4(s: &str) -> Option<Syllable> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match3(s: &str) -> Option<Syllable> {
|
fn match3(s: &[char]) -> Option<Syllable> {
|
||||||
match s {
|
match s.iter().collect::<String>().as_str() {
|
||||||
"sci" => Some(Shi),
|
"sci" => Some(Shi),
|
||||||
"chi" => Some(Ki),
|
"chi" => Some(Ki),
|
||||||
"che" => Some(Ke),
|
"che" => Some(Ke),
|
||||||
|
@ -48,8 +48,8 @@ fn match3(s: &str) -> Option<Syllable> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match2(s: &str) -> Option<Syllable> {
|
fn match2(s: &[char]) -> Option<Syllable> {
|
||||||
match s {
|
match s.iter().collect::<String>().as_str() {
|
||||||
"ka" => Some(Ka),
|
"ka" => Some(Ka),
|
||||||
"ca" => Some(Ka),
|
"ca" => Some(Ka),
|
||||||
"ki" => Some(Ki),
|
"ki" => Some(Ki),
|
||||||
|
@ -133,33 +133,33 @@ fn match2(s: &str) -> Option<Syllable> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match1(s: &str) -> Option<Syllable> {
|
fn match1(s: &char) -> Option<Syllable> {
|
||||||
match s {
|
match s {
|
||||||
"a" => Some(A),
|
'a' => Some(A),
|
||||||
"i" => Some(I),
|
'i' => Some(I),
|
||||||
"u" => Some(U),
|
'u' => Some(U),
|
||||||
"e" => Some(E),
|
'e' => Some(E),
|
||||||
"o" => Some(O),
|
'o' => Some(O),
|
||||||
"n" => Some(N),
|
'n' => Some(N),
|
||||||
"s" => Some(Su),
|
's' => Some(Su),
|
||||||
"r" => Some(Ru),
|
'r' => Some(Ru),
|
||||||
"b" => Some(Bu),
|
'b' => Some(Bu),
|
||||||
"m" => Some(Mu),
|
'm' => Some(Mu),
|
||||||
"y" => Some(Yu),
|
'y' => Some(Yu),
|
||||||
"w" => Some(Wa),
|
'w' => Some(Wa),
|
||||||
"g" => Some(Gu),
|
'g' => Some(Gu),
|
||||||
"z" => Some(Zu),
|
'z' => Some(Zu),
|
||||||
"d" => Some(De),
|
'd' => Some(De),
|
||||||
"p" => Some(Pu),
|
'p' => Some(Pu),
|
||||||
"k" => Some(Ku),
|
'k' => Some(Ku),
|
||||||
"f" => Some(Fu),
|
'f' => Some(Fu),
|
||||||
"c" => Some(Ku),
|
'c' => Some(Ku),
|
||||||
"t" => Some(Tsu),
|
't' => Some(Tsu),
|
||||||
"v" => Some(Vu),
|
'v' => Some(Vu),
|
||||||
"l" => Some(Ru),
|
'l' => Some(Ru),
|
||||||
"h" => Some(LongVowel),
|
'h' => Some(LongVowel),
|
||||||
"j" => Some(I),
|
'j' => Some(I),
|
||||||
"q" => Some(Kyu),
|
'q' => Some(Kyu),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -186,23 +186,24 @@ pub(crate) fn romanize(word: &str) -> Vec<Syllable> {
|
||||||
.chars()
|
.chars()
|
||||||
.map(remove_accents)
|
.map(remove_accents)
|
||||||
.collect();
|
.collect();
|
||||||
let mut remaining = lowercase.clone();
|
let mut remaining: Vec<char> = lowercase.clone().chars().collect();
|
||||||
let mut syllables = Vec::new();
|
let mut syllables = Vec::new();
|
||||||
|
|
||||||
while remaining.len() > 0 {
|
while remaining.len() > 0 {
|
||||||
if !remaining.as_bytes()[0].is_ascii_alphabetic() {
|
let next = remaining[0];
|
||||||
syllables.push(NonAlpha(remaining.as_bytes()[0] as char));
|
if !next.is_alphabetic() {
|
||||||
remaining = remaining[1..].to_string();
|
syllables.push(NonAlpha(next));
|
||||||
|
remaining.remove(0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check for double consonants
|
// Check for double consonants
|
||||||
if remaining.len() >= 3 && remaining.as_bytes()[0] == remaining.as_bytes()[1] {
|
if remaining.len() >= 3 && remaining[0] == remaining[1] {
|
||||||
syllables.push(LittleTsu);
|
syllables.push(LittleTsu);
|
||||||
remaining = remaining[1..].to_string();
|
remaining.remove(0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check for X
|
// Check for X
|
||||||
if remaining.as_bytes()[0] == b'x' {
|
if remaining[0] == 'x' {
|
||||||
syllables.push(Ku);
|
syllables.push(Ku);
|
||||||
if remaining.len() < 2 {
|
if remaining.len() < 2 {
|
||||||
// Last letter
|
// Last letter
|
||||||
|
@ -210,36 +211,42 @@ pub(crate) fn romanize(word: &str) -> Vec<Syllable> {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Replace X with S
|
// Replace X with S
|
||||||
remaining = ["s", &remaining[1..]].concat();
|
remaining[0] = 's';
|
||||||
}
|
}
|
||||||
// Check for 4 letter patterns, then 3, etc.
|
// Check for 4 letter patterns, then 3, etc.
|
||||||
if remaining.len() >= 4 {
|
if remaining.len() >= 4 {
|
||||||
if let Some(syllable) = match4(&remaining[..4]) {
|
if let Some(syllable) = match4(&remaining[..4]) {
|
||||||
syllables.push(syllable);
|
syllables.push(syllable);
|
||||||
remaining = remaining[4..].to_string();
|
remaining.remove(0);
|
||||||
|
remaining.remove(0);
|
||||||
|
remaining.remove(0);
|
||||||
|
remaining.remove(0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if remaining.len() >= 3 {
|
if remaining.len() >= 3 {
|
||||||
if let Some(syllable) = match3(&remaining[..3]) {
|
if let Some(syllable) = match3(&remaining[..3]) {
|
||||||
syllables.push(syllable);
|
syllables.push(syllable);
|
||||||
remaining = remaining[3..].to_string();
|
remaining.remove(0);
|
||||||
|
remaining.remove(0);
|
||||||
|
remaining.remove(0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if remaining.len() >= 2 {
|
if remaining.len() >= 2 {
|
||||||
if let Some(syllable) = match2(&remaining[..2]) {
|
if let Some(syllable) = match2(&remaining[..2]) {
|
||||||
syllables.push(syllable);
|
syllables.push(syllable);
|
||||||
remaining = remaining[2..].to_string();
|
remaining.remove(0);
|
||||||
|
remaining.remove(0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if let Some(syllable) = match1(&remaining[..1]) {
|
if let Some(syllable) = match1(&remaining[0]) {
|
||||||
syllables.push(syllable);
|
syllables.push(syllable);
|
||||||
remaining = remaining[1..].to_string();
|
remaining.remove(0);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
panic!("No match found for {} ({})", remaining, word);
|
panic!("No match found for {} ({})", remaining.iter().collect::<String>(), word);
|
||||||
}
|
}
|
||||||
|
|
||||||
syllables
|
syllables
|
||||||
|
@ -345,6 +352,11 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_utf8_symbol() {
|
||||||
|
assert_eq!(romanize("Some — test"), vec![So, Me, NonAlpha(' '), NonAlpha('—'), NonAlpha(' '), Te, Su, Tsu]);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_dictionary() {
|
fn test_dictionary() {
|
||||||
// Read dictionary file
|
// Read dictionary file
|
||||||
|
|
Loading…
Reference in a new issue