1 use std::path::Path; 2 use std::str::FromStr; 3 4 use lazy_static::lazy_static; 5 use regex::Regex; 6 7 use crate::common::{Codepoint, CodepointIter, UcdFile, UcdFileByCodepoint}; 8 use crate::error::Error; 9 10 /// A single row in the `Jamo.txt` file. 11 /// 12 /// The `Jamo.txt` file defines the `Jamo_Short_Name` property. 13 #[derive(Clone, Debug, Default, Eq, PartialEq)] 14 pub struct JamoShortName { 15 /// The codepoint corresponding to this row. 16 pub codepoint: Codepoint, 17 /// The actual "Jamo Short Name." This string contains at most 3 bytes and 18 /// may be empty. 19 pub name: String, 20 } 21 22 impl UcdFile for JamoShortName { relative_file_path() -> &'static Path23 fn relative_file_path() -> &'static Path { 24 Path::new("Jamo.txt") 25 } 26 } 27 28 impl UcdFileByCodepoint for JamoShortName { codepoints(&self) -> CodepointIter29 fn codepoints(&self) -> CodepointIter { 30 self.codepoint.into_iter() 31 } 32 } 33 34 impl FromStr for JamoShortName { 35 type Err = Error; 36 from_str(line: &str) -> Result<JamoShortName, Error>37 fn from_str(line: &str) -> Result<JamoShortName, Error> { 38 lazy_static! { 39 static ref PARTS: Regex = Regex::new( 40 r"(?x) 41 ^ 42 (?P<codepoint>[A-Z0-9]+); 43 \s* 44 (?P<name>[A-Z]*) 45 " 46 ) 47 .unwrap(); 48 }; 49 50 let caps = match PARTS.captures(line.trim()) { 51 Some(caps) => caps, 52 None => return err!("invalid Jamo_Short_name line"), 53 }; 54 Ok(JamoShortName { 55 codepoint: caps["codepoint"].parse()?, 56 name: caps.name("name").unwrap().as_str().to_string(), 57 }) 58 } 59 } 60 61 #[cfg(test)] 62 mod tests { 63 use super::JamoShortName; 64 65 #[test] parse1()66 fn parse1() { 67 let line = "1164; YAE # HANGUL JUNGSEONG YAE\n"; 68 let row: JamoShortName = line.parse().unwrap(); 69 assert_eq!(row.codepoint, 0x1164); 70 assert_eq!(row.name, "YAE"); 71 } 72 73 #[test] parse2()74 fn parse2() { 75 let line = "110B; # HANGUL CHOSEONG IEUNG\n"; 76 let row: JamoShortName = line.parse().unwrap(); 77 assert_eq!(row.codepoint, 0x110B); 78 assert_eq!(row.name, ""); 79 } 80 } 81