1 use unicode_tables::jamo_short_name::JAMO_SHORT_NAME;
2 
3 // This implementation should correspond to the algorithms described in
4 // Unicode 3.12.
5 
/// The codepoint ranges that together cover every precomposed Hangul
/// syllable.
///
/// Each entry is an inclusive `(start, end)` pair. There is exactly one
/// range, `AC00..D7A3`, as given in Unicode 4.8 Table 4-13.
pub const RANGE_HANGUL_SYLLABLE: &[(u32, u32)] = &[(0xAC00, 0xD7A3)];
13 
/// First precomposed Hangul syllable codepoint; syllable indices are
/// measured from here.
const S_BASE: u32 = 0xAC00;
/// Codepoint that a leading consonant index of zero maps to.
const L_BASE: u32 = 0x1100;
/// Codepoint that a vowel index of zero maps to.
const V_BASE: u32 = 0x1161;
/// Base added to a non-zero trailing consonant index. A trailing index of
/// zero means "no trailing consonant", so this base itself is never produced.
const T_BASE: u32 = 0x11A7;
/// Number of trailing consonant slots per (leading, vowel) pair, including
/// the "no trailing consonant" slot.
const T_COUNT: u32 = 28;
/// Number of syllables that share one leading consonant: 21 vowels times
/// `T_COUNT` trailing slots (588).
const N_COUNT: u32 = 21 * T_COUNT;
20 
21 /// Return the character name of the given precomposed Hangul codepoint.
22 ///
23 /// If the given codepoint does not correspond to a precomposed Hangul
24 /// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
25 ///
26 /// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
hangul_name(cp: u32) -> Option<String>27 pub fn hangul_name(cp: u32) -> Option<String> {
28     let mut name = "HANGUL SYLLABLE ".to_string();
29     let (lpart, vpart, tpart) = match hangul_full_canonical_decomposition(cp) {
30         None => return None,
31         Some(triple) => triple,
32     };
33 
34     name.push_str(jamo_short_name(lpart));
35     name.push_str(jamo_short_name(vpart));
36     name.push_str(tpart.map_or("", jamo_short_name));
37     Some(name)
38 }
39 
40 /// Return the full canonical decomposition of the given precomposed Hangul
41 /// codepoint.
42 ///
43 /// If the decomposition does not have any trailing consonant, then the third
44 /// part of the tuple returned is `None`.
45 ///
46 /// If the given codepoint does not correspond to a precomposed Hangul
47 /// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
48 ///
49 /// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
hangul_full_canonical_decomposition( cp: u32, ) -> Option<(u32, u32, Option<u32>)>50 pub fn hangul_full_canonical_decomposition(
51     cp: u32,
52 ) -> Option<(u32, u32, Option<u32>)> {
53     if !(0xAC00 <= cp && cp <= 0xD7A3) {
54         return None;
55     }
56 
57     let s_index = cp - S_BASE;
58     let l_index = s_index / N_COUNT;
59     let v_index = (s_index % N_COUNT) / T_COUNT;
60     let t_index = s_index % T_COUNT;
61 
62     let l_part = L_BASE + l_index;
63     let v_part = V_BASE + v_index;
64     let t_part =
65         if t_index == 0 {
66             None
67         } else {
68             Some(T_BASE + t_index)
69         };
70     Some((l_part, v_part, t_part))
71 }
72 
jamo_short_name(cp: u32) -> &'static str73 fn jamo_short_name(cp: u32) -> &'static str {
74     let i = JAMO_SHORT_NAME.binary_search_by_key(&cp, |p| p.0).unwrap();
75     JAMO_SHORT_NAME[i].1
76 }
77 
#[cfg(test)]
mod tests {
    use super::{hangul_full_canonical_decomposition, hangul_name};

    // U+D4DB splits into a leading consonant, a vowel and a trailing
    // consonant.
    #[test]
    fn canon_decomp() {
        let expected = Some((0x1111, 0x1171, Some(0x11B6)));
        assert_eq!(hangul_full_canonical_decomposition(0xD4DB), expected);
    }

    // The name is the fixed prefix plus the concatenated Jamo short names.
    #[test]
    fn name() {
        let got = hangul_name(0xD4DB).unwrap();
        assert_eq!(got, "HANGUL SYLLABLE PWILH");
    }

    // Every codepoint in the syllable block must produce a name without
    // panicking.
    #[test]
    fn all() {
        for cp in 0xAC00..0xD7A4 {
            hangul_name(cp).unwrap();
        }
    }

    // A codepoint outside the syllable block has no Hangul name.
    #[test]
    fn invalid() {
        assert_eq!(hangul_name(0), None);
    }
}
106