;; bo-ewts.mim -- Tibetan input method with EWTS
;; Copyright (C) 2007, 2008, 2009
;;   National Institute of Advanced Industrial Science and Technology (AIST)
;;   Registration Number H15PRO112
;; Copyright (C) 2010 Hugues MOISY <hugues.moisy@gmail.com>
;; Copyright (C) 2014 Elie Roux <elie.roux@telecom-bretagne.eu>

;; This file is part of the m17n database; a sub-part of the m17n
;; library.

;; The m17n library is free software; you can redistribute it and/or
;; modify it under the terms of the GNU Lesser General Public License
;; as published by the Free Software Foundation; either version 2.1 of
;; the License, or (at your option) any later version.

;; The m17n library is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Lesser General Public License for more details.

;; You should have received a copy of the GNU Lesser General Public
;; License along with the m17n library; if not, write to the Free
;; Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.

;; Identify this input method by language tag `bo' and name `ewts'.
(input-method bo ewts)

;; Description text for the input method.  The string spans several
;; lines; its line breaks are part of the text.
(description "Tibetan input method based on EWTS.
This implementation is based on THDL Extended Wylie Transliteration Scheme
Version 2.0 <http://www.thlib.org/reference/transliteration/#!essay=/thl/ewts>.")

;; Title of the input method: the Tibetan letter that the consonant map
;; produces for "k".
(title "ཀ")

;; Customizable variables.
;;
;; `precomposed' is tested by the `standard-stack', `special-subjoined'
;; and `vowel' maps to choose between a combined form and a base letter
;; followed by a separate combining sign.  The entry lists, in order:
;; the description, the default value (1) and the two accepted values
;; (0 and 1).
(variable
 (precomposed
  (_"Flag to tell whether or not to generate precomposed characters.
If 1 (the default), generate precomposed characters (i.e. NFC) if available (e.g. \"ྲྀ\"(U+0F76).
If 0, generate only decomposed characters (i.e. NFD) (e.g. \"ྲྀ\" (U+0FB2 U+0F80).")
  ;; The default should be 1 (i.e. NFC) according to:
  ;;   http://www.w3.org/International/questions/qa-html-css-normalization
  ;;   and Unicode Consortium http://www.unicode.org/faq/normalization.html
  1 0 1))

;; Key maps.  Each map is a named list of rules; a rule pairs a typed key
;; sequence with the text to insert and/or the actions to run.  The
;; `state' section decides which maps are active at any moment.
(map
 ;; Base consonants, keyed by their EWTS romanization.
 ;; This map is used also for subjoined consonants: the `consonant-stack'
 ;; state converts the letter inserted here by adding #x50 to its code.
 (consonant
  ("k" "ཀ")
  ("kh" "ཁ")
  ("g" "ག")
  ("ng" "ང")
  ("c" "ཅ")
  ("ch" "ཆ")
  ("j" "ཇ")
  ("ny" "ཉ")
  ("T" "ཊ")
  ("Th" "ཋ")
  ("D" "ཌ")
  ("N" "ཎ")
  ("t" "ཏ")
  ("th" "ཐ")
  ("d" "ད")
  ("n" "ན")
  ("p" "པ")
  ("ph" "ཕ")
  ("b" "བ")
  ("m" "མ")
  ("ts" "ཙ")
  ("tsh" "ཚ")
  ("dz" "ཛ")
  ("w" "ཝ")
  ("zh" "ཞ")
  ("z" "ཟ")
  ("'" "འ")
  ("y" "ཡ")
  ("r" "ར")
  ("l" "ལ")
  ("sh" "ཤ")
  ("Sh" "ཥ")
  ("s" "ས")
  ("h" "ཧ")
  ("a" "ཨ"))

 ;; Standard Tibetan Stacks listed at:
 ;; <http://www.thlib.org/reference/transliteration/tibstacks.php>
 ;; Every rule inserts the complete stack, so these sequences need no
 ;; explicit "+" from the user.  Outputs must not carry trailing
 ;; whitespace: whatever is in the string is inserted verbatim.
 (standard-stack
  ("f" "ཕ༹")
  ("v" "བ༹")
  ("R" "ཪ")
  ("rk" "རྐ")
  ("rg" "རྒ")
  ("rng" "རྔ")
  ("rj" "རྗ")
  ("rny" "རྙ")
  ("rt" "རྟ")
  ("rd" "རྡ")
  ("rn" "རྣ")
  ("rb" "རྦ")
  ("rm" "རྨ")
  ("rts" "རྩ")
  ("rdz" "རྫ")
  ("lk" "ལྐ")
  ("lg" "ལྒ")
  ("lng" "ལྔ")
  ("lc" "ལྕ")
  ("lj" "ལྗ")
  ("lt" "ལྟ")
  ;; LA with subjoined DA (same stack as the tail of "bld" below).
  ("ld" "ལྡ")
  ("lp" "ལྤ")
  ("lb" "ལྦ")
  ("lh" "ལྷ")
  ("sk" "སྐ")
  ("sg" "སྒ")
  ("sng" "སྔ")
  ("sny" "སྙ")
  ("st" "སྟ")
  ("sd" "སྡ")
  ("sn" "སྣ")
  ("sp" "སྤ")
  ("sb" "སྦ")
  ("sm" "སྨ")
  ("sts" "སྩ")
  ("kw" "ཀྭ")
  ("khw" "ཁྭ")
  ("gw" "གྭ")
  ("cw" "ཅྭ")
  ("nyw" "ཉྭ")
  ("tw" "ཏྭ")
  ("dw" "དྭ")
  ("tsw" "ཙྭ")
  ("tshw" "ཚྭ")
  ("zhw" "ཞྭ")
  ("zw" "ཟྭ")
  ("rw" "རྭ")
  ("shw" "ཤྭ")
  ("sw" "སྭ")
  ("hw" "ཧྭ")
  ("ky" "ཀྱ")
  ("khy" "ཁྱ")
  ("gy" "གྱ")
  ("py" "པྱ")
  ("phy" "ཕྱ")
  ("by" "བྱ")
  ("my" "མྱ")
  ("kr" "ཀྲ")
  ("khr" "ཁྲ")
  ("gr" "གྲ")
  ("tr" "ཏྲ")
  ("thr" "ཐྲ")
  ("dr" "དྲ")
  ("pr" "པྲ")
  ("phr" "ཕྲ")
  ("br" "བྲ")
  ("mr" "མྲ")
  ("shr" "ཤྲ")
  ("sr" "སྲ")
  ("hr" "ཧྲ")
  ("kl" "ཀླ")
  ("gl" "གླ")
  ("bl" "བླ")
  ("zl" "ཟླ")
  ("rl" "རླ")
  ("sl" "སླ")
  ("rky" "རྐྱ")
  ("rgy" "རྒྱ")
  ("rmy" "རྨྱ")
  ("rgw" "རྒྭ")
  ("rtsw" "རྩྭ")
  ("sky" "སྐྱ")
  ("sgy" "སྒྱ")
  ("spy" "སྤྱ")
  ("sby" "སྦྱ")
  ("smy" "སྨྱ")
  ("skr" "སྐྲ")
  ("sgr" "སྒྲ")
  ("snr" "སྣྲ")
  ("spr" "སྤྲ")
  ("sbr" "སྦྲ")
  ("smr" "སྨྲ")
  ("grw" "གྲྭ")
  ("drw" "དྲྭ")
  ("phyw" "ཕྱྭ")
  ;; ambiguous cases with b as prefix: without these rules "br"/"bl"
  ;; above would match first and stack the R/L under the B.
  ("brk" "བརྐ")
  ("brg" "བརྒ")
  ("brng" "བརྔ")
  ("brj" "བརྗ")
  ("brl" "བརླ")
  ("brny" "བརྙ")
  ("brt" "བརྟ")
  ("brd" "བརྡ")
  ("brn" "བརྣ")
  ("brts" "བརྩ")
  ("brdz" "བརྫ")
  ("brky" "བརྐྱ")
  ("brgy" "བརྒྱ")
  ("blt" "བལྟ")
  ("bld" "བལྡ")
  ("brtsw" "བརྩྭ")
  ("brgw" "བརྒྭ")
  ;; oM: spelled-out sequence when `precomposed' is 0, otherwise the
  ;; single syllable sign.
  ("oM" (cond ((= precomposed 0) "ཨོཾ") (1 "ༀ"))))

 ;; "+" asks for the next consonant or vowel to be stacked; it inserts
 ;; nothing by itself.
 (force-stack
  ("+"))

 ;; "." ends the current stack; it inserts nothing by itself.
 (break-stack
  ("."))

 ;; Subjoined forms that need special handling after "+".
 ;; For "h" and "Sh": when `precomposed' is 0 only the subjoined sign is
 ;; inserted.  Otherwise, if the character before the cursor (@-1) is
 ;; one of the listed letters, that letter is deleted and replaced by
 ;; the combined form given in the rule; any other preceding character
 ;; just gets the subjoined sign appended.
 (special-subjoined
  ("h" (cond ((= precomposed 0) "ྷ")
             ((= @-1 ?ག) (delete @-1) "གྷ")
             ((= @-1 ?ད) (delete @-1) "དྷ")
             ((= @-1 ?ཌ) (delete @-1) "ཌྷ")
             ((= @-1 ?བ) (delete @-1) "བྷ")
             ((= @-1 ?ཛ) (delete @-1) "ཛྷ")
             ((= @-1 ?ྒ) (delete @-1) "ྒྷ")
             ((= @-1 ?ྡ) (delete @-1) "ྡྷ")
             ((= @-1 ?ྜ) (delete @-1) "ྜྷ")
             ((= @-1 ?ྦ) (delete @-1) "ྦྷ")
             ((= @-1 ?ྫ) (delete @-1) "ྫྷ")
             (1 "ྷ")))
  ("Sh" (cond ((= precomposed 0) "ྵ")
              ((= @-1 ?ཀ) (delete @-1) "ཀྵ")
              ((= @-1 ?ྐ) (delete @-1) "ྐྵ")
              (1 "ྵ")))
  ("v" "ྦ༹")
  ("f" "ྥ༹")
  ("W" "ྺ")
  ("Y" "ྻ")
  ("R" "ྼ")
  ("Z" "༹"))

 ;; Vowel signs.  "a" is the inherent vowel and inserts nothing.
 (vowel
  ("a" "")
  ;; Reversed i.  In precomposed mode, when the preceding character is a
  ;; subjoined RA or LA, the inserted string already contains that sign,
  ;; so the existing one is deleted first (as the "h" and "Sh" rules of
  ;; `special-subjoined' do); otherwise the sign would be doubled.
  ("-i" (cond ((= precomposed 0) "ྀ")
              ((= @-1 ?ྲ) (delete @-1) "ྲྀ")
              ((= @-1 ?ླ) (delete @-1) "ླྀ")
              (1 "ྀ")))
  ("u" "ུ")
  ("e" "ེ")
  ("o" "ོ")
  ("i" "ི")
  ("A" "ཱ")
  ("I" "ཱི")
  ("U" "ཱུ")
  ("ai" "ཻ")
  ("au" "ཽ")
  ("uo" "ོུ")
  ("ui" "ིུ")
  ("ue" "ེུ")
  ("r-I" "ྲཱྀ")
  ("l-I" "ླཱྀ")
  ("-I" "ཱྀ"))

 ;; Digits, marks and punctuation.
 (others
  ;; Numbers
  ("0" "༠")
  ("1" "༡")
  ("2" "༢")
  ("3" "༣")
  ("4" "༤")
  ("5" "༥")
  ("6" "༦")
  ("7" "༧")
  ("8" "༨")
  ("9" "༩")
  ;; These half numbers are not in EWTS but for convenience.
  ("-1" "༪")
  ("-2" "༫")
  ("-3" "༬")
  ("-4" "༭")
  ("-5" "༮")
  ("-6" "༯")
  ("-7" "༰")
  ("-8" "༱")
  ("-9" "༲")
  ("-0" "༳")
  ;; Sanskrit-related marks
  ("H" "ཿ")
  ("M" "ཾ")
  ("~M" "ྃ")
  ("~M`" "ྂ")
  ("?" "྄")
  ("&" "྅")
  ;; Intersyllabic marks
  (" " "་")
  ("*" "༌")
  ;; Phrase delimiting marks
  ("/" "།")
  ("//" "༎")
  (";" "༏")
  ("|" "༑")
  ("!" "༈")
  (":" "༔")
  ("_" " ")
  ("=" "༴")
  ;; Head marks
  ("@" "༄")
  ("#" "༅")
  ("$" "༆")
  ("%" "༇")
  ;; Paired punctuation marks (brackets)
  ("<" "༺")
  (">" "༻")
  ("(" "༼")
  (")" "༽")
  ;; Miscellaneous
  ("~X" "༵")
  ("X" "༷")
  ("^" "༹"))

 ;; Backslash starts an escape.  The marker BEG is set before the
 ;; backslash is inserted so that the `dispatch-escape' and `unicode'
 ;; states can later delete everything typed since then.
 (escape
  ("\\" (mark BEG) "\\"))

 ;; "[" switches to the `non-tibetan' state; it inserts nothing.
 (non-tibetan
  ("["))

 ;; Second key of a Unicode escape.  MAX-COUNT is the number of hex
 ;; digits the `unicode' state reads: 4 after "u", 8 after "U".
 (unicode
  ("u" "u" (set MAX-COUNT 4))
  ("U" "U" (set MAX-COUNT 8)))

 ;; "]" leaves the `non-tibetan' state; it inserts nothing.
 (unescape
  ("]"))

 ;; Hexadecimal digits for Unicode escapes.  Lower-case a-f insert the
 ;; upper-case letter, so the `unicode' state only has to distinguish
 ;; "0"-"9" from "A"-"F" when it converts the inserted character.
 (hexadigit
  ("0" "0")
  ("1" "1")
  ("2" "2")
  ("3" "3")
  ("4" "4")
  ("5" "5")
  ("6" "6")
  ("7" "7")
  ("8" "8")
  ("9" "9")
  ("A" "A") ("a" "A")
  ("B" "B") ("b" "B")
  ("C" "C") ("c" "C")
  ("D" "D") ("d" "D")
  ("E" "E") ("e" "E")
  ("F" "F") ("f" "F"))

 ;; The Backspace key runs the `undo' action.
 (backspace
  ((Backspace) (undo))))

;; State machine.  Each state lists branches of the form
;; (MAP-NAME ACTION...): when a key sequence matches a rule of MAP-NAME,
;; the rule's own actions run first and then the branch actions.  Per
;; the m17n input-method format, a `t' branch runs when the state is
;; entered and a `nil' branch runs for a key that matches no other
;; branch of the state.
(state
 ;; Start of a syllable.
 (init
  (consonant (shift after-consonant))
  (standard-stack (shift after-consonant))
  ;; A vowel with no consonant before it: move to the beginning of the
  ;; preedit, insert the A letter there as the carrier, then move back
  ;; to the end.
  (vowel (move @<) "ཨ" (move @>) (shift after-vowel))
  (others)
  (escape (shift dispatch-escape))
  (non-tibetan (shift non-tibetan))
  (backspace))

 ;; A consonant or a complete stack has just been inserted.
 (after-consonant
  (force-stack (shift consonant-stack))
  (break-stack (shift init))
  (vowel (shift after-vowel)))

 ;; "+" was typed after a consonant: the next consonant is subjoined.
 (consonant-stack
  (special-subjoined (shift after-consonant))
  ;; The `consonant' rule has just inserted the base letter.  Take its
  ;; code from @-1, delete it, and insert the character #x50 above it
  ;; instead.
  (consonant (set C @-1) (delete @-1) (add C #x50) (insert C)
             (shift after-consonant))
  ;; Further "+" keys keep the state unchanged.
  (force-stack)
  (nil (pop)))

 ;; A vowel sign has just been inserted; "+" allows adding another one.
 (after-vowel
  (force-stack (shift vowel-stack)))

 (vowel-stack
  (vowel (shift after-vowel)))

 ;; A backslash was typed (the `escape' rule inserted it and set BEG).
 (dispatch-escape
  (unicode (shift unicode))
  ;; Not a Unicode escape: delete the text back to BEG, return to
  ;; `init' and hand the key back as unhandled.
  (nil (delete BEG) (shift init) (unhandle)))

 ;; Reading the hex digits of a "\u" or "\U" escape.
 (unicode
  ;; On entry, reset the digit counter and the accumulated value.
  (t (set COUNT 0) (set UNICODE 0))
  ;; @-1 is the digit just inserted by the `hexadigit' rule (always
  ;; "0"-"9" or upper-case "A"-"F").  Subtracting 55 maps "A" (65) to
  ;; 10; subtracting 48 maps "0" (48) to 0.
  (hexadigit (cond ((> @-1 ?9) (set UNICODE (+ (* UNICODE 16) (- @-1 55))))
                   (1 (set UNICODE (+ (* UNICODE 16) (- @-1 48)))))
             (set COUNT (+ COUNT 1))
             ;; After MAX-COUNT digits, replace everything typed since
             ;; the backslash with the character whose code is UNICODE.
             (cond ((= COUNT MAX-COUNT) (delete BEG) (insert UNICODE)
                    (shift init)))))

 ;; Between "[" and "]": keys are handed back as unhandled until "]"
 ;; returns to `init'.
 (non-tibetan
  (unescape (shift init))
  (nil (unhandle))))

;; Local Variables:
;; mode: lisp
;; End: