1# Copyright (C) 2017, 2018  Olga Yakovleva <yakovleva.o.v@gmail.com>
2
3# This program is free software: you can redistribute it and/or modify
4# it under the terms of the GNU Lesser General Public License as published by
5# the Free Software Foundation, either version 2.1 of the License, or
6# (at your option) any later version.
7
8# This program is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11# GNU Lesser General Public License for more details.
12
13# You should have received a copy of the GNU Lesser General Public License
14# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
# Load the raw prefix dictionary. "read spaced-text" reads the entries of
# dict-prefixes.txt as a network on the stack, "define" without an expression
# gives the name PrefixDict0 to the network on top of the stack, and "clear"
# empties the stack before the next file is read.
16read spaced-text dict-prefixes.txt
17define PrefixDict0
18clear
19
# Load the raw whole-word dictionary in the same way and name it WordDict0.
20read spaced-text dict-words.txt
21define WordDict0
22clear
23
# Unconditional rewrite of the Cyrillic letter в to the symbol v.
24define RussianVTranscription в -> v ;
25
# Compose each raw dictionary with the rule above, so that any в on the lower
# (output) side of a dictionary entry becomes v. The general consonant mapping
# used later sends в to w, so this keeps dictionary-supplied в as v.
# NOTE(review): the contents of the dictionary files are not visible here.
26define PrefixDict PrefixDict0 .o. RussianVTranscription ;
27define WordDict WordDict0 .o. RussianVTranscription ;
28
# Input letter classes. Vowel letters accepted in Tatar words.
29define TatarVowelLetter а|ә|е|ё|и|о|ө|у|ү|ы|э|ю|я|і ;
# Consonant letters. Besides Cyrillic letters the list also contains the
# Latin-script symbols ğ, q and w.
30define TatarConsonantLetter б|в|г|д|ж|җ|з|й|к|л|м|н|ң|п|р|с|т|ф|х|һ|ц|ч|ш|щ|ğ|q|w|ғ|қ ;
# Any single letter of the alphabet above, plus the soft and hard signs.
31define TatarLetter TatarVowelLetter | TatarConsonantLetter | ь | ъ ;
32
# Prefix handling.
#
# Prefix0 is the upper (input) side of the prefix dictionary, i.e. the set of
# known prefix spellings.
33define Prefix0 PrefixDict.u ;
# Prefix(x) returns the members of the language x that are proper prefixes of
# another member of x: x is composed with a relation that accepts a member of
# x followed by one or more Tatar letters and deletes those letters, and the
# lower side of the result is taken.
34define Prefix(x) [x .o. [x [TatarLetter+]:0]].l ;
# Level N: PrefixN holds the entries of the previous level that can still be
# extended to a longer entry of that level. UnambigPrefixN holds the entries
# that cannot. PrefixRuleN maps an UnambigPrefixN entry through the prefix
# dictionary and then passes zero or more Tatar letters through unchanged.
35define Prefix1 Prefix(Prefix0) ;
36define UnambigPrefix1 Prefix0 - Prefix1 ;
37define PrefixRule1 [UnambigPrefix1 .o. PrefixDict] TatarLetter* ;
38define Prefix2 Prefix(Prefix1) ;
39define UnambigPrefix2 Prefix1 - Prefix2 ;
40define PrefixRule2 [UnambigPrefix2 .o. PrefixDict] TatarLetter* ;
41define Prefix3 Prefix(Prefix2) ;
42define UnambigPrefix3 Prefix2 - Prefix3 ;
43define PrefixRule3 [UnambigPrefix3 .o. PrefixDict] TatarLetter* ;
44define Prefix4 Prefix(Prefix3) ;
45define UnambigPrefix4 Prefix3 - Prefix4 ;
46define PrefixRule4 [UnambigPrefix4 .o. PrefixDict] TatarLetter* ;
47define Prefix5 Prefix(Prefix4) ;
48define UnambigPrefix5 Prefix4 - Prefix5 ;
49define PrefixRule5 [UnambigPrefix5 .o. PrefixDict] TatarLetter* ;
50define Prefix6 Prefix(Prefix5) ;
51define UnambigPrefix6 Prefix5 - Prefix6 ;
52define PrefixRule6 [UnambigPrefix6 .o. PrefixDict] TatarLetter* ;
53
# Combine the levels with priority union: an input accepted by an earlier rule
# is handled by that rule only, and later rules apply only to inputs that the
# earlier ones do not accept.
# NOTE(review): entries that remain in Prefix6 are not covered by any rule, so
# nesting deeper than six levels is silently unsupported - confirm the
# dictionary never needs more.
54define PrefixRule PrefixRule1 .P. PrefixRule2 .P. PrefixRule3 .P. PrefixRule4 .P. PrefixRule5 .P. PrefixRule6 ;
55
# Latin-script letters that may appear in input words. They are used only in
# the fallback branch of TatarWord.
56define LatinVowelLetter a|e|i|o|u ;
# Note that y is listed among the consonant letters here.
57define LatinConsonantLetter b|v|w|g|ğ|d|j|c|z|y|k|q|l|m|n|p|r|s|t|f|x|h ;
58define LatinLetter LatinVowelLetter | LatinConsonantLetter ;
59
# First stage of the transcription. Priority union tries, in this order:
#   1. a whole-word entry from WordDict,
#   2. a known prefix followed by Tatar letters (PrefixRule),
#   3. any non-empty string of Tatar or Latin letters, passed through as is.
# A later alternative is used only for inputs the earlier ones do not accept.
60define TatarWord WordDict .P. PrefixRule .P. [[TatarLetter|LatinLetter]+] ;
61
# Symbols of the output alphabet, used as contexts by the rules that run after
# the basic letter transcription.
# NOTE(review): gh and kh, which the consonant mapping and the uvular rule can
# produce, are not listed in TatarConsonant - confirm this is intended.
62define TatarConsonant b|v|g|d|zh|zhj|z|j|k|l|m|n|ng|p|r|s|t|f|hh|h|shj|sh|w ;
63define TatarVowel a|af|e|i|y|o|of|u|uf ;
64
# Letter-to-symbol pairs for consonants. Covers the Cyrillic consonant letters
# and the Latin-script symbols j, ğ, q, w, x and c. Points worth noting:
#   - в maps to w here,
#   - ч and щ both map to shj,
#   - ц maps to the intermediate symbol c, which the C transcription rules
#     resolve in the next steps of the cascade,
#   - x and c expand to the two-symbol strings k s and t s.
65define TatarBasicConsonantTranscription б:b|в:w|[г|ғ]:g|д:d|[ж|j]:zh|җ:zhj|з:z|й:j|к:k|л:l|м:m|н:n|ң:ng|п:p|р:r|с:s|т:t|ф:f|х:hh|һ:h|ц:c|ч:shj|ш:sh|щ:shj|ğ:gh|[q|қ]:kh|w:w|x:{ks}|c:{ts} ;
# The same mapping turned into an unconditional rewrite rule, so that symbols
# not mentioned in it pass through unchanged.
66define TatarBasicConsonantsTranscription TatarBasicConsonantTranscription -> ;
67
# Resolve the intermediate symbol c. Between two vowel letters it becomes the
# string t s. The context is written in terms of TatarVowelLetter because, in
# the cascades below, this rule runs before the vowel letters are transcribed.
68define TatarIntervocalicCTranscription c -> {ts} || TatarVowelLetter _ TatarVowelLetter ;
# Every c still left after the rule above becomes s.
69define TatarDefaultCTranscription c -> s ;
70
# Letter-to-symbol pairs for vowels. The letters е, ё, ю and я map to the
# intermediate symbols je, jo, ju and ja, which the J-vowel rules resolve
# later in the cascade. The Latin letter y maps to i.
71define TatarBasicVowelTranscription а:a|ә:af|е:je|ё:jo|[и|і]:i|о:o|ө:of|у:u|ү:uf|ы:y|э:e|ю:ju|я:ja|y:i ;
# The same mapping as an unconditional rewrite rule.
72define TatarBasicVowelsTranscription TatarBasicVowelTranscription -> ;
73
# Insert a single w between u or uf and an immediately following vowel symbol.
# The [..] left-hand side makes this an insertion of exactly one w per position.
74define TatarIntervocalicWInsertion
75[..] -> w || u|uf _ TatarVowel ;
76
# Building block: insert the placeholder jj and replace an intermediate
# J-vowel symbol by its front counterpart (je:e, jo:of, ju:uf, ja:af). The
# placeholder jj is resolved to w or j by later rules.
77define TatarJVowelAsFront [0:jj] [je:e|jo:of|ju:uf|ja:af] ;
78
# Building block: the same, but with the back counterpart (je:y, jo:o, ju:u,
# ja:a).
79define TatarJVowelAsBack [0:jj] [je:y|jo:o|ju:u|ja:a] ;
80
# Directly after a consonant symbol a J-vowel becomes the plain front vowel,
# with no jj inserted.
81define TatarJVowelsAfterConsonants je:e|ju:uf|ja:af|jo:of -> || TatarConsonant _ ;
82
# Front vowel symbols of the output alphabet.
83define TatarFrontVowel af|e|i|of|uf ;
84
# Back vowel symbols of the output alphabet.
85define TatarBackVowel a|o|u|y ;
86
# A J-vowel followed by zero or more consonant symbols and then a soft sign is
# transcribed as front. The soft sign is still present at this point because
# the sign removal rule is the last step of the cascades below.
87define TatarJVowelsBeforeSoftSignTranscription
88TatarJVowelAsFront -> || _ TatarConsonant* ь ;
89
# Look to the right: two parallel rules (separated by ",,"). A J-vowel is
# transcribed as front when a front vowel follows with no back vowel symbol in
# between, and as back when a back vowel follows with no front vowel symbol in
# between.
90define TatarJVowelsForwardSearchTranscription
91TatarJVowelAsFront -> || _ [\TatarBackVowel]* TatarFrontVowel ,,
92TatarJVowelAsBack -> || _ [\TatarFrontVowel]* TatarBackVowel ;
93
# Look to the left: the mirror image of the rules above, for J-vowels that are
# still unresolved. The deciding vowel is the nearest front or back vowel
# symbol that precedes the J-vowel.
94define TatarJVowelsBackwardSearchTranscription
95TatarJVowelAsFront -> || TatarFrontVowel [\TatarBackVowel]* _ ,,
96TatarJVowelAsBack -> || TatarBackVowel [\TatarFrontVowel]* _ ;
97
# A run of more than one still unresolved J-vowel is transcribed as front,
# using left-to-right longest-match replacement.
98define TatarJVowelSequenceTranscription
99TatarJVowelAsFront^>1 @-> ;
100
# Fallback: any J-vowel that no earlier rule has resolved is transcribed as
# back.
101define TatarJVowelsDefaultTranscription
102TatarJVowelAsBack -> ;
103
# Exception: the placeholder jj becomes w when it stands between uf and e.
104define TatarUjeException
105jj -> w || uf _ e ;
106
# Every remaining placeholder jj becomes j. In the cascades below this runs
# after TatarUjeException and TatarIntervocalicWInsertion.
107define TatarJBeforeJVowels
108jj -> j ;
109
# Replace k by kh and g by gh in either of two contexts (separated by the
# comma):
#   1. directly before a hard sign or a back vowel,
#   2. directly after a back vowel and before either one or more consonant
#      symbols plus a back vowel, or the word boundary.
# NOTE(review): the reading of context 2 relies on concatenation binding
# tighter than "|" in foma, i.e. [TatarConsonant+ TatarBackVowel] | .#. -
# confirm this grouping is the intended one.
110define TatarUvularStopsTranscription
111k:kh|g:gh -> || _ ъ|TatarBackVowel , TatarBackVowel _ TatarConsonant+ TatarBackVowel | .#. ;
112
# Delete every soft sign and hard sign. The context is empty, so the deletion
# applies everywhere. Earlier rules use the signs as context, which is why
# this is the final step of the cascades below.
113define TatarSoftAndHardSignsRemoval ь|ъ -> 0 || _ ;
114
# A u or uf that directly follows a vowel symbol becomes w.
115define TatarNonSyllabicUToW
116[u|uf] -> w || TatarVowel _ ;
117
# Special treatment of the letter в in what the rule names call surnames.
# These rules run on the untranscribed letters, before the basic consonant
# mapping would turn в into w.
#
# Shared left context: any three symbols followed by о or е.
118define TatarVInSurnameLeftContext ? ? ? [о|е] ;
119
# в becomes v when followed by an optional н, then а, then one of the letters
# н, г, д, л, м, ң, с or the word boundary.
120define TatarVInSurnamesOfWomen
121в -> v || TatarVInSurnameLeftContext _ (н) а [[н|г|д|л|м|ң|с]|.#.] ;
122
# Two parallel rules: в becomes v before ы or before the string ич, and
# в becomes f before one of н, к, т, л or the word boundary.
123define TatarVInSurnamesOfMen
124в -> v || TatarVInSurnameLeftContext _ [ы] | [{ич}] ,,
125в -> f || TatarVInSurnameLeftContext _ [[н|к|т|л] | .#.] ;
126
# Main transcription: the composition of all stages. The order matters,
# because each rule reads the output of the one before it:
#   1. word lookup (dictionary, prefix rule or plain letters),
#   2. surname-specific handling of в,
#   3. consonant letters, then resolution of the intermediate symbol c,
#   4. vowel letters, producing the intermediate J-vowels,
#   5. J-vowel resolution, from the most specific context to the default,
#   6. resolution of the placeholder jj and insertion of w,
#   7. uvular stops, non-syllabic u, and finally removal of the signs.
127define TatarTranscription
128TatarWord .o.
129TatarVInSurnamesOfWomen .o.
130TatarVInSurnamesOfMen .o.
131TatarBasicConsonantsTranscription .o.
132TatarIntervocalicCTranscription .o.
133TatarDefaultCTranscription .o.
134TatarBasicVowelsTranscription .o.
135TatarJVowelsAfterConsonants .o.
136TatarJVowelsBeforeSoftSignTranscription .o.
137TatarJVowelsForwardSearchTranscription .o.
138TatarJVowelsBackwardSearchTranscription .o.
139TatarJVowelSequenceTranscription .o.
140TatarJVowelsDefaultTranscription .o.
141TatarUjeException .o.
142TatarIntervocalicWInsertion .o.
143TatarJBeforeJVowels .o.
144TatarUvularStopsTranscription .o.
145TatarNonSyllabicUToW .o.
146TatarSoftAndHardSignsRemoval ;
147
# Consonant mapping for the prompt variant. Priority union puts the pair в:v
# in front of the basic mapping, so в is transcribed as v instead of w. All
# other pairs are taken from the basic mapping.
148define TatarBasicPromptConsonantTranscription  [в:v] .P. TatarBasicConsonantTranscription ;
# The same mapping as an unconditional rewrite rule.
149define TatarBasicPromptConsonantsTranscription TatarBasicPromptConsonantTranscription -> ;
150
# Prompt variant of the transcription. Compared with TatarTranscription:
#   - the input is any non-empty string of Tatar letters, with no dictionary
#     lookup, no prefix rule and no Latin letters,
#   - the surname rules for в are not applied,
#   - the prompt consonant mapping (в to v) replaces the basic one.
# All remaining stages are the same and are applied in the same order.
# NOTE(review): this network is only defined in this file. The final regex
# command compiles TatarTranscription, so it is presumably used elsewhere.
151define TatarPromptTranscription
152[TatarLetter+] .o.
153TatarBasicPromptConsonantsTranscription .o.
154TatarIntervocalicCTranscription .o.
155TatarDefaultCTranscription .o.
156TatarBasicVowelsTranscription .o.
157TatarJVowelsAfterConsonants .o.
158TatarJVowelsBeforeSoftSignTranscription .o.
159TatarJVowelsForwardSearchTranscription .o.
160TatarJVowelsBackwardSearchTranscription .o.
161TatarJVowelSequenceTranscription .o.
162TatarJVowelsDefaultTranscription .o.
163TatarUjeException .o.
164TatarIntervocalicWInsertion .o.
165TatarJBeforeJVowels .o.
166TatarUvularStopsTranscription .o.
167TatarNonSyllabicUToW .o.
168TatarSoftAndHardSignsRemoval ;
169
# Compile the main transcription and leave it on the stack as the result of
# this script.
170regex TatarTranscription ;
171