# Copyright (C) 2016, 2018 Olga Yakovleva <yakovleva.o.v@gmail.com>

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 2.1 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Consonant letters, one bracketed group per row:
# Cyrillic lower case, Cyrillic upper case, Latin lower case, Latin upper case.
define Consonant
[б|г|д|ж|җ|з|к|л|м|н|п|р|с|т|ф|х|һ|ц|ч|ш|щ|й|в|ң|ғ|қ] |
[Б|Г|Д|Ж|Җ|З|К|Л|М|Н|П|Р|С|Т|Ф|Х|Һ|Ц|Ч|Ш|Щ|Й|В|Ң|Ғ|Қ] |
[b|v|w|g|ğ|d|j|c|z|y|k|q|l|m|n|p|r|s|t|f|x|h] |
[B|V|W|G|Ğ|D|J|C|Z|Y|K|Q|L|M|N|P|R|S|T|F|X|H] ;

# Vowel letters, grouped the same way as the consonants.
# NOTE(review): the upper-case Cyrillic row appears to list "І" twice; that is
# harmless inside a union, but confirm both are the same code point before
# removing either one.
define Vowel
[а|ә|е|и|о|у|ю|я|ы|э|ө|ү|і] |
[А|Ә|Е|И|І|О|У|Ю|Я|Ы|Э|Ө|Ү|І] |
[a|e|i|o|u] |
[A|E|I|O|U] ;

# Any letter: a consonant, a vowel, or one of the two Cyrillic signs.
define Letter [Consonant | Vowel | ь | ъ] ;

# A word is a non-empty run of letters.
define Word Letter+ ;

# Digits. The zero is written %0 because a bare 0 denotes epsilon in Foma.
define NZDigit [1|2|3|4|5|6|7|8|9] ;
define Digit [%0 | NZDigit] ;

# A decimal number without leading zeros (or the single digit zero).
# Not referenced by the rules below.
define Number [NZDigit Digit*] | %0 ;

# A non-empty run of consonants.
define ConsonantSequence Consonant+ ;

# A letter run with no vowel in it: only signs, or only consonants.
define LetterSequence [ь|ъ]+ | ConsonantSequence ;

# First pass: leftmost-longest markup that inserts a tag symbol right after
# each recognised token:
#   word - a run of letters
#   num  - a digit string starting with a non-zero digit, or a lone zero
#   dig  - a zero followed by one or more digits
#   sym  - any other single symbol (neither letter nor digit)
# The rules are parallel (",,"); their order is kept as in the original.
define BasicTokenizer
Word            @-> ... word || _ ,,
NZDigit Digit*  @-> ... num ,,
%0 Digit+       @-> ... dig ,,
%0              @-> ... num ,,
\[Letter | Digit] @-> ... sym ;

# Second pass: retag "word" as "lseq" when the text between the preceding
# tag (or the start of the string) and this tag is a LetterSequence.
define TagLetterSequences
word -> lseq || [ .#. | word | num | dig | sym ] LetterSequence _ ;

# Third pass: retag "num" as "dig" when the 13 symbols immediately before the
# tag are all digits.
define UntagLargeNumbers
num -> dig || Digit^13 _ ;

# The resulting transducer: the three passes composed in order.
regex
BasicTokenizer
.o. TagLetterSequences
.o. UntagLargeNumbers ;