1/*
2Hungarian Stemmer
3Removes noun inflections
4*/
5
// Routines defined in this file, grouped by the role they play.
routines (
    // runs forwards, before any suffix is touched
    mark_regions

    // helpers used by the suffix routines
    R1
    v_ending
    double
    undouble

    // suffix routines, in the order stem calls them
    instrum
    case
    case_special
    case_other
    factive
    owned
    sing_owner
    plur_owner
    plural
)

// stem is the only routine declared as an external.
externals ( stem )

// p1 is assigned in mark_regions and compared against the cursor in R1.
integers ( p1 )

// v is the single character grouping used by this file.
groupings ( v )

// Within string literals, {name} is replaced by the stringdef of that name.
stringescapes {}
29
/* Accented letters, usable inside string literals as {name}. */

stringdef a'  '{U+00E1}'   // a with acute
stringdef e'  '{U+00E9}'   // e with acute
stringdef i'  '{U+00ED}'   // i with acute
stringdef o'  '{U+00F3}'   // o with acute
stringdef o"  '{U+00F6}'   // o with umlaut
stringdef oq  '{U+0151}'   // o with double acute
stringdef u'  '{U+00FA}'   // u with acute
stringdef u"  '{U+00FC}'   // u with umlaut
stringdef uq  '{U+0171}'   // u with double acute

/* v: the letters a e i o u plus the nine accented letters defined above. */
define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'
43
// Sets p1, the position that R1 later compares the cursor against.
//
//   * p1 starts out as limit and keeps that value when neither of the two
//     alternatives below runs to completion.
//   * Word starts with a character in v: advance to the first character
//     that is not in v, step over it (over a whole listed letter group such
//     as 'sz' or 'dzs' when one starts there, otherwise over a single
//     character) and mark p1 at that point.
//   * Word starts with a character not in v: advance past the next
//     character that is in v and mark p1 at that point.
define mark_regions as (
    $p1 = limit

    (
        v
        goto non-v
        among ( 'cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs' ) or next
        setmark p1
    )
    or
    (
        non-v
        gopast v
        setmark p1
    )
)
55
56backwardmode (
57
58    define R1 as $p1 <= cursor
59
60    define v_ending as (
61        [substring] R1 among(
62            '{a'}' (<- 'a')
63            '{e'}' (<- 'e')
64        )
65    )
66
67    define double as (
68        test among('bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
69        'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs')
70    )
71
72    define undouble as (
73        next [hop 1] delete
74    )
75
76    define instrum as(
77        [substring] R1 among(
78            'al' (double)
79            'el' (double)
80        )
81        delete
82        undouble
83    )
84
85
86    define case as (
87        [substring] R1 among(
88            'ban' 'ben'
89            'ba' 'be'
90            'ra' 're'
91            'nak' 'nek'
92            'val' 'vel'
93            't{o'}l' 't{oq}l'
94            'r{o'}l' 'r{oq}l'
95            'b{o'}l' 'b{oq}l'
96            'hoz' 'hez' 'h{o"}z'
97            'n{a'}l' 'n{e'}l'
98            'ig'
99            'at' 'et' 'ot' '{o"}t'
100            '{e'}rt'
101            'k{e'}pp' 'k{e'}ppen'
102            'kor'
103            'ul' '{u"}l'
104            'v{a'}' 'v{e'}'
105            'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
106            'k{e'}nt'
107            'en' 'on' 'an' '{o"}n'
108            'n'
109            't'
110        )
111        delete
112        v_ending
113    )
114
115    define case_special as(
116        [substring] R1 among(
117            '{e'}n' (<- 'e')
118            '{a'}n' (<- 'a')
119            '{a'}nk{e'}nt' (<- 'a')
120        )
121    )
122
123    define case_other as(
124        [substring] R1 among(
125            'astul' 'est{u"}l' (delete)
126            'stul' 'st{u"}l' (delete)
127            '{a'}stul' (<- 'a')
128            '{e'}st{u"}l' (<- 'e')
129        )
130    )
131
132    define factive as(
133        [substring] R1 among(
134            '{a'}' (double)
135            '{e'}' (double)
136        )
137        delete
138        undouble
139    )
140
141    define plural as (
142        [substring] R1 among(
143            '{a'}k' (<- 'a')
144            '{e'}k' (<- 'e')
145            '{o"}k' (delete)
146            'ak' (delete)
147            'ok' (delete)
148            'ek' (delete)
149            'k' (delete)
150        )
151    )
152
153    define owned as (
154        [substring] R1 among (
155            'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}' (delete)
156            '{e'}k{e'}' (<- 'e')
157            '{a'}k{e'}' (<- 'a')
158            'k{e'}' (delete)
159            '{e'}{e'}i' (<- 'e')
160            '{a'}{e'}i' (<- 'a')
161            '{e'}i'  (delete)
162            '{e'}{e'}' (<- 'e')
163            '{e'}' (delete)
164        )
165    )
166
167    define sing_owner as (
168        [substring] R1 among(
169            '{u"}nk' 'unk' (delete)
170            '{a'}nk' (<- 'a')
171            '{e'}nk' (<- 'e')
172            'nk' (delete)
173            '{a'}juk' (<- 'a')
174            '{e'}j{u"}k' (<- 'e')
175            'juk' 'j{u"}k' (delete)
176            'uk' '{u"}k' (delete)
177            'em' 'om' 'am' (delete)
178            '{a'}m' (<- 'a')
179            '{e'}m' (<- 'e')
180            'm' (delete)
181            'od' 'ed' 'ad' '{o"}d' (delete)
182            '{a'}d' (<- 'a')
183            '{e'}d' (<- 'e')
184            'd' (delete)
185            'ja' 'je' (delete)
186            'a' 'e' 'o' (delete)
187            '{a'}' (<- 'a')
188            '{e'}' (<- 'e')
189        )
190    )
191
192    define plur_owner as (
193        [substring] R1 among(
194            'jaim' 'jeim' (delete)
195            '{a'}im' (<- 'a')
196            '{e'}im' (<- 'e')
197            'aim' 'eim' (delete)
198            'im' (delete)
199            'jaid' 'jeid' (delete)
200            '{a'}id' (<- 'a')
201            '{e'}id' (<- 'e')
202            'aid' 'eid' (delete)
203            'id' (delete)
204            'jai' 'jei' (delete)
205            '{a'}i' (<- 'a')
206            '{e'}i' (<- 'e')
207            'ai' 'ei' (delete)
208            'i' (delete)
209            'jaink' 'jeink' (delete)
210            'eink' 'aink' (delete)
211            '{a'}ink' (<- 'a')
212            '{e'}ink' (<- 'e')
213            'ink'
214            'jaitok' 'jeitek' (delete)
215            'aitok' 'eitek' (delete)
216            '{a'}itok' (<- 'a')
217            '{e'}itek' (<- 'e')
218            'itek' (delete)
219            'jeik' 'jaik' (delete)
220            'aik' 'eik' (delete)
221            '{a'}ik' (<- 'a')
222            '{e'}ik' (<- 'e')
223            'ik' (delete)
224        )
225    )
226)
227
// Entry point of the stemmer.
// mark_regions runs forwards to set p1; the suffix routines then run in
// backward mode in the fixed order below. Every step is wrapped in do, so
// a step that fails does not prevent the following steps from running.
define stem as (
    do mark_regions

    backwards (
        do instrum
        do case
        do case_special
        do case_other
        do factive
        do owned
        do sing_owner
        do plur_owner
        do plural
    )
)
242