/*
Hungarian Stemmer
Removes noun inflections

Layout of this file:
  1. declarations (routines, externals, integers, groupings)
  2. string escapes for the accented vowels
  3. mark_regions  - forward pass that computes p1
  4. backwardmode  - suffix routines, listed in the order stem calls them
  5. stem          - the exported entry point
*/

routines (
    mark_regions
    R1
    v_ending
    case
    case_special
    case_other
    plural
    owned
    sing_owner
    plur_owner
    instrum
    factive
    undouble
    double
)

externals ( stem )

integers ( p1 )
groupings ( v )

stringescapes {}

/* special characters */

stringdef a'   '{U+00E1}'  //a-acute
stringdef e'   '{U+00E9}'  //e-acute
stringdef i'   '{U+00ED}'  //i-acute
stringdef o'   '{U+00F3}'  //o-acute
stringdef o"   '{U+00F6}'  //o-umlaut
stringdef oq   '{U+0151}'  //o-double acute
stringdef u'   '{U+00FA}'  //u-acute
stringdef u"   '{U+00FC}'  //u-umlaut
stringdef uq   '{U+0171}'  //u-double acute

/* The vowel grouping: the five plain vowels plus the nine accented ones
   defined above. */
define v 'aeiou{a'}{e'}{i'}{o'}{o"}{oq}{u'}{u"}{uq}'

/*
mark_regions: compute p1, the left edge of region R1.

p1 starts at limit, so R1 is empty unless one of the two branches below
succeeds and moves the mark.

  - Word starts with a vowel: advance to the first non-vowel, step over it
    (as a unit if it is one of the listed multi-letter consonants, otherwise
    a single character) and set p1 there.
  - Word starts with a non-vowel: set p1 just after the first vowel.
*/
define mark_regions as (

    $p1 = limit

    (
        v
        goto non-v
        among('cs' 'gy' 'ly' 'ny' 'sz' 'ty' 'zs' 'dzs') or next
        setmark p1
    )
    or
    (
        non-v
        gopast v
        setmark p1
    )
)

backwardmode (

    /* ---- helpers ------------------------------------------------------ */

    // Succeeds when the cursor is at or to the right of p1.
    define R1 as $p1 <= cursor

    // A trailing a-acute / e-acute inside R1 is replaced by plain a / e.
    define v_ending as (
        [substring] R1 among(
            '{a'}' (<- 'a')
            '{e'}' (<- 'e')
        )
    )

    // Succeeds when the text before the cursor ends in one of the listed
    // doubled consonants. Wrapped in test, so the cursor is left unmoved.
    define double as (
        test among(
            'bb' 'cc' 'ccs' 'dd' 'ff' 'gg' 'ggy' 'jj' 'kk' 'll' 'lly' 'mm'
            'nn' 'nny' 'pp' 'rr' 'ss' 'ssz' 'tt' 'tty' 'vv' 'zz' 'zzs'
        )
    )

    // Step back over the last character, then delete the single character
    // before it (e.g. 'bb' -> 'b', 'ccs' -> 'cs').
    define undouble as (
        next [hop 1] delete
    )

    /* ---- suffix routines, in the order stem invokes them --------------- */

    // 'al' / 'el' in R1, but only when preceded by a doubled consonant:
    // remove the suffix and then collapse the doubling.
    define instrum as (
        [substring] R1 among(
            'al' (double)
            'el' (double)
        )
        delete
        undouble
    )

    // The strings carry no per-string action: any match in R1 is deleted,
    // after which v_ending is applied to what is left.
    define case as (
        [substring] R1 among(
            'ban'   'ben'
            'ba'    'be'
            'ra'    're'
            'nak'   'nek'
            'val'   'vel'
            't{o'}l'   't{oq}l'
            'r{o'}l'   'r{oq}l'
            'b{o'}l'   'b{oq}l'
            'hoz'   'hez'   'h{o"}z'
            'n{a'}l'   'n{e'}l'
            'ig'
            'at'    'et'    'ot'    '{o"}t'
            '{e'}rt'
            'k{e'}pp'  'k{e'}ppen'
            'kor'
            'ul'    '{u"}l'
            'v{a'}'    'v{e'}'
            'onk{e'}nt' 'enk{e'}nt' 'ank{e'}nt'
            'k{e'}nt'
            'en'    'on'    'an'    '{o"}n'
            'n'
            't'
        )
        delete
        v_ending
    )

    // Suffixes that begin with an accented vowel: the whole suffix is
    // replaced by the corresponding unaccented vowel.
    define case_special as (
        [substring] R1 among(
            '{e'}n'         (<- 'e')
            '{a'}n'         (<- 'a')
            '{a'}nk{e'}nt'  (<- 'a')
        )
    )

    // The -stul / -st(u-umlaut)l family: plain forms are deleted, forms
    // starting with an accented vowel are replaced by the plain vowel.
    define case_other as (
        [substring] R1 among(
            'astul' 'est{u"}l'  (delete)
            'stul'  'st{u"}l'   (delete)
            '{a'}stul'          (<- 'a')
            '{e'}st{u"}l'       (<- 'e')
        )
    )

    // a-acute / e-acute in R1 preceded by a doubled consonant: remove the
    // vowel and then collapse the doubling.
    define factive as (
        [substring] R1 among(
            '{a'}' (double)
            '{e'}' (double)
        )
        delete
        undouble
    )

    define owned as (
        [substring] R1 among (
            'ok{e'}' '{o"}k{e'}' 'ak{e'}' 'ek{e'}'  (delete)
            '{e'}k{e'}'     (<- 'e')
            '{a'}k{e'}'     (<- 'a')
            'k{e'}'         (delete)
            '{e'}{e'}i'     (<- 'e')
            '{a'}{e'}i'     (<- 'a')
            '{e'}i'         (delete)
            '{e'}{e'}'      (<- 'e')
            '{e'}'          (delete)
        )
    )

    define sing_owner as (
        [substring] R1 among(
            '{u"}nk' 'unk'          (delete)
            '{a'}nk'                (<- 'a')
            '{e'}nk'                (<- 'e')
            'nk'                    (delete)

            '{a'}juk'               (<- 'a')
            '{e'}j{u"}k'            (<- 'e')
            'juk' 'j{u"}k'          (delete)
            'uk' '{u"}k'            (delete)

            'em' 'om' 'am'          (delete)
            '{a'}m'                 (<- 'a')
            '{e'}m'                 (<- 'e')
            'm'                     (delete)

            'od' 'ed' 'ad' '{o"}d'  (delete)
            '{a'}d'                 (<- 'a')
            '{e'}d'                 (<- 'e')
            'd'                     (delete)

            'ja' 'je'               (delete)
            'a' 'e' 'o'             (delete)
            '{a'}'                  (<- 'a')
            '{e'}'                  (<- 'e')
        )
    )

    define plur_owner as (
        [substring] R1 among(
            'jaim' 'jeim'           (delete)
            '{a'}im'                (<- 'a')
            '{e'}im'                (<- 'e')
            'aim' 'eim'             (delete)
            'im'                    (delete)

            'jaid' 'jeid'           (delete)
            '{a'}id'                (<- 'a')
            '{e'}id'                (<- 'e')
            'aid' 'eid'             (delete)
            'id'                    (delete)

            'jai' 'jei'             (delete)
            '{a'}i'                 (<- 'a')
            '{e'}i'                 (<- 'e')
            'ai' 'ei'               (delete)
            'i'                     (delete)

            'jaink' 'jeink'         (delete)
            'eink' 'aink'           (delete)
            '{a'}ink'               (<- 'a')
            '{e'}ink'               (<- 'e')

            // 'ink' has no action of its own; it belongs to the same
            // group as the two strings after it and shares their (delete).
            'ink' 'jaitok' 'jeitek' (delete)
            'aitok' 'eitek'         (delete)
            '{a'}itok'              (<- 'a')
            '{e'}itek'              (<- 'e')
            // NOTE(review): 'itek' is listed but a bare 'itok' is not;
            // confirm whether that asymmetry is intended before changing it.
            'itek'                  (delete)

            'jeik' 'jaik'           (delete)
            'aik' 'eik'             (delete)
            '{a'}ik'                (<- 'a')
            '{e'}ik'                (<- 'e')
            'ik'                    (delete)
        )
    )

    define plural as (
        [substring] R1 among(
            '{a'}k'     (<- 'a')
            '{e'}k'     (<- 'e')
            '{o"}k'     (delete)
            'ak'        (delete)
            'ok'        (delete)
            'ek'        (delete)
            'k'         (delete)
        )
    )
)

/*
stem: exported entry point.

Computes p1 with mark_regions, then works from the end of the word and
tries each suffix routine once, in the fixed order below. Every step is
wrapped in do, so a routine that fails does not stop the ones after it.
*/
define stem as (
    do mark_regions
    backwards (
        do instrum
        do case
        do case_special
        do case_other
        do factive
        do owned
        do sing_owner
        do plur_owner
        do plural
    )
)