1
2routines (
3           prelude postlude mark_regions
4           RV R1 R2
5           step_0
6           standard_suffix combo_suffix
7           verb_suffix
8           vowel_suffix
9)
10
11externals ( stem )
12
13integers ( pV p1 p2 )
14
15groupings ( v )
16
17booleans  ( standard_suffix_removed )
18
19stringescapes {}
20
21/* special characters */
22
23stringdef a^   '{U+00E2}'  // a circumflex
24stringdef i^   '{U+00EE}'  // i circumflex
25stringdef a+   '{U+0103}'  // a breve
26stringdef s,   '{U+015F}'  // s cedilla
27stringdef t,   '{U+0163}'  // t cedilla
28
29define v 'aeiou{a^}{i^}{a+}'
30
31define prelude as (
32    repeat goto (
33        v [ ('u' ] v <- 'U') or
34            ('i' ] v <- 'I')
35    )
36)
37
38define mark_regions as (
39
40    $pV = limit
41    $p1 = limit
42    $p2 = limit // defaults
43
44    do (
45        ( v (non-v gopast v) or (v gopast non-v) )
46        or
47        ( non-v (non-v gopast v) or (v next) )
48        setmark pV
49    )
50    do (
51        gopast v gopast non-v setmark p1
52        gopast v gopast non-v setmark p2
53    )
54)
55
56define postlude as repeat (
57
58    [substring] among(
59        'I'  (<- 'i')
60        'U'  (<- 'u')
61        ''   (next)
62    )
63
64)
65
66backwardmode (
67
68    define RV as $pV <= cursor
69    define R1 as $p1 <= cursor
70    define R2 as $p2 <= cursor
71
72    define step_0 as (
73        [substring] R1 among(
74            'ul' 'ului'
75                ( delete )
76            'aua'
77                ( <-'a' )
78            'ea' 'ele' 'elor'
79                ( <-'e' )
80            'ii' 'iua' 'iei' 'iile' 'iilor' 'ilor'
81                ( <-'i')
82            'ile'
83                ( not 'ab' <- 'i' )
84            'atei'
85                ( <- 'at' )
86            'a{t,}ie' 'a{t,}ia'
87                ( <- 'a{t,}i' )
88        )
89    )
90
91    define combo_suffix as test (
92        [substring] R1 (
93            among(
94            /* 'IST'. alternative: include the following
95                'alism' 'alisme'
96                'alist' 'alista' 'aliste' 'alisti' 'alist{a+}' 'ali{s,}ti' (
97                    <- 'al'
98                )
99            */
100                'abilitate' 'abilitati' 'abilit{a+}i' 'abilit{a+}{t,}i' (
101                    <- 'abil'
102                )
103                'ibilitate' (
104                    <- 'ibil'
105                )
106                'ivitate' 'ivitati' 'ivit{a+}i' 'ivit{a+}{t,}i' (
107                    <- 'iv'
108                )
109                'icitate' 'icitati' 'icit{a+}i' 'icit{a+}{t,}i'
110                'icator' 'icatori'
111                'iciv' 'iciva' 'icive' 'icivi' 'iciv{a+}'
112                'ical' 'icala' 'icale' 'icali' 'ical{a+}' (
113                    <- 'ic'
114                )
115                'ativ' 'ativa' 'ative' 'ativi' 'ativ{a+}' 'a{t,}iune'
116                'atoare' 'ator' 'atori'
117                '{a+}toare' '{a+}tor' '{a+}tori' (
118                    <- 'at'
119                )
120                'itiv' 'itiva' 'itive' 'itivi' 'itiv{a+}' 'i{t,}iune'
121                'itoare' 'itor' 'itori' (
122                    <- 'it'
123                )
124            )
125            set standard_suffix_removed
126        )
127    )
128
129    define standard_suffix as (
130        unset standard_suffix_removed
131        repeat combo_suffix
132        [substring] R2 (
133            among(
134
135                // past participle is treated here, rather than
136                // as a verb ending:
137                'at' 'ata' 'at{a+}' 'ati' 'ate'
138                'ut' 'uta' 'ut{a+}' 'uti' 'ute'
139                'it' 'ita' 'it{a+}' 'iti' 'ite'
140
141                'ic' 'ica' 'ice' 'ici' 'ic{a+}'
142                'abil' 'abila' 'abile' 'abili' 'abil{a+}'
143                'ibil' 'ibila' 'ibile' 'ibili' 'ibil{a+}'
144                'oasa' 'oas{a+}' 'oase' 'os' 'osi' 'o{s,}i'
145                'ant' 'anta' 'ante' 'anti' 'ant{a+}'
146                'ator' 'atori'
147                'itate' 'itati' 'it{a+}i' 'it{a+}{t,}i'
148                'iv' 'iva' 'ive' 'ivi' 'iv{a+}' (
149                    delete
150                )
151                'iune' 'iuni' (
152                    '{t,}'] <- 't'
153                )
154                'ism' 'isme'
155                'ist' 'ista' 'iste' 'isti' 'ist{a+}' 'i{s,}ti' (
156                    <- 'ist'
157                    /* 'IST'. alternative: remove with <- '' */
158                )
159            )
160            set standard_suffix_removed
161        )
162    )
163
164    define verb_suffix as setlimit tomark pV for (
165        [substring] among(
166            // 'long' infinitive:
167            'are' 'ere' 'ire' '{a^}re'
168
169            // gerund:
170            'ind' '{a^}nd'
171            'indu' '{a^}ndu'
172
173            'eze'
174            'easc{a+}'
175            // present:
176            'ez' 'ezi' 'eaz{a+}' 'esc' 'e{s,}ti'
177            'e{s,}te'
178            '{a+}sc' '{a+}{s,}ti'
179            '{a+}{s,}te'
180
181            // imperfect:
182            'am' 'ai' 'au'
183            'eam' 'eai' 'ea' 'ea{t,}i' 'eau'
184            'iam' 'iai' 'ia' 'ia{t,}i' 'iau'
185
186            // past: // (not 'ii')
187            'ui'
188            'a{s,}i' 'ar{a+}m' 'ar{a+}{t,}i' 'ar{a+}'
189            'u{s,}i' 'ur{a+}m' 'ur{a+}{t,}i' 'ur{a+}'
190            'i{s,}i' 'ir{a+}m' 'ir{a+}{t,}i' 'ir{a+}'
191            '{a^}i' '{a^}{s,}i' '{a^}r{a+}m' '{a^}r{a+}{t,}i' '{a^}r{a+}'
192
193            // pluferfect:
194            'asem' 'ase{s,}i' 'ase' 'aser{a+}m' 'aser{a+}{t,}i' 'aser{a+}'
195            'isem' 'ise{s,}i' 'ise' 'iser{a+}m' 'iser{a+}{t,}i' 'iser{a+}'
196            '{a^}sem' '{a^}se{s,}i' '{a^}se' '{a^}ser{a+}m' '{a^}ser{a+}{t,}i'
197            '{a^}ser{a+}'
198            'usem' 'use{s,}i' 'use' 'user{a+}m' 'user{a+}{t,}i' 'user{a+}'
199
200                ( non-v or 'u'  delete )
201
202            // present:
203            '{a+}m' 'a{t,}i'
204            'em' 'e{t,}i'
205            'im' 'i{t,}i'
206            '{a^}m' '{a^}{t,}i'
207
208            // past:
209            'se{s,}i' 'ser{a+}m' 'ser{a+}{t,}i' 'ser{a+}'
210            'sei' 'se'
211
212            // pluperfect:
213            'sesem' 'sese{s,}i' 'sese' 'seser{a+}m' 'seser{a+}{t,}i' 'seser{a+}'
214                (delete)
215        )
216    )
217
218    define vowel_suffix as (
219        [substring] RV among (
220            'a' 'e' 'i' 'ie' '{a+}' ( delete )
221        )
222    )
223)
224
225define stem as (
226    do prelude
227    do mark_regions
228    backwards (
229        do step_0
230        do standard_suffix
231        do ( standard_suffix_removed or verb_suffix )
232        do vowel_suffix
233    )
234    do postlude
235)
236
237