1/* Stemmer for Esperanto in UTF-8 */
2
// Variables: this stemmer declares no strings and no integers, only a single
// boolean flag.
strings ()
integers ()
booleans ( foreign )

// Internal routines, listed in the order in which `stem` uses them.
routines (
    canonical_form
    apostrophe
    short_word
    interjection
    correlative
    unuj
    standard_suffix
)

// The only routine visible to callers.
externals ( stem )

// Character classes used by the backward-mode routines.
groupings (
    vowel
    aiou
    ao
    ou
)
22
// Vowels carrying an acute accent (U+00E1, U+00E9, U+00ED, U+00F3, U+00FA).
// canonical_form replaces them with the plain vowel and sets `foreign`.
stringdef a' hex 'E1'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex 'F3'
stringdef u' hex 'FA'

// Single-character targets of the x-system digraphs cx gx hx jx sx ux
// (U+0109, U+011D, U+0125, U+0135, U+015D, U+016D).
stringdef cx hex '0109'
stringdef gx hex '011D'
stringdef hx hex '0125'
stringdef jx hex '0135'
stringdef sx hex '015D'
stringdef ux hex '016D'
35
// Rewrite the word into its canonical spelling, scanning forwards.
//
// At each position the longest matching entry of the table is applied:
//   - an acute-accented vowel is replaced by the plain vowel and the
//     `foreign` flag is set;
//   - an x-system digraph (cx gx hx jx sx ux) is replaced by the
//     corresponding single character defined with stringdef;
//   - otherwise the empty entry matches and the cursor advances by one
//     character.
// The repeat stops once `next` fails at the end of the word.
// This routine only ever sets `foreign`; it does not clear it.
stringescapes {}

define canonical_form as repeat (
    [substring] among (
        '{a'}' (<- 'a' set foreign)
        '{e'}' (<- 'e' set foreign)
        '{i'}' (<- 'i' set foreign)
        '{o'}' (<- 'o' set foreign)
        '{u'}' (<- 'u' set foreign)

        'cx' (<- '{cx}')
        'gx' (<- '{gx}')
        'hx' (<- '{hx}')
        'jx' (<- '{jx}')
        'sx' (<- '{sx}')
        'ux' (<- '{ux}')

        '' (next)
    )
)
55
56backwardmode (
57    stringescapes { }
58
59    define apostrophe as (
60        (['un{'}'] atlimit <- 'unu') or
61        (['l{'}'] atlimit <- 'la') or
62        (['{'}'] <- 'o')
63    )
64
65    define vowel 'aeiou'
66    define aiou vowel - 'e'
67    define ao 'ao'
68    define ou 'ou'
69
70    define short_word as not (loop (maxint * 0 + 4 / 2) gopast vowel)
71
72    define interjection as (
73        among ('adia{ux}' 'aha' 'amen' 'hola' 'hura' 'mia{ux}' 'muu' 'oho')
74        atlimit
75    )
76
77    define correlative as (
78        []
79        // Ignore -al, -am, etc. since they can't be confused with suffixes.
80        test (
81            ('a' or (try 'n'] 'e') or (try 'n' try 'j'] ou))
82            'i'
83            try ('k' or 't' or '{cx}' or 'nen')
84            atlimit
85        )
86        delete
87    )
88
89    define unuj as (
90        [try 'n' 'j'] 'unu' atlimit delete
91    )
92
93    define standard_suffix as (
94        [
95        try ((try 'n' try 'j' ao) or (try 's' aiou) or (try 'n' 'e'))
96        try '-' try 'a{ux}'
97        ] delete
98    )
99)
100
// Entry point: stem one word in place.
//
//   1. Clear `foreign`. Snowball variables belong to the stemmer object and
//      keep their value from one call to the next, and canonical_form only
//      ever sets this flag. Without the reset, one word containing an
//      acute-accented vowel would stop every later word processed by the
//      same stemmer object from being stemmed.
//   2. Canonicalise the spelling (`do` ignores the routine's signal).
//   3. Stop here, signalling failure, if the word was flagged as foreign;
//      it keeps its canonicalised spelling but no suffix is removed.
//   4. Working backwards from the end of the word: expand a trailing
//      apostrophe if there is one, then take the first alternative that
//      succeeds. Short words and interjections are left alone,
//      correlatives and "unuj(n)" get their own treatment, and everything
//      else goes through standard_suffix.
define stem as (
    unset foreign
    do canonical_form
    not foreign
    backwards (
        do apostrophe
        short_word or interjection or
        correlative or unuj or do standard_suffix
    )
)
110