/* Stemmer for Esperanto in UTF-8 */

/*
 * Overview
 *   stem             external entry point; normalises spelling, then strips
 *                    at most one grammatical ending working from the end of
 *                    the word.
 *   canonical_form   forward pass that rewrites x-system digraphs (cx, gx,
 *                    hx, jx, sx, ux) to the circumflex/breve letters and
 *                    acute-accented vowels to plain vowels; an accented
 *                    vowel also sets the `foreign` flag.
 *   All other routines are defined in backward mode and therefore match
 *   from the end of the word towards its start.
 */

strings ()

integers ()

// Set by canonical_form when an acute-accented vowel is seen; stem gives up
// on the word when it is set.
booleans ( foreign )

routines (
    apostrophe
    canonical_form
    correlative
    interjection
    short_word
    standard_suffix
    unuj
)

externals ( stem )

groupings ( vowel aiou ao ou )

// Acute-accented vowels U+00E1, U+00E9, U+00ED, U+00F3, U+00FA.
// NOTE(review): the decimal form and the spaces inside two of the hex
// literals are kept exactly as found; presumably the compiler tolerates
// them - confirm before normalising.
stringdef a' decimal '225'
stringdef e' hex 'E9'
stringdef i' hex 'ED'
stringdef o' hex ' f3'
stringdef u' hex 'fa '

// Esperanto letters U+0109, U+011D, U+0125, U+0135, U+015D, U+016D.
stringdef cx hex '0109'
stringdef gx hex '011D'
stringdef hx hex '0125'
stringdef jx hex '0135'
stringdef sx hex '015D'
stringdef ux hex '016D'

/*
 * canonical_form: scan the word left to right, repeatedly taking the longest
 * matching entry of the among at the cursor.
 *   - accented vowel  -> plain vowel, and mark the word as foreign
 *   - x-digraph       -> the corresponding single letter defined above
 *   - anything else   -> the empty entry matches and `next` steps over one
 *                        character; `repeat` stops once `next` fails.
 * The stringdef names a' .. u' contain an apostrophe, so the escape
 * characters are switched to / / for those entries and to ` ' for the
 * digraph entries. No comments are placed inside the among to keep the
 * escape handling untouched.
 */
define canonical_form as repeat (
    [substring]
    among (
stringescapes //
        '/a'/' (<- 'a' set foreign)
        '/e'/' (<- 'e' set foreign)
        '/i'/' (<- 'i' set foreign)
        '/o'/' (<- 'o' set foreign)
        '/u'/' (<- 'u' set foreign)
stringescapes `'
        'cx' (<- '`cx'')
        'gx' (<- '`gx'')
        'hx' (<- '`hx'')
        'jx' (<- '`jx'')
        'sx' (<- '`sx'')
        'ux' (<- '`ux'')
        '' (next)
    )
)

backwardmode (
    stringescapes { }

    /*
     * apostrophe: expand a trailing apostrophe.
     *   whole word un'  -> unu
     *   whole word l'   -> la
     *   otherwise a final apostrophe is replaced by o
     * ({'} is the escape for a literal apostrophe.)
     */
    define apostrophe as (
        (['un{'}'] atlimit <- 'unu') or
        (['l{'}'] atlimit <- 'la') or
        (['{'}'] <- 'o')
    )

    define vowel 'aeiou'
    define aiou vowel - 'e'
    define ao 'ao'
    define ou 'ou'

    /*
     * short_word: succeeds when the word does NOT contain two vowels, i.e.
     * the inner loop cannot go past a vowel twice.
     * The loop count expression is arithmetically 2 (0 + 2); it is kept
     * verbatim.
     */
    define short_word as not (loop (maxint * 0 + 4 / 2) gopast vowel)

    /*
     * interjection: succeeds when the entire word is one of the listed
     * interjections (the among match must end at the limit).
     */
    define interjection as (
        among ('adia{ux}' 'aha' 'amen' 'hola' 'hura' 'mia{ux}' 'muu' 'oho')
        atlimit
    )

    /*
     * correlative: recognise a whole-word correlative and delete only the
     * trailing n (before e) or n / j / jn (before o or u) that the test
     * stepped over. `[]` opens an empty slice at the end of the word and the
     * `]` inside the test moves its other end to just before the vowel; for
     * the 'a' alternative the slice stays empty, so delete removes nothing.
     * The test requires, reading right to left: the ending above, then 'i',
     * then optionally k, t, {cx} or nen, and then the start of the word.
     */
    define correlative as (
        []
        // Ignore -al, -am, etc. since they can't be confused with suffixes.
        test (
            ('a' or (try 'n'] 'e') or (try 'n' try 'j'] ou))
            'i'
            try ('k' or 't' or '{cx}' or 'nen')
            atlimit
        )
        delete
    )

    /*
     * unuj: whole word is unu followed by j and an optional n; delete that
     * j / jn.
     */
    define unuj as (
        [try 'n' 'j'] 'unu' atlimit delete
    )

    /*
     * standard_suffix: delete, as one slice taken from the end of the word,
     *   - optionally one ending: (a|o)[j][n], (a|i|o|u)[s], or e[n]
     *   - then optionally a preceding '-'
     *   - then optionally a preceding a{ux}
     * Every part is optional, so the routine itself always succeeds.
     */
    define standard_suffix as (
        [
        try ((try 'n' try 'j' ao) or (try 's' aiou) or (try 'n' 'e'))
        try '-' try 'a{ux}'
        ] delete
    )
)

/*
 * stem: external entry point.
 *   1. Normalise spelling (canonical_form).
 *   2. Stop, signalling failure, if the word was flagged as foreign; the
 *      spelling normalisation already applied is kept.
 *   3. Working backwards: expand a trailing apostrophe, then take the first
 *      alternative that succeeds - short word (left alone), interjection
 *      (left alone), correlative, unu+j(n), or the general suffix removal.
 */
define stem as (
    do canonical_form
    not foreign
    backwards (
        do apostrophe
        short_word or interjection or
        correlative or unuj or do standard_suffix
    )
)