"""
    sphinx.search.fr
    ~~~~~~~~~~~~~~~~

    French search language: includes the JS French stemmer.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from typing import Dict

import snowballstemmer

from sphinx.search import SearchLanguage, parse_stop_word

# French stop words, handed as raw text to ``parse_stop_word``.
# NOTE(review): the text after ``|`` on each line looks like an annotation
# that ``parse_stop_word`` discards -- confirm against its implementation.
french_stopwords = parse_stop_word('''
| source: http://snowball.tartarus.org/algorithms/french/stop.txt
au | a + le
aux | a + les
avec | with
ce | this
ces | these
dans | with
de | of
des | de + les
du | de + le
elle | she
en | `of them' etc
et | and
eux | them
il | he
je | I
la | the
le | the
leur | their
lui | him
ma | my (fem)
mais | but
me | me
même | same; as in moi-même (myself) etc
mes | me (pl)
moi | me
mon | my (masc)
ne | not
nos | our (pl)
notre | our
nous | we
on | one
ou | where
par | by
pas | not
pour | for
qu | que before vowel
que | that
qui | who
sa | his, her (fem)
se | oneself
ses | his (pl)
son | his, her (masc)
sur | on
ta | thy (fem)
te | thee
tes | thy (pl)
toi | thee
ton | thy (masc)
tu | thou
un | a
une | a
vos | your (pl)
votre | your
vous | you

 | single letter forms

c | c'
d | d'
j | j'
l | l'
à | to, at
m | m'
n | n'
s | s'
t | t'
y | there

 | forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent

 | forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent

 | Later additions (from Jean-Christophe Deschamps)
ceci | this
cela | that (added 11 Apr 2012. Omission reported by Adrien Grand)
celà | that (incorrect, though common)
cet | this
cette | this
ici | here
ils | they
les | the (pl)
leurs | their (pl)
quel | which
quels | which
quelle | which
quelles | which
sans | without
soi | oneself
''')


class SearchFrench(SearchLanguage):
    """French search language backed by the Snowball ``french`` stemmer."""

    # Language code and human-readable name of this search language.
    lang = 'fr'
    language_name = 'French'
    # File name of the JavaScript stemmer that goes with this language
    # (presumably resolved by the base class -- verify in sphinx.search).
    js_stemmer_rawcode = 'french-stemmer.js'
    # Stop-word collection produced by ``parse_stop_word`` above.
    stopwords = french_stopwords

    def init(self, options: Dict) -> None:
        """Create the French stemmer and keep it on ``self.stemmer``.

        :param options: accepted for interface compatibility; not read here.
        """
        french_stemmer = snowballstemmer.stemmer('french')
        self.stemmer = french_stemmer

    def stem(self, word: str) -> str:
        """Return the stem of *word*, lowercasing it first.

        Relies on ``self.stemmer`` having been set by :meth:`init`.
        """
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)