"""
    sphinx.search.fr
    ~~~~~~~~~~~~~~~~

    French search language: includes the JS French stemmer.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
10
11from typing import Dict
12
13import snowballstemmer
14
15from sphinx.search import SearchLanguage, parse_stop_word
16
# French stop words, built by handing the raw list below to parse_stop_word().
# The list follows the layout of the Snowball French stop list (URL on the
# first line of the literal): one word per line, with an English gloss or a
# section heading to the right of a `|`.
# NOTE(review): presumably parse_stop_word() discards everything from `|` to
# the end of the line, so only the left-hand words are kept -- confirm against
# its definition in sphinx.search.
french_stopwords = parse_stop_word('''
| source: http://snowball.tartarus.org/algorithms/french/stop.txt
au             |  a + le
aux            |  a + les
avec           |  with
ce             |  this
ces            |  these
dans           |  with
de             |  of
des            |  de + les
du             |  de + le
elle           |  she
en             |  `of them' etc
et             |  and
eux            |  them
il             |  he
je             |  I
la             |  the
le             |  the
leur           |  their
lui            |  him
ma             |  my (fem)
mais           |  but
me             |  me
même           |  same; as in moi-même (myself) etc
mes            |  me (pl)
moi            |  me
mon            |  my (masc)
ne             |  not
nos            |  our (pl)
notre          |  our
nous           |  we
on             |  one
ou             |  where
par            |  by
pas            |  not
pour           |  for
qu             |  que before vowel
que            |  that
qui            |  who
sa             |  his, her (fem)
se             |  oneself
ses            |  his (pl)
son            |  his, her (masc)
sur            |  on
ta             |  thy (fem)
te             |  thee
tes            |  thy (pl)
toi            |  thee
ton            |  thy (masc)
tu             |  thou
un             |  a
une            |  a
vos            |  your (pl)
votre          |  your
vous           |  you

               |  single letter forms

c              |  c'
d              |  d'
j              |  j'
l              |  l'
à              |  to, at
m              |  m'
n              |  n'
s              |  s'
t              |  t'
y              |  there

               | forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent

               | forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent

               | Later additions (from Jean-Christophe Deschamps)
ceci           |  this
cela           |  that (added 11 Apr 2012. Omission reported by Adrien Grand)
celà           |  that (incorrect, though common)
cet            |  this
cette          |  this
ici            |  here
ils            |  they
les            |  the (pl)
leurs          |  their (pl)
quel           |  which
quels          |  which
quelle         |  which
quelles        |  which
sans           |  without
soi            |  oneself
''')
193
194
class SearchFrench(SearchLanguage):
    """French search language: French stop words plus a Snowball stemmer."""

    # Language code and human-readable name for this search language.
    lang = 'fr'
    language_name = 'French'
    # File name of the JavaScript French stemmer.
    # NOTE(review): presumably resolved and shipped by SearchLanguage -- confirm.
    js_stemmer_rawcode = 'french-stemmer.js'
    # Stop words parsed from the module-level French list.
    stopwords = french_stopwords

    def init(self, options: Dict) -> None:
        """Create the Snowball French stemmer used by :meth:`stem`.

        :param options: accepted for interface compatibility; not read here.
        """
        self.stemmer = snowballstemmer.stemmer('french')

    def stem(self, word: str) -> str:
        """Return the French stem of *word*.

        The word is lowercased before it is passed to the stemmer.

        :param word: the word to stem.
        :return: the stem produced by the Snowball French stemmer.
        """
        lowered = word.lower()
        return self.stemmer.stemWord(lowered)
206