1 /*  hunspell.cpp
2  *
3  *  Copyright (C) 2009 - Sayamindu Dasgupta <sayamindu@gmail.com>
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU Lesser General Public License as published by
7  *  the Free Software Foundation; either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU Library General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include <Python.h>
20 #include <hunspell.hxx>
21 
22 
/* Fallback for Python headers that do not define PyVarObject_HEAD_INIT:
 * expand to PyObject_HEAD_INIT(type) followed by the size field. */
#ifndef PyVarObject_HEAD_INIT
    #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
#endif

/* Compatibility defines, active only when building against Python 2:
 * PyInt_FromLong is redirected to PyLong_FromLong and PyBytes_FromString
 * to PyString_FromString. */
#if PY_MAJOR_VERSION < 3
    #define PyInt_FromLong PyLong_FromLong
    #define PyBytes_FromString  PyString_FromString
#endif /* PY_MAJOR_VERSION < 3 */
32 
33 
34 /****************************************
35                 HunSpell
36 ****************************************/
37 
/* Module exception object ("hunspell.HunSpellError"); it is created by the
 * module initialization function and raised on file-access failures. */
static PyObject *HunSpellError;

/* Instance layout of the Python-level HunSpell object. */
typedef struct {
    PyObject_HEAD
    /* Wrapped Hunspell engine: allocated in HunSpell_init, deleted in HunSpell_dealloc. */
    Hunspell * handle;
    /* Value of handle->get_dic_encoding(); passed as the codec to the "et" argument parsers. */
    const char *encoding;
} HunSpell;
45 
46 static int
HunSpell_init(HunSpell * self,PyObject * args,PyObject * kwds)47 HunSpell_init(HunSpell * self, PyObject *args, PyObject *kwds)
48 {
49     PyObject *dpath = NULL; /* PyBytes in py3 PyString in py2 */
50     PyObject *apath = NULL;
51     FILE *fh;
52 
53 #if PY_VERSION_HEX < 0x03010000
54     /* TODO: Please review if there is any shorter/nicer;less clumsy way to convert args to  PyStrings using Py_FileSystemDefaultEncoding in python 2.x */
55     const char * dpath_ptr = NULL;
56     const char * apath_ptr = NULL;
57     if (!PyArg_ParseTuple(args, "etet", Py_FileSystemDefaultEncoding, &dpath_ptr, Py_FileSystemDefaultEncoding, &apath_ptr))
58         return 1;
59     dpath = PyString_FromString(dpath_ptr);
60     apath = PyString_FromString(apath_ptr);
61 #else
62     if (!PyArg_ParseTuple(args, "O&O&", PyUnicode_FSConverter, &dpath, PyUnicode_FSConverter, &apath))
63         return 1;
64 #endif
65     /* Some versions of Hunspell() will succeed even if
66     * there are no dictionary files. So test for permissions.
67     */
68     /* TODO: consider  _Py_fopen for py3.x here ? */
69     fh = fopen(PyBytes_AsString(dpath), "r");
70     if (fh) {
71         fclose(fh);
72     } else {
73         PyErr_SetFromErrno(HunSpellError);
74         /* TODO: Py_DECREF(*path); */
75         return -1;
76     }
77     fh = fopen(PyBytes_AsString(apath), "r");
78     if (fh) {
79         fclose(fh);
80     } else {
81         PyErr_SetFromErrno(HunSpellError);
82         return -1;
83     }
84 
85     self->handle = new Hunspell(PyBytes_AsString(apath), PyBytes_AsString(dpath));
86     // TODO check class instanciation went well
87     //if(!self->handle) { Hunspell ain't bool
88     //    PyErr_SetString(HunSpellError, "Cannot open dictionary");
89     //    return -1;
90     //}
91     self->encoding = self->handle->get_dic_encoding();
92     Py_DECREF(dpath);
93     Py_DECREF(apath);
94     return 0;
95 }
96 
97 static void
HunSpell_dealloc(HunSpell * self)98 HunSpell_dealloc(HunSpell * self)
99 {
100     delete self->handle;
101     Py_TYPE(self)->tp_free((PyObject *)self);
102 }
103 
104 static PyObject *
HunSpell_add_dic(HunSpell * self,PyObject * args,PyObject * kwds)105 HunSpell_add_dic(HunSpell * self, PyObject *args, PyObject *kwds)
106 {
107     PyObject *dpath = NULL; /* PyBytes in py3 PyString in py2 */
108     FILE *fh;
109 #if PY_VERSION_HEX < 0x03010000
110     const char * dpath_ptr = NULL;
111     if (!PyArg_ParseTuple(args, "et", Py_FileSystemDefaultEncoding, &dpath_ptr))
112         return NULL;
113     dpath = PyString_FromString(dpath_ptr);
114 #else
115     if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &dpath))
116         return NULL;
117 #endif
118     fh = fopen(PyBytes_AsString(dpath), "r");
119     if (fh) {
120         fclose(fh);
121     } else {
122         PyErr_SetFromErrno(HunSpellError);
123         Py_DECREF(dpath);
124         return NULL;
125     }
126     int result = self->handle->add_dic(PyBytes_AsString(dpath));
127     Py_DECREF(dpath);
128     return PyLong_FromLong(result);
129 }
130 
131 static PyObject *
HunSpell_get_dic_encoding(HunSpell * self,PyObject * args)132 HunSpell_get_dic_encoding(HunSpell * self, PyObject *args)
133 {
134     return Py_BuildValue("s", self->encoding);
135 }
136 
137 static PyObject *
HunSpell_spell(HunSpell * self,PyObject * args)138 HunSpell_spell(HunSpell * self, PyObject *args)
139 {
140     char *word;
141     int retvalue;
142 
143     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
144         return NULL;
145     retvalue = self->handle->spell(word);
146     PyMem_Free(word);
147     return PyBool_FromLong(retvalue);
148 }
149 
150 
151 static PyObject *
HunSpell_suggest(HunSpell * self,PyObject * args)152 HunSpell_suggest(HunSpell * self, PyObject *args)
153 {
154     char *word, **slist;
155     int i, num_slist, ret, str_size;
156     PyObject *slist_list, *pystr;
157     PyObject *etype, *evalue, *etrace;
158 
159     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
160         return NULL;
161 
162     slist_list = PyList_New(0);
163     if (!slist_list) {
164         return NULL;
165     }
166     num_slist = self->handle->suggest(&slist, word);
167     PyMem_Free(word);
168 
169     for (i = 0, ret = 0; !ret && i < num_slist; i++) {
170         str_size = strlen(slist[i]);
171         pystr = PyUnicode_DecodeUTF8(slist[i], str_size, "strict");
172         if (!pystr) {
173             PyErr_Fetch(&etype, &evalue, &etrace);
174             Py_DECREF(etype);
175             pystr = PyUnicode_DecodeLatin1(slist[i], str_size, "strict");
176             if (!pystr)
177                 break;
178         }
179         ret = PyList_Append(slist_list, pystr);
180         Py_DECREF(pystr);
181     }
182 
183     self->handle->free_list(&slist, num_slist);
184     return slist_list;
185 }
186 
187 static PyObject *
HunSpell_analyze(HunSpell * self,PyObject * args)188 HunSpell_analyze(HunSpell * self, PyObject *args)
189 {
190     char *word, **slist;
191     int i, num_slist, ret;
192     PyObject *slist_list, *pystr;
193 
194     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
195         return NULL;
196 
197     slist_list = PyList_New(0);
198     if (!slist_list) {
199         return NULL;
200     }
201     num_slist = self->handle->analyze(&slist, word);
202     PyMem_Free(word);
203 
204     for (i = 0, ret = 0; !ret && i < num_slist; i++) {
205         pystr = PyBytes_FromString(slist[i]);
206         if (!pystr)
207             break;
208         ret = PyList_Append(slist_list, pystr);
209         Py_DECREF(pystr);
210     }
211 
212     self->handle->free_list(&slist, num_slist);
213     return slist_list;
214 }
215 
216 static PyObject *
HunSpell_stem(HunSpell * self,PyObject * args)217 HunSpell_stem(HunSpell * self, PyObject *args)
218 {
219     char *word, **slist;
220     int i, num_slist, ret;
221     PyObject *slist_list, *pystr;
222 
223     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
224     return NULL;
225 
226     slist_list = PyList_New(0);
227     if (!slist_list) {
228         return NULL;
229     }
230     num_slist = self->handle->stem(&slist, word);
231     PyMem_Free(word);
232 
233     for (i = 0, ret = 0; !ret && i < num_slist; i++) {
234         pystr = PyBytes_FromString(slist[i]);
235         if (!pystr)
236             break;
237         ret = PyList_Append(slist_list, pystr);
238         Py_DECREF(pystr);
239     }
240 
241     self->handle->free_list(&slist, num_slist);
242     return slist_list;
243 }
244 
245 static PyObject *
HunSpell_generate(HunSpell * self,PyObject * args)246 HunSpell_generate(HunSpell * self, PyObject *args)
247 {
248     char *word1, *word2, **slist;
249     int i, num_slist, ret;
250     PyObject *slist_list, *pystr;
251 
252     if (!PyArg_ParseTuple(args, "etet", self->encoding, &word1, self->encoding, &word2))
253     return NULL;
254 
255     slist_list = PyList_New(0);
256     if (!slist_list) {
257         return NULL;
258     }
259     num_slist = self->handle->generate(&slist, word1, word2);
260     PyMem_Free(word1);
261     PyMem_Free(word2);
262 
263     for (i = 0, ret = 0; !ret && i < num_slist; i++) {
264         pystr = PyBytes_FromString(slist[i]);
265         if (!pystr)
266             break;
267         ret = PyList_Append(slist_list, pystr);
268         Py_DECREF(pystr);
269     }
270 
271     self->handle->free_list(&slist, num_slist);
272     return slist_list;
273 }
274 
275 static PyObject *
HunSpell_generate2(HunSpell * self,PyObject * args)276 HunSpell_generate2(HunSpell * self, PyObject *args)
277 {
278     char *word1, *desc, **slist;
279     int i, num_slist, ret;
280     PyObject *slist_list, *pystr;
281 
282     if (!PyArg_ParseTuple(args, "etet", self->encoding, &word1, self->encoding, &desc))
283         return NULL;
284 
285     slist_list = PyList_New(0);
286     if (!slist_list) {
287         return NULL;
288     }
289 
290     num_slist = self->handle->generate(&slist, word1, &desc, 1);
291     PyMem_Free(word1);
292     PyMem_Free(desc);
293 
294     for (i = 0, ret = 0; !ret && i < num_slist; i++) {
295         pystr = PyBytes_FromString(slist[i]);
296         if (!pystr)
297             break;
298         ret = PyList_Append(slist_list, pystr);
299         Py_DECREF(pystr);
300     }
301 
302     self->handle->free_list(&slist, num_slist);
303     return slist_list;
304 }
305 
306 static PyObject *
HunSpell_add(HunSpell * self,PyObject * args)307 HunSpell_add(HunSpell * self, PyObject *args)
308 {
309     char *word;
310     int retvalue;
311 
312     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
313         return NULL;
314     retvalue = self->handle->add(word);
315     PyMem_Free(word);
316 
317     return PyLong_FromLong(retvalue);
318 }
319 
320 static PyObject *
HunSpell_add_with_affix(HunSpell * self,PyObject * args)321 HunSpell_add_with_affix(HunSpell * self, PyObject *args)
322 {
323     char *word, *example;
324     int retvalue;
325 
326     if (!PyArg_ParseTuple(args, "etet", self->encoding, &word, self->encoding, &example))
327         return NULL;
328     retvalue = self->handle->add_with_affix(word, example);
329     PyMem_Free(word);
330     PyMem_Free(example);
331 
332     return PyLong_FromLong(retvalue);
333 }
334 
335 static PyObject *
HunSpell_remove(HunSpell * self,PyObject * args)336 HunSpell_remove(HunSpell * self, PyObject *args)
337 {
338     char *word;
339     int retvalue;
340 
341     if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
342         return NULL;
343     retvalue = self->handle->remove(word);
344     PyMem_Free(word);
345 
346     return PyLong_FromLong(retvalue);
347 }
348 
349 static PyMethodDef HunSpell_methods[] = {
350     {"get_dic_encoding", (PyCFunction) HunSpell_get_dic_encoding,
351      METH_NOARGS,
352      "Gets encoding of loaded dictionary.\n\n"
353      "Returns\n"
354      "-------\n"
355      "string : The encoding of currently used dic file (UTF-8, ISO8859-1, ...)"},
356 
357     {"add_dic", (PyCFunction) HunSpell_add_dic, METH_VARARGS,
358      "Load an extra dictionary to the current instance.\n"
359      "The  extra dictionaries use the affix file of the allocated Hunspell object.\n"
360      "Maximal number of the extra dictionaries is limited in the Hunspell source code to 20.\n\n"
361      "Parameters\n"
362      "----------\n"
363      "dpath : string\n"
364      "    Path to the .dic to add.\n\n"
365      "Returns\n"
366      "-------\n"
367      "int : hunspell program error code."},
368 
369     {"spell", (PyCFunction) HunSpell_spell, METH_VARARGS,
370      "Checks the spelling of the given word.\n\n"
371      "Parameters\n"
372      "----------\n"
373      "word : string\n"
374      "    Word to check.\n\n"
375      "Returns\n"
376      "-------\n"
377      "bool : True if the word is correctly spelled else False"},
378 
379     {"suggest", (PyCFunction) HunSpell_suggest, METH_VARARGS,
380      "Provide suggestions for the given word.\n\n"
381      "Parameters\n"
382      "----------\n"
383      "word : string\n"
384      "    Word for which we want suggestions\n\n"
385      "Returns\n"
386      "-------\n"
387      "list of strings : The list of suggestions for input word. (No suggestion returns an empty list)."},
388 
389     {"analyze", (PyCFunction) HunSpell_analyze, METH_VARARGS,
390      "Provide morphological analysis for the given word.\n\n"
391      "Parameters\n"
392      "----------\n"
393      "word : string\n"
394      "    Input word to analyze.\n\n"
395      "Returns\n"
396      "-------\n"
397      "list of strings : Each string is a possible analysis of the input word. "
398      "It contains the stem of the word (st:XXX) and some information about "
399      "modifications done to get to the input word.\n"
400      "For more information see: man 4 hunspell (or https://sourceforge.net/projects/hunspell/files/Hunspell/Documentation/) "
401      "in the \'Optional data fields\" section."},
402 
403     {"stem", (PyCFunction) HunSpell_stem, METH_VARARGS,
404      "Stemmer method. It is a simplified version of analyze method.\n\n"
405      "Parameters\n"
406      "----------\n"
407      "word : string\n"
408      "    The word to stem.\n\n"
409      "Returns\n"
410      "-------\n"
411      "list of string : The possible stems of the input word."},
412 
413     {"generate", (PyCFunction) HunSpell_generate, METH_VARARGS,
414      "Provide morphological generation for the given word using "
415      "the second one as example.\n\n"
416      "Parameters\n"
417      "----------\n"
418      "word : string\n"
419      "    The word to transform.\n"
420      "word : string\n"
421      "    The example to use as a generator\n\n"
422      "Returns\n"
423      "-------\n"
424      "list of string : A list of possible transformations or "
425      "an empty list if nothing were found"},
426 
427     {"generate2", (PyCFunction) HunSpell_generate2, METH_VARARGS,
428      "Provide morphological generation for the given word "
429      "the second one as example.\n\n"
430      "Parameters\n"
431      "----------\n"
432      "word : string\n"
433      "    The word to transform.\n"
434      "tags : string\n"
435      "    String of an analyzed word\n\n"
436      "Returns\n"
437      "-------\n"
438      "list of string : A list of possible transformations or "
439      "an empty list if nothing were found"},
440 
441     {"add", (PyCFunction) HunSpell_add, METH_VARARGS,
442      "Adds the given word into the runtime dictionary.\n\n"
443      "Parameters\n"
444      "----------\n"
445      "word : string\n"
446      "    The word to add in the dictionary\n\n"
447      "Returns\n"
448      "-------\n"
449      "int : 0 if success, hunspell program error code else."},
450 
451     {"add_with_affix", (PyCFunction) HunSpell_add_with_affix, METH_VARARGS,
452      "Adds the given word with affix flags of the example (a dictionary word) "
453      "into the runtime dictionary.\n\n"
454      "Parameters\n"
455      "----------\n"
456      "word : string\n"
457      "    The word to transform.\n"
458      "word : string\n"
459      "    The example to use to find flags\n\n"
460      "Returns\n"
461      "-------\n"
462      "int : 0 if success, hunspell program error code else."},
463 
464     {"remove", (PyCFunction) HunSpell_remove, METH_VARARGS,
465      "Removes the given word from the runtime dictionary\n\n"
466      "Parameters\n"
467      "----------\n"
468      "word : string\n"
469      "    The word to remove from the dictionary\n\n"
470      "Returns\n"
471      "-------\n"
472      "int : 0 if success, hunspell program error code else."},
473 
474     {NULL}
475 };
476 
/*
 * Type object for the Python-level HunSpell class.  Only the slots this
 * extension needs are filled in: deallocation, flags (subclassable),
 * docstring, method table and __init__.  tp_new is assigned later, in the
 * module initialization function, before PyType_Ready() is called.
 *
 * NOTE(review): tp_name has no "hunspell." module prefix - confirm whether
 * that is intended before changing it, since it affects the type's
 * reported module.
 */
static PyTypeObject HunSpellType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "HunSpell",        /* tp_name */
    sizeof(HunSpell),    /* tp_basicsize */
    0,            /* tp_itemsize */
    (destructor) HunSpell_dealloc,    /* tp_dealloc */
    0,            /* tp_print */
    0,            /* tp_getattr */
    0,            /* tp_setattr */
    0,            /* tp_compare */
    0,            /* tp_repr */
    0,            /* tp_as_number */
    0,            /* tp_as_sequence */
    0,            /* tp_as_mapping */
    0,            /* tp_hash */
    0,            /* tp_call */
    0,            /* tp_str */
    0,            /* tp_getattro */
    0,            /* tp_setattro */
    0,            /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,    /* tp_flags */
    "HunSpell binding. \n\n"
    "Instantiation goes like this:\n"
    ">>> hobj = HunSpell('/path/to/dict.dic', '/path/to/dict.aff')",    /* tp_doc */
    0,            /* tp_traverse */
    0,            /* tp_clear */
    0,            /* tp_richcompare */
    0,            /* tp_weaklistoffset */
    0,            /* tp_iter */
    0,            /* tp_iternext */
    HunSpell_methods,    /* tp_methods */
    0,            /* tp_members */
    0,            /* tp_getset */
    0,            /* tp_base */
    0,            /* tp_dict */
    0,            /* tp_descr_get */
    0,            /* tp_descr_set */
    0,            /* tp_dictoffset */
    (initproc) HunSpell_init,    /* tp_init */
    0,            /* tp_alloc */
    0,            /* tp_new */
};
519 
#if PY_MAJOR_VERSION >= 3
/*
 * Python 3 module definition.
 *
 * The module exposes no module-level functions.  HunSpell_methods is the
 * method table of the HunSpell *type*; listing it here would publish
 * those functions on the module and call them with the module object cast
 * to HunSpell *, so m_methods is left NULL.
 */
static struct PyModuleDef hunspellmodule = {
    PyModuleDef_HEAD_INIT,
    "hunspell",    /* name of module */
    NULL,        /* module documentation, may be NULL */
    -1, /* TODO */    /* size of per-interpreter state of the module,
               or -1 if the module keeps state in global variables. */
    NULL        /* m_methods: no module-level functions */
};
#endif
530 
531 /******************** Module Initialization function ****************/
532 
533 #if PY_MAJOR_VERSION >= 3
534 PyMODINIT_FUNC
PyInit_hunspell(void)535 PyInit_hunspell(void)
536 {
537     PyObject *mod;
538     mod = PyModule_Create(&hunspellmodule);
539 #else
540 extern "C" PyObject*
541 inithunspell(void)
542 {
543     PyObject *mod;
544     mod = Py_InitModule3("hunspell", NULL,
545                          "An extension for the Hunspell spell checker engine");
546 #endif
547     if (mod == NULL) {
548         return NULL;
549     }
550     /* Fill in some slots in the type, and make it ready */
551     HunSpellType.tp_new = PyType_GenericNew;
552     if (PyType_Ready(&HunSpellType) < 0) {
553         return NULL;
554     }
555     /* Add the type to the module. */
556     Py_INCREF(&HunSpellType);
557     PyModule_AddObject(mod, "HunSpell", (PyObject *)&HunSpellType);
558     HunSpellError = PyErr_NewException((char*) "hunspell.HunSpellError", NULL, NULL);
559     Py_INCREF(HunSpellError);
560     PyModule_AddObject(mod, "HunSpellError", HunSpellError);
561     return mod;
562 }
563