1 /* hunspell.cpp
2 *
3 * Copyright (C) 2009 - Sayamindu Dasgupta <sayamindu@gmail.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include <Python.h>
20 #include <hunspell.hxx>
21
22
/* Fallback for Python headers that do not provide PyVarObject_HEAD_INIT:
 * build it from PyObject_HEAD_INIT followed by the size field. It is used
 * by the static type object defined later in this file. */
#ifndef PyVarObject_HEAD_INIT
#define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
#endif

/* Name aliases that are only active when building against Python 2
 * (PY_MAJOR_VERSION < 3), so the rest of the file can use one spelling.
 * NOTE(review): PyInt_FromLong is not referenced anywhere in the visible
 * code (PyLong_FromLong is called directly) - confirm this alias is still
 * needed. */
#if PY_MAJOR_VERSION < 3
#define PyInt_FromLong PyLong_FromLong
#define PyBytes_FromString PyString_FromString
#endif /* PY_MAJOR_VERSION < 3 */
32
33
34 /****************************************
35 HunSpell
36 ****************************************/
37
/* Exception object raised by this extension; it is created and published
 * as hunspell.HunSpellError by the module initialization function. */
static PyObject *HunSpellError;

/* Instance layout of the Python-level HunSpell object. */
typedef struct {
    PyObject_HEAD
    /* Hunspell engine; allocated with `new` in HunSpell_init and released
     * with `delete` in HunSpell_dealloc. */
    Hunspell * handle;
    /* Value returned by handle->get_dic_encoding(), cached at init time and
     * passed as the encoding name to the "et" argument parsers.
     * NOTE(review): presumably this storage is owned by `handle` - confirm. */
    const char *encoding;
} HunSpell;
45
46 static int
HunSpell_init(HunSpell * self,PyObject * args,PyObject * kwds)47 HunSpell_init(HunSpell * self, PyObject *args, PyObject *kwds)
48 {
49 PyObject *dpath = NULL; /* PyBytes in py3 PyString in py2 */
50 PyObject *apath = NULL;
51 FILE *fh;
52
53 #if PY_VERSION_HEX < 0x03010000
54 /* TODO: Please review if there is any shorter/nicer;less clumsy way to convert args to PyStrings using Py_FileSystemDefaultEncoding in python 2.x */
55 const char * dpath_ptr = NULL;
56 const char * apath_ptr = NULL;
57 if (!PyArg_ParseTuple(args, "etet", Py_FileSystemDefaultEncoding, &dpath_ptr, Py_FileSystemDefaultEncoding, &apath_ptr))
58 return 1;
59 dpath = PyString_FromString(dpath_ptr);
60 apath = PyString_FromString(apath_ptr);
61 #else
62 if (!PyArg_ParseTuple(args, "O&O&", PyUnicode_FSConverter, &dpath, PyUnicode_FSConverter, &apath))
63 return 1;
64 #endif
65 /* Some versions of Hunspell() will succeed even if
66 * there are no dictionary files. So test for permissions.
67 */
68 /* TODO: consider _Py_fopen for py3.x here ? */
69 fh = fopen(PyBytes_AsString(dpath), "r");
70 if (fh) {
71 fclose(fh);
72 } else {
73 PyErr_SetFromErrno(HunSpellError);
74 /* TODO: Py_DECREF(*path); */
75 return -1;
76 }
77 fh = fopen(PyBytes_AsString(apath), "r");
78 if (fh) {
79 fclose(fh);
80 } else {
81 PyErr_SetFromErrno(HunSpellError);
82 return -1;
83 }
84
85 self->handle = new Hunspell(PyBytes_AsString(apath), PyBytes_AsString(dpath));
86 // TODO check class instanciation went well
87 //if(!self->handle) { Hunspell ain't bool
88 // PyErr_SetString(HunSpellError, "Cannot open dictionary");
89 // return -1;
90 //}
91 self->encoding = self->handle->get_dic_encoding();
92 Py_DECREF(dpath);
93 Py_DECREF(apath);
94 return 0;
95 }
96
97 static void
HunSpell_dealloc(HunSpell * self)98 HunSpell_dealloc(HunSpell * self)
99 {
100 delete self->handle;
101 Py_TYPE(self)->tp_free((PyObject *)self);
102 }
103
104 static PyObject *
HunSpell_add_dic(HunSpell * self,PyObject * args,PyObject * kwds)105 HunSpell_add_dic(HunSpell * self, PyObject *args, PyObject *kwds)
106 {
107 PyObject *dpath = NULL; /* PyBytes in py3 PyString in py2 */
108 FILE *fh;
109 #if PY_VERSION_HEX < 0x03010000
110 const char * dpath_ptr = NULL;
111 if (!PyArg_ParseTuple(args, "et", Py_FileSystemDefaultEncoding, &dpath_ptr))
112 return NULL;
113 dpath = PyString_FromString(dpath_ptr);
114 #else
115 if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &dpath))
116 return NULL;
117 #endif
118 fh = fopen(PyBytes_AsString(dpath), "r");
119 if (fh) {
120 fclose(fh);
121 } else {
122 PyErr_SetFromErrno(HunSpellError);
123 Py_DECREF(dpath);
124 return NULL;
125 }
126 int result = self->handle->add_dic(PyBytes_AsString(dpath));
127 Py_DECREF(dpath);
128 return PyLong_FromLong(result);
129 }
130
131 static PyObject *
HunSpell_get_dic_encoding(HunSpell * self,PyObject * args)132 HunSpell_get_dic_encoding(HunSpell * self, PyObject *args)
133 {
134 return Py_BuildValue("s", self->encoding);
135 }
136
137 static PyObject *
HunSpell_spell(HunSpell * self,PyObject * args)138 HunSpell_spell(HunSpell * self, PyObject *args)
139 {
140 char *word;
141 int retvalue;
142
143 if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
144 return NULL;
145 retvalue = self->handle->spell(word);
146 PyMem_Free(word);
147 return PyBool_FromLong(retvalue);
148 }
149
150
151 static PyObject *
HunSpell_suggest(HunSpell * self,PyObject * args)152 HunSpell_suggest(HunSpell * self, PyObject *args)
153 {
154 char *word, **slist;
155 int i, num_slist, ret, str_size;
156 PyObject *slist_list, *pystr;
157 PyObject *etype, *evalue, *etrace;
158
159 if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
160 return NULL;
161
162 slist_list = PyList_New(0);
163 if (!slist_list) {
164 return NULL;
165 }
166 num_slist = self->handle->suggest(&slist, word);
167 PyMem_Free(word);
168
169 for (i = 0, ret = 0; !ret && i < num_slist; i++) {
170 str_size = strlen(slist[i]);
171 pystr = PyUnicode_DecodeUTF8(slist[i], str_size, "strict");
172 if (!pystr) {
173 PyErr_Fetch(&etype, &evalue, &etrace);
174 Py_DECREF(etype);
175 pystr = PyUnicode_DecodeLatin1(slist[i], str_size, "strict");
176 if (!pystr)
177 break;
178 }
179 ret = PyList_Append(slist_list, pystr);
180 Py_DECREF(pystr);
181 }
182
183 self->handle->free_list(&slist, num_slist);
184 return slist_list;
185 }
186
187 static PyObject *
HunSpell_analyze(HunSpell * self,PyObject * args)188 HunSpell_analyze(HunSpell * self, PyObject *args)
189 {
190 char *word, **slist;
191 int i, num_slist, ret;
192 PyObject *slist_list, *pystr;
193
194 if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
195 return NULL;
196
197 slist_list = PyList_New(0);
198 if (!slist_list) {
199 return NULL;
200 }
201 num_slist = self->handle->analyze(&slist, word);
202 PyMem_Free(word);
203
204 for (i = 0, ret = 0; !ret && i < num_slist; i++) {
205 pystr = PyBytes_FromString(slist[i]);
206 if (!pystr)
207 break;
208 ret = PyList_Append(slist_list, pystr);
209 Py_DECREF(pystr);
210 }
211
212 self->handle->free_list(&slist, num_slist);
213 return slist_list;
214 }
215
216 static PyObject *
HunSpell_stem(HunSpell * self,PyObject * args)217 HunSpell_stem(HunSpell * self, PyObject *args)
218 {
219 char *word, **slist;
220 int i, num_slist, ret;
221 PyObject *slist_list, *pystr;
222
223 if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
224 return NULL;
225
226 slist_list = PyList_New(0);
227 if (!slist_list) {
228 return NULL;
229 }
230 num_slist = self->handle->stem(&slist, word);
231 PyMem_Free(word);
232
233 for (i = 0, ret = 0; !ret && i < num_slist; i++) {
234 pystr = PyBytes_FromString(slist[i]);
235 if (!pystr)
236 break;
237 ret = PyList_Append(slist_list, pystr);
238 Py_DECREF(pystr);
239 }
240
241 self->handle->free_list(&slist, num_slist);
242 return slist_list;
243 }
244
245 static PyObject *
HunSpell_generate(HunSpell * self,PyObject * args)246 HunSpell_generate(HunSpell * self, PyObject *args)
247 {
248 char *word1, *word2, **slist;
249 int i, num_slist, ret;
250 PyObject *slist_list, *pystr;
251
252 if (!PyArg_ParseTuple(args, "etet", self->encoding, &word1, self->encoding, &word2))
253 return NULL;
254
255 slist_list = PyList_New(0);
256 if (!slist_list) {
257 return NULL;
258 }
259 num_slist = self->handle->generate(&slist, word1, word2);
260 PyMem_Free(word1);
261 PyMem_Free(word2);
262
263 for (i = 0, ret = 0; !ret && i < num_slist; i++) {
264 pystr = PyBytes_FromString(slist[i]);
265 if (!pystr)
266 break;
267 ret = PyList_Append(slist_list, pystr);
268 Py_DECREF(pystr);
269 }
270
271 self->handle->free_list(&slist, num_slist);
272 return slist_list;
273 }
274
275 static PyObject *
HunSpell_generate2(HunSpell * self,PyObject * args)276 HunSpell_generate2(HunSpell * self, PyObject *args)
277 {
278 char *word1, *desc, **slist;
279 int i, num_slist, ret;
280 PyObject *slist_list, *pystr;
281
282 if (!PyArg_ParseTuple(args, "etet", self->encoding, &word1, self->encoding, &desc))
283 return NULL;
284
285 slist_list = PyList_New(0);
286 if (!slist_list) {
287 return NULL;
288 }
289
290 num_slist = self->handle->generate(&slist, word1, &desc, 1);
291 PyMem_Free(word1);
292 PyMem_Free(desc);
293
294 for (i = 0, ret = 0; !ret && i < num_slist; i++) {
295 pystr = PyBytes_FromString(slist[i]);
296 if (!pystr)
297 break;
298 ret = PyList_Append(slist_list, pystr);
299 Py_DECREF(pystr);
300 }
301
302 self->handle->free_list(&slist, num_slist);
303 return slist_list;
304 }
305
306 static PyObject *
HunSpell_add(HunSpell * self,PyObject * args)307 HunSpell_add(HunSpell * self, PyObject *args)
308 {
309 char *word;
310 int retvalue;
311
312 if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
313 return NULL;
314 retvalue = self->handle->add(word);
315 PyMem_Free(word);
316
317 return PyLong_FromLong(retvalue);
318 }
319
320 static PyObject *
HunSpell_add_with_affix(HunSpell * self,PyObject * args)321 HunSpell_add_with_affix(HunSpell * self, PyObject *args)
322 {
323 char *word, *example;
324 int retvalue;
325
326 if (!PyArg_ParseTuple(args, "etet", self->encoding, &word, self->encoding, &example))
327 return NULL;
328 retvalue = self->handle->add_with_affix(word, example);
329 PyMem_Free(word);
330 PyMem_Free(example);
331
332 return PyLong_FromLong(retvalue);
333 }
334
335 static PyObject *
HunSpell_remove(HunSpell * self,PyObject * args)336 HunSpell_remove(HunSpell * self, PyObject *args)
337 {
338 char *word;
339 int retvalue;
340
341 if (!PyArg_ParseTuple(args, "et", self->encoding, &word))
342 return NULL;
343 retvalue = self->handle->remove(word);
344 PyMem_Free(word);
345
346 return PyLong_FromLong(retvalue);
347 }
348
349 static PyMethodDef HunSpell_methods[] = {
350 {"get_dic_encoding", (PyCFunction) HunSpell_get_dic_encoding,
351 METH_NOARGS,
352 "Gets encoding of loaded dictionary.\n\n"
353 "Returns\n"
354 "-------\n"
355 "string : The encoding of currently used dic file (UTF-8, ISO8859-1, ...)"},
356
357 {"add_dic", (PyCFunction) HunSpell_add_dic, METH_VARARGS,
358 "Load an extra dictionary to the current instance.\n"
359 "The extra dictionaries use the affix file of the allocated Hunspell object.\n"
360 "Maximal number of the extra dictionaries is limited in the Hunspell source code to 20.\n\n"
361 "Parameters\n"
362 "----------\n"
363 "dpath : string\n"
364 " Path to the .dic to add.\n\n"
365 "Returns\n"
366 "-------\n"
367 "int : hunspell program error code."},
368
369 {"spell", (PyCFunction) HunSpell_spell, METH_VARARGS,
370 "Checks the spelling of the given word.\n\n"
371 "Parameters\n"
372 "----------\n"
373 "word : string\n"
374 " Word to check.\n\n"
375 "Returns\n"
376 "-------\n"
377 "bool : True if the word is correctly spelled else False"},
378
379 {"suggest", (PyCFunction) HunSpell_suggest, METH_VARARGS,
380 "Provide suggestions for the given word.\n\n"
381 "Parameters\n"
382 "----------\n"
383 "word : string\n"
384 " Word for which we want suggestions\n\n"
385 "Returns\n"
386 "-------\n"
387 "list of strings : The list of suggestions for input word. (No suggestion returns an empty list)."},
388
389 {"analyze", (PyCFunction) HunSpell_analyze, METH_VARARGS,
390 "Provide morphological analysis for the given word.\n\n"
391 "Parameters\n"
392 "----------\n"
393 "word : string\n"
394 " Input word to analyze.\n\n"
395 "Returns\n"
396 "-------\n"
397 "list of strings : Each string is a possible analysis of the input word. "
398 "It contains the stem of the word (st:XXX) and some information about "
399 "modifications done to get to the input word.\n"
400 "For more information see: man 4 hunspell (or https://sourceforge.net/projects/hunspell/files/Hunspell/Documentation/) "
401 "in the \'Optional data fields\" section."},
402
403 {"stem", (PyCFunction) HunSpell_stem, METH_VARARGS,
404 "Stemmer method. It is a simplified version of analyze method.\n\n"
405 "Parameters\n"
406 "----------\n"
407 "word : string\n"
408 " The word to stem.\n\n"
409 "Returns\n"
410 "-------\n"
411 "list of string : The possible stems of the input word."},
412
413 {"generate", (PyCFunction) HunSpell_generate, METH_VARARGS,
414 "Provide morphological generation for the given word using "
415 "the second one as example.\n\n"
416 "Parameters\n"
417 "----------\n"
418 "word : string\n"
419 " The word to transform.\n"
420 "word : string\n"
421 " The example to use as a generator\n\n"
422 "Returns\n"
423 "-------\n"
424 "list of string : A list of possible transformations or "
425 "an empty list if nothing were found"},
426
427 {"generate2", (PyCFunction) HunSpell_generate2, METH_VARARGS,
428 "Provide morphological generation for the given word "
429 "the second one as example.\n\n"
430 "Parameters\n"
431 "----------\n"
432 "word : string\n"
433 " The word to transform.\n"
434 "tags : string\n"
435 " String of an analyzed word\n\n"
436 "Returns\n"
437 "-------\n"
438 "list of string : A list of possible transformations or "
439 "an empty list if nothing were found"},
440
441 {"add", (PyCFunction) HunSpell_add, METH_VARARGS,
442 "Adds the given word into the runtime dictionary.\n\n"
443 "Parameters\n"
444 "----------\n"
445 "word : string\n"
446 " The word to add in the dictionary\n\n"
447 "Returns\n"
448 "-------\n"
449 "int : 0 if success, hunspell program error code else."},
450
451 {"add_with_affix", (PyCFunction) HunSpell_add_with_affix, METH_VARARGS,
452 "Adds the given word with affix flags of the example (a dictionary word) "
453 "into the runtime dictionary.\n\n"
454 "Parameters\n"
455 "----------\n"
456 "word : string\n"
457 " The word to transform.\n"
458 "word : string\n"
459 " The example to use to find flags\n\n"
460 "Returns\n"
461 "-------\n"
462 "int : 0 if success, hunspell program error code else."},
463
464 {"remove", (PyCFunction) HunSpell_remove, METH_VARARGS,
465 "Removes the given word from the runtime dictionary\n\n"
466 "Parameters\n"
467 "----------\n"
468 "word : string\n"
469 " The word to remove from the dictionary\n\n"
470 "Returns\n"
471 "-------\n"
472 "int : 0 if success, hunspell program error code else."},
473
474 {NULL}
475 };
476
/* Static type object for the Python-level HunSpell class.
 *
 * The initializer is positional, so the order of the entries below must
 * follow the slot order of PyTypeObject; the trailing comment on each line
 * names the slot being filled. Only the deallocator, flags, docstring,
 * method table and initializer are set here. tp_new is left 0 and is
 * assigned PyType_GenericNew by the module initialization function before
 * PyType_Ready() is called. */
static PyTypeObject HunSpellType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "HunSpell", /* tp_name */
    sizeof(HunSpell), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor) HunSpell_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    /* BASETYPE allows the class to be subclassed from Python. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    "HunSpell binding. \n\n"
    "Instantiation goes like this:\n"
    ">>> hobj = HunSpell('/path/to/dict.dic', '/path/to/dict.aff')", /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    HunSpell_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    (initproc) HunSpell_init, /* tp_init */
    0, /* tp_alloc */
    0, /* tp_new (set in the module init function) */
};
519
#if PY_MAJOR_VERSION >= 3
/* Python 3 module definition.
 *
 * The module exposes no module-level functions: everything lives on the
 * HunSpell type, which the init function adds to the module. The method
 * table of the type must NOT be listed here, otherwise its entries would
 * become module functions invoked with the module object as `self`. */
static struct PyModuleDef hunspellmodule = {
    PyModuleDef_HEAD_INIT,
    "hunspell", /* m_name */
    "An extension for the Hunspell spell checker engine", /* m_doc */
    -1, /* m_size: -1 because the module keeps state in global variables */
    NULL /* m_methods: no module-level functions */
};
#endif
530
531 /******************** Module Initialization function ****************/
532
533 #if PY_MAJOR_VERSION >= 3
534 PyMODINIT_FUNC
PyInit_hunspell(void)535 PyInit_hunspell(void)
536 {
537 PyObject *mod;
538 mod = PyModule_Create(&hunspellmodule);
539 #else
540 extern "C" PyObject*
541 inithunspell(void)
542 {
543 PyObject *mod;
544 mod = Py_InitModule3("hunspell", NULL,
545 "An extension for the Hunspell spell checker engine");
546 #endif
547 if (mod == NULL) {
548 return NULL;
549 }
550 /* Fill in some slots in the type, and make it ready */
551 HunSpellType.tp_new = PyType_GenericNew;
552 if (PyType_Ready(&HunSpellType) < 0) {
553 return NULL;
554 }
555 /* Add the type to the module. */
556 Py_INCREF(&HunSpellType);
557 PyModule_AddObject(mod, "HunSpell", (PyObject *)&HunSpellType);
558 HunSpellError = PyErr_NewException((char*) "hunspell.HunSpellError", NULL, NULL);
559 Py_INCREF(HunSpellError);
560 PyModule_AddObject(mod, "HunSpellError", HunSpellError);
561 return mod;
562 }
563