1 /*
2  * unicode_names.c
3  * Copyright (C) 2018 Kovid Goyal <kovid at kovidgoyal.net>
4  *
5  * Distributed under terms of the GPL3 license.
6  */
7 
8 #include "names.h"
9 
10 static inline void
add_matches(const word_trie * wt,char_type * codepoints,size_t * pos,const size_t sz)11 add_matches(const word_trie *wt, char_type *codepoints, size_t *pos, const size_t sz) {
12     size_t num = mark_groups[wt->match_offset];
13     for (size_t i = wt->match_offset + 1; i < wt->match_offset + 1 + num && *pos < sz; i++, (*pos)++) {
14         codepoints[*pos] = mark_to_cp[mark_groups[i]];
15     }
16 }
17 
18 static void
process_trie_node(const word_trie * wt,char_type * codepoints,size_t * pos,const size_t sz)19 process_trie_node(const word_trie *wt, char_type *codepoints, size_t *pos, const size_t sz) {
20     if (wt->match_offset) add_matches(wt, codepoints, pos, sz);
21     size_t num_children = children_array[wt->children_offset];
22     if (!num_children) return;
23     for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
24         if (*pos > sz) return;
25         uint32_t x = children_array[c];
26         process_trie_node(&all_trie_nodes[x >> 8], codepoints, pos, sz);
27     }
28 }
29 
30 static inline PyObject*
codepoints_for_word(const char * word,size_t len)31 codepoints_for_word(const char *word, size_t len) {
32     const word_trie *wt = all_trie_nodes;
33     for (size_t i = 0; i < len; i++) {
34         unsigned char ch = word[i];
35         size_t num_children = children_array[wt->children_offset];
36         if (!num_children) return PyFrozenSet_New(NULL);
37         bool found = false;
38         for (size_t c = wt->children_offset + 1; c < wt->children_offset + 1 + num_children; c++) {
39             uint32_t x = children_array[c];
40             if ((x & 0xff) == ch) {
41                 found = true;
42                 wt = &all_trie_nodes[x >> 8];
43                 break;
44             }
45         }
46         if (!found) return PyFrozenSet_New(NULL);
47     }
48     static char_type codepoints[1024];
49     size_t cpos = 0;
50     process_trie_node(wt, codepoints, &cpos, arraysz(codepoints));
51     PyObject *ans = PyFrozenSet_New(NULL); if (ans == NULL) return NULL;
52     for (size_t i = 0; i < cpos; i++) {
53         PyObject *t = PyLong_FromUnsignedLong(codepoints[i]); if (t == NULL) { Py_DECREF(ans); return NULL; }
54         int ret = PySet_Add(ans, t); Py_DECREF(t); if (ret != 0) { Py_DECREF(ans); return NULL; }
55     }
56     return ans;
57 }
58 
59 static PyObject*
cfw(PyObject * self UNUSED,PyObject * args)60 cfw(PyObject *self UNUSED, PyObject *args) {
61     const char *word;
62     if (!PyArg_ParseTuple(args, "s", &word)) return NULL;
63     return codepoints_for_word(word, strlen(word));
64 }
65 
66 static PyObject*
nfc(PyObject * self UNUSED,PyObject * args)67 nfc(PyObject *self UNUSED, PyObject *args) {
68     unsigned int cp;
69     if (!PyArg_ParseTuple(args, "I", &cp)) return NULL;
70     const char *n = name_for_codepoint(cp);
71     if (n == NULL) Py_RETURN_NONE;
72     return PyUnicode_FromString(n);
73 }
74 
75 static PyMethodDef unicode_names_methods[] = {
76     {"codepoints_for_word", (PyCFunction)cfw, METH_VARARGS,
77      "Return a set of integer codepoints for where each codepoint's name "
78      "contains ``word``,"},
79     {"name_for_codepoint", (PyCFunction)nfc, METH_VARARGS,
80      "Returns the given codepoint's name"},
81     {NULL, NULL, 0, NULL}        /* Sentinel */
82 };
83 
84 static int
exec_module(PyObject * module)85 exec_module(PyObject *module) { return 0; }
86 
87 static PyModuleDef_Slot slots[] = { {Py_mod_exec, exec_module}, {0, NULL} };
88 
89 static struct PyModuleDef module_def = {
90     .m_base     = PyModuleDef_HEAD_INIT,
91     .m_name     = "unicode_names",
92     .m_doc      = "A library to assist with selecting special characters",
93     .m_methods  = unicode_names_methods,
94     .m_slots    = slots,
95 };
96 
PyInit_unicode_names(void)97 CALIBRE_MODINIT_FUNC PyInit_unicode_names(void) { return PyModuleDef_Init(&module_def); }
98