1#!/usr/bin/python
2# vim:set fileencoding=utf-8 et sts=4 sw=4:
3#
4# ibus - Intelligent Input Bus for Linux / Unix OS
5#
6# Copyright © 2020 Takao Fujiwara <takao.fujiwara1@gmail.com>
7#
8# This library is free software; you can redistribute it and/or
9# modify it under the terms of the GNU Lesser General Public
10# License as published by the Free Software Foundation; either
11# version 2.1 of the License, or (at your option) any later version.
12#
13# This library is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# Lesser General Public License for more details.
17#
18# You should have received a copy of the GNU Lesser General Public
19# License along with this library. If not, see <http://www.gnu.org/licenses/>.
20
21
22# This script generates simple.xml with /usr/share/X11/xkb/rules/evdev.xml,
23# /usr/share/xml/iso-codes/iso_639.xml and denylist.txt
24
25
26from xml.dom import minidom
27from xml.sax import make_parser as sax_make_parser
28from xml.sax.handler import feature_namespaces as sax_feature_namespaces
29from xml.sax.saxutils import XMLFilterBase, XMLGenerator, escape
30from xml.sax.xmlreader import AttributesImpl
31from xml.sax._exceptions import SAXParseException
32
33import codecs
34import getopt
35import io
36import os
37import sys
38
# Built-in defaults for the command line options (see usage()).
VERSION = '0.1'
EVDEV_XML = '/usr/share/X11/xkb/rules/evdev.xml'
EXEC_PATH = '/usr/lib/ibus-engine-simple'
ISO_PATH = '/usr/share/xml/iso-codes/iso_639.xml'
# True when the interpreter is Python 3 or newer.
PY3K = sys.version_info[0] >= 3

if not PY3K:
    # io.StringIO does not work with XMLGenerator
    from cStringIO import StringIO
    # iso_639.xml includes UTF-8
    reload(sys)
    sys.setdefaultencoding('utf-8')
else:
    from io import StringIO
53
def usage(prgname):
    """Print the command line help text, including the built-in defaults.

    prgname is the program name shown in the banner and in the usage line.
    """
    template = '''\
%s Version %s
Usage:
  %s [OPTION...]

Options:
  -h, --help                         Show this message
  -i, --input=EVDEV_XML              Load EVDEV_XML file (default is:
                                         %s)
  -o, --output=FILE                  Output FILE (default is stdout)
  -V, --version=VERSION              Set IBus VERSION (default is %s)
  -e, --exec-path=EXEC_PATH          Set EXEC_PATH file (default is:
                                         %s)
  -I, --iso-path=ISO_PATH            Load ISO_PATH file (default is:
                                         %s)
  -1, --first-language               Pull first language only in language list
'''
    # Values in the order the placeholders appear in the template.
    values = (prgname, VERSION, prgname, EVDEV_XML, VERSION, EXEC_PATH,
              ISO_PATH)
    print(template % values)
72
73
class EvdevXML(XMLFilterBase):
    """SAX content handler converting XKB layouts into IBus engine entries.

    The handler tracks the ``layout``, ``variant``, ``configItem``, ``name``,
    ``description``, ``languageList`` and ``iso639Id`` elements of the parsed
    document.  At the end of every ``configItem`` that belongs to a layout or
    a variant it writes one ``<engine>`` element per language to
    ``downstream``; all engines are wrapped in one ``<engines>`` element.

    Args:
        parser: Parent XML reader passed on to ``XMLFilterBase``.
        downstream: Handler receiving the generated SAX events (for example
            an ``XMLGenerator``).  Nothing is generated when it is false.
        iso639: Object whose ``code2to1(code)`` returns the ISO 639-1 code
            for ``code`` or ``None``.  With ``None`` the language codes are
            written unchanged.
        denylist: Iterable of ``[xkb, layout, variant, lang]`` entries.  An
            engine is skipped when an entry with ``xkb == 'xkb'`` matches its
            layout, variant and language; ``'*'`` matches any value.
            ``None`` means no entries.
        author: Text of the ``<author>`` element, omitted when ``None``.
        first: When true only the first non-denied language of each
            ``configItem`` is written.
    """

    def __init__(self, parser=None, downstream=None, iso639=None,
                 denylist=None, author=None, first=False):
        XMLFilterBase.__init__(self, parser)
        self.__downstream = downstream
        self.__iso639 = iso639
        # save() iterates the deny list, so map a missing one to "no entries".
        self.__denylist = [] if denylist is None else denylist
        self.__author = author
        self.__first = first
        self.__is_layout = False
        self.__is_description = False
        self.__is_config_item = False
        self.__is_variant = False
        self.__is_iso639 = False
        self.__is_name = False
        self.__layout = ''
        self.__description = ''
        self.__variant = ''
        self.__list_iso639 = []
        self.__list_iso639_for_variant = []
        # Chunks of character data of the text element currently being read.
        self.__text = []

    def startDocument(self):
        """Open the output document and its ``<engines>`` root element."""
        if self.__downstream:
            self.__downstream.startDocument()
            self.__downstream.startElement('engines', AttributesImpl({}))

    def endDocument(self):
        """Close the ``<engines>`` root element and the output document."""
        if self.__downstream:
            self.__downstream.endElement('engines')
            self.__downstream.endDocument()

    def startElement(self, name, attrs):
        """Record which element is being entered; ``attrs`` is not used."""
        if name == 'layout':
            self.__is_layout = True
        elif name == 'description':
            self.__is_description = True
            self.__text = []
        elif name == 'configItem':
            self.__is_config_item = True
        elif name == 'languageList':
            # A new language list replaces the one collected so far.
            if self.__is_variant and self.__is_config_item:
                self.__list_iso639_for_variant = []
            elif self.__is_layout and self.__is_config_item:
                self.__list_iso639 = []
        elif name == 'iso639Id':
            self.__is_iso639 = True
            self.__text = []
        elif name == 'variant':
            self.__is_variant = True
        elif name == 'name':
            self.__is_name = True
            self.__text = []

    def endElement(self, name):
        """Store finished text values and emit engines after a configItem."""
        if name == 'layout':
            self.__is_layout = False
            self.__layout = ''
            self.__description = ''
            self.__variant = ''
            self.__list_iso639 = []
        elif name == 'description':
            text = self.__take_text()
            if text:
                self.__description = text
            self.__is_description = False
        elif name == 'configItem':
            self.save()
            self.__is_config_item = False
        elif name == 'iso639Id':
            text = self.__take_text()
            if text:
                if self.__is_variant and self.__is_config_item:
                    self.__list_iso639_for_variant.append(text)
                elif self.__is_layout and self.__is_config_item:
                    self.__list_iso639.append(text)
            self.__is_iso639 = False
        elif name == 'variant':
            self.__is_variant = False
            self.__list_iso639_for_variant = []
        elif name == 'name':
            text = self.__take_text()
            if text:
                if self.__is_variant and self.__is_config_item:
                    self.__variant = text
                elif self.__is_layout and self.__is_config_item:
                    self.__layout = text
            self.__is_name = False

    def characters(self, text):
        """Collect character data of description, name and iso639Id elements.

        A SAX parser may report one text node in several chunks, so the
        chunks are buffered here and joined when the element ends.
        """
        if self.__is_description or self.__is_name or self.__is_iso639:
            self.__text.append(text)

    def save(self):
        """Write the engines of the configItem that is currently open.

        A variant uses its own language list and falls back to the list of
        its layout when it has none.  Nothing is written outside of a layout
        or variant configItem, or when there is no downstream handler.
        """
        if not self.__downstream:
            return
        if not self.__is_config_item:
            return
        if self.__is_variant:
            list_iso639 = (self.__list_iso639_for_variant
                           or self.__list_iso639)
        elif self.__is_layout:
            list_iso639 = self.__list_iso639
        else:
            return
        for iso in list_iso639:
            if self.__is_denied(iso):
                continue
            self.__emit_engine(iso)
            if self.__first:
                break

    def __take_text(self):
        """Return the buffered character data and empty the buffer."""
        text = ''.join(self.__text)
        self.__text = []
        return text

    def __is_denied(self, iso):
        """Return True when the deny list excludes the current engine."""
        for xkb, layout, variant, lang in self.__denylist:
            if (xkb == 'xkb'
                    and layout in (self.__layout, '*')
                    and variant in (self.__variant, '*')
                    # The wildcard has to be tested on the language field.
                    and lang in (iso, '*')):
                return True
        return False

    def __emit(self, tag, text):
        """Write ``<tag>text</tag>`` to the downstream handler."""
        self.__downstream.startElement(tag, AttributesImpl({}))
        self.__downstream.characters(text)
        self.__downstream.endElement(tag)

    def __emit_engine(self, iso):
        """Write one ``<engine>`` element for the language code ``iso``."""
        self.__downstream.startElement('engine', AttributesImpl({}))
        self.__emit('name',
                    'xkb:%s:%s:%s' % (self.__layout, self.__variant, iso))
        # Prefer the ISO 639-1 code for <language> when one is known.
        language = iso
        if self.__iso639 is not None:
            iso639_1 = self.__iso639.code2to1(iso)
            if iso639_1 is not None:
                language = iso639_1
        self.__emit('language', language)
        self.__emit('license', 'GPL')
        if self.__author is not None:
            self.__emit('author', self.__author)
        self.__emit('layout', self.__layout)
        if self.__variant != '':
            self.__emit('layout_variant', self.__variant)
        self.__emit('longname', self.__description)
        self.__emit('description', self.__description)
        self.__emit('icon', 'ibus-keyboard')
        # Plain layouts get rank 50, variants get rank 1.
        self.__emit('rank', '50' if self.__variant == '' else '1')
        self.__downstream.endElement('engine')
221
222
class GenerateEngineXML():
    """Generate the IBus component XML for the simple engine.

    The constructor parses the XKB registry file at ``path`` with
    ``EvdevXML`` and keeps the resulting ``<engines>`` document in memory;
    ``write()`` wraps it in a ``<component>`` element and prints it.

    Args:
        path: Path of the XKB registry file (``evdev.xml``) to read.
        iso639: Language code converter handed to ``EvdevXML``.
        denylist: Deny list entries handed to ``EvdevXML``.
        version: Text of the ``<version>`` element.
        exec: Text of the ``<exec>`` element.
        first: Handed to ``EvdevXML``; keep only the first language.
    """
    _NAME = 'org.freedesktop.IBus.Simple'
    _DESCRIPTION = 'A table based simple engine'
    _AUTHOR = 'Peng Huang <shawn.p.huang@gmail.com>'
    _HOMEPAGE = 'https://github.com/ibus/ibus/wiki'
    _DOMAIN = 'ibus'

    def __init__(self, path, iso639=None, denylist='', version='', exec='',
                 first=False):
        self.__path = path
        self.__iso639 = iso639
        self.__denylist = denylist
        self.__version = version
        self.__exec = exec
        self.__first = first
        self.__result = StringIO()
        downstream = XMLGenerator(self.__result, 'utf-8')
        self.__load(downstream)

    def __load(self, downstream=None):
        """Parse the input file and send the engine list to ``downstream``.

        A ``SAXParseException`` is reported with a message on stdout and is
        not re-raised.
        """
        parser = sax_make_parser()
        parser.setFeature(sax_feature_namespaces, 0)
        self.__handler = EvdevXML(parser,
                                  downstream,
                                  self.__iso639,
                                  self.__denylist,
                                  self._AUTHOR,
                                  self.__first)
        parser.setContentHandler(self.__handler)
        with codecs.open(self.__path, 'r', encoding='utf-8') as f:
            try:
                parser.parse(f)
            except SAXParseException:
                print('Error: Invalid file format: %s' % self.__path)

    def write(self, output=None):
        """Write the pretty-printed component XML as UTF-8.

        Args:
            output: Path of the file to write, or ``None`` for stdout.
        """
        contents = self.__result.getvalue()
        index = contents.find('<engines>')
        if index >= 0:
            # Every inserted value is escaped so that characters such as
            # '&' or '<' cannot break the document parsed below.
            fields = tuple(escape('%s' % value) for value in (
                self._NAME, self._DESCRIPTION, self.__exec, self.__version,
                self._AUTHOR, 'GPL', self._HOMEPAGE, self._DOMAIN))
            contents = '%s<component><name>%s</name>\
<description>%s</description><exec>%s</exec><version>%s</version>\
<author>%s</author><license>%s</license><homepage>%s</homepage>\
<textdomain>%s</textdomain>%s</component>' % (
                (contents[:index],) + fields + (contents[index:],))
        parsed = minidom.parseString(contents)
        # format with indent and encoding attribute in header
        xml = parsed.toprettyxml(indent='    ', encoding='utf-8')
        # convert byte to str
        text = xml.decode('utf-8')
        if output is not None:
            with codecs.open(output, 'w', encoding='utf-8') as od:
                od.write(text)
        elif PY3K:
            # Flush pending text output first to keep the output order.
            sys.stdout.flush()
            od = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            try:
                od.write(text)
                od.flush()
            finally:
                # Detach so that discarding the wrapper does not close the
                # underlying stdout buffer.
                od.detach()
        else:
            od = codecs.getwriter('utf-8')(sys.stdout)
            od.write(text)
284
285
class ISO639XML(XMLFilterBase):
    """SAX handler collecting the language codes of ``iso_639_entry`` nodes.

    Only entries that carry an ``iso_639_1_code`` attribute are recorded.
    For those the ``iso_639_2B_code`` and ``iso_639_2T_code`` attributes are
    mapped to the ISO 639-1 code, and every code is mapped to the entry name.
    """

    def __init__(self, parser=None):
        XMLFilterBase.__init__(self, parser)
        self.__code2to1 = {}
        self.__codetoname = {}

    def startElement(self, name, attrs):
        """Record the codes of one ``iso_639_entry`` element."""
        if name != 'iso_639_entry':
            return
        short_code = attrs.get('iso_639_1_code')
        if short_code is None:
            return
        label = attrs.get('name')
        bibliographic = attrs.get('iso_639_2B_code')
        terminologic = attrs.get('iso_639_2T_code')
        self.__codetoname[short_code] = label
        long_codes = []
        if bibliographic is not None:
            long_codes.append(bibliographic)
        # The 2T code only needs an entry when it differs from the 2B code.
        if terminologic is not None and terminologic != bibliographic:
            long_codes.append(terminologic)
        for code in long_codes:
            self.__code2to1[code] = short_code
            self.__codetoname[code] = label

    def code2to1(self, iso639_2):
        """Return the ISO 639-1 code recorded for ``iso639_2`` or ``None``."""
        return self.__code2to1.get(iso639_2)
311
312
def parse_iso639(path):
    """Parse the ISO 639 XML file at ``path`` and return its handler.

    The returned ``ISO639XML`` object offers ``code2to1()`` for the codes
    that were read.  A ``SAXParseException`` is reported with a message on
    stdout and is not re-raised; the handler then holds whatever was read
    before the error.
    """
    # Set up the parser before opening the file so that a failure here
    # cannot leave an open file behind.
    parser = sax_make_parser()
    parser.setFeature(sax_feature_namespaces, 0)
    handler = ISO639XML(parser)
    parser.setContentHandler(handler)
    with codecs.open(path, 'r', encoding='utf-8') as f:
        try:
            parser.parse(f)
        except SAXParseException:
            print('Error: Invalid file format: %s' % path)
    return handler
326
327
def parse_denylist(denyfile):
    """Read the deny list file and return its entries.

    Every entry line has the form ``xkb:layout:variant:lang``.  Blank lines
    and lines starting with ``#`` are skipped.  A line that does not split
    into four fields is reported with a warning on stdout and skipped.

    Args:
        denyfile: Path of the UTF-8 encoded deny list file.

    Returns:
        A list of ``[xkb, layout, variant, lang]`` lists.
    """
    denylist = []
    with codecs.open(denyfile, 'r', encoding='utf-8') as f:
        for line in f.readlines():
            # Strip first so that whitespace-only and CRLF blank lines are
            # treated as blank instead of as malformed entries.
            line = line.rstrip()
            if not line or line[0] == '#':
                continue
            entry = line.split(':')
            if len(entry) != 4:
                print('WARNING: format error: \'%s\' against \'%s\''
                      % (line, 'xkb:layout:variant:lang'))
                continue
            denylist.append(entry)
    return denylist
343
344
# Command line entry point: parse the options, load the ISO 639 codes and
# the deny list, then generate the component XML and write it.
if __name__ == '__main__':
    prgname = os.path.basename(sys.argv[0])
    mydir = os.path.dirname(sys.argv[0])
    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   'hi:o:V:e:I:1',
                                   ['help', 'input=', 'output=', 'version=',
                                    'exec-path=', 'iso-path=',
                                    'first-language'])
    except getopt.GetoptError as err:
        print(err)
        usage(prgname)
        sys.exit(2)
    # Positional arguments are not accepted.
    if args:
        usage(prgname)
        sys.exit(2)
    input_path = EVDEV_XML
    output = None
    version = VERSION
    exec_path = EXEC_PATH
    iso_path = ISO_PATH
    first = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(prgname)
            sys.exit()
        elif opt in ('-i', '--input'):
            input_path = arg
        elif opt in ('-o', '--output'):
            output = arg
        elif opt in ('-V', '--version'):
            version = arg
        elif opt in ('-e', '--exec-path'):
            exec_path = arg
        elif opt in ('-I', '--iso-path'):
            iso_path = arg
        # Must match the long option name registered with getopt above.
        elif opt in ('-1', '--first-language'):
            first = True

    iso639 = parse_iso639(iso_path)
    # denylist.txt is looked up next to this script.  os.path.join keeps the
    # path relative when the script directory is empty.
    denylist = parse_denylist(os.path.join(mydir, 'denylist.txt'))
    xml = GenerateEngineXML(input_path, iso639, denylist, version, exec_path,
                            first)
    xml.write(output)
388