1#!/usr/bin/python
2# vim:set fileencoding=utf-8 et sts=4 sw=4:
3#
4# ibus - Intelligent Input Bus for Linux / Unix OS
5#
6# Copyright © 2016 Takao Fujiwara <takao.fujiwara1@gmail.com>
7#
8# This library is free software; you can redistribute it and/or
9# modify it under the terms of the GNU Lesser General Public
10# License as published by the Free Software Foundation; either
11# version 2.1 of the License, or (at your option) any later version.
12#
13# This library is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# Lesser General Public License for more details.
17#
18# You should have received a copy of the GNU Lesser General Public
19# License along with this library. If not, see <http://www.gnu.org/licenses/>.
20
21
# This script converts the three-letter ISO 639-2 language codes in
# simple.xml to their two-letter ISO 639-1 equivalents.
# E.g. "eng" becomes "en".
25
26
27from xml.sax import make_parser as sax_make_parser
28from xml.sax.handler import feature_namespaces as sax_feature_namespaces
29from xml.sax.saxutils import XMLFilterBase, XMLGenerator
30from xml.sax._exceptions import SAXParseException
31
32import codecs
33import getopt
34import io
35import os
36import sys
37
# Component file that is converted when no --input option is given.
INSTALLED_SIMPLE_XML = '/usr/share/ibus/component/simple.xml'
# True when running under Python 3 or later.
PY3K = sys.version_info[0] >= 3

if not PY3K:
    # io.StringIO does not work with XMLGenerator
    from cStringIO import StringIO
    # iso_639.xml includes UTF-8, so make it the default encoding;
    # setdefaultencoding() is only reachable after reloading sys.
    reload(sys)
    sys.setdefaultencoding('utf-8')
else:
    from io import StringIO
49
50
def usage(prgname):
    """Print the command-line help text of this script to stdout.

    prgname is shown as the program name on the usage line; the value
    of INSTALLED_SIMPLE_XML is shown as the default input file.
    """
    help_template = '''\
Usage:
  %s [OPTION...]

Options:
  -h, --help                         Show this message
  -i, --input=SIMPLE_XML             Load SIMPLE_XML file (default is:
                                         %s)
  -o, --output=FILE                  Output FILE (default is stdout)
'''
    substitutions = (prgname, INSTALLED_SIMPLE_XML)
    print(help_template % substitutions)
62
63
class ISO639XML(XMLFilterBase):
    """SAX handler that collects language codes from ISO 639 entries.

    For every ``iso_639_entry`` element that carries an
    ``iso_639_1_code`` attribute, the three-letter codes found in the
    ``iso_639_2B_code`` and ``iso_639_2T_code`` attributes are mapped
    to that two-letter code.  Entries without a two-letter code are
    skipped.
    """

    def __init__(self, parser=None):
        # Three-letter code (2B or 2T) -> two-letter ISO 639-1 code.
        self.__code2to1 = {}
        # Any recorded code -> value of the entry's "name" attribute.
        # Filled in while parsing; not read back by this class.
        self.__codetoname = {}
        XMLFilterBase.__init__(self, parser)

    def startElement(self, name, attrs):
        """Record the codes of one ``iso_639_entry`` element.

        Elements with any other name are ignored.  The event is not
        forwarded to another content handler.
        """
        if name != 'iso_639_entry':
            return
        iso639_1 = attrs.get('iso_639_1_code')
        if iso639_1 is None:
            # Without a two-letter code there is nothing to map to.
            return
        language_name = attrs.get('name')
        self.__codetoname[iso639_1] = language_name
        # The bibliographic (2B) and terminologic (2T) attributes may
        # hold the same code; storing it twice is harmless.
        for attr_name in ('iso_639_2B_code', 'iso_639_2T_code'):
            iso639_2 = attrs.get(attr_name)
            if iso639_2 is not None:
                self.__code2to1[iso639_2] = iso639_1
                self.__codetoname[iso639_2] = language_name

    def code2to1(self, iso639_2):
        """Return the two-letter code recorded for iso639_2, or None."""
        return self.__code2to1.get(iso639_2)
89
90
class IBusComponentXML(XMLFilterBase):
    """SAX filter that rewrites the text of ``language`` elements.

    Document, element and character events are relayed to downstream
    when one is given.  Text inside a ``language`` element is looked up
    with iso639.code2to1() and replaced by the result unless the lookup
    returns None.

    A SAX parser may deliver one run of text in several characters()
    calls, so the text of a ``language`` element is buffered and
    converted as a whole at the next element boundary.
    """

    def __init__(self, parser=None, downstream=None, iso639=None):
        XMLFilterBase.__init__(self, parser)
        self.__downstream = downstream
        self.__iso639 = iso639
        self.__is_language = False
        # Chunks of language text received but not yet relayed.
        self.__pending = []

    def __flush_language(self):
        """Convert and relay the buffered language text, if any."""
        if not self.__pending:
            return
        text = ''.join(self.__pending)
        self.__pending = []
        if self.__iso639:
            iso639_1 = self.__iso639.code2to1(text)
            if iso639_1 is not None:
                text = iso639_1
        if self.__downstream:
            self.__downstream.characters(text)

    def startDocument(self):
        """Relay the start of the document."""
        if self.__downstream:
            self.__downstream.startDocument()

    def endDocument(self):
        """Relay the end of the document."""
        # Nothing should be pending here in a well-formed document;
        # flushing keeps buffered text from being lost regardless.
        self.__flush_language()
        if self.__downstream:
            self.__downstream.endDocument()

    def startElement(self, name, attrs):
        """Relay an element start and note when it is ``language``."""
        # Keep text and markup in document order.
        self.__flush_language()
        if name == 'language':
            self.__is_language = True
        if self.__downstream:
            self.__downstream.startElement(name, attrs)

    def endElement(self, name):
        """Relay an element end, emitting buffered text before it."""
        self.__flush_language()
        if name == 'language':
            self.__is_language = False
        if self.__downstream:
            self.__downstream.endElement(name)

    def characters(self, text):
        """Buffer language text; relay any other text unchanged."""
        if self.__is_language:
            self.__pending.append(text)
            return
        if self.__downstream:
            self.__downstream.characters(text)
121
122
class ConvertEngineXML(object):
    """Convert the language codes of an ibus component XML file.

    The file at path is parsed on construction; the regenerated
    document is kept in memory until write() is called.  iso639 is
    handed to IBusComponentXML, which uses it to look up codes.
    """

    def __init__(self, path, iso639=None):
        self.__path = path
        self.__iso639 = iso639

        # In-memory buffer that receives the regenerated XML.
        self.__result = StringIO()
        downstream = XMLGenerator(self.__result, 'utf-8')
        self.__load(downstream)

    def __load(self, downstream=None):
        """Parse the input file, sending the output to downstream.

        A SAXParseException is reported on stdout and not re-raised,
        so whatever was generated before the error is kept.
        """
        parser = sax_make_parser()
        parser.setFeature(sax_feature_namespaces, 0)
        self.__handler = IBusComponentXML(parser, downstream, self.__iso639)
        parser.setContentHandler(self.__handler)
        f = codecs.open(self.__path, 'r', encoding='utf-8')
        try:
            parser.parse(f)
        except SAXParseException:
            # The path lives on the instance; there is no local "path".
            print('Error: Invalid file format: %s' % self.__path)
        finally:
            f.close()

    def write(self, output=None):
        """Write the converted document as UTF-8.

        output is the path of the file to create; when it is None the
        document is written to standard output.
        """
        contents = self.__result.getvalue()
        if output is not None:
            od = codecs.open(output, 'w', encoding='utf-8')
            try:
                od.write(contents)
            finally:
                od.close()
            return
        if PY3K:
            # Emit text already printed before writing to the raw buffer.
            sys.stdout.flush()
            od = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
            try:
                od.write(contents)
                od.flush()
            finally:
                # Detach so that discarding the wrapper does not close
                # the buffer underneath sys.stdout.
                od.detach()
        else:
            od = codecs.getwriter('utf-8')(sys.stdout)
            od.write(contents)
154
155
def parse_iso639(path):
    """Parse the ISO 639 XML file at path and return its handler.

    The file is read as UTF-8 and fed to an ISO639XML content handler.
    A SAXParseException is reported on stdout and not re-raised; the
    handler is returned in either case.
    """
    xml_file = codecs.open(path, 'r', encoding='utf-8')
    reader = sax_make_parser()
    reader.setFeature(sax_feature_namespaces, 0)
    codes = ISO639XML(reader)
    reader.setContentHandler(codes)
    # Leaving the with block closes the file, parse error or not.
    with xml_file:
        try:
            reader.parse(xml_file)
        except SAXParseException:
            print('Error: Invalid file format: %s' % path)
    return codes
169
170
if __name__ == '__main__':
    # Script entry point: parse the options, then convert and write.
    prgname = os.path.basename(sys.argv[0])
    long_options = ['help', 'input=', 'output=']
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hi:o:', long_options)
    except getopt.GetoptError as err:
        print(err)
        usage(prgname)
        sys.exit(2)
    # Positional arguments are not accepted.
    if args:
        usage(prgname)
        sys.exit(2)

    source_path = INSTALLED_SIMPLE_XML
    target_path = None
    for flag, value in opts:
        if flag in ('-h', '--help'):
            usage(prgname)
            sys.exit()
        if flag in ('-i', '--input'):
            source_path = value
        elif flag in ('-o', '--output'):
            target_path = value

    codes = parse_iso639('/usr/share/xml/iso-codes/iso_639.xml')
    converter = ConvertEngineXML(source_path, codes)
    converter.write(target_path)
198