1# coding=utf-8
2"""
3Python library for ISO 639 standard
4
5Copyright (c) 2014-2016 Mikael Karlsson (CSC - IT Center for Science Ltd.).
6Licensed under AGPLv3.
7"""
8
# Fix for Python 3.0 - 3.2: when __package__ is empty, derive it from the
# first dotted component of the module name.
if not __package__:
    __package__ = __name__.partition('.')[0]
12
13
def _fabtabular():
    """
    Load the bundled ISO 639 tables and return their data rows.

    Every table is a tab-separated file located through
    ``pkg_resources.resource_filename`` relative to this package. The first
    row of each file is dropped before the rows are returned.

    :return: a 6-tuple of lists of rows (each row is a list of strings), in
             this order: ``iso-639-3.tab``, ``iso-639-3_Name_Index.tab``,
             ``iso-639-3-macrolanguages.tab``, ``iso639-5.tsv``,
             ``iso639-2.tsv``, ``iso639-1.tsv``.
    :raises IOError/OSError: if one of the files cannot be opened.
    """
    import csv
    import sys
    from pkg_resources import resource_filename

    # The tables are read from files shipped with the package, not fetched at
    # runtime. NOTE(review): the ISO 639-3 files appear to originate from
    # http://www-01.sil.org/iso639-3/ -- confirm before refreshing the data.
    filenames = (
        'iso-639-3.tab',
        'iso-639-3_Name_Index.tab',
        'iso-639-3-macrolanguages.tab',
        'iso639-5.tsv',
        'iso639-2.tsv',
        'iso639-1.tsv',
    )

    # On Python 3 the files are decoded explicitly as UTF-8 instead of with the
    # locale's default encoding. On Python 2 the files are opened without an
    # encoding argument. The keyword is kept local to this function: the
    # module-level name `open` is deliberately NOT rebound, so other code in
    # the module keeps seeing the unmodified builtin.
    open_kwargs = {'encoding': 'utf-8'} if sys.version_info[0] >= 3 else {}

    def read_rows(filename):
        # Each file is opened, fully read and closed before the next one is
        # touched, so a failure on a later file cannot leak earlier handles.
        path = resource_filename(__package__, filename)
        with open(path, **open_kwargs) as fo:
            return list(csv.reader(fo, delimiter='\t'))[1:]

    return tuple(read_rows(filename) for filename in filenames)
64
65
class _Language(object):
    """
    This class represents a language. It provides pycountry language class compatibility.

    Instances are plain value holders; every constructor argument is stored
    unchanged under the attribute of the same name:

    * ``part3``, ``part2b``, ``part2t``, ``part1``, ``part5`` -- the codes of
      the language in the respective parts of the standard ('' when absent)
    * ``name`` -- the language name
    * ``inverted`` -- the inverted form of the name
    * ``macro`` -- code of the macrolanguage the language belongs to
    * ``names`` -- additional names of the language

    The pycountry-style attribute names ``alpha2``, ``bibliographic`` and
    ``terminology`` are read-only aliases for ``part1``, ``part2b`` and
    ``part2t``.
    """

    # pycountry-compatible attribute name -> native attribute name.
    _COMPAT = {
        'alpha2': 'part1',
        'bibliographic': 'part2b',
        'terminology': 'part2t',
    }

    def __init__(self, part3, part2b, part2t, part1, name, inverted, macro, names, part5):
        self.part3 = part3
        self.part2b = part2b
        self.part2t = part2t
        self.part1 = part1
        self.name = name
        self.inverted = inverted
        self.macro = macro
        self.names = names
        self.part5 = part5

    def __getattr__(self, item):
        """
        Resolve the pycountry compatibility aliases.

        Only called when normal attribute lookup fails.

        :param item: name of the attribute being looked up
        :return: value of the native attribute the alias maps to
        :raises AttributeError: if ``item`` is not a known alias
        """
        # The alias table holds attribute *names*, not values, so nothing on
        # the instance is read unless an alias actually matches. Reading
        # self.part1 etc. up front would recurse without end on an instance
        # whose __init__ has not run (copy and pickle probe such instances for
        # optional hooks like __setstate__).
        target = type(self)._COMPAT.get(item)
        if target is None:
            raise AttributeError("'{o}' object has no attribute '{a}'".format(o=type(self).__name__, a=item))
        return getattr(self, target)
91
92
class lazy_property(object):
    """
    Decorator turning a method into a property that is computed on first access.

    The computed value is stored on the instance under the method's own name,
    so later reads of that attribute find the stored value on the instance.
    """

    def __init__(self, f):
        # Remember the wrapped function and the attribute name it is stored under.
        self.name = f.__name__
        self.f = f

    def __get__(self, instance, owner=None):
        # Access through the class itself yields the descriptor object.
        if instance is not None:
            computed = self.f(instance)
            # Store the result on the instance under the same name.
            setattr(instance, self.name, computed)
            return computed
        return self
108
109
class Iso639(object):
    """
    This class is a close to drop-in replacement for pycountry.languages.
    But unlike pycountry.languages it also supports ISO 639-3.

    It implements the Singleton design pattern for performance reasons.
    It uses lazy properties for faster import time.

    The pycountry-style names ``alpha2``, ``bibliographic`` and ``terminology``
    are aliases for the ``part1``, ``part2b`` and ``part2t`` lookup tables.
    """

    # pycountry-compatible attribute name -> name of the native lookup table.
    _COMPAT = {
        'alpha2': 'part1',
        'bibliographic': 'part2b',
        'terminology': 'part2t',
    }

    def __new__(cls):
        """
        Return the single shared instance of ``cls``, creating it on first use.
        """
        # The instance is looked up in this class's own namespace and the base
        # class is named explicitly in super(): `super(cls, cls)` would call
        # this very method again for a subclass and recurse without end.
        if '__instance' not in cls.__dict__:
            setattr(cls, '__instance', super(Iso639, cls).__new__(cls))
        return cls.__dict__['__instance']

    def __len__(self):
        """Return the number of known languages."""
        return len(self.languages)

    def __iter__(self):
        """Iterate over all known languages."""
        return iter(self.languages)

    def __getattr__(self, item):
        """
        Resolve the pycountry compatibility aliases.

        Only called when normal attribute lookup fails.

        :param item: name of the attribute being looked up
        :return: the lookup table the alias maps to
        :raises AttributeError: if ``item`` is not a known alias
        """
        # Only the requested alias is resolved; looking up an unknown name
        # must not force the lazy tables to be built.
        target = self._COMPAT.get(item)
        if target is None:
            raise AttributeError("'{o}' object has no attribute '{a}'".format(o=type(self).__name__, a=item))
        return getattr(self, target)

    @lazy_property
    def languages(self):
        """
        Build the list of all languages from the tables returned by ``_fabtabular()``.

        The list is assembled in three passes: every row of the part 3 table,
        then every row of the part 5 table, then whatever is left of the
        part 2 table after the first two passes removed the codes they used.

        :return: list of ``_Language`` objects
        """
        import collections

        part3_rows, index_rows, macro_rows, part5_rows, part2_rows, part1_rows = _fabtabular()

        # part 3 code -> {name: inverted name}, taken from the name index table.
        alt = collections.defaultdict(dict)
        for row in index_rows:
            alt[row[0]][row[1]] = row[2]
        # These tables are keyed on their second column.
        m = dict((row[1], row) for row in macro_rows)
        p2 = dict((row[1], row) for row in part2_rows)
        p1c = dict((row[1], row) for row in part1_rows)
        # First '|'-separated name (stripped) -> row of the part 1 table.
        p1n = dict((row[2].split('|')[0].strip(), row) for row in part1_rows)

        def generate():
            # All of part3 and matching part2
            for a, b, c, d, _, _, e, _ in part3_rows:
                # Removing the primary name leaves only the alternative names
                # in alt[a]; they become the language's `names`.
                inv = alt[a].pop(e)
                yield _Language(a, b, c,
                                d if d in p1c else '',  # Fixes 'sh'
                                e, inv,
                                m.get(a, [''])[0],
                                list(alt[a].items()),
                                '')
                # Mark the part 2 codes as handled.
                p2.pop(b, None)
                p2.pop(c, None)

            # All of part5 and matching part2
            for _, a, b, _ in part5_rows:
                yield _Language('',
                                a if a in p2 else '',
                                a if a in p2 else '',
                                p1n.get(b, ['', ''])[1],
                                b, '', '', '', a)
                p2.pop(a, None)

            # Rest of part2
            p2.pop('qaa-qtz', None)  # Is not a real code, but a range
            for _, a, b, _ in p2.values():
                n = [x.strip() for x in b.split('|')]
                # NOTE(review): the part 1 lookup uses the raw name field `b`
                # while p1n is keyed on the first, stripped alternative; a name
                # containing '|' can never match -- confirm whether n[0] was
                # intended.
                yield _Language('', a, a,
                                p1n.get(b, ['', ''])[1],
                                n[0], '', '',
                                # A real list: a bare zip() is a one-shot
                                # iterator on Python 3 and would be empty after
                                # its first traversal.
                                list(zip(n[1:], n[1:])),
                                '')

        return list(generate())

    @lazy_property
    def part3(self):
        """Dict mapping each non-empty ``part3`` code to its language."""
        return dict((x.part3, x) for x in self.languages if x.part3)

    @lazy_property
    def part2b(self):
        """Dict mapping each non-empty ``part2b`` code to its language."""
        return dict((x.part2b, x) for x in self.languages if x.part2b)

    @lazy_property
    def part2t(self):
        """Dict mapping each non-empty ``part2t`` code to its language."""
        return dict((x.part2t, x) for x in self.languages if x.part2t)

    @lazy_property
    def part1(self):
        """Dict mapping each non-empty ``part1`` code to its language."""
        return dict((x.part1, x) for x in self.languages if x.part1)

    @lazy_property
    def part5(self):
        """Dict mapping each non-empty ``part5`` code to its language."""
        return dict((x.part5, x) for x in self.languages if x.part5)

    @lazy_property
    def name(self):
        """
        Dict mapping language names to languages.

        Both the primary ``name`` and the first element of every entry in
        ``names`` are used as keys.
        """
        def gen():
            for x in self.languages:
                if x.name:
                    yield x.name, x
                for n in x.names:
                    yield n[0], x

        return dict(gen())

    @lazy_property
    def inverted(self):
        """Dict mapping each non-empty ``inverted`` name to its language."""
        return dict((x.inverted, x) for x in self.languages if x.inverted)

    @lazy_property
    def macro(self):
        """Dict mapping each macrolanguage code to the list of languages whose ``macro`` is that code."""
        import collections

        m = collections.defaultdict(list)
        for x in self.languages:
            if x.macro:
                m[x.macro].append(x)
        return dict(m)

    @lazy_property
    def retired(self):
        """
        Function for generating retired languages. Returns a dict('code', (datetime, [language, ...], 'description')).

        The entries are read from ``iso-639-3_Retirements.tab``. The language
        list is taken from the row's fourth column or, when that is empty, from
        the three-letter codes in square brackets found in the description.
        Codes that are themselves listed in the retirements table are left out.

        The extra ``'sh'`` entry is the exception to the value shape above: it
        maps directly to the language object with part 3 code ``'hbs'``.

        :raises KeyError: if a referenced code is not a known part 3 code
        """

        def gen():
            import csv
            import re
            import sys
            from datetime import datetime
            from pkg_resources import resource_filename

            # Decode explicitly as UTF-8 on Python 3 rather than relying on the
            # locale default; on Python 2 the file is opened without an
            # encoding argument.
            open_kwargs = {'encoding': 'utf-8'} if sys.version_info[0] >= 3 else {}
            path = resource_filename(__package__, 'iso-639-3_Retirements.tab')
            # The rows are read completely up front so the file is closed
            # before the generator first yields.
            with open(path, **open_kwargs) as rf:
                rtd = list(csv.reader(rf, delimiter='\t'))[1:]

            rc = set(r[0] for r in rtd)
            for i, _, _, m, s, d in rtd:
                d = datetime.strptime(d, '%Y-%m-%d')
                if not m:
                    m = re.findall(r'\[([a-z]{3})\]', s)
                if m:
                    m = [m] if isinstance(m, str) else m
                    yield i, (d, [self.get(part3=x) for x in m if x not in rc], s)
                else:
                    yield i, (d, [], s)

            yield 'sh', self.get(part3='hbs')  # Add 'sh' as deprecated

        return dict(gen())

    def get(self, **kwargs):
        """
        Simple getter function for languages. Takes 1 keyword/value and returns 1 language object.

        The keyword names the lookup table to use (for example ``part3`` or
        ``name``) and its value is the key looked up in that table.

        :raises AttributeError: if not exactly one keyword is given, or the
                                keyword does not name an attribute
        :raises KeyError: if the value is not present in the chosen table
        """
        if len(kwargs) != 1:
            raise AttributeError('Only one keyword expected')
        key, value = kwargs.popitem()
        return getattr(self, key)[value]
266