1# -*- coding: utf8 -*-
2"""
High-level Python bindings built on top of the low-level C API (clinkgrammar).
4See http://www.abisource.com/projects/link-grammar/api/index.html to get
5more information about C API.
6"""
7
8try:
9    #pylint: disable=no-name-in-module
10    import linkgrammar.clinkgrammar as clg
11except ImportError:
12    #pylint: disable=import-error
13    import clinkgrammar as clg
14
# Re-export the low-level clinkgrammar module under a public name.
Clinkgrammar = clg

# Names exported by "from <this module> import *".
__all__ = [
    'ParseOptions',
    'Dictionary',
    'Link',
    'Linkage',
    'Sentence',
    'LG_Error',
    'LG_DictionaryError',
    'LG_TimerExhausted',
    'Clinkgrammar',
]
18
19# A decorator to ensure keyword-only arguments to __init__ (besides self).
20# In Python3 it can be done by using "*" as the second __init__ argument,
21# but here it is done with a decorator so it will work also in Python2.
def kwargs_only(init):
    """
    Decorate an __init__ method so that it accepts keyword arguments only.

    Any positional argument (besides the instance itself) raises TypeError
    with a message naming the instance's class. Keyword arguments are
    forwarded unchanged to the wrapped method, and its return value is
    returned.
    """
    # Function-scope import: the only standard-library name this decorator
    # needs, kept local so the block does not depend on the file's imports.
    import functools

    # functools.wraps preserves the wrapped method's __name__ and __doc__,
    # so introspection and help() still describe the real __init__.
    @functools.wraps(init)
    def new_init(self_, *args, **kwargs):
        if args:
            raise TypeError("{}: Positional arguments are "
                            "not allowed".format(self_.__class__.__name__))
        return init(self_, **kwargs)
    return new_init
29
class ParseOptions(object):
    """
    Wrapper around the parse-options object created by
    clg.parse_options_create().

    Every option is a property: reading it calls the matching
    clg.parse_options_get_*() function, and assigning it validates the
    value and then calls the matching clg.parse_options_set_*() function.
    The constructor accepts keyword arguments only, and assigning to a name
    that is not an existing attribute raises TypeError.
    """
    @kwargs_only
    def __init__(self, verbosity=0,
                 linkage_limit=100,
                 min_null_count=0,
                 max_null_count=0,
                 islands_ok=False,
                 short_length=16,
                 all_short_connectors=False,
                 display_morphology=True,
                 spell_guess=False,
                 use_sat=False,
                 max_parse_time=-1,
                 disjunct_cost=2.7,
                 repeatable_rand=True,
                 test='',
                 debug='',
                 dialect='',
                 ):

        # "_obj" has to exist before any option is assigned, because every
        # property setter below forwards to it.
        self._obj = clg.parse_options_create()
        self.verbosity = verbosity
        self.linkage_limit = linkage_limit
        self.min_null_count = min_null_count
        self.max_null_count = max_null_count
        self.islands_ok = islands_ok
        self.short_length = short_length
        self.all_short_connectors = all_short_connectors
        self.display_morphology = display_morphology
        self.spell_guess = spell_guess
        self.use_sat = use_sat
        self.max_parse_time = max_parse_time
        self.disjunct_cost = disjunct_cost
        self.repeatable_rand = repeatable_rand
        self.test = test
        self.debug = debug
        self.dialect = dialect

    # Allow only the attribute names listed below.
    def __setattr__(self, name, value):
        """Reject assignment to any name that is not already an attribute."""
        # "_obj" is the one name that may be created from scratch.
        if not hasattr(self, name) and name != "_obj":
            # TypeError for consistency. It maybe should have been NameError.
            raise TypeError('Unknown parse option "{}".'.format(name))
        super(ParseOptions, self).__setattr__(name, value)

    def __del__(self):
        """Delete the underlying parse-options object, if it was created."""
        if hasattr(self, '_obj'):
            clg.parse_options_delete(self._obj)
            del self._obj

    @property
    def test(self):
        """The "test" string, as returned by clg.parse_options_get_test()."""
        return clg.parse_options_get_test(self._obj)

    @test.setter
    def test(self, value):
        if not isinstance(value, str):
            raise TypeError("test must be set to a string")
        clg.parse_options_set_test(self._obj, value)

    @property
    def debug(self):
        """The "debug" string, as returned by clg.parse_options_get_debug()."""
        return clg.parse_options_get_debug(self._obj)

    @debug.setter
    def debug(self, value):
        if not isinstance(value, str):
            # The message names this option (it used to say "dialect").
            raise TypeError("debug must be set to a string")
        clg.parse_options_set_debug(self._obj, value)

    @property
    def dialect(self):
        """The "dialect" string, as returned by clg.parse_options_get_dialect()."""
        return clg.parse_options_get_dialect(self._obj)

    # The setter must be derived from the "dialect" property itself.
    # Deriving it from "debug" would bind a property whose getter is the
    # debug getter, so reading "dialect" would return the debug string.
    @dialect.setter
    def dialect(self, value):
        if not isinstance(value, str):
            raise TypeError("dialect must be set to a string")
        clg.parse_options_set_dialect(self._obj, value)

    @property
    def verbosity(self):
        """
        This is the level of description printed to stderr/stdout about the
        parsing process.
        """
        return clg.parse_options_get_verbosity(self._obj)

    @verbosity.setter
    def verbosity(self, value):
        if not isinstance(value, int):
            raise TypeError("verbosity must be set to an integer")
        # Inclusive upper bound, matching the error message below.
        if not 0 <= value <= 120:
            raise ValueError("Verbosity levels can be any integer between 0 and 120 inclusive")
        clg.parse_options_set_verbosity(self._obj, value)

    @property
    def linkage_limit(self):
        """
        This parameter determines the maximum number of linkages that are
        considered in post-processing. If more than linkage_limit linkages
        are found, then a random sample of linkage_limit is chosen for
        post-processing. When this happens a warning is displayed at
        verbosity levels bigger than 1.
        """
        return clg.parse_options_get_linkage_limit(self._obj)

    @linkage_limit.setter
    def linkage_limit(self, value):
        if not isinstance(value, int):
            raise TypeError("linkage_limit must be set to an integer")
        if value < 0:
            raise ValueError("linkage_limit must be positive")
        clg.parse_options_set_linkage_limit(self._obj, value)

    @property
    def disjunct_cost(self):
        """
        Determines the maximum disjunct cost used during parsing, where the
        cost of a disjunct is equal to the maximum cost of all of its connectors.
        The default is that only disjuncts of cost 2.7 or less are considered.
        """
        return clg.parse_options_get_disjunct_cost(self._obj)

    @disjunct_cost.setter
    def disjunct_cost(self, value):
        if not isinstance(value, float):
            raise TypeError("disjunct_cost must be set to a float")
        clg.parse_options_set_disjunct_cost(self._obj, value)

    @property
    def min_null_count(self):
        """
         The minimum number of null links that a parse might have. A call to
         sentence_parse will find all linkages having the minimum number of
         null links within the range specified by this parameter.
        """
        return clg.parse_options_get_min_null_count(self._obj)

    @min_null_count.setter
    def min_null_count(self, value):
        if not isinstance(value, int):
            raise TypeError("min_null_count must be set to an integer")
        if value < 0:
            raise ValueError("min_null_count must be positive")
        clg.parse_options_set_min_null_count(self._obj, value)

    @property
    def max_null_count(self):
        """
         The maximum number of null links that a parse might have. A call to
         sentence_parse will find all linkages having the minimum number of
         null links within the range specified by this parameter.
        """
        return clg.parse_options_get_max_null_count(self._obj)

    @max_null_count.setter
    def max_null_count(self, value):
        if not isinstance(value, int):
            raise TypeError("max_null_count must be set to an integer")
        if value < 0:
            raise ValueError("max_null_count must be positive")
        clg.parse_options_set_max_null_count(self._obj, value)

    @property
    def short_length(self):
        """
         The short_length parameter determines how long the links are allowed
         to be. The intended use of this is to speed up parsing by not
         considering very long links for most connectors, since they are very
         rarely used in a correct parse. An entry for UNLIMITED-CONNECTORS in
         the dictionary will specify which connectors are exempt from the
         length limit.
        """
        return clg.parse_options_get_short_length(self._obj)

    @short_length.setter
    def short_length(self, value):
        if not isinstance(value, int):
            raise TypeError("short_length must be set to an integer")
        if value < 0:
            raise ValueError("short_length must be positive")
        clg.parse_options_set_short_length(self._obj, value)

    @property
    def islands_ok(self):
        """
        This option determines whether or not "islands" of links are allowed.
        For example, the following linkage has an island:
            +------Wd-----+
            |     +--Dsu--+---Ss--+-Paf-+      +--Dsu--+---Ss--+--Pa-+
            |     |       |       |     |      |       |       |     |
          ///// this sentence.n is.v false.a this sentence.n is.v true.a
        """
        return clg.parse_options_get_islands_ok(self._obj) == 1

    @islands_ok.setter
    def islands_ok(self, value):
        if not isinstance(value, bool):
            raise TypeError("islands_ok must be set to a bool")
        clg.parse_options_set_islands_ok(self._obj, 1 if value else 0)

    @property
    def max_parse_time(self):
        """
         Determines the approximate maximum time (in seconds) that parsing is
         allowed to take. After this time has expired, the parsing process is
         artificially forced to complete quickly by pretending that no further
         solutions can be constructed. The actual parsing time might be
         slightly longer.
        """
        return clg.parse_options_get_max_parse_time(self._obj)

    @max_parse_time.setter
    def max_parse_time(self, value):
        if not isinstance(value, int):
            raise TypeError("max_parse_time must be set to an integer")
        clg.parse_options_set_max_parse_time(self._obj, value)

    @property
    def display_morphology(self):
        """
        Whether or not to show word morphology when a linkage diagram is printed.
        """
        return clg.parse_options_get_display_morphology(self._obj) == 1

    @display_morphology.setter
    def display_morphology(self, value):
        if not isinstance(value, bool):
            raise TypeError("display_morphology must be set to a bool")
        clg.parse_options_set_display_morphology(self._obj, 1 if value else 0)

    @property
    def spell_guess(self):
        """
         If greater than 0, the spelling guesser is used on unknown words.
         In that case, it performs at most this number of spell corrections
         per word, and performs run-on corrections which are not limited in
         their number. If 0 - the spelling guesser would not be used.
        """
        return clg.parse_options_get_spell_guess(self._obj)

    @spell_guess.setter
    def spell_guess(self, value):
        """
         If the value is an int, it is the maximum number of spell corrections
         per word. If it is True, an int value of 7 is assumed. A value of
         0 or False disables the spelling guesser.
         In case the spelling guesser is not disabled, run-on corrections will
         be issued too, not limited in their number.
        """
        if not isinstance(value, bool) and (not isinstance(value, int) or value < 0):
            raise TypeError("spell_guess must be set to bool or a non-negative integer")
        if isinstance(value, bool):
            value = 7 if value else 0
        clg.parse_options_set_spell_guess(self._obj, value)

    @property
    def use_sat(self):
        """
        To be used after enabling the use of the SAT solver in order to
        validate that it is supported by the LG library.
        """
        return clg.parse_options_get_use_sat_parser(self._obj)

    @use_sat.setter
    def use_sat(self, value):
        if not isinstance(value, bool):
            raise TypeError("use_sat must be set to a bool")
        clg.parse_options_set_use_sat_parser(self._obj, value)

    @property
    def all_short_connectors(self):
        """
         If true, then all connectors have length restrictions imposed on
         them -- they can be no farther than short_length apart. This is
         used when parsing in "panic" mode, for example.
        """
        return clg.parse_options_get_all_short_connectors(self._obj) == 1

    @all_short_connectors.setter
    def all_short_connectors(self, value):
        if not isinstance(value, bool):
            raise TypeError("all_short_connectors must be set to a bool")
        clg.parse_options_set_all_short_connectors(self._obj, 1 if value else 0)

    @property
    def repeatable_rand(self):
        """
        If set to True, then a repeatable random sequence will be used, whenever
        a random number is required.  The parser almost never uses random
        numbers; currently they are only used in one place: to sample a subset
        of linkages, if there are more parses than linkage_limit.
        """
        return clg.parse_options_get_repeatable_rand(self._obj) == 1

    @repeatable_rand.setter
    def repeatable_rand(self, value):
        if not isinstance(value, bool):
            raise TypeError("repeatable_rand must be set to a bool")
        clg.parse_options_set_repeatable_rand(self._obj, 1 if value else 0)
332
class LG_Error(Exception):
    """
    Base exception of this module; its static methods forward to the
    error-handling functions of the low-level clg module.
    """

    @staticmethod
    def set_handler(ehandler_function, ehandler_data=None):
        """
        Install (ehandler_function, ehandler_data) as the error handler and
        return the previous one. When the low-level call reports the previous
        handler as a string, LG_Error._default_handler is returned instead.
        """
        previous = clg._py_error_set_handler((ehandler_function, ehandler_data))
        return LG_Error._default_handler if isinstance(previous, str) else previous

    # lg_error_formatmsg is implemented as method "formatmsg" on errinfo,
    # so there is no static "format" wrapper here.

    @staticmethod
    def flush():
        """Forward to clg.lg_error_flush() and return its result."""
        return clg.lg_error_flush()

    @staticmethod
    def printall(ehandler_func, ehandler_data=None):
        """Forward (ehandler_func, ehandler_data) to clg._py_error_printall()."""
        return clg._py_error_printall((ehandler_func, ehandler_data))

    @staticmethod
    def clearall():
        """Forward to clg.lg_error_clearall() and return its result."""
        return clg.lg_error_clearall()

    @staticmethod
    def message(msg):
        """Print a message through the LG error facility"""
        # The LG error facility decides from the format's ending whether the
        # message is partial or complete, so a trailing "\n" is moved from
        # the message into the format.
        format_eol = ""               # default: a partial message
        if msg[-1:] == "\n":          # a newline-ended complete message
            format_eol, msg = "\n", msg[:-1]
        elif msg[-2:] == "\n\\":      # a newline-ended partial message
            msg = msg[:-1]            # drop only the trailing backslash

        return clg._prt_error('%s' + format_eol, msg)

    @staticmethod
    def _default_handler(errinfo, data):
        """Forward to clg._py_error_default_handler()."""
        # Exceptions (on data): TypeError, ValueError
        clg._py_error_default_handler(errinfo, data)
378
class LG_DictionaryError(LG_Error):
    """Raised by Dictionary when the requested dictionary cannot be opened."""
381
class Dictionary(object):
    """
    A dictionary opened with clg.dictionary_create_lang() and released with
    clg.dictionary_delete() when the object is destroyed.
    """

    def __init__(self, lang='en'):
        """Open the dictionary for lang; raise LG_DictionaryError on failure."""
        handle = clg.dictionary_create_lang(lang)
        self._obj = handle
        if not handle:
            # We should get the error message from the library.
            raise LG_DictionaryError('Error: Failed to open dictionary {!r}'.format(lang))

    def __str__(self):
        """Return the value of clg.dictionary_get_lang() for this dictionary."""
        return clg.dictionary_get_lang(self._obj)

    def __del__(self):
        """Release the dictionary handle, if there still is one."""
        if not hasattr(self, '_obj'):
            return
        clg.dictionary_delete(self._obj)
        del self._obj

    def __nonzero__(self):
        """Return False iff the dictionary could not be opened or has been closed"""
        # A missing "_obj" counts the same as a false one.
        return bool(getattr(self, '_obj', None))

    __bool__ = __nonzero__      # Account python3

    def linkgrammar_get_dict_version(self):
        """Return the value of clg.linkgrammar_get_dict_version()."""
        return clg.linkgrammar_get_dict_version(self._obj)

    def linkgrammar_get_dict_locale(self):
        """Return the value of clg.linkgrammar_get_dict_locale()."""
        return clg.linkgrammar_get_dict_locale(self._obj)
410
411
class Link(object):
    """
    One link of a linkage: two words joined by a left and a right label.

    Two links compare equal when their words and labels are equal; the
    owning linkage and the link index take no part in equality or hashing.
    """

    def __init__(self, linkage, index, left_word, left_label, right_label, right_word):
        self.linkage, self.index = linkage, index
        self.left_word, self.right_word, self.left_label, self.right_label = \
            left_word, right_word, left_label, right_label

    @staticmethod
    def _key(link):
        """Return the tuple of fields that defines the identity of a link."""
        return (link.left_word, link.left_label, link.right_word, link.right_label)

    def __eq__(self, other):
        try:
            other_key = Link._key(other)
        except AttributeError:
            # Not link-like (e.g. None or a string): let Python fall back to
            # its default comparison instead of raising.
            return NotImplemented
        return Link._key(self) == other_key

    def __ne__(self, other):
        # Python 2 does not derive "!=" from "==".
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def __hash__(self):
        # Defined together with __eq__, so equal links hash equally and
        # links stay usable in sets and as dict keys on Python 3.
        return hash(Link._key(self))

    def __str__(self):
        if self.left_label == self.right_label:
            return u"%s-%s-%s" % (self.left_word, self.left_label, self.right_word)
        else:
            return u"%s-%s-%s-%s" % (self.left_word, self.left_label, self.right_label, self.right_word)

    def __unicode__(self):
        return self.__str__()

    def __repr__(self):
        return u"Link: %s" % self.__str__()

    def __len__(self):
        """Return the value of clg.linkage_get_link_length() for this link."""
        return clg.linkage_get_link_length(self.linkage._obj, self.index)

    def num_domains(self):
        """Return the value of clg.linkage_get_link_num_domains() for this link."""
        return clg.linkage_get_link_num_domains(self.linkage._obj, self.index)
439
440
441
class Linkage(object):
    """
    One linkage of a sentence, created with clg.linkage_create() and
    released with clg.linkage_delete() when the object is destroyed.
    """

    def __init__(self, idx, sentence, parse_options):
        # Keep all args passed into clg.* functions.
        # parse_options is handed to clg.linkage_create() unchanged.
        self.sentence = sentence
        self.parse_options = parse_options
        self._obj = clg.linkage_create(idx, sentence._obj, parse_options)

    def __del__(self):
        """Release the linkage handle, if there still is one."""
        if not hasattr(self, '_obj'):
            return
        clg.linkage_delete(self._obj)
        del self._obj

    def __nonzero__(self):
        """Return False for SAT sentinel value (NULL); else return True."""
        return True if self._obj else False

    __bool__ = __nonzero__      # Account python3

    # ----- words -----

    def num_of_words(self):
        """Return the value of clg.linkage_get_num_words()."""
        return clg.linkage_get_num_words(self._obj)

    def word(self, i):
        """Return the value of clg.linkage_get_word() for index i."""
        return clg.linkage_get_word(self._obj, i)

    def words(self):
        """Yield word(i) for every index below num_of_words()."""
        total = self.num_of_words()
        position = 0
        while position < total:
            yield self.word(position)
            position += 1

    def word_byte_start(self, w):
        """Return the value of clg.linkage_get_word_byte_start() for word w."""
        return clg.linkage_get_word_byte_start(self._obj, w)

    def word_byte_end(self, w):
        """Return the value of clg.linkage_get_word_byte_end() for word w."""
        return clg.linkage_get_word_byte_end(self._obj, w)

    def word_char_start(self, w):
        """Return the value of clg.linkage_get_word_char_start() for word w."""
        return clg.linkage_get_word_char_start(self._obj, w)

    def word_char_end(self, w):
        """Return the value of clg.linkage_get_word_char_end() for word w."""
        return clg.linkage_get_word_char_end(self._obj, w)

    # ----- links -----

    def num_of_links(self):
        """Return the value of clg.linkage_get_num_links()."""
        return clg.linkage_get_num_links(self._obj)

    def link(self, i):
        """Build the Link object describing link number i."""
        handle = self._obj
        left_word = self.word(clg.linkage_get_link_lword(handle, i))
        left_label = clg.linkage_get_link_llabel(handle, i)
        right_label = clg.linkage_get_link_rlabel(handle, i)
        right_word = self.word(clg.linkage_get_link_rword(handle, i))
        return Link(self, i, left_word, left_label, right_label, right_word)

    def links(self):
        """Yield link(i) for every index below num_of_links()."""
        total = self.num_of_links()
        position = 0
        while position < total:
            yield self.link(position)
            position += 1

    # ----- costs and violations -----

    def unused_word_cost(self):
        """Return the value of clg.linkage_unused_word_cost()."""
        return clg.linkage_unused_word_cost(self._obj)

    def link_cost(self):
        """Return the value of clg.linkage_link_cost()."""
        return clg.linkage_link_cost(self._obj)

    def disjunct_cost(self):
        """Return the value of clg.linkage_disjunct_cost()."""
        return clg.linkage_disjunct_cost(self._obj)

    def violation_name(self):
        """Return the value of clg.linkage_get_violation_name()."""
        return clg.linkage_get_violation_name(self._obj)

    # ----- printing -----

    def diagram(self, display_walls=False, screen_width=180):
        """Return the value of clg.linkage_print_diagram()."""
        return clg.linkage_print_diagram(self._obj, display_walls, screen_width)

    def postscript(self, display_walls=True, print_ps_header=False):
        """Return the value of clg.linkage_print_postscript()."""
        return clg.linkage_print_postscript(self._obj, display_walls, print_ps_header)

    def constituent_tree(self, mode=1):
        """Return the value of clg.linkage_print_constituent_tree()."""
        return clg.linkage_print_constituent_tree(self._obj, mode)
516
517
class LG_TimerExhausted(LG_Error):
    """Raised after parsing when clg.parse_options_timer_expired() is true."""
520
class Sentence(object):
    """
    A sentence to be split and parsed against a Dictionary.

    Example::

        sent = Sentence("This is a test.", Dictionary(), ParseOptions())
        # split() before parse() is optional.
        # split() has ParseOptions as an optional argument
        # (defaults to that of Sentence)
        if sent.split(ParseOptions(verbosity=2)) < 0:
            print("Cannot split sentence")
        else:
            linkages = sent.parse()
            print("English: found {} linkages".format(len(linkages)))
            for linkage in linkages:
                print(linkage.diagram())
    """
    def __init__(self, text, lgdict, parse_options):
        # Keep all args passed into clg.* functions.
        self.text, self.dict, self.parse_options = text, lgdict, parse_options
        clg._py_incref(self.dict) # The Sentence struct refers to the Dictionary struct
        self._obj = clg.sentence_create(self.text, self.dict._obj)

    def __del__(self):
        """Delete the sentence and drop the reference taken on the dictionary."""
        if hasattr(self, '_obj'):
            clg.sentence_delete(self._obj)
            clg._py_decref(self.dict)
            del self._obj

    def split(self, parse_options=None):
        """Split a sentence. If an error occurs, return a negative number."""
        # Same "is None" test as sentence_parse uses for its default.
        if parse_options is None:
            parse_options = self.parse_options
        return clg.sentence_split(self._obj, parse_options._obj)

    def __len__(self):
        """The number of tokens in the sentence."""
        return clg.sentence_length(self._obj)

    def null_count(self):
        """Number of null links in the linkages of this sentence."""
        return clg.sentence_null_count(self._obj)

    class sentence_parse(object):
        """
        Result of parsing a sentence: an iterator over its Linkage objects.

        It is false when the parse call returned a negative value, and its
        length is the number of valid linkages.
        """
        def __init__(self, sent, parse_options):
            """Parse sent; raise LG_TimerExhausted if the timer expired."""
            self.sent = sent
            self.num = 0    # index of the next linkage to hand out
            self.parse_options = sent.parse_options if parse_options is None else parse_options
            self.rc = clg.sentence_parse(sent._obj, self.parse_options._obj)
            if clg.parse_options_timer_expired(self.parse_options._obj):
                raise LG_TimerExhausted()

        def __nonzero__(self):
            """Return False if there was a split or parse error; else return True."""
            return self.rc >= 0

        __bool__ = __nonzero__      # Account python3

        def __iter__(self):
            # With no valid linkages, hand out an empty iterator right away.
            if 0 == clg.sentence_num_valid_linkages(self.sent._obj):
                return iter(())
            return self

        def __len__(self):
            """The number of valid linkages of the parsed sentence."""
            return clg.sentence_num_valid_linkages(self.sent._obj)

        def next(self):
            """Return the next Linkage, or raise StopIteration when done."""
            if self.num == clg.sentence_num_valid_linkages(self.sent._obj):
                raise StopIteration()
            linkage = Linkage(self.num, self.sent, self.parse_options._obj)
            if not linkage:  # SAT sentinel value
                raise StopIteration()
            self.num += 1
            return linkage

        __next__ = next             # Account python3

    def parse(self, parse_options=None):
        """Parse the sentence and return a sentence_parse object.

        parse_options defaults to the options given to the Sentence.
        """
        return self.sentence_parse(self, parse_options)
597