1#!/usr/bin/env python3
2"""
Note: This only runs with Python3!

Demo: Find unlinked or unknown words.
This demo is extremely simplified.
It can only work with link-grammar library version >= 5.3.10.
Input: English sentences, one per line.
Output: If there are any []-marked words in the linkage results,
the output contains unique combinations of the input sentence with
these words marked.  No attempt is made to handle the walls.
Spell guesses are not handled in this demo.

Example:
This is a the test of bfgiuing and xxxvfrg
Output:
Sentence has 1 unlinked word:
1: LEFT-WALL this.p is.v [a] the test.n of bfgiuing[!].g and.j-n xxxvfrg[?].n RIGHT-WALL
2: LEFT-WALL this.p is.v a [the] test.n of bfgiuing[!].g and.j-n xxxvfrg[?].n RIGHT-WALL
3: LEFT-WALL this.p is.v [a] the test.n of bfgiuing[!].g and.j-n xxxvfrg[?].a RIGHT-WALL
4: LEFT-WALL this.p is.v a [the] test.n of bfgiuing[!].g and.j-n xxxvfrg[?].a RIGHT-WALL
22"""
23
24from __future__ import print_function
25import sys
26from sys import stdin
27import re
28import argparse
29import readline
30
31from linkgrammar import (Sentence, ParseOptions, Dictionary,
32                         LG_Error, LG_TimerExhausted, Clinkgrammar as clg)
33
def nsuffix(q):
    """Return the plural suffix for the count q: '' when q is 1, else 's'."""
    if q == 1:
        return ''
    return 's'
36
class Formatter(argparse.HelpFormatter):
    """Help formatter that shows the "lang" argument first, as in link-parser."""

    def _format_usage(self, usage, actions, groups, prefix):
        """Return the stock usage text with "[lang]" moved after the program name.

        The text produced by the base class is returned unchanged when it
        does not match the "usage: PROG <options> [lang]" pattern.
        """
        default_usage = super()._format_usage(usage, actions, groups, prefix)
        return re.sub(r'(usage: \S+) (.*) \[lang]', r'\1 [lang] \2',
                      str(default_usage))
42
43#-----------------------------------------------------------------------------#
44
# True when sentences are typed at a terminal rather than piped/redirected.
is_stdin_atty = sys.stdin.isatty()

# Show a prompt only when reading from a terminal.
if is_stdin_atty:
    PROMPT = "sentence-check: "
else:
    PROMPT = ""
DISPLAY_GUESSES = True   # Display regex and POS guesses
# A leading character from this set is stripped from non-terminal input lines.
BATCH_LABELS = '*: '

# Printed before the command line is parsed, so it also appears with --help.
print("Version:", clg.linkgrammar_get_version())
52
# Command-line interface.  The custom Formatter moves the optional "lang"
# positional to the front of the usage line.
args = argparse.ArgumentParser(formatter_class=Formatter)
args.add_argument('lang', nargs='?', default='en',
                  help="language or dictionary location")
# range() excludes its stop value, so the stop must be 200 for the accepted
# values to be 0..199 inclusive, as the metavar advertises.
args.add_argument("-v", "--verbosity", type=int, default=0,
                  choices=range(0, 200), metavar='[0-199]',
                  help= "1: Basic verbosity; 2-4: Trace; >5: Debug")
args.add_argument("-p", "--position", action="store_true",
                  help="show word sentence position")
# Stored as arg.morphology: True by default, False when -nm is given.
args.add_argument("-nm", "--no-morphology", dest='morphology', action='store_false',
                  help="do not display morphology")
args.add_argument("-i", "--interactive", action="store_true",
                  help="interactive mode after each result")

# Parsed options namespace; "args" remains the parser itself.
arg = args.parse_args()
67
# Open the requested dictionary; on failure show the usage line and exit
# with status 2.
try:
    lgdict = Dictionary(arg.lang)
except LG_Error:
    # The default error handler will print the error message
    args.print_usage()
    sys.exit(2)

# Parse options shared by every sentence parsed below.
po = ParseOptions(verbosity=arg.verbosity)

po.max_null_count = 999  # > allowed maximum number of words
po.linkage_limit = 10000 # maximum number of linkages to generate
po.max_parse_time = 10   # actual parse timeout may be about twice bigger
po.spell_guess = bool(DISPLAY_GUESSES)
po.display_morphology = arg.morphology
82
# Matches a word that carries a bracketed marker after some text, such as
# "bfgiuing[!].g" or "xxxvfrg[?].n".  A bare "[a]" does not match, because
# at least one character must precede the opening bracket.
GUESS_WORD_RE = re.compile(r'\S+\[[^]]+]')

# Main loop: read one sentence per line, parse it, and report unlinked
# words, guesses and corrections.  Ends on EOF.
while True:
    try:
        sentence_text = input(PROMPT)
    except EOFError:
        print("EOF")
        # Use sys.exit(): the bare exit() helper is installed by the site
        # module for interactive use and is missing under "python -S".
        sys.exit(0)

    # Line-prefix conventions are honoured only for non-terminal input.
    if not is_stdin_atty and sentence_text:
        first_char = sentence_text[0]
        if first_char == '%':  # skipped entirely
            continue
        if first_char == '!':  # ignore user-settings for now
            continue
        if first_char in BATCH_LABELS:
            sentence_text = sentence_text[1:]
    if not sentence_text.strip():
        continue
    if not is_stdin_atty:
        # Echo the sentence, since nothing was typed on the terminal.
        print("\n" + sentence_text)

    sent = Sentence(sentence_text, lgdict, po)
    try:
        linkages = sent.parse()
    except LG_TimerExhausted:
        print('Sentence too complex for parsing in ~{} second{}.'.format(
            po.max_parse_time, nsuffix(po.max_parse_time)))
        continue
    # NOTE(review): both checks are kept as in the original; whether the
    # second one is reachable depends on how the object returned by
    # parse() defines truthiness -- confirm against the bindings.
    if not linkages:
        print('Error occurred - sentence ignored.')
        continue
    if len(linkages) <= 0:
        print('Cannot parse the input sentence')
        continue
    null_count = sent.null_count()

    if arg.position:
        # Ruler of column digits (0-9 repeating), indented by the prompt width.
        ruler = ''.join(str(p % 10) for p in range(len(sentence_text)))
        print(' ' * len(PROMPT) + ruler)

    # Materialize the linkages: they are traversed more than once below.
    linkages = list(linkages)

    # search for correction suggestions (words containing ".#" past index 0)
    correction_found = any(word.find('.#') > 0
                           for linkage in linkages
                           for word in linkage.words())

    # Status line.  It is always newline-terminated when anything was
    # printed; previously " - with correction" ran into the next header
    # whenever null_count was non-zero.
    if null_count == 0:
        print("Sentence parsed OK", end='')
    if correction_found:
        print(" - with correction", end='')
    if null_count == 0:
        print(".")
    elif correction_found:
        print()

    guess_found = False
    if DISPLAY_GUESSES and linkages:
        # Check the first linkage for regexed/unknown words
        guess_found = any(GUESS_WORD_RE.search(word)
                          for word in linkages[0].words())

    # Show results with unlinked words or guesses
    if arg.position or guess_found or correction_found or null_count != 0:
        print('Sentence has {} unlinked word{}:'.format(
            null_count, nsuffix(null_count)))
        result_no = 0
        seen_parses = set()  # string forms of word lists already printed
        for linkage in linkages:
            words = list(linkage.words())
            parse_key = str(words)
            if parse_key in seen_parses:
                continue
            seen_parses.add(parse_key)
            result_no += 1

            if arg.position:
                # Each word followed by its (start, end) offsets, first in
                # characters and then in bytes.
                words_char = [
                    w + str((linkage.word_char_start(wi),
                             linkage.word_char_end(wi)))
                    for wi, w in enumerate(words)]
                words_byte = [
                    w + str((linkage.word_byte_start(wi),
                             linkage.word_byte_end(wi)))
                    for wi, w in enumerate(words)]

                print(u"{}: {}".format(result_no, ' '.join(words_char)))
                print(u"{}: {}".format(result_no, ' '.join(words_byte)))
            else:
                print("{}: {}".format(result_no, ' '.join(words)))

    if arg.interactive:
        # Drop into a Python console with the current names available.
        print("Interactive session (^D to end):")
        import code
        code.interact(local=locals())
181