1#!/usr/bin/env python3 2""" 3Note: This only runs with Python3! 4 5Demo: Find unlinked or unknown words. 6These demo is extremely simplified. 7It can only work with link-grammar library version >= 5.3.10. 8Input: English sentences, one per line. 9Output: If there are any []-marked words in the linkage results, 10the output contains unique combinations of the input sentence with 11these works marked. No attempt is done to handle the walls. 12Spell guesses are not handled in this demo. 13 14Example: 15This is a the test of bfgiuing and xxxvfrg 16Output: 17Sentence has 1 unlinked word: 181: LEFT-WALL this.p is.v [a] the test.n of bfgiuing[!].g and.j-n xxxvfrg[?].n RIGHT-WALL 192: LEFT-WALL this.p is.v a [the] test.n of bfgiuing[!].g and.j-n xxxvfrg[?].n RIGHT-WALL 203: LEFT-WALL this.p is.v [a] the test.n of bfgiuing[!].g and.j-n xxxvfrg[?].a RIGHT-WALL 214: LEFT-WALL this.p is.v a [the] test.n of bfgiuing[!].g and.j-n xxxvfrg[?].a RIGHT-WALL 22""" 23 24from __future__ import print_function 25import sys 26from sys import stdin 27import re 28import argparse 29import readline 30 31from linkgrammar import (Sentence, ParseOptions, Dictionary, 32 LG_Error, LG_TimerExhausted, Clinkgrammar as clg) 33 34def nsuffix(q): 35 return '' if q == 1 else 's' 36 37class Formatter(argparse.HelpFormatter): 38 """ Display the "lang" argument as a first one, as in link-parser. """ 39 def _format_usage(self, usage, actions, groups, prefix): 40 usage_message = super(Formatter, self)._format_usage(usage, actions, groups, prefix) 41 return re.sub(r'(usage: \S+) (.*) \[lang]', r'\1 [lang] \2', str(usage_message)) 42 43#-----------------------------------------------------------------------------# 44 45is_stdin_atty = sys.stdin.isatty() 46 47PROMPT = "sentence-check: " if is_stdin_atty else "" 48DISPLAY_GUESSES = True # Display regex and POS guesses 49BATCH_LABELS = '*: ' 50 51print ("Version:", clg.linkgrammar_get_version()) 52 53args = argparse.ArgumentParser(formatter_class=Formatter) 54args.add_argument('lang', nargs='?', default='en', 55 help="language or dictionary location") 56args.add_argument("-v", "--verbosity", type=int,default=0, 57 choices=range(0,199), metavar='[0-199]', 58 help= "1: Basic verbosity; 2-4: Trace; >5: Debug") 59args.add_argument("-p", "--position", action="store_true", 60 help="show word sentence position") 61args.add_argument("-nm", "--no-morphology", dest='morphology', action='store_false', 62 help="do not display morphology") 63args.add_argument("-i", "--interactive", action="store_true", 64 help="interactive mode after each result") 65 66arg = args.parse_args() 67 68try: 69 lgdict = Dictionary(arg.lang) 70except LG_Error: 71 # The default error handler will print the error message 72 args.print_usage() 73 sys.exit(2) 74 75po = ParseOptions(verbosity=arg.verbosity) 76 77po.max_null_count = 999 # > allowed maximum number of words 78po.linkage_limit = 10000 # maximum number of linkages to generate 79po.max_parse_time = 10 # actual parse timeout may be about twice bigger 80po.spell_guess = True if DISPLAY_GUESSES else False 81po.display_morphology = arg.morphology 82 83while True: 84 try: 85 sentence_text = input(PROMPT) 86 except EOFError: 87 print("EOF") 88 exit(0) 89 90 if not is_stdin_atty and sentence_text: 91 if sentence_text[0] == '%': 92 continue 93 if sentence_text[0] == '!': # ignore user-settings for now 94 continue 95 if sentence_text[0] in BATCH_LABELS: 96 sentence_text = sentence_text[1:] 97 if sentence_text.strip() == '': 98 continue 99 if not is_stdin_atty: 100 print("\n" + sentence_text) 101 102 sent = Sentence(str(sentence_text), lgdict, po) 103 try: 104 linkages = sent.parse() 105 except LG_TimerExhausted: 106 print('Sentence too complex for parsing in ~{} second{}.'.format( 107 po.max_parse_time,nsuffix(po.max_parse_time))) 108 continue 109 if not linkages: 110 print('Error occurred - sentence ignored.') 111 continue 112 if len(linkages) <= 0: 113 print('Cannot parse the input sentence') 114 continue 115 null_count = sent.null_count() 116 117 if arg.position: 118 print(' ' * len(PROMPT), end='') 119 for p in range (0, len(sentence_text)): 120 print(p%10, end="") 121 print() 122 123 if null_count == 0: 124 print("Sentence parsed OK", end='') 125 126 linkages = list(linkages) 127 128 correction_found = False 129 # search for correction suggestions 130 for l in linkages: 131 for word in l.words(): 132 if word.find(r'.#') > 0: 133 correction_found = True 134 break 135 if correction_found: 136 break 137 138 if correction_found: 139 print(" - with correction", end='') 140 if null_count == 0: 141 print(".") 142 143 guess_found = False 144 if DISPLAY_GUESSES: 145 # Check the first linkage for regexed/unknown words 146 for word in linkages[0].words(): 147 # search for something[x] 148 if re.search(r'\S+\[[^]]+]', word): 149 guess_found = True 150 break 151 152 # Show results with unlinked words or guesses 153 if arg.position or guess_found or correction_found or null_count != 0: 154 print('Sentence has {} unlinked word{}:'.format( 155 null_count, nsuffix(null_count))) 156 result_no = 0 157 uniqe_parse = {} 158 for linkage in linkages: 159 words = list(linkage.words()) 160 if str(words) in uniqe_parse: 161 continue 162 result_no += 1 163 uniqe_parse[str(words)] = True 164 165 if arg.position: 166 words_char = [] 167 words_byte = [] 168 for wi, w in enumerate(words): 169 words_char.append(w + str((linkage.word_char_start(wi), linkage.word_char_end(wi)))) 170 words_byte.append(w + str((linkage.word_byte_start(wi), linkage.word_byte_end(wi)))) 171 172 print(u"{}: {}".format(result_no, ' '.join(words_char))) 173 print(u"{}: {}".format(result_no, ' '.join(words_byte))) 174 else: 175 print("{}: {}".format(result_no, ' '.join(words))) 176 177 if arg.interactive: 178 print("Interactive session (^D to end):") 179 import code 180 code.interact(local=locals()) 181