1#!/usr/bin/env python 2# coding: utf8 3"""Python3 link-grammar test script""" 4 5from __future__ import print_function 6import sys, os, re 7import locale 8import unittest 9 10# assertRaisesRegexp and assertRegexpMatches have been renamed in 11# unittest for python 3, but not in python 2 (at least yet). 12if hasattr(unittest.TestCase, 'assertRaisesRegex'): 13 unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex 14 unittest.TestCase.assertRegexpMatches = unittest.TestCase.assertRegex 15 16import lg_testutils # Found in the same directory of this test script 17 18# Show information on this program run 19print('Running by:', sys.executable) 20print('Running {} in:'.format(sys.argv[0]), os.getcwd()) 21for v in 'PYTHONPATH', 'srcdir', 'LINK_GRAMMAR_DATA': 22 print('{}={}'.format(v, os.environ.get(v))) 23#=== 24 25from linkgrammar import (Sentence, Linkage, ParseOptions, Link, Dictionary, 26 LG_Error, LG_DictionaryError, LG_TimerExhausted, 27 Clinkgrammar as clg) 28 29print(clg.linkgrammar_get_configuration()) 30 31# Show the location and version of the bindings modules 32for imported_module in 'linkgrammar$', 'clinkgrammar', '_clinkgrammar', 'lg_testutils': 33 module_found = False 34 for module in sys.modules: 35 if re.search(r'^(linkgrammar\.)?'+imported_module, module): 36 print("Using", sys.modules[module], end='') 37 if hasattr(sys.modules[module], '__version__'): 38 print(' version', sys.modules[module].__version__, end='') 39 print() 40 module_found = True 41 if not module_found: 42 print("Warning: Module", imported_module, "not loaded.") 43 44sys.stdout.flush() 45#=== 46 47def setUpModule(): 48 unittest.TestCase.maxDiff = None 49 50 datadir = os.getenv("LINK_GRAMMAR_DATA", "") 51 if datadir: 52 clg.dictionary_set_data_dir(datadir) 53 54 clg.test_data_srcdir = os.getenv("srcdir", os.path.dirname(sys.argv[0])) 55 if clg.test_data_srcdir: 56 clg.test_data_srcdir += "/" 57 58# The tests are run in alphabetical order.... 
#
# First test: test the test framework itself ...
class AAALinkTestCase(unittest.TestCase):
    def test_link_display_with_identical_link_type(self):
        self.assertEqual(str(Link(None, 0, 'Left','Link','Link','Right')),
                         u'Left-Link-Right')

    def test_link_display_with_identical_link_type2(self):
        self.assertEqual(str(Link(None, 0, 'Left','Link','Link*','Right')),
                         u'Left-Link-Link*-Right')

class AADictionaryTestCase(unittest.TestCase):
    def test_open_nonexistent_dictionary(self):
        # Opening a non-existent dictionary must raise, and the message
        # written to file descriptor 2 must mention the requested name.
        dummy_lang = "No such language test "

        for suffix, expected_exception in (('1', LG_DictionaryError),
                                           ('2', LG_Error)):
            save_stderr = divert_start(2)
            try:
                self.assertRaises(expected_exception,
                                  Dictionary, dummy_lang + suffix)
            finally:
                # End the diversion even when the assertion above fails,
                # so a failure here cannot leave stderr diverted for the
                # rest of the run.
                captured = save_stderr.divert_end()
            self.assertIn(dummy_lang + suffix, captured)

# Check absolute and relative dictionary access.
# Check also that the dictionary language is set correctly.
#
# We suppose here that this test program is located somewhere in the source
# directory, 1 to 4 levels under it, that the data directory is named 'data',
# and that it has a parallel directory called 'link-grammar'.
DATA_DIR = 'data'
PARALLEL_DIR = 'link-grammar'
class ABDictionaryLocationTestCase(unittest.TestCase):
    # Absolute path of the 'data' directory; set by setUpClass().
    abs_datadir = None

    @classmethod
    def setUpClass(cls):
        """Locate the source 'data' directory and remember the original cwd.

        Raises AssertionError when no 'data' directory exists 1 to 4
        levels above clg.test_data_srcdir.
        """
        cls.po = ParseOptions(verbosity=0)
        cls.original_directory = os.getcwd()

        # Find the 'data' directory in the source directory.  All 4
        # documented levels are probed (the previous range(1, 4) stopped
        # after 3).
        os.chdir(clg.test_data_srcdir)
        datadir = ''
        for levels_up in range(1, 5):
            candidate = '../' * levels_up + DATA_DIR
            if os.path.isdir(candidate):
                datadir = candidate
                break

        if not datadir:
            # tearDownClass() is not run when setUpClass() fails, so the
            # working directory has to be restored here.  An explicit
            # raise is used because "assert" is stripped under -O.
            os.chdir(cls.original_directory)
            raise AssertionError('Cannot find source directory dictionary data')
        cls.abs_datadir = os.path.abspath(datadir)

    @classmethod
    def tearDownClass(cls):
        del cls.po
        os.chdir(cls.original_directory)

    def test_open_absolute_path(self):
        d = Dictionary(self.abs_datadir + '/en')
        self.assertEqual(str(d), 'en')
        if os.name == 'nt':
            # On Windows, check the backslash separator too.
            d = Dictionary(self.abs_datadir + r'\en')
            self.assertEqual(str(d), 'en')

    def test_open_relative_path_from_data_directory(self):
        os.chdir(self.abs_datadir)
        d = Dictionary('./en')
        self.assertEqual(str(d), 'en')
        if os.name == 'nt':
            d = Dictionary(r'.\en')
            self.assertEqual(str(d), 'en')

    def test_open_lang_from_data_directory(self):
        os.chdir(self.abs_datadir)
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')

    # Test use of the internal '..' path
    def test_open_from_a_language_directory(self):
        os.chdir(self.abs_datadir + '/ru')
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')

    def test_open_relative_path_from_data_parent_directory(self):
        os.chdir(self.abs_datadir + '/..')
        d = Dictionary('data/en')
        self.assertEqual(str(d), 'en')
        if os.name == 'nt':
            d = Dictionary(r'data\en')
            self.assertEqual(str(d), 'en')

    # Test use of the internal './data' path.
    def test_open_from_data_parent_directory(self):
        os.chdir(self.abs_datadir + '/..')
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')

    # Test use of the internal '../data' path.
158 def test_open_from_a_parallel_directory(self): 159 os.chdir(self.abs_datadir + '/../' + PARALLEL_DIR) 160 d = Dictionary('en') 161 self.assertEqual(str(d), 'en') 162 163class BParseOptionsTestCase(unittest.TestCase): 164 def test_setting_verbosity(self): 165 po = ParseOptions() 166 po.verbosity = 2 167 #Ensure that the PO object reports the value correctly 168 self.assertEqual(po.verbosity, 2) 169 #Ensure that it's actually setting it. 170 self.assertEqual(clg.parse_options_get_verbosity(po._obj), 2) 171 172 def test_setting_verbosity_to_not_allow_value_raises_value_error(self): 173 po = ParseOptions() 174 self.assertRaises(ValueError, setattr, po, "verbosity", 121) 175 176 def test_setting_verbosity_to_non_integer_raises_type_error(self): 177 po = ParseOptions() 178 self.assertRaises(TypeError, setattr, po, "verbosity", "a") 179 180 def test_setting_linkage_limit(self): 181 po = ParseOptions() 182 po.linkage_limit = 3 183 self.assertEqual(clg.parse_options_get_linkage_limit(po._obj), 3) 184 185 def test_setting_linkage_limit_to_non_integer_raises_type_error(self): 186 po = ParseOptions() 187 self.assertRaises(TypeError, setattr, po, "linkage_limit", "a") 188 189 def test_setting_linkage_limit_to_negative_number_raises_value_error(self): 190 po = ParseOptions() 191 self.assertRaises(ValueError, setattr, po, "linkage_limit", -1) 192 193 def test_setting_disjunct_cost(self): 194 po = ParseOptions() 195 po.disjunct_cost = 3.0 196 self.assertEqual(clg.parse_options_get_disjunct_cost(po._obj), 3.0) 197 198 def test_setting_disjunct_cost_to_non_integer_raises_type_error(self): 199 po = ParseOptions() 200 self.assertRaises(TypeError, setattr, po, "disjunct_cost", "a") 201 202 def test_setting_min_null_count(self): 203 po = ParseOptions() 204 po.min_null_count = 3 205 self.assertEqual(clg.parse_options_get_min_null_count(po._obj), 3) 206 207 def test_setting_min_null_count_to_non_integer_raises_type_error(self): 208 po = ParseOptions() 209 self.assertRaises(TypeError, 
setattr, po, "min_null_count", "a") 210 211 def test_setting_min_null_count_to_negative_number_raises_value_error(self): 212 po = ParseOptions() 213 self.assertRaises(ValueError, setattr, po, "min_null_count", -1) 214 215 def test_setting_max_null_count(self): 216 po = ParseOptions() 217 po.max_null_count = 3 218 self.assertEqual(clg.parse_options_get_max_null_count(po._obj), 3) 219 220 def test_setting_max_null_count_to_non_integer_raises_type_error(self): 221 po = ParseOptions() 222 self.assertRaises(TypeError, setattr, po, "max_null_count", "a") 223 224 def test_setting_max_null_count_to_negative_number_raises_value_error(self): 225 po = ParseOptions() 226 self.assertRaises(ValueError, setattr, po, "max_null_count", -1) 227 228 def test_setting_short_length(self): 229 po = ParseOptions() 230 po.short_length = 3 231 self.assertEqual(clg.parse_options_get_short_length(po._obj), 3) 232 233 def test_setting_short_length_to_non_integer_raises_type_error(self): 234 po = ParseOptions() 235 self.assertRaises(TypeError, setattr, po, "short_length", "a") 236 237 def test_setting_short_length_to_negative_number_raises_value_error(self): 238 po = ParseOptions() 239 self.assertRaises(ValueError, setattr, po, "short_length", -1) 240 241 def test_setting_islands_ok(self): 242 po = ParseOptions() 243 po.islands_ok = True 244 self.assertEqual(po.islands_ok, True) 245 self.assertEqual(clg.parse_options_get_islands_ok(po._obj), 1) 246 po.islands_ok = False 247 self.assertEqual(po.islands_ok, False) 248 self.assertEqual(clg.parse_options_get_islands_ok(po._obj), 0) 249 250 def test_setting_islands_ok_to_non_boolean_raises_type_error(self): 251 po = ParseOptions() 252 self.assertRaises(TypeError, setattr, po, "islands_ok", "a") 253 254 def test_setting_max_parse_time(self): 255 po = ParseOptions() 256 po.max_parse_time = 3 257 self.assertEqual(clg.parse_options_get_max_parse_time(po._obj), 3) 258 259 def test_setting_max_parse_time_to_non_integer_raises_type_error(self): 260 po = 
ParseOptions() 261 self.assertRaises(TypeError, setattr, po, "max_parse_time", "a") 262 263 def test_setting_spell_guess_to_non_integer_raises_type_error(self): 264 po = ParseOptions() 265 self.assertRaises(TypeError, setattr, po, "spell_guess", "a") 266 267 def test_setting_display_morphology(self): 268 po = ParseOptions() 269 po.display_morphology = True 270 self.assertEqual(po.display_morphology, True) 271 self.assertEqual(clg.parse_options_get_display_morphology(po._obj), 1) 272 po.display_morphology = False 273 self.assertEqual(po.display_morphology, False) 274 self.assertEqual(clg.parse_options_get_display_morphology(po._obj), 0) 275 276 def test_setting_all_short_connectors(self): 277 po = ParseOptions() 278 po.all_short_connectors = True 279 self.assertEqual(po.all_short_connectors, True) 280 self.assertEqual(clg.parse_options_get_all_short_connectors(po._obj), 1) 281 po.all_short_connectors = False 282 self.assertEqual(po.all_short_connectors, False) 283 self.assertEqual(clg.parse_options_get_all_short_connectors(po._obj), 0) 284 285 def test_setting_all_short_connectors_to_non_boolean_raises_type_error(self): 286 po = ParseOptions() 287 self.assertRaises(TypeError, setattr, po, "all_short_connectors", "a") 288 289 def test_setting_spell_guess(self): 290 po = ParseOptions(spell_guess=True) 291 if po.spell_guess == 0: 292 raise unittest.SkipTest("Library is not configured with spell guess") 293 self.assertEqual(po.spell_guess, 7) 294 po = ParseOptions(spell_guess=5) 295 self.assertEqual(po.spell_guess, 5) 296 po = ParseOptions(spell_guess=False) 297 self.assertEqual(po.spell_guess, 0) 298 299 def test_specifying_parse_options(self): 300 po = ParseOptions(linkage_limit=99) 301 self.assertEqual(clg.parse_options_get_linkage_limit(po._obj), 99) 302 303class CParseOptionsTestCase(unittest.TestCase): 304 305 def test_that_sentence_can_be_destroyed_when_linkages_still_exist(self): 306 """ 307 If the parser is deleted before the associated swig objects 308 are, 
there will be bad pointer dereferences (as the swig 309 objects will be pointing into freed memory). This test ensures 310 that parsers can be created and deleted without regard for 311 the existence of PYTHON Linkage objects 312 """ 313 #pylint: disable=unused-variable 314 s = Sentence('This is a sentence.', Dictionary(), ParseOptions()) 315 linkages = s.parse() 316 del s 317 318 def test_that_invalid_options_are_disallowed(self): 319 self.assertRaisesRegexp(TypeError, "unexpected keyword argument", 320 ParseOptions, invalid_option=1) 321 322 def test_that_invalid_option_properties_cannot_be_used(self): 323 po = ParseOptions() 324 self.assertRaisesRegexp(TypeError, "Unknown parse option", 325 setattr, po, "invalid_option", 1) 326 327 def test_that_ParseOptions_cannot_get_positional_arguments(self): 328 self.assertRaisesRegexp(TypeError, "Positional arguments are not allowed", 329 ParseOptions, 1) 330 331class DBasicParsingTestCase(unittest.TestCase): 332 @classmethod 333 def setUpClass(cls): 334 cls.d, cls.po = Dictionary(), None 335 336 @classmethod 337 def tearDownClass(cls): 338 del cls.d, cls.po 339 340 def parse_sent(self, text, po=None): 341 if po is None: 342 po = ParseOptions() 343 return list(Sentence(text, self.d, po).parse()) 344 345 def test_that_parse_returns_empty_iterator_on_no_linkage(self): 346 """Parsing a bad sentence with no null-links shouldn't give any linkage.""" 347 result = self.parse_sent("This this doesn't parse") 348 linkage_exists = False 349 for _ in result: 350 linkage_exists = True 351 self.assertFalse(linkage_exists, "Unparsable sentence has linkages.") 352 353 def test_that_parse_returns_empty_iterator_on_no_linkage_sat(self): 354 """Parsing a bad sentence with no null-links shouldn't give any linkage (sat)""" 355 self.po = ParseOptions(use_sat=True) 356 if self.po.use_sat != True: 357 raise unittest.SkipTest("Library not configured with SAT parser") 358 result = self.parse_sent("This this doesn't parse", self.po) 359 
linkage_exists = False 360 for _ in result: 361 linkage_exists = True 362 self.assertFalse(linkage_exists, "SAT: Unparsable sentence has linkages.") 363 364 def test_that_parse_sent_returns_list_of_linkage_objects_for_valid_sentence(self): 365 result = self.parse_sent("This is a relatively simple sentence.") 366 self.assertTrue(isinstance(result[0], Linkage)) 367 self.assertTrue(isinstance(result[1], Linkage)) 368 369 def test_utf8_encoded_string(self): 370 result = self.parse_sent("I love going to the café.") 371 self.assertTrue(len(result) > 1) 372 self.assertTrue(isinstance(result[0], Linkage)) 373 self.assertTrue(isinstance(result[1], Linkage)) 374 375 # def test_unicode_encoded_string(self): 376 result = self.parse_sent(u"I love going to the caf\N{LATIN SMALL LETTER E WITH ACUTE}.") 377 self.assertTrue(len(result) > 1) 378 self.assertTrue(isinstance(result[0], Linkage)) 379 self.assertTrue(isinstance(result[1], Linkage)) 380 381 # def test_unknown_word(self): 382 result = self.parse_sent("I love going to the qertfdwedadt.") 383 self.assertTrue(len(result) > 1) 384 self.assertTrue(isinstance(result[0], Linkage)) 385 self.assertTrue(isinstance(result[1], Linkage)) 386 387 # def test_unknown_euro_utf8_word(self): 388 result = self.parse_sent("I love going to the qéáéğíóşúüñ.") 389 self.assertTrue(len(result) > 1) 390 self.assertTrue(isinstance(result[0], Linkage)) 391 self.assertTrue(isinstance(result[1], Linkage)) 392 393 # def test_unknown_cyrillic_utf8_word(self): 394 result = self.parse_sent("I love going to the доктором.") 395 self.assertTrue(len(result) > 1) 396 self.assertTrue(isinstance(result[0], Linkage)) 397 self.assertTrue(isinstance(result[1], Linkage)) 398 399 def test_getting_link_distances(self): 400 linkage = self.parse_sent("This is a sentence.")[0] 401 self.assertEqual([len(l) for l in linkage.links()], [5,2,1,1,2,1,1]) 402 linkage = self.parse_sent("This is a silly sentence.")[0] 403 self.assertEqual([len(l) for l in linkage.links()], 
[6,2,1,1,3,2,1,1,1]) 404 405 # If \w is supported, other \ shortcuts are hopefully supported too. 406 def test_regex_class_shortcut_support(self): 407 r"""Test that regexes support \w""" 408 po = ParseOptions(display_morphology=False) 409 linkage = self.parse_sent("This is a _regex_ive regex test", po)[0] 410 self.assertEqual(linkage.word(4), '_regex_ive[!].a') 411 412 def test_timer_exhausted_exception(self): 413 self.assertRaises(LG_TimerExhausted, 414 self.parse_sent, 415 "This sentence parses without null words, " 416 "and should take more than one second to parse!" * 14, 417 ParseOptions(max_parse_time=1,short_length=255,disjunct_cost=10.0,linkage_limit=10000)) 418 419# The tests here are numbered since their order is important. 420# They depend on the result and state of the previous ones as follows: 421# - set_handler() returned a value that depend on it previous invocation. 422# - A class variable "handler" to record its previous results. 423class EErrorFacilityTestCase(unittest.TestCase): 424 # Initialize class variables to invalid (for the test) values. 425 handler = { 426 "default": lambda x, y=None: None, 427 "previous": lambda x, y=None: None 428 } 429 430 def setUp(self): 431 self.testit = "testit" 432 self.testleaks = 0 # A repeat count for validating no memory leaks 433 self.numerr = 0 434 self.errinfo = clg.lg_None 435 436 @staticmethod 437 def error_handler_test(errinfo, data): 438 # A test error handler. It assigns the errinfo struct as an attribute 439 # of its data so it can be examined after the call. In addition, the 440 # ability of the error handler to use its data argument is tested by 441 # the "testit" attribute. 442 if data is None: 443 return 444 data.errinfo = errinfo 445 data.gotit = data.testit 446 447 def test_10_set_error_handler(self): 448 # Set the error handler and validate that it 449 # gets the error info and the data. 
450 self.__class__.handler["default"] = \ 451 LG_Error.set_handler(self.error_handler_test, self) 452 self.assertEqual(self.__class__.handler["default"].__name__, 453 "_default_handler") 454 self.gotit = None 455 self.assertRaises(LG_Error, Dictionary, "seh_dummy1") 456 self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Error, "Error")) 457 self.assertEqual(self.gotit, "testit") 458 self.assertRegexpMatches(self.errinfo.text, "Could not open dictionary.*seh_dummy1") 459 460 def test_20_set_error_handler_None(self): 461 # Set the error handler to None and validate that printall() 462 # gets the error info and the data and returns the number of errors. 463 self.__class__.handler["previous"] = LG_Error.set_handler(None) 464 self.assertEqual(self.__class__.handler["previous"].__name__, "error_handler_test") 465 self.assertRaises(LG_Error, Dictionary, "seh_dummy2") 466 self.gotit = None 467 for i in range(0, 2+self.testleaks): 468 self.numerr = LG_Error.printall(self.error_handler_test, self) 469 if i == 0: 470 self.assertEqual(self.numerr, 1) 471 if i == 1: 472 self.assertEqual(self.numerr, 0) 473 self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Error, "Error")) 474 self.assertEqual(self.gotit, "testit") 475 self.assertRegexpMatches(self.errinfo.text, ".*seh_dummy2") 476 477 def test_21_set_error_handler_None(self): 478 # Further test of correct number of errors. 479 self.numerr = 3 480 for _ in range(0, self.numerr): 481 self.assertRaises(LG_Error, Dictionary, "seh_dummy2") 482 self.numerr = LG_Error.printall(self.error_handler_test, None) 483 self.assertEqual(self.numerr, self.numerr) 484 485 def test_22_defaut_handler_param(self): 486 """Test bad data parameter to default error handler""" 487 # (It should be an integer >=0 and <= lg_None.) 488 # Here the error handler is still set to None. 
489 490 # This test doesn't work - TypeError is somehow raised inside 491 # linkgrammar.py when _default_handler() is invoked as a callback. 492 # 493 #LG_Error.set_handler(self.__class__.handler["default"], "bad param") 494 #with self.assertRaises(TypeError): 495 # try: 496 # Dictionary("a dummy dict name (bad param test)") 497 # except LG_Error: 498 # pass 499 500 # So test it directly. 501 502 dummy_lang = "a dummy dict name (bad param test)" 503 self.assertRaises(LG_Error, Dictionary, dummy_lang) 504 LG_Error.printall(self.error_handler_test, self) # grab a valid errinfo 505 #self.assertIn(dummy_lang, save_stderr.divert_end()) 506 self.assertRaisesRegexp(TypeError, "must be an integer", 507 self.__class__.handler["default"], 508 self.errinfo, "bad param") 509 self.assertRaisesRegexp(ValueError, "must be an integer", 510 self.__class__.handler["default"], 511 self.errinfo, clg.lg_None+1) 512 self.assertRaises(ValueError, self.__class__.handler["default"], 513 self.errinfo, -1) 514 515 try: 516 self.param_ok = False 517 save_stdout = divert_start(1) # Note: Handler parameter is stdout 518 self.__class__.handler["default"](self.errinfo, 1) 519 self.assertIn(dummy_lang, save_stdout.divert_end()) 520 self.param_ok = True 521 except (TypeError, ValueError): 522 self.assertTrue(self.param_ok) 523 524 def test_23_prt_error(self): 525 LG_Error.message("Info: prt_error test\n") 526 LG_Error.printall(self.error_handler_test, self) 527 self.assertRegexpMatches(self.errinfo.text, "prt_error test\n") 528 self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Info, "Info")) 529 530 def test_24_prt_error_in_parts(self): 531 LG_Error.message("Trace: part one... ") 532 LG_Error.message("part two\n") 533 LG_Error.printall(self.error_handler_test, self) 534 self.assertEqual(self.errinfo.text, "part one... 
part two\n") 535 self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Trace, "Trace")) 536 537 def test_25_prt_error_in_parts_with_embedded_newline(self): 538 LG_Error.message("Trace: part one...\n\\") 539 LG_Error.message("part two\n") 540 LG_Error.printall(self.error_handler_test, self) 541 self.assertEqual(self.errinfo.text, "part one...\npart two\n") 542 self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Trace, "Trace")) 543 544 def test_26_prt_error_plain_message(self): 545 LG_Error.message("This is a regular output line.\n") 546 LG_Error.printall(self.error_handler_test, self) 547 self.assertEqual(self.errinfo.text, "This is a regular output line.\n") 548 self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_None, "")) 549 550 def test_30_formatmsg(self): 551 # Here the error handler is still set to None. 552 for _ in range (0, 1+self.testleaks): 553 self.assertRaises(LG_Error, Dictionary, "formatmsg-test-dummy-dict") 554 LG_Error.printall(self.error_handler_test, self) 555 self.assertRegexpMatches(self.errinfo.formatmsg(), "link-grammar: Error: .*formatmsg-test-dummy-dict") 556 557 def test_40_clearall(self): 558 # Here the error handler is still set to None. 559 # Call LG_Error.clearall() and validate it indeed clears the error. 560 self.assertRaises(LG_Error, Dictionary, "clearall-test-dummy-dict") 561 LG_Error.clearall() 562 self.testit = "clearall" 563 self.numerr = LG_Error.printall(self.error_handler_test, self) 564 self.assertEqual(self.numerr, 0) 565 self.assertFalse(hasattr(self, "gotit")) 566 567 def test_41_flush(self): 568 # Here the error handler is still set to None. 569 # First validate that nothing gets flushed (no error is buffered at this point). 570 self.flushed = LG_Error.flush() 571 self.assertEqual(self.flushed, False) 572 # Now generate a partial error message that is still buffered. 
573 LG_Error.message("This is a partial error message.") 574 # Validate that it is still hidden. 575 self.numerr = LG_Error.printall(self.error_handler_test, self) 576 self.assertEqual(self.numerr, 0) 577 self.assertFalse(hasattr(self, "gotit")) 578 # Flush it. 579 self.flushed = LG_Error.flush() 580 self.assertEqual(self.flushed, True) 581 self.numerr = LG_Error.printall(self.error_handler_test, self) 582 self.assertEqual(self.numerr, 1) 583 self.assertRegexpMatches(self.errinfo.text, "partial") 584 585 def test_50_set_orig_error_handler(self): 586 # Set the error handler back to the default handler. 587 # The error message is now visible (but we cannot test that). 588 self.__class__.handler["previous"] = LG_Error.set_handler(self.__class__.handler["default"]) 589 self.assertIsNone(self.__class__.handler["previous"]) 590 for _ in range(0, 1+self.testleaks): 591 self.__class__.handler["previous"] = LG_Error.set_handler(self.__class__.handler["default"]) 592 self.assertEqual(self.__class__.handler["previous"].__name__, "_default_handler") 593 594 self.errinfo = "dummy" 595 dummy_lang = "a dummy dict name (default handler test)" 596 save_stderr = divert_start(2) 597 self.assertRaises(LG_Error, Dictionary, dummy_lang) 598 self.assertIn(dummy_lang, save_stderr.divert_end()) 599 self.assertEqual(self.errinfo, "dummy") 600 601class FSATsolverTestCase(unittest.TestCase): 602 def setUp(self): 603 self.d, self.po = Dictionary(lang='en'), ParseOptions() 604 self.po = ParseOptions(use_sat=True) 605 if self.po.use_sat != True: 606 raise unittest.SkipTest("Library not configured with SAT parser") 607 608 def test_SAT_getting_links(self): 609 linkage_testfile(self, self.d, self.po, 'sat') 610 611class HEnglishLinkageTestCase(unittest.TestCase): 612 @classmethod 613 def setUpClass(cls): 614 cls.d, cls.po = Dictionary(), ParseOptions(linkage_limit=1000, display_morphology=False) 615 616 @classmethod 617 def tearDownClass(cls): 618 del cls.d, cls.po 619 620 def parse_sent(self, 
text): 621 return list(Sentence(text, self.d, self.po).parse()) 622 623 def test_a_getting_words(self): 624 self.assertEqual(list(self.parse_sent('This is a sentence.')[0].words()), 625 ['LEFT-WALL', 'this.p', 'is.v', 'a', 'sentence.n', '.', 'RIGHT-WALL']) 626 627 def test_b_getting_num_of_words(self): 628 #Words include punctuation and a 'LEFT-WALL' and 'RIGHT_WALL' 629 self.assertEqual(self.parse_sent('This is a sentence.')[0].num_of_words(), 7) 630 631 def test_c_getting_links(self): 632 sent = 'This is a sentence.' 633 linkage = self.parse_sent(sent)[0] 634 self.assertEqual(linkage.link(0), 635 Link(linkage, 0, 'LEFT-WALL','Xp','Xp','.')) 636 self.assertEqual(linkage.link(1), 637 Link(linkage, 1, 'LEFT-WALL','hWV','dWV','is.v')) 638 self.assertEqual(linkage.link(2), 639 Link(linkage, 2, 'LEFT-WALL','hWd','Wd','this.p')) 640 self.assertEqual(linkage.link(3), 641 Link(linkage, 3, 'this.p','Ss*b','Ss','is.v')) 642 self.assertEqual(linkage.link(4), 643 Link(linkage, 4, 'is.v','O*m','Os','sentence.n')) 644 self.assertEqual(linkage.link(5), 645 Link(linkage, 5, 'a','Ds**c','Ds**c','sentence.n')) 646 self.assertEqual(linkage.link(6), 647 Link(linkage, 6, '.','RW','RW','RIGHT-WALL')) 648 649 def test_d_spell_guessing_on(self): 650 self.po.spell_guess = 7 651 if self.po.spell_guess == 0: 652 raise unittest.SkipTest("Library is not configured with spell guess") 653 result = self.parse_sent("I love going to shoop.") 654 resultx = result[0] if result else [] 655 for resultx in result: 656 if resultx.word(5) == 'shop[~].v': 657 break 658 self.assertEqual(list(resultx.words()) if resultx else [], 659 ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v', '.', 'RIGHT-WALL']) 660 661 def test_e_spell_guessing_off(self): 662 self.po.spell_guess = 0 663 result = self.parse_sent("I love going to shoop.") 664 self.assertEqual(list(result[0].words()), 665 ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v', '.', 'RIGHT-WALL']) 666 667 # Stress-test 
first-word-capitalized in various different ways. 668 # Roughly, the test matrix is this: 669 # -- word is/isn't in dict as lower-case word 670 # -- word is/isn't in dict as upper-case word 671 # -- word is/isn't matched with CAPITALIZED_WORDS regex 672 # -- word is/isn't split by suffix splitter 673 # -- the one that is in the dict is not the grammatically appropriate word. 674 # 675 # Let's is NOT split into two! It's in the dict as one word, lower-case only. 676 def test_f_captilization(self): 677 self.assertEqual(list(self.parse_sent('Let\'s eat.')[0].words()), 678 ['LEFT-WALL', 'let\'s', 'eat.v', '.', 'RIGHT-WALL']) 679 680 # He's is split into two words, he is in dict, lower-case only. 681 self.assertEqual(list(self.parse_sent('He\'s going.')[0].words()), 682 ['LEFT-WALL', 'he', '\'s.v', 'going.v', '.', 'RIGHT-WALL']) 683 684 self.assertEqual(list(self.parse_sent('You\'re going?')[0].words()), 685 ['LEFT-WALL', 'you', '\'re', 'going.v', '?', 'RIGHT-WALL']) 686 687 # Jumbo only in dict as adjective, lower-case, but not noun. 688 self.assertEqual(list(self.parse_sent('Jumbo\'s going?')[0].words()), 689 ['LEFT-WALL', 'Jumbo[!]', '\'s.v', 'going.v', '?', 'RIGHT-WALL']) 690 691 self.assertEqual(list(self.parse_sent('Jumbo\'s shoe fell off.')[0].words()), 692 ['LEFT-WALL', 'Jumbo[!]', 693 '\'s.p', 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']) 694 695 self.assertEqual(list(self.parse_sent('Jumbo sat down.')[0].words()), 696 ['LEFT-WALL', 'Jumbo[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']) 697 698 # Red is in dict, lower-case, as noun, too. 699 # There's no way to really know, syntactically, that Red 700 # should be taken as a proper noun (given name). 
701 #self.assertEqual(list(self.parse_sent('Red\'s going?')[0].words()), 702 # ['LEFT-WALL', 'Red[!]', '\'s.v', 'going.v', '?', 'RIGHT-WALL']) 703 # 704 #self.assertEqual(list(self.parse_sent('Red\'s shoe fell off.')[0].words()), 705 # ['LEFT-WALL', 'Red[!]', 706 # '\'s.p', 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']) 707 # 708 #self.assertEqual(list(self.parse_sent('Red sat down.')[1].words()), 709 # ['LEFT-WALL', 'Red[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']) 710 711 # May in dict as noun, capitalized, and as lower-case verb. 712 self.assertEqual(list(self.parse_sent('May\'s going?')[0].words()), 713 ['LEFT-WALL', 'May.f', '\'s.v', 'going.v', '?', 'RIGHT-WALL']) 714 715 self.assertEqual(list(self.parse_sent('May sat down.')[0].words()), 716 ['LEFT-WALL', 'May.f', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']) 717 718 # McGyver is not in the dict, but is regex-matched. 719 self.assertEqual(list(self.parse_sent('McGyver\'s going?')[0].words()), 720 ['LEFT-WALL', 'McGyver[!]', '\'s.v', 'going.v', '?', 'RIGHT-WALL']) 721 722 self.assertEqual(list(self.parse_sent('McGyver\'s shoe fell off.')[0].words()), 723 ['LEFT-WALL', 'McGyver[!]', 724 '\'s.p', 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL']) 725 726 self.assertEqual(list(self.parse_sent('McGyver sat down.')[0].words()), 727 ['LEFT-WALL', 'McGyver[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL']) 728 729 self.assertEqual(list(self.parse_sent('McGyver Industries stock declined.')[0].words()), 730 ['LEFT-WALL', 'McGyver[!]', 'Industries[!]', 731 'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']) 732 733 # King in dict as both upper and lower case. 
734 self.assertEqual(list(self.parse_sent('King Industries stock declined.')[0].words()), 735 ['LEFT-WALL', 'King.b', 'Industries[!]', 736 'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']) 737 738 # Jumbo in dict only lower-case, as adjective 739 self.assertEqual(list(self.parse_sent('Jumbo Industries stock declined.')[0].words()), 740 ['LEFT-WALL', 'Jumbo[!]', 'Industries[!]', 741 'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']) 742 743 # Thomas in dict only as upper case. 744 self.assertEqual(list(self.parse_sent('Thomas Industries stock declined.')[0].words()), 745 ['LEFT-WALL', 'Thomas.b', 'Industries[!]', 746 'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL']) 747 748 # Some parses are fractionally preferred over others... 749 def test_g_fractions(self): 750 self.assertEqual(list(self.parse_sent('A player who is injured has to leave the field')[0].words()), 751 ['LEFT-WALL', 'a', 'player.n', 'who', 'is.v', 'injured.v-d', 'has.v', 'to.r', 'leave.v', 'the', 'field.n', 'RIGHT-WALL']) 752 753 self.assertEqual(list(self.parse_sent('They ate a special curry which was recommended by the restaurant\'s owner')[0].words()), 754 ['LEFT-WALL', 'they', 'ate.v-d', 'a', 'special.a', 'curry.s', 755 'which', 'was.v-d', 'recommended.v-d', 'by', 'the', 'restaurant.n', 756 '\'s.p', 'owner.n', 'RIGHT-WALL']) 757 758 # Verify that we are getting the linkages that we want 759 # See below, remainder of parses are in text files 760 def test_h_getting_links(self): 761 sent = 'Scientists sometimes may repeat experiments or use groups.' 
        # Compare the diagram of the first linkage of each sentence with
        # the expected drawing.
        # NOTE(review): in this copy of the file the expected diagrams
        # contain only single spaces between columns.  The runs of
        # spaces look collapsed; confirm the literals against the actual
        # linkage.diagram() output before relying on them.
        linkage = self.parse_sent(sent)[0]
        self.assertEqual(linkage.diagram(),
"\n +----------------------------------------Xp---------------------------------------+"
"\n +---------------------------->WV---------------------------->+ |"
"\n | +--------------I--------------+ |"
"\n | +--------Sp--------+ +<-------VJlpi<-------+ |"
"\n +---->Wd----+ +---E---+ +----Op----+ +>VJrpi>+---Op--+ |"
"\n | | | | | | | | | |"
"\nLEFT-WALL scientists.n sometimes may.v repeat.v experiments.n or.j-v use.v groups.n ."
"\n\n")
        sent = 'I enjoy eating bass.'
        linkage = self.parse_sent(sent)[0]
        self.assertEqual(linkage.diagram(),
"\n +-----------------Xp----------------+"
"\n +---->WV---->+ |"
"\n +->Wd--+-Sp*i+---Pg---+---Ou---+ |"
"\n | | | | | |"
"\nLEFT-WALL I.p enjoy.v eating.v bass.n-u ."
"\n\n")


        sent = 'We are from the planet Gorpon'
        linkage = self.parse_sent(sent)[0]
        self.assertEqual(linkage.diagram(),
"\n +--->WV--->+ +---------Js--------+"
"\n +->Wd--+Spx+--Pp-+ +--DD--+---GN---+"
"\n | | | | | | |"
"\nLEFT-WALL we are.v from the planet.n Gorpon[!]"
"\n\n")

# Linkage tests that use the 'demo-sql' dictionary.
class GSQLDictTestCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Skip on Windows unless the configuration string of the library
        # mentions 'mingw' (case-insensitively).
        if os.name == 'nt' and \
                -1 == clg.linkgrammar_get_configuration().lower().find('mingw'):
            raise unittest.SkipTest("No SQL dict support yet on the MSVC build")

        #clg.parse_options_set_verbosity(clg.parse_options_create(), 3)
        cls.d, cls.po = Dictionary(lang='demo-sql'), ParseOptions()

    @classmethod
    def tearDownClass(cls):
        del cls.d, cls.po

    def test_getting_links(self):
        # linkage_testfile() is defined outside this part of the file;
        # presumably it checks parses against a data file - TODO confirm.
        linkage_testfile(self, self.d, self.po)

    def test_getting_links_sat(self):
        # Same check with the SAT parser; skipped when the use_sat option
        # does not read back as True.
        sat_po = ParseOptions(use_sat=True)
        if sat_po.use_sat != True:
            raise unittest.SkipTest("Library not configured with SAT parser")
        linkage_testfile(self, self.d, sat_po)

class IWordPositionTestCase(unittest.TestCase):
@classmethod 817 def setUpClass(cls): 818 cls.d_en = Dictionary(lang='en') 819 820 @classmethod 821 def tearDownClass(cls): 822 del cls.d_en 823 824 def test_en_word_positions(self): 825 linkage_testfile(self, self.d_en, ParseOptions(), 'pos') 826 827 def test_en_spell_word_positions(self): 828 po = ParseOptions(spell_guess=1) 829 if po.spell_guess == 0: 830 raise unittest.SkipTest("Library is not configured with spell guess") 831 linkage_testfile(self, self.d_en, po, 'pos-spell') 832 833 def test_ru_word_positions(self): 834 linkage_testfile(self, Dictionary(lang='ru'), ParseOptions(), 'pos') 835 836 def test_he_word_positions(self): 837 linkage_testfile(self, Dictionary(lang='he'), ParseOptions(), 'pos') 838 839# Tests are run in alphabetical order; do the language tests last. 840 841class ZENLangTestCase(unittest.TestCase): 842 @classmethod 843 def setUpClass(cls): 844 cls.d, cls.po = Dictionary(lang='en'), ParseOptions() 845 846 @classmethod 847 def tearDownClass(cls): 848 del cls.d, cls.po 849 850 def test_getting_links(self): 851 linkage_testfile(self, self.d, self.po) 852 853 def test_quotes(self): 854 linkage_testfile(self, self.d, self.po, 'quotes') 855 856 def test_null_link_range_starting_with_zero(self): 857 """Test parsing with a minimal number of null-links, including 0.""" 858 # This sentence has no complete linkage. Validate that the library 859 # doesn't mangle parsing with null-count>0 due to power_prune()'s 860 # connector-discard optimization at null-count==0. Without commit 861 # "Allow calling classic_parse() with and w/o nulls", the number of 862 # linkages here is 1 instead of 2 and the unused_word_cost is 5. 
863 self.po = ParseOptions(min_null_count=0, max_null_count=999) 864 linkages = Sentence('about people attended', self.d, self.po).parse() 865 self.assertEqual(len(linkages), 2) 866 self.assertEqual(linkages.next().unused_word_cost(), 1) 867 # Expected parses: 868 # 1: 869 # +------------>WV------------>+ 870 # +--------Wd-------+----Sp----+ 871 # | | | 872 #LEFT-WALL [about] people.p attended.v-d 873 # 2: 874 # 875 # +----Sp----+ 876 # | | 877 #[about] people.p attended.v-d 878 879 def test_2_step_parsing_with_null_links(self): 880 self.po = ParseOptions(min_null_count=0, max_null_count=0) 881 882 sent = Sentence('about people attended', self.d, self.po) 883 linkages = sent.parse() 884 self.assertEqual(len(linkages), 0) 885 self.po = ParseOptions(min_null_count=1, max_null_count=999) 886 linkages = sent.parse(self.po) 887 self.assertEqual(len(linkages), 2) 888 self.assertEqual(linkages.next().unused_word_cost(), 1) 889 890 def test_1_step_parsing_with_no_null_links_short(self): 891 self.po = ParseOptions(min_null_count=0, max_null_count=999) 892 893 text = 'This is a test.' 894 sent = Sentence(text, self.d, self.po) 895 self.assertTrue(len(sent.parse()) > 0) # Just check no crashes or leaks 896 897 def test_1_step_parsing_with_no_null_links_long(self): 898 self.po = ParseOptions(min_null_count=0, max_null_count=999) 899 900 text = 12 * 'This is a test. 
' # The final blank is essential 901 sent = Sentence(text, self.d, self.po) 902 self.assertTrue(len(sent.parse()) > 0) # Just check no crashes or leaks 903 904 def test_1_step_parsing_with_nulls_short(self): 905 self.po = ParseOptions(min_null_count=0, max_null_count=999, short_length=1) 906 907 text = 'This a' 908 sent = Sentence(text, self.d, self.po) 909 self.assertTrue(len(sent.parse()) > 0) # Just check no crashes or leaks 910 911 def test_1_step_parsing_with_nulls_long(self): 912 self.po = ParseOptions(min_null_count=0, max_null_count=999, short_length=1) 913 914 text = 12 * 'This is a the test ' # The final blank is essential 915 sent = Sentence(text, self.d, self.po) 916 self.assertTrue(len(sent.parse()) > 0) # Just check no crashes or leaks 917 918class JADictionaryLocaleTestCase(unittest.TestCase): 919 @classmethod 920 def setUpClass(cls): 921 # python2: Gets system locale (getlocale() is not better) 922 cls.oldlocale = locale.setlocale(locale.LC_CTYPE, None) 923 #print('Current locale:', oldlocale) 924 #print('toupper hij:', 'hij'.upper()) 925 926 tr_locale = 'tr_TR.UTF-8' if os.name != 'nt' else 'Turkish' 927 try: 928 locale.setlocale(locale.LC_CTYPE, tr_locale) 929 except locale.Error as e: # Most probably tr_TR.UTF-8 is not installed 930 raise unittest.SkipTest("Locale {}: {}".format(tr_locale, e)) 931 932 #print('Turkish locale:', locale.setlocale(locale.LC_CTYPE, None)) 933 # python2: prints HiJ (lowercase small i in the middle) 934 #print('toupper hij:', 'hij'.upper()) 935 936 cls.d, cls.po = Dictionary(lang='en'), ParseOptions() 937 938 @classmethod 939 def tearDownClass(cls): 940 locale.setlocale(locale.LC_CTYPE, cls.oldlocale) 941 #print("Restored locale:", locale.setlocale(locale.LC_CTYPE)) 942 #print('toupper hij:', 'hij'.upper()) 943 del cls.d, cls.po, cls.oldlocale 944 945 def test_dictionary_locale_definition(self): 946 linkage = Sentence('Is it fine?', self.d, self.po).parse().next() 947 self.assertEqual(list(linkage.words())[1], 'is.v') 
# FIXME: Use a special testing dictionary for checks like that.
class JBDictCostReadingTestCase(unittest.TestCase):
    """Read the 'en' dictionary while a Russian LC_NUMERIC locale is active."""

    @classmethod
    def setUpClass(cls):
        # Save the category that is actually modified below (LC_NUMERIC),
        # so that tearDownClass() can undo the change.
        cls.oldlocale = locale.setlocale(locale.LC_NUMERIC, None)
        ru_locale = 'ru_RU.UTF-8' if os.name != 'nt' else 'Russian'
        try:
            locale.setlocale(locale.LC_NUMERIC, ru_locale)
        except locale.Error as e: # Most probably ru_RU.UTF-8 is not installed
            del cls.oldlocale
            raise unittest.SkipTest("Locale {}: {}".format(ru_locale, e))
        # The dict read must be performed after the locale change.
        cls.d, cls.po = Dictionary(lang='en'), ParseOptions()

    @classmethod
    def tearDownClass(cls):
        # Restore LC_NUMERIC so the Russian numeric locale does not leak
        # into the test classes that run afterwards.
        locale.setlocale(locale.LC_NUMERIC, cls.oldlocale)
        del cls.d, cls.po, cls.oldlocale

    # When a comma-separator LC_NUMERIC affects the dict cost conversion,
    # the 4th word is 'white.v'.
    def test_cost_sensitive_parse(self):
        linkage = Sentence('Is the bed white?', self.d, self.po).parse().next()
        self.assertEqual(list(linkage.words())[4], 'white.a')

class ZENConstituentsCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.d, cls.po = Dictionary(lang='en'), ParseOptions()

    @classmethod
    def tearDownClass(cls):
        del cls.d, cls.po

    def test_a_constituents_after_parse_list(self):
        """
        Validate that the post-processing data of the first linkage is not
        getting clobbered by later linkages.
        """
        linkages = list(Sentence("This is a test.", self.d, self.po).parse())
        self.assertEqual(linkages[0].constituent_tree(),
                "(S (NP this.p)\n (VP is.v\n (NP a test.n))\n .)\n")

class ZDELangTestCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.d, cls.po = Dictionary(lang='de'), ParseOptions()

    @classmethod
    def tearDownClass(cls):
        del cls.d, cls.po

    def parse_sent(self, text):
        """Return the list of linkages of text, parsed with self.d and self.po."""
        return list(Sentence(text, self.d, self.po).parse())

    def test_a_getting_num_of_words(self):
        # Words include punctuation and a 'LEFT-WALL' and 'RIGHT-WALL'
        self.assertEqual(self.parse_sent('Dies ist den Traum.')[0].num_of_words(), 7)
        self.assertEqual(self.parse_sent('Der Hund jagte ihn durch den Park.')[0].num_of_words(), 10)

    def test_b_getting_words(self):
        self.assertEqual(list(self.parse_sent('Der Hund jagte ihn durch den Park.')[0].words()),
            ['LEFT-WALL', 'der.d', 'Hund.n', 'jagte.s', 'ihn', 'durch',
             'den.d', 'Park.n', '.', 'RIGHT-WALL'])

    def test_c_getting_links(self):
        sent = 'Dies ist den Traum.'
        linkage = self.parse_sent(sent)[0]
        self.assertEqual(linkage.link(0),
                         Link(linkage, 0, 'LEFT-WALL','Xp','Xp','.'))
        self.assertEqual(linkage.link(1),
                         Link(linkage, 1, 'LEFT-WALL','W','W','ist.v'))
        self.assertEqual(linkage.link(2),
                         Link(linkage, 2, 'dies','Ss','Ss','ist.v'))
        self.assertEqual(linkage.link(3),
                         Link(linkage, 3, 'ist.v','O','O','Traum.n'))
        self.assertEqual(linkage.link(4),
                         Link(linkage, 4, 'den.d','Dam','Dam','Traum.n'))
        self.assertEqual(linkage.link(5),
                         Link(linkage, 5, '.','RW','RW','RIGHT-WALL'))

class ZLTLangTestCase(unittest.TestCase):
    def setUp(self):
        self.d, self.po = Dictionary(lang='lt'), ParseOptions()

    # Reads linkages from a test-file.
    def test_getting_links(self):
        linkage_testfile(self, self.d, self.po)

# Tests are run in alphabetical order; do the language tests last.
class ZRULangTestCase(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.d, cls.po = Dictionary(lang='ru'), ParseOptions()

    @classmethod
    def tearDownClass(cls):
        del cls.d, cls.po

    def parse_sent(self, text):
        """Return the list of linkages of text, parsed with self.d and self.po."""
        return list(Sentence(text, self.d, self.po).parse())

    def test_a_getting_num_of_words(self):
        self.po.display_morphology = False
        # Words include punctuation and a 'LEFT-WALL' and 'RIGHT-WALL'
        self.assertEqual(self.parse_sent('это тести.')[0].num_of_words(), 5)
        self.assertEqual(self.parse_sent('вверху плыли редкие облачка.')[0].num_of_words(), 7)

    def test_b_getting_words(self):
        self.po.display_morphology = False
        self.assertEqual(list(self.parse_sent('вверху плыли редкие облачка.')[0].words()),
            ['LEFT-WALL', 'вверху.e', 'плыли.vnndpp', 'редкие.api',
             'облачка.ndnpi', '.', 'RIGHT-WALL'])

    def test_c_getting_links(self):
        self.po.display_morphology = False
        sent = 'вверху плыли редкие облачка.'
        linkage = self.parse_sent(sent)[0]
        self.assertEqual(linkage.link(0),
                         Link(linkage, 0, 'LEFT-WALL','Xp','Xp','.'))
        self.assertEqual(linkage.link(1),
                         Link(linkage, 1, 'LEFT-WALL','W','Wd','плыли.vnndpp'))
        self.assertEqual(linkage.link(2),
                         Link(linkage, 2, 'вверху.e','EI','EI','плыли.vnndpp'))
        self.assertEqual(linkage.link(3),
                         Link(linkage, 3, 'плыли.vnndpp','SIp','SIp','облачка.ndnpi'))
        self.assertEqual(linkage.link(4),
                         Link(linkage, 4, 'редкие.api','Api','Api','облачка.ndnpi'))
        self.assertEqual(linkage.link(5),
                         Link(linkage, 5, '.','RW','RW','RIGHT-WALL'))

    # Expect morphological splitting to apply.
    def test_d_morphology(self):
        self.po.display_morphology = True
        self.assertEqual(list(self.parse_sent('вверху плыли редкие облачка.')[0].words()),
            ['LEFT-WALL',
             'вверху.e',
             'плы.=', '=ли.vnndpp',
             'ре.=', '=дкие.api',
             'облачк.=', '=а.ndnpi',
             '.', 'RIGHT-WALL'])

class ZXDictDialectTestCase(unittest.TestCase):
    def test_dialect(self):
        linkage_testfile(self, Dictionary(lang='en'), ParseOptions(dialect='headline'), 'dialect')

#############################################################################

def linkage_testfile(self, lgdict, popt, desc=''):
    """
    Reads sentences and their corresponding
    linkage diagrams / constituent printings.

    The test file is "parses-<desc>-<lang>.txt" ("parses-<lang>.txt" when
    desc is empty) under clg.test_data_srcdir, where <lang> is the language
    of lgdict.  The first character of each line is an opcode:
      I  input sentence; it is parsed with lgdict and popt
      N  advance to the next linkage of the last input sentence
      O  expected diagram line; an empty O line ends the diagram
      C  expected constituent-tree line; an empty C line ends it
      P  expected word-position line; an empty P line ends it
      -  a parse option assignment, applied to popt
      %  comment (empty lines are ignored too)

    self   -- the running unittest.TestCase; used for the assertions.
    lgdict -- the Dictionary to parse with.
    popt   -- the ParseOptions to parse with; "-" lines modify it.
    desc   -- optional infix of the test-file name.
    """
    self.__class__.longMessage = True
    if desc != '':
        desc = desc + '-'
    testfile = clg.test_data_srcdir + "parses-" + desc + clg.dictionary_get_lang(lgdict._obj) + ".txt"
    diagram = None
    constituents = None
    wordpos = None
    sent = None
    lineno = 0
    last_opcode = None
    # Bound here so that a result line which precedes any "I" line fails
    # through an assertion instead of an unbound-variable error.
    linkages = None
    linkage = None

    def getwordpos(lkg):
        """Format "word(start, end)" of all words: a character-position line
        followed by a byte-position line."""
        words_char = []
        words_byte = []
        for wi, w in enumerate(lkg.words()):
            words_char.append(w + str((int(lkg.word_char_start(wi)), int(lkg.word_char_end(wi)))))
            words_byte.append(w + str((int(lkg.word_byte_start(wi)), int(lkg.word_byte_end(wi)))))
        return ' '.join(words_char) + '\n' + ' '.join(words_byte) + '\n'

    # Function code and file format sanity check
    def validate_opcode(opcode):
        if opcode != ord('O'):
            self.assertFalse(diagram, "at {}:{}: Unfinished diagram entry".format(testfile, lineno))
        if opcode != ord('C'):
            self.assertFalse(constituents, "at {}:{}: Unfinished constituents entry".format(testfile, lineno))
        if opcode != ord('P'):
            self.assertFalse(wordpos, "at {}:{}: Unfinished word-position entry".format(testfile, lineno))

    with open(testfile, 'rb') as parses_file:
        parses = parses_file.readlines()

    for line in parses:
        lineno += 1
        line = line.decode('utf-8')

        validate_opcode(ord(line[0])) # Use ord() for python2/3 compatibility
        if line[0] in 'INOCP':
            last_opcode = line[0]

        # Lines starting with I are the input sentences
        if line[0] == 'I':
            sent = line[1:].rstrip('\r\n') # Strip whitespace before RIGHT-WALL (for P)
            diagram = ""
            constituents = ""
            wordpos = ""
            if popt.verbosity > 1:
                print('Sentence:', sent)
            linkages = Sentence(sent, lgdict, popt).parse()
            linkage = next(linkages, None)

        # Generate the next linkage of the last input sentence
        elif line[0] == 'N':
            diagram = ""
            constituents = ""
            wordpos = ""
            linkage = next(linkages, None)
            self.assertTrue(linkage, "at {}:{}: Sentence has too few linkages".format(testfile, lineno))

        # Lines starting with O are the parse diagram
        # It ends with an empty line
        elif line[0] == 'O':
            diagram += line[1:]
            if line[1] == '\n':
                # NOTE(review): every chunk appended to diagram ends with a
                # newline and this chunk is a bare newline, so the comparison
                # below looks unreachable -- confirm the intended marker for
                # "no linkages expected".
                if diagram == 'C\nC\n':
                    self.assertFalse(linkage)
                    diagram = None
                elif len(diagram) > 2:
                    self.assertTrue(linkage, "at {}:{}: Sentence has no linkages".format(testfile, lineno))
                    self.assertEqual(linkage.diagram(), diagram, "at {}:{}".format(testfile, lineno))
                    diagram = None

        # Lines starting with C are the constituent output (type 1)
        # It ends with an empty line
        elif line[0] == 'C':
            if line[1] == '\n' and len(constituents) > 1:
                self.assertEqual(linkage.constituent_tree(), constituents, "at {}:{}".format(testfile, lineno))
                constituents = None
            else:
                constituents += line[1:]

        # Lines starting with P contain word positions "word(start, end) ... "
        # The first P line contains character positions
        # The second P line contains byte positions
        # It ends with an empty line
        elif line[0] == 'P':
            if line[1] == '\n' and len(wordpos) > 1:
                self.assertEqual(getwordpos(linkage), wordpos, "at {}:{}".format(testfile, lineno))
                wordpos = None
            else:
                wordpos += line[1:]

        # Lines starting with "-" contain a Parse Option
        elif line[0] == '-':
            # The rest of the line is executed as "popt.<rest>", with empty
            # globals and this function's locals (the namespaces named by
            # the former Python 2 "exec ... in {}, locals()" statement).
            # NOTE: this runs text from the test file as code; the test
            # files must be trusted.
            exec('popt.' + line[1:], {}, locals())

        elif line[0] in '%\r\n':
            pass
        else:
            self.fail('\nTest file "{}": Invalid opcode "{}" (ord={})'.format(testfile, line[0], ord(line[0])))

    # A file without any I/N/O/C/P line would otherwise make assertIn()
    # raise TypeError on None instead of reporting a test failure.
    if last_opcode is None:
        self.fail("Missing result comparison in " + testfile)
    self.assertIn(last_opcode , 'OCP', "Missing result comparison in " + testfile)

def warning(*msg):
    """Print msg to stderr, prefixed by the program name and "Warning:"."""
    progname = os.path.basename(sys.argv[0])
    print("{}: Warning:".format(progname), *msg, file=sys.stderr)

import tempfile

class divert_start(object):
    """ Output diversion. """
    def __init__(self, fd):
        """ Divert a file descriptor.
        The created object is used for restoring the original file descriptor.
        """
        # Set first: divert_end() is also __del__, and must be a no-op
        # when the setup below fails part-way.
        self.filename = None
        self.fd = fd
        self.savedfd = os.dup(fd)
        (newfd, filename) = tempfile.mkstemp(text=False)
        try:
            os.dup2(newfd, fd)
        except OSError:
            os.close(self.savedfd)
            os.unlink(filename)
            raise
        finally:
            os.close(newfd)
        self.filename = filename

    def divert_end(self):
        """ Restore a previous diversion and return its content.
        An empty string is returned if the diversion has already ended.
        """
        if not self.filename:
            return ""
        try:
            # os.lseek() takes (fd, position, whence).
            os.lseek(self.fd, 0, os.SEEK_SET)
            content = os.read(self.fd, 1024) # 1024 is more than needed
        finally:
            # Always give the descriptor back, even if reading failed.
            os.dup2(self.savedfd, self.fd)
            os.close(self.savedfd)
            os.unlink(self.filename)
            self.filename = None
        # NOTE(review): under Python 3 str() of a bytes object yields its
        # repr ("b'...'"); callers only do substring checks on the result.
        return str(content)

    __del__ = divert_end


# Decorate Sentence.parse with eqcost_sorted_parse.
lg_testutils.add_eqcost_linkage_order(Sentence)

# For testing development branches, it may be sometimes useful to use the
# "test", "debug" and "verbosity" options.
# The following allows specifying them
# as "tests.py" arguments, interleaved with standard "unittest" arguments.

def _pop_option(argv, name):
    """
    Remove every "-<name>=<value>" argument from argv (in place; argv[0]
    is never examined) and return the value of the last one, or '' if
    there is none.

    The list is rebuilt instead of popped during iteration, so the result
    does not depend on where the option appears among the arguments.
    """
    prefix = '-' + name + '='
    value = ''
    kept = []
    for arg in argv[1:]:
        if arg.startswith(prefix):
            value = arg[len(prefix):]
        else:
            kept.append(arg)
    argv[1:] = kept
    return value

debug = _pop_option(sys.argv, 'debug')
test = _pop_option(sys.argv, 'test')
verbosity = _pop_option(sys.argv, 'verbosity')
# As before, an absent option is '' and a given one is an int.
verbosity = int(verbosity) if verbosity else ''
if (test or debug or verbosity):
    ParseOptions = lg_testutils.add_test_option(ParseOptions, test, debug, verbosity)

unittest.main()