1#!/usr/bin/env python
2# coding: utf8
3"""Python3 link-grammar test script"""
4
5from __future__ import print_function
6import sys, os, re
7import locale
8import unittest
9
10# assertRaisesRegexp and assertRegexpMatches have been renamed in
11# unittest for python 3, but not in python 2 (at least yet).
12if hasattr(unittest.TestCase, 'assertRaisesRegex'):
13    unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
14    unittest.TestCase.assertRegexpMatches = unittest.TestCase.assertRegex
15
16import lg_testutils # Found in the same directory of this test script
17
18# Show information on this program run
# Report the interpreter, the script location and the relevant environment.
print('Running by: {}'.format(sys.executable))
print('Running', sys.argv[0], 'in:', os.getcwd())
for v in ('PYTHONPATH', 'srcdir', 'LINK_GRAMMAR_DATA'):
    # An unset variable is shown as "None".
    print(v, os.environ.get(v), sep='=')
23#===
24
25from linkgrammar import (Sentence, Linkage, ParseOptions, Link, Dictionary,
26                         LG_Error, LG_DictionaryError, LG_TimerExhausted,
27                         Clinkgrammar as clg)
28
print(clg.linkgrammar_get_configuration())

# Show the location and version of the bindings modules.
# Each entry below is a regex suffix that is matched against the names of
# the loaded modules, optionally prefixed by the "linkgrammar." package.
# A snapshot of sys.modules is scanned, since iterating over the live
# dictionary may raise if its size changes during the iteration.
loaded_modules = list(sys.modules.items())
for imported_module in 'linkgrammar$', 'clinkgrammar', '_clinkgrammar', 'lg_testutils':
    module_found = False
    for module, module_object in loaded_modules:
        if re.search(r'^(linkgrammar\.)?'+imported_module, module):
            print("Using", module_object, end='')
            if hasattr(module_object, '__version__'):
                print(' version', module_object.__version__, end='')
            print()
            module_found = True
    if not module_found:
        print("Warning: Module", imported_module,  "not loaded.")

# Make this report appear before any output of the tests.
sys.stdout.flush()
45#===
46
def setUpModule():
    """Module-level fixture: configure unittest and the test data locations.

    - Disable the truncation of assertion diffs.
    - If LINK_GRAMMAR_DATA is set to a non-empty value, use it as the
      dictionary data directory.
    - Store in clg.test_data_srcdir the value of the "srcdir" environment
      variable (by default, the directory part of sys.argv[0]), with a "/"
      appended unless it is empty.
    """
    unittest.TestCase.maxDiff = None

    data_directory = os.environ.get("LINK_GRAMMAR_DATA")
    if data_directory:
        clg.dictionary_set_data_dir(data_directory)

    source_directory = os.environ.get("srcdir", os.path.dirname(sys.argv[0]))
    # An empty value is left as is (no "/" is appended to it).
    clg.test_data_srcdir = source_directory + "/" if source_directory else source_directory
57
58# The tests are run in alphabetical order....
59#
60# First test: test the test framework itself ...
class AAALinkTestCase(unittest.TestCase):
    """Check the string form of Link objects that are built directly."""

    def test_link_display_with_identical_link_type(self):
        # Identical left and right labels are expected to be shown once.
        link = Link(None, 0, 'Left', 'Link', 'Link', 'Right')
        self.assertEqual(str(link), u'Left-Link-Right')

    def test_link_display_with_identical_link_type2(self):
        # Different left and right labels are expected to be both shown.
        link = Link(None, 0, 'Left', 'Link', 'Link*', 'Right')
        self.assertEqual(str(link), u'Left-Link-Link*-Right')
69
class AADictionaryTestCase(unittest.TestCase):
    """Check the failure mode of opening a dictionary that doesn't exist."""

    def _assert_open_fails(self, exception, lang):
        """Validate that Dictionary(lang) raises `exception` and that `lang`
        is mentioned in the text written to stderr during the attempt.

        The stderr diversion is always ended, even when the exception
        check fails, so a failure doesn't leave stderr diverted for the
        rest of the run.
        """
        save_stderr = divert_start(2)
        try:
            self.assertRaises(exception, Dictionary, lang)
        finally:
            captured = save_stderr.divert_end()
        self.assertIn(lang, captured)

    def test_open_nonexistent_dictionary(self):
        dummy_lang = "No such language test "

        # The failure is checked both by its specific exception class and
        # by the more general LG_Error.
        self._assert_open_fails(LG_DictionaryError, dummy_lang + '1')
        self._assert_open_fails(LG_Error, dummy_lang + '2')
81
82# Check absolute and relative dictionary access.
83# Check also that the dictionary language is set correctly.
84#
85# We suppose here that this test program is located somewhere in the source
86# directory, 1 to 4 levels under it, that the data directory is named 'data',
87# and that it has a parallel directory called 'link-grammar'.
DATA_DIR = 'data'
PARALLEL_DIR = 'link-grammar'
class ABDictionaryLocationTestCase(unittest.TestCase):
    """Check opening the 'en' dictionary by absolute and relative paths.

    The tests change the current directory. The original one is recorded
    by setUpClass() and restored by tearDownClass().
    """
    abs_datadir = None

    @classmethod
    def setUpClass(cls):
        """Locate the DATA_DIR directory, 1 to 4 levels above the test
        source directory, and store its absolute path in cls.abs_datadir.

        Raises AssertionError if the directory is not found.
        """
        cls.po = ParseOptions(verbosity=0)
        cls.original_directory = os.getcwd()

        try:
            # Find the 'data' directory in the source directory.
            # clg.test_data_srcdir may be empty (setUpModule() leaves it so
            # when the script is invoked without a directory part), and
            # os.chdir() doesn't accept an empty path.
            os.chdir(clg.test_data_srcdir or '.')
            for levels_up in range(1, 5):
                datadir = '../' * levels_up + DATA_DIR
                if os.path.isdir(datadir):
                    cls.abs_datadir = os.path.abspath(datadir)
                    break
            else:
                # Raised explicitly since an assert statement is removed
                # when Python runs with -O.
                raise AssertionError('Cannot find source directory dictionary data')
        except Exception:
            # tearDownClass() is not invoked when setUpClass() fails, so
            # restore the current directory here.
            os.chdir(cls.original_directory)
            raise

    @classmethod
    def tearDownClass(cls):
        del cls.po
        os.chdir(cls.original_directory)

    def test_open_absolute_path(self):
        d = Dictionary(self.abs_datadir + '/en')
        self.assertEqual(str(d), 'en')
        if os.name == 'nt':
            # On Windows, check also the backslash separator.
            d = Dictionary(self.abs_datadir + r'\en')
            self.assertEqual(str(d), 'en')

    def test_open_relative_path_from_data_directory(self):
        os.chdir(self.abs_datadir)
        d = Dictionary('./en')
        self.assertEqual(str(d), 'en')
        if os.name == 'nt':
            d = Dictionary(r'.\en')
            self.assertEqual(str(d), 'en')

    def test_open_lang_from_data_directory(self):
        os.chdir(self.abs_datadir)
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')

    # Test use of the internal '..' path
    def test_open_from_a_language_directory(self):
        os.chdir(self.abs_datadir + '/ru')
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')

    def test_open_relative_path_from_data_parent_directory(self):
        os.chdir(self.abs_datadir + '/..')
        d = Dictionary('data/en')
        self.assertEqual(str(d), 'en')
        if os.name == 'nt':
            d = Dictionary(r'data\en')
            self.assertEqual(str(d), 'en')

    # Test use of the internal './data' path.
    def test_open_from_data_parent_directory(self):
        os.chdir(self.abs_datadir + '/..')
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')

    # Test use of the internal '../data' path.
    def test_open_from_a_parallel_directory(self):
        os.chdir(self.abs_datadir + '/../' + PARALLEL_DIR)
        d = Dictionary('en')
        self.assertEqual(str(d), 'en')
162
class BParseOptionsTestCase(unittest.TestCase):
    """Check setting the ParseOptions properties and their validation.

    Every check is done on a fresh ParseOptions object.
    """

    def _check_stored(self, option, value):
        """Set `option` to `value` and validate it through the matching
        clg.parse_options_get_<option>() function."""
        po = ParseOptions()
        setattr(po, option, value)
        getter = getattr(clg, 'parse_options_get_' + option)
        self.assertEqual(getter(po._obj), value)

    def _check_boolean(self, option):
        """Set `option` to True and then to False, validating each time
        both the property value and the value reported by clg (1 or 0)."""
        po = ParseOptions()
        getter = getattr(clg, 'parse_options_get_' + option)
        for flag in (True, False):
            setattr(po, option, flag)
            self.assertEqual(getattr(po, option), flag)
            self.assertEqual(getter(po._obj), int(flag))

    def _check_rejected(self, exception, option, bad_value):
        """Validate that setting `option` to `bad_value` raises `exception`."""
        po = ParseOptions()
        with self.assertRaises(exception):
            setattr(po, option, bad_value)

    def test_setting_verbosity(self):
        po = ParseOptions()
        po.verbosity = 2
        # The property should report the value, and it should actually
        # be set in the underlying object.
        self.assertEqual(po.verbosity, 2)
        self.assertEqual(clg.parse_options_get_verbosity(po._obj), 2)

    def test_setting_verbosity_to_not_allow_value_raises_value_error(self):
        self._check_rejected(ValueError, "verbosity", 121)

    def test_setting_verbosity_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "verbosity", "a")

    def test_setting_linkage_limit(self):
        self._check_stored("linkage_limit", 3)

    def test_setting_linkage_limit_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "linkage_limit", "a")

    def test_setting_linkage_limit_to_negative_number_raises_value_error(self):
        self._check_rejected(ValueError, "linkage_limit", -1)

    def test_setting_disjunct_cost(self):
        self._check_stored("disjunct_cost", 3.0)

    def test_setting_disjunct_cost_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "disjunct_cost", "a")

    def test_setting_min_null_count(self):
        self._check_stored("min_null_count", 3)

    def test_setting_min_null_count_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "min_null_count", "a")

    def test_setting_min_null_count_to_negative_number_raises_value_error(self):
        self._check_rejected(ValueError, "min_null_count", -1)

    def test_setting_max_null_count(self):
        self._check_stored("max_null_count", 3)

    def test_setting_max_null_count_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "max_null_count", "a")

    def test_setting_max_null_count_to_negative_number_raises_value_error(self):
        self._check_rejected(ValueError, "max_null_count", -1)

    def test_setting_short_length(self):
        self._check_stored("short_length", 3)

    def test_setting_short_length_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "short_length", "a")

    def test_setting_short_length_to_negative_number_raises_value_error(self):
        self._check_rejected(ValueError, "short_length", -1)

    def test_setting_islands_ok(self):
        self._check_boolean("islands_ok")

    def test_setting_islands_ok_to_non_boolean_raises_type_error(self):
        self._check_rejected(TypeError, "islands_ok", "a")

    def test_setting_max_parse_time(self):
        self._check_stored("max_parse_time", 3)

    def test_setting_max_parse_time_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "max_parse_time", "a")

    def test_setting_spell_guess_to_non_integer_raises_type_error(self):
        self._check_rejected(TypeError, "spell_guess", "a")

    def test_setting_display_morphology(self):
        self._check_boolean("display_morphology")

    def test_setting_all_short_connectors(self):
        self._check_boolean("all_short_connectors")

    def test_setting_all_short_connectors_to_non_boolean_raises_type_error(self):
        self._check_rejected(TypeError, "all_short_connectors", "a")

    def test_setting_spell_guess(self):
        enabled = ParseOptions(spell_guess=True)
        if enabled.spell_guess == 0:
            self.skipTest("Library is not configured with spell guess")
        # True is expected to be reported as 7.
        self.assertEqual(enabled.spell_guess, 7)
        explicit = ParseOptions(spell_guess=5)
        self.assertEqual(explicit.spell_guess, 5)
        disabled = ParseOptions(spell_guess=False)
        self.assertEqual(disabled.spell_guess, 0)

    def test_specifying_parse_options(self):
        # An option given to the constructor should be actually set.
        po = ParseOptions(linkage_limit=99)
        self.assertEqual(clg.parse_options_get_linkage_limit(po._obj), 99)
302
class CParseOptionsTestCase(unittest.TestCase):
    """Check object lifetime handling and the rejection of bad options."""

    def test_that_sentence_can_be_destroyed_when_linkages_still_exist(self):
        """
        If the parser is deleted before the associated swig objects
        are, there will be bad pointer dereferences (as the swig
        objects will be pointing into freed memory).  This test ensures
        that parsers can be created and deleted without regard for
        the existence of PYTHON Linkage objects
        """
        #pylint: disable=unused-variable
        sentence = Sentence('This is a sentence.', Dictionary(), ParseOptions())
        # The parse result is kept referenced while the sentence is deleted.
        linkages = sentence.parse()
        del sentence

    def test_that_invalid_options_are_disallowed(self):
        with self.assertRaisesRegexp(TypeError, "unexpected keyword argument"):
            ParseOptions(invalid_option=1)

    def test_that_invalid_option_properties_cannot_be_used(self):
        po = ParseOptions()
        with self.assertRaisesRegexp(TypeError, "Unknown parse option"):
            po.invalid_option = 1

    def test_that_ParseOptions_cannot_get_positional_arguments(self):
        with self.assertRaisesRegexp(TypeError, "Positional arguments are not allowed"):
            ParseOptions(1)
330
class DBasicParsingTestCase(unittest.TestCase):
    """Basic parsing checks, using one Dictionary() shared by all the tests."""

    @classmethod
    def setUpClass(cls):
        cls.d, cls.po = Dictionary(), None

    @classmethod
    def tearDownClass(cls):
        del cls.d, cls.po

    def parse_sent(self, text, po=None):
        """Parse `text` and return the list of its linkages.

        A new ParseOptions() is used if `po` is not given.
        """
        if po is None:
            po = ParseOptions()
        return list(Sentence(text, self.d, po).parse())

    def _assert_several_linkages(self, text):
        """Validate that `text` has more than one linkage, and that the
        first two results are Linkage objects."""
        result = self.parse_sent(text)
        # Checked first, so too few linkages are reported as a test
        # failure and not as an IndexError.
        self.assertGreater(len(result), 1, repr(text))
        self.assertIsInstance(result[0], Linkage)
        self.assertIsInstance(result[1], Linkage)

    def test_that_parse_returns_empty_iterator_on_no_linkage(self):
        """Parsing a bad sentence with no null-links shouldn't give any linkage."""
        result = self.parse_sent("This this doesn't parse")
        self.assertEqual(len(result), 0, "Unparsable sentence has linkages.")

    def test_that_parse_returns_empty_iterator_on_no_linkage_sat(self):
        """Parsing a bad sentence with no null-links shouldn't give any linkage (sat)"""
        po = ParseOptions(use_sat=True)
        if po.use_sat != True:
            raise unittest.SkipTest("Library not configured with SAT parser")
        result = self.parse_sent("This this doesn't parse", po)
        self.assertEqual(len(result), 0, "SAT: Unparsable sentence has linkages.")

    def test_that_parse_sent_returns_list_of_linkage_objects_for_valid_sentence(self):
        self._assert_several_linkages("This is a relatively simple sentence.")

    def test_utf8_encoded_string(self):
        """Parse sentences with non-ASCII and with unknown words."""
        for text in (
                # UTF-8 encoded string
                "I love going to the café.",
                # Unicode string
                u"I love going to the caf\N{LATIN SMALL LETTER E WITH ACUTE}.",
                # Unknown word
                "I love going to the qertfdwedadt.",
                # Unknown euro UTF-8 word
                "I love going to the qéáéğíóşúüñ.",
                # Unknown cyrillic UTF-8 word
                "I love going to the доктором.",
        ):
            self._assert_several_linkages(text)

    def test_getting_link_distances(self):
        linkage = self.parse_sent("This is a sentence.")[0]
        self.assertEqual([len(l) for l in linkage.links()], [5,2,1,1,2,1,1])
        linkage = self.parse_sent("This is a silly sentence.")[0]
        self.assertEqual([len(l) for l in linkage.links()], [6,2,1,1,3,2,1,1,1])

    # If \w is supported, other \ shortcuts are hopefully supported too.
    def test_regex_class_shortcut_support(self):
        r"""Test that regexes support \w"""
        po = ParseOptions(display_morphology=False)
        linkage = self.parse_sent("This is a _regex_ive regex test", po)[0]
        self.assertEqual(linkage.word(4), '_regex_ive[!].a')

    def test_timer_exhausted_exception(self):
        """Parsing a long text with a 1 second limit should raise
        LG_TimerExhausted."""
        long_text = ("This sentence parses without null words, "
                     "and should take more than one second to parse!") * 14
        po = ParseOptions(max_parse_time=1, short_length=255,
                          disjunct_cost=10.0, linkage_limit=10000)
        with self.assertRaises(LG_TimerExhausted):
            self.parse_sent(long_text, po)
418
# The tests here are numbered since their order is important.
# They depend on the result and state of the previous ones as follows:
# - set_handler() returns a value that depends on its previous invocation.
# - A class variable "handler" records its previous results.
class EErrorFacilityTestCase(unittest.TestCase):
    """Check the LG_Error facility: setting an error handler, and
    printall(), message(), formatmsg(), clearall() and flush().

    The tests depend on the error handler state that is left by the tests
    before them, hence their numbering.
    """
    # Initialize class variables to invalid (for the test) values.
    handler = {
        "default":  lambda x, y=None: None,
        "previous": lambda x, y=None: None
    }

    def setUp(self):
        self.testit = "testit"
        self.testleaks = 0  # A repeat count for validating no memory leaks
        self.numerr = 0
        self.errinfo = clg.lg_None

    @staticmethod
    def error_handler_test(errinfo, data):
        # A test error handler.  It assigns the errinfo struct as an attribute
        # of its data so it can be examined after the call. In addition, the
        # ability of the error handler to use its data argument is tested by
        # the "testit" attribute.
        if data is None:
            return
        data.errinfo = errinfo
        data.gotit = data.testit

    def test_10_set_error_handler(self):
        # Set the error handler and validate that it
        # gets the error info and the data.
        self.__class__.handler["default"] = \
            LG_Error.set_handler(self.error_handler_test, self)
        self.assertEqual(self.__class__.handler["default"].__name__,
                         "_default_handler")
        self.gotit = None
        self.assertRaises(LG_Error, Dictionary, "seh_dummy1")
        self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Error, "Error"))
        self.assertEqual(self.gotit, "testit")
        self.assertRegexpMatches(self.errinfo.text, "Could not open dictionary.*seh_dummy1")

    def test_20_set_error_handler_None(self):
        # Set the error handler to None and validate that printall()
        # gets the error info and the data and returns the number of errors.
        self.__class__.handler["previous"] = LG_Error.set_handler(None)
        self.assertEqual(self.__class__.handler["previous"].__name__, "error_handler_test")
        self.assertRaises(LG_Error, Dictionary, "seh_dummy2")
        self.gotit = None
        for i in range(0, 2+self.testleaks):
            self.numerr = LG_Error.printall(self.error_handler_test, self)
            if i == 0:
                self.assertEqual(self.numerr, 1)
            if i == 1:
                self.assertEqual(self.numerr, 0)
        self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Error, "Error"))
        self.assertEqual(self.gotit, "testit")
        self.assertRegexpMatches(self.errinfo.text, ".*seh_dummy2")

    def test_21_set_error_handler_None(self):
        # Further test of correct number of errors.
        # The expected count is kept in its own variable, so the result of
        # printall() is not compared to itself.
        expected_numerr = 3
        for _ in range(0, expected_numerr):
            self.assertRaises(LG_Error, Dictionary, "seh_dummy2")
        self.numerr = LG_Error.printall(self.error_handler_test, None)
        self.assertEqual(self.numerr, expected_numerr)

    def test_22_defaut_handler_param(self):
        """Test bad data parameter to default error handler"""
        # (It should be an integer >=0 and <= lg_None.)
        # Here the error handler is still set to None.

        # This test doesn't work - TypeError is somehow raised inside
        # linkgrammar.py when _default_handler() is invoked as a callback.
        #
        #LG_Error.set_handler(self.__class__.handler["default"], "bad param")
        #with self.assertRaises(TypeError):
        #    try:
        #        Dictionary("a dummy dict name (bad param test)")
        #    except LG_Error:
        #        pass

        # So test it directly.

        default_handler = self.__class__.handler["default"]
        dummy_lang = "a dummy dict name (bad param test)"
        self.assertRaises(LG_Error, Dictionary, dummy_lang)
        LG_Error.printall(self.error_handler_test, self) # grab a valid errinfo
        self.assertRaisesRegexp(TypeError, "must be an integer",
                                default_handler, self.errinfo, "bad param")
        self.assertRaisesRegexp(ValueError, "must be an integer",
                                default_handler, self.errinfo, clg.lg_None+1)
        self.assertRaises(ValueError, default_handler, self.errinfo, -1)

        # A valid parameter must be accepted.
        save_stdout = divert_start(1) # Note: Handler parameter is stdout
        try:
            default_handler(self.errinfo, 1)
        except (TypeError, ValueError) as rejection:
            # End the diversion before failing, so stdout doesn't remain
            # diverted for the rest of the run.
            save_stdout.divert_end()
            self.fail("Default handler rejected a valid parameter: {}".format(rejection))
        self.assertIn(dummy_lang, save_stdout.divert_end())

    def test_23_prt_error(self):
        LG_Error.message("Info: prt_error test\n")
        LG_Error.printall(self.error_handler_test, self)
        self.assertRegexpMatches(self.errinfo.text, "prt_error test\n")
        self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Info, "Info"))

    def test_24_prt_error_in_parts(self):
        LG_Error.message("Trace: part one... ")
        LG_Error.message("part two\n")
        LG_Error.printall(self.error_handler_test, self)
        self.assertEqual(self.errinfo.text, "part one... part two\n")
        self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Trace, "Trace"))

    def test_25_prt_error_in_parts_with_embedded_newline(self):
        LG_Error.message("Trace: part one...\n\\")
        LG_Error.message("part two\n")
        LG_Error.printall(self.error_handler_test, self)
        self.assertEqual(self.errinfo.text, "part one...\npart two\n")
        self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_Trace, "Trace"))

    def test_26_prt_error_plain_message(self):
        LG_Error.message("This is a regular output line.\n")
        LG_Error.printall(self.error_handler_test, self)
        self.assertEqual(self.errinfo.text, "This is a regular output line.\n")
        self.assertEqual((self.errinfo.severity, self.errinfo.severity_label), (clg.lg_None, ""))

    def test_30_formatmsg(self):
        # Here the error handler is still set to None.
        for _ in range (0, 1+self.testleaks):
            self.assertRaises(LG_Error, Dictionary, "formatmsg-test-dummy-dict")
            LG_Error.printall(self.error_handler_test, self)
            self.assertRegexpMatches(self.errinfo.formatmsg(), "link-grammar: Error: .*formatmsg-test-dummy-dict")

    def test_40_clearall(self):
        # Here the error handler is still set to None.
        # Call LG_Error.clearall() and validate it indeed clears the error.
        self.assertRaises(LG_Error, Dictionary, "clearall-test-dummy-dict")
        LG_Error.clearall()
        self.testit = "clearall"
        self.numerr = LG_Error.printall(self.error_handler_test, self)
        self.assertEqual(self.numerr, 0)
        # The test handler was not invoked, so it didn't set "gotit".
        self.assertFalse(hasattr(self, "gotit"))

    def test_41_flush(self):
        # Here the error handler is still set to None.
        # First validate that nothing gets flushed (no error is buffered at this point).
        self.flushed = LG_Error.flush()
        self.assertEqual(self.flushed, False)
        # Now generate a partial error message that is still buffered.
        LG_Error.message("This is a partial error message.")
        # Validate that it is still hidden.
        self.numerr = LG_Error.printall(self.error_handler_test, self)
        self.assertEqual(self.numerr, 0)
        self.assertFalse(hasattr(self, "gotit"))
        # Flush it.
        self.flushed = LG_Error.flush()
        self.assertEqual(self.flushed, True)
        self.numerr = LG_Error.printall(self.error_handler_test, self)
        self.assertEqual(self.numerr, 1)
        self.assertRegexpMatches(self.errinfo.text, "partial")

    def test_50_set_orig_error_handler(self):
        # Set the error handler back to the default handler.
        # The error message is now visible (but we cannot test that).
        self.__class__.handler["previous"] = LG_Error.set_handler(self.__class__.handler["default"])
        self.assertIsNone(self.__class__.handler["previous"])
        for _ in range(0, 1+self.testleaks):
            self.__class__.handler["previous"] = LG_Error.set_handler(self.__class__.handler["default"])
        self.assertEqual(self.__class__.handler["previous"].__name__, "_default_handler")

        self.errinfo = "dummy"
        dummy_lang = "a dummy dict name (default handler test)"
        save_stderr = divert_start(2)
        try:
            self.assertRaises(LG_Error, Dictionary, dummy_lang)
        finally:
            # Always end the diversion, so a failure here doesn't leave
            # stderr diverted.
            captured = save_stderr.divert_end()
        self.assertIn(dummy_lang, captured)
        # The test handler is not in effect, so errinfo is unchanged.
        self.assertEqual(self.errinfo, "dummy")
600
class FSATsolverTestCase(unittest.TestCase):
    """Check the linkages that are produced by the SAT parser."""

    def setUp(self):
        """Skip the test if the library doesn't honor use_sat=True.

        The parse options are created once, and the dictionary is
        opened only if the test is not skipped.
        """
        self.po = ParseOptions(use_sat=True)
        if self.po.use_sat != True:
            raise unittest.SkipTest("Library not configured with SAT parser")
        self.d = Dictionary(lang='en')

    def test_SAT_getting_links(self):
        linkage_testfile(self, self.d, self.po, 'sat')
610
611class HEnglishLinkageTestCase(unittest.TestCase):
612    @classmethod
613    def setUpClass(cls):
614        cls.d, cls.po = Dictionary(), ParseOptions(linkage_limit=1000, display_morphology=False)
615
616    @classmethod
617    def tearDownClass(cls):
618        del cls.d, cls.po
619
620    def parse_sent(self, text):
621        return list(Sentence(text, self.d, self.po).parse())
622
623    def test_a_getting_words(self):
624        self.assertEqual(list(self.parse_sent('This is a sentence.')[0].words()),
625             ['LEFT-WALL', 'this.p', 'is.v', 'a', 'sentence.n', '.', 'RIGHT-WALL'])
626
627    def test_b_getting_num_of_words(self):
628        #Words include punctuation and a 'LEFT-WALL' and 'RIGHT_WALL'
629        self.assertEqual(self.parse_sent('This is a sentence.')[0].num_of_words(), 7)
630
631    def test_c_getting_links(self):
632        sent = 'This is a sentence.'
633        linkage = self.parse_sent(sent)[0]
634        self.assertEqual(linkage.link(0),
635                         Link(linkage, 0, 'LEFT-WALL','Xp','Xp','.'))
636        self.assertEqual(linkage.link(1),
637                         Link(linkage, 1, 'LEFT-WALL','hWV','dWV','is.v'))
638        self.assertEqual(linkage.link(2),
639                         Link(linkage, 2, 'LEFT-WALL','hWd','Wd','this.p'))
640        self.assertEqual(linkage.link(3),
641                         Link(linkage, 3, 'this.p','Ss*b','Ss','is.v'))
642        self.assertEqual(linkage.link(4),
643                         Link(linkage, 4, 'is.v','O*m','Os','sentence.n'))
644        self.assertEqual(linkage.link(5),
645                         Link(linkage, 5, 'a','Ds**c','Ds**c','sentence.n'))
646        self.assertEqual(linkage.link(6),
647                         Link(linkage, 6, '.','RW','RW','RIGHT-WALL'))
648
649    def test_d_spell_guessing_on(self):
650        self.po.spell_guess = 7
651        if self.po.spell_guess == 0:
652            raise unittest.SkipTest("Library is not configured with spell guess")
653        result = self.parse_sent("I love going to shoop.")
654        resultx = result[0] if result else []
655        for resultx in result:
656            if resultx.word(5) == 'shop[~].v':
657                break
658        self.assertEqual(list(resultx.words()) if resultx else [],
659             ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shop[~].v', '.', 'RIGHT-WALL'])
660
661    def test_e_spell_guessing_off(self):
662        self.po.spell_guess = 0
663        result = self.parse_sent("I love going to shoop.")
664        self.assertEqual(list(result[0].words()),
665             ['LEFT-WALL', 'I.p', 'love.v', 'going.v', 'to.r', 'shoop[?].v', '.', 'RIGHT-WALL'])
666
667    # Stress-test first-word-capitalized in various different ways.
668    # Roughly, the test matrix is this:
669    # -- word is/isn't in dict as lower-case word
670    # -- word is/isn't in dict as upper-case word
671    # -- word is/isn't matched with CAPITALIZED_WORDS regex
672    # -- word is/isn't split by suffix splitter
673    # -- the one that is in the dict is not the grammatically appropriate word.
674    #
675    # Let's is NOT split into two! It's in the dict as one word, lower-case only.
676    def test_f_captilization(self):
677        self.assertEqual(list(self.parse_sent('Let\'s eat.')[0].words()),
678             ['LEFT-WALL', 'let\'s', 'eat.v', '.', 'RIGHT-WALL'])
679
680        # He's is split into two words, he is in dict, lower-case only.
681        self.assertEqual(list(self.parse_sent('He\'s going.')[0].words()),
682             ['LEFT-WALL', 'he', '\'s.v', 'going.v', '.', 'RIGHT-WALL'])
683
684        self.assertEqual(list(self.parse_sent('You\'re going?')[0].words()),
685             ['LEFT-WALL', 'you', '\'re', 'going.v', '?', 'RIGHT-WALL'])
686
687        # Jumbo only in dict as adjective, lower-case, but not noun.
688        self.assertEqual(list(self.parse_sent('Jumbo\'s going?')[0].words()),
689             ['LEFT-WALL', 'Jumbo[!]', '\'s.v', 'going.v', '?', 'RIGHT-WALL'])
690
691        self.assertEqual(list(self.parse_sent('Jumbo\'s shoe fell off.')[0].words()),
692             ['LEFT-WALL', 'Jumbo[!]',
693              '\'s.p', 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL'])
694
695        self.assertEqual(list(self.parse_sent('Jumbo sat down.')[0].words()),
696             ['LEFT-WALL', 'Jumbo[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL'])
697
698        # Red is in dict, lower-case, as noun, too.
699        # There's no way to really know, syntactically, that Red
700        # should be taken as a proper noun (given name).
701        #self.assertEqual(list(self.parse_sent('Red\'s going?')[0].words()),
702        #     ['LEFT-WALL', 'Red[!]', '\'s.v', 'going.v', '?', 'RIGHT-WALL'])
703        #
704        #self.assertEqual(list(self.parse_sent('Red\'s shoe fell off.')[0].words()),
705        #     ['LEFT-WALL', 'Red[!]',
706        #      '\'s.p', 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL'])
707        #
708        #self.assertEqual(list(self.parse_sent('Red sat down.')[1].words()),
709        #     ['LEFT-WALL', 'Red[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL'])
710
711        # May in dict as noun, capitalized, and as lower-case verb.
712        self.assertEqual(list(self.parse_sent('May\'s going?')[0].words()),
713             ['LEFT-WALL', 'May.f', '\'s.v', 'going.v', '?', 'RIGHT-WALL'])
714
715        self.assertEqual(list(self.parse_sent('May sat down.')[0].words()),
716             ['LEFT-WALL', 'May.f', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL'])
717
718        # McGyver is not in the dict, but is regex-matched.
719        self.assertEqual(list(self.parse_sent('McGyver\'s going?')[0].words()),
720             ['LEFT-WALL', 'McGyver[!]', '\'s.v', 'going.v', '?', 'RIGHT-WALL'])
721
722        self.assertEqual(list(self.parse_sent('McGyver\'s shoe fell off.')[0].words()),
723             ['LEFT-WALL', 'McGyver[!]',
724              '\'s.p', 'shoe.n', 'fell.v-d', 'off', '.', 'RIGHT-WALL'])
725
726        self.assertEqual(list(self.parse_sent('McGyver sat down.')[0].words()),
727             ['LEFT-WALL', 'McGyver[!]', 'sat.v-d', 'down.r', '.', 'RIGHT-WALL'])
728
729        self.assertEqual(list(self.parse_sent('McGyver Industries stock declined.')[0].words()),
730             ['LEFT-WALL', 'McGyver[!]', 'Industries[!]',
731              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL'])
732
733        # King in dict as both upper and lower case.
734        self.assertEqual(list(self.parse_sent('King Industries stock declined.')[0].words()),
735             ['LEFT-WALL', 'King.b', 'Industries[!]',
736              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL'])
737
738        # Jumbo in dict only lower-case, as adjective
739        self.assertEqual(list(self.parse_sent('Jumbo Industries stock declined.')[0].words()),
740             ['LEFT-WALL', 'Jumbo[!]', 'Industries[!]',
741              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL'])
742
743        # Thomas in dict only as upper case.
744        self.assertEqual(list(self.parse_sent('Thomas Industries stock declined.')[0].words()),
745             ['LEFT-WALL', 'Thomas.b', 'Industries[!]',
746              'stock.n-u', 'declined.v-d', '.', 'RIGHT-WALL'])
747
748    # Some parses are fractionally preferred over others...
749    def test_g_fractions(self):
750        self.assertEqual(list(self.parse_sent('A player who is injured has to leave the field')[0].words()),
751             ['LEFT-WALL', 'a', 'player.n', 'who', 'is.v', 'injured.v-d', 'has.v', 'to.r', 'leave.v', 'the', 'field.n', 'RIGHT-WALL'])
752
753        self.assertEqual(list(self.parse_sent('They ate a special curry which was recommended by the restaurant\'s owner')[0].words()),
754             ['LEFT-WALL', 'they', 'ate.v-d', 'a', 'special.a', 'curry.s',
755              'which', 'was.v-d', 'recommended.v-d', 'by', 'the', 'restaurant.n',
756              '\'s.p', 'owner.n', 'RIGHT-WALL'])
757
758    # Verify that we are getting the linkages that we want
759    # See below, remainder of parses are in text files
760    def test_h_getting_links(self):
761        sent = 'Scientists sometimes may repeat experiments or use groups.'
762        linkage = self.parse_sent(sent)[0]
763        self.assertEqual(linkage.diagram(),
764"\n    +----------------------------------------Xp---------------------------------------+"
765"\n    +---------------------------->WV---------------------------->+                    |"
766"\n    |                              +--------------I--------------+                    |"
767"\n    |           +--------Sp--------+       +<-------VJlpi<-------+                    |"
768"\n    +---->Wd----+          +---E---+       +----Op----+          +>VJrpi>+---Op--+    |"
769"\n    |           |          |       |       |          |          |       |       |    |"
770"\nLEFT-WALL scientists.n sometimes may.v repeat.v experiments.n or.j-v   use.v groups.n ."
771"\n\n")
772        sent = 'I enjoy eating bass.'
773        linkage = self.parse_sent(sent)[0]
774        self.assertEqual(linkage.diagram(),
775"\n    +-----------------Xp----------------+"
776"\n    +---->WV---->+                      |"
777"\n    +->Wd--+-Sp*i+---Pg---+---Ou---+    |"
778"\n    |      |     |        |        |    |"
779"\nLEFT-WALL I.p enjoy.v eating.v bass.n-u ."
780"\n\n")
781
782
783        sent = 'We are from the planet Gorpon'
784        linkage = self.parse_sent(sent)[0]
785        self.assertEqual(linkage.diagram(),
786"\n    +--->WV--->+     +---------Js--------+"
787"\n    +->Wd--+Spx+--Pp-+   +--DD--+---GN---+"
788"\n    |      |   |     |   |      |        |"
789"\nLEFT-WALL we are.v from the planet.n Gorpon[!]"
790"\n\n")
791
class GSQLDictTestCase(unittest.TestCase):
    # Runs the file-driven linkage checks against the 'demo-sql' dictionary.

    @classmethod
    def setUpClass(cls):
        # On Windows, only library configurations that mention "mingw"
        # are tested; any other Windows build skips this whole class.
        on_windows = os.name == 'nt'
        if on_windows and 'mingw' not in clg.linkgrammar_get_configuration().lower():
            raise unittest.SkipTest("No SQL dict support yet on the MSVC build")

        #clg.parse_options_set_verbosity(clg.parse_options_create(), 3)
        sql_dict = Dictionary(lang='demo-sql')
        options = ParseOptions()
        cls.d = sql_dict
        cls.po = options

    @classmethod
    def tearDownClass(cls):
        del cls.d
        del cls.po

    def test_getting_links(self):
        linkage_testfile(self, self.d, self.po)

    def test_getting_links_sat(self):
        sat_options = ParseOptions(use_sat=True)
        # The option is read back to find out whether it was accepted.
        sat_accepted = sat_options.use_sat == True
        if not sat_accepted:
            raise unittest.SkipTest("Library not configured with SAT parser")
        linkage_testfile(self, self.d, sat_options)
814
class IWordPositionTestCase(unittest.TestCase):
    # File-driven word-position checks for several languages.

    @classmethod
    def setUpClass(cls):
        # The English dictionary is shared by the two English tests.
        cls.d_en = Dictionary(lang='en')

    @classmethod
    def tearDownClass(cls):
        del cls.d_en

    def _check_positions(self, lgdict, popt, desc='pos'):
        # Run the test file selected by desc with the given dictionary/options.
        linkage_testfile(self, lgdict, popt, desc)

    def test_en_word_positions(self):
        self._check_positions(self.d_en, ParseOptions())

    def test_en_spell_word_positions(self):
        spell_options = ParseOptions(spell_guess=1)
        # The option reads back as 0 when the request was not honored.
        if spell_options.spell_guess == 0:
            raise unittest.SkipTest("Library is not configured with spell guess")
        self._check_positions(self.d_en, spell_options, 'pos-spell')

    def test_ru_word_positions(self):
        self._check_positions(Dictionary(lang='ru'), ParseOptions())

    def test_he_word_positions(self):
        self._check_positions(Dictionary(lang='he'), ParseOptions())
838
839# Tests are run in alphabetical order; do the language tests last.
840
class ZENLangTestCase(unittest.TestCase):
    # English-dictionary tests: file-driven linkage checks and parsing
    # with various null-link ranges.

    @classmethod
    def setUpClass(cls):
        en_dict = Dictionary(lang='en')
        options = ParseOptions()
        cls.d = en_dict
        cls.po = options

    @classmethod
    def tearDownClass(cls):
        del cls.d
        del cls.po

    def _assert_has_linkages(self, text, **options):
        # Parse text with fresh ParseOptions built from the given keyword
        # options (stored in self.po) and require at least one linkage.
        self.po = ParseOptions(**options)
        sentence = Sentence(text, self.d, self.po)
        linkage_count = len(sentence.parse())
        self.assertTrue(linkage_count > 0) # Just check no crashes or leaks

    def test_getting_links(self):
        linkage_testfile(self, self.d, self.po)

    def test_quotes(self):
        linkage_testfile(self, self.d, self.po, 'quotes')

    def test_null_link_range_starting_with_zero(self):
        """Test parsing with a minimal number of null-links, including 0."""
        # This sentence has no complete linkage. Validate that the library
        # doesn't mangle parsing with null-count>0 due to power_prune()'s
        # connector-discard optimization at null-count==0.  Without commit
        # "Allow calling classic_parse() with and w/o nulls", the number of
        # linkages here is 1 instead of 2 and the unused_word_cost is 5.
        #
        # Expected parses:
        # 1:
        #    +------------>WV------------>+
        #    +--------Wd-------+----Sp----+
        #    |                 |          |
        #LEFT-WALL [about] people.p attended.v-d
        # 2:
        #
        #            +----Sp----+
        #            |          |
        #[about] people.p attended.v-d
        self.po = ParseOptions(min_null_count=0, max_null_count=999)
        found = Sentence('about people attended', self.d, self.po).parse()
        self.assertEqual(len(found), 2)
        first = found.next()
        self.assertEqual(first.unused_word_cost(), 1)

    def test_2_step_parsing_with_null_links(self):
        # Step 1: no null links allowed, so no linkage is expected.
        self.po = ParseOptions(min_null_count=0, max_null_count=0)
        sentence = Sentence('about people attended', self.d, self.po)
        found = sentence.parse()
        self.assertEqual(len(found), 0)

        # Step 2: re-parse the same Sentence object, now allowing null links.
        self.po = ParseOptions(min_null_count=1, max_null_count=999)
        found = sentence.parse(self.po)
        self.assertEqual(len(found), 2)
        first = found.next()
        self.assertEqual(first.unused_word_cost(), 1)

    def test_1_step_parsing_with_no_null_links_short(self):
        self._assert_has_linkages('This is a test.',
                                  min_null_count=0, max_null_count=999)

    def test_1_step_parsing_with_no_null_links_long(self):
        # The final blank is essential
        self._assert_has_linkages(12 * 'This is a test. ',
                                  min_null_count=0, max_null_count=999)

    def test_1_step_parsing_with_nulls_short(self):
        self._assert_has_linkages('This a',
                                  min_null_count=0, max_null_count=999,
                                  short_length=1)

    def test_1_step_parsing_with_nulls_long(self):
        # The final blank is essential
        self._assert_has_linkages(12 * 'This is a the test ',
                                  min_null_count=0, max_null_count=999,
                                  short_length=1)
917
class JADictionaryLocaleTestCase(unittest.TestCase):
    # Parses an English sentence while the process LC_CTYPE is Turkish.

    @classmethod
    def setUpClass(cls):
        # python2: Gets system locale (getlocale() is not better)
        cls.oldlocale = locale.setlocale(locale.LC_CTYPE, None)

        if os.name == 'nt':
            tr_locale = 'Turkish'
        else:
            tr_locale = 'tr_TR.UTF-8'

        try:
            locale.setlocale(locale.LC_CTYPE, tr_locale)
        except locale.Error as err: # Most probably tr_TR.UTF-8 is not installed
            raise unittest.SkipTest("Locale {}: {}".format(tr_locale, err))

        # Debugging aid (python2): with the Turkish locale active,
        # print('toupper hij:', 'hij'.upper()) prints HiJ
        # (lowercase small i in the middle).

        en_dict = Dictionary(lang='en')
        options = ParseOptions()
        cls.d = en_dict
        cls.po = options

    @classmethod
    def tearDownClass(cls):
        # Put back the LC_CTYPE setting recorded in setUpClass.
        locale.setlocale(locale.LC_CTYPE, cls.oldlocale)
        del cls.d
        del cls.po
        del cls.oldlocale

    def test_dictionary_locale_definition(self):
        first_linkage = Sentence('Is it fine?', self.d, self.po).parse().next()
        words = list(first_linkage.words())
        self.assertEqual(words[1], 'is.v')
948
949# FIXME: Use a special testing dictionary for checks like that.
class JBDictCostReadingTestCase(unittest.TestCase):
    # Reads the English dictionary while LC_NUMERIC is set to a Russian
    # locale, then checks a parse whose result depends on the dictionary
    # costs having been converted correctly.

    @classmethod
    def setUpClass(cls):
        # Remember the category that is actually changed below (LC_NUMERIC),
        # so that tearDownClass can put it back.
        cls.oldlocale = locale.setlocale(locale.LC_NUMERIC, None)
        ru_locale = 'ru_RU.UTF-8' if os.name != 'nt' else 'Russian'
        try:
            locale.setlocale(locale.LC_NUMERIC, ru_locale)
        except locale.Error as e: # Most probably ru_RU.UTF-8 is not installed
            del cls.oldlocale
            raise unittest.SkipTest("Locale {}: {}".format(ru_locale, e))
        # The dict read must be performed after the locale change.
        try:
            cls.d, cls.po = Dictionary(lang='en'), ParseOptions()
        except Exception:
            # tearDownClass is not run when setUpClass fails, so undo the
            # locale change here before propagating the error.
            locale.setlocale(locale.LC_NUMERIC, cls.oldlocale)
            del cls.oldlocale
            raise

    @classmethod
    def tearDownClass(cls):
        locale.setlocale(locale.LC_NUMERIC, cls.oldlocale)
        del cls.d, cls.po, cls.oldlocale

    # When a comma-separator LC_NUMERIC affects the dict cost conversion,
    # the 4th word is 'white.v'.
    def test_cost_sensitive_parse(self):
        linkage = Sentence('Is the bed white?', self.d, self.po).parse().next()
        self.assertEqual(list(linkage.words())[4], 'white.a')
973
class ZENConstituentsCase(unittest.TestCase):
    # Constituent-tree output of the English dictionary.

    @classmethod
    def setUpClass(cls):
        en_dict = Dictionary(lang='en')
        options = ParseOptions()
        cls.d = en_dict
        cls.po = options

    @classmethod
    def tearDownClass(cls):
        del cls.d
        del cls.po

    def test_a_constituents_after_parse_list(self):
        """
        Validate that the post-processing data of the first linkage is not
        getting clobbered by later linkages.
        """
        expected_tree = "(S (NP this.p)\n   (VP is.v\n       (NP a test.n))\n   .)\n"
        # All the linkages are generated before the first one is inspected.
        all_linkages = list(Sentence("This is a test.", self.d, self.po).parse())
        self.assertEqual(all_linkages[0].constituent_tree(), expected_tree)
991
class ZDELangTestCase(unittest.TestCase):
    # German dictionary tests.

    @classmethod
    def setUpClass(cls):
        de_dict = Dictionary(lang='de')
        options = ParseOptions()
        cls.d = de_dict
        cls.po = options

    @classmethod
    def tearDownClass(cls):
        del cls.d
        del cls.po

    def parse_sent(self, text):
        # Return all the linkages of text as a list.
        linkages = Sentence(text, self.d, self.po).parse()
        return list(linkages)

    def test_a_getting_num_of_words(self):
        #Words include punctuation and a 'LEFT-WALL' and 'RIGHT_WALL'
        for text, word_count in (('Dies ist den Traum.', 7),
                                 ('Der Hund jagte ihn durch den Park.', 10)):
            self.assertEqual(self.parse_sent(text)[0].num_of_words(), word_count)

    def test_b_getting_words(self):
        expected_words = ['LEFT-WALL', 'der.d', 'Hund.n', 'jagte.s', 'ihn',
                          'durch', 'den.d', 'Park.n', '.', 'RIGHT-WALL']
        first_linkage = self.parse_sent('Der Hund jagte ihn durch den Park.')[0]
        self.assertEqual(list(first_linkage.words()), expected_words)

    def test_c_getting_links(self):
        # One tuple per link, in link-index order; each holds the four
        # Link() arguments that follow the linkage and the link index.
        expected_links = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'W', 'W', 'ist.v'),
            ('dies', 'Ss', 'Ss', 'ist.v'),
            ('ist.v', 'O', 'O', 'Traum.n'),
            ('den.d', 'Dam', 'Dam', 'Traum.n'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        linkage = self.parse_sent('Dies ist den Traum.')[0]
        for index, link_args in enumerate(expected_links):
            self.assertEqual(linkage.link(index),
                             Link(linkage, index, *link_args))
1029
class ZLTLangTestCase(unittest.TestCase):
    # Lithuanian dictionary tests.
    # The dictionary and parse options are created once per class and
    # released in tearDownClass, as in the other language test cases.
    @classmethod
    def setUpClass(cls):
        cls.d, cls.po = Dictionary(lang='lt'), ParseOptions()

    @classmethod
    def tearDownClass(cls):
        del cls.d, cls.po

    # Reads linkages from a test-file.
    def test_getting_links(self):
        linkage_testfile(self, self.d, self.po)
1037
1038# Tests are run in alphabetical order; do the language tests last.
class ZRULangTestCase(unittest.TestCase):
    # Russian dictionary tests, with and without morphology display.

    @classmethod
    def setUpClass(cls):
        ru_dict = Dictionary(lang='ru')
        options = ParseOptions()
        cls.d = ru_dict
        cls.po = options

    @classmethod
    def tearDownClass(cls):
        del cls.d
        del cls.po

    def parse_sent(self, text):
        # Return all the linkages of text as a list.
        linkages = Sentence(text, self.d, self.po).parse()
        return list(linkages)

    def test_a_getting_num_of_words(self):
        self.po.display_morphology = False
        #Words include punctuation and a 'LEFT-WALL' and 'RIGHT_WALL'
        for text, word_count in (('это тести.', 5),
                                 ('вверху плыли редкие облачка.', 7)):
            self.assertEqual(self.parse_sent(text)[0].num_of_words(), word_count)

    def test_b_getting_words(self):
        self.po.display_morphology = False
        expected_words = ['LEFT-WALL', 'вверху.e', 'плыли.vnndpp', 'редкие.api',
                          'облачка.ndnpi', '.', 'RIGHT-WALL']
        first_linkage = self.parse_sent('вверху плыли редкие облачка.')[0]
        self.assertEqual(list(first_linkage.words()), expected_words)

    def test_c_getting_links(self):
        self.po.display_morphology = False
        # One tuple per link, in link-index order; each holds the four
        # Link() arguments that follow the linkage and the link index.
        expected_links = (
            ('LEFT-WALL', 'Xp', 'Xp', '.'),
            ('LEFT-WALL', 'W', 'Wd', 'плыли.vnndpp'),
            ('вверху.e', 'EI', 'EI', 'плыли.vnndpp'),
            ('плыли.vnndpp', 'SIp', 'SIp', 'облачка.ndnpi'),
            ('редкие.api', 'Api', 'Api', 'облачка.ndnpi'),
            ('.', 'RW', 'RW', 'RIGHT-WALL'),
        )
        linkage = self.parse_sent('вверху плыли редкие облачка.')[0]
        for index, link_args in enumerate(expected_links):
            self.assertEqual(linkage.link(index),
                             Link(linkage, index, *link_args))

    # Expect morphological splitting to apply.
    def test_d_morphology(self):
        self.po.display_morphology = True
        expected_words = [
            'LEFT-WALL',
            'вверху.e',
            'плы.=', '=ли.vnndpp',
            'ре.=', '=дкие.api',
            'облачк.=', '=а.ndnpi',
            '.', 'RIGHT-WALL',
        ]
        first_linkage = self.parse_sent('вверху плыли редкие облачка.')[0]
        self.assertEqual(list(first_linkage.words()), expected_words)
1090
class ZXDictDialectTestCase(unittest.TestCase):
    # File-driven check of the English dictionary with the 'headline' dialect.
    def test_dialect(self):
        en_dict = Dictionary(lang='en')
        headline_options = ParseOptions(dialect='headline')
        linkage_testfile(self, en_dict, headline_options, 'dialect')
1094
1095#############################################################################
1096
def linkage_testfile(self, lgdict, popt, desc=''):
    """
    Reads sentences and their corresponding
    linkage diagrams / constituent printings.

    The file read is clg.test_data_srcdir + "parses-[<desc>-]<lang>.txt",
    where <lang> is the language reported for lgdict. The first character
    of each line is an opcode:
      I  input sentence; it is parsed and its first linkage is selected
      N  select the next linkage of the last input sentence
      O  one line of the expected diagram (an empty O line ends it)
      C  one line of the expected constituent tree (an empty C line ends it)
      P  one line of the expected word positions (an empty P line ends it)
      -  a parse-option assignment, applied to popt
      %  comment (empty lines are ignored too)
    Any other first character fails the test.

    self   -- the running unittest.TestCase; its assert methods are used
    lgdict -- Dictionary used for parsing the sentences
    popt   -- ParseOptions used for parsing; modified by "-" lines
    desc   -- optional infix selecting the test file
    """
    self.__class__.longMessage = True
    if desc != '':
        desc = desc + '-'
    testfile = clg.test_data_srcdir + "parses-" + desc + clg.dictionary_get_lang(lgdict._obj) + ".txt"
    # Accumulators for the expected results. None means "nothing pending";
    # they are reset to "" by an I/N line and back to None after a comparison.
    diagram = None
    constituents = None
    wordpos = None
    sent = None
    lineno = 0
    last_opcode = None

    def getwordpos(lkg):
        """
        Return two lines for linkage lkg: its words, each followed by its
        (start, end) character positions, then the same with byte positions.
        """
        words_char = []
        words_byte = []
        for wi, w in enumerate(lkg.words()):
            words_char.append(w + str((int(lkg.word_char_start(wi)), int(lkg.word_char_end(wi)))))
            words_byte.append(w + str((int(lkg.word_byte_start(wi)), int(lkg.word_byte_end(wi)))))
        return ' '.join(words_char) + '\n' + ' '.join(words_byte) + '\n'

    # Function code and file format sanity check
    def validate_opcode(opcode):
        # A partially accumulated entry may only be followed by more lines
        # of the same kind.
        if opcode != ord('O'):
            self.assertFalse(diagram, "at {}:{}: Unfinished diagram entry".format(testfile, lineno))
        if opcode != ord('C'):
            self.assertFalse(constituents, "at {}:{}: Unfinished constituents entry".format(testfile, lineno))
        if opcode != ord('P'):
            self.assertFalse(wordpos, "at {}:{}: Unfinished word-position entry".format(testfile, lineno))

    with open(testfile, 'rb') as _:
        parses = _.readlines()

    for line in parses:
        lineno += 1
        line = line.decode('utf-8')

        validate_opcode(ord(line[0])) # Use ord() for python2/3 compatibility
        if line[0] in 'INOCP':
            last_opcode = line[0]

        # Lines starting with I are the input sentences
        if line[0] == 'I':
            sent = line[1:].rstrip('\r\n') # Strip whitespace before RIGHT-WALL (for P)
            diagram = ""
            constituents = ""
            wordpos = ""
            if popt.verbosity > 1:
                print('Sentence:', sent)
            linkages = Sentence(sent, lgdict, popt).parse()
            linkage = next(linkages, None)

        # Generate the next linkage of the last input sentence
        elif line[0] == 'N':
            diagram = ""
            constituents = ""
            wordpos = ""
            linkage = next(linkages, None)
            self.assertTrue(linkage, "at {}:{}: Sentence has too few linkages".format(testfile, lineno))

        # Lines starting with O are the parse diagram
        # It ends with an empty line
        elif line[0] == 'O':
            diagram += line[1:]
            if line[1] == '\n':
                # NOTE(review): this first branch looks unreachable, since
                # an empty O line always leaves diagram ending in "\n\n"
                # or equal to "\n" -- confirm before relying on it.
                if diagram == 'C\nC\n':
                    self.assertFalse(linkage)
                    diagram = None
                elif len(diagram) > 2:
                    self.assertTrue(linkage, "at {}:{}: Sentence has no linkages".format(testfile, lineno))
                    self.assertEqual(linkage.diagram(), diagram, "at {}:{}".format(testfile, lineno))
                    diagram = None

        # Lines starting with C are the constituent output (type 1)
        # It ends with an empty line
        elif line[0] == 'C':
            if line[1] == '\n' and len(constituents) > 1:
                self.assertEqual(linkage.constituent_tree(), constituents, "at {}:{}".format(testfile, lineno))
                constituents = None
            else:
                constituents += line[1:]

        # Lines starting with P contain word positions "word(start, end) ... "
        # The first P line contains character positions
        # The second P line contains byte positions
        # It ends with an empty line
        elif line[0] == 'P':
            if line[1] == '\n' and len(wordpos) > 1:
                self.assertEqual(getwordpos(linkage), wordpos, "at {}:{}".format(testfile, lineno))
                wordpos = None
            else:
                wordpos += line[1:]

        # Lines starting with "-" contain a Parse Option
        elif line[0] == '-':
            # The form below is accepted both as a Python 2 exec statement
            # and as a Python 3 exec() call (whose result is then used in a
            # throw-away expression).
            # NOTE(review): this executes text taken from the test file, so
            # only trusted test files should be used.
            exec('popt.' + line[1:]) in {}, locals()

        elif line[0] in '%\r\n':
            pass
        else:
            self.fail('\nTest file "{}": Invalid opcode "{}" (ord={})'.format(testfile, line[0], ord(line[0])))

    # A tuple is used so that a file without any opcode line (last_opcode is
    # None) is reported as a test failure rather than raising TypeError.
    self.assertIn(last_opcode, tuple('OCP'), "Missing result comparison in " + testfile)
1203
def warning(*msg):
    """
    Print the given items to stderr, prefixed by "<program name>: Warning:",
    where the program name is the basename of sys.argv[0].
    """
    prefix = "{}: Warning:".format(os.path.basename(sys.argv[0]))
    print(prefix, *msg, file=sys.stderr)
1207
1208import tempfile
1209
class divert_start(object):
    """ Output diversion.

    Creating the object redirects a file descriptor to a temporary file;
    divert_end() restores the descriptor and returns what was written to
    it in between.
    """
    def __init__(self, fd):
        """ Divert a file descriptor.
        The created object is used for restoring the original file descriptor.
        """
        # Set first, so that divert_end() (also used as __del__) is a no-op
        # if anything below fails.
        self.filename = None
        self.fd = fd
        self.savedfd = os.dup(fd)
        (newfd, self.filename) = tempfile.mkstemp(text=False)
        os.dup2(newfd, fd)
        os.close(newfd)

    def divert_end(self):
        """ Restore a previous diversion and return its content.

        The content is returned as str() of the bytes read back from the
        temporary file. An empty string is returned if the diversion has
        already been ended.
        """
        if not self.filename:
            return ""
        try:
            # Rewind the temporary file and read everything written to it.
            os.lseek(self.fd, 0, os.SEEK_SET)
            chunks = []
            while True:
                chunk = os.read(self.fd, 1024)
                if not chunk:
                    break
                chunks.append(chunk)
            content = b''.join(chunks)
        finally:
            # Restore the original descriptor even if reading failed.
            os.dup2(self.savedfd, self.fd)
            os.close(self.savedfd)
            os.unlink(self.filename)
            self.filename = None
        return str(content)

    __del__ = divert_end
1235
1236
# Decorate Sentence.parse with eqcost_sorted_parse.
lg_testutils.add_eqcost_linkage_order(Sentence)

# For testing development branches, it may be sometimes useful to use the
# "test", "debug" and "verbosity" options. The following allows specifying
# them as "tests.py" arguments, interleaved with standard "unittest" arguments.

def _pop_option(argv, name, convert=None):
    """
    Remove every "-<name>=<value>" argument from argv (in place).

    Return the value of the last such argument, passed through convert
    if one is given, or '' if there is no such argument.
    """
    prefix = '-' + name + '='
    values = [arg[len(prefix):] for arg in argv if arg.startswith(prefix)]
    if not values:
        return ''
    # Rebuild the list instead of popping while iterating, so that no
    # argument is skipped and a found value is not overwritten later.
    argv[:] = [arg for arg in argv if not arg.startswith(prefix)]
    value = values[-1]
    return convert(value) if convert is not None else value

debug = _pop_option(sys.argv, 'debug')
test = _pop_option(sys.argv, 'test')
verbosity = _pop_option(sys.argv, 'verbosity', int)
if (test or debug or verbosity):
    ParseOptions = lg_testutils.add_test_option(ParseOptions, test, debug, verbosity)

unittest.main()
1254