1#!/usr/bin/env python
2"""Unit tests for phonenumbermatcher.py"""
3
4# Based on original Java code:
5#     java/test/com/google/i18n/phonenumbers/PhoneNumberMatchTest.java
6#     java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java
7# Copyright (C) 2011 The Libphonenumber Authors
8#
9# Licensed under the Apache License, Version 2.0 (the "License");
10# you may not use this file except in compliance with the License.
11# You may obtain a copy of the License at
12#
13# http://www.apache.org/licenses/LICENSE-2.0
14#
15# Unless required by applicable law or agreed to in writing, software
16# distributed under the License is distributed on an "AS IS" BASIS,
17# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18# See the License for the specific language governing permissions and
19# limitations under the License.
20import sys
21import unittest
22
23from phonenumbers import PhoneNumberMatch, PhoneNumberMatcher, Leniency
24from phonenumbers import PhoneNumber, NumberFormat, phonenumberutil
25from phonenumbers import phonenumbermatcher, CountryCodeSource
26from phonenumbers.util import u
27from .testmetadatatest import TestMetadataTestCase
28
29
30class PhoneNumberMatchTest(unittest.TestCase):
31    """Tests the value type semantics for PhoneNumberMatch.
32
33    Equality must be based on the covered range and corresponding phone
34    number. Range and number correctness are tested by PhoneNumberMatcherTest.
35    """
36
37    def setUp(self):
38        pass
39
40    def tearDown(self):
41        pass
42
43    def testValueTypeSemantics(self):
44        number = PhoneNumber()
45        match1 = PhoneNumberMatch(10, "1 800 234 45 67", number)
46        match2 = PhoneNumberMatch(10, "1 800 234 45 67", number)
47        match3 = PhoneNumberMatch(10, "1 801 234 45 67", number)
48
49        self.assertEqual(match1, match2)
50        self.assertEqual(match1.start, match2.start)
51        self.assertEqual(match1.end, match2.end)
52        self.assertEqual(match1.number, match2.number)
53        self.assertEqual(match1.raw_string, match2.raw_string)
54        self.assertEqual("1 800 234 45 67", match1.raw_string)
55        # Python-specific: check __ne__()
56        self.assertNotEqual(match1, match3)
57        self.assertTrue(match1 != match3)
58        # Python-specific: Check only comparisons of the same type work
59        self.assertNotEqual(match1, None)
60        self.assertNotEqual(match1, "")
61        self.assertNotEqual(match1, "1 800 234 45 67")
62        self.assertNotEqual(match1, 0)
63
64    def testIllegalArguments(self):
65        """Tests the value type semantics for matches with a None number."""
66        try:
67            PhoneNumberMatch(-110, "1 800 234 45 67", PhoneNumber())
68            self.fail("Expected failed constructor")
69        except Exception:
70            pass
71
72        try:
73            PhoneNumberMatch(10, "1 800 234 45 67", None)
74            self.fail("Expected failed constructor")
75        except Exception:
76            pass
77
78        try:
79            PhoneNumberMatch(10, None, PhoneNumber())
80            self.fail("Expected failed constructor")
81        except Exception:
82            pass
83
84        try:
85            PhoneNumberMatch(10, None, None)
86            self.fail("Expected failed constructor")
87        except Exception:
88            pass
89
90    def testStringConvert(self):
91        """Check string conversion"""
92        number = PhoneNumber()
93        match = PhoneNumberMatch(10, "1 800 234 45 67", number)
94
95        self.assertEqual("PhoneNumberMatch [10,25) 1 800 234 45 67", str(match))
96        # Python version extra test
97        self.assertEqual("PhoneNumberMatch(start=10, raw_string='1 800 234 45 67', "
98                         "numobj=PhoneNumber(country_code=None, national_number=None, extension=None, "
99                         "italian_leading_zero=None, number_of_leading_zeros=None, "
100                         "country_code_source=None, preferred_domestic_carrier_code=None))", repr(match))
101
102
class NumberContext(object):
    """Text surrounding a number under test.

    The tests place the phone number to be found between leadingText and
    trailingText.
    """

    def __init__(self, leadingText, trailingText):
        self.leadingText, self.trailingText = leadingText, trailingText
110
111
class NumberTest(object):
    """A raw number string paired with the region for which it should be
    valid."""

    def __init__(self, rawString, region):
        self.region = region
        self.rawString = rawString

    def __str__(self):
        # Rendered as "<raw string> (<region>)".
        described = (self.rawString, self.region)
        return "%s (%s)" % described
121
122
# Strings with number-like things that shouldn't be found under any level.
# Each entry pairs the raw text with the region it is searched under; this
# list is part of the input to every testNonMatches* method below.
IMPOSSIBLE_CASES = [NumberTest("12345", "US"),
                    NumberTest("23456789", "US"),
                    NumberTest("234567890112", "US"),
                    NumberTest("650+253+1234", "US"),
                    NumberTest("3/10/1984", "CA"),
                    NumberTest("03/27/2011", "US"),
                    NumberTest("31/8/2011", "US"),
                    NumberTest("1/12/2011", "US"),
                    NumberTest("10/12/82", "DE"),
                    NumberTest("650x2531234", "US"),
                    NumberTest("2012-01-02 08:00", "US"),
                    NumberTest("2012/01/02 08:00", "US"),
                    NumberTest("20120102 08:00", "US"),
                    NumberTest("2014-04-12 04:04 PM", "US"),
                    # NOTE(review): the next three entries read as identical here; presumably
                    # they were meant to differ in the whitespace between the date and the
                    # time -- confirm against the original Java test data.
                    NumberTest("2014-04-12  04:04 PM", "US"),
                    NumberTest("2014-04-12  04:04 PM", "US"),
                    NumberTest("2014-04-12  04:04 PM", "US"),
                    ]
142
# Number-like strings that are reported only at the most lenient level,
# "possible".
POSSIBLE_ONLY_CASES = [
    # US numbers cannot start with 7 in the test metadata to be valid.
    NumberTest("7121115678", "US"),
    # 'X' should not be found in numbers at leniencies stricter than
    # POSSIBLE, unless it represents a carrier code or extension.
    NumberTest("1650 x 253 - 1234", "US"),
    NumberTest("650 x 253 - 1234", "US"),
    NumberTest("6502531x234", "US"),
    # Non-optional NP omitted
    NumberTest("(20) 3346 1234", "GB"),
]
152
# Number-like strings that are reported at the "possible" and "valid"
# leniency levels, but not at any stricter one.
VALID_CASES = [
    NumberTest("65 02 53 00 00", "US"),
    NumberTest("6502 538365", "US"),
    # 2 slashes are illegal at higher levels
    NumberTest("650//253-1234", "US"),
    NumberTest("650/253/1234", "US"),
    NumberTest("9002309. 158", "US"),
    NumberTest("12 7/8 - 14 12/34 - 5", "US"),
    NumberTest("12.1 - 23.71 - 23.45", "US"),
    NumberTest("800 234 1 111x1111", "US"),
    NumberTest("1979-2011 100", "US"),
    # National number in wrong format
    NumberTest("+494949-4-94", "DE"),
    NumberTest(u("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17"), "US"),
    # Very strange formatting.
    NumberTest("2012-0102 08", "US"),
    NumberTest("2012-01-02 08", "US"),
    # Breakdown assistance number with unexpected formatting.
    NumberTest("1800-1-0-10 22", "AU"),
    NumberTest("030-3-2 23 12 34", "DE"),
    NumberTest("03 0 -3 2 23 12 34", "DE"),
    NumberTest("(0)3 0 -3 2 23 12 34", "DE"),
    NumberTest("0 3 0 -3 2 23 12 34", "DE"),
]
175
# Number-like strings that are reported at every leniency level up to and
# including "strict_grouping", but not at "exact_grouping".
STRICT_GROUPING_CASES = [
    NumberTest("(415) 6667777", "US"),
    NumberTest("415-6667777", "US"),
    # Should be found by strict grouping but not exact grouping, as the last
    # two groups are formatted together as a block.
    NumberTest("0800-2491234", "DE"),
    # Doesn't match any formatting in the test file, but almost matches an
    # alternate format (the last two groups have been squashed together
    # here).
    NumberTest("0900-1 123123", "DE"),
    NumberTest("(0)900-1 123123", "DE"),
    NumberTest("0 900-1 123123", "DE"),
    # NDC also found as part of the country calling code; this shouldn't
    # ruin the grouping expectations.
    NumberTest("+33 3 34 2312", "FR"),
]
194
# Number-like strings that are reported at every leniency level, including
# the strictest one ("exact_grouping").
EXACT_GROUPING_CASES = [
    NumberTest(u("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF17\uFF17\uFF17\uFF17"), "US"),
    NumberTest(u("\uFF14\uFF11\uFF15-\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17\uFF17"), "US"),
    NumberTest("4156667777", "US"),
    NumberTest("4156667777 x 123", "US"),
    NumberTest("415-666-7777", "US"),
    NumberTest("415/666-7777", "US"),
    NumberTest("415-666-7777 ext. 503", "US"),
    NumberTest("1 415 666 7777 x 123", "US"),
    NumberTest("+1 415-666-7777", "US"),
    NumberTest("+494949 49", "DE"),
    NumberTest("+49-49-34", "DE"),
    NumberTest("+49-4931-49", "DE"),
    # With National Prefix
    NumberTest("04931-49", "DE"),
    # One group with country code
    NumberTest("+49-494949", "DE"),
    NumberTest("+49-494949 ext. 49", "DE"),
    NumberTest("+49494949 ext. 49", "DE"),
    NumberTest("0494949", "DE"),
    NumberTest("0494949 ext. 49", "DE"),
    # Optional NP present
    NumberTest("01 (33) 3461 2234", "MX"),
    # Optional NP omitted
    NumberTest("(33) 3461 2234", "MX"),
    # Breakdown assistance number.
    NumberTest("1800-10-10 22", "AU"),
    # Doesn't match any formatting in the test file, but matches an
    # alternate format exactly.
    NumberTest("0900-1 123 123", "DE"),
    NumberTest("(0)900-1 123 123", "DE"),
    NumberTest("0 900-1 123 123", "DE"),
    NumberTest("+33 3 34 23 12", "FR"),
]
224
225
226class PhoneNumberMatcherTest(TestMetadataTestCase):
227    """Tests for PhoneNumberMatcher.
228
229    This only tests basic functionality based on test metadata.  See
230    testphonenumberutil.py for the origin of the test data.
231    """
232    def testContainsMoreThanOneSlashInNationalNumber(self):
233        # A date should return true.
234        number = PhoneNumber(country_code=1,
235                             country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY)
236        candidate = "1/05/2013"
237        self.assertTrue(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
238
239        # Here, the country code source thinks it started with a country calling code, but this is not
240        # the same as the part before the slash, so it's still true.
241        number = PhoneNumber(country_code=274,
242                             country_code_source=CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN)
243        candidate = "27/4/2013"
244        self.assertTrue(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
245
246        # Now it should be false, because the first slash is after the country calling code.
247        number = PhoneNumber(country_code=49,
248                             country_code_source=CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN)
249        candidate = "49/69/2013"
250        self.assertFalse(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
251
252        number = PhoneNumber(country_code=49,
253                             country_code_source=CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN)
254        candidate = "+49/69/2013"
255        self.assertFalse(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
256
257        candidate = "+ 49/69/2013"
258        self.assertFalse(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
259
260        candidate = "+ 49/69/20/13"
261        self.assertTrue(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
262
263        # Here, the first group is not assumed to be the country calling code, even though it is the
264        # same as it, so this should return true.
265        number = PhoneNumber(country_code=49,
266                             country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY)
267        candidate = "49/69/2013"
268        self.assertTrue(phonenumbermatcher._contains_more_than_one_slash_in_national_number(number, candidate))
269
270    # See PhoneNumberUtilTest.testParseNationalNumber().
271    def testFindNationalNumber(self):
272        # same cases as in testParseNationalNumber
273        self.doTestFindInContext("033316005", "NZ")
274        # self.doTestFindInContext("33316005", "NZ") is omitted since the
275        # national prefix is obligatory for these types of numbers in New Zealand.
276        # National prefix attached and some formatting present.
277        self.doTestFindInContext("03-331 6005", "NZ")
278        self.doTestFindInContext("03 331 6005", "NZ")
279        # Testing international prefixes.
280        # Should strip country code.
281        self.doTestFindInContext("0064 3 331 6005", "NZ")
282        # Try again, but this time we have an international number with Region
283        # Code US. It should recognize the country code and parse accordingly.
284        self.doTestFindInContext("01164 3 331 6005", "US")
285        self.doTestFindInContext("+64 3 331 6005", "US")
286
287        self.doTestFindInContext("64(0)64123456", "NZ")
288        # Check that using a "/" is fine in a phone number.
289        # Note that real Polish numbers do *not* start with a 0.
290        self.doTestFindInContext("0123/456789", "PL")
291        self.doTestFindInContext("123-456-7890", "US")
292
293    # See PhoneNumberUtilTest.testParseWithInternationalPrefixes().
294    def testFindWithInternationalPrefixes(self):
295        self.doTestFindInContext("+1 (650) 333-6000", "NZ")
296        self.doTestFindInContext("1-650-333-6000", "US")
297        # Calling the US number from Singapore by using different service
298        # providers
299        # 1st test: calling using SingTel IDD service (IDD is 001)
300        self.doTestFindInContext("0011-650-333-6000", "SG")
301        # 2nd test: calling using StarHub IDD service (IDD is 008)
302        self.doTestFindInContext("0081-650-333-6000", "SG")
303        # 3rd test: calling using SingTel V019 service (IDD is 019)
304        self.doTestFindInContext("0191-650-333-6000", "SG")
305        # Calling the US number from Poland
306        self.doTestFindInContext("0~01-650-333-6000", "PL")
307        # Using "++" at the start.
308        self.doTestFindInContext("++1 (650) 333-6000", "PL")
309        # Using a full-width plus sign.
310        self.doTestFindInContext(u("\uFF0B1 (650) 333-6000"), "SG")
311        # The whole number, including punctuation, is here represented in
312        # full-width form.
313        self.doTestFindInContext(u("\uFF0B\uFF11\u3000\uFF08\uFF16\uFF15\uFF10\uFF09") +
314                                 u("\u3000\uFF13\uFF13\uFF13\uFF0D\uFF16\uFF10\uFF10\uFF10"),
315                                 "SG")
316
317    # See PhoneNumberUtilTest.testParseWithLeadingZero().
318    def testFindWithLeadingZero(self):
319        self.doTestFindInContext("+39 02-36618 300", "NZ")
320        self.doTestFindInContext("02-36618 300", "IT")
321        self.doTestFindInContext("312 345 678", "IT")
322
323    # See PhoneNumberUtilTest.testParseNationalNumberArgentina().
324    def testFindNationalNumberArgentina(self):
325        # Test parsing mobile numbers of Argentina.
326        self.doTestFindInContext("+54 9 343 555 1212", "AR")
327        self.doTestFindInContext("0343 15 555 1212", "AR")
328
329        self.doTestFindInContext("+54 9 3715 65 4320", "AR")
330        self.doTestFindInContext("03715 15 65 4320", "AR")
331
332        # Test parsing fixed-line numbers of Argentina.
333        self.doTestFindInContext("+54 11 3797 0000", "AR")
334        self.doTestFindInContext("011 3797 0000", "AR")
335
336        self.doTestFindInContext("+54 3715 65 4321", "AR")
337        self.doTestFindInContext("03715 65 4321", "AR")
338
339        self.doTestFindInContext("+54 23 1234 0000", "AR")
340        self.doTestFindInContext("023 1234 0000", "AR")
341
342    # See PhoneNumberUtilTest.testParseWithXInNumber().
343    def testFindWithXInNumber(self):
344        self.doTestFindInContext("(0xx) 123456789", "AR")
345        # A case where x denotes both carrier codes and extension symbol.
346        self.doTestFindInContext("(0xx) 123456789 x 1234", "AR")
347
348        # This test is intentionally constructed such that the number of digit
349        # after xx is larger than 7, so that the number won't be mistakenly
350        # treated as an extension, as we allow extensions up to 7 digits. This
351        # assumption is okay for now as all the countries where a carrier
352        # selection code is written in the form of xx have a national
353        # significant number of length larger than 7.
354        self.doTestFindInContext("011xx5481429712", "US")
355
356    # See PhoneNumberUtilTest.testParseNumbersMexico().
357    def testFindNumbersMexico(self):
358        # Test parsing fixed-line numbers of Mexico.
359        self.doTestFindInContext("+52 (449)978-0001", "MX")
360        self.doTestFindInContext("01 (449)978-0001", "MX")
361        self.doTestFindInContext("(449)978-0001", "MX")
362
363        # Test parsing mobile numbers of Mexico.
364        self.doTestFindInContext("+52 1 33 1234-5678", "MX")
365        self.doTestFindInContext("044 (33) 1234-5678", "MX")
366        self.doTestFindInContext("045 33 1234-5678", "MX")
367
368    # See PhoneNumberUtilTest.testParseNumbersWithPlusWithNoRegion().
369    def testFindNumbersWithPlusWithNoRegion(self):
370        # "ZZ" is allowed only if the number starts with a '+' - then the
371        # country code can be calculated.
372        self.doTestFindInContext("+64 3 331 6005", "ZZ")
373        # None is also allowed for the region code in these cases.
374        self.doTestFindInContext("+64 3 331 6005", None)
375
376    # See PhoneNumberUtilTest.testParseExtensions().
377    def testFindExtensions(self):
378        self.doTestFindInContext("03 331 6005 ext 3456", "NZ")
379        self.doTestFindInContext("03-3316005x3456", "NZ")
380        self.doTestFindInContext("03-3316005 int.3456", "NZ")
381        self.doTestFindInContext("03 3316005 #3456", "NZ")
382        self.doTestFindInContext("0~0 1800 7493 524", "PL")
383        self.doTestFindInContext("(1800) 7493.524", "US")
384        # Check that the last instance of an extension token is matched.
385        self.doTestFindInContext("0~0 1800 7493 524 ~1234", "PL")
386        # Verifying bug-fix where the last digit of a number was previously omitted if it was a 0 when
387        # extracting the extension. Also verifying a few different cases of extensions.
388        self.doTestFindInContext("+44 2034567890x456", "NZ")
389        self.doTestFindInContext("+44 2034567890x456", "GB")
390        self.doTestFindInContext("+44 2034567890 x456", "GB")
391        self.doTestFindInContext("+44 2034567890 X456", "GB")
392        self.doTestFindInContext("+44 2034567890 X 456", "GB")
393        self.doTestFindInContext("+44 2034567890 X    456", "GB")
394        self.doTestFindInContext("+44 2034567890    X 456", "GB")
395
396        self.doTestFindInContext("(800) 901-3355 x 7246433", "US")
397        self.doTestFindInContext("(800) 901-3355 , ext 7246433", "US")
398        self.doTestFindInContext("(800) 901-3355 ,extension 7246433", "US")
399        # The next test differs from phonenumberutil -> when matching we don't
400        # consider a lone comma to indicate an extension, although we accept
401        # it when parsing.
402        self.doTestFindInContext("(800) 901-3355 ,x 7246433", "US")
403        self.doTestFindInContext("(800) 901-3355 ext: 7246433", "US")
404
405    def testFindInterspersedWithSpace(self):
406        self.doTestFindInContext("0 3   3 3 1   6 0 0 5", "NZ")
407
408    # Test matching behavior when starting in the middle of a phone number.
409    def testIntermediateParsePositions(self):
410        text = "Call 033316005  or 032316005!"
411        #       |    |    |    |    |    |
412        #       0    5   10   15   20   25
413
414        # Iterate over all possible indices.
415        for ii in range(6):
416            self.assertEqualRange(text, ii, 5, 14)
417
418        # 7 and 8 digits in a row are still parsed as number.
419        self.assertEqualRange(text, 6, 6, 14)
420        self.assertEqualRange(text, 7, 7, 14)
421        # Anything smaller is skipped to the second instance.
422        for ii in range(8, 20):
423            self.assertEqualRange(text, ii, 19, 28)
424
425    def testFourMatchesInARow(self):
426        number1 = "415-666-7777"
427        number2 = "800-443-1223"
428        number3 = "212-443-1223"
429        number4 = "650-443-1223"
430        text = number1 + " - " + number2 + " - " + number3 + " - " + number4
431
432        matcher = PhoneNumberMatcher(text, "US")
433        match = matcher.next() if matcher.has_next() else None
434        self.assertMatchProperties(match, text, number1, "US")
435
436        match = matcher.next() if matcher.has_next() else None
437        self.assertMatchProperties(match, text, number2, "US")
438
439        match = matcher.next() if matcher.has_next() else None
440        self.assertMatchProperties(match, text, number3, "US")
441
442        match = matcher.next() if matcher.has_next() else None
443        self.assertMatchProperties(match, text, number4, "US")
444
445    def testMatchesFoundWithMultipleSpaces(self):
446        number1 = "(415) 666-7777"
447        number2 = "(800) 443-1223"
448        text = number1 + " " + number2
449
450        matcher = PhoneNumberMatcher(text, "US")
451        match = matcher.next() if matcher.has_next() else None
452        self.assertMatchProperties(match, text, number1, "US")
453
454        match = matcher.next() if matcher.has_next() else None
455        self.assertMatchProperties(match, text, number2, "US")
456
457    def testMatchWithSurroundingZipcodes(self):
458        number = "415-666-7777"
459        zipPreceding = "My address is CA 34215 - " + number + " is my number."
460
461        matcher = PhoneNumberMatcher(zipPreceding, "US")
462        match = matcher.next() if matcher.has_next() else None
463        self.assertMatchProperties(match, zipPreceding, number, "US")
464
465        # Now repeat, but this time the phone number has spaces in it. It should still be found.
466        number = "(415) 666 7777"
467
468        zipFollowing = "My number is " + number + ". 34215 is my zip-code."
469        matcher = PhoneNumberMatcher(zipFollowing, "US")
470        match = matcher.next() if matcher.has_next() else None
471        self.assertMatchProperties(match, zipFollowing, number, "US")
472
473    def testIsLatinLetter(self):
474        self.assertTrue(PhoneNumberMatcher._is_latin_letter('c'))
475        self.assertTrue(PhoneNumberMatcher._is_latin_letter('C'))
476        self.assertTrue(PhoneNumberMatcher._is_latin_letter(u("\u00C9")))
477        self.assertTrue(PhoneNumberMatcher._is_latin_letter(u("\u0301")))  # Combining acute accent
478        # Punctuation, digits and white-space are not considered "latin letters".
479        self.assertFalse(PhoneNumberMatcher._is_latin_letter(':'))
480        self.assertFalse(PhoneNumberMatcher._is_latin_letter('5'))
481        self.assertFalse(PhoneNumberMatcher._is_latin_letter('-'))
482        self.assertFalse(PhoneNumberMatcher._is_latin_letter('.'))
483        self.assertFalse(PhoneNumberMatcher._is_latin_letter(' '))
484        self.assertFalse(PhoneNumberMatcher._is_latin_letter(u("\u6211")))  # Chinese character
485        self.assertFalse(PhoneNumberMatcher._is_latin_letter(u("\u306E")))  # Hiragana letter no
486
487    def testMatchesWithSurroundingLatinChars(self):
488        possibleOnlyContexts = []
489        possibleOnlyContexts.append(NumberContext("abc", "def"))
490        possibleOnlyContexts.append(NumberContext("abc", ""))
491        possibleOnlyContexts.append(NumberContext("", "def"))
492        # Latin capital letter e with an acute accent.
493        possibleOnlyContexts.append(NumberContext(u("\u00C9"), ""))
494        # e with an acute accent decomposed (with combining mark).
495        possibleOnlyContexts.append(NumberContext(u("e\u0301"), ""))
496
497        # Numbers should not be considered valid, if they are surrounded by
498        # Latin characters, but should be considered possible.
499        self.findMatchesInContexts(possibleOnlyContexts, False, True)
500
501    def testMoneyNotSeenAsPhoneNumber(self):
502        possibleOnlyContexts = []
503        possibleOnlyContexts.append(NumberContext("$", ""))
504        possibleOnlyContexts.append(NumberContext("", "$"))
505        possibleOnlyContexts.append(NumberContext(u("\u00A3"), ""))  # Pound sign
506        possibleOnlyContexts.append(NumberContext(u("\u00A5"), ""))  # Yen sign
507        self.findMatchesInContexts(possibleOnlyContexts, False, True)
508
509    def testPercentageNotSeenAsPhoneNumber(self):
510        possibleOnlyContexts = []
511        possibleOnlyContexts.append(NumberContext("", "%"))
512        # Numbers followed by % should be dropped.
513        self.findMatchesInContexts(possibleOnlyContexts, False, True)
514
515    def testPhoneNumberWithLeadingOrTrailingMoneyMatches(self):
516        # Because of the space after the 20 (or before the 100) these dollar
517        # amounts should not stop the actual number from being found.
518        contexts = []
519        contexts.append(NumberContext("$20 ", ""))
520        contexts.append(NumberContext("", " 100$"))
521        self.findMatchesInContexts(contexts, True, True)
522
523    def testMatchesWithSurroundingLatinCharsAndLeadingPunctuation(self):
524        # Contexts with trailing characters. Leading characters are okay here
525        # since the numbers we will insert start with punctuation, but
526        # trailing characters are still not allowed.
527        possibleOnlyContexts = []
528        possibleOnlyContexts.append(NumberContext("abc", "def"))
529        possibleOnlyContexts.append(NumberContext("", "def"))
530        possibleOnlyContexts.append(NumberContext("", u("\u00C9")))
531
532        # Numbers should not be considered valid, if they have trailing Latin
533        # characters, but should be considered possible.
534        numberWithPlus = "+14156667777"
535        numberWithBrackets = "(415)6667777"
536        self.findMatchesInContexts(possibleOnlyContexts, False, True, "US", numberWithPlus)
537        self.findMatchesInContexts(possibleOnlyContexts, False, True, "US", numberWithBrackets)
538
539        validContexts = []
540        validContexts.append(NumberContext("abc", ""))
541        validContexts.append(NumberContext(u("\u00C9"), ""))
542        validContexts.append(NumberContext(u("\u00C9"), "."))  # Trailing punctuation.
543        validContexts.append(NumberContext(u("\u00C9"), " def"))  # Trailing white-space.
544
545        # Numbers should be considered valid, since they start with punctuation.
546        self.findMatchesInContexts(validContexts, True, True, "US", numberWithPlus)
547        self.findMatchesInContexts(validContexts, True, True, "US", numberWithBrackets)
548
549    def testMatchesWithSurroundingChineseChars(self):
550        validContexts = []
551        validContexts.append(NumberContext(u("\u6211\u7684\u7535\u8BDD\u53F7\u7801\u662F"), ""))
552        validContexts.append(NumberContext("", u("\u662F\u6211\u7684\u7535\u8BDD\u53F7\u7801")))
553        validContexts.append(NumberContext(u("\u8BF7\u62E8\u6253"), u("\u6211\u5728\u660E\u5929")))
554
555        # Numbers should be considered valid, since they are surrounded by Chinese.
556        self.findMatchesInContexts(validContexts, True, True)
557
558    def testMatchesWithSurroundingPunctuation(self):
559        validContexts = []
560        validContexts.append(NumberContext("My number-", ""))    # At end of text.
561        validContexts.append(NumberContext("", ".Nice day."))    # At start of text.
562        validContexts.append(NumberContext("Tel:", "."))    # Punctuation surrounds number.
563        validContexts.append(NumberContext("Tel: ", " on Saturdays."))    # White-space is also fine.
564
565        # Numbers should be considered valid, since they are surrounded by punctuation.
566        self.findMatchesInContexts(validContexts, True, True)
567
568    def testMatchesMultiplePhoneNumbersSeparatedByPhoneNumberPunctuation(self):
569        text = "Call 650-253-4561 -- 455-234-3451"
570        region = "US"
571        number1 = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
572                              national_number=6502534561)
573        match1 = PhoneNumberMatch(5, "650-253-4561", number1)
574        number2 = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
575                              national_number=4552343451)
576        match2 = PhoneNumberMatch(21, "455-234-3451", number2)
577
578        matches = PhoneNumberMatcher(text, region)
579        self.assertEqual(match1, matches.next())
580        self.assertEqual(match2, matches.next())
581
582    def testDoesNotMatchMultiplePhoneNumbersSeparatedWithNoWhiteSpace(self):
583        # No white-space found between numbers - neither is found.
584        text = "Call 650-253-4561--455-234-3451"
585        region = "US"
586        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(text, region)))
587
588    def testMatchesWithPossibleLeniency(self):
589        testCases = STRICT_GROUPING_CASES + EXACT_GROUPING_CASES + VALID_CASES + POSSIBLE_ONLY_CASES
590        self._doTestNumberMatchesForLeniency(testCases, Leniency.POSSIBLE)
591
592    def testNonMatchesWithPossibleLeniency(self):
593        testCases = IMPOSSIBLE_CASES
594        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.POSSIBLE)
595
596    def testMatchesWithValidLeniency(self):
597        testCases = STRICT_GROUPING_CASES + EXACT_GROUPING_CASES + VALID_CASES
598        self._doTestNumberMatchesForLeniency(testCases, Leniency.VALID)
599
600    def testNonMatchesWithValidLeniency(self):
601        testCases = IMPOSSIBLE_CASES + POSSIBLE_ONLY_CASES
602        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.VALID)
603
604    def testMatchesWithStrictGroupingLeniency(self):
605        testCases = STRICT_GROUPING_CASES + EXACT_GROUPING_CASES
606        self._doTestNumberMatchesForLeniency(testCases, Leniency.STRICT_GROUPING)
607
608    def testNonMatchesWithStrictGroupLeniency(self):
609        testCases = IMPOSSIBLE_CASES + POSSIBLE_ONLY_CASES + VALID_CASES
610        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.STRICT_GROUPING)
611
612    def testMatchesWithExactGroupingLeniency(self):
613        testCases = EXACT_GROUPING_CASES
614        self._doTestNumberMatchesForLeniency(testCases, Leniency.EXACT_GROUPING)
615
616    def testNonMatchesExactGroupLeniency(self):
617        testCases = IMPOSSIBLE_CASES + POSSIBLE_ONLY_CASES + VALID_CASES + STRICT_GROUPING_CASES
618        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.EXACT_GROUPING)
619
620    def _doTestNumberMatchesForLeniency(self, testCases, leniency):
621        noMatchFoundCount = 0
622        wrongMatchFoundCount = 0
623        for test in testCases:
624            iterator = self.findNumbersForLeniency(test.rawString, test.region, leniency)
625            match = iterator.next() if iterator.has_next() else None
626            if match is None:
627                noMatchFoundCount += 1
628                prnt("No match found in  %s for leniency: %s" % (test, leniency), file=sys.stderr)
629            else:
630                if test.rawString != match.raw_string:
631                    wrongMatchFoundCount += 1
632                    prnt("Found wrong match in test %s. Found %s" % (test, match), file=sys.stderr)
633        self.assertEqual(0, noMatchFoundCount)
634        self.assertEqual(0, wrongMatchFoundCount)
635
636    def _doTestNumberNonMatchesForLeniency(self, testCases, leniency):
637        matchFoundCount = 0
638        for test in testCases:
639            iterator = self.findNumbersForLeniency(test.rawString, test.region, leniency)
640            match = iterator.next() if iterator.has_next() else None
641            if match is not None:
642                matchFoundCount += 1
643                prnt("Match found in %s for leniency: %s" % (test, leniency), file=sys.stderr)
644        self.assertEqual(0, matchFoundCount)
645
646    def findMatchesInContexts(self, contexts, isValid, isPossible,
647                              region="US", number="415-666-7777"):
648        """Helper method which tests the contexts provided and ensures
649        that:
650         - if isValid is True, they all find a test number inserted in the
651           middle when leniency of matching is set to VALID; else no test
652           number should be extracted at that leniency level
653         - if isPossible is True, they all find a test number inserted in the
654           middle when leniency of matching is set to POSSIBLE; else no test
655           number should be extracted at that leniency level"""
656        if isValid:
657            self.doTestInContext(number, region, contexts, Leniency.VALID)
658        else:
659            for context in contexts:
660                text = context.leadingText + number + context.trailingText
661                self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(text, region)),
662                                msg="Should not have found a number in " + text)
663        if isPossible:
664            self.doTestInContext(number, region, contexts, Leniency.POSSIBLE)
665        else:
666            for context in contexts:
667                text = context.leadingText + number + context.trailingText
668                self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(text, region,
669                                                                     leniency=Leniency.POSSIBLE, max_tries=65535)),
670                                msg="Should not have found a number in " + text)
671
672    def testNonMatchingBracketsAreInvalid(self):
673        # The digits up to the ", " form a valid US number, but it shouldn't
674        # be matched as one since there was a non-matching bracket present.
675        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("80.585 [79.964, 81.191]", "US")))
676
677        # The trailing "]" is thrown away before parsing, so the resultant
678        # number, while a valid US number, does not have matching brackets.
679        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("80.585 [79.964]", "US")))
680
681        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("80.585 ((79.964)", "US")))
682
683        # This case has too many sets of brackets to be valid.
684        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("(80).(585) (79).(9)64", "US")))
685
686    def testNoMatchIfRegionIsNone(self):
687        # Fail on non-international prefix if region code is None.
688        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("Random text body - number is 0331 6005, see you there", None)))
689
690    def testNoMatchInEmptyString(self):
691        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("", "US")))
692        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("  ", "US")))
693
694    def testNoMatchIfNoNumber(self):
695        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("Random text body - number is foobar, see you there", "US")))
696
697    def testSequences(self):
698        # Test multiple occurrences.
699        text = "Call 033316005  or 032316005!"
700        region = "NZ"
701
702        number1 = PhoneNumber()
703        number1.country_code = phonenumberutil.country_code_for_region(region)
704        number1.national_number = 33316005
705        match1 = PhoneNumberMatch(5, "033316005", number1)
706
707        number2 = PhoneNumber()
708        number2.country_code = phonenumberutil.country_code_for_region(region)
709        number2.national_number = 32316005
710        match2 = PhoneNumberMatch(19, "032316005", number2)
711
712        matcher = PhoneNumberMatcher(text, region, Leniency.POSSIBLE, 65535)
713
714        self.assertEqual(match1, matcher.next())
715        self.assertEqual(match2, matcher.next())
716        self.assertFalse(matcher.has_next())
717
718    def testNoneInput(self):
719        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(None, "US")))
720        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(None, None)))
721
722    def testMaxMatches(self):
723        # Set up text with 100 valid phone numbers.
724        numbers = "My info: 415-666-7777," * 100
725
726        # Matches all 100. Max only applies to failed cases.
727        number = phonenumberutil.parse("+14156667777", None)
728        expected = [number] * 100
729
730        matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
731        actual = [x.number for x in matcher]
732
733        self.assertEqual(expected, actual)
734
735    def testMaxMatchesInvalid(self):
736        # Set up text with 10 invalid phone numbers followed by 100 valid.
737        numbers = (("My address 949-8945-0" * 10) +
738                   ("My info: 415-666-7777," * 100))
739        matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
740        self.assertFalse(matcher.has_next())
741
742    def testMaxMatchesMixed(self):
743        # Set up text with 100 valid numbers inside an invalid number.
744        numbers = "My info: 415-666-7777 123 fake street" * 100
745
746        # Only matches the first 10 despite there being 100 numbers due to max matches.
747        number = phonenumberutil.parse("+14156667777", None)
748        expected = [number] * 10
749
750        matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
751        actual = [x.number for x in matcher]
752
753        self.assertEqual(expected, actual)
754
755    def testNonPlusPrefixedNumbersNotFoundForInvalidRegion(self):
756        # Does not start with a "+", we won't match it.
757        matcher = PhoneNumberMatcher("1 456 764 156", "ZZ")
758        self.assertFalse(matcher.has_next())
759        try:
760            matcher.next()
761            self.fail("Violation of the Iterator contract.")
762        except Exception:
763            # Success
764            pass
765        self.assertFalse(matcher.has_next())
766
767    def testEmptyIteration(self):
768        matcher = PhoneNumberMatcher("", "ZZ")
769        self.assertFalse(matcher.has_next())
770        self.assertFalse(matcher.has_next())
771        try:
772            matcher.next()
773            self.fail("Violation of the iterator contract.")
774        except Exception:
775            # Success
776            pass
777        self.assertFalse(matcher.has_next())
778
779    def testSingleIteration(self):
780        matcher = PhoneNumberMatcher("+14156667777", "ZZ")
781
782        # With hasNext() -> next().
783        # Double hasNext() to ensure it does not advance.
784        self.assertTrue(matcher.has_next())
785        self.assertTrue(matcher.has_next())
786        self.assertTrue(matcher.next() is not None)
787        self.assertFalse(matcher.has_next())
788        try:
789            matcher.next()
790            self.fail("Violation of the Matcher contract.")
791        except Exception:
792            # Success
793            pass
794        self.assertFalse(matcher.has_next())
795
796        # With next() only.
797        matcher = PhoneNumberMatcher("+14156667777", "ZZ")
798        self.assertTrue(matcher.next() is not None)
799        try:
800            matcher.next()
801            self.fail("Violation of the Matcher contract.")
802        except Exception:
803            # Success
804            pass
805
806    def testDoubleIteration(self):
807        matcher = PhoneNumberMatcher("+14156667777 foobar +14156667777 ", "ZZ")
808
809        # With hasNext() -> next().
810        # Double hasNext() to ensure it does not advance.
811        self.assertTrue(matcher.has_next())
812        self.assertTrue(matcher.has_next())
813        self.assertTrue(matcher.next() is not None)
814        self.assertTrue(matcher.has_next())
815        self.assertTrue(matcher.has_next())
816        self.assertTrue(matcher.next() is not None)
817        self.assertFalse(matcher.has_next())
818        try:
819            matcher.next()
820            self.fail("Violation of the Matcher contract.")
821        except Exception:
822            # Success
823            pass
824        self.assertFalse(matcher.has_next())
825
826        # With next() only.
827        matcher = PhoneNumberMatcher("+14156667777 foobar +14156667777 ", "ZZ")
828        self.assertTrue(matcher.next() is not None)
829        self.assertTrue(matcher.next() is not None)
830        try:
831            matcher.next()
832            self.fail("Violation of the Matcher contract.")
833        except Exception:
834            # Success
835            pass
836
837    def assertEqualRange(self, text, index, start, end):
838        """Asserts that another number can be found in text starting at index, and that
839        its corresponding range is [start, end).
840        """
841        sub = text[index:]
842        matcher = PhoneNumberMatcher(sub, "NZ", Leniency.POSSIBLE, 65535)
843
844        self.assertTrue(matcher.has_next())
845        match = matcher.next()
846        self.assertEqual(start - index, match.start)
847        self.assertEqual(end - index, match.end)
848        self.assertEqual(sub[match.start:match.end], match.raw_string)
849
850    def assertMatchProperties(self, match, text, number, region):
851        """Asserts that the expected match is non-null, and that the raw string
852        and expected proto buffer are set appropriately."""
853        expectedResult = phonenumberutil.parse(number, region)
854        self.assertTrue(match is not None,
855                        msg="Did not find a number in '" + text + "'; expected " + number)
856        self.assertEqual(expectedResult, match.number)
857        self.assertEqual(number, match.raw_string)
858
859    def doTestFindInContext(self, number, defaultCountry):
860        """Tests numbers found by PhoneNumberMatcher in various textual contexts"""
861        self.findPossibleInContext(number, defaultCountry)
862        parsed = phonenumberutil.parse(number, defaultCountry)
863        if phonenumberutil.is_valid_number(parsed):
864            self.findValidInContext(number, defaultCountry)
865
866    def findPossibleInContext(self, number, defaultCountry):
867        """Tests valid numbers in contexts that should pass for Leniency.POSSIBLE"""
868        contextPairs = [NumberContext("", ""),    # no context
869                        NumberContext("     ", "\t"),    # whitespace only
870                        NumberContext("Hello ", ""),    # no context at end
871                        NumberContext("", " to call me!"),    # no context at start
872                        NumberContext("Hi there, call ", " to reach me!"),    # no context at start
873                        NumberContext("Hi there, call ", ", or don't"),    # with commas
874                        # Three examples without whitespace around the number.
875                        NumberContext("Hi call", ""),
876                        NumberContext("", "forme"),
877                        NumberContext("Hi call", "forme"),
878                        # With other small numbers.
879                        NumberContext("It's cheap! Call ", " before 6:30"),
880                        # With a second number later.
881                        NumberContext("Call ", " or +1800-123-4567!"),
882                        NumberContext("Call me on June 2 at", ""),    # with a Month-Day date
883                        # With publication pages.
884                        NumberContext("As quoted by Alfonso 12-15 (2009), you may call me at ", ""),
885                        NumberContext("As quoted by Alfonso et al. 12-15 (2009), you may call me at ", ""),
886                        # With dates, written in the American style.
887                        NumberContext("As I said on 03/10/2011, you may call me at ", ""),
888                        # With trailing numbers after a comma. The 45 should not be considered an extension.
889                        NumberContext("", ", 45 days a year"),
890                        # When matching we don't consider semicolon along with legitimate extension
891                        # symbol to indicate an extension. The 7246433 should not be considered an
892                        # extension.
893                        NumberContext("", ";x 7246433"),
894                        # With a postfix stripped off as it looks like the start of another number.
895                        NumberContext("Call ", "/x12 more"),
896                        ]
897
898        self.doTestInContext(number, defaultCountry, contextPairs, Leniency.POSSIBLE)
899
900    def findValidInContext(self, number, defaultCountry):
901        """Tests valid numbers in contexts that fail for Leniency.POSSIBLE but
902        are valid for Leniency.VALID."""
903        contextPairs = [
904            # With other small numbers.
905            NumberContext("It's only 9.99! Call ", " to buy"),
906            # With a number Day.Month.Year date.
907            NumberContext("Call me on 21.6.1984 at ", ""),
908            # With a number Month/Day date.
909            NumberContext("Call me on 06/21 at ", ""),
910            # With a number Day.Month date.
911            NumberContext("Call me on 21.6. at ", ""),
912            # With a number Month/Day/Year date.
913            NumberContext("Call me on 06/21/84 at ", ""),
914        ]
915        self.doTestInContext(number, defaultCountry, contextPairs, Leniency.VALID)
916
917    def doTestInContext(self, number, defaultCountry, contextPairs, leniency):
918        for context in contextPairs:
919            prefix = context.leadingText
920            text = prefix + number + context.trailingText
921
922            start = len(prefix)
923            end = start + len(number)
924            matcher = PhoneNumberMatcher(text, defaultCountry, leniency, 65535)
925
926            match = matcher.next() if matcher.has_next() else None
927            self.assertTrue(match is not None,
928                            msg="Did not find a number in '" + text + "'; expected '" + number + "'")
929
930            extracted = text[match.start:match.end]
931            self.assertEqual(start, match.start,
932                             msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'")
933            self.assertEqual(end, match.end,
934                             msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'")
935            self.assertEqual(number, extracted)
936            self.assertEqual(match.raw_string, extracted)
937
938            self.ensureTermination(text, defaultCountry, leniency)
939
940    # Exhaustively searches for phone numbers from each index within text to
941    # test that finding matches always terminates.
942    def ensureTermination(self, text, defaultCountry, leniency):
943        for index in range(len(text) + 1):
944            sub = text[index:]
945            matches = ""
946            # Iterates over all matches.
947            for match in PhoneNumberMatcher(sub, defaultCountry, leniency, 65535):
948                matches += ", " + str(match)
949
950    def findNumbersForLeniency(self, text, defaultCountry, leniency):
951        return PhoneNumberMatcher(text, defaultCountry, leniency, 65535)
952
953    def hasNoMatches(self, matcher):
954        """Returns True if there were no matches found."""
955        return not matcher.has_next()
956
957    def testDoubleExtensionX(self):
958        # Python version extra test - multiple x for extension marker
959        xx_ext = "800 234 1 111 xx 1111"
960        # This gives different results for different leniency values (and so
961        # can't be used in a NumberTest).
962        m0 = PhoneNumberMatcher(xx_ext, "US", leniency=Leniency.POSSIBLE).next()
963        self.assertEqual(xx_ext, m0.raw_string)
964        matcher2 = PhoneNumberMatcher(xx_ext, "US", leniency=Leniency.STRICT_GROUPING)
965        self.assertFalse(matcher2.has_next())
966
967    def testInternals(self):
968        # Python-specific test: coverage of internals
969        from phonenumbers.phonenumbermatcher import _limit, _verify, _is_national_prefix_present_if_required, _get_national_number_groups
970        from phonenumbers import CountryCodeSource
971        self.assertEqual("{1,2}", _limit(1, 2))
972        self.assertRaises(Exception, _limit, *(-1, 2))
973        self.assertRaises(Exception, _limit, *(1, 0))
974        self.assertRaises(Exception, _limit, *(2, 1))
975        number = PhoneNumber(country_code=44, national_number=7912345678)
976        self.assertRaises(Exception, _verify, *(99, number, "12345678"))
977        self.assertRaises(ValueError, PhoneNumberMatcher, *("text", "US"), **{"leniency": None})
978        self.assertRaises(ValueError, PhoneNumberMatcher, *("text", "US"), **{"max_tries": -2})
979        # Invalid country looks like national prefix is present (no way to tell)
980        number2 = PhoneNumber(country_code=99, national_number=12345678, country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY)
981        self.assertTrue(_is_national_prefix_present_if_required(number2))
982        # National prefix rule has no lead digits
983        number3 = PhoneNumber(country_code=61, national_number=1234567890, country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY)
984        self.assertTrue(_is_national_prefix_present_if_required(number3))
985        # Coverage for _get_national_number_groups() with a formatting pattern provided
986        us_number = PhoneNumber(country_code=1, national_number=6502530000)
987        num_format = NumberFormat(pattern="(\\d{3})(\\d{3})(\\d{4})", format="\\1-\\2-\\3")
988        self.assertEqual(["650", "253", "0000"],
989                         _get_national_number_groups(us_number, num_format))
990