#!/usr/bin/env python
"""Unit tests for phonenumbermatcher.py"""

# Based on original Java code:
#     java/test/com/google/i18n/phonenumbers/PhoneNumberMatchTest.java
#     java/test/com/google/i18n/phonenumbers/PhoneNumberMatcherTest.java
# Copyright (C) 2011 The Libphonenumber Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import unittest

from phonenumbers import PhoneNumberMatch, PhoneNumberMatcher, Leniency
from phonenumbers import PhoneNumber, NumberFormat, phonenumberutil
from phonenumbers import phonenumbermatcher, CountryCodeSource
from phonenumbers.util import u
from .testmetadatatest import TestMetadataTestCase


class PhoneNumberMatchTest(unittest.TestCase):
    """Tests the value type semantics for PhoneNumberMatch.

    Equality must be based on the covered range and corresponding phone
    number. Range and number correctness are tested by PhoneNumberMatcherTest.
    """

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def testValueTypeSemantics(self):
        """Matches built from the same start/raw string/number compare equal."""
        number = PhoneNumber()
        match1 = PhoneNumberMatch(10, "1 800 234 45 67", number)
        match2 = PhoneNumberMatch(10, "1 800 234 45 67", number)
        match3 = PhoneNumberMatch(10, "1 801 234 45 67", number)

        self.assertEqual(match1, match2)
        self.assertEqual(match1.start, match2.start)
        self.assertEqual(match1.end, match2.end)
        self.assertEqual(match1.number, match2.number)
        self.assertEqual(match1.raw_string, match2.raw_string)
        self.assertEqual("1 800 234 45 67", match1.raw_string)
        # Python-specific: check __ne__()
        self.assertNotEqual(match1, match3)
        self.assertTrue(match1 != match3)
        # Python-specific: Check only comparisons of the same type work
        self.assertNotEqual(match1, None)
        self.assertNotEqual(match1, "")
        self.assertNotEqual(match1, "1 800 234 45 67")
        self.assertNotEqual(match1, 0)

    def testIllegalArguments(self):
        """Tests the value type semantics for matches with a None number."""
        # assertRaises is used instead of try / self.fail() / except Exception:
        # the AssertionError raised by self.fail() is itself an Exception, so
        # the old form swallowed it and the test could never fail.
        self.assertRaises(Exception, PhoneNumberMatch, -110, "1 800 234 45 67", PhoneNumber())
        self.assertRaises(Exception, PhoneNumberMatch, 10, "1 800 234 45 67", None)
        self.assertRaises(Exception, PhoneNumberMatch, 10, None, PhoneNumber())
        self.assertRaises(Exception, PhoneNumberMatch, 10, None, None)

    def testStringConvert(self):
        """Check string conversion"""
        number = PhoneNumber()
        match = PhoneNumberMatch(10, "1 800 234 45 67", number)

        self.assertEqual("PhoneNumberMatch [10,25) 1 800 234 45 67", str(match))
        # Python version extra test
        self.assertEqual("PhoneNumberMatch(start=10, raw_string='1 800 234 45 67', "
                         "numobj=PhoneNumber(country_code=None, national_number=None, extension=None, "
                         "italian_leading_zero=None, number_of_leading_zeros=None, "
                         "country_code_source=None, preferred_domestic_carrier_code=None))", repr(match))


class NumberContext(object):
    """Small class that holds the context of the number we are testing
    against.  The test will insert the phone number to be found between
    leadingText and trailingText."""
    def __init__(self, leadingText, trailingText):
        self.leadingText = leadingText
        self.trailingText = trailingText


class NumberTest(object):
    """Small class that holds the number we want to test and the region for
    which it should be valid."""
    def __init__(self, rawString, region):
        self.rawString = rawString
        self.region = region

    def __str__(self):
        return "%s (%s)" % (self.rawString, self.region)


# Strings with number-like things that shouldn't be found under any level.
# NOTE(review): the last four entries are textually identical here; presumably
# they once differed only in whitespace -- confirm against the reference data.
IMPOSSIBLE_CASES = [NumberTest("12345", "US"),
                    NumberTest("23456789", "US"),
                    NumberTest("234567890112", "US"),
                    NumberTest("650+253+1234", "US"),
                    NumberTest("3/10/1984", "CA"),
                    NumberTest("03/27/2011", "US"),
                    NumberTest("31/8/2011", "US"),
                    NumberTest("1/12/2011", "US"),
                    NumberTest("10/12/82", "DE"),
                    NumberTest("650x2531234", "US"),
                    NumberTest("2012-01-02 08:00", "US"),
                    NumberTest("2012/01/02 08:00", "US"),
                    NumberTest("20120102 08:00", "US"),
                    NumberTest("2014-04-12 04:04 PM", "US"),
                    NumberTest("2014-04-12 04:04 PM", "US"),
                    NumberTest("2014-04-12 04:04 PM", "US"),
                    NumberTest("2014-04-12 04:04 PM", "US"),
                    ]

# Strings with number-like things that should only be found under "possible".
POSSIBLE_ONLY_CASES = [NumberTest("7121115678", "US"),  # US numbers cannot start with 7 in the test metadata to be valid.
                       # 'X' should not be found in numbers at leniencies stricter than POSSIBLE, unless it represents
                       # a carrier code or extension.
                       NumberTest("1650 x 253 - 1234", "US"),
                       NumberTest("650 x 253 - 1234", "US"),
                       NumberTest("6502531x234", "US"),
                       NumberTest("(20) 3346 1234", "GB"),  # Non-optional NP omitted
                       ]

# Strings with number-like things that should only be found up to and
# including the "valid" leniency level.
VALID_CASES = [NumberTest("65 02 53 00 00", "US"),
               NumberTest("6502 538365", "US"),
               NumberTest("650//253-1234", "US"),  # 2 slashes are illegal at higher levels
               NumberTest("650/253/1234", "US"),
               NumberTest("9002309. 158", "US"),
               NumberTest("12 7/8 - 14 12/34 - 5", "US"),
               NumberTest("12.1 - 23.71 - 23.45", "US"),
               NumberTest("800 234 1 111x1111", "US"),
               NumberTest("1979-2011 100", "US"),
               NumberTest("+494949-4-94", "DE"),  # National number in wrong format
               NumberTest(u("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17"), "US"),
               NumberTest("2012-0102 08", "US"),  # Very strange formatting.
               NumberTest("2012-01-02 08", "US"),
               # Breakdown assistance number with unexpected formatting.
               NumberTest("1800-1-0-10 22", "AU"),
               NumberTest("030-3-2 23 12 34", "DE"),
               NumberTest("03 0 -3 2 23 12 34", "DE"),
               NumberTest("(0)3 0 -3 2 23 12 34", "DE"),
               NumberTest("0 3 0 -3 2 23 12 34", "DE"),
               ]

# Strings with number-like things that should only be found up to and
# including the "strict_grouping" leniency level.
STRICT_GROUPING_CASES = [NumberTest("(415) 6667777", "US"),
                         NumberTest("415-6667777", "US"),
                         # Should be found by strict grouping but not exact
                         # grouping, as the last two groups are formatted
                         # together as a block.
                         NumberTest("0800-2491234", "DE"),
                         # Doesn't match any formatting in the test file, but
                         # almost matches an alternate format (the last two
                         # groups have been squashed together here).
                         NumberTest("0900-1 123123", "DE"),
                         NumberTest("(0)900-1 123123", "DE"),
                         NumberTest("0 900-1 123123", "DE"),
                         # NDC also found as part of the country calling code;
                         # this shouldn't ruin the grouping expectations.
                         NumberTest("+33 3 34 2312", "FR"),
                         ]

# Strings with number-like things that should be found at all levels.
EXACT_GROUPING_CASES = [NumberTest(u("\uFF14\uFF11\uFF15\uFF16\uFF16\uFF16\uFF17\uFF17\uFF17\uFF17"), "US"),
                        NumberTest(u("\uFF14\uFF11\uFF15-\uFF16\uFF16\uFF16-\uFF17\uFF17\uFF17\uFF17"), "US"),
                        NumberTest("4156667777", "US"),
                        NumberTest("4156667777 x 123", "US"),
                        NumberTest("415-666-7777", "US"),
                        NumberTest("415/666-7777", "US"),
                        NumberTest("415-666-7777 ext. 503", "US"),
                        NumberTest("1 415 666 7777 x 123", "US"),
                        NumberTest("+1 415-666-7777", "US"),
                        NumberTest("+494949 49", "DE"),
                        NumberTest("+49-49-34", "DE"),
                        NumberTest("+49-4931-49", "DE"),
                        NumberTest("04931-49", "DE"),  # With National Prefix
                        NumberTest("+49-494949", "DE"),  # One group with country code
                        NumberTest("+49-494949 ext. 49", "DE"),
                        NumberTest("+49494949 ext. 49", "DE"),
                        NumberTest("0494949", "DE"),
                        NumberTest("0494949 ext. 49", "DE"),
                        NumberTest("01 (33) 3461 2234", "MX"),  # Optional NP present
                        NumberTest("(33) 3461 2234", "MX"),  # Optional NP omitted
                        NumberTest("1800-10-10 22", "AU"),  # Breakdown assistance number.
                        # Doesn't match any formatting in the test file, but
                        # matches an alternate format exactly.
                        NumberTest("0900-1 123 123", "DE"),
                        NumberTest("(0)900-1 123 123", "DE"),
                        NumberTest("0 900-1 123 123", "DE"),
                        NumberTest("+33 3 34 23 12", "FR"),
                        ]


class PhoneNumberMatcherTest(TestMetadataTestCase):
    """Tests for PhoneNumberMatcher.

    This only tests basic functionality based on test metadata.  See
    testphonenumberutil.py for the origin of the test data.
    """
    def testContainsMoreThanOneSlashInNationalNumber(self):
        # Local alias purely to keep the assertions readable.
        more_than_one_slash = phonenumbermatcher._contains_more_than_one_slash_in_national_number

        # A date should return true.
        number = PhoneNumber(country_code=1,
                             country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY)
        candidate = "1/05/2013"
        self.assertTrue(more_than_one_slash(number, candidate))

        # Here, the country code source thinks it started with a country calling code, but this is not
        # the same as the part before the slash, so it's still true.
        number = PhoneNumber(country_code=274,
                             country_code_source=CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN)
        candidate = "27/4/2013"
        self.assertTrue(more_than_one_slash(number, candidate))

        # Now it should be false, because the first slash is after the country calling code.
        # The country_code_source is kept consistent with the candidate text:
        # no leading '+' here, so the code came from the number without a plus sign.
        number = PhoneNumber(country_code=49,
                             country_code_source=CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN)
        candidate = "49/69/2013"
        self.assertFalse(more_than_one_slash(number, candidate))

        # The remaining candidates all start with '+', hence WITH_PLUS_SIGN.
        number = PhoneNumber(country_code=49,
                             country_code_source=CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN)
        candidate = "+49/69/2013"
        self.assertFalse(more_than_one_slash(number, candidate))

        candidate = "+ 49/69/2013"
        self.assertFalse(more_than_one_slash(number, candidate))

        candidate = "+ 49/69/20/13"
        self.assertTrue(more_than_one_slash(number, candidate))

        # Here, the first group is not assumed to be the country calling code, even though it is the
        # same as it, so this should return true.
        number = PhoneNumber(country_code=49,
                             country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY)
        candidate = "49/69/2013"
        self.assertTrue(more_than_one_slash(number, candidate))

    # See PhoneNumberUtilTest.testParseNationalNumber().
    def testFindNationalNumber(self):
        # same cases as in testParseNationalNumber
        self.doTestFindInContext("033316005", "NZ")
        # self.doTestFindInContext("33316005", "NZ") is omitted since the
        # national prefix is obligatory for these types of numbers in New Zealand.
        # National prefix attached and some formatting present.
        self.doTestFindInContext("03-331 6005", "NZ")
        self.doTestFindInContext("03 331 6005", "NZ")
        # Testing international prefixes.
        # Should strip country code.
        self.doTestFindInContext("0064 3 331 6005", "NZ")
        # Try again, but this time we have an international number with Region
        # Code US. It should recognize the country code and parse accordingly.
        self.doTestFindInContext("01164 3 331 6005", "US")
        self.doTestFindInContext("+64 3 331 6005", "US")

        self.doTestFindInContext("64(0)64123456", "NZ")
        # Check that using a "/" is fine in a phone number.
        # Note that real Polish numbers do *not* start with a 0.
        self.doTestFindInContext("0123/456789", "PL")
        self.doTestFindInContext("123-456-7890", "US")

    # See PhoneNumberUtilTest.testParseWithInternationalPrefixes().
    def testFindWithInternationalPrefixes(self):
        self.doTestFindInContext("+1 (650) 333-6000", "NZ")
        self.doTestFindInContext("1-650-333-6000", "US")
        # Calling the US number from Singapore by using different service
        # providers
        # 1st test: calling using SingTel IDD service (IDD is 001)
        self.doTestFindInContext("0011-650-333-6000", "SG")
        # 2nd test: calling using StarHub IDD service (IDD is 008)
        self.doTestFindInContext("0081-650-333-6000", "SG")
        # 3rd test: calling using SingTel V019 service (IDD is 019)
        self.doTestFindInContext("0191-650-333-6000", "SG")
        # Calling the US number from Poland
        self.doTestFindInContext("0~01-650-333-6000", "PL")
        # Using "++" at the start.
        self.doTestFindInContext("++1 (650) 333-6000", "PL")
        # Using a full-width plus sign.
        self.doTestFindInContext(u("\uFF0B1 (650) 333-6000"), "SG")
        # The whole number, including punctuation, is here represented in
        # full-width form.
        self.doTestFindInContext(u("\uFF0B\uFF11\u3000\uFF08\uFF16\uFF15\uFF10\uFF09") +
                                 u("\u3000\uFF13\uFF13\uFF13\uFF0D\uFF16\uFF10\uFF10\uFF10"),
                                 "SG")

    # See PhoneNumberUtilTest.testParseWithLeadingZero().
    def testFindWithLeadingZero(self):
        self.doTestFindInContext("+39 02-36618 300", "NZ")
        self.doTestFindInContext("02-36618 300", "IT")
        self.doTestFindInContext("312 345 678", "IT")

    # See PhoneNumberUtilTest.testParseNationalNumberArgentina().
    def testFindNationalNumberArgentina(self):
        # Test parsing mobile numbers of Argentina.
        self.doTestFindInContext("+54 9 343 555 1212", "AR")
        self.doTestFindInContext("0343 15 555 1212", "AR")

        self.doTestFindInContext("+54 9 3715 65 4320", "AR")
        self.doTestFindInContext("03715 15 65 4320", "AR")

        # Test parsing fixed-line numbers of Argentina.
        self.doTestFindInContext("+54 11 3797 0000", "AR")
        self.doTestFindInContext("011 3797 0000", "AR")

        self.doTestFindInContext("+54 3715 65 4321", "AR")
        self.doTestFindInContext("03715 65 4321", "AR")

        self.doTestFindInContext("+54 23 1234 0000", "AR")
        self.doTestFindInContext("023 1234 0000", "AR")

    # See PhoneNumberUtilTest.testParseWithXInNumber().
    def testFindWithXInNumber(self):
        self.doTestFindInContext("(0xx) 123456789", "AR")
        # A case where x denotes both carrier codes and extension symbol.
        self.doTestFindInContext("(0xx) 123456789 x 1234", "AR")

        # This test is intentionally constructed such that the number of digit
        # after xx is larger than 7, so that the number won't be mistakenly
        # treated as an extension, as we allow extensions up to 7 digits. This
        # assumption is okay for now as all the countries where a carrier
        # selection code is written in the form of xx have a national
        # significant number of length larger than 7.
        self.doTestFindInContext("011xx5481429712", "US")

    # See PhoneNumberUtilTest.testParseNumbersMexico().
    def testFindNumbersMexico(self):
        # Test parsing fixed-line numbers of Mexico.
        self.doTestFindInContext("+52 (449)978-0001", "MX")
        self.doTestFindInContext("01 (449)978-0001", "MX")
        self.doTestFindInContext("(449)978-0001", "MX")

        # Test parsing mobile numbers of Mexico.
        self.doTestFindInContext("+52 1 33 1234-5678", "MX")
        self.doTestFindInContext("044 (33) 1234-5678", "MX")
        self.doTestFindInContext("045 33 1234-5678", "MX")

    # See PhoneNumberUtilTest.testParseNumbersWithPlusWithNoRegion().
    def testFindNumbersWithPlusWithNoRegion(self):
        # "ZZ" is allowed only if the number starts with a '+' - then the
        # country code can be calculated.
        self.doTestFindInContext("+64 3 331 6005", "ZZ")
        # None is also allowed for the region code in these cases.
        self.doTestFindInContext("+64 3 331 6005", None)

    # See PhoneNumberUtilTest.testParseExtensions().
    def testFindExtensions(self):
        self.doTestFindInContext("03 331 6005 ext 3456", "NZ")
        self.doTestFindInContext("03-3316005x3456", "NZ")
        self.doTestFindInContext("03-3316005 int.3456", "NZ")
        self.doTestFindInContext("03 3316005 #3456", "NZ")
        self.doTestFindInContext("0~0 1800 7493 524", "PL")
        self.doTestFindInContext("(1800) 7493.524", "US")
        # Check that the last instance of an extension token is matched.
        self.doTestFindInContext("0~0 1800 7493 524 ~1234", "PL")
        # Verifying bug-fix where the last digit of a number was previously omitted if it was a 0 when
        # extracting the extension. Also verifying a few different cases of extensions.
        self.doTestFindInContext("+44 2034567890x456", "NZ")
        self.doTestFindInContext("+44 2034567890x456", "GB")
        self.doTestFindInContext("+44 2034567890 x456", "GB")
        self.doTestFindInContext("+44 2034567890 X456", "GB")
        # NOTE(review): the next three calls are textually identical here;
        # presumably they once differed only in spacing around the 'X' -- confirm.
        self.doTestFindInContext("+44 2034567890 X 456", "GB")
        self.doTestFindInContext("+44 2034567890 X 456", "GB")
        self.doTestFindInContext("+44 2034567890 X 456", "GB")

        self.doTestFindInContext("(800) 901-3355 x 7246433", "US")
        self.doTestFindInContext("(800) 901-3355 , ext 7246433", "US")
        self.doTestFindInContext("(800) 901-3355 ,extension 7246433", "US")
        # The next test differs from phonenumberutil -> when matching we don't
        # consider a lone comma to indicate an extension, although we accept
        # it when parsing.
        self.doTestFindInContext("(800) 901-3355 ,x 7246433", "US")
        self.doTestFindInContext("(800) 901-3355 ext: 7246433", "US")

    def testFindInterspersedWithSpace(self):
        self.doTestFindInContext("0 3 3 3 1 6 0 0 5", "NZ")

    # Test matching behavior when starting in the middle of a phone number.
    def testIntermediateParsePositions(self):
        # Two spaces follow the first number: the assertions below expect the
        # second number to occupy [19, 28), which requires the extra character.
        text = "Call 033316005  or 032316005!"
        #       |    |    |    |    |    |
        #       0    5   10   15   20   25

        # Iterate over all possible indices.
        for ii in range(6):
            self.assertEqualRange(text, ii, 5, 14)

        # 7 and 8 digits in a row are still parsed as number.
        self.assertEqualRange(text, 6, 6, 14)
        self.assertEqualRange(text, 7, 7, 14)
        # Anything smaller is skipped to the second instance.
        for ii in range(8, 20):
            self.assertEqualRange(text, ii, 19, 28)

    def testFourMatchesInARow(self):
        """Four numbers separated by " - " are each found, in order."""
        expected_numbers = ["415-666-7777",
                            "800-443-1223",
                            "212-443-1223",
                            "650-443-1223"]
        text = " - ".join(expected_numbers)

        matcher = PhoneNumberMatcher(text, "US")
        for expected in expected_numbers:
            match = matcher.next() if matcher.has_next() else None
            self.assertMatchProperties(match, text, expected, "US")

    def testMatchesFoundWithMultipleSpaces(self):
        number1 = "(415) 666-7777"
        number2 = "(800) 443-1223"
        # The separator is deliberately more than one space: that is the
        # scenario this test is named for.
        text = number1 + "  " + number2

        matcher = PhoneNumberMatcher(text, "US")
        match = matcher.next() if matcher.has_next() else None
        self.assertMatchProperties(match, text, number1, "US")

        match = matcher.next() if matcher.has_next() else None
        self.assertMatchProperties(match, text, number2, "US")

    def testMatchWithSurroundingZipcodes(self):
        number = "415-666-7777"
        zipPreceding = "My address is CA 34215 - " + number + " is my number."

        matcher = PhoneNumberMatcher(zipPreceding, "US")
        match = matcher.next() if matcher.has_next() else None
        self.assertMatchProperties(match, zipPreceding, number, "US")

        # Now repeat, but this time the phone number has spaces in it. It should still be found.
        number = "(415) 666 7777"

        zipFollowing = "My number is " + number + ". 34215 is my zip-code."
        matcher = PhoneNumberMatcher(zipFollowing, "US")
        match = matcher.next() if matcher.has_next() else None
        self.assertMatchProperties(match, zipFollowing, number, "US")

    def testIsLatinLetter(self):
        self.assertTrue(PhoneNumberMatcher._is_latin_letter('c'))
        self.assertTrue(PhoneNumberMatcher._is_latin_letter('C'))
        self.assertTrue(PhoneNumberMatcher._is_latin_letter(u("\u00C9")))
        self.assertTrue(PhoneNumberMatcher._is_latin_letter(u("\u0301")))  # Combining acute accent
        # Punctuation, digits and white-space are not considered "latin letters".
        self.assertFalse(PhoneNumberMatcher._is_latin_letter(':'))
        self.assertFalse(PhoneNumberMatcher._is_latin_letter('5'))
        self.assertFalse(PhoneNumberMatcher._is_latin_letter('-'))
        self.assertFalse(PhoneNumberMatcher._is_latin_letter('.'))
        self.assertFalse(PhoneNumberMatcher._is_latin_letter(' '))
        self.assertFalse(PhoneNumberMatcher._is_latin_letter(u("\u6211")))  # Chinese character
        self.assertFalse(PhoneNumberMatcher._is_latin_letter(u("\u306E")))  # Hiragana letter no

    def testMatchesWithSurroundingLatinChars(self):
        possibleOnlyContexts = [
            NumberContext("abc", "def"),
            NumberContext("abc", ""),
            NumberContext("", "def"),
            # Latin capital letter e with an acute accent.
            NumberContext(u("\u00C9"), ""),
            # e with an acute accent decomposed (with combining mark).
            NumberContext(u("e\u0301"), ""),
        ]

        # Numbers should not be considered valid, if they are surrounded by
        # Latin characters, but should be considered possible.
        self.findMatchesInContexts(possibleOnlyContexts, False, True)

    def testMoneyNotSeenAsPhoneNumber(self):
        possibleOnlyContexts = [
            NumberContext("$", ""),
            NumberContext("", "$"),
            NumberContext(u("\u00A3"), ""),  # Pound sign
            NumberContext(u("\u00A5"), ""),  # Yen sign
        ]
        self.findMatchesInContexts(possibleOnlyContexts, False, True)

    def testPercentageNotSeenAsPhoneNumber(self):
        possibleOnlyContexts = [NumberContext("", "%")]
        # Numbers followed by % should be dropped.
        self.findMatchesInContexts(possibleOnlyContexts, False, True)

    def testPhoneNumberWithLeadingOrTrailingMoneyMatches(self):
        # Because of the space after the 20 (or before the 100) these dollar
        # amounts should not stop the actual number from being found.
        contexts = [
            NumberContext("$20 ", ""),
            NumberContext("", " 100$"),
        ]
        self.findMatchesInContexts(contexts, True, True)

    def testMatchesWithSurroundingLatinCharsAndLeadingPunctuation(self):
        # Contexts with trailing characters. Leading characters are okay here
        # since the numbers we will insert start with punctuation, but
        # trailing characters are still not allowed.
        possibleOnlyContexts = [
            NumberContext("abc", "def"),
            NumberContext("", "def"),
            NumberContext("", u("\u00C9")),
        ]

        # Numbers should not be considered valid, if they have trailing Latin
        # characters, but should be considered possible.
        numberWithPlus = "+14156667777"
        numberWithBrackets = "(415)6667777"
        self.findMatchesInContexts(possibleOnlyContexts, False, True, "US", numberWithPlus)
        self.findMatchesInContexts(possibleOnlyContexts, False, True, "US", numberWithBrackets)

        validContexts = [
            NumberContext("abc", ""),
            NumberContext(u("\u00C9"), ""),
            NumberContext(u("\u00C9"), "."),  # Trailing punctuation.
            NumberContext(u("\u00C9"), " def"),  # Trailing white-space.
        ]

        # Numbers should be considered valid, since they start with punctuation.
        self.findMatchesInContexts(validContexts, True, True, "US", numberWithPlus)
        self.findMatchesInContexts(validContexts, True, True, "US", numberWithBrackets)

    def testMatchesWithSurroundingChineseChars(self):
        validContexts = [
            NumberContext(u("\u6211\u7684\u7535\u8BDD\u53F7\u7801\u662F"), ""),
            NumberContext("", u("\u662F\u6211\u7684\u7535\u8BDD\u53F7\u7801")),
            NumberContext(u("\u8BF7\u62E8\u6253"), u("\u6211\u5728\u660E\u5929")),
        ]

        # Numbers should be considered valid, since they are surrounded by Chinese.
        self.findMatchesInContexts(validContexts, True, True)

    def testMatchesWithSurroundingPunctuation(self):
        validContexts = [
            NumberContext("My number-", ""),  # At end of text.
            NumberContext("", ".Nice day."),  # At start of text.
            NumberContext("Tel:", "."),  # Punctuation surrounds number.
            NumberContext("Tel: ", " on Saturdays."),  # White-space is also fine.
        ]

        # Numbers should be considered valid, since they are surrounded by punctuation.
        self.findMatchesInContexts(validContexts, True, True)

    def testMatchesMultiplePhoneNumbersSeparatedByPhoneNumberPunctuation(self):
        text = "Call 650-253-4561 -- 455-234-3451"
        region = "US"
        number1 = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
                              national_number=6502534561)
        match1 = PhoneNumberMatch(5, "650-253-4561", number1)
        number2 = PhoneNumber(country_code=phonenumberutil.country_code_for_region(region),
                              national_number=4552343451)
        match2 = PhoneNumberMatch(21, "455-234-3451", number2)

        matches = PhoneNumberMatcher(text, region)
        self.assertEqual(match1, matches.next())
        self.assertEqual(match2, matches.next())

    def testDoesNotMatchMultiplePhoneNumbersSeparatedWithNoWhiteSpace(self):
        # No white-space found between numbers - neither is found.
        text = "Call 650-253-4561--455-234-3451"
        region = "US"
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(text, region)))

    def testMatchesWithPossibleLeniency(self):
        testCases = STRICT_GROUPING_CASES + EXACT_GROUPING_CASES + VALID_CASES + POSSIBLE_ONLY_CASES
        self._doTestNumberMatchesForLeniency(testCases, Leniency.POSSIBLE)

    def testNonMatchesWithPossibleLeniency(self):
        testCases = IMPOSSIBLE_CASES
        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.POSSIBLE)

    def testMatchesWithValidLeniency(self):
        testCases = STRICT_GROUPING_CASES + EXACT_GROUPING_CASES + VALID_CASES
        self._doTestNumberMatchesForLeniency(testCases, Leniency.VALID)

    def testNonMatchesWithValidLeniency(self):
        testCases = IMPOSSIBLE_CASES + POSSIBLE_ONLY_CASES
        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.VALID)

    def testMatchesWithStrictGroupingLeniency(self):
        testCases = STRICT_GROUPING_CASES + EXACT_GROUPING_CASES
        self._doTestNumberMatchesForLeniency(testCases, Leniency.STRICT_GROUPING)

    def testNonMatchesWithStrictGroupLeniency(self):
        testCases = IMPOSSIBLE_CASES + POSSIBLE_ONLY_CASES + VALID_CASES
        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.STRICT_GROUPING)

    def testMatchesWithExactGroupingLeniency(self):
        testCases = EXACT_GROUPING_CASES
        self._doTestNumberMatchesForLeniency(testCases, Leniency.EXACT_GROUPING)

    def testNonMatchesExactGroupLeniency(self):
        testCases = IMPOSSIBLE_CASES + POSSIBLE_ONLY_CASES + VALID_CASES + STRICT_GROUPING_CASES
        self._doTestNumberNonMatchesForLeniency(testCases, Leniency.EXACT_GROUPING)

    def _doTestNumberMatchesForLeniency(self, testCases, leniency):
        """Assert that each test case's first match equals its raw string.

        Every case is checked before asserting, and each failing case is
        reported on stderr, so a single run lists all offenders.
        """
        noMatchFoundCount = 0
        wrongMatchFoundCount = 0
        for test in testCases:
            iterator = self.findNumbersForLeniency(test.rawString, test.region, leniency)
            match = iterator.next() if iterator.has_next() else None
            if match is None:
                noMatchFoundCount += 1
                # sys.stderr.write is used because no print helper is in scope
                # in this module; the explicit "\n" keeps one report per line.
                sys.stderr.write("No match found in %s for leniency: %s\n" % (test, leniency))
            elif test.rawString != match.raw_string:
                wrongMatchFoundCount += 1
                sys.stderr.write("Found wrong match in test %s. Found %s\n" % (test, match))
        self.assertEqual(0, noMatchFoundCount)
        self.assertEqual(0, wrongMatchFoundCount)

    def _doTestNumberNonMatchesForLeniency(self, testCases, leniency):
        """Assert that no test case yields a match at the given leniency.

        Each unexpected match is reported on stderr before the final assertion.
        """
        matchFoundCount = 0
        for test in testCases:
            iterator = self.findNumbersForLeniency(test.rawString, test.region, leniency)
            match = iterator.next() if iterator.has_next() else None
            if match is not None:
                matchFoundCount += 1
                sys.stderr.write("Match found in %s for leniency: %s\n" % (test, leniency))
        self.assertEqual(0, matchFoundCount)

    def findMatchesInContexts(self, contexts, isValid, isPossible,
                              region="US", number="415-666-7777"):
        """Helper method which tests the contexts provided and ensures
        that:
         - if isValid is True, they all find a test number inserted in the
           middle when leniency of matching is set to VALID; else no test
           number should be extracted at that leniency level
         - if isPossible is True, they all find a test number inserted in the
           middle when leniency of matching is set to POSSIBLE; else no test
           number should be extracted at that leniency level"""
        if isValid:
            self.doTestInContext(number, region, contexts, Leniency.VALID)
        else:
            for context in contexts:
                text = context.leadingText + number + context.trailingText
                self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(text, region)),
                                msg="Should not have found a number in " + text)
        if isPossible:
            self.doTestInContext(number, region, contexts, Leniency.POSSIBLE)
        else:
            for context in contexts:
                text = context.leadingText + number + context.trailingText
                self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(text, region,
                                                                     leniency=Leniency.POSSIBLE, max_tries=65535)),
                                msg="Should not have found a number in " + text)

    def testNonMatchingBracketsAreInvalid(self):
        # The digits up to the ", " form a valid US number, but it shouldn't
        # be matched as one since there was a non-matching bracket present.
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("80.585 [79.964, 81.191]", "US")))

        # The trailing "]" is thrown away before parsing, so the resultant
        # number, while a valid US number, does not have matching brackets.
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("80.585 [79.964]", "US")))

        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("80.585 ((79.964)", "US")))

        # This case has too many sets of brackets to be valid.
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("(80).(585) (79).(9)64", "US")))

    def testNoMatchIfRegionIsNone(self):
        # Fail on non-international prefix if region code is None.
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("Random text body - number is 0331 6005, see you there", None)))

    def testNoMatchInEmptyString(self):
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("", "US")))
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(" ", "US")))

    def testNoMatchIfNoNumber(self):
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher("Random text body - number is foobar, see you there", "US")))

    def testSequences(self):
        # Test multiple occurrences.
        # Two spaces follow the first number so that the second number starts
        # at index 19, as match2 below expects.
        text = "Call 033316005  or 032316005!"
        region = "NZ"

        number1 = PhoneNumber()
        number1.country_code = phonenumberutil.country_code_for_region(region)
        number1.national_number = 33316005
        match1 = PhoneNumberMatch(5, "033316005", number1)

        number2 = PhoneNumber()
        number2.country_code = phonenumberutil.country_code_for_region(region)
        number2.national_number = 32316005
        match2 = PhoneNumberMatch(19, "032316005", number2)

        matcher = PhoneNumberMatcher(text, region, Leniency.POSSIBLE, 65535)

        self.assertEqual(match1, matcher.next())
        self.assertEqual(match2, matcher.next())
        self.assertFalse(matcher.has_next())

    def testNoneInput(self):
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(None, "US")))
        self.assertTrue(self.hasNoMatches(PhoneNumberMatcher(None, None)))

    def testMaxMatches(self):
        # Set up text with 100 valid phone numbers.
        numbers = "My info: 415-666-7777," * 100

        # Matches all 100. Max only applies to failed cases.
        number = phonenumberutil.parse("+14156667777", None)
        expected = [number] * 100

        matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
        actual = [x.number for x in matcher]

        self.assertEqual(expected, actual)

    def testMaxMatchesInvalid(self):
        # Set up text with 10 invalid phone numbers followed by 100 valid.
        numbers = (("My address 949-8945-0" * 10) +
                   ("My info: 415-666-7777," * 100))
        matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
        self.assertFalse(matcher.has_next())

    def testMaxMatchesMixed(self):
        # Set up text with 100 valid numbers inside an invalid number.
        numbers = "My info: 415-666-7777 123 fake street" * 100

        # Only matches the first 10 despite there being 100 numbers due to max matches.
        number = phonenumberutil.parse("+14156667777", None)
        expected = [number] * 10

        matcher = PhoneNumberMatcher(numbers, "US", Leniency.VALID, 10)
        actual = [x.number for x in matcher]

        self.assertEqual(expected, actual)

    def testNonPlusPrefixedNumbersNotFoundForInvalidRegion(self):
        # Does not start with a "+", we won't match it.
        matcher = PhoneNumberMatcher("1 456 764 156", "ZZ")
        self.assertFalse(matcher.has_next())
        # Iterator contract: next() on an exhausted matcher must raise.
        # assertRaises is used because try / self.fail() / except Exception
        # would swallow the AssertionError and could never fail.
        self.assertRaises(Exception, matcher.next)
        self.assertFalse(matcher.has_next())

    def testEmptyIteration(self):
        matcher = PhoneNumberMatcher("", "ZZ")
        self.assertFalse(matcher.has_next())
        self.assertFalse(matcher.has_next())
        # Iterator contract: next() on an exhausted matcher must raise.
        self.assertRaises(Exception, matcher.next)
        self.assertFalse(matcher.has_next())

    def testSingleIteration(self):
        matcher = PhoneNumberMatcher("+14156667777", "ZZ")

        # With hasNext() -> next().
        # Double hasNext() to ensure it does not advance.
        self.assertTrue(matcher.has_next())
        self.assertTrue(matcher.has_next())
        self.assertTrue(matcher.next() is not None)
        self.assertFalse(matcher.has_next())
        # Matcher contract: a second next() must raise rather than return.
        self.assertRaises(Exception, matcher.next)
        self.assertFalse(matcher.has_next())

        # With next() only.
        matcher = PhoneNumberMatcher("+14156667777", "ZZ")
        self.assertTrue(matcher.next() is not None)
        self.assertRaises(Exception, matcher.next)

    def testDoubleIteration(self):
        matcher = PhoneNumberMatcher("+14156667777 foobar +14156667777 ", "ZZ")

        # With hasNext() -> next().
        # Double hasNext() to ensure it does not advance.
811 self.assertTrue(matcher.has_next()) 812 self.assertTrue(matcher.has_next()) 813 self.assertTrue(matcher.next() is not None) 814 self.assertTrue(matcher.has_next()) 815 self.assertTrue(matcher.has_next()) 816 self.assertTrue(matcher.next() is not None) 817 self.assertFalse(matcher.has_next()) 818 try: 819 matcher.next() 820 self.fail("Violation of the Matcher contract.") 821 except Exception: 822 # Success 823 pass 824 self.assertFalse(matcher.has_next()) 825 826 # With next() only. 827 matcher = PhoneNumberMatcher("+14156667777 foobar +14156667777 ", "ZZ") 828 self.assertTrue(matcher.next() is not None) 829 self.assertTrue(matcher.next() is not None) 830 try: 831 matcher.next() 832 self.fail("Violation of the Matcher contract.") 833 except Exception: 834 # Success 835 pass 836 837 def assertEqualRange(self, text, index, start, end): 838 """Asserts that another number can be found in text starting at index, and that 839 its corresponding range is [start, end). 840 """ 841 sub = text[index:] 842 matcher = PhoneNumberMatcher(sub, "NZ", Leniency.POSSIBLE, 65535) 843 844 self.assertTrue(matcher.has_next()) 845 match = matcher.next() 846 self.assertEqual(start - index, match.start) 847 self.assertEqual(end - index, match.end) 848 self.assertEqual(sub[match.start:match.end], match.raw_string) 849 850 def assertMatchProperties(self, match, text, number, region): 851 """Asserts that the expected match is non-null, and that the raw string 852 and expected proto buffer are set appropriately.""" 853 expectedResult = phonenumberutil.parse(number, region) 854 self.assertTrue(match is not None, 855 msg="Did not find a number in '" + text + "'; expected " + number) 856 self.assertEqual(expectedResult, match.number) 857 self.assertEqual(number, match.raw_string) 858 859 def doTestFindInContext(self, number, defaultCountry): 860 """Tests numbers found by PhoneNumberMatcher in various textual contexts""" 861 self.findPossibleInContext(number, defaultCountry) 862 parsed = 
phonenumberutil.parse(number, defaultCountry) 863 if phonenumberutil.is_valid_number(parsed): 864 self.findValidInContext(number, defaultCountry) 865 866 def findPossibleInContext(self, number, defaultCountry): 867 """Tests valid numbers in contexts that should pass for Leniency.POSSIBLE""" 868 contextPairs = [NumberContext("", ""), # no context 869 NumberContext(" ", "\t"), # whitespace only 870 NumberContext("Hello ", ""), # no context at end 871 NumberContext("", " to call me!"), # no context at start 872 NumberContext("Hi there, call ", " to reach me!"), # no context at start 873 NumberContext("Hi there, call ", ", or don't"), # with commas 874 # Three examples without whitespace around the number. 875 NumberContext("Hi call", ""), 876 NumberContext("", "forme"), 877 NumberContext("Hi call", "forme"), 878 # With other small numbers. 879 NumberContext("It's cheap! Call ", " before 6:30"), 880 # With a second number later. 881 NumberContext("Call ", " or +1800-123-4567!"), 882 NumberContext("Call me on June 2 at", ""), # with a Month-Day date 883 # With publication pages. 884 NumberContext("As quoted by Alfonso 12-15 (2009), you may call me at ", ""), 885 NumberContext("As quoted by Alfonso et al. 12-15 (2009), you may call me at ", ""), 886 # With dates, written in the American style. 887 NumberContext("As I said on 03/10/2011, you may call me at ", ""), 888 # With trailing numbers after a comma. The 45 should not be considered an extension. 889 NumberContext("", ", 45 days a year"), 890 # When matching we don't consider semicolon along with legitimate extension 891 # symbol to indicate an extension. The 7246433 should not be considered an 892 # extension. 893 NumberContext("", ";x 7246433"), 894 # With a postfix stripped off as it looks like the start of another number. 
895 NumberContext("Call ", "/x12 more"), 896 ] 897 898 self.doTestInContext(number, defaultCountry, contextPairs, Leniency.POSSIBLE) 899 900 def findValidInContext(self, number, defaultCountry): 901 """Tests valid numbers in contexts that fail for Leniency.POSSIBLE but 902 are valid for Leniency.VALID.""" 903 contextPairs = [ 904 # With other small numbers. 905 NumberContext("It's only 9.99! Call ", " to buy"), 906 # With a number Day.Month.Year date. 907 NumberContext("Call me on 21.6.1984 at ", ""), 908 # With a number Month/Day date. 909 NumberContext("Call me on 06/21 at ", ""), 910 # With a number Day.Month date. 911 NumberContext("Call me on 21.6. at ", ""), 912 # With a number Month/Day/Year date. 913 NumberContext("Call me on 06/21/84 at ", ""), 914 ] 915 self.doTestInContext(number, defaultCountry, contextPairs, Leniency.VALID) 916 917 def doTestInContext(self, number, defaultCountry, contextPairs, leniency): 918 for context in contextPairs: 919 prefix = context.leadingText 920 text = prefix + number + context.trailingText 921 922 start = len(prefix) 923 end = start + len(number) 924 matcher = PhoneNumberMatcher(text, defaultCountry, leniency, 65535) 925 926 match = matcher.next() if matcher.has_next() else None 927 self.assertTrue(match is not None, 928 msg="Did not find a number in '" + text + "'; expected '" + number + "'") 929 930 extracted = text[match.start:match.end] 931 self.assertEqual(start, match.start, 932 msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'") 933 self.assertEqual(end, match.end, 934 msg="Unexpected phone region in '" + text + "'; extracted '" + extracted + "'") 935 self.assertEqual(number, extracted) 936 self.assertEqual(match.raw_string, extracted) 937 938 self.ensureTermination(text, defaultCountry, leniency) 939 940 # Exhaustively searches for phone numbers from each index within text to 941 # test that finding matches always terminates. 
942 def ensureTermination(self, text, defaultCountry, leniency): 943 for index in range(len(text) + 1): 944 sub = text[index:] 945 matches = "" 946 # Iterates over all matches. 947 for match in PhoneNumberMatcher(sub, defaultCountry, leniency, 65535): 948 matches += ", " + str(match) 949 950 def findNumbersForLeniency(self, text, defaultCountry, leniency): 951 return PhoneNumberMatcher(text, defaultCountry, leniency, 65535) 952 953 def hasNoMatches(self, matcher): 954 """Returns True if there were no matches found.""" 955 return not matcher.has_next() 956 957 def testDoubleExtensionX(self): 958 # Python version extra test - multiple x for extension marker 959 xx_ext = "800 234 1 111 xx 1111" 960 # This gives different results for different leniency values (and so 961 # can't be used in a NumberTest). 962 m0 = PhoneNumberMatcher(xx_ext, "US", leniency=Leniency.POSSIBLE).next() 963 self.assertEqual(xx_ext, m0.raw_string) 964 matcher2 = PhoneNumberMatcher(xx_ext, "US", leniency=Leniency.STRICT_GROUPING) 965 self.assertFalse(matcher2.has_next()) 966 967 def testInternals(self): 968 # Python-specific test: coverage of internals 969 from phonenumbers.phonenumbermatcher import _limit, _verify, _is_national_prefix_present_if_required, _get_national_number_groups 970 from phonenumbers import CountryCodeSource 971 self.assertEqual("{1,2}", _limit(1, 2)) 972 self.assertRaises(Exception, _limit, *(-1, 2)) 973 self.assertRaises(Exception, _limit, *(1, 0)) 974 self.assertRaises(Exception, _limit, *(2, 1)) 975 number = PhoneNumber(country_code=44, national_number=7912345678) 976 self.assertRaises(Exception, _verify, *(99, number, "12345678")) 977 self.assertRaises(ValueError, PhoneNumberMatcher, *("text", "US"), **{"leniency": None}) 978 self.assertRaises(ValueError, PhoneNumberMatcher, *("text", "US"), **{"max_tries": -2}) 979 # Invalid country looks like national prefix is present (no way to tell) 980 number2 = PhoneNumber(country_code=99, national_number=12345678, 
country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY) 981 self.assertTrue(_is_national_prefix_present_if_required(number2)) 982 # National prefix rule has no lead digits 983 number3 = PhoneNumber(country_code=61, national_number=1234567890, country_code_source=CountryCodeSource.FROM_DEFAULT_COUNTRY) 984 self.assertTrue(_is_national_prefix_present_if_required(number3)) 985 # Coverage for _get_national_number_groups() with a formatting pattern provided 986 us_number = PhoneNumber(country_code=1, national_number=6502530000) 987 num_format = NumberFormat(pattern="(\\d{3})(\\d{3})(\\d{4})", format="\\1-\\2-\\3") 988 self.assertEqual(["650", "253", "0000"], 989 _get_national_number_groups(us_number, num_format)) 990