1# -*- coding: utf-8 -*- 2"""Python phone number parsing and formatting library 3 4If you use this library, and want to be notified about important changes, 5please sign up to the libphonenumber mailing list at 6http://groups.google.com/group/libphonenumber-discuss/about. 7 8NOTE: A lot of methods in this module require Region Code strings. These must 9be provided using CLDR two-letter region-code format. These should be in 10upper-case. The list of the codes can be found here: 11http://www.iso.org/iso/country_codes/iso_3166_code_lists/country_names_and_code_elements.htm 12 13author: Shaopeng Jia (original Java version) 14author: David Drysdale (Python version) 15""" 16# Based on original Java code: 17# java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java 18# Copyright (C) 2009-2011 The Libphonenumber Authors 19# 20# Licensed under the Apache License, Version 2.0 (the "License"); 21# you may not use this file except in compliance with the License. 22# You may obtain a copy of the License at 23# 24# http://www.apache.org/licenses/LICENSE-2.0 25# 26# Unless required by applicable law or agreed to in writing, software 27# distributed under the License is distributed on an "AS IS" BASIS, 28# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 29# See the License for the specific language governing permissions and 30# limitations under the License. 
import sys
import re

from .re_util import fullmatch  # Extra regexp function; see README
from .util import UnicodeMixin, u, unicod, prnt, to_long
from .util import U_EMPTY_STRING, U_SPACE, U_DASH, U_TILDE, U_ZERO, U_SEMICOLON
from .unicode_util import digit as unicode_digit

# Data class definitions
from .phonenumber import PhoneNumber, CountryCodeSource
from .phonemetadata import NumberFormat, PhoneMetadata, REGION_CODE_FOR_NON_GEO_ENTITY

# Import auto-generated data structures
try:
    from .data import _COUNTRY_CODE_TO_REGION_CODE
    from .shortdata import _AVAILABLE_REGION_CODES as _AVAILABLE_SHORT_REGION_CODES
except ImportError:  # pragma no cover
    # Before the generated code exists, the data/ directory is empty.
    # The generation process imports this module, creating a circular
    # dependency.  The hack below works around this: when the running script
    # is one of the two generator scripts, minimal placeholder data is
    # installed; in every other case the ImportError is re-raised unchanged.
    import os
    import sys
    if (os.path.basename(sys.argv[0]) == "buildmetadatafromxml.py" or
        os.path.basename(sys.argv[0]) == "buildprefixdata.py"):
        prnt("Failed to import generated data (but OK as during autogeneration)", file=sys.stderr)
        _COUNTRY_CODE_TO_REGION_CODE = {1: ("US",)}
        _AVAILABLE_SHORT_REGION_CODES = []
    else:
        raise

# Set the master map from country code to region code.  The
# extra level of indirection allows the unit test to replace
# the map with test data.
COUNTRY_CODE_TO_REGION_CODE = _COUNTRY_CODE_TO_REGION_CODE

# Naming convention for phone number arguments and variables:
#  - string arguments are named 'number'
#  - PhoneNumber objects are named 'numobj'

# Flags to use when compiling regular expressions for phone numbers.
_REGEX_FLAGS = re.UNICODE | re.IGNORECASE
# The minimum and maximum length of the national significant number.
_MIN_LENGTH_FOR_NSN = 2
# The ITU says the maximum length should be 15, but we have found longer
# numbers in Germany.
_MAX_LENGTH_FOR_NSN = 17
# The maximum length of the country calling code.
_MAX_LENGTH_COUNTRY_CODE = 3
# We don't allow input strings for parsing to be longer than 250 chars. This
# prevents malicious input from overflowing the regular-expression engine.
_MAX_INPUT_STRING_LENGTH = 250
# Region-code for the unknown region.
UNKNOWN_REGION = u("ZZ")
# The country calling code (1) whose regions are collected into
# _NANPA_REGIONS when the derived data is regenerated.
_NANPA_COUNTRY_CODE = 1
# The prefix that needs to be inserted in front of a Colombian landline number
# when dialed from a mobile phone in Colombia.
_COLOMBIA_MOBILE_TO_FIXED_LINE_PREFIX = unicod("3")
# Map of country calling codes that use a mobile token before the area
# code. One example of when this is relevant is when determining the length of
# the national destination code, which should be the length of the area code
# plus the length of the mobile token.
_MOBILE_TOKEN_MAPPINGS = {52: u('1'), 54: u('9')}
# Set of country codes that have geographically assigned mobile numbers (see
# GEO_MOBILE_COUNTRIES below) which are not based on *area codes*. For example,
# in China mobile numbers start with a carrier indicator, and beyond that are
# geographically assigned: this carrier indicator is not considered to be an
# area code.
_GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES = frozenset((
    86,))  # China
# Set of country calling codes that have geographically assigned mobile
# numbers. This may not be complete; we add calling codes case by case, as we
# find geographical mobile numbers or hear from user reports. Note that
# countries like the US, where we can't distinguish between fixed-line or
# mobile numbers, are not listed here, since we consider FIXED_LINE_OR_MOBILE
# to be a possibly geographically-related type anyway (like FIXED_LINE).
_GEO_MOBILE_COUNTRIES = _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES | set((
    52,  # Mexico
    54,  # Argentina
    55,  # Brazil
    62))  # Indonesia: some prefixes only (fixed CDMA wireless)
# The PLUS_SIGN signifies the international prefix.
_PLUS_SIGN = u("+")
_STAR_SIGN = u('*')
# Literal markers used when reading or writing RFC 3966 ("tel:") strings.
_RFC3966_EXTN_PREFIX = u(";ext=")
_RFC3966_PREFIX = u("tel:")
_RFC3966_PHONE_CONTEXT = u(";phone-context=")
_RFC3966_ISDN_SUBADDRESS = u(";isub=")

# Simple ASCII digits map used to populate _ALPHA_PHONE_MAPPINGS and
# _ALL_PLUS_NUMBER_GROUPING_SYMBOLS.
_ASCII_DIGITS_MAP = {u("0"): u("0"), u("1"): u("1"),
                     u("2"): u("2"), u("3"): u("3"),
                     u("4"): u("4"), u("5"): u("5"),
                     u("6"): u("6"), u("7"): u("7"),
                     u("8"): u("8"), u("9"): u("9")}

# Only upper-case variants of alpha characters are stored.
_ALPHA_MAPPINGS = {u("A"): u("2"),
                   u("B"): u("2"),
                   u("C"): u("2"),
                   u("D"): u("3"),
                   u("E"): u("3"),
                   u("F"): u("3"),
                   u("G"): u("4"),
                   u("H"): u("4"),
                   u("I"): u("4"),
                   u("J"): u("5"),
                   u("K"): u("5"),
                   u("L"): u("5"),
                   u("M"): u("6"),
                   u("N"): u("6"),
                   u("O"): u("6"),
                   u("P"): u("7"),
                   u("Q"): u("7"),
                   u("R"): u("7"),
                   u("S"): u("7"),
                   u("T"): u("8"),
                   u("U"): u("8"),
                   u("V"): u("8"),
                   u("W"): u("9"),
                   u("X"): u("9"),
                   u("Y"): u("9"),
                   u("Z"): u("9"), }
# For performance reasons, amalgamate both into one map.
_ALPHA_PHONE_MAPPINGS = dict(_ALPHA_MAPPINGS, **_ASCII_DIGITS_MAP)

# A map that contains characters that are essential when dialling. That means
# any of the characters in this map must not be removed from a number when
# dialling, otherwise the call will not reach the intended destination.
_DIALLABLE_CHAR_MAPPINGS = dict({_PLUS_SIGN: _PLUS_SIGN,
                                 u('*'): u('*'),
                                 u('#'): u('#')},
                                **_ASCII_DIGITS_MAP)

# Separate map of all symbols that we wish to retain when formatting alpha
# numbers. This includes digits, ASCII letters and number grouping symbols
# such as "-" and " ".  Dash, slash, space and full-stop variants are each
# mapped onto their plain ASCII form.
_ALL_PLUS_NUMBER_GROUPING_SYMBOLS = dict({u("-"): u("-"),  # Add grouping symbols.
                                          u("\uFF0D"): u("-"),
                                          u("\u2010"): u("-"),
                                          u("\u2011"): u("-"),
                                          u("\u2012"): u("-"),
                                          u("\u2013"): u("-"),
                                          u("\u2014"): u("-"),
                                          u("\u2015"): u("-"),
                                          u("\u2212"): u("-"),
                                          u("/"): u("/"),
                                          u("\uFF0F"): u("/"),
                                          u(" "): u(" "),
                                          u("\u3000"): u(" "),
                                          u("\u2060"): u(" "),
                                          u("."): u("."),
                                          u("\uFF0E"): u(".")},
                                         # Put (lower letter -> upper letter) and
                                         # (upper letter -> upper letter) mappings.
                                         **dict([(_c.lower(), _c) for _c in _ALPHA_MAPPINGS.keys()] +
                                                [(_c, _c) for _c in _ALPHA_MAPPINGS.keys()],
                                                **_ASCII_DIGITS_MAP))

# Pattern that makes it easy to distinguish whether a region has a unique
# international dialing prefix or not. If a region has a unique international
# prefix (e.g. 011 in USA), it will be represented as a string that contains a
# sequence of ASCII digits. If there are multiple available international
# prefixes in a region, they will be represented as a regex string that always
# contains character(s) other than ASCII digits.  Note this regex also
# includes tilde, which signals waiting for the tone.
_UNIQUE_INTERNATIONAL_PREFIX = re.compile(u("[\\d]+(?:[~\u2053\u223C\uFF5E][\\d]+)?"))

# Regular expression of acceptable punctuation found in phone numbers. This
# excludes punctuation found as a leading character only. This consists of
# dash characters, white space characters, full stops, slashes, square
# brackets, parentheses and tildes. It also includes the letter 'x' as that is
# found as a placeholder for carrier information in some phone numbers. Full-width
# variants are also present.
_VALID_PUNCTUATION = (u("-x\u2010-\u2015\u2212\u30FC\uFF0D-\uFF0F ") +
                      u("\u00A0\u00AD\u200B\u2060\u3000()\uFF08\uFF09\uFF3B\uFF3D.\\[\\]/~\u2053\u223C\uFF5E"))

_DIGITS = unicod('\\d')  # Java "\\p{Nd}", so need "(?u)" or re.UNICODE wherever this is used
# We accept alpha characters in phone numbers, ASCII only, upper and lower
# case.
_VALID_ALPHA = (U_EMPTY_STRING.join(_ALPHA_MAPPINGS.keys()) +
                U_EMPTY_STRING.join([_k.lower() for _k in _ALPHA_MAPPINGS.keys()]))
# The ASCII plus sign and its full-width variant (U+FF0B).
_PLUS_CHARS = u("+\uFF0B")
# One or more consecutive plus characters.
_PLUS_CHARS_PATTERN = re.compile(u("[") + _PLUS_CHARS + u("]+"))
# One or more consecutive characters drawn from _VALID_PUNCTUATION.
_SEPARATOR_PATTERN = re.compile(u("[") + _VALID_PUNCTUATION + u("]+"))
# A single digit, captured as group 1.
_CAPTURING_DIGIT_PATTERN = re.compile(u("(") + _DIGITS + u(")"), re.UNICODE)

# Regular expression of acceptable characters that may start a phone number
# for the purposes of parsing. This allows us to strip away meaningless
# prefixes to phone numbers that may be mistakenly given to us. This consists
# of digits, the plus symbol and arabic-indic digits. This does not contain
# alpha characters, although they may be used later in the number. It also
# does not include other punctuation, as this will be stripped later during
# parsing and is of no information value when parsing a number.
_VALID_START_CHAR = u("[") + _PLUS_CHARS + _DIGITS + u("]")
_VALID_START_CHAR_PATTERN = re.compile(_VALID_START_CHAR, re.UNICODE)

# Regular expression of characters typically used to start a second phone
# number for the purposes of parsing. This allows us to strip off parts of the
# number that are actually the start of another number, such as for: (530)
# 583-6985 x302/x2303 -> the second extension here makes this actually two
# phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove the
# second extension so that the first number is parsed correctly.
_SECOND_NUMBER_START = u("[\\\\/] *x")
_SECOND_NUMBER_START_PATTERN = re.compile(_SECOND_NUMBER_START)

# Regular expression of trailing characters that we want to remove. We remove
# all characters that are not alpha or numerical characters. The hash
# character is retained here, as it may signify the previous block was an
# extension.
#
# The original Java regexp is:
#   [[\\P{N}&&\\P{L}]&&[^#]]+$
# which splits out as:
#   [                      ]+$  : >=1 of the following chars at end of string
#    [              ]&&[  ]     : intersection of these two sets of chars
#    [      &&      ]           : intersection of these two sets of chars
#     \\P{N}                    : characters without the "Number" Unicode property
#              \\P{L}           : characters without the "Letter" Unicode property
#                      [^#]     : character other than hash
# which nets down to: >=1 non-Number, non-Letter, non-# characters at string end
# In Python Unicode regexp mode '(?u)', the class '[^#\w]' will match anything
# that is not # and is not alphanumeric and is not underscore.
#
# The backslash is doubled because this is not a raw string literal: a bare
# "\w" is an invalid string escape that newer Python versions warn about.  The
# resulting pattern text is unchanged (backslash followed by "w").
_UNWANTED_END_CHARS = u("(?u)(?:_|[^#\\w])+$")
_UNWANTED_END_CHAR_PATTERN = re.compile(_UNWANTED_END_CHARS)

# We use this pattern to check if the phone number has at least three letters
# in it - if so, then we treat it as a number where some phone-number digits
# are represented by letters.
_VALID_ALPHA_PHONE_PATTERN = re.compile(u("(?:.*?[A-Za-z]){3}.*"))

# Regular expression of viable phone numbers. This is location
# independent. Checks we have at least three leading digits, and only valid
# punctuation, alpha characters and digits in the phone number. Does not
# include extension data. The symbol 'x' is allowed here as valid punctuation
# since it is often used as a placeholder for carrier codes, for example in
# Brazilian phone numbers. We also allow multiple "+" characters at the start.
# Corresponds to the following:
# [digits]{minLengthNsn}|
# plus_sign*(([punctuation]|[star])*[digits]){3,}([punctuation]|[star]|[digits]|[alpha])*
#
# The first reg-ex is to allow short numbers (two digits long) to be parsed if
# they are entered as "15" etc, but only if there is no punctuation in
# them. The second expression restricts the number of digits to three or more,
# but then allows them to be in international form, and to have
# alpha-characters and punctuation.
#
# Note VALID_PUNCTUATION starts with a -, so must be the first in the range.
_VALID_PHONE_NUMBER = (_DIGITS + (u("{%d}") % _MIN_LENGTH_FOR_NSN) + u("|") +
                       u("[") + _PLUS_CHARS + u("]*(?:[") + _VALID_PUNCTUATION + _STAR_SIGN + u("]*") + _DIGITS + u("){3,}[") +
                       _VALID_PUNCTUATION + _STAR_SIGN + _VALID_ALPHA + _DIGITS + u("]*"))

# Default extension prefix to use when formatting. This will be put in front
# of any extension component of the number, after the main national number is
# formatted. For example, if you wish the default extension formatting to be
# " extn: 3456", then you should specify " extn: " here as the default
# extension prefix. This can be overridden by region-specific preferences.
_DEFAULT_EXTN_PREFIX = u(" ext. ")

# Pattern to capture digits used in an extension. Places a maximum length of
# "7" for an extension.
_CAPTURING_EXTN_DIGITS = u("(") + _DIGITS + u("{1,7})")

# Regexp of all possible ways to write extensions, for use when parsing. This
# will be run as a case-insensitive regexp match. Wide character versions are
# also provided after each ASCII version.

# One-character symbols that can be used to indicate an extension.
_SINGLE_EXTN_SYMBOLS_FOR_MATCHING = u("x\uFF58#\uFF03~\uFF5E")
# For parsing, we are slightly more lenient in our interpretation than for
# matching. Here we allow "comma" and "semicolon" as a possible extension
# indicator. When matching, these are hardly ever used to indicate this.
_SINGLE_EXTN_SYMBOLS_FOR_PARSING = u(",;") + _SINGLE_EXTN_SYMBOLS_FOR_MATCHING


def _create_extn_pattern(single_extn_symbols):
    """Helper initialiser method to create the regular-expression pattern to
    match extensions, allowing the one-char extension symbols provided by
    single_extn_symbols.

    Arguments:
    single_extn_symbols -- string of single characters that may introduce an
              extension; it is placed inside a regex character class, so it
              must be valid in that position.

    Returns the pattern as an (uncompiled) string made of three alternatives,
    each with its own capturing group around the extension digits.
    """
    # There are three regular expressions here. The first covers RFC 3966
    # format, where the extension is added using ";ext=". The second more
    # generic one starts with optional white space and ends with an optional
    # full stop (.), followed by zero or more spaces/tabs/commas and then the
    # numbers themselves. The other one covers the special case of American
    # numbers where the extension is written with a hash at the end, such as
    # "- 503#".  Note that the only capturing groups should be around the
    # digits that you want to capture as part of the extension, or else
    # parsing will fail!  Canonical-equivalence doesn't seem to be an option
    # with Android java, so we allow two options for representing the accented
    # o - the character itself, and one in the unicode decomposed form with
    # the combining acute accent.
    return (_RFC3966_EXTN_PREFIX + _CAPTURING_EXTN_DIGITS + u("|") +
            u("[ \u00A0\\t,]*(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|") +
            u("\uFF45?\uFF58\uFF54\uFF4E?|") +
            u("[") + single_extn_symbols + u("]|int|anexo|\uFF49\uFF4E\uFF54)") +
            u("[:\\.\uFF0E]?[ \u00A0\\t,-]*") + _CAPTURING_EXTN_DIGITS + u("#?|") +
            u("[- ]+(") + _DIGITS + u("{1,5})#"))

# The same pattern text built twice: once with the lenient symbol set used
# for parsing and once with the stricter set used for matching.
_EXTN_PATTERNS_FOR_PARSING = _create_extn_pattern(_SINGLE_EXTN_SYMBOLS_FOR_PARSING)
_EXTN_PATTERNS_FOR_MATCHING = _create_extn_pattern(_SINGLE_EXTN_SYMBOLS_FOR_MATCHING)

# Regexp of all known extension prefixes used by different regions followed by
# 1 or more valid digits, for use when parsing.
_EXTN_PATTERN = re.compile(u("(?:") + _EXTN_PATTERNS_FOR_PARSING + u(")$"), _REGEX_FLAGS)

# A valid phone number may be followed by an extension prefix and one or more
# extension digits, so the extension pattern is appended as an optional tail.
_VALID_PHONE_NUMBER_PATTERN = re.compile(_VALID_PHONE_NUMBER + u("(?:") + _EXTN_PATTERNS_FOR_PARSING + u(")?"), _REGEX_FLAGS)

# The group is non-capturing because Python's re.split() interleaves any
# captured groups with the split results (Java's Pattern.split() does not).
NON_DIGITS_PATTERN = re.compile(u("(?:\\D+)"))

# FIRST_GROUP_PATTERN was originally \1, but for some countries (e.g.
# Argentina) the national pattern does not use the first group, so \1 would
# not match correctly.  Matching \d instead picks up whichever group is the
# first one actually used in the pattern.
_FIRST_GROUP_PATTERN = re.compile(u(r"(\\\d)"))
# Literal "$NP", "$FG" and "$CC" placeholders.
_NP_PATTERN = re.compile(u("\\$NP"))
_FG_PATTERN = re.compile(u("\\$FG"))
_CC_PATTERN = re.compile(u("\\$CC"))

# Detects a national prefix formatting rule that consists of the first group
# only, i.e. one that does not start with the national prefix.  Unbalanced
# parentheses are deliberately tolerated.
_FIRST_GROUP_ONLY_PREFIX_PATTERN = re.compile("\\(?\\\\1\\)?")


class PhoneNumberFormat(object):
    """
    Output formats for a phone number.

    INTERNATIONAL and NATIONAL follow ITU-T Recommendation E123: the Google
    Switzerland office number is "+41 44 668 1800" in INTERNATIONAL format
    and "044 668 1800" in NATIONAL format.  E164 is INTERNATIONAL with no
    formatting applied, e.g. "+41446681800".  RFC3966 is INTERNATIONAL with
    every space or other separator replaced by a hyphen, any extension
    appended after ";ext=", and a "tel:" prefix, e.g.
    "tel:+41-44-668-1800".

    Note: to store a number in a neutral form, prefer the PhoneNumber class
    over any of these string formats.
    """
    E164 = 0
    INTERNATIONAL = 1
    NATIONAL = 2
    RFC3966 = 3


class PhoneNumberType(object):
    """Categories a phone number can belong to."""
    FIXED_LINE = 0
    MOBILE = 1
    # Some regions (e.g. the USA) give no way of telling fixed-line and
    # mobile numbers apart from the number alone.
    FIXED_LINE_OR_MOBILE = 2
    # Freephone lines
    TOLL_FREE = 3
    PREMIUM_RATE = 4
    # Call cost is split between caller and recipient, so it is typically
    # cheaper than PREMIUM_RATE.  See
    # http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
    SHARED_COST = 5
    # Voice over IP numbers, including TSoIP (Telephony Service over IP).
    VOIP = 6
    # Tied to a person rather than a line; may route to either a MOBILE or a
    # FIXED_LINE number.  See http://en.wikipedia.org/wiki/Personal_Numbers
    PERSONAL_NUMBER = 7
    PAGER = 8
    # "Universal Access Numbers" / "Company Numbers": a single number for a
    # company, which may be routed onwards to specific offices.
    UAN = 9
    # "Voice Mail Access Numbers".
    VOICEMAIL = 10
    # Assigned when a number fits none of the known patterns for a specific
    # region.
    UNKNOWN = 99

    @classmethod
    def values(cls):
        """Return a tuple of every type constant, in declaration order."""
        every_type = (
            PhoneNumberType.FIXED_LINE,
            PhoneNumberType.MOBILE,
            PhoneNumberType.FIXED_LINE_OR_MOBILE,
            PhoneNumberType.TOLL_FREE,
            PhoneNumberType.PREMIUM_RATE,
            PhoneNumberType.SHARED_COST,
            PhoneNumberType.VOIP,
            PhoneNumberType.PERSONAL_NUMBER,
            PhoneNumberType.PAGER,
            PhoneNumberType.UAN,
            PhoneNumberType.VOICEMAIL,
            PhoneNumberType.UNKNOWN,
        )
        return every_type


class MatchType(object):
    """Possible results of comparing two phone numbers."""
    # Not a telephone number
    NOT_A_NUMBER = 0
    # None of the match types below apply
    NO_MATCH = 1
    # Either or both numbers have no region specified, or both specify the
    # same region, and one NSN could be a shorter version of the other.  This
    # includes the case where only one of the two has an extension.
    SHORT_NSN_MATCH = 2
    # Either or both numbers have no region specified, and the NSNs and
    # extensions are the same.
    NSN_MATCH = 3
    # country_code, NSN, presence of a leading zero for Italian numbers and
    # any extension present are all the same.
    EXACT_MATCH = 4


class ValidationResult(object):
    """Possible outcomes when testing if a PhoneNumber is a possible number."""
    # The number length matches that of valid numbers for this region.
    IS_POSSIBLE = 0
    # The number length matches that of local numbers for this region only
    # (i.e. numbers that may be diallable within an area, but lack the
    # information needed to be dialled from anywhere inside or outside the
    # country).
    IS_POSSIBLE_LOCAL_ONLY = 4
    # The number has an invalid country calling code.
    INVALID_COUNTRY_CODE = 1
    # The number is shorter than all valid numbers for this region.
    TOO_SHORT = 2
    # The number is longer than the shortest valid numbers for this region
    # and shorter than the longest, yet its own length matches no valid
    # number length for the region.  Also returned when
    # is_possible_number_for_type_with_reason was called and the region has
    # no numbers of the requested type at all.
    INVALID_LENGTH = 5
    # The number is longer than all valid numbers for this region.
    TOO_LONG = 3


# Derived data structures
SUPPORTED_REGIONS = set()
COUNTRY_CODES_FOR_NON_GEO_REGIONS = set()
_NANPA_REGIONS = set()
SUPPORTED_SHORT_REGIONS = _AVAILABLE_SHORT_REGION_CODES


def _regenerate_derived_data():
    """Rebuild the derived region sets from COUNTRY_CODE_TO_REGION_CODE.

    The three sets are emptied and refilled in place, so existing references
    to them remain valid.
    """
    global SUPPORTED_REGIONS, COUNTRY_CODES_FOR_NON_GEO_REGIONS, _NANPA_REGIONS
    SUPPORTED_REGIONS.clear()
    COUNTRY_CODES_FOR_NON_GEO_REGIONS.clear()
    for calling_code, regions in COUNTRY_CODE_TO_REGION_CODE.items():
        only_non_geo = (len(regions) == 1 and
                        regions[0] == REGION_CODE_FOR_NON_GEO_ENTITY)
        if only_non_geo:
            COUNTRY_CODES_FOR_NON_GEO_REGIONS.add(calling_code)
            continue
        SUPPORTED_REGIONS.update(regions)
    # The non-geographical pseudo-region never counts as a supported region.
    SUPPORTED_REGIONS.discard(REGION_CODE_FOR_NON_GEO_ENTITY)
    _NANPA_REGIONS.clear()
    _NANPA_REGIONS.update(COUNTRY_CODE_TO_REGION_CODE[_NANPA_COUNTRY_CODE])


_regenerate_derived_data()


def _copy_number_format(other):
    """Return a mutable copy of the given NumberFormat object.

    The leading-digits patterns are copied into a fresh list; every other
    field is passed through unchanged.
    """
    duplicate = NumberFormat(
        pattern=other.pattern,
        format=other.format,
        leading_digits_pattern=list(other.leading_digits_pattern),
        national_prefix_formatting_rule=other.national_prefix_formatting_rule,
        national_prefix_optional_when_formatting=other.national_prefix_optional_when_formatting,
        domestic_carrier_code_formatting_rule=other.domestic_carrier_code_formatting_rule)
    duplicate._mutable = True
    return duplicate
510 511def _extract_possible_number(number): 512 """Attempt to extract a possible number from the string passed in. 513 514 This currently strips all leading characters that cannot be used to 515 start a phone number. Characters that can be used to start a phone number 516 are defined in the VALID_START_CHAR_PATTERN. If none of these characters 517 are found in the number passed in, an empty string is returned. This 518 function also attempts to strip off any alternative extensions or endings 519 if two or more are present, such as in the case of: (530) 583-6985 520 x302/x2303. The second extension here makes this actually two phone 521 numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove the 522 second extension so that the first number is parsed correctly. 523 524 Arguments: 525 number -- The string that might contain a phone number. 526 527 Returns the number, stripped of any non-phone-number prefix (such 528 as "Tel:") or an empty string if no character used to start phone 529 numbers (such as + or any digit) is found in the number 530 """ 531 match = _VALID_START_CHAR_PATTERN.search(number) 532 if match: 533 number = number[match.start():] 534 # Remove trailing non-alpha non-numberical characters. 535 trailing_chars_match = _UNWANTED_END_CHAR_PATTERN.search(number) 536 if trailing_chars_match: 537 number = number[:trailing_chars_match.start()] 538 # Check for extra numbers at the end. 539 second_number_match = _SECOND_NUMBER_START_PATTERN.search(number) 540 if second_number_match: 541 number = number[:second_number_match.start()] 542 return number 543 else: 544 return U_EMPTY_STRING 545 546 547def _is_viable_phone_number(number): 548 """Checks to see if a string could possibly be a phone number. 549 550 At the moment, checks to see that the string begins with at least 2 551 digits, ignoring any punctuation commonly found in phone numbers. 
This 552 method does not require the number to be normalized in advance - but does 553 assume that leading non-number symbols have been removed, such as by the 554 method _extract_possible_number. 555 556 Arguments: 557 number -- string to be checked for viability as a phone number 558 559 Returns True if the number could be a phone number of some sort, otherwise 560 False 561 """ 562 if len(number) < _MIN_LENGTH_FOR_NSN: 563 return False 564 match = fullmatch(_VALID_PHONE_NUMBER_PATTERN, number) 565 return bool(match) 566 567 568def _normalize(number): 569 """Normalizes a string of characters representing a phone number. 570 571 This performs the following conversions: 572 - Punctuation is stripped. 573 - For ALPHA/VANITY numbers: 574 - Letters are converted to their numeric representation on a telephone 575 keypad. The keypad used here is the one defined in ITU 576 Recommendation E.161. This is only done if there are 3 or more 577 letters in the number, to lessen the risk that such letters are 578 typos - otherwise alpha characters are stripped. 579 - For other numbers: 580 - Wide-ascii digits are converted to normal ASCII (European) digits. 581 - Arabic-Indic numerals are converted to European numerals. 582 - Spurious alpha characters are stripped. 583 584 Arguments: 585 number -- string representing a phone number 586 587 Returns the normalized string version of the phone number. 588 """ 589 m = fullmatch(_VALID_ALPHA_PHONE_PATTERN, number) 590 if m: 591 return _normalize_helper(number, _ALPHA_PHONE_MAPPINGS, True) 592 else: 593 return normalize_digits_only(number) 594 595 596def normalize_digits_only(number, keep_non_digits=False): 597 """Normalizes a string of characters representing a phone number. 598 599 This converts wide-ascii and arabic-indic numerals to European numerals, 600 and strips punctuation and alpha characters (optional). 
601 602 Arguments: 603 number -- a string representing a phone number 604 keep_non_digits -- whether to keep non-digits 605 606 Returns the normalized string version of the phone number. 607 """ 608 number = unicod(number) 609 number_length = len(number) 610 normalized_digits = U_EMPTY_STRING 611 for ii in range(number_length): 612 d = unicode_digit(number[ii], -1) 613 if d != -1: 614 normalized_digits += unicod(d) 615 elif keep_non_digits: 616 normalized_digits += number[ii] 617 return normalized_digits 618 619 620def normalize_diallable_chars_only(number): 621 """Normalizes a string of characters representing a phone number. 622 623 This strips all characters which are not diallable on a mobile phone 624 keypad (including all non-ASCII digits). 625 626 Arguments: 627 number -- a string of characters representing a phone number 628 629 Returns the normalized string version of the phone number. 630 """ 631 return _normalize_helper(number, _DIALLABLE_CHAR_MAPPINGS, True) 632 633 634def convert_alpha_characters_in_number(number): 635 """Convert alpha chars in a number to their respective digits on a keypad, 636 but retains existing formatting.""" 637 return _normalize_helper(number, _ALPHA_PHONE_MAPPINGS, False) 638 639 640def length_of_geographical_area_code(numobj): 641 """Return length of the geographical area code for a number. 642 643 Gets the length of the geographical area code from the PhoneNumber object 644 passed in, so that clients could use it to split a national significant 645 number into geographical area code and subscriber number. It works in such 646 a way that the resultant subscriber number should be diallable, at least 647 on some devices. An example of how this could be used: 648 649 >>> import phonenumbers 650 >>> numobj = phonenumbers.parse("16502530000", "US") 651 >>> nsn = phonenumbers.national_significant_number(numobj) 652 >>> ac_len = phonenumbers.length_of_geographical_area_code(numobj) 653 >>> if ac_len > 0: 654 ... 
area_code = nsn[:ac_len] 655 ... subscriber_number = nsn[ac_len:] 656 ... else: 657 ... area_code = "" 658 ... subscriber_number = nsn 659 660 N.B.: area code is a very ambiguous concept, so the I18N team generally 661 recommends against using it for most purposes, but recommends using the 662 more general national_number instead. Read the following carefully before 663 deciding to use this method: 664 665 - geographical area codes change over time, and this method honors those 666 changes; therefore, it doesn't guarantee the stability of the result it 667 produces. 668 - subscriber numbers may not be diallable from all devices (notably 669 mobile devices, which typically require the full national_number to be 670 dialled in most countries). 671 - most non-geographical numbers have no area codes, including numbers 672 from non-geographical entities. 673 - some geographical numbers have no area codes. 674 675 Arguments: 676 numobj -- The PhoneNumber object to find the length of the area code form. 677 678 Returns the length of area code of the PhoneNumber object passed in. 679 """ 680 metadata = PhoneMetadata.metadata_for_region(region_code_for_number(numobj), None) 681 if metadata is None: 682 return 0 683 684 # If a country doesn't use a national prefix, and this number doesn't have 685 # an Italian leading zero, we assume it is a closed dialling plan with no 686 # area codes. 687 if metadata.national_prefix is None and not numobj.italian_leading_zero: 688 return 0 689 690 ntype = number_type(numobj) 691 country_code = numobj.country_code 692 if (ntype == PhoneNumberType.MOBILE and 693 (country_code in _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES)): 694 # Note this is a rough heuristic; it doesn't cover Indonesia well, for 695 # example, where area codes are present for some mobile phones but not 696 # for others. We have no better way of representing this in the 697 # metadata at this point. 
def length_of_national_destination_code(numobj):
    """Return the length of the national destination code (NDC) of a number.

    The NDC is normally the first group of digit(s) right after the country
    calling code when the number is written in international format,
    provided a subscriber number part follows it. Clients can use the
    returned length to split a national significant number into NDC and
    subscriber number, for example:

    >>> import phonenumbers
    >>> numobj = phonenumbers.parse("18002530000", "US")
    >>> nsn = phonenumbers.national_significant_number(numobj)
    >>> ndc_len = phonenumbers.length_of_national_destination_code(numobj)
    >>> if ndc_len > 0:
    ...     national_destination_code = nsn[:ndc_len]
    ...     subscriber_number = nsn[ndc_len:]
    ... else:
    ...     national_destination_code = ""
    ...     subscriber_number = nsn

    Refer to the unittests to see the difference between this function and
    length_of_geographical_area_code.

    Arguments:
    numobj -- The PhoneNumber object to find the length of the NDC from.

    Returns the length of NDC of the PhoneNumber object passed in, or 0 if
    the internationally formatted number has no separate NDC group.
    """
    # The caller's object must not be modified, and the extension must not
    # take part in formatting, so format a stripped copy when one is set.
    if numobj.extension is None:
        number_without_ext = numobj
    else:
        number_without_ext = PhoneNumber()
        number_without_ext.merge_from(numobj)
        number_without_ext.extension = None

    international = format_number(number_without_ext, PhoneNumberFormat.INTERNATIONAL)
    digit_groups = re.split(NON_DIGITS_PATTERN, international)

    # The formatted text starts with "+COUNTRY_CODE ", so element 0 is the
    # empty string before the '+', element 1 is the country calling code,
    # and element 2 is the candidate NDC -- but only if it is followed by
    # at least one more group.
    if len(digit_groups) <= 3:
        return 0

    ndc_length = len(digit_groups[2])
    # Mobile numbers in countries with a mobile token (e.g. Argentina,
    # formatted as +54 9 NDC XXXX...) carry the token in its own group
    # ahead of the NDC. The token is part of the national significant
    # number, so its length is counted together with the following group.
    # This assumes the token is always formatted as a separate group.
    if (number_type(numobj) == PhoneNumberType.MOBILE and
            country_mobile_token(numobj.country_code) != U_EMPTY_STRING):
        ndc_length += len(digit_groups[3])
    return ndc_length


def country_mobile_token(country_code):
    """Return the mobile token for a country calling code, if it has one.

    A mobile token is a number inserted before the area code when dialing
    a mobile number from that country from abroad.

    Arguments:
    country_code -- the country calling code for which we want the mobile token

    Returns the mobile token, as a string, for the given country calling
    code, or an empty string if the country has none.
    """
    token = _MOBILE_TOKEN_MAPPINGS.get(country_code, U_EMPTY_STRING)
    return token


def _normalize_helper(number, replacements, remove_non_matches):
    """Normalize a phone number string through a character mapping.

    Each character is upper-cased and looked up in the replacements map;
    a hit is substituted by the mapped value. A miss is either dropped or
    kept verbatim, depending on remove_non_matches.

    Arguments:
    number -- a string representing a phone number
    replacements -- a mapping of characters to what they should be replaced
              by in the normalized version of the phone number
    remove_non_matches -- indicates whether characters that are not able to be
              replaced should be stripped from the number. If this is False,
              they will be left unchanged in the number.

    Returns the normalized string version of the phone number.
    """
    pieces = []
    for original_char in number:
        mapped = replacements.get(original_char.upper(), None)
        if mapped is None:
            # Unmapped character: keep it only when stripping is disabled.
            if not remove_non_matches:
                pieces.append(original_char)
            continue
        pieces.append(mapped)
    return U_EMPTY_STRING.join(pieces)


def _desc_has_possible_number_data(desc):
    """Returns true if there is any possible number data set for a particular PhoneNumberDesc.

    Arguments:
    desc -- The PhoneNumberDesc to inspect; may be None.
    """
    if desc is None:
        return False
    # An empty possible_length means numbers of this type inherit from the
    # "general desc"; a lone "-1" entry means that no numbers exist for
    # this type at all.
    lengths = desc.possible_length
    return not (len(lengths) == 1 and lengths[0] == -1)


# Note: _desc_has_data must account for any of MetadataFilter's excludableChildFields potentially
# being absent from the metadata. It must check them all. For any changes in _desc_has_data, ensure
# that all the excludableChildFields are still being checked. If your change is safe simply
# mention why during a review without needing to change MetadataFilter.
821def _desc_has_data(desc): 822 """Returns true if there is any data set for a particular PhoneNumberDesc.""" 823 if desc is None: 824 return False 825 # Checking most properties since we don't know what's present, since a custom build may have 826 # stripped just one of them (e.g. liteBuild strips exampleNumber). We don't bother checking the 827 # possibleLengthsLocalOnly, since if this is the only thing that's present we don't really 828 # support the type at all: no type-specific methods will work with only this data. 829 return ((desc.example_number is not None) or 830 _desc_has_possible_number_data(desc) or 831 ((desc.national_number_pattern is not None) and (desc.national_number_pattern != "NA"))) 832 833 834def _supported_types_for_metadata(metadata): 835 """Returns the types we have metadata for based on the PhoneMetadata object passed in, which must be non-None.""" 836 numtypes = set() 837 for numtype in PhoneNumberType.values(): 838 if numtype in (PhoneNumberType.FIXED_LINE_OR_MOBILE, PhoneNumberType.UNKNOWN): 839 # Never return FIXED_LINE_OR_MOBILE (it is a convenience type, and represents that a 840 # particular number type can't be determined) or UNKNOWN (the non-type). 841 continue 842 if _desc_has_data(_number_desc_by_type(metadata, numtype)): 843 numtypes.add(numtype) 844 return numtypes 845 846 847def supported_types_for_region(region_code): 848 """Returns the types for a given region which the library has metadata for. 849 850 Will not include FIXED_LINE_OR_MOBILE (if numbers in this region could 851 be classified as FIXED_LINE_OR_MOBILE, both FIXED_LINE and MOBILE would 852 be present) and UNKNOWN. 853 854 No types will be returned for invalid or unknown region codes. 
855 """ 856 if not _is_valid_region_code(region_code): 857 return set() 858 metadata = PhoneMetadata.metadata_for_region(region_code.upper()) 859 return _supported_types_for_metadata(metadata) 860 861 862def supported_types_for_non_geo_entity(country_code): 863 """Returns the types for a country-code belonging to a non-geographical entity 864 which the library has metadata for. Will not include FIXED_LINE_OR_MOBILE 865 (if numbers for this non-geographical entity could be classified as 866 FIXED_LINE_OR_MOBILE, both FIXED_LINE and MOBILE would be present) and 867 UNKNOWN. 868 869 No types will be returned for country calling codes that do not map to a 870 known non-geographical entity. 871 """ 872 metadata = PhoneMetadata.metadata_for_nongeo_region(country_code, None) 873 if metadata is None: 874 return set() 875 return _supported_types_for_metadata(metadata) 876 877 878def _formatting_rule_has_first_group_only(national_prefix_formatting_rule): 879 """Helper function to check if the national prefix formatting rule has the 880 first group only, i.e., does not start with the national prefix. 881 """ 882 if national_prefix_formatting_rule is None: 883 return True 884 return bool(fullmatch(_FIRST_GROUP_ONLY_PREFIX_PATTERN, 885 national_prefix_formatting_rule)) 886 887 888def is_number_geographical(numobj): 889 """Tests whether a phone number has a geographical association. 890 891 It checks if the number is associated to a certain region in the country 892 where it belongs to. Note that this doesn't verify if the number is 893 actually in use. 894 country_code -- the country calling code for which we want the mobile token 895 """ 896 return is_number_type_geographical(number_type(numobj), numobj.country_code) 897 898 899def is_number_type_geographical(num_type, country_code): 900 """Tests whether a phone number has a geographical association, 901 as represented by its type and the country it belongs to. 
902 903 This version of isNumberGeographical exists since calculating the phone 904 number type is expensive; if we have already done this, we don't want to 905 do it again. 906 """ 907 return (num_type == PhoneNumberType.FIXED_LINE or 908 num_type == PhoneNumberType.FIXED_LINE_OR_MOBILE or 909 ((country_code in _GEO_MOBILE_COUNTRIES) and 910 num_type == PhoneNumberType.MOBILE)) 911 912 913def _is_valid_region_code(region_code): 914 """Helper function to check region code is not unknown or None""" 915 if region_code is None: 916 return False 917 return (region_code in SUPPORTED_REGIONS) 918 919 920def _has_valid_country_calling_code(country_calling_code): 921 return (country_calling_code in _COUNTRY_CODE_TO_REGION_CODE) 922 923 924def format_number(numobj, num_format): 925 """Formats a phone number in the specified format using default rules. 926 927 Note that this does not promise to produce a phone number that the user 928 can dial from where they are - although we do format in either 'national' 929 or 'international' format depending on what the client asks for, we do not 930 currently support a more abbreviated format, such as for users in the same 931 "area" who could potentially dial the number without area code. Note that 932 if the phone number has a country calling code of 0 or an otherwise 933 invalid country calling code, we cannot work out which formatting rules to 934 apply so we return the national significant number with no formatting 935 applied. 936 937 Arguments: 938 numobj -- The phone number to be formatted. 939 num_format -- The format the phone number should be formatted into 940 941 Returns the formatted phone number. 942 """ 943 if numobj.national_number == 0 and numobj.raw_input is not None: 944 # Unparseable numbers that kept their raw input just use that. This 945 # is the only case where a number can be formatted as E164 without a 946 # leading '+' symbol (but the original number wasn't parseable 947 # anyway). 
948 # TODO: Consider removing the 'if' above so that unparseable strings 949 # without raw input format to the empty string instead of "+00". 950 if len(numobj.raw_input) > 0: 951 return numobj.raw_input 952 country_calling_code = numobj.country_code 953 nsn = national_significant_number(numobj) 954 if num_format == PhoneNumberFormat.E164: 955 # Early exit for E164 case (even if the country calling code is 956 # invalid) since no formatting of the national number needs to be 957 # applied. Extensions are not formatted. 958 return _prefix_number_with_country_calling_code(country_calling_code, num_format, nsn) 959 if not _has_valid_country_calling_code(country_calling_code): 960 return nsn 961 # Note region_code_for_country_code() is used because formatting 962 # information for regions which share a country calling code is contained 963 # by only one region for performance reasons. For example, for NANPA 964 # regions it will be contained in the metadata for US. 965 region_code = region_code_for_country_code(country_calling_code) 966 # Metadata cannot be None because the country calling code is valid (which 967 # means that the region code cannot be ZZ and must be one of our supported 968 # region codes). 969 metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_calling_code, region_code.upper()) 970 formatted_number = _format_nsn(nsn, metadata, num_format) 971 formatted_number = _maybe_append_formatted_extension(numobj, 972 metadata, 973 num_format, 974 formatted_number) 975 return _prefix_number_with_country_calling_code(country_calling_code, 976 num_format, 977 formatted_number) 978 979 980def format_by_pattern(numobj, number_format, user_defined_formats): 981 """Formats a phone number using client-defined formatting rules." 
982 983 Note that if the phone number has a country calling code of zero or an 984 otherwise invalid country calling code, we cannot work out things like 985 whether there should be a national prefix applied, or how to format 986 extensions, so we return the national significant number with no 987 formatting applied. 988 989 Arguments: 990 numobj -- The phone number to be formatted 991 num_format -- The format the phone number should be formatted into 992 user_defined_formats -- formatting rules specified by clients 993 994 Returns the formatted phone number. 995 """ 996 country_code = numobj.country_code 997 nsn = national_significant_number(numobj) 998 if not _has_valid_country_calling_code(country_code): 999 return nsn 1000 # Note region_code_for_country_code() is used because formatting 1001 # information for regions which share a country calling code is contained 1002 # by only one region for performance reasons. For example, for NANPA 1003 # regions it will be contained in the metadata for US. 1004 region_code = region_code_for_country_code(country_code) 1005 # Metadata cannot be None because the country calling code is valid. 1006 metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code) 1007 1008 formatted_number = U_EMPTY_STRING 1009 formatting_pattern = _choose_formatting_pattern_for_number(user_defined_formats, nsn) 1010 if formatting_pattern is None: 1011 # If no pattern above is matched, we format the number as a whole. 1012 formatted_number = nsn 1013 else: 1014 num_format_copy = _copy_number_format(formatting_pattern) 1015 # Before we do a replacement of the national prefix pattern $NP with 1016 # the national prefix, we need to copy the rule so that subsequent 1017 # replacements for different numbers have the appropriate national 1018 # prefix. 
1019 np_formatting_rule = formatting_pattern.national_prefix_formatting_rule 1020 if np_formatting_rule: 1021 national_prefix = metadata.national_prefix 1022 if national_prefix: 1023 # Replace $NP with national prefix and $FG with the first 1024 # group (\1) matcher. 1025 np_formatting_rule = re.sub(_NP_PATTERN, 1026 national_prefix, 1027 np_formatting_rule, 1028 count=1) 1029 np_formatting_rule = re.sub(_FG_PATTERN, 1030 unicod("\\\\1"), 1031 np_formatting_rule, 1032 count=1) 1033 num_format_copy.national_prefix_formatting_rule = np_formatting_rule 1034 else: 1035 # We don't want to have a rule for how to format the national 1036 # prefix if there isn't one. 1037 num_format_copy.national_prefix_formatting_rule = None 1038 formatted_number = _format_nsn_using_pattern(nsn, num_format_copy, number_format) 1039 formatted_number = _maybe_append_formatted_extension(numobj, 1040 metadata, 1041 number_format, 1042 formatted_number) 1043 formatted_number = _prefix_number_with_country_calling_code(country_code, 1044 number_format, 1045 formatted_number) 1046 return formatted_number 1047 1048 1049def format_national_number_with_carrier_code(numobj, carrier_code): 1050 """Format a number in national format for dialing using the specified carrier. 1051 1052 The carrier-code will always be used regardless of whether the phone 1053 number already has a preferred domestic carrier code stored. If 1054 carrier_code contains an empty string, returns the number in national 1055 format without any carrier code. 1056 1057 Arguments: 1058 numobj -- The phone number to be formatted 1059 carrier_code -- The carrier selection code to be used 1060 1061 Returns the formatted phone number in national format for dialing using 1062 the carrier as specified in the carrier_code. 
1063 """ 1064 country_code = numobj.country_code 1065 nsn = national_significant_number(numobj) 1066 if not _has_valid_country_calling_code(country_code): 1067 return nsn 1068 # Note region_code_for_country_code() is used because formatting 1069 # information for regions which share a country calling code is contained 1070 # by only one region for performance reasons. For example, for NANPA 1071 # regions it will be contained in the metadata for US. 1072 region_code = region_code_for_country_code(country_code) 1073 # Metadata cannot be None because the country calling code is valid 1074 metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code) 1075 formatted_number = _format_nsn(nsn, 1076 metadata, 1077 PhoneNumberFormat.NATIONAL, 1078 carrier_code) 1079 formatted_number = _maybe_append_formatted_extension(numobj, 1080 metadata, 1081 PhoneNumberFormat.NATIONAL, 1082 formatted_number) 1083 formatted_number = _prefix_number_with_country_calling_code(country_code, 1084 PhoneNumberFormat.NATIONAL, 1085 formatted_number) 1086 return formatted_number 1087 1088 1089def format_national_number_with_preferred_carrier_code(numobj, fallback_carrier_code): 1090 """Formats a phone number in national format for dialing using the carrier 1091 as specified in the preferred_domestic_carrier_code field of the 1092 PhoneNumber object passed in. If that is missing, use the 1093 fallback_carrier_code passed in instead. If there is no 1094 preferred_domestic_carrier_code, and the fallback_carrier_code contains an 1095 empty string, return the number in national format without any carrier 1096 code. 1097 1098 Use format_national_number_with_carrier_code instead if the carrier code 1099 passed in should take precedence over the number's 1100 preferred_domestic_carrier_code when formatting. 
1101 1102 Arguments: 1103 numobj -- The phone number to be formatted 1104 carrier_code -- The carrier selection code to be used, if none is found in the 1105 phone number itself. 1106 1107 Returns the formatted phone number in national format for dialing using 1108 the number's preferred_domestic_carrier_code, or the fallback_carrier_code 1109 pass in if none is found. 1110 """ 1111 # Historically, we set this to an empty string when parsing with raw input 1112 # if none was found in the input string. However, this doesn't result in a 1113 # number we can dial. For this reason, we treat the empty string the same 1114 # as if it isn't set at all. 1115 if (numobj.preferred_domestic_carrier_code is not None and 1116 len(numobj.preferred_domestic_carrier_code) > 0): 1117 carrier_code = numobj.preferred_domestic_carrier_code 1118 else: 1119 carrier_code = fallback_carrier_code 1120 return format_national_number_with_carrier_code(numobj, carrier_code) 1121 1122 1123def format_number_for_mobile_dialing(numobj, region_calling_from, with_formatting): 1124 """Returns a number formatted in such a way that it can be dialed from a 1125 mobile phone in a specific region. 1126 1127 If the number cannot be reached from the region (e.g. some countries block 1128 toll-free numbers from being called outside of the country), the method 1129 returns an empty string. 1130 1131 Arguments: 1132 numobj -- The phone number to be formatted 1133 region_calling_from -- The region where the call is being placed. 1134 1135 with_formatting -- whether the number should be returned with formatting 1136 symbols, such as spaces and dashes. 1137 1138 Returns the formatted phone number. 
1139 """ 1140 country_calling_code = numobj.country_code 1141 if not _has_valid_country_calling_code(country_calling_code): 1142 if numobj.raw_input is None: 1143 return U_EMPTY_STRING 1144 else: 1145 return numobj.raw_input 1146 formatted_number = U_EMPTY_STRING 1147 # Clear the extension, as that part cannot normally be dialed together with the main number. 1148 numobj_no_ext = PhoneNumber() 1149 numobj_no_ext.merge_from(numobj) 1150 numobj_no_ext.extension = None 1151 region_code = region_code_for_country_code(country_calling_code) 1152 numobj_type = number_type(numobj_no_ext) 1153 is_valid_number = (numobj_type != PhoneNumberType.UNKNOWN) 1154 if region_calling_from == region_code: 1155 is_fixed_line_or_mobile = ((numobj_type == PhoneNumberType.FIXED_LINE) or 1156 (numobj_type == PhoneNumberType.MOBILE) or 1157 (numobj_type == PhoneNumberType.FIXED_LINE_OR_MOBILE)) 1158 # Carrier codes may be needed in some countries. We handle this here. 1159 if region_code == "CO" and numobj_type == PhoneNumberType.FIXED_LINE: 1160 formatted_number = format_national_number_with_carrier_code(numobj_no_ext, 1161 _COLOMBIA_MOBILE_TO_FIXED_LINE_PREFIX) 1162 elif region_code == "BR" and is_fixed_line_or_mobile: 1163 # Historically, we set this to an empty string when parsing with 1164 # raw input if none was found in the input string. However, this 1165 # doesn't result in a number we can dial. For this reason, we 1166 # treat the empty string the same as if it isn't set at all. 1167 if (numobj_no_ext.preferred_domestic_carrier_code is not None and 1168 len(numobj_no_ext.preferred_domestic_carrier_code) > 0): 1169 formatted_number = format_national_number_with_preferred_carrier_code(numobj_no_ext, "") 1170 else: 1171 # Brazilian fixed line and mobile numbers need to be dialed with a 1172 # carrier code when called within Brazil. Without that, most of 1173 # the carriers won't connect the call. Because of that, we return 1174 # an empty string here. 
1175 formatted_number = U_EMPTY_STRING 1176 elif is_valid_number and region_code == "HU": 1177 # The national format for HU numbers doesn't contain the national 1178 # prefix, because that is how numbers are normally written 1179 # down. However, the national prefix is obligatory when dialing 1180 # from a mobile phone, except for short numbers. As a result, we 1181 # add it back here if it is a valid regular length phone number. 1182 formatted_number = (ndd_prefix_for_region(region_code, True) + # strip non-digits 1183 U_SPACE + format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL)) 1184 elif country_calling_code == _NANPA_COUNTRY_CODE: 1185 # For NANPA countries, we output international format for numbers 1186 # that can be dialed internationally, since that always works, 1187 # except for numbers which might potentially be short numbers, 1188 # which are always dialled in national format. 1189 metadata = PhoneMetadata.metadata_for_region(region_calling_from) 1190 if (_can_be_internationally_dialled(numobj_no_ext) and 1191 _test_number_length(national_significant_number(numobj_no_ext), 1192 metadata) != ValidationResult.TOO_SHORT): 1193 formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL) 1194 else: 1195 formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL) 1196 else: 1197 # For non-geographical countries, and Mexican and Chilean fixed 1198 # line and mobile numbers, we output international format for 1199 # numbers that can be dialed internationally as that always works. 1200 if ((region_code == REGION_CODE_FOR_NON_GEO_ENTITY or 1201 ((region_code == unicod("MX") or region_code == unicod("CL")) and 1202 is_fixed_line_or_mobile)) and 1203 _can_be_internationally_dialled(numobj_no_ext)): 1204 # MX fixed line and mobile numbers should always be formatted 1205 # in international format, even when dialed within MX. 
For 1206 # national format to work, a carrier code needs to be used, 1207 # and the correct carrier code depends on if the caller and 1208 # callee are from the same local area. It is trickier to get 1209 # that to work correctly than using international format, 1210 # which is tested to work fine on all carriers. 1211 formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL) 1212 else: 1213 formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL) 1214 elif is_valid_number and _can_be_internationally_dialled(numobj_no_ext): 1215 # We assume that short numbers are not diallable from outside their 1216 # region, so if a number is not a valid regular length phone number, 1217 # we treat it as if it cannot be internationally dialled. 1218 if with_formatting: 1219 return format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL) 1220 else: 1221 return format_number(numobj_no_ext, PhoneNumberFormat.E164) 1222 1223 if with_formatting: 1224 return formatted_number 1225 else: 1226 return normalize_diallable_chars_only(formatted_number) 1227 1228 1229def format_out_of_country_calling_number(numobj, region_calling_from): 1230 """Formats a phone number for out-of-country dialing purposes. 1231 1232 If no region_calling_from is supplied, we format the number in its 1233 INTERNATIONAL format. If the country calling code is the same as that of 1234 the region where the number is from, then NATIONAL formatting will be 1235 applied. 1236 1237 If the number itself has a country calling code of zero or an otherwise 1238 invalid country calling code, then we return the number with no formatting 1239 applied. 1240 1241 Note this function takes care of the case for calling inside of NANPA and 1242 between Russia and Kazakhstan (who share the same country calling 1243 code). In those cases, no international prefix is used. 
For regions which 1244 have multiple international prefixes, the number in its INTERNATIONAL 1245 format will be returned instead. 1246 1247 Arguments: 1248 numobj -- The phone number to be formatted 1249 region_calling_from -- The region where the call is being placed 1250 1251 Returns the formatted phone number 1252 """ 1253 if not _is_valid_region_code(region_calling_from): 1254 return format_number(numobj, PhoneNumberFormat.INTERNATIONAL) 1255 country_code = numobj.country_code 1256 nsn = national_significant_number(numobj) 1257 if not _has_valid_country_calling_code(country_code): 1258 return nsn 1259 if country_code == _NANPA_COUNTRY_CODE: 1260 if is_nanpa_country(region_calling_from): 1261 # For NANPA regions, return the national format for these regions 1262 # but prefix it with the country calling code. 1263 return (unicod(country_code) + U_SPACE + 1264 format_number(numobj, PhoneNumberFormat.NATIONAL)) 1265 elif country_code == country_code_for_valid_region(region_calling_from): 1266 # If regions share a country calling code, the country calling code 1267 # need not be dialled. This also applies when dialling within a 1268 # region, so this if clause covers both these cases. Technically this 1269 # is the case for dialling from La Reunion to other overseas 1270 # departments of France (French Guiana, Martinique, Guadeloupe), but 1271 # not vice versa - so we don't cover this edge case for now and for 1272 # those cases return the version including country calling code. 1273 # Details here: 1274 # http://www.petitfute.com/voyage/225-info-pratiques-reunion 1275 return format_number(numobj, PhoneNumberFormat.NATIONAL) 1276 1277 # Metadata cannot be None because we checked '_is_valid_region_code()' above. 
1278 metadata_for_region_calling_from = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_calling_from.upper()) 1279 international_prefix = metadata_for_region_calling_from.international_prefix 1280 1281 # For regions that have multiple international prefixes, the international 1282 # format of the number is returned, unless there is a preferred 1283 # international prefix. 1284 i18n_prefix_for_formatting = U_EMPTY_STRING 1285 i18n_match = fullmatch(_UNIQUE_INTERNATIONAL_PREFIX, international_prefix) 1286 if i18n_match: 1287 i18n_prefix_for_formatting = international_prefix 1288 elif metadata_for_region_calling_from.preferred_international_prefix is not None: 1289 i18n_prefix_for_formatting = metadata_for_region_calling_from.preferred_international_prefix 1290 1291 region_code = region_code_for_country_code(country_code) 1292 # Metadata cannot be None because the country calling code is valid. 1293 metadata_for_region = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code.upper()) 1294 formatted_national_number = _format_nsn(nsn, 1295 metadata_for_region, 1296 PhoneNumberFormat.INTERNATIONAL) 1297 formatted_number = _maybe_append_formatted_extension(numobj, 1298 metadata_for_region, 1299 PhoneNumberFormat.INTERNATIONAL, 1300 formatted_national_number) 1301 if len(i18n_prefix_for_formatting) > 0: 1302 formatted_number = (i18n_prefix_for_formatting + U_SPACE + 1303 unicod(country_code) + U_SPACE + formatted_number) 1304 else: 1305 formatted_number = _prefix_number_with_country_calling_code(country_code, 1306 PhoneNumberFormat.INTERNATIONAL, 1307 formatted_number) 1308 return formatted_number 1309 1310 1311def format_in_original_format(numobj, region_calling_from): 1312 """Format a number using the original format that the number was parsed from. 1313 1314 The original format is embedded in the country_code_source field of the 1315 PhoneNumber object passed in. 
If such information is missing, the number 1316 will be formatted into the NATIONAL format by default. 1317 1318 When the number contains a leading zero and this is unexpected for this 1319 country, or we don't have a formatting pattern for the number, the method 1320 returns the raw input when it is available. 1321 1322 Note this method guarantees no digit will be inserted, removed or modified 1323 as a result of formatting. 1324 1325 Arguments: 1326 number -- The phone number that needs to be formatted in its original 1327 number format 1328 region_calling_from -- The region whose IDD needs to be prefixed if the 1329 original number has one. 1330 1331 Returns the formatted phone number in its original number format. 1332 """ 1333 if (numobj.raw_input is not None and 1334 (_has_unexpected_italian_leading_zero(numobj) or not _has_formatting_pattern_for_number(numobj))): 1335 # We check if we have the formatting pattern because without that, we 1336 # might format the number as a group without national prefix. 1337 return numobj.raw_input 1338 if numobj.country_code_source is None: 1339 return format_number(numobj, PhoneNumberFormat.NATIONAL) 1340 1341 formatted_number = _format_original_allow_mods(numobj, region_calling_from) 1342 num_raw_input = numobj.raw_input 1343 # If no digit is inserted/removed/modified as a result of our formatting, 1344 # we return the formatted phone number; otherwise we return the raw input 1345 # the user entered. 
1346 if (formatted_number is not None and num_raw_input): 1347 normalized_formatted_number = normalize_diallable_chars_only(formatted_number) 1348 normalized_raw_input = normalize_diallable_chars_only(num_raw_input) 1349 if normalized_formatted_number != normalized_raw_input: 1350 formatted_number = num_raw_input 1351 return formatted_number 1352 1353 1354def _format_original_allow_mods(numobj, region_calling_from): 1355 if (numobj.country_code_source == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN): 1356 return format_number(numobj, PhoneNumberFormat.INTERNATIONAL) 1357 elif numobj.country_code_source == CountryCodeSource.FROM_NUMBER_WITH_IDD: 1358 return format_out_of_country_calling_number(numobj, region_calling_from) 1359 elif (numobj.country_code_source == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN): 1360 return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)[1:] 1361 else: 1362 region_code = region_code_for_country_code(numobj.country_code) 1363 # We strip non-digits from the NDD here, and from the raw input later, so that we can 1364 # compare them easily. 1365 national_prefix = ndd_prefix_for_region(region_code, True) # strip non-digits 1366 national_format = format_number(numobj, PhoneNumberFormat.NATIONAL) 1367 if (national_prefix is None or len(national_prefix) == 0): 1368 # If the region doesn't have a national prefix at all, we can 1369 # safely return the national format without worrying about a 1370 # national prefix being added. 1371 return national_format 1372 # Otherwise, we check if the original number was entered with a national prefix. 1373 if (_raw_input_contains_national_prefix(numobj.raw_input, national_prefix, region_code)): 1374 # If so, we can safely return the national format. 1375 return national_format 1376 # Metadata cannot be None here because ndd_prefix_for_region() (above) returns None if 1377 # there is no metadata for the region. 
1378 metadata = PhoneMetadata.metadata_for_region(region_code) 1379 national_number = national_significant_number(numobj) 1380 format_rule = _choose_formatting_pattern_for_number(metadata.number_format, national_number) 1381 # The format rule could still be null here if the national number was 1382 # 0 and there was no raw input (this should not be possible for 1383 # numbers generated by the phonenumber library as they would also not 1384 # have a country calling code and we would have exited earlier). 1385 if format_rule is None: 1386 return national_format 1387 # When the format we apply to this number doesn't contain national 1388 # prefix, we can just return the national format. 1389 # TODO: Refactor the code below with the code in isNationalPrefixPresentIfRequired. 1390 candidate_national_prefix_rule = format_rule.national_prefix_formatting_rule 1391 # We assume that the first-group symbol will never be _before_ the national prefix. 1392 if candidate_national_prefix_rule is None: 1393 return national_format 1394 index_of_first_group = candidate_national_prefix_rule.find("\\1") 1395 if (index_of_first_group <= 0): 1396 return national_format 1397 candidate_national_prefix_rule = candidate_national_prefix_rule[:index_of_first_group] 1398 candidate_national_prefix_rule = normalize_digits_only(candidate_national_prefix_rule) 1399 if len(candidate_national_prefix_rule) == 0: 1400 # National prefix not used when formatting this number. 1401 return national_format 1402 # Otherwise, we need to remove the national prefix from our output. 1403 new_format_rule = _copy_number_format(format_rule) 1404 new_format_rule.national_prefix_formatting_rule = None 1405 return format_by_pattern(numobj, PhoneNumberFormat.NATIONAL, [new_format_rule]) 1406 1407 1408def _raw_input_contains_national_prefix(raw_input, national_prefix, region_code): 1409 """Check if raw_input, which is assumed to be in the national format, has a 1410 national prefix. 
def format_out_of_country_keeping_alpha_chars(numobj, region_calling_from):
    """Formats a phone number for out-of-country dialing purposes.

    Note that in this version, if the number was entered originally using
    alpha characters and this version of the number is stored in raw_input,
    this representation of the number will be used rather than the digit
    representation. Grouping information, as specified by characters such as
    "-" and " ", will be retained.

    Caveats:

     - This will not produce good results if the country calling code is both
       present in the raw input _and_ is the start of the national
       number. This is not a problem in the regions which typically use alpha
       numbers.

     - This will also not produce good results if the raw input has any
       grouping information within the first three digits of the national
       number, and if the function needs to strip preceding digits/words in
       the raw input before these digits. Normally people group the first
       three digits together so this is not a huge problem - and will be fixed
       if it proves to be so.

    Arguments:
    numobj -- The phone number that needs to be formatted.
    region_calling_from -- The region where the call is being placed; it is
              upper-cased before the metadata lookup, so it must be a string.

    Returns the formatted phone number. If numobj has no raw input, the
    result of format_out_of_country_calling_number is returned; if its
    country calling code is not valid, the raw input is returned unchanged.
    """
    num_raw_input = numobj.raw_input
    # If there is no raw input, then we can't keep alpha characters because there aren't any.
    # In this case, we return format_out_of_country_calling_number.
    if num_raw_input is None or len(num_raw_input) == 0:
        return format_out_of_country_calling_number(numobj, region_calling_from)
    country_code = numobj.country_code
    if not _has_valid_country_calling_code(country_code):
        return num_raw_input
    # Strip any prefix such as country calling code, IDD, that was present. We
    # do this by comparing the number in raw_input with the parsed number. To
    # do this, first we normalize punctuation. We retain number grouping
    # symbols such as " " only.
    num_raw_input = _normalize_helper(num_raw_input,
                                      _ALL_PLUS_NUMBER_GROUPING_SYMBOLS,
                                      True)
    # Now we trim everything before the first three digits in the parsed
    # number. We choose three because all valid alpha numbers have 3 digits at
    # the start - if it does not, then we don't trim anything at
    # all. Similarly, if the national number has three digits or fewer, we
    # don't trim anything at all.
    national_number = national_significant_number(numobj)
    if len(national_number) > 3:
        first_national_number_digit = num_raw_input.find(national_number[:3])
        if first_national_number_digit != -1:
            num_raw_input = num_raw_input[first_national_number_digit:]

    metadata_for_region_calling_from = PhoneMetadata.metadata_for_region(region_calling_from.upper(), None)
    if country_code == _NANPA_COUNTRY_CODE:
        # A NANPA number dialled from a NANPA region: country code, a space
        # and the (trimmed) raw input. When the caller is not in a NANPA
        # region we fall through to the international formatting below.
        if is_nanpa_country(region_calling_from):
            return unicod(country_code) + U_SPACE + num_raw_input
    elif (metadata_for_region_calling_from is not None and
          country_code == country_code_for_region(region_calling_from)):
        # The caller shares the number's country calling code, so format
        # nationally.
        formatting_pattern = _choose_formatting_pattern_for_number(metadata_for_region_calling_from.number_format,
                                                                   national_number)
        if formatting_pattern is None:
            # If no pattern above is matched, we format the original input
            return num_raw_input
        # Work on a copy so that the pattern held by the metadata is not modified.
        new_format = _copy_number_format(formatting_pattern)
        # The first group is the first group of digits that the user
        # wrote together.
        new_format.pattern = u("(\\d+)(.*)")
        # Here we just concatenate them back together after the national
        # prefix has been fixed.
        new_format.format = u(r"\1\2")
        # Now we format using this pattern instead of the default pattern,
        # but with the national prefix prefixed if necessary.
        # This will not work in the cases where the pattern (and not the
        # leading digits) decide whether a national prefix needs to be used,
        # since we have overridden the pattern to match anything, but that is
        # not the case in the metadata to date.
        return _format_nsn_using_pattern(num_raw_input,
                                         new_format,
                                         PhoneNumberFormat.NATIONAL)
    i18n_prefix_for_formatting = U_EMPTY_STRING
    # If an unsupported region-calling-from is entered, or a country with
    # multiple international prefixes, the international format of the number
    # is returned, unless there is a preferred international prefix.
    if metadata_for_region_calling_from is not None:
        international_prefix = metadata_for_region_calling_from.international_prefix
        i18n_match = fullmatch(_UNIQUE_INTERNATIONAL_PREFIX, international_prefix)
        if i18n_match:
            i18n_prefix_for_formatting = international_prefix
        else:
            # May leave the prefix falsy, in which case the "+" form below is used.
            i18n_prefix_for_formatting = metadata_for_region_calling_from.preferred_international_prefix

    region_code = region_code_for_country_code(country_code)
    # Metadata cannot be None because the country calling code is valid.
    metadata_for_region = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code)
    formatted_number = _maybe_append_formatted_extension(numobj,
                                                         metadata_for_region,
                                                         PhoneNumberFormat.INTERNATIONAL,
                                                         num_raw_input)
    if i18n_prefix_for_formatting:
        # "<international prefix> <country code> <number>"
        formatted_number = (i18n_prefix_for_formatting + U_SPACE +
                            unicod(country_code) + U_SPACE + formatted_number)
    else:
        # Invalid region entered as country-calling-from (so no metadata was
        # found for it) or the region chosen has multiple international
        # dialling prefixes.
        formatted_number = _prefix_number_with_country_calling_code(country_code,
                                                                    PhoneNumberFormat.INTERNATIONAL,
                                                                    formatted_number)
    return formatted_number
def _prefix_number_with_country_calling_code(country_code, num_format, formatted_number):
    """Put the country calling code in front of an already formatted number.

    Helper used by format_number and format_by_pattern.

    Arguments:
    country_code -- The country calling code to prepend.
    num_format -- A PhoneNumberFormat value selecting the layout.
    formatted_number -- The formatted number to be prefixed.

    Returns the prefixed string for the E164, INTERNATIONAL and RFC3966
    formats; for any other format, formatted_number is returned unchanged.
    """
    prefixed_formats = (PhoneNumberFormat.E164,
                        PhoneNumberFormat.INTERNATIONAL,
                        PhoneNumberFormat.RFC3966)
    if num_format not in prefixed_formats:
        # No country calling code is shown for this format.
        return formatted_number

    plus_and_code = _PLUS_SIGN + unicod(country_code)
    if num_format == PhoneNumberFormat.E164:
        return plus_and_code + formatted_number
    if num_format == PhoneNumberFormat.INTERNATIONAL:
        return plus_and_code + U_SPACE + formatted_number
    # Only RFC3966 remains.
    return _RFC3966_PREFIX + plus_and_code + U_DASH + formatted_number
1609 if (len(intl_number_formats) == 0 or 1610 num_format == PhoneNumberFormat.NATIONAL): 1611 available_formats = metadata.number_format 1612 else: 1613 available_formats = metadata.intl_number_format 1614 formatting_pattern = _choose_formatting_pattern_for_number(available_formats, number) 1615 if formatting_pattern is None: 1616 return number 1617 else: 1618 return _format_nsn_using_pattern(number, formatting_pattern, num_format, carrier_code) 1619 1620 1621def _choose_formatting_pattern_for_number(available_formats, national_number): 1622 for num_format in available_formats: 1623 size = len(num_format.leading_digits_pattern) 1624 # We always use the last leading_digits_pattern, as it is the most detailed. 1625 if size > 0: 1626 ld_pattern = re.compile(num_format.leading_digits_pattern[-1]) 1627 ld_match = ld_pattern.match(national_number) 1628 if size == 0 or ld_match: 1629 format_pattern = re.compile(num_format.pattern) 1630 if fullmatch(format_pattern, national_number): 1631 return num_format 1632 return None 1633 1634 1635def _format_nsn_using_pattern(national_number, formatting_pattern, number_format, 1636 carrier_code=None): 1637 # Note that carrier_code is optional - if None or an empty string, no 1638 # carrier code replacement will take place. 1639 number_format_rule = formatting_pattern.format 1640 m_re = re.compile(formatting_pattern.pattern) 1641 formatted_national_number = U_EMPTY_STRING 1642 1643 if (number_format == PhoneNumberFormat.NATIONAL and carrier_code and 1644 formatting_pattern.domestic_carrier_code_formatting_rule): 1645 # Replace the $CC in the formatting rule with the desired 1646 # carrier code. 1647 cc_format_rule = formatting_pattern.domestic_carrier_code_formatting_rule 1648 cc_format_rule = re.sub(_CC_PATTERN, 1649 carrier_code, 1650 cc_format_rule, 1651 count=1) 1652 1653 # Now replace the $FG in the formatting rule with the 1654 # first group and the carrier code combined in the 1655 # appropriate way. 
def invalid_example_number(region_code):
    """Gets an invalid number for the specified region.

    This is useful for unit-testing purposes, where you want to test what
    will happen with an invalid number. The number returned has been parsed
    successfully for the region. It may also be a valid *short* number/code
    for this region; validity checking of such numbers is handled with
    shortnumberinfo.

    Arguments:
    region_code -- The region for which an example number is needed.

    Returns an invalid number for the specified region. Returns None when
    the region code does not pass _is_valid_region_code, when the region has
    no fixed-line example number, or when no shortened form of that example
    turns out to be invalid.
    """
    if not _is_valid_region_code(region_code):
        return None
    # Start from the fixed-line example number. A different number type
    # would also do; fixed-line numbers tend to allow many lengths, so the
    # number may need to become quite short before it is invalid.
    region_metadata = PhoneMetadata.metadata_for_region(region_code.upper())
    fixed_line_desc = _number_desc_by_type(region_metadata, PhoneNumberType.FIXED_LINE)
    if fixed_line_desc is None or fixed_line_desc.example_number is None:
        # This shouldn't happen; we have a test for this.
        return None  # pragma no cover
    sample = fixed_line_desc.example_number
    # Make the number invalid by shortening it one digit at a time, never
    # going below _MIN_LENGTH_FOR_NSN so that it can still be parsed. Every
    # length is tried, longest first: numbering plans often have overlapping
    # prefixes (123456 may be a fixed-line number and 12345 a mobile one),
    # and the longer candidates look closer to real numbers. This is only
    # used for unit-testing, so simplicity is preferred to performance.
    for length in range(len(sample) - 1, _MIN_LENGTH_FOR_NSN - 1, -1):
        try:
            candidate = parse(sample[:length], region_code)
            if not is_valid_number(candidate):
                return candidate
        except NumberParseException:  # pragma no cover
            # Shouldn't happen: the length has been checked, example numbers
            # contain only valid digits, and the region code is fine.
            continue

    # We have a test to check that this doesn't happen for any of our
    # supported regions.
    return None  # pragma no cover
def _example_number_anywhere_for_type(num_type):
    """Gets an example number of the given type from whichever region has one.

    Every entry of SUPPORTED_REGIONS is tried first, then every calling code
    in COUNTRY_CODES_FOR_NON_GEO_REGIONS.

    Arguments:
    num_type -- The type of number that is needed.

    Returns the first example number found, or None when neither the regions
    nor the non-geographical entities provide one for this type.
    """
    for region in SUPPORTED_REGIONS:
        found = example_number_for_type(region, num_type)
        if found is not None:
            return found

    # No region had an example number; try the non-geographical entities.
    for calling_code in COUNTRY_CODES_FOR_NON_GEO_REGIONS:
        nongeo_metadata = PhoneMetadata.metadata_for_nongeo_region(calling_code, None)
        type_desc = _number_desc_by_type(nongeo_metadata, num_type)
        if type_desc is None or type_desc.example_number is None:
            continue
        try:
            # There is no region to parse against, so the example is turned
            # into "+<calling code><number>".
            return parse(_PLUS_SIGN + unicod(calling_code) + type_desc.example_number,
                         UNKNOWN_REGION)
        except NumberParseException:  # pragma no cover
            continue

    # There are no example numbers of this type for any country in the library.
    return None  # pragma no cover
1822 1823 Arguments: 1824 country_calling_code -- The country calling code for a non-geographical entity. 1825 1826 Returns a valid number for the non-geographical entity. Returns None when 1827 the metadata does not contain such information, or the country calling 1828 code passed in does not belong to a non-geographical entity. 1829 """ 1830 metadata = PhoneMetadata.metadata_for_nongeo_region(country_calling_code, None) 1831 if metadata is not None: 1832 # For geographical entities, fixed-line data is always present. However, for non-geographical 1833 # entities, this is not the case, so we have to go through different types to find the 1834 # example number. We don't check fixed-line or personal number since they aren't used by 1835 # non-geographical entities (if this changes, a unit-test will catch this.) 1836 for desc in (metadata.mobile, metadata.toll_free, metadata.shared_cost, metadata.voip, 1837 metadata.voicemail, metadata.uan, metadata.premium_rate): 1838 try: 1839 if (desc is not None and desc.example_number is not None): 1840 return parse(_PLUS_SIGN + unicod(country_calling_code) + desc.example_number, UNKNOWN_REGION) 1841 except NumberParseException: 1842 pass 1843 return None 1844 1845 1846def _maybe_append_formatted_extension(numobj, metadata, num_format, number): 1847 """Appends the formatted extension of a phone number to formatted number, 1848 if the phone number had an extension specified. 
1849 """ 1850 if numobj.extension: 1851 if num_format == PhoneNumberFormat.RFC3966: 1852 return number + _RFC3966_EXTN_PREFIX + numobj.extension 1853 else: 1854 if metadata.preferred_extn_prefix is not None: 1855 return number + metadata.preferred_extn_prefix + numobj.extension 1856 else: 1857 return number + _DEFAULT_EXTN_PREFIX + numobj.extension 1858 return number 1859 1860 1861def _number_desc_by_type(metadata, num_type): 1862 """Return the PhoneNumberDesc of the metadata for the given number type""" 1863 if num_type == PhoneNumberType.PREMIUM_RATE: 1864 return metadata.premium_rate 1865 elif num_type == PhoneNumberType.TOLL_FREE: 1866 return metadata.toll_free 1867 elif num_type == PhoneNumberType.MOBILE: 1868 return metadata.mobile 1869 elif (num_type == PhoneNumberType.FIXED_LINE or 1870 num_type == PhoneNumberType.FIXED_LINE_OR_MOBILE): 1871 return metadata.fixed_line 1872 elif num_type == PhoneNumberType.SHARED_COST: 1873 return metadata.shared_cost 1874 elif num_type == PhoneNumberType.VOIP: 1875 return metadata.voip 1876 elif num_type == PhoneNumberType.PERSONAL_NUMBER: 1877 return metadata.personal_number 1878 elif num_type == PhoneNumberType.PAGER: 1879 return metadata.pager 1880 elif num_type == PhoneNumberType.UAN: 1881 return metadata.uan 1882 elif num_type == PhoneNumberType.VOICEMAIL: 1883 return metadata.voicemail 1884 else: 1885 return metadata.general_desc 1886 1887 1888def number_type(numobj): 1889 """Gets the type of a phone number. 1890 1891 Arguments: 1892 numobj -- The PhoneNumber object that we want to know the type of. 1893 1894 Returns the type of the phone number, as a PhoneNumberType value. 
def _number_type_helper(national_number, metadata):
    """Return the PhoneNumberType of a national number for the given metadata.

    The number must match metadata.general_desc, otherwise UNKNOWN is
    returned straight away. The specific descriptions are then checked in a
    fixed order and the first match wins; fixed-line and mobile come last.

    Arguments:
    national_number -- The national significant number, as a string.
    metadata -- The metadata whose number descriptions are checked.

    Returns a PhoneNumberType value; UNKNOWN when nothing matches.
    """
    if not _is_number_matching_desc(national_number, metadata.general_desc):
        return PhoneNumberType.UNKNOWN

    # (metadata attribute, resulting type), in the order they are checked.
    ordered_checks = (
        ("premium_rate", PhoneNumberType.PREMIUM_RATE),
        ("toll_free", PhoneNumberType.TOLL_FREE),
        ("shared_cost", PhoneNumberType.SHARED_COST),
        ("voip", PhoneNumberType.VOIP),
        ("personal_number", PhoneNumberType.PERSONAL_NUMBER),
        ("pager", PhoneNumberType.PAGER),
        ("uan", PhoneNumberType.UAN),
        ("voicemail", PhoneNumberType.VOICEMAIL),
    )
    for desc_attr, matched_type in ordered_checks:
        if _is_number_matching_desc(national_number, getattr(metadata, desc_attr)):
            return matched_type

    if _is_number_matching_desc(national_number, metadata.fixed_line):
        # A fixed-line match is ambiguous when the mobile pattern is flagged
        # as being the same, or when the number matches the mobile pattern too.
        if (metadata.same_mobile_and_fixed_line_pattern or
                _is_number_matching_desc(national_number, metadata.mobile)):
            return PhoneNumberType.FIXED_LINE_OR_MOBILE
        return PhoneNumberType.FIXED_LINE

    # Not fixed-line. The mobile pattern is only worth testing when it is
    # known to differ from the fixed-line pattern.
    if metadata.same_mobile_and_fixed_line_pattern:
        return PhoneNumberType.UNKNOWN
    if _is_number_matching_desc(national_number, metadata.mobile):
        return PhoneNumberType.MOBILE
    return PhoneNumberType.UNKNOWN
def is_valid_number_for_region(numobj, region_code):
    """Tests whether a phone number is valid for a certain region.

    Note this doesn't verify the number is actually in use, which is
    impossible to tell by just looking at a number itself. If the country
    calling code is not the same as the country calling code for the region,
    this immediately exits with False. After this, the specific number
    pattern rules for the region are examined. This is useful for
    determining for example whether a particular number is valid for Canada,
    rather than just a valid NANPA number.

    Warning: In most cases, you want to use is_valid_number instead. For
    example, this method will mark numbers from British Crown dependencies
    such as the Isle of Man as invalid for the region "GB" (United Kingdom),
    since it has its own region code, "IM", which may be undesirable.

    Arguments:
    numobj -- The phone number object that we want to validate.
    region_code -- The region that we want to validate the phone number
              for; None always yields False.

    Returns a boolean that indicates whether the number is of a valid pattern.
    """
    calling_code = numobj.country_code
    if region_code is None:
        return False
    region_metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code,
                                                                        region_code.upper())
    if region_metadata is None:
        # No metadata could be found for this region / calling code.
        return False
    if region_code != REGION_CODE_FOR_NON_GEO_ENTITY:
        if calling_code != country_code_for_valid_region(region_code):
            # The number's country calling code is not that of the region.
            return False
    detected_type = _number_type_helper(national_significant_number(numobj), region_metadata)
    return detected_type != PhoneNumberType.UNKNOWN
def region_codes_for_country_code(country_code):
    """Returns the region codes registered for a country calling code.

    The result is whatever COUNTRY_CODE_TO_REGION_CODE holds for the calling
    code (for non-geographical calling codes this is expected to be the
    region code 001). If the calling code is not in the map, an empty tuple
    is returned.

    Arguments:
    country_code -- The country calling code to look up.
    """
    known_regions = COUNTRY_CODE_TO_REGION_CODE.get(country_code)
    return () if known_regions is None else known_regions
def ndd_prefix_for_region(region_code, strip_non_digits):
    """Returns the national dialling prefix for a specific region.

    For example, this would be 1 for the United States, and 0 for New
    Zealand. Set strip_non_digits to True to strip symbols like "~" (which
    indicates a wait for a dialling tone) from the prefix returned.

    Warning: Do not use this method for do-your-own formatting - for some
    regions, the national dialling prefix is used only for certain types of
    numbers. Use the library's formatting functions to prefix the national
    prefix when required.

    Arguments:
    region_code -- The region that we want to get the dialling prefix for.
    strip_non_digits -- whether to strip non-digits from the national
              dialling prefix.

    Returns the dialling prefix for the region denoted by region_code, or
    None when region_code is None, when there is no metadata for the region,
    or when the region has no (or an empty) national prefix.
    """
    if region_code is None:
        return None
    region_metadata = PhoneMetadata.metadata_for_region(region_code.upper(), None)
    prefix = None if region_metadata is None else region_metadata.national_prefix
    if not prefix:
        return None
    if not strip_non_digits:
        return prefix
    # Note: if any other non-numeric symbols are ever used in national
    # prefixes, these would have to be removed here as well.
    return re.sub(U_TILDE, U_EMPTY_STRING, prefix)
def is_alpha_number(number):
    """Check whether a string is a valid vanity (alpha) number.

    An example of such a number is 800 MICROSOFT. A valid vanity number will
    start with at least 3 digits and will have three or more alpha
    characters. No region-specific checks are made here; to find out whether
    the number is actually valid for a region it should be parsed and then
    checked with functions such as is_possible_number_with_reason() and
    is_valid_number().

    Arguments:
    number -- the number that needs to be checked

    Returns True if the number is a valid vanity number
    """
    if _is_viable_phone_number(number):
        # Only the part in front of any extension is tested against the
        # alpha-number pattern; the extension itself is not needed.
        _, number_without_extension = _maybe_strip_extension(number)
        alpha_match = fullmatch(_VALID_ALPHA_PHONE_PATTERN, number_without_extension)
        return bool(alpha_match)
    # Number is too short, or doesn't match the basic phone number pattern.
    return False
def _test_number_length(national_number, metadata, numtype=PhoneNumberType.UNKNOWN):
    """Classify the length of a national number against the lengths allowed
    for a number type.

    Arguments:
    national_number -- The national significant number as a string; only its
              length is examined.
    metadata -- The PhoneMetadata whose possible lengths are consulted.
    numtype -- The PhoneNumberType whose description should be used.

    Returns a ValidationResult value:
     - IS_POSSIBLE_LOCAL_ONLY if the length is one of the local-only lengths
     - IS_POSSIBLE if the length is one of the possible lengths
     - TOO_SHORT if the length is below the smallest possible length
     - TOO_LONG if the length is above the largest possible length
     - INVALID_LENGTH if the length lies between two possible lengths (for
       example length 8 when only 7 and 10 are possible), or if there is no
       description at all for the requested type.
    """
    type_desc = _number_desc_by_type(metadata, numtype)
    if type_desc is not None:
        # There should always be "possibleLengths" set for every element. For
        # size efficiency, where a sub-description (e.g. fixed-line) has the
        # same possibleLengths as the parent, this is missing, so we fall back
        # to the general desc.
        allowed_lengths = type_desc.possible_length
        if len(allowed_lengths) == 0:  # pragma no cover: Python sub-descs all have possible_length
            allowed_lengths = metadata.general_desc.possible_length
        local_only_lengths = type_desc.possible_length_local_only
    else:
        allowed_lengths = metadata.general_desc.possible_length
        local_only_lengths = ()

    if numtype == PhoneNumberType.FIXED_LINE_OR_MOBILE:
        fixed_line_desc = _number_desc_by_type(metadata, PhoneNumberType.FIXED_LINE)
        if not _desc_has_possible_number_data(fixed_line_desc):
            # The rare case has been encountered where no fixedLine data is
            # available (true for some non-geographical entities), so we just
            # check mobile.
            return _test_number_length(national_number, metadata, PhoneNumberType.MOBILE)
        mobile_desc = _number_desc_by_type(metadata, PhoneNumberType.MOBILE)
        if _desc_has_possible_number_data(mobile_desc):
            # Merge the mobile lengths in. An empty mobile list means "same as
            # the general desc", so the lengths are taken from there instead.
            mobile_lengths = mobile_desc.possible_length
            if len(mobile_lengths) == 0:  # pragma no cover: Python sub-descs all have possible_length
                mobile_lengths = metadata.general_desc.possible_length
            # Build a fresh sorted list rather than touching the metadata's
            # own sequence (duplicates are okay; the lists are very small).
            allowed_lengths = sorted(list(allowed_lengths) + list(mobile_lengths))

            mobile_local_lengths = mobile_desc.possible_length_local_only
            if len(local_only_lengths) == 0:
                local_only_lengths = mobile_local_lengths
            else:
                local_only_lengths = sorted(list(local_only_lengths) + list(mobile_local_lengths))

    # If the type is not supported at all (indicated by a missing
    # PhoneNumberDesc) we return invalid length.
    if type_desc is None:
        return ValidationResult.INVALID_LENGTH

    number_length = len(national_number)
    # This is safe because there is never an overlap between the possible
    # lengths and the local-only lengths; this is checked at build time.
    if number_length in local_only_lengths:
        return ValidationResult.IS_POSSIBLE_LOCAL_ONLY

    shortest_length = allowed_lengths[0]
    if number_length == shortest_length:
        return ValidationResult.IS_POSSIBLE
    if number_length < shortest_length:
        return ValidationResult.TOO_SHORT
    if number_length > allowed_lengths[-1]:
        return ValidationResult.TOO_LONG
    # The first element was already compared above, so it is skipped here.
    if number_length in allowed_lengths[1:]:
        return ValidationResult.IS_POSSIBLE
    return ValidationResult.INVALID_LENGTH
2324 2325 For types that don't exist in a particular region, this will return a result 2326 that isn't so useful; it is recommended that you use 2327 supported_types_for_region or supported_types_for_non_geo_entity 2328 respectively before calling this method to determine whether you should call 2329 it for this number at all. 2330 2331 This provides a more lenient check than is_valid_number in the following sense: 2332 2333 - It only checks the length of phone numbers. In particular, it doesn't 2334 check starting digits of the number. 2335 2336 - For some numbers (particularly fixed-line), many regions have the 2337 concept of area code, which together with subscriber number constitute 2338 the national significant number. It is sometimes okay to dial only the 2339 subscriber number when dialing in the same area. This function will 2340 return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is 2341 passed in. On the other hand, because is_valid_number validates using 2342 information on both starting digits (for fixed line numbers, that would 2343 most likely be area codes) and length (obviously includes the length of 2344 area codes for fixed line numbers), it will return false for the 2345 subscriber-number-only version. 2346 2347 Arguments: 2348 numobj -- The number object that needs to be checked 2349 numtype -- The type we are interested in 2350 2351 Returns a value from ValidationResult which indicates whether the number 2352 is possible 2353 """ 2354 national_number = national_significant_number(numobj) 2355 country_code = numobj.country_code 2356 # Note: For regions that share a country calling code, like NANPA numbers, 2357 # we just use the rules from the default region (US in this case) since the 2358 # region_code_for_number will not work if the number is possible but not 2359 # valid. 
There is in fact one country calling code (290) where the possible 2360 # number pattern differs between various regions (Saint Helena and Tristan 2361 # da Cuñha), but this is handled by putting all possible lengths for any 2362 # country with this country calling code in the metadata for the default 2363 # region in this case. 2364 if not _has_valid_country_calling_code(country_code): 2365 return ValidationResult.INVALID_COUNTRY_CODE 2366 region_code = region_code_for_country_code(country_code) 2367 # Metadata cannot be None because the country calling code is valid. 2368 metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code) 2369 return _test_number_length(national_number, metadata, numtype) 2370 2371 2372def is_possible_number_string(number, region_dialing_from): 2373 """Check whether a phone number string is a possible number. 2374 2375 Takes a number in the form of a string, and the region where the number 2376 could be dialed from. It provides a more lenient check than 2377 is_valid_number; see is_possible_number_with_reason() for details. 2378 2379 This method first parses the number, then invokes is_possible_number with 2380 the resultant PhoneNumber object. 2381 2382 Arguments: 2383 number -- The number that needs to be checked, in the form of a string. 2384 region_dialling_from -- The region that we are expecting the number to be 2385 dialed from. Note this is different from the region where the 2386 number belongs. For example, the number +1 650 253 0000 is a 2387 number that belongs to US. When written in this form, it can be 2388 dialed from any region. When it is written as 00 1 650 253 0000, 2389 it can be dialed from any region which uses an international 2390 dialling prefix of 00. When it is written as 650 253 0000, it 2391 can only be dialed from within the US, and when written as 253 2392 0000, it can only be dialed from within a smaller area in the US 2393 (Mountain View, CA, to be more specific). 
def _extract_country_code(number):
    """Split a leading country calling code off a number string.

    It is assumed that the leading plus sign or IDD has already been removed.
    Prefixes of increasing length (up to _MAX_LENGTH_COUNTRY_CODE characters)
    are tried, and the first one that is a key of COUNTRY_CODE_TO_REGION_CODE
    wins.

    Returns a 2-tuple of (country_calling_code, rest_of_number), or
    (0, number) if number doesn't start with a valid country calling code.
    """
    no_code_found = (0, number)
    # Country codes do not begin with a '0', and an empty string has none.
    if len(number) == 0 or number[0] == U_ZERO:
        return no_code_found

    longest_code = min(len(number), _MAX_LENGTH_COUNTRY_CODE)
    for code_length in range(1, longest_code + 1):
        prefix, remainder = number[:code_length], number[code_length:]
        try:
            candidate = int(prefix)
            if candidate in COUNTRY_CODE_TO_REGION_CODE:
                return (candidate, remainder)
        except Exception:
            # Best effort: a prefix that cannot be interpreted as a number is
            # simply skipped and the next, longer prefix is tried.
            pass
    return no_code_found
2488 numobj -- The PhoneNumber object where the country_code and 2489 country_code_source need to be populated. Note the country_code 2490 is always populated, whereas country_code_source is only 2491 populated when keep_raw_input is True. 2492 2493 Returns a 2-tuple containing: 2494 - the country calling code extracted or 0 if none could be extracted 2495 - a string holding the national significant number, in the case 2496 that a country calling code was extracted. If no country calling code 2497 was extracted, this will be empty. 2498 """ 2499 if len(number) == 0: 2500 return (0, U_EMPTY_STRING) 2501 full_number = number 2502 # Set the default prefix to be something that will never match. 2503 possible_country_idd_prefix = unicod("NonMatch") 2504 if metadata is not None and metadata.international_prefix is not None: 2505 possible_country_idd_prefix = metadata.international_prefix 2506 2507 country_code_source, full_number = _maybe_strip_i18n_prefix_and_normalize(full_number, 2508 possible_country_idd_prefix) 2509 if keep_raw_input: 2510 numobj.country_code_source = country_code_source 2511 2512 if country_code_source != CountryCodeSource.FROM_DEFAULT_COUNTRY: 2513 if len(full_number) <= _MIN_LENGTH_FOR_NSN: 2514 raise NumberParseException(NumberParseException.TOO_SHORT_AFTER_IDD, 2515 "Phone number had an IDD, but after this was not " + 2516 "long enough to be a viable phone number.") 2517 potential_country_code, rest_of_number = _extract_country_code(full_number) 2518 if potential_country_code != 0: 2519 numobj.country_code = potential_country_code 2520 return (potential_country_code, rest_of_number) 2521 2522 # If this fails, they must be using a strange country calling code 2523 # that we don't recognize, or that doesn't exist. 
def _parse_prefix_as_idd(idd_pattern, number):
    """Remove an international dialling prefix from the start of a number.

    Helper function used by _maybe_strip_i18n_prefix_and_normalize().

    Arguments:
    idd_pattern -- Compiled pattern for the IDD, matched at the start of number.
    number -- The number string to examine.

    Returns a 2-tuple:
     - Boolean indicating if IDD was stripped
     - Number with IDD stripped (the unchanged number if nothing was stripped)
    """
    idd_match = idd_pattern.match(number)
    if not idd_match:
        return (False, number)

    remainder = number[idd_match.end():]
    # Only strip the prefix if the first digit after it is not a 0, since
    # country calling codes cannot begin with 0.
    first_digit = _CAPTURING_DIGIT_PATTERN.search(remainder)
    if first_digit and normalize_digits_only(first_digit.group(1)) == U_ZERO:
        return (False, number)
    return (True, remainder)
def _maybe_strip_national_prefix_carrier_code(number, metadata):
    """Strips any national prefix (such as 0, 1) present in a number.

    Arguments:
    number -- The normalized telephone number that we wish to strip any
              national dialing prefix from
    metadata -- The metadata for the region that we think this number
              is from.

    Returns a 3-tuple of
     - The carrier code extracted if it is present, otherwise an empty string.
     - The number with the prefix stripped.
     - Boolean indicating if a national prefix or carrier code (or both) could be extracted.
    """
    possible_national_prefix = metadata.national_prefix_for_parsing
    if (len(number) == 0 or
        possible_national_prefix is None or
        len(possible_national_prefix) == 0):
        # Early return for numbers of zero length, or when the metadata has
        # no national prefix to look for.
        return (U_EMPTY_STRING, number, False)

    # Attempt to parse the first digits as a national prefix.
    prefix_pattern = re.compile(possible_national_prefix)
    prefix_match = prefix_pattern.match(number)
    if not prefix_match:
        return (U_EMPTY_STRING, number, False)

    national_number_pattern = re.compile(metadata.general_desc.national_number_pattern or U_EMPTY_STRING)
    # Check if the original number is viable.
    is_viable_original_number = fullmatch(national_number_pattern, number)
    num_groups = len(prefix_match.groups())
    # group(num_groups) is the last capturing group, or the whole match when
    # the pattern has no groups. Using group() rather than indexing groups()
    # avoids an IndexError for group-less patterns, and (unlike
    # groups(default), which always returns a tuple) really is None when the
    # last group did not take part in the match.
    last_group = prefix_match.group(num_groups)
    transform_rule = metadata.national_prefix_transform_rule

    if (transform_rule is None or
        len(transform_rule) == 0 or
        last_group is None):
        # Nothing usable was captured, or there is no transformation to
        # apply, so the national prefix is simply removed.
        stripped_number = number[prefix_match.end():]
        # If the original number was viable, and the resultant number is not,
        # leave the number untouched.
        if (is_viable_original_number and
            not fullmatch(national_number_pattern, stripped_number)):
            return (U_EMPTY_STRING, number, False)
        carrier_code = U_EMPTY_STRING
        if num_groups > 0 and last_group is not None:
            # group(1) may itself be unmatched; report that as "no carrier code".
            carrier_code = prefix_match.group(1) or U_EMPTY_STRING
        return (carrier_code, stripped_number, True)

    # Check that the resultant number is still viable. If not, return. Check
    # this by making the transformation on a copy first.
    transformed_number = re.sub(prefix_pattern, transform_rule, number, count=1)
    if (is_viable_original_number and
        not fullmatch(national_number_pattern, transformed_number)):
        return (U_EMPTY_STRING, number, False)
    carrier_code = U_EMPTY_STRING
    if num_groups > 1:
        carrier_code = prefix_match.group(1) or U_EMPTY_STRING
    return (carrier_code, transformed_number, True)
def _set_italian_leading_zeros_for_phone_number(national_number, numobj):
    """Record on numobj how many leading zeros national_number has.

    Nothing is changed unless national_number is longer than one character
    and starts with a zero. In that case numobj.italian_leading_zero is set
    to True, and numobj.number_of_leading_zeros is set as well when more than
    one leading zero was counted.

    Arguments:
    national_number -- The national number as a string of digits.
    numobj -- The PhoneNumber object to update.
    """
    if len(national_number) <= 1 or national_number[0] != U_ZERO:
        return
    numobj.italian_leading_zero = True

    # The first character is already known to be a zero. The last character
    # is never examined, so if the number is all "0"s, the last "0" is not
    # counted as a leading zero.
    zero_count = 1
    for digit in national_number[1:-1]:
        if digit != U_ZERO:
            break
        zero_count += 1
    if zero_count != 1:
        numobj.number_of_leading_zeros = zero_count
The country_code for the number in 2772 this case would be stored as that of the default region 2773 supplied. If the number is guaranteed to start with a '+' 2774 followed by the country calling code, then None or 2775 UNKNOWN_REGION can be supplied. 2776 keep_raw_input -- Whether to populate the raw_input field of the 2777 PhoneNumber object with number (as well as the 2778 country_code_source field). 2779 numobj -- An optional existing PhoneNumber object to receive the 2780 parsing results 2781 _check_region -- Whether to check the supplied region parameter; 2782 should always be True for external callers. 2783 2784 Returns a PhoneNumber object filled with the parse number. 2785 2786 Raises: 2787 NumberParseException if the string is not considered to be a viable 2788 phone number (e.g. too few or too many digits) or if no default 2789 region was supplied and the number is not in international format 2790 (does not start with +). 2791 2792 """ 2793 if numobj is None: 2794 numobj = PhoneNumber() 2795 if number is None: 2796 raise NumberParseException(NumberParseException.NOT_A_NUMBER, 2797 "The phone number supplied was None.") 2798 elif len(number) > _MAX_INPUT_STRING_LENGTH: 2799 raise NumberParseException(NumberParseException.TOO_LONG, 2800 "The string supplied was too long to parse.") 2801 2802 national_number = _build_national_number_for_parsing(number) 2803 2804 if not _is_viable_phone_number(national_number): 2805 raise NumberParseException(NumberParseException.NOT_A_NUMBER, 2806 "The string supplied did not seem to be a phone number.") 2807 2808 # Check the region supplied is valid, or that the extracted number starts 2809 # with some sort of + sign so the number's region can be determined. 
2810 if _check_region and not _check_region_for_parsing(national_number, region): 2811 raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE, 2812 "Missing or invalid default region.") 2813 if keep_raw_input: 2814 numobj.raw_input = number 2815 2816 # Attempt to parse extension first, since it doesn't require 2817 # region-specific data and we want to have the non-normalised number here. 2818 extension, national_number = _maybe_strip_extension(national_number) 2819 if len(extension) > 0: 2820 numobj.extension = extension 2821 if region is None: 2822 metadata = None 2823 else: 2824 metadata = PhoneMetadata.metadata_for_region(region.upper(), None) 2825 2826 country_code = 0 2827 try: 2828 country_code, normalized_national_number = _maybe_extract_country_code(national_number, 2829 metadata, 2830 keep_raw_input, 2831 numobj) 2832 except NumberParseException: 2833 _, e, _ = sys.exc_info() 2834 matchobj = _PLUS_CHARS_PATTERN.match(national_number) 2835 if (e.error_type == NumberParseException.INVALID_COUNTRY_CODE and 2836 matchobj is not None): 2837 # Strip the plus-char, and try again. 2838 country_code, normalized_national_number = _maybe_extract_country_code(national_number[matchobj.end():], 2839 metadata, 2840 keep_raw_input, 2841 numobj) 2842 if country_code == 0: 2843 raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE, 2844 "Could not interpret numbers after plus-sign.") 2845 else: 2846 raise 2847 2848 if country_code != 0: 2849 number_region = region_code_for_country_code(country_code) 2850 if number_region != region: 2851 # Metadata cannot be null because the country calling code is valid. 2852 metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, number_region) 2853 else: 2854 # If no extracted country calling code, use the region supplied 2855 # instead. The national number is just the normalized version of the 2856 # number we were given to parse. 
2857 national_number = _normalize(national_number) 2858 normalized_national_number += national_number 2859 if region is not None: 2860 country_code = metadata.country_code 2861 numobj.country_code = country_code 2862 elif keep_raw_input: 2863 numobj.country_code_source = None 2864 2865 if len(normalized_national_number) < _MIN_LENGTH_FOR_NSN: 2866 raise NumberParseException(NumberParseException.TOO_SHORT_NSN, 2867 "The string supplied is too short to be a phone number.") 2868 if metadata is not None: 2869 potential_national_number = normalized_national_number 2870 carrier_code, potential_national_number, _ = _maybe_strip_national_prefix_carrier_code(potential_national_number, 2871 metadata) 2872 # We require that the NSN remaining after stripping the national 2873 # prefix and carrier code be long enough to be a possible length for 2874 # the region. Otherwise, we don't do the stripping, since the original 2875 # number could be a valid short number. 2876 if _test_number_length(potential_national_number, metadata) != ValidationResult.TOO_SHORT: 2877 normalized_national_number = potential_national_number 2878 if keep_raw_input and carrier_code is not None and len(carrier_code) > 0: 2879 numobj.preferred_domestic_carrier_code = carrier_code 2880 len_national_number = len(normalized_national_number) 2881 if len_national_number < _MIN_LENGTH_FOR_NSN: # pragma no cover 2882 # Check of _is_viable_phone_number() at the top of this function makes 2883 # this effectively unhittable. 
def _build_national_number_for_parsing(number):
    """Return the portion of number that should be handed to the parser.

    If number contains an RFC3966 phone-context parameter (at any position
    other than the very start of the string), the result is built from the
    phone context (only when it starts with a plus sign, i.e. it is a number
    prefix rather than a domain) followed by everything between the optional
    "tel:" prefix and the phone-context parameter.  Otherwise the result is
    whatever _extract_possible_number() extracts from the string.

    In both cases an isdn-subaddress, and everything after it, is removed.

    Arguments:
    number -- the string that is being parsed.

    Returns the string to use for further parsing.
    """
    index_of_phone_context = number.find(_RFC3966_PHONE_CONTEXT)
    if index_of_phone_context > 0:
        phone_context_start = index_of_phone_context + len(_RFC3966_PHONE_CONTEXT)
        # If the phone context contains a phone number prefix, we need to
        # capture it, whereas domains will be ignored.
        #
        # The length check matters: when the phone-context marker is the last
        # thing in the string there is no character to look at, and indexing
        # would raise IndexError.  Such input is treated like a context that
        # carries no number prefix.
        if (phone_context_start < len(number) and
            number[phone_context_start] == _PLUS_SIGN):
            # Additional parameters might follow the phone context. If so, we
            # will remove them here because the parameters after phone context
            # are not important for parsing the phone number.
            phone_context_end = number.find(U_SEMICOLON, phone_context_start)
            if phone_context_end > 0:
                national_number = number[phone_context_start:phone_context_end]
            else:
                national_number = number[phone_context_start:]
        else:
            national_number = U_EMPTY_STRING
        # Now append everything between the "tel:" prefix and the
        # phone-context. This should include the national number, an optional
        # extension or isdn-subaddress component. Note we also handle the case
        # when "tel:" is missing, as we have seen in some of the phone number
        # inputs. In that case we append everything from the beginning.
        index_of_rfc3966_prefix = number.find(_RFC3966_PREFIX)
        index_of_national_number = ((index_of_rfc3966_prefix + len(_RFC3966_PREFIX))
                                    if (index_of_rfc3966_prefix >= 0) else 0)
        national_number += number[index_of_national_number:index_of_phone_context]
    else:
        # Extract a possible number from the string passed in (this strips
        # leading characters that could not be the start of a phone number).
        national_number = _extract_possible_number(number)

    # Delete the isdn-subaddress and everything after it if it is
    # present. Note extension won't appear at the same time with
    # isdn-subaddress according to paragraph 5.3 of the RFC3966 spec.
    index_of_isdn = national_number.find(_RFC3966_ISDN_SUBADDRESS)
    if index_of_isdn > 0:
        national_number = national_number[:index_of_isdn]
    # If both phone context and isdn-subaddress are absent but other
    # parameters are present, the parameters are left in national_number. This
    # is because we are concerned about deleting content from a potential
    # number string when there is no strong evidence that the number is
    # actually written in RFC3966.
    return national_number


def _copy_core_fields_only(inobj):
    """Return a new PhoneNumber holding only the identifying fields of inobj.

    The copy carries the country code, national number, a non-empty
    extension, and the Italian-leading-zero information.  Fields that merely
    record the context in which the number was created are not copied.

    Arguments:
    inobj -- the phone number object to copy from.

    Returns a freshly created PhoneNumber object.
    """
    numobj = PhoneNumber()
    numobj.country_code = inobj.country_code
    numobj.national_number = inobj.national_number
    # An empty extension is treated the same as no extension at all.
    if inobj.extension is not None and len(inobj.extension) > 0:
        numobj.extension = inobj.extension
    if inobj.italian_leading_zero:
        numobj.italian_leading_zero = True
        # This field is only relevant if there are leading zeros at all.
        numobj.number_of_leading_zeros = inobj.number_of_leading_zeros
        if numobj.number_of_leading_zeros is None:
            # No number set is implicitly a count of 1; make it explicit.
            numobj.number_of_leading_zeros = 1
    return numobj


def _is_number_match_OO(numobj1_in, numobj2_in):
    """Compare two phone number objects and classify how well they match.

    Arguments:
    numobj1_in -- first phone number object.
    numobj2_in -- second phone number object.

    Returns one of MatchType.EXACT_MATCH, MatchType.NSN_MATCH,
    MatchType.SHORT_NSN_MATCH or MatchType.NO_MATCH.  The inputs are not
    modified; the comparison works on reduced copies.
    """
    # We only care about the fields that uniquely define a number, so we copy
    # these across explicitly.
    numobj1 = _copy_core_fields_only(numobj1_in)
    numobj2 = _copy_core_fields_only(numobj2_in)

    # Early exit if both had extensions and these are different.
    if (numobj1.extension is not None and
        numobj2.extension is not None and
        numobj1.extension != numobj2.extension):
        return MatchType.NO_MATCH

    country_code1 = numobj1.country_code
    country_code2 = numobj2.country_code
    # Both had country_code specified.
    if country_code1 != 0 and country_code2 != 0:
        if numobj1 == numobj2:
            return MatchType.EXACT_MATCH
        elif (country_code1 == country_code2 and
              _is_national_number_suffix_of_other(numobj1, numobj2)):
            # A SHORT_NSN_MATCH occurs if there is a difference because of the
            # presence or absence of an 'Italian leading zero', the presence
            # or absence of an extension, or one NSN being a shorter variant
            # of the other.
            return MatchType.SHORT_NSN_MATCH
        # This is not a match.
        return MatchType.NO_MATCH

    # Checks cases where one or both country_code fields were not
    # specified. To make equality checks easier, we first set the country_code
    # fields to be equal.
    numobj1.country_code = country_code2
    # If all else was the same, then this is an NSN_MATCH.
    if numobj1 == numobj2:
        return MatchType.NSN_MATCH
    if _is_national_number_suffix_of_other(numobj1, numobj2):
        return MatchType.SHORT_NSN_MATCH
    return MatchType.NO_MATCH


def _is_national_number_suffix_of_other(numobj1, numobj2):
    """Return True when one national number is a suffix of the other.

    The national_number attributes are compared via their str() form, so two
    identical national numbers also count as a match.
    """
    nn1 = str(numobj1.national_number)
    nn2 = str(numobj2.national_number)
    # Note that endswith returns True if the numbers are equal.
    return nn1.endswith(nn2) or nn2.endswith(nn1)


def _is_number_match_SS(number1, number2):
    """Compare two phone numbers given as strings.

    This is a convenience wrapper for _is_number_match_OO/_is_number_match_OS.
    No default region is known.

    Arguments:
    number1 -- first number, as a string.
    number2 -- second number, as a string.

    Returns a MatchType value; MatchType.NOT_A_NUMBER when the strings cannot
    be parsed into something comparable.
    """
    try:
        numobj1 = parse(number1, UNKNOWN_REGION)
        return _is_number_match_OS(numobj1, number2)
    except NumberParseException:
        # The exception is fetched through sys.exc_info(), the idiom used
        # throughout this file.
        _, exc, _ = sys.exc_info()
        if exc.error_type == NumberParseException.INVALID_COUNTRY_CODE:
            # The first string has no usable country code; try with the
            # roles of the two numbers swapped.
            try:
                numobj2 = parse(number2, UNKNOWN_REGION)
                return _is_number_match_OS(numobj2, number1)
            except NumberParseException:
                _, exc2, _ = sys.exc_info()
                if exc2.error_type == NumberParseException.INVALID_COUNTRY_CODE:
                    # Neither string has a usable country code: parse both
                    # with the region check disabled and compare the results.
                    try:
                        numobj1 = parse(number1, None, keep_raw_input=False,
                                        _check_region=False, numobj=None)
                        numobj2 = parse(number2, None, keep_raw_input=False,
                                        _check_region=False, numobj=None)
                        return _is_number_match_OO(numobj1, numobj2)
                    except NumberParseException:
                        return MatchType.NOT_A_NUMBER

    # One or more of the phone numbers we are trying to match is not a viable
    # phone number.
    return MatchType.NOT_A_NUMBER


def _is_number_match_OS(numobj1, number2):
    """Wrapper variant of _is_number_match_OO that copes with one
    PhoneNumber object and one string.

    Arguments:
    numobj1 -- first number, as a phone number object.
    number2 -- second number, as a string.

    Returns a MatchType value; MatchType.NOT_A_NUMBER when number2 cannot be
    parsed.
    """
    # First see if the second number has an implicit country calling code, by
    # attempting to parse it.
    try:
        numobj2 = parse(number2, UNKNOWN_REGION)
        return _is_number_match_OO(numobj1, numobj2)
    except NumberParseException:
        _, exc, _ = sys.exc_info()
        if exc.error_type == NumberParseException.INVALID_COUNTRY_CODE:
            # The second number has no country calling code. EXACT_MATCH is no
            # longer possible.  We parse it as if the region was the same as
            # that for the first number, and if EXACT_MATCH is returned, we
            # replace this with NSN_MATCH.
            region1 = region_code_for_country_code(numobj1.country_code)
            try:
                if region1 != UNKNOWN_REGION:
                    numobj2 = parse(number2, region1)
                    match = _is_number_match_OO(numobj1, numobj2)
                    if match == MatchType.EXACT_MATCH:
                        return MatchType.NSN_MATCH
                    else:
                        return match
                else:
                    # If the first number didn't have a valid country calling
                    # code, then we parse the second number without one as
                    # well.
                    numobj2 = parse(number2, None, keep_raw_input=False,
                                    _check_region=False, numobj=None)
                    return _is_number_match_OO(numobj1, numobj2)
            except NumberParseException:
                return MatchType.NOT_A_NUMBER
    # One or more of the phone numbers we are trying to match is not a viable
    # phone number.
    return MatchType.NOT_A_NUMBER


def is_number_match(num1, num2):
    """Takes two phone numbers and compares them for equality.

    For example, the numbers +1 345 657 1234 and 657 1234 are a SHORT_NSN_MATCH.
    The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.

    Arguments:
    num1 -- First number object or string to compare. Can contain formatting,
              and can have country calling code specified with + at the start.
    num2 -- Second number object or string to compare. Can contain formatting,
              and can have country calling code specified with + at the start.

    Returns:
     - EXACT_MATCH if the country_code, NSN, presence of a leading zero for
       Italian numbers and any extension present are the same.
     - NSN_MATCH if either or both has no region specified, and the NSNs and
       extensions are the same.
     - SHORT_NSN_MATCH if either or both has no region specified, or the
       region specified is the same, and one NSN could be a shorter version of
       the other number. This includes the case where one has an extension
       specified, and the other does not.
     - NOT_A_NUMBER if an argument given as a string cannot be parsed.
     - NO_MATCH otherwise.
    """
    # Dispatch on which of the arguments are already PhoneNumber objects; the
    # object/string helper always expects the object first.
    if isinstance(num1, PhoneNumber) and isinstance(num2, PhoneNumber):
        return _is_number_match_OO(num1, num2)
    elif isinstance(num1, PhoneNumber):
        return _is_number_match_OS(num1, num2)
    elif isinstance(num2, PhoneNumber):
        return _is_number_match_OS(num2, num1)
    else:
        return _is_number_match_SS(num1, num2)


def _can_be_internationally_dialled(numobj):
    """Returns True if the number can be dialled from outside the region, or
    if this is unknown.

    Returns False when the number matches the region's
    no_international_dialling description, i.e. it can only be dialled from
    within the region.  Does not check the number is a valid number.

    TODO: Make this method public when we have enough metadata to make it
    worthwhile.

    Arguments:
    numobj -- the phone number object for which we want to know whether it is
              diallable from outside the region.
    """
    metadata = PhoneMetadata.metadata_for_region(region_code_for_number(numobj), None)
    if metadata is None:
        # Note numbers belonging to non-geographical entities (e.g. +800
        # numbers) are always internationally diallable, and will be caught
        # here.
        return True
    nsn = national_significant_number(numobj)
    return not _is_number_matching_desc(nsn, metadata.no_international_dialling)


def is_mobile_number_portable_region(region_code):
    """Returns true if the supplied region supports mobile number portability.
    Returns false for invalid, unknown or regions that don't support mobile
    number portability.

    Arguments:
    region_code -- the region for which we want to know whether it supports
              mobile number portability or not.
    """
    metadata = PhoneMetadata.metadata_for_region(region_code, None)
    if metadata is None:
        # No metadata available for this region code.
        return False
    return metadata.mobile_number_portable_region


class NumberParseException(UnicodeMixin, Exception):
    """Exception when attempting to parse a putative phone number.

    The error_type attribute holds one of the class-level constants below and
    gives the reason a string could not be interpreted as a phone number.
    """

    # The country code supplied did not belong to a supported country or
    # non-geographical entity.
    INVALID_COUNTRY_CODE = 0

    # This generally indicates the string passed in had fewer than 3 digits in
    # it.  The number failed to match the regular expression
    # _VALID_PHONE_NUMBER in phonenumberutil.py.
    NOT_A_NUMBER = 1

    # This indicates the string started with an international dialing prefix,
    # but after this was removed, it had fewer digits than any valid phone
    # number (including country code) could have.
    TOO_SHORT_AFTER_IDD = 2

    # This indicates the string, after any country code has been stripped,
    # had fewer digits than any valid phone number could have.
    TOO_SHORT_NSN = 3

    # This indicates the string had more digits than any valid phone number
    # could have.
    TOO_LONG = 4

    def __init__(self, error_type, msg):
        """Create the exception.

        Arguments:
        error_type -- one of the reason constants defined on this class.
        msg -- human-readable description of the problem.
        """
        Exception.__init__(self, msg)
        self.error_type = error_type
        self._msg = msg

    def __reduce__(self):
        """Describe how to rebuild this exception for pickling and copying.

        Only msg is passed on to Exception.__init__, so the default reduction
        would call the class with a single argument and fail because
        __init__ needs error_type as well.
        """
        return (type(self), (self.error_type, self._msg))

    def __unicode__(self):
        return unicod("(%s) %s") % (self.error_type, self._msg)