1# -*- coding: utf-8 -*-
2"""Python phone number parsing and formatting library
3
4If you use this library, and want to be notified about important changes,
5please sign up to the libphonenumber mailing list at
6http://groups.google.com/group/libphonenumber-discuss/about.
7
8NOTE: A lot of methods in this module require Region Code strings. These must
9be provided using CLDR two-letter region-code format. These should be in
10upper-case. The list of the codes can be found here:
11http://www.iso.org/iso/country_codes/iso_3166_code_lists/country_names_and_code_elements.htm
12
13author: Shaopeng Jia (original Java version)
14author: David Drysdale (Python version)
15"""
16# Based on original Java code:
17#     java/src/com/google/i18n/phonenumbers/PhoneNumberUtil.java
18#   Copyright (C) 2009-2011 The Libphonenumber Authors
19#
20# Licensed under the Apache License, Version 2.0 (the "License");
21# you may not use this file except in compliance with the License.
22# You may obtain a copy of the License at
23#
24# http://www.apache.org/licenses/LICENSE-2.0
25#
26# Unless required by applicable law or agreed to in writing, software
27# distributed under the License is distributed on an "AS IS" BASIS,
28# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
29# See the License for the specific language governing permissions and
30# limitations under the License.
31import sys
32import re
33
34from .re_util import fullmatch   # Extra regexp function; see README
35from .util import UnicodeMixin, u, unicod, prnt, to_long
36from .util import U_EMPTY_STRING, U_SPACE, U_DASH, U_TILDE, U_ZERO, U_SEMICOLON
37from .unicode_util import digit as unicode_digit
38
39# Data class definitions
40from .phonenumber import PhoneNumber, CountryCodeSource
41from .phonemetadata import NumberFormat, PhoneMetadata, REGION_CODE_FOR_NON_GEO_ENTITY
42
43# Import auto-generated data structures
try:
    from .data import _COUNTRY_CODE_TO_REGION_CODE
    from .shortdata import _AVAILABLE_REGION_CODES as _AVAILABLE_SHORT_REGION_CODES
except ImportError:  # pragma no cover
    # The generated data modules do not exist until the metadata build
    # scripts have been run, yet those scripts import this module: a
    # circular dependency.  The failure is tolerated only when one of the
    # build scripts is the program being run; anybody else gets the
    # original ImportError.
    import os
    import sys
    if os.path.basename(sys.argv[0]) not in ("buildmetadatafromxml.py",
                                             "buildprefixdata.py"):
        raise
    prnt("Failed to import generated data (but OK as during autogeneration)", file=sys.stderr)
    # Minimal placeholder data so the rest of the module can be imported.
    _COUNTRY_CODE_TO_REGION_CODE = {1: ("US",)}
    _AVAILABLE_SHORT_REGION_CODES = []
60
# Set the master map from country code to region code.  The
# extra level of indirection allows the unit test to replace
# the map with test data.
COUNTRY_CODE_TO_REGION_CODE = _COUNTRY_CODE_TO_REGION_CODE

# Naming convention for phone number arguments and variables:
#  - string arguments are named 'number'
#  - PhoneNumber objects are named 'numobj'

# Flags to use when compiling regular expressions for phone numbers.
_REGEX_FLAGS = re.UNICODE | re.IGNORECASE
# The minimum length of the national significant number.
_MIN_LENGTH_FOR_NSN = 2
# The maximum length of the national significant number. The ITU says the
# maximum length should be 15, but we have found longer numbers in Germany.
_MAX_LENGTH_FOR_NSN = 17
# The maximum length of the country calling code.
_MAX_LENGTH_COUNTRY_CODE = 3
# We don't allow input strings for parsing to be longer than 250 chars. This
# prevents malicious input from overflowing the regular-expression engine.
_MAX_INPUT_STRING_LENGTH = 250
# Region-code for the unknown region.
UNKNOWN_REGION = u("ZZ")
# The country calling code shared by all NANPA regions; the regions listed
# under this code in COUNTRY_CODE_TO_REGION_CODE populate _NANPA_REGIONS.
_NANPA_COUNTRY_CODE = 1
# The prefix that needs to be inserted in front of a Colombian landline number
# when dialed from a mobile phone in Colombia.
_COLOMBIA_MOBILE_TO_FIXED_LINE_PREFIX = unicod("3")
# Map of country calling codes that use a mobile token before the area
# code. One example of when this is relevant is when determining the length of
# the national destination code, which should be the length of the area code
# plus the length of the mobile token.  Keys are country calling codes (52 and
# 54 are annotated as Mexico and Argentina further down); values are the
# token digits.
_MOBILE_TOKEN_MAPPINGS = {52: u('1'), 54: u('9')}
# Set of country codes that have geographically assigned mobile numbers (see
# _GEO_MOBILE_COUNTRIES below) which are not based on *area codes*. For example,
# in China mobile numbers start with a carrier indicator, and beyond that are
# geographically assigned: this carrier indicator is not considered to be an
# area code.
_GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES = frozenset((
    86,))  # China
# Set of country calling codes that have geographically assigned mobile
# numbers. This may not be complete; we add calling codes case by case, as we
# find geographical mobile numbers or hear from user reports.  Note that
# countries like the US, where we can't distinguish between fixed-line or
# mobile numbers, are not listed here, since we consider FIXED_LINE_OR_MOBILE
# to be a possibly geographically-related type anyway (like FIXED_LINE).
_GEO_MOBILE_COUNTRIES = _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES | set((
    52,  # Mexico
    54,  # Argentina
    55,  # Brazil
    62))  # Indonesia: some prefixes only (fixed CDMA wireless)
# The PLUS_SIGN signifies the international prefix.
_PLUS_SIGN = u("+")
# The star character; it is accepted alongside punctuation when checking
# whether a string is a viable phone number.
_STAR_SIGN = u('*')
# Literal markers that appear in RFC 3966 ("tel:" URI) representations of a
# number: the extension parameter, the URI scheme prefix, and the
# phone-context and ISDN sub-address parameters.
_RFC3966_EXTN_PREFIX = u(";ext=")
_RFC3966_PREFIX = u("tel:")
_RFC3966_PHONE_CONTEXT = u(";phone-context=")
_RFC3966_ISDN_SUBADDRESS = u(";isub=")
119
# Identity map over the ten ASCII digits.  It is folded into each of the
# larger lookup tables below (_ALPHA_PHONE_MAPPINGS, _DIALLABLE_CHAR_MAPPINGS
# and _ALL_PLUS_NUMBER_GROUPING_SYMBOLS).
_ASCII_DIGITS_MAP = dict((_d, _d) for _d in u("0123456789"))

# Keypad letter -> digit.  Only upper-case variants of alpha characters are
# stored; each row below is one key of the telephone keypad.
_ALPHA_MAPPINGS = dict((_letter, _digit)
                       for _digit, _letters in ((u("2"), u("ABC")),
                                                (u("3"), u("DEF")),
                                                (u("4"), u("GHI")),
                                                (u("5"), u("JKL")),
                                                (u("6"), u("MNO")),
                                                (u("7"), u("PQRS")),
                                                (u("8"), u("TUV")),
                                                (u("9"), u("WXYZ")))
                       for _letter in _letters)

# For performance reasons, letters and digits are amalgamated into one map.
_ALPHA_PHONE_MAPPINGS = dict(_ALPHA_MAPPINGS)
_ALPHA_PHONE_MAPPINGS.update(_ASCII_DIGITS_MAP)

# A map that contains characters that are essential when dialling. That means
# any of the characters in this map must not be removed from a number when
# dialling, otherwise the call will not reach the intended destination.
_DIALLABLE_CHAR_MAPPINGS = dict((_sym, _sym) for _sym in (_PLUS_SIGN, u('*'), u('#')))
_DIALLABLE_CHAR_MAPPINGS.update(_ASCII_DIGITS_MAP)

# Separate map of all symbols that we wish to retain when formatting alpha
# numbers. This includes digits, ASCII letters and number grouping symbols
# such as "-" and " ".  Each row pairs a canonical grouping symbol with every
# variant (itself included) that is normalised to it.
_ALL_PLUS_NUMBER_GROUPING_SYMBOLS = dict(
    (_variant, _canonical)
    for _canonical, _variants in ((u("-"), u("-\uFF0D\u2010\u2011\u2012\u2013\u2014\u2015\u2212")),
                                  (u("/"), u("/\uFF0F")),
                                  (u(" "), u(" \u3000\u2060")),
                                  (u("."), u(".\uFF0E")))
    for _variant in _variants)
# Letters are kept too: lower-case letters map to their upper-case form and
# upper-case letters map to themselves.
_ALL_PLUS_NUMBER_GROUPING_SYMBOLS.update((_c.lower(), _c) for _c in _ALPHA_MAPPINGS)
_ALL_PLUS_NUMBER_GROUPING_SYMBOLS.update((_c, _c) for _c in _ALPHA_MAPPINGS)
# ... and so are the ASCII digits.
_ALL_PLUS_NUMBER_GROUPING_SYMBOLS.update(_ASCII_DIGITS_MAP)
190
# Pattern that makes it easy to distinguish whether a region has a unique
# international dialing prefix or not. If a region has a unique international
# prefix (e.g. 011 in USA), it will be represented as a string that contains a
# sequence of ASCII digits. If there are multiple available international
# prefixes in a region, they will be represented as a regex string that always
# contains character(s) other than ASCII digits.  Note this regex also
# includes tilde, which signals waiting for the tone.
_UNIQUE_INTERNATIONAL_PREFIX = re.compile(u("[\\d]+(?:[~\u2053\u223C\uFF5E][\\d]+)?"))

# Regular expression of acceptable punctuation found in phone numbers. This
# excludes punctuation found as a leading character only.  This consists of
# dash characters, white space characters, full stops, slashes, square
# brackets, parentheses and tildes. It also includes the letter 'x' as that is
# found as a placeholder for carrier information in some phone numbers. Full-width
# variants are also present.
#
# Note that this is the *content* of a character class rather than a complete
# pattern: users of this constant wrap it in "[" ... "]" themselves.
_VALID_PUNCTUATION = (u("-x\u2010-\u2015\u2212\u30FC\uFF0D-\uFF0F ") +
                      u("\u00A0\u00AD\u200B\u2060\u3000()\uFF08\uFF09\uFF3B\uFF3D.\\[\\]/~\u2053\u223C\uFF5E"))

# Regexp fragment matching a single digit.
_DIGITS = unicod('\\d')  # Java "\\p{Nd}", so need "(?u)" or re.UNICODE wherever this is used
# We accept alpha characters in phone numbers, ASCII only, upper and lower
# case.  Like _VALID_PUNCTUATION this is character-class content: the
# upper-case keypad letters followed by their lower-case forms.
_VALID_ALPHA = (U_EMPTY_STRING.join(_ALPHA_MAPPINGS.keys()) +
                U_EMPTY_STRING.join([_k.lower() for _k in _ALPHA_MAPPINGS.keys()]))
# The ASCII plus sign and its full-width variant (U+FF0B).
_PLUS_CHARS = u("+\uFF0B")
# One or more consecutive plus characters.
_PLUS_CHARS_PATTERN = re.compile(u("[") + _PLUS_CHARS + u("]+"))
# One or more consecutive punctuation characters.
_SEPARATOR_PATTERN = re.compile(u("[") + _VALID_PUNCTUATION + u("]+"))
# A single digit, captured as group 1.
_CAPTURING_DIGIT_PATTERN = re.compile(u("(") + _DIGITS + u(")"), re.UNICODE)

# Regular expression of acceptable characters that may start a phone number
# for the purposes of parsing. This allows us to strip away meaningless
# prefixes to phone numbers that may be mistakenly given to us. This consists
# of digits, the plus symbol and arabic-indic digits. This does not contain
# alpha characters, although they may be used later in the number. It also
# does not include other punctuation, as this will be stripped later during
# parsing and is of no information value when parsing a number.
_VALID_START_CHAR = u("[") + _PLUS_CHARS + _DIGITS + u("]")
_VALID_START_CHAR_PATTERN = re.compile(_VALID_START_CHAR, re.UNICODE)

# Regular expression of characters typically used to start a second phone
# number for the purposes of parsing. This allows us to strip off parts of the
# number that are actually the start of another number, such as for: (530)
# 583-6985 x302/x2303 -> the second extension here makes this actually two
# phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove the
# second extension so that the first number is parsed correctly.
#
# The pattern is: a backslash or a forward slash, any number of spaces, then
# the letter "x".
_SECOND_NUMBER_START = u("[\\\\/] *x")
_SECOND_NUMBER_START_PATTERN = re.compile(_SECOND_NUMBER_START)
237
# Regular expression of trailing characters that we want to remove. We remove
# all characters that are not alpha or numerical characters. The hash
# character is retained here, as it may signify the previous block was an
# extension.
#
# The original Java regexp is:
#   [[\\P{N}&&\\P{L}]&&[^#]]+$
# which splits out as:
#   [                      ]+$  : >=1 of the following chars at end of string
#    [              ]&&[  ]     : intersection of these two sets of chars
#    [      &&      ]           : intersection of these two sets of chars
#     \\P{N}                    : characters without the "Number" Unicode property
#              \\P{L}           : characters without the "Letter" Unicode property
#                      [^#]     : character other than hash
# which nets down to: >=1 non-Number, non-Letter, non-# characters at string end
# In Python Unicode regexp mode '(?u)', the class '[^#\w]' will match anything
# that is not # and is not alphanumeric and is not underscore; the explicit
# "_" alternative adds the underscore back to the set of removable characters.
#
# The backslash is doubled in the literal below because "\w" is not a valid
# string escape sequence: Python warns about it (DeprecationWarning since 3.6,
# SyntaxWarning since 3.12).  The resulting pattern text is unchanged.
_UNWANTED_END_CHARS = u("(?u)(?:_|[^#\\w])+$")
_UNWANTED_END_CHAR_PATTERN = re.compile(_UNWANTED_END_CHARS)
257
# We use this pattern to check if the phone number has at least three letters
# in it - if so, then we treat it as a number where some phone-number digits
# are represented by letters.  Only ASCII letters (A-Z, a-z) are counted.
_VALID_ALPHA_PHONE_PATTERN = re.compile(u("(?:.*?[A-Za-z]){3}.*"))

# Regular expression of viable phone numbers. This is location
# independent. Checks we have at least three leading digits, and only valid
# punctuation, alpha characters and digits in the phone number. Does not
# include extension data.  The symbol 'x' is allowed here as valid punctuation
# since it is often used as a placeholder for carrier codes, for example in
# Brazilian phone numbers. We also allow multiple "+" characters at the start.
# Corresponds to the following:
# [digits]{minLengthNsn}|
# plus_sign*(([punctuation]|[star])*[digits]){3,}([punctuation]|[star]|[digits]|[alpha])*
#
# The first reg-ex is to allow short numbers (two digits long) to be parsed if
# they are entered as "15" etc, but only if there is no punctuation in
# them. The second expression restricts the number of digits to three or more,
# but then allows them to be in international form, and to have
# alpha-characters and punctuation.
#
# Note VALID_PUNCTUATION starts with a -, so must be the first in the range.
_VALID_PHONE_NUMBER = (_DIGITS + (u("{%d}") % _MIN_LENGTH_FOR_NSN) + u("|") +
                       u("[") + _PLUS_CHARS + u("]*(?:[") + _VALID_PUNCTUATION + _STAR_SIGN + u("]*") + _DIGITS + u("){3,}[") +
                       _VALID_PUNCTUATION + _STAR_SIGN + _VALID_ALPHA + _DIGITS + u("]*"))

# Default extension prefix to use when formatting. This will be put in front
# of any extension component of the number, after the main national number is
# formatted. For example, if you wish the default extension formatting to be
# " extn: 3456", then you should specify " extn: " here as the default
# extension prefix. This can be overridden by region-specific preferences.
_DEFAULT_EXTN_PREFIX = u(" ext. ")

# Pattern to capture digits used in an extension. Places a maximum length of
# "7" for an extension (and requires at least one digit).
_CAPTURING_EXTN_DIGITS = u("(") + _DIGITS + u("{1,7})")

# Regexp of all possible ways to write extensions, for use when parsing. This
# will be run as a case-insensitive regexp match. Wide character versions are
# also provided after each ASCII version.

# One-character symbols that can be used to indicate an extension: "x", "#"
# and "~", each followed by its full-width variant.
_SINGLE_EXTN_SYMBOLS_FOR_MATCHING = u("x\uFF58#\uFF03~\uFF5E")
# For parsing, we are slightly more lenient in our interpretation than for
# matching. Here we allow "comma" and "semicolon" as a possible extension
# indicator. When matching, these are hardly ever used to indicate this.
_SINGLE_EXTN_SYMBOLS_FOR_PARSING = u(",;") + _SINGLE_EXTN_SYMBOLS_FOR_MATCHING
305
306
def _create_extn_pattern(single_extn_symbols):
    """Build the regular-expression source used to recognise extensions.

    Arguments:
    single_extn_symbols -- string of one-character symbols that may introduce
              an extension; it is placed inside a character class.

    Returns a string holding three alternatives joined with "|".  It is
    regexp source, not a compiled pattern.
    """
    # NOTE: the only capturing groups may be the ones around the extension
    # digits themselves, or else parsing will fail.

    # Alternative 1: RFC 3966 format, where the extension follows ";ext=".
    rfc3966_form = _RFC3966_EXTN_PREFIX + _CAPTURING_EXTN_DIGITS

    # Alternative 2: the generic form.  Optional white space, an extension
    # label (or one of the single-character symbols), an optional colon or
    # full stop, zero or more spaces/tabs/commas/dashes and then the digits,
    # optionally followed by a hash.  Canonical-equivalence doesn't seem to
    # be an option with Android java, so the accented "o" is accepted both
    # as the character itself and in the decomposed form with the combining
    # acute accent.
    labelled_form = (u("[ \u00A0\\t,]*(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|") +
                     u("\uFF45?\uFF58\uFF54\uFF4E?|") +
                     u("[") + single_extn_symbols + u("]|int|anexo|\uFF49\uFF4E\uFF54)") +
                     u("[:\\.\uFF0E]?[ \u00A0\\t,-]*") + _CAPTURING_EXTN_DIGITS + u("#?"))

    # Alternative 3: the special case of American numbers where the
    # extension is written with a hash at the end, such as "- 503#".
    trailing_hash_form = u("[- ]+(") + _DIGITS + u("{1,5})#")

    return u("|").join([rfc3966_form, labelled_form, trailing_hash_form])
329
# Regexp sources for extensions.  The parsing flavour is built from
# _SINGLE_EXTN_SYMBOLS_FOR_PARSING and therefore also accepts "," and ";" as
# extension indicators; the matching flavour does not.
_EXTN_PATTERNS_FOR_PARSING = _create_extn_pattern(_SINGLE_EXTN_SYMBOLS_FOR_PARSING)
_EXTN_PATTERNS_FOR_MATCHING = _create_extn_pattern(_SINGLE_EXTN_SYMBOLS_FOR_MATCHING)

# Regexp of all known extension prefixes used by different regions followed by
# 1 or more valid digits, for use when parsing.  Anchored at the end of the
# string.
_EXTN_PATTERN = re.compile(u("(?:") + _EXTN_PATTERNS_FOR_PARSING + u(")$"), _REGEX_FLAGS)

# We append optionally the extension pattern to the end here, as a valid phone
# number may have an extension prefix appended, followed by 1 or more digits.
_VALID_PHONE_NUMBER_PATTERN = re.compile(_VALID_PHONE_NUMBER + u("(?:") + _EXTN_PATTERNS_FOR_PARSING + u(")?"), _REGEX_FLAGS)

# We use a non-capturing group because Python's re.split() returns any capturing
# groups interspersed with the other results (unlike Java's Pattern.split()).
NON_DIGITS_PATTERN = re.compile(u("(?:\\D+)"))

# The FIRST_GROUP_PATTERN was originally set to \1 but there are some
# countries for which the first group is not used in the national pattern
# (e.g. Argentina) so the \1 group does not match correctly.  Therefore, we
# use \d, so that the first group actually used in the pattern will be
# matched.  The compiled pattern matches a literal backslash followed by one
# digit and captures the pair.
_FIRST_GROUP_PATTERN = re.compile(u(r"(\\\d)"))
# Patterns matching the literal placeholder texts "$NP", "$FG" and "$CC".
# NOTE(review): presumably these stand for the national prefix, first group
# and carrier code in metadata formatting rules -- confirm against the code
# that uses them.
_NP_PATTERN = re.compile(u("\\$NP"))
_FG_PATTERN = re.compile(u("\\$FG"))
_CC_PATTERN = re.compile(u("\\$CC"))

# A pattern that is used to determine if the national prefix formatting rule
# has the first group only, i.e., does not start with the national
# prefix. Note that the pattern explicitly allows for unbalanced parentheses.
_FIRST_GROUP_ONLY_PREFIX_PATTERN = re.compile("\\(?\\\\1\\)?")
359
360
class PhoneNumberFormat(object):
    """The output formats a phone number can be rendered in.

    Using the number of the Google Switzerland office as an example:

     - E164: as INTERNATIONAL but with no formatting applied,
       e.g. "+41446681800".
     - INTERNATIONAL: consistent with the definition in ITU-T
       Recommendation E123, e.g. "+41 44 668 1800".
     - NATIONAL: consistent with the definition in ITU-T Recommendation
       E123, e.g. "044 668 1800".
     - RFC3966: as INTERNATIONAL, but with all spaces and other separating
       symbols replaced with a hyphen, any phone number extension appended
       with ";ext=", and a prefix of "tel:" added,
       e.g. "tel:+41-44-668-1800".

    Note: If you are considering storing the number in a neutral format, you
    are highly advised to use the PhoneNumber class.
    """
    # International form with no formatting applied.
    E164 = 0
    # International form with grouping (ITU-T E123).
    INTERNATIONAL = 1
    # National form (ITU-T E123).
    NATIONAL = 2
    # "tel:" URI form.
    RFC3966 = 3
382
383
class PhoneNumberType(object):
    """The kinds of phone number that can be distinguished."""
    FIXED_LINE = 0
    MOBILE = 1
    # For regions (e.g. the USA) where looking at the number itself cannot
    # tell a fixed-line number from a mobile one.
    FIXED_LINE_OR_MOBILE = 2
    # Freephone lines
    TOLL_FREE = 3
    PREMIUM_RATE = 4
    # The caller and the recipient share the cost of the call, so it is
    # typically cheaper than a PREMIUM_RATE call. See
    # http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
    SHARED_COST = 5
    # Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
    VOIP = 6
    # A number associated with a particular person, which may be routed to
    # either a MOBILE or FIXED_LINE number. Some more information can be
    # found here: http://en.wikipedia.org/wiki/Personal_Numbers
    PERSONAL_NUMBER = 7
    PAGER = 8
    # "Universal Access Numbers" or "Company Numbers": one number for a whole
    # company, which may be further routed to specific offices.
    UAN = 9
    # "Voice Mail Access Numbers".
    VOICEMAIL = 10
    # The number does not fit any of the known patterns for a specific
    # region.
    UNKNOWN = 99

    @classmethod
    def values(cls):
        """Return a tuple of all the type constants, with UNKNOWN last."""
        kinds = PhoneNumberType
        return (
            kinds.FIXED_LINE,
            kinds.MOBILE,
            kinds.FIXED_LINE_OR_MOBILE,
            kinds.TOLL_FREE,
            kinds.PREMIUM_RATE,
            kinds.SHARED_COST,
            kinds.VOIP,
            kinds.PERSONAL_NUMBER,
            kinds.PAGER,
            kinds.UAN,
            kinds.VOICEMAIL,
            kinds.UNKNOWN,
        )
429
430
class MatchType(object):
    """How closely two phone numbers match each other."""
    # At least one of the inputs is not a telephone number.
    NOT_A_NUMBER = 0
    # None of the match types below apply.
    NO_MATCH = 1
    # Either or both numbers have no region specified, or the region
    # specified is the same, and one NSN could be a shorter version of the
    # other number. This includes the case where one has an extension
    # specified, and the other does not.
    SHORT_NSN_MATCH = 2
    # Either or both numbers have no region specified, and the NSNs and
    # extensions are the same.
    NSN_MATCH = 3
    # The country_code, NSN, presence of a leading zero for Italian numbers
    # and any extension present are the same.
    EXACT_MATCH = 4
448
449
class ValidationResult(object):
    """Outcomes of checking whether a PhoneNumber is a possible number.

    Note that the numeric values do not follow declaration order.
    """
    # The length of the number matches that of valid numbers for this region.
    IS_POSSIBLE = 0
    # The length of the number matches that of local numbers for this region
    # only, i.e. numbers that may be able to be dialled within an area, but
    # do not have all the information to be dialled from anywhere inside or
    # outside the country.
    IS_POSSIBLE_LOCAL_ONLY = 4
    # The country calling code of the number is invalid.
    INVALID_COUNTRY_CODE = 1
    # Shorter than all valid numbers for this region.
    TOO_SHORT = 2
    # Longer than the shortest valid numbers for this region and shorter than
    # the longest ones, yet not of a length that matches valid numbers for
    # this region.  This can also be returned in the case where
    # is_possible_number_for_type_with_reason was called, and there are no
    # numbers of this type at all for this region.
    INVALID_LENGTH = 5
    # Longer than all valid numbers for this region.
    TOO_LONG = 3
472
473
# Derived data structures.  The three sets start out empty and are filled in
# place by _regenerate_derived_data() from COUNTRY_CODE_TO_REGION_CODE.
# Region codes appearing in the master map (the non-geographical entity
# marker is excluded).
SUPPORTED_REGIONS = set()
# Country calling codes whose only region is REGION_CODE_FOR_NON_GEO_ENTITY.
COUNTRY_CODES_FOR_NON_GEO_REGIONS = set()
# Region codes listed under _NANPA_COUNTRY_CODE in the master map.
_NANPA_REGIONS = set()
# Alias for the region-code collection imported from the short-number data.
SUPPORTED_SHORT_REGIONS = _AVAILABLE_SHORT_REGION_CODES
479
480
def _regenerate_derived_data():
    """Rebuild the derived region sets from COUNTRY_CODE_TO_REGION_CODE.

    SUPPORTED_REGIONS, COUNTRY_CODES_FOR_NON_GEO_REGIONS and _NANPA_REGIONS
    are emptied and refilled in place, so existing references to the set
    objects stay valid.  Nothing is returned.
    """
    # The sets are only mutated, never rebound, so no "global" declaration is
    # required.
    SUPPORTED_REGIONS.clear()
    COUNTRY_CODES_FOR_NON_GEO_REGIONS.clear()
    for calling_code, regions in COUNTRY_CODE_TO_REGION_CODE.items():
        only_non_geo = (len(regions) == 1 and
                        regions[0] == REGION_CODE_FOR_NON_GEO_ENTITY)
        if only_non_geo:
            COUNTRY_CODES_FOR_NON_GEO_REGIONS.add(calling_code)
        else:
            SUPPORTED_REGIONS.update(regions)
    # The non-geographical marker may still have arrived through a calling
    # code that lists it next to other regions; it is never a supported
    # region.
    SUPPORTED_REGIONS.discard(REGION_CODE_FOR_NON_GEO_ENTITY)  # pragma no cover
    _NANPA_REGIONS.clear()
    _NANPA_REGIONS.update(COUNTRY_CODE_TO_REGION_CODE[_NANPA_COUNTRY_CODE])


# Populate the derived sets once at import time.
_regenerate_derived_data()
497
498
def _copy_number_format(other):
    """Build a mutable NumberFormat carrying the same field values as other.

    Arguments:
    other -- the NumberFormat object to copy.

    Returns a new NumberFormat whose leading_digits_pattern is a fresh list
    and whose _mutable attribute is set to True.
    """
    field_values = dict(
        pattern=other.pattern,
        format=other.format,
        # Copy the list so that the two objects do not share it.
        leading_digits_pattern=list(other.leading_digits_pattern),
        national_prefix_formatting_rule=other.national_prefix_formatting_rule,
        national_prefix_optional_when_formatting=other.national_prefix_optional_when_formatting,
        domestic_carrier_code_formatting_rule=other.domestic_carrier_code_formatting_rule)
    duplicate = NumberFormat(**field_values)
    duplicate._mutable = True
    return duplicate
509
510
def _extract_possible_number(number):
    """Cut a candidate phone number out of a longer string.

    Everything before the first character that can start a phone number (as
    defined by _VALID_START_CHAR_PATTERN) is dropped; if there is no such
    character the result is an empty string.  Unwanted trailing characters
    are then removed, and finally anything from the start of a second number
    onwards is cut off.  For example in "(530) 583-6985 x302/x2303" the
    second extension makes this actually two phone numbers,
    (530) 583-6985 x302 and (530) 583-6985 x2303; the second extension is
    removed so that the first number is parsed correctly.

    Arguments:
    number -- The string that might contain a phone number.

    Returns the number, stripped of any non-phone-number prefix (such as
    "Tel:"), or an empty string if no character used to start phone numbers
    (such as + or any digit) is found in the number.
    """
    start_match = _VALID_START_CHAR_PATTERN.search(number)
    if not start_match:
        return U_EMPTY_STRING

    candidate = number[start_match.start():]

    # Remove trailing non-alpha non-numerical characters.
    tail_match = _UNWANTED_END_CHAR_PATTERN.search(candidate)
    if tail_match:
        candidate = candidate[:tail_match.start()]

    # Check for extra numbers at the end.
    extra_number_match = _SECOND_NUMBER_START_PATTERN.search(candidate)
    if extra_number_match:
        candidate = candidate[:extra_number_match.start()]

    return candidate
545
546
def _is_viable_phone_number(number):
    """Decide whether a string could possibly be a phone number.

    The string must be at least _MIN_LENGTH_FOR_NSN characters long and must
    match _VALID_PHONE_NUMBER_PATTERN in its entirety.  The number does not
    have to be normalized in advance, but leading non-number symbols are
    assumed to have been removed already, such as by
    _extract_possible_number.

    Arguments:
    number -- string to be checked for viability as a phone number

    Returns True if the number could be a phone number of some sort, otherwise
    False
    """
    long_enough = len(number) >= _MIN_LENGTH_FOR_NSN
    return long_enough and bool(fullmatch(_VALID_PHONE_NUMBER_PATTERN, number))
566
567
def _normalize(number):
    """Normalize a string of characters representing a phone number.

    Two strategies are used, chosen by _VALID_ALPHA_PHONE_PATTERN:

     - ALPHA/VANITY numbers (3 or more letters in the number): the string is
       run through _ALPHA_PHONE_MAPPINGS, so letters are converted to their
       numeric representation on a telephone keypad (the keypad defined in
       ITU Recommendation E.161).  Requiring 3 or more letters lessens the
       risk that such letters are typos.
     - All other numbers: normalize_digits_only is applied, which converts
       digits to normal ASCII (European) digits and strips punctuation and
       spurious alpha characters.

    Arguments:
    number -- string representing a phone number

    Returns the normalized string version of the phone number.
    """
    if fullmatch(_VALID_ALPHA_PHONE_PATTERN, number):
        return _normalize_helper(number, _ALPHA_PHONE_MAPPINGS, True)
    return normalize_digits_only(number)
594
595
def normalize_digits_only(number, keep_non_digits=False):
    """Normalizes a string of characters representing a phone number.

    This converts wide-ascii and arabic-indic numerals to European numerals,
    and strips punctuation and alpha characters (optional).

    Arguments:
    number -- a string representing a phone number
    keep_non_digits -- whether to keep non-digits; when true, characters that
              are not digits are copied to the output unchanged instead of
              being dropped.

    Returns the normalized string version of the phone number.
    """
    # Collect the output pieces in a list and join once at the end, rather
    # than growing a string with "+=" inside the loop.
    pieces = []
    for char in unicod(number):
        # unicode_digit hands back the supplied default (-1) for a character
        # that is not a digit.
        digit_value = unicode_digit(char, -1)
        if digit_value != -1:
            pieces.append(unicod(digit_value))
        elif keep_non_digits:
            pieces.append(char)
    return U_EMPTY_STRING.join(pieces)
618
619
def normalize_diallable_chars_only(number):
    """Reduce a phone number string to its diallable characters.

    Only the characters present in _DIALLABLE_CHAR_MAPPINGS are kept: the
    plus sign, "*", "#" and the ASCII digits.  Everything else that is not
    diallable on a mobile phone keypad (including all non-ASCII digits) is
    stripped.

    Arguments:
    number -- a string of characters representing a phone number

    Returns the normalized string version of the phone number.
    """
    # NOTE(review): the third argument presumably asks the helper to drop
    # characters missing from the mapping -- confirm against _normalize_helper.
    drop_unmapped = True
    return _normalize_helper(number, _DIALLABLE_CHAR_MAPPINGS, drop_unmapped)
632
633
def convert_alpha_characters_in_number(number):
    """Replace the letters in a number with their keypad digits.

    The conversion uses _ALPHA_PHONE_MAPPINGS; the existing formatting of the
    number is retained.

    Arguments:
    number -- a string of characters representing a phone number

    Returns the converted string.
    """
    # NOTE(review): False presumably tells the helper to keep characters that
    # have no mapping -- confirm against _normalize_helper.
    drop_unmapped = False
    return _normalize_helper(number, _ALPHA_PHONE_MAPPINGS, drop_unmapped)
638
639
def length_of_geographical_area_code(numobj):
    """Return the length of the geographical area code of a number.

    The result allows clients to split a national significant number into a
    geographical area code and a subscriber number. It works in such a way
    that the resultant subscriber number should be diallable, at least on
    some devices. An example of how this could be used:

    >>> import phonenumbers
    >>> numobj = phonenumbers.parse("16502530000", "US")
    >>> nsn = phonenumbers.national_significant_number(numobj)
    >>> ac_len = phonenumbers.length_of_geographical_area_code(numobj)
    >>> if ac_len > 0:
    ...     area_code = nsn[:ac_len]
    ...     subscriber_number = nsn[ac_len:]
    ... else:
    ...     area_code = ""
    ...     subscriber_number = nsn

    N.B.: area code is a very ambiguous concept, so the I18N team generally
    recommends against using it for most purposes, but recommends using the
    more general national_number instead. Read the following carefully before
    deciding to use this method:

     - geographical area codes change over time, and this method honors those
       changes; therefore, it doesn't guarantee the stability of the result it
       produces.
     - subscriber numbers may not be diallable from all devices (notably
       mobile devices, which typically require the full national_number to be
       dialled in most countries).
     - most non-geographical numbers have no area codes, including numbers
       from non-geographical entities.
     - some geographical numbers have no area codes.

    Zero is returned when there is no metadata for the number's region, when
    the region has no national prefix and the number has no Italian leading
    zero, when the number is a mobile number of a country listed as having
    no mobile area codes, or when the number's type is not geographical.

    Arguments:
    numobj -- The PhoneNumber object to find the length of the area code from.

    Returns the length of area code of the PhoneNumber object passed in.
    """
    region_code = region_code_for_number(numobj)
    metadata = PhoneMetadata.metadata_for_region(region_code, None)
    if metadata is None:
        return 0

    # A country with no national prefix is assumed to have a closed dialling
    # plan with no area codes, unless this number carries an Italian leading
    # zero.
    if not (metadata.national_prefix is not None or numobj.italian_leading_zero):
        return 0

    calling_code = numobj.country_code
    num_type = number_type(numobj)

    # Note this is a rough heuristic; it doesn't cover Indonesia well, for
    # example, where area codes are present for some mobile phones but not
    # for others. We have no better way of representing this in the metadata
    # at this point.
    mobile_without_area_code = (
        num_type == PhoneNumberType.MOBILE and
        calling_code in _GEO_MOBILE_COUNTRIES_WITHOUT_MOBILE_AREA_CODES)
    if mobile_without_area_code:
        return 0

    if is_number_type_geographical(num_type, calling_code):
        return length_of_national_destination_code(numobj)
    return 0
704
705
def length_of_national_destination_code(numobj):
    """Return the length of the national destination code of a number.

    The national destination code (NDC) length allows clients to split a
    national significant number into NDC and subscriber number. The NDC of a
    phone number is normally the first group of digit(s) right after the
    country calling code when the number is formatted in the international
    format, if there is a subscriber number part that follows. An example of
    how this could be used:

    >>> import phonenumbers
    >>> numobj = phonenumbers.parse("18002530000", "US")
    >>> nsn = phonenumbers.national_significant_number(numobj)
    >>> ndc_len = phonenumbers.length_of_national_destination_code(numobj)
    >>> if ndc_len > 0:
    ...     national_destination_code = nsn[:ndc_len]
    ...     subscriber_number = nsn[ndc_len:]
    ... else:
    ...     national_destination_code = ""
    ...     subscriber_number = nsn

    Refer to the unittests to see the difference between this function and
    length_of_geographical_area_code.

    Arguments:
    numobj -- The PhoneNumber object to find the length of the NDC from.

    Returns the length of NDC of the PhoneNumber object passed in.
    """
    if numobj.extension is None:
        number_without_ext = numobj
    else:
        # The extension must not show up in the formatted output, but the
        # caller's object must not be modified either, so work on a copy.
        number_without_ext = PhoneNumber()
        number_without_ext.merge_from(numobj)
        number_without_ext.extension = None

    international = format_number(number_without_ext, PhoneNumberFormat.INTERNATIONAL)
    groups = re.split(NON_DIGITS_PATTERN, international)

    # The formatted text starts with "+COUNTRY_CODE ", so groups[0] is the
    # empty string before the + symbol and groups[1] is the country calling
    # code. groups[2] is only an area code if something follows it.
    if len(groups) <= 3:
        return 0

    ndc_length = len(groups[2])
    if (number_type(numobj) == PhoneNumberType.MOBILE and
        country_mobile_token(numobj.country_code) != U_EMPTY_STRING):
        # For example Argentinian mobile numbers, when formatted in the
        # international format, are in the form of +54 9 NDC XXXX... Here
        # groups[2] is the mobile token, which also forms part of the
        # national significant number, and groups[3] is the NDC, so both
        # lengths are counted. This assumes that the mobile token is always
        # formatted separately from the rest of the phone number.
        ndc_length += len(groups[3])
    return ndc_length
768
769
def country_mobile_token(country_code):
    """Look up the mobile token for a country calling code.

    A mobile token is a number inserted before the area code when dialing a
    mobile number from that country from abroad.

    Arguments:
    country_code -- the country calling code for which we want the mobile token

    Returns the mobile token, as a string, for the given country calling
    code, or an empty string if the country calling code has none.
    """
    if country_code in _MOBILE_TOKEN_MAPPINGS:
        return _MOBILE_TOKEN_MAPPINGS[country_code]
    return U_EMPTY_STRING
780
781
def _normalize_helper(number, replacements, remove_non_matches):
    """Normalize a phone number string through a character mapping.

    Each character is upper-cased and looked up in the replacements mapping.
    A character that is found is replaced by the mapped value; one that is
    not found is either dropped or kept as it is, depending on
    remove_non_matches.

    Arguments:
    number -- a string representing a phone number
    replacements -- a mapping of characters to what they should be replaced
              by in the normalized version of the phone number
    remove_non_matches -- indicates whether characters that are not able to be
              replaced should be stripped from the number. If this is False,
              they will be left unchanged in the number.

    Returns the normalized string version of the phone number.
    """
    output = []
    for original_char in number:
        substitute = replacements.get(original_char.upper())
        if substitute is None:
            if remove_non_matches:
                # No replacement available and the caller wants it gone.
                continue
            substitute = original_char
        output.append(substitute)
    return U_EMPTY_STRING.join(output)
806
807
def _desc_has_possible_number_data(desc):
    """Returns true if there is any possible number data set for a particular PhoneNumberDesc.

    Arguments:
    desc -- the PhoneNumberDesc to inspect; may be None

    Returns False if desc is None or if its possible_length holds the single
    value -1, and True otherwise.
    """
    if desc is None:
        return False
    # An empty possible_length means numbers of this type inherit from the
    # "general desc"; the single value -1 means that no numbers exist for
    # this type.
    lengths = desc.possible_length
    return not (len(lengths) == 1 and lengths[0] == -1)
815
816
# Note: _desc_has_data must account for any of MetadataFilter's excludableChildFields potentially
# being absent from the metadata. It must check them all. For any changes in _desc_has_data, ensure
# that all the excludableChildFields are still being checked. If your change is safe, simply
# mention why during a review without needing to change MetadataFilter.
def _desc_has_data(desc):
    """Returns true if there is any data set for a particular PhoneNumberDesc.

    Arguments:
    desc -- the PhoneNumberDesc to inspect; may be None

    Returns True if desc has an example number, has possible number data, or
    has a national number pattern other than "NA"; False otherwise
    (including when desc is None).
    """
    if desc is None:
        return False
    # Most properties are checked because we don't know what's present: a
    # custom build may have stripped just one of them (e.g. liteBuild strips
    # exampleNumber). possibleLengthsLocalOnly is not checked, since if that
    # is the only thing present we don't really support the type at all: no
    # type-specific methods will work with only this data.
    if desc.example_number is not None:
        return True
    if _desc_has_possible_number_data(desc):
        return True
    pattern = desc.national_number_pattern
    return (pattern is not None) and (pattern != "NA")
832
833
def _supported_types_for_metadata(metadata):
    """Returns the types we have metadata for based on the PhoneMetadata object passed in, which must be non-None.

    Arguments:
    metadata -- the PhoneMetadata object to inspect

    Returns a set of PhoneNumberType values.
    """
    # Never return FIXED_LINE_OR_MOBILE (it is a convenience type, and
    # represents that a particular number type can't be determined) or
    # UNKNOWN (the non-type).
    never_reported = (PhoneNumberType.FIXED_LINE_OR_MOBILE, PhoneNumberType.UNKNOWN)
    return set(candidate for candidate in PhoneNumberType.values()
               if candidate not in never_reported and
               _desc_has_data(_number_desc_by_type(metadata, candidate)))
845
846
def supported_types_for_region(region_code):
    """Returns the types for a given region which the library has metadata for.

    FIXED_LINE_OR_MOBILE is never included (if numbers in this region could
    be classified as FIXED_LINE_OR_MOBILE, both FIXED_LINE and MOBILE would
    be present), and neither is UNKNOWN.

    Arguments:
    region_code -- the region code to look up

    Returns a set of number types; the set is empty for invalid or unknown
    region codes.
    """
    if _is_valid_region_code(region_code):
        region_metadata = PhoneMetadata.metadata_for_region(region_code.upper())
        return _supported_types_for_metadata(region_metadata)
    return set()
860
861
def supported_types_for_non_geo_entity(country_code):
    """Returns the types for a country-code belonging to a non-geographical
    entity which the library has metadata for.

    FIXED_LINE_OR_MOBILE is never included (if numbers for this
    non-geographical entity could be classified as FIXED_LINE_OR_MOBILE, both
    FIXED_LINE and MOBILE would be present), and neither is UNKNOWN.

    Arguments:
    country_code -- the country calling code of the non-geographical entity

    Returns a set of number types; the set is empty for country calling codes
    that do not map to a known non-geographical entity.
    """
    entity_metadata = PhoneMetadata.metadata_for_nongeo_region(country_code, None)
    if entity_metadata is not None:
        return _supported_types_for_metadata(entity_metadata)
    return set()
876
877
def _formatting_rule_has_first_group_only(national_prefix_formatting_rule):
    """Helper function to check if the national prefix formatting rule has the
    first group only, i.e., does not start with the national prefix.

    Arguments:
    national_prefix_formatting_rule -- the rule to check; may be None

    Returns True if the rule is None or if the whole rule matches
    _FIRST_GROUP_ONLY_PREFIX_PATTERN, otherwise False.
    """
    rule = national_prefix_formatting_rule
    if rule is None:
        return True
    whole_match = fullmatch(_FIRST_GROUP_ONLY_PREFIX_PATTERN, rule)
    return bool(whole_match)
886
887
def is_number_geographical(numobj):
    """Tests whether a phone number has a geographical association.

    It checks if the number is associated with a certain region in the
    country to which it belongs. Note that this doesn't verify if the number
    is actually in use.

    Arguments:
    numobj -- The PhoneNumber object to check

    Returns the result of is_number_type_geographical for the number's type
    and country calling code.
    """
    num_type = number_type(numobj)
    return is_number_type_geographical(num_type, numobj.country_code)
897
898
def is_number_type_geographical(num_type, country_code):
    """Tests whether a phone number has a geographical association,
    as represented by its type and the country it belongs to.

    This variant of is_number_geographical exists since calculating the phone
    number type is expensive; if we have already done this, we don't want to
    do it again.

    Arguments:
    num_type -- the PhoneNumberType of the number
    country_code -- the country calling code of the number

    Returns True for FIXED_LINE and FIXED_LINE_OR_MOBILE numbers, and for
    MOBILE numbers whose country calling code is in _GEO_MOBILE_COUNTRIES;
    False otherwise.
    """
    if num_type == PhoneNumberType.FIXED_LINE:
        return True
    if num_type == PhoneNumberType.FIXED_LINE_OR_MOBILE:
        return True
    return ((country_code in _GEO_MOBILE_COUNTRIES) and
            num_type == PhoneNumberType.MOBILE)
911
912
def _is_valid_region_code(region_code):
    """Helper function to check region code is not unknown or None.

    Arguments:
    region_code -- the region code to check; may be None

    Returns True if region_code is not None and is in SUPPORTED_REGIONS.
    """
    return region_code is not None and region_code in SUPPORTED_REGIONS
918
919
def _has_valid_country_calling_code(country_calling_code):
    """Helper function to check whether a country calling code is known.

    Arguments:
    country_calling_code -- the country calling code to check

    Returns True if the code is a key of _COUNTRY_CODE_TO_REGION_CODE.
    """
    is_known = country_calling_code in _COUNTRY_CODE_TO_REGION_CODE
    return is_known
922
923
def format_number(numobj, num_format):
    """Formats a phone number in the specified format using default rules.

    Note that this does not promise to produce a phone number that the user
    can dial from where they are - although we do format in either 'national'
    or 'international' format depending on what the client asks for, we do not
    currently support a more abbreviated format, such as for users in the same
    "area" who could potentially dial the number without area code. Note that
    if the phone number has a country calling code of 0 or an otherwise
    invalid country calling code, we cannot work out which formatting rules to
    apply so we return the national significant number with no formatting
    applied.

    Arguments:
    numobj -- The phone number to be formatted.
    num_format -- The format the phone number should be formatted into

    Returns the formatted phone number.
    """
    # Unparseable numbers that kept their raw input just use that. This is
    # the only case where a number can be formatted as E164 without a leading
    # '+' symbol (but the original number wasn't parseable anyway).
    # TODO: Consider removing the raw_input checks so that unparseable strings
    # without raw input format to the empty string instead of "+00".
    if (numobj.national_number == 0 and
        numobj.raw_input is not None and
        len(numobj.raw_input) > 0):
        return numobj.raw_input

    calling_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if num_format == PhoneNumberFormat.E164:
        # Early exit for E164 case (even if the country calling code is
        # invalid) since no formatting of the national number needs to be
        # applied. Extensions are not formatted.
        return _prefix_number_with_country_calling_code(calling_code, num_format, nsn)
    if not _has_valid_country_calling_code(calling_code):
        return nsn

    # region_code_for_country_code() is used because formatting information
    # for regions which share a country calling code is contained by only one
    # region for performance reasons. For example, for NANPA regions it will
    # be contained in the metadata for US.
    main_region = region_code_for_country_code(calling_code)
    # Metadata cannot be None because the country calling code is valid
    # (which means that the region code cannot be ZZ and must be one of our
    # supported region codes).
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code,
                                                                 main_region.upper())
    national_part = _format_nsn(nsn, metadata, num_format)
    with_extension = _maybe_append_formatted_extension(numobj,
                                                       metadata,
                                                       num_format,
                                                       national_part)
    return _prefix_number_with_country_calling_code(calling_code,
                                                    num_format,
                                                    with_extension)
978
979
def format_by_pattern(numobj, number_format, user_defined_formats):
    """Formats a phone number using client-defined formatting rules.

    Note that if the phone number has a country calling code of zero or an
    otherwise invalid country calling code, we cannot work out things like
    whether there should be a national prefix applied, or how to format
    extensions, so we return the national significant number with no
    formatting applied.

    Arguments:
    numobj -- The phone number to be formatted
    number_format -- The format the phone number should be formatted into
    user_defined_formats -- formatting rules specified by clients

    Returns the formatted phone number.
    """
    calling_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if not _has_valid_country_calling_code(calling_code):
        return nsn
    # region_code_for_country_code() is used because formatting information
    # for regions which share a country calling code is contained by only one
    # region for performance reasons. For example, for NANPA regions it will
    # be contained in the metadata for US.
    main_region = region_code_for_country_code(calling_code)
    # Metadata cannot be None because the country calling code is valid.
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, main_region)

    chosen_pattern = _choose_formatting_pattern_for_number(user_defined_formats, nsn)
    if chosen_pattern is None:
        # None of the client's patterns matched: format the number as a whole.
        result = nsn
    else:
        # Work on a copy, so that substituting the national prefix into the
        # rule here leaves the client's pattern intact for subsequent
        # numbers that need a different national prefix.
        pattern_copy = _copy_number_format(chosen_pattern)
        prefix_rule = chosen_pattern.national_prefix_formatting_rule
        if prefix_rule:
            national_prefix = metadata.national_prefix
            if not national_prefix:
                # We don't want to have a rule for how to format the national
                # prefix if there isn't one.
                pattern_copy.national_prefix_formatting_rule = None
            else:
                # Replace $NP with national prefix and $FG with the first
                # group (\1) matcher.
                prefix_rule = re.sub(_NP_PATTERN, national_prefix, prefix_rule, count=1)
                prefix_rule = re.sub(_FG_PATTERN, unicod("\\\\1"), prefix_rule, count=1)
                pattern_copy.national_prefix_formatting_rule = prefix_rule
        result = _format_nsn_using_pattern(nsn, pattern_copy, number_format)

    result = _maybe_append_formatted_extension(numobj, metadata, number_format, result)
    return _prefix_number_with_country_calling_code(calling_code, number_format, result)
1047
1048
def format_national_number_with_carrier_code(numobj, carrier_code):
    """Format a number in national format for dialing using the specified carrier.

    The carrier-code will always be used regardless of whether the phone
    number already has a preferred domestic carrier code stored. If
    carrier_code contains an empty string, returns the number in national
    format without any carrier code.

    If the number's country calling code is not valid, the national
    significant number is returned with no formatting applied.

    Arguments:
    numobj -- The phone number to be formatted
    carrier_code -- The carrier selection code to be used

    Returns the formatted phone number in national format for dialing using
    the carrier as specified in the carrier_code.
    """
    calling_code = numobj.country_code
    nsn = national_significant_number(numobj)
    if not _has_valid_country_calling_code(calling_code):
        return nsn
    # region_code_for_country_code() is used because formatting information
    # for regions which share a country calling code is contained by only one
    # region for performance reasons. For example, for NANPA regions it will
    # be contained in the metadata for US.
    main_region = region_code_for_country_code(calling_code)
    # Metadata cannot be None because the country calling code is valid
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, main_region)

    national = PhoneNumberFormat.NATIONAL
    result = _format_nsn(nsn, metadata, national, carrier_code)
    result = _maybe_append_formatted_extension(numobj, metadata, national, result)
    return _prefix_number_with_country_calling_code(calling_code, national, result)
1087
1088
def format_national_number_with_preferred_carrier_code(numobj, fallback_carrier_code):
    """Formats a phone number in national format for dialing using the carrier
    as specified in the preferred_domestic_carrier_code field of the
    PhoneNumber object passed in.

    If that field is missing or empty, the fallback_carrier_code passed in is
    used instead. If there is no preferred_domestic_carrier_code, and the
    fallback_carrier_code contains an empty string, the number is returned in
    national format without any carrier code.

    Use format_national_number_with_carrier_code instead if the carrier code
    passed in should take precedence over the number's
    preferred_domestic_carrier_code when formatting.

    Arguments:
    numobj -- The phone number to be formatted
    fallback_carrier_code -- The carrier selection code to be used, if none is
              found in the phone number itself.

    Returns the formatted phone number in national format for dialing using
    the number's preferred_domestic_carrier_code, or the fallback_carrier_code
    passed in if none is found.
    """
    # Historically, we set this to an empty string when parsing with raw input
    # if none was found in the input string. However, this doesn't result in a
    # number we can dial. For this reason, we treat the empty string the same
    # as if it isn't set at all.
    preferred = numobj.preferred_domestic_carrier_code
    if preferred is None or len(preferred) == 0:
        chosen_code = fallback_carrier_code
    else:
        chosen_code = preferred
    return format_national_number_with_carrier_code(numobj, chosen_code)
1121
1122
def format_number_for_mobile_dialing(numobj, region_calling_from, with_formatting):
    """Returns a number formatted in such a way that it can be dialed from a
    mobile phone in a specific region.

    If the number cannot be reached from the region (e.g. some countries block
    toll-free numbers from being called outside of the country), the method
    returns an empty string.

    If the number's country calling code is not valid, the number's raw_input
    is returned as it is (or an empty string if there is no raw_input). Any
    extension on the number is left out of the result, as that part cannot
    normally be dialed together with the main number.

    Arguments:
    numobj -- The phone number to be formatted
    region_calling_from -- The region where the call is being placed.
    with_formatting -- whether the number should be returned with formatting
              symbols, such as spaces and dashes.

    Returns the formatted phone number.
    """
    country_calling_code = numobj.country_code
    if not _has_valid_country_calling_code(country_calling_code):
        if numobj.raw_input is None:
            return U_EMPTY_STRING
        else:
            return numobj.raw_input
    formatted_number = U_EMPTY_STRING
    # Clear the extension, as that part cannot normally be dialed together with the main number.
    numobj_no_ext = PhoneNumber()
    numobj_no_ext.merge_from(numobj)
    numobj_no_ext.extension = None
    # Region code for the number's country calling code; it is compared
    # against region_calling_from to pick between the two main branches below.
    region_code = region_code_for_country_code(country_calling_code)
    numobj_type = number_type(numobj_no_ext)
    # A number whose type comes back as UNKNOWN is treated as not valid.
    is_valid_number = (numobj_type != PhoneNumberType.UNKNOWN)
    if region_calling_from == region_code:
        is_fixed_line_or_mobile = ((numobj_type == PhoneNumberType.FIXED_LINE) or
                                   (numobj_type == PhoneNumberType.MOBILE) or
                                   (numobj_type == PhoneNumberType.FIXED_LINE_OR_MOBILE))
        # Carrier codes may be needed in some countries. We handle this here.
        if region_code == "CO" and numobj_type == PhoneNumberType.FIXED_LINE:
            formatted_number = format_national_number_with_carrier_code(numobj_no_ext,
                                                                        _COLOMBIA_MOBILE_TO_FIXED_LINE_PREFIX)
        elif region_code == "BR" and is_fixed_line_or_mobile:
            # Historically, we set this to an empty string when parsing with
            # raw input if none was found in the input string. However, this
            # doesn't result in a number we can dial. For this reason, we
            # treat the empty string the same as if it isn't set at all.
            if (numobj_no_ext.preferred_domestic_carrier_code is not None and
                len(numobj_no_ext.preferred_domestic_carrier_code) > 0):
                formatted_number = format_national_number_with_preferred_carrier_code(numobj_no_ext, "")
            else:
                # Brazilian fixed line and mobile numbers need to be dialed with a
                # carrier code when called within Brazil. Without that, most of
                # the carriers won't connect the call.  Because of that, we return
                # an empty string here.
                formatted_number = U_EMPTY_STRING
        elif is_valid_number and region_code == "HU":
            # The national format for HU numbers doesn't contain the national
            # prefix, because that is how numbers are normally written
            # down. However, the national prefix is obligatory when dialing
            # from a mobile phone, except for short numbers. As a result, we
            # add it back here if it is a valid regular length phone number.
            formatted_number = (ndd_prefix_for_region(region_code, True) +  # strip non-digits
                                U_SPACE + format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL))
        elif country_calling_code == _NANPA_COUNTRY_CODE:
            # For NANPA countries, we output international format for numbers
            # that can be dialed internationally, since that always works,
            # except for numbers which might potentially be short numbers,
            # which are always dialled in national format.
            metadata = PhoneMetadata.metadata_for_region(region_calling_from)
            if (_can_be_internationally_dialled(numobj_no_ext) and
                _test_number_length(national_significant_number(numobj_no_ext),
                                    metadata) != ValidationResult.TOO_SHORT):
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL)
            else:
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL)
        else:
            # For non-geographical countries, and Mexican and Chilean fixed
            # line and mobile numbers, we output international format for
            # numbers that can be dialed internationally as that always works.
            if ((region_code == REGION_CODE_FOR_NON_GEO_ENTITY or
                 ((region_code == unicod("MX") or region_code == unicod("CL")) and
                  is_fixed_line_or_mobile)) and
                _can_be_internationally_dialled(numobj_no_ext)):
                # MX fixed line and mobile numbers should always be formatted
                # in international format, even when dialed within MX. For
                # national format to work, a carrier code needs to be used,
                # and the correct carrier code depends on if the caller and
                # callee are from the same local area. It is trickier to get
                # that to work correctly than using international format,
                # which is tested to work fine on all carriers.
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL)
            else:
                formatted_number = format_number(numobj_no_ext, PhoneNumberFormat.NATIONAL)
    elif is_valid_number and _can_be_internationally_dialled(numobj_no_ext):
        # We assume that short numbers are not diallable from outside their
        # region, so if a number is not a valid regular length phone number,
        # we treat it as if it cannot be internationally dialled.
        if with_formatting:
            return format_number(numobj_no_ext, PhoneNumberFormat.INTERNATIONAL)
        else:
            return format_number(numobj_no_ext, PhoneNumberFormat.E164)

    # Reached for every same-region case, and for numbers that were not
    # formatted for calling from another region (formatted_number is then
    # still the empty string).
    if with_formatting:
        return formatted_number
    else:
        # Without formatting, only the diallable characters are kept.
        return normalize_diallable_chars_only(formatted_number)
1227
1228
def format_out_of_country_calling_number(numobj, region_calling_from):
    """Format a phone number for dialling from another region.

    When region_calling_from is not a valid region code, the number is
    returned in INTERNATIONAL format. When the number's country calling code
    is not valid, the bare national significant number is returned with no
    formatting applied.

    Calls that stay inside a shared country calling code are special-cased:
    within NANPA the result is the country calling code, a space and the
    NATIONAL format; for any other shared calling code (including dialling
    within a single region) the plain NATIONAL format is returned.

    Otherwise the number is prefixed with the calling region's international
    prefix. If that region has several international prefixes and no
    preferred one, the INTERNATIONAL ("+"-prefixed) format is used instead.

    Arguments:
    numobj -- The phone number to be formatted
    region_calling_from -- The region where the call is being placed

    Returns the formatted phone number
    """
    if not _is_valid_region_code(region_calling_from):
        return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)

    calling_code = numobj.country_code
    significant_number = national_significant_number(numobj)
    if not _has_valid_country_calling_code(calling_code):
        return significant_number

    if calling_code == _NANPA_COUNTRY_CODE:
        if is_nanpa_country(region_calling_from):
            # Inside NANPA: national format, prefixed with the country
            # calling code.
            return (unicod(calling_code) + U_SPACE +
                    format_number(numobj, PhoneNumberFormat.NATIONAL))
    elif calling_code == country_code_for_valid_region(region_calling_from):
        # Regions sharing a country calling code (and calls within one
        # region) do not need the calling code dialled.  Dialling from La
        # Reunion to other French overseas departments is technically like
        # this too, but not in the reverse direction, so that edge case is
        # not covered and keeps the country calling code.  Details:
        # http://www.petitfute.com/voyage/225-info-pratiques-reunion
        return format_number(numobj, PhoneNumberFormat.NATIONAL)

    # The calling region was validated above, so its metadata is expected to
    # be present.
    caller_metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code,
                                                                        region_calling_from.upper())
    idd = caller_metadata.international_prefix

    # Pick the prefix to dial: the region's single international prefix if it
    # has exactly one, else its preferred prefix, else nothing (which selects
    # the "+" international format below).
    if fullmatch(_UNIQUE_INTERNATIONAL_PREFIX, idd):
        dial_prefix = idd
    else:
        preferred = caller_metadata.preferred_international_prefix
        if preferred is not None:
            dial_prefix = preferred
        else:
            dial_prefix = U_EMPTY_STRING

    # The country calling code was validated above, so metadata for the
    # number's own region is expected to be present.
    number_region = region_code_for_country_code(calling_code)
    number_metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code,
                                                                        number_region.upper())
    body = _format_nsn(significant_number,
                       number_metadata,
                       PhoneNumberFormat.INTERNATIONAL)
    body = _maybe_append_formatted_extension(numobj,
                                             number_metadata,
                                             PhoneNumberFormat.INTERNATIONAL,
                                             body)
    if len(dial_prefix) == 0:
        return _prefix_number_with_country_calling_code(calling_code,
                                                        PhoneNumberFormat.INTERNATIONAL,
                                                        body)
    return dial_prefix + U_SPACE + unicod(calling_code) + U_SPACE + body
1309
1310
def format_in_original_format(numobj, region_calling_from):
    """Format a number the way it was originally written when parsed.

    The original style is taken from the country_code_source field of the
    PhoneNumber object. If that field is not set, the NATIONAL format is
    used.

    The raw input is returned unchanged (when available) if the number has a
    leading zero that is unexpected for its country, or if there is no
    formatting pattern for the number.

    The result never has digits inserted, removed or modified relative to
    the raw input: if formatting would change the diallable characters, the
    raw input is returned instead.

    Arguments:
    numobj -- The phone number that needs to be formatted in its original
              number format
    region_calling_from -- The region whose IDD needs to be prefixed if the
              original number has one.

    Returns the formatted phone number in its original number format.
    """
    if numobj.raw_input is not None:
        # Without a formatting pattern we might format the number as one
        # group with no national prefix, so fall back to what was typed.
        if (_has_unexpected_italian_leading_zero(numobj) or
                not _has_formatting_pattern_for_number(numobj)):
            return numobj.raw_input
    if numobj.country_code_source is None:
        return format_number(numobj, PhoneNumberFormat.NATIONAL)

    candidate = _format_original_allow_mods(numobj, region_calling_from)
    typed = numobj.raw_input
    if candidate is None or not typed:
        # Nothing to compare against; hand back whatever we produced.
        return candidate
    # Only keep our formatting if it left the diallable characters intact.
    if normalize_diallable_chars_only(candidate) != normalize_diallable_chars_only(typed):
        return typed
    return candidate
1352
1353
def _format_original_allow_mods(numobj, region_calling_from):
    """Format numobj according to its country_code_source.

    Numbers that came with a plus sign, with an IDD, or with a country code
    but no plus sign are formatted in the matching international style. Any
    other source is formatted nationally, dropping the national prefix from
    the output when the raw input was not written with one.

    Arguments:
    numobj -- The phone number to format.
    region_calling_from -- The region passed on to
              format_out_of_country_calling_number for IDD-sourced numbers.

    Returns the formatted number.
    """
    source = numobj.country_code_source
    if source == CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN:
        return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)
    if source == CountryCodeSource.FROM_NUMBER_WITH_IDD:
        return format_out_of_country_calling_number(numobj, region_calling_from)
    if source == CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN:
        # Same as the international format, minus its first character.
        return format_number(numobj, PhoneNumberFormat.INTERNATIONAL)[1:]

    region_code = region_code_for_country_code(numobj.country_code)
    # The NDD is fetched with non-digits stripped, and the raw input is
    # stripped later, so the two can be compared easily.
    ndd = ndd_prefix_for_region(region_code, True)
    national_format = format_number(numobj, PhoneNumberFormat.NATIONAL)
    if not ndd:
        # No national prefix for this region at all, so the national format
        # cannot have gained one.
        return national_format
    if _raw_input_contains_national_prefix(numobj.raw_input, ndd, region_code):
        # The number was typed with a national prefix; keeping it is fine.
        return national_format

    # ndd_prefix_for_region() returns None when the region has no metadata,
    # so metadata is expected to be present here.
    metadata = PhoneMetadata.metadata_for_region(region_code)
    nsn = national_significant_number(numobj)
    rule = _choose_formatting_pattern_for_number(metadata.number_format, nsn)
    # The rule could still be None if the national number was 0 and there
    # was no raw input (not expected for numbers produced by this library,
    # which would have no country calling code and would have exited
    # earlier).
    if rule is None:
        return national_format

    # If the chosen rule does not add a national prefix, the national format
    # is already what we want.
    # TODO: Refactor the code below with the code in isNationalPrefixPresentIfRequired.
    prefix_rule = rule.national_prefix_formatting_rule
    if prefix_rule is None:
        return national_format
    # We assume that the first-group symbol will never be _before_ the
    # national prefix.
    first_group_at = prefix_rule.find("\\1")
    if first_group_at <= 0:
        return national_format
    prefix_digits = normalize_digits_only(prefix_rule[:first_group_at])
    if len(prefix_digits) == 0:
        # National prefix not used when formatting this number.
        return national_format

    # Otherwise format with a copy of the rule that has no national prefix.
    prefixless_rule = _copy_number_format(rule)
    prefixless_rule.national_prefix_formatting_rule = None
    return format_by_pattern(numobj, PhoneNumberFormat.NATIONAL, [prefixless_rule])
1406
1407
def _raw_input_contains_national_prefix(raw_input, national_prefix, region_code):
    """Report whether raw_input (assumed to be in national format) starts
    with the national prefix.

    national_prefix is assumed to be in digits-only form. A plain prefix
    match is not enough: the remainder after the prefix must also parse as a
    valid number for region_code."""
    digits = normalize_digits_only(raw_input)
    if not digits.startswith(national_prefix):
        return False
    # Some Japanese numbers (e.g. 00777123) might be mistaken to contain the
    # national prefix when written without it (e.g. 0777123) if we only did
    # prefix matching. So check that what remains after removing the assumed
    # prefix is itself valid (777123 won't be valid in Japan).
    remainder = digits[len(national_prefix):]
    try:
        return is_valid_number(parse(remainder, region_code))
    except NumberParseException:
        return False
1424
1425
def _has_unexpected_italian_leading_zero(numobj):
    """Tell whether numobj has italian_leading_zero set although its country
    calling code is one where a leading zero is not possible.

    The result is truthy only in that case; when italian_leading_zero is
    falsy, that falsy value itself is returned."""
    leading_zero_flag = numobj.italian_leading_zero
    if not leading_zero_flag:
        return leading_zero_flag
    return not _is_leading_zero_possible(numobj.country_code)
1431
1432
def _has_formatting_pattern_for_number(numobj):
    """Return True if the metadata for numobj's country calling code has a
    formatting pattern that applies to its national significant number;
    False if there is no such pattern or no metadata at all."""
    calling_code = numobj.country_code
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(
        calling_code, region_code_for_country_code(calling_code))
    if metadata is None:
        return False
    nsn = national_significant_number(numobj)
    return _choose_formatting_pattern_for_number(metadata.number_format, nsn) is not None
1442
1443
def format_out_of_country_keeping_alpha_chars(numobj, region_calling_from):
    """Formats a phone number for out-of-country dialing purposes.

    Note that in this version, if the number was entered originally using
    alpha characters and this version of the number is stored in raw_input,
    this representation of the number will be used rather than the digit
    representation. Grouping information, as specified by characters such as
    "-" and " ", will be retained.

    If region_calling_from is None or has no metadata, the number is
    returned in international ("+"-prefixed) form, as it is for a region
    with multiple international prefixes and no preferred one.

    Caveats:

     - This will not produce good results if the country calling code is both
       present in the raw input _and_ is the start of the national
       number. This is not a problem in the regions which typically use alpha
       numbers.

     - This will also not produce good results if the raw input has any
       grouping information within the first three digits of the national
       number, and if the function needs to strip preceding digits/words in
       the raw input before these digits. Normally people group the first
       three digits together so this is not a huge problem - and will be fixed
       if it proves to be so.

    Arguments:
    numobj -- The phone number that needs to be formatted.
    region_calling_from -- The region where the call is being placed
              (may be None).

    Returns the formatted phone number
    """
    num_raw_input = numobj.raw_input
    # If there is no raw input, then we can't keep alpha characters because there aren't any.
    # In this case, we return format_out_of_country_calling_number.
    if num_raw_input is None or len(num_raw_input) == 0:
        return format_out_of_country_calling_number(numobj, region_calling_from)
    country_code = numobj.country_code
    if not _has_valid_country_calling_code(country_code):
        return num_raw_input
    # Strip any prefix such as country calling code, IDD, that was present. We
    # do this by comparing the number in raw_input with the parsed number.  To
    # do this, first we normalize punctuation. We retain number grouping
    # symbols such as " " only.
    num_raw_input = _normalize_helper(num_raw_input,
                                      _ALL_PLUS_NUMBER_GROUPING_SYMBOLS,
                                      True)
    # Now we trim everything before the first three digits in the parsed
    # number. We choose three because all valid alpha numbers have 3 digits at
    # the start - if it does not, then we don't trim anything at
    # all. Similarly, if the national number has three digits or fewer, we
    # don't trim anything at all.
    national_number = national_significant_number(numobj)
    if len(national_number) > 3:
        first_national_number_digit = num_raw_input.find(national_number[:3])
        if first_national_number_digit != -1:
            num_raw_input = num_raw_input[first_national_number_digit:]

    # A missing calling region is treated like an unsupported one (no
    # metadata) rather than failing on None.upper(); this matches
    # format_out_of_country_calling_number, which also accepts no region.
    if region_calling_from is None:
        metadata_for_region_calling_from = None
    else:
        metadata_for_region_calling_from = PhoneMetadata.metadata_for_region(region_calling_from.upper(), None)
    if country_code == _NANPA_COUNTRY_CODE:
        if region_calling_from is not None and is_nanpa_country(region_calling_from):
            return unicod(country_code) + U_SPACE + num_raw_input
    elif (metadata_for_region_calling_from is not None and
          country_code == country_code_for_region(region_calling_from)):
        formatting_pattern = _choose_formatting_pattern_for_number(metadata_for_region_calling_from.number_format,
                                                                   national_number)
        if formatting_pattern is None:
            # If no pattern above is matched, we format the original input
            return num_raw_input
        new_format = _copy_number_format(formatting_pattern)
        # The first group is the first group of digits that the user
        # wrote together.
        new_format.pattern = u("(\\d+)(.*)")
        # Here we just concatenate them back together after the national
        # prefix has been fixed.
        new_format.format = u(r"\1\2")
        # Now we format using this pattern instead of the default pattern,
        # but with the national prefix prefixed if necessary.
        # This will not work in the cases where the pattern (and not the
        # leading digits) decide whether a national prefix needs to be used,
        # since we have overridden the pattern to match anything, but that is
        # not the case in the metadata to date.
        return _format_nsn_using_pattern(num_raw_input,
                                         new_format,
                                         PhoneNumberFormat.NATIONAL)
    i18n_prefix_for_formatting = U_EMPTY_STRING
    # If an unsupported region-calling-from is entered, or a country with
    # multiple international prefixes, the international format of the number
    # is returned, unless there is a preferred international prefix.
    if metadata_for_region_calling_from is not None:
        international_prefix = metadata_for_region_calling_from.international_prefix
        i18n_match = fullmatch(_UNIQUE_INTERNATIONAL_PREFIX, international_prefix)
        if i18n_match:
            i18n_prefix_for_formatting = international_prefix
        else:
            # May be None; the truthiness check below copes with that.
            i18n_prefix_for_formatting = metadata_for_region_calling_from.preferred_international_prefix

    region_code = region_code_for_country_code(country_code)
    # Metadata cannot be None because the country calling code is valid.
    metadata_for_region = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code)
    formatted_number = _maybe_append_formatted_extension(numobj,
                                                         metadata_for_region,
                                                         PhoneNumberFormat.INTERNATIONAL,
                                                         num_raw_input)
    if i18n_prefix_for_formatting:
        formatted_number = (i18n_prefix_for_formatting + U_SPACE +
                            unicod(country_code) + U_SPACE + formatted_number)
    else:
        # Invalid region entered as country-calling-from (so no metadata was
        # found for it) or the region chosen has multiple international
        # dialling prefixes.
        formatted_number = _prefix_number_with_country_calling_code(country_code,
                                                                    PhoneNumberFormat.INTERNATIONAL,
                                                                    formatted_number)
    return formatted_number
1556
1557
def national_significant_number(numobj):
    """Return the national significant number of a phone number as a string.

    The national significant number carries no national prefix and no
    formatting.

    Arguments:
    numobj -- The PhoneNumber object for which the national significant number
              is needed.

    Returns the national significant number of the PhoneNumber object passed
    in.
    """
    # Leading zero(s) recorded on the number belong to the number itself;
    # they are not a national prefix.
    zero_prefix = U_EMPTY_STRING
    if numobj.italian_leading_zero:
        zero_count = numobj.number_of_leading_zeros
        if zero_count is None:
            # Flag set without an explicit count: a single zero.
            zero_count = 1
        if zero_count > 0:
            zero_prefix = U_ZERO * zero_count
    return zero_prefix + str(numobj.national_number)
1582
1583
def _prefix_number_with_country_calling_code(country_code, num_format, formatted_number):
    """Prepend the country calling code to formatted_number in the style
    required by num_format (helper for format_number and format_by_pattern).

    Formats other than E164, INTERNATIONAL and RFC3966 get no prefix."""
    if num_format not in (PhoneNumberFormat.E164,
                          PhoneNumberFormat.INTERNATIONAL,
                          PhoneNumberFormat.RFC3966):
        return formatted_number
    plus_code = _PLUS_SIGN + unicod(country_code)
    if num_format == PhoneNumberFormat.E164:
        return plus_code + formatted_number
    if num_format == PhoneNumberFormat.INTERNATIONAL:
        return plus_code + U_SPACE + formatted_number
    # Only RFC3966 remains.
    return _RFC3966_PREFIX + plus_code + U_DASH + formatted_number
1594
1595
def _format_nsn(number, metadata, num_format, carrier_code=None):
    """Format a national significant number using the region's metadata.

    In some regions the national number is written in completely different
    ways for the NATIONAL and INTERNATIONAL formats; num_format selects
    which set of rules to use. carrier_code, if given, is passed through to
    the pattern formatter to replace $CC in the formatted string.

    Returns number unchanged when no formatting pattern applies to it.
    """
    intl_formats = metadata.intl_number_format
    # The international rules are used only when they exist and we are not
    # producing the NATIONAL format; otherwise the regular rules apply.
    if num_format != PhoneNumberFormat.NATIONAL and len(intl_formats) != 0:
        candidates = intl_formats
    else:
        candidates = metadata.number_format
    chosen = _choose_formatting_pattern_for_number(candidates, number)
    if chosen is None:
        return number
    return _format_nsn_using_pattern(number, chosen, num_format, carrier_code)
1619
1620
def _choose_formatting_pattern_for_number(available_formats, national_number):
    """Return the first entry of available_formats that applies to
    national_number, or None if none does.

    A format applies when its last leading-digits pattern (if it has any)
    matches the start of the number and its pattern matches the whole
    number."""
    for candidate in available_formats:
        leading_patterns = candidate.leading_digits_pattern
        if len(leading_patterns) > 0:
            # Only the last leading-digits pattern is consulted, as it is the
            # most detailed one.
            if not re.compile(leading_patterns[-1]).match(national_number):
                continue
        if fullmatch(re.compile(candidate.pattern), national_number):
            return candidate
    return None
1633
1634
def _format_nsn_using_pattern(national_number, formatting_pattern, number_format,
                              carrier_code=None):
    """Apply one formatting pattern to a national number.

    carrier_code is optional - if None or an empty string, no carrier code
    replacement takes place. For the NATIONAL format the pattern's carrier
    code rule (when a carrier code is given and the rule exists) or else its
    national prefix rule is folded into the format before it is applied. For
    RFC3966 the separators in the result are turned into dashes.

    Returns the formatted national number.
    """
    template = formatting_pattern.format
    number_re = re.compile(formatting_pattern.pattern)

    if number_format == PhoneNumberFormat.NATIONAL:
        if carrier_code and formatting_pattern.domestic_carrier_code_formatting_rule:
            # Replace the $CC in the carrier code rule with the desired
            # carrier code...
            carrier_rule = re.sub(_CC_PATTERN,
                                  carrier_code,
                                  formatting_pattern.domestic_carrier_code_formatting_rule,
                                  count=1)
            # ...then replace the first group in the format with the first
            # group and the carrier code combined in the appropriate way.
            template = re.sub(_FIRST_GROUP_PATTERN, carrier_rule, template, count=1)
        else:
            # Use the national prefix formatting rule instead, if any.
            prefix_rule = formatting_pattern.national_prefix_formatting_rule
            if prefix_rule:
                template = re.sub(_FIRST_GROUP_PATTERN, prefix_rule, template, count=1)

    result = number_re.sub(template, national_number)

    if number_format == PhoneNumberFormat.RFC3966:
        # Strip any leading punctuation.
        if _SEPARATOR_PATTERN.match(result):
            result = _SEPARATOR_PATTERN.sub(U_EMPTY_STRING, result, count=1)
        # Replace the rest with a dash between each number group
        result = _SEPARATOR_PATTERN.sub(U_DASH, result)

    return result
1683
1684
def example_number(region_code):
    """Get a valid example number for the given region.

    This is the fixed-line example for the region, obtained through
    example_number_for_type.

    Arguments:
    region_code -- The region for which an example number is needed.

    Returns a valid fixed-line number for the specified region. Returns None
    when the metadata does not contain such information, or the region 001 is
    passed in.  For 001 (representing non-geographical numbers), call
    example_number_for_non_geo_entity instead.
    """
    wanted_type = PhoneNumberType.FIXED_LINE
    return example_number_for_type(region_code, wanted_type)
1697
1698
def invalid_example_number(region_code):
    """Get a number for the given region that parses but is not valid.

    Intended for unit tests that need to exercise invalid-number handling.
    The returned number can always be parsed and has the correct country
    code. It may still be a valid *short* number/code for the region;
    validity of such numbers is handled with shortnumberinfo.

    Arguments:
    region_code -- The region for which an example number is needed.

    Returns an invalid number for the specified region. Returns None when an
    unsupported region or the region 001 (Earth) is passed in.
    """
    if not _is_valid_region_code(region_code):
        return None
    # Start from the fixed-line example, since every country supports that
    # type. A different type could be used instead: fixed-line numbers
    # typically allow a wide range of lengths, so the example may need to
    # get very short before it becomes invalid.
    metadata = PhoneMetadata.metadata_for_region(region_code.upper())
    desc = _number_desc_by_type(metadata, PhoneNumberType.FIXED_LINE)
    if desc is None or desc.example_number is None:
        # This shouldn't happen; we have a test for this.
        return None  # pragma no cover
    sample = desc.example_number
    # Make the number invalid by shortening it one digit at a time, stopping
    # at _MIN_LENGTH_FOR_NSN so the result can still be parsed. Every length
    # is tried because numbering plans often have overlapping prefixes:
    # 123456 might be a valid fixed-line number and 12345 a valid mobile
    # number. Going from long to short is not the fastest order, but yields
    # numbers that look closer to real ones and vary in length. This is only
    # for unit-testing, so simplicity is preferred to performance.
    for length in range(len(sample) - 1, _MIN_LENGTH_FOR_NSN - 1, -1):
        truncated = sample[:length]
        try:
            candidate = parse(truncated, region_code)
            if not is_valid_number(candidate):
                return candidate
        except NumberParseException:  # pragma no cover
            # Shouldn't happen: the length has been checked, example numbers
            # have only valid digits, and the region code is fine.
            pass

    # We have a test to check that this doesn't happen for any of our
    # supported regions.
    return None  # pragma no cover
1760
1761
def example_number_for_type(region_code, num_type):
    """Get a valid example number of the given type for the given region.

    Passing None as region_code means the returned number may belong to any
    country.

    Arguments:
    region_code -- The region for which an example number is needed, or None.
    num_type -- The type of number that is needed.

    Returns a valid number for the specified region and type. Returns None
    when the metadata does not contain such information or if an invalid
    region or region 001 was specified.  For 001 (representing
    non-geographical numbers), call example_number_for_non_geo_entity instead.
    """
    if region_code is None:
        return _example_number_anywhere_for_type(num_type)
    if not _is_valid_region_code(region_code):
        return None
    region_metadata = PhoneMetadata.metadata_for_region(region_code.upper())
    desc = _number_desc_by_type(region_metadata, num_type)
    if desc is None or desc.example_number is None:
        return None
    try:
        return parse(desc.example_number, region_code)
    except NumberParseException:  # pragma no cover
        return None
1790
1791
def _example_number_anywhere_for_type(num_type):
    """Get a valid example number of the given type from any country.

    Supported regions are searched first, then the non-geographical
    entities.

    Arguments:
    num_type -- The type of number that is needed.

    Returns a valid number for the specified type. Returns None when the
    metadata does not contain such information. This should only happen when
    no numbers of this type are allocated anywhere in the world anymore.
    """
    for region_code in SUPPORTED_REGIONS:
        found = example_number_for_type(region_code, num_type)
        if found is not None:
            return found
    # No region had an example of this type; try the non-geographical
    # entities.
    for calling_code in COUNTRY_CODES_FOR_NON_GEO_REGIONS:
        nongeo_metadata = PhoneMetadata.metadata_for_nongeo_region(calling_code, None)
        desc = _number_desc_by_type(nongeo_metadata, num_type)
        if desc is None or desc.example_number is None:
            continue
        try:
            return parse(_PLUS_SIGN + unicod(calling_code) + desc.example_number, UNKNOWN_REGION)
        except NumberParseException:  # pragma no cover
            pass

    # There are no example numbers of this type for any country in the library.
    return None  # pragma no cover
1818
1819
def example_number_for_non_geo_entity(country_calling_code):
    """Gets a valid number for a non-geographical country calling code.

    Arguments:
    country_calling_code -- The country calling code for a non-geographical entity.

    Returns a valid number for the non-geographical entity, or None when the
    metadata holds no example number or when no non-geographical metadata
    exists for the country calling code passed in.
    """
    metadata = PhoneMetadata.metadata_for_nongeo_region(country_calling_code, None)
    if metadata is None:
        return None

    # Unlike geographical entities, fixed-line data is not guaranteed to be present for
    # non-geographical entities, so the various types are tried in turn. Fixed-line and
    # personal number are deliberately not in the list since non-geographical entities
    # do not use them.
    candidate_descs = (metadata.mobile, metadata.toll_free, metadata.shared_cost, metadata.voip,
                       metadata.voicemail, metadata.uan, metadata.premium_rate)
    for candidate in candidate_descs:
        if candidate is None or candidate.example_number is None:
            continue
        try:
            return parse(_PLUS_SIGN + unicod(country_calling_code) + candidate.example_number, UNKNOWN_REGION)
        except NumberParseException:
            # This example could not be parsed; move on to the next type.
            pass
    return None
1844
1845
1846def _maybe_append_formatted_extension(numobj, metadata, num_format, number):
1847    """Appends the formatted extension of a phone number to formatted number,
1848    if the phone number had an extension specified.
1849    """
1850    if numobj.extension:
1851        if num_format == PhoneNumberFormat.RFC3966:
1852            return number + _RFC3966_EXTN_PREFIX + numobj.extension
1853        else:
1854            if metadata.preferred_extn_prefix is not None:
1855                return number + metadata.preferred_extn_prefix + numobj.extension
1856            else:
1857                return number + _DEFAULT_EXTN_PREFIX + numobj.extension
1858    return number
1859
1860
def _number_desc_by_type(metadata, num_type):
    """Look up the PhoneNumberDesc of the metadata for the given number type.

    FIXED_LINE and FIXED_LINE_OR_MOBILE both map to the fixed-line
    description; any type without a dedicated entry maps to the general
    description.

    Arguments:
    metadata -- The metadata object holding the per-type descriptions.
    num_type -- The PhoneNumberType value to look up.

    Returns the corresponding description attribute of metadata.
    """
    # Entries are compared in order; each names the metadata attribute for that type.
    attribute_for_type = (
        (PhoneNumberType.PREMIUM_RATE, "premium_rate"),
        (PhoneNumberType.TOLL_FREE, "toll_free"),
        (PhoneNumberType.MOBILE, "mobile"),
        (PhoneNumberType.FIXED_LINE, "fixed_line"),
        (PhoneNumberType.FIXED_LINE_OR_MOBILE, "fixed_line"),
        (PhoneNumberType.SHARED_COST, "shared_cost"),
        (PhoneNumberType.VOIP, "voip"),
        (PhoneNumberType.PERSONAL_NUMBER, "personal_number"),
        (PhoneNumberType.PAGER, "pager"),
        (PhoneNumberType.UAN, "uan"),
        (PhoneNumberType.VOICEMAIL, "voicemail"),
    )
    for known_type, attribute in attribute_for_type:
        if num_type == known_type:
            return getattr(metadata, attribute)
    return metadata.general_desc
1886
1887
def number_type(numobj):
    """Gets the type of a phone number.

    Arguments:
    numobj -- The PhoneNumber object that we want to know the type of.

    Returns the type of the phone number, as a PhoneNumberType value;
    PhoneNumberType.UNKNOWN is returned when no metadata can be found for
    the number.
    """
    region = region_code_for_number(numobj)
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(numobj.country_code, region)
    if metadata is not None:
        return _number_type_helper(national_significant_number(numobj), metadata)
    return PhoneNumberType.UNKNOWN
1902
1903
def _number_type_helper(national_number, metadata):
    """Work out the type of a national number against the given metadata.

    Arguments:
    national_number -- The national significant number, as a string.
    metadata -- The metadata whose descriptions the number is tested against.

    Returns a PhoneNumberType value; UNKNOWN if the number does not match the
    general description or any specific description.
    """
    if not _is_number_matching_desc(national_number, metadata.general_desc):
        return PhoneNumberType.UNKNOWN

    # Descriptions that map directly onto a type, in priority order.
    direct_checks = (
        (metadata.premium_rate, PhoneNumberType.PREMIUM_RATE),
        (metadata.toll_free, PhoneNumberType.TOLL_FREE),
        (metadata.shared_cost, PhoneNumberType.SHARED_COST),
        (metadata.voip, PhoneNumberType.VOIP),
        (metadata.personal_number, PhoneNumberType.PERSONAL_NUMBER),
        (metadata.pager, PhoneNumberType.PAGER),
        (metadata.uan, PhoneNumberType.UAN),
        (metadata.voicemail, PhoneNumberType.VOICEMAIL),
    )
    for desc, matched_type in direct_checks:
        if _is_number_matching_desc(national_number, desc):
            return matched_type

    if _is_number_matching_desc(national_number, metadata.fixed_line):
        # A fixed-line match is ambiguous when the mobile pattern is the same,
        # or when the number also matches the mobile description.
        if (metadata.same_mobile_and_fixed_line_pattern or
            _is_number_matching_desc(national_number, metadata.mobile)):
            return PhoneNumberType.FIXED_LINE_OR_MOBILE
        return PhoneNumberType.FIXED_LINE

    # Only test for mobile when the mobile and fixed-line patterns are known
    # to be different.
    if metadata.same_mobile_and_fixed_line_pattern:
        return PhoneNumberType.UNKNOWN
    if _is_number_matching_desc(national_number, metadata.mobile):
        return PhoneNumberType.MOBILE
    return PhoneNumberType.UNKNOWN
1938
1939
1940def _is_number_matching_desc(national_number, number_desc):
1941    """Determine if the number matches the given PhoneNumberDesc"""
1942    # Check if any possible number lengths are present; if so, we use them to avoid checking the
1943    # validation pattern if they don't match. If they are absent, this means they match the general
1944    # description, which we have already checked before checking a specific number type.
1945    if number_desc is None:
1946        return False
1947    actual_length = len(national_number)
1948    possible_lengths = number_desc.possible_length
1949    if len(possible_lengths) > 0 and not actual_length in possible_lengths:
1950        return False
1951    national_re = re.compile(number_desc.national_number_pattern or U_EMPTY_STRING)
1952    return fullmatch(national_re, national_number)
1953
1954
def is_valid_number(numobj):
    """Tests whether a phone number matches a valid pattern.

    This cannot tell whether the number is actually in use; that is
    impossible to determine from the number alone. The check applies to the
    parsed, canonicalised number, not to whether the exact digits the user
    typed are diallable from the region given when parsing. For example,
    +41 (0) 78 927 2696 parses to country code "41" and national significant
    number "789272696", which is valid even though the original string is
    not diallable.

    Arguments:
    numobj -- The phone number object that we want to validate

    Returns a boolean that indicates whether the number is of a valid pattern.
    """
    # Validate against whichever region the number itself appears to belong to.
    return is_valid_number_for_region(numobj, region_code_for_number(numobj))
1974
1975
def is_valid_number_for_region(numobj, region_code):
    """Tests whether a phone number is valid for a certain region.

    This cannot tell whether the number is actually in use. If the number's
    country calling code differs from that of the region, the result is
    immediately False. Otherwise the number is checked against the specific
    number patterns of the region, which allows, for example, determining
    whether a number is valid for Canada rather than merely being a valid
    NANPA number.

    Warning: In most cases, you want to use is_valid_number instead. For
    example, this method will mark numbers from British Crown dependencies
    such as the Isle of Man as invalid for the region "GB" (United Kingdom),
    since it has its own region code, "IM", which may be undesirable.

    Arguments:
    numobj -- The phone number object that we want to validate.
    region_code -- The region that we want to validate the phone number for.

    Returns a boolean that indicates whether the number is of a valid pattern.
    """
    country_code = numobj.country_code
    if region_code is None:
        return False
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, region_code.upper())
    if metadata is None:
        # The region code was invalid.
        return False
    if (region_code != REGION_CODE_FOR_NON_GEO_ENTITY and
        country_code != country_code_for_valid_region(region_code)):
        # The country calling code for this number does not match that of
        # the region code.
        return False
    matched_type = _number_type_helper(national_significant_number(numobj), metadata)
    return matched_type != PhoneNumberType.UNKNOWN
2010
2011
def region_code_for_number(numobj):
    """Returns the region where a phone number is from.

    This could be used for geocoding at the region level. Only guarantees
    correct results for valid, full numbers (not short-codes, or invalid
    numbers).

    Arguments:
    numobj -- The phone number object whose origin we want to know

    Returns the region where the phone number is from, or None if no region
    matches this calling code.
    """
    candidates = COUNTRY_CODE_TO_REGION_CODE.get(numobj.country_code, None)
    if candidates is None:
        return None
    if len(candidates) != 1:
        # Several regions share this calling code; inspect the number itself.
        return _region_code_for_number_from_list(numobj, candidates)
    return candidates[0]
2035
2036
def _region_code_for_number_from_list(numobj, regions):
    """Find the first region in a list that matches a number.

    Arguments:
    numobj -- The phone number object to find a region for.
    regions -- The candidate region codes, tried in order.

    Returns the first matching region code, or None if none matches.
    """
    nsn = national_significant_number(numobj)
    for candidate in regions:
        metadata = PhoneMetadata.metadata_for_region(candidate.upper(), None)
        if metadata is None:
            # Nothing to test this candidate against.
            continue
        leading_digits = metadata.leading_digits
        if leading_digits is None:
            # No leading digits available, so do full validation.
            if _number_type_helper(nsn, metadata) != PhoneNumberType.UNKNOWN:
                return candidate
        elif re.compile(leading_digits).match(nsn):
            # The leading digits pattern alone is enough to decide.
            return candidate
    return None
2056
2057
def region_code_for_country_code(country_code):
    """Returns the region code that matches a specific country calling code.

    When no region is found, UNKNOWN_REGION ('ZZ') is returned. When several
    regions share the calling code, the one designated in the metadata as the
    "main" region for that calling code is returned. When country_code is
    valid but does not belong to a specific region (as for non-geographical
    calling codes like 800), the value "001" is returned (corresponding to
    the value for World in the UN M.49 schema).
    """
    regions = COUNTRY_CODE_TO_REGION_CODE.get(country_code, None)
    # The first entry of the mapped sequence is the one returned.
    return UNKNOWN_REGION if regions is None else regions[0]
2074
2075
def region_codes_for_country_code(country_code):
    """Returns the region codes that match the specific country calling code.

    For non-geographical country calling codes, the region code 001 is
    returned. When no region code is found, an empty tuple is returned.
    """
    regions = COUNTRY_CODE_TO_REGION_CODE.get(country_code, None)
    if regions is not None:
        return regions
    return ()
2088
2089
def country_code_for_region(region_code):
    """Returns the country calling code for a specific region.

    For example, this would be 1 for the United States, and 64 for New
    Zealand.

    Arguments:
    region_code -- The region that we want to get the country calling code for.

    Returns the country calling code for the region denoted by region_code,
    or 0 if the region code is not valid.
    """
    if _is_valid_region_code(region_code):
        return country_code_for_valid_region(region_code)
    return 0
2104
2105
def country_code_for_valid_region(region_code):
    """Returns the country calling code for a region assumed to be valid.

    For example, this would be 1 for the United States, and 64 for New
    Zealand.

    Arguments:
    region_code -- The region that we want to get the country calling code for.

    Returns the country calling code for the region denoted by region_code.
    Raises Exception if no metadata exists for the region.
    """
    metadata = PhoneMetadata.metadata_for_region(region_code.upper(), None)
    if metadata is not None:
        return metadata.country_code
    raise Exception("Invalid region code %s" % region_code)
2121
2122
def ndd_prefix_for_region(region_code, strip_non_digits):
    """Returns the national dialling prefix for a specific region.

    For example, this would be 1 for the United States, and 0 for New
    Zealand. Pass strip_non_digits as True to remove symbols like "~" (which
    indicates a wait for a dialling tone) from the returned prefix. None is
    returned if there is no national prefix.

    Warning: Do not use this method for do-your-own formatting - for some
    regions, the national dialling prefix is used only for certain types of
    numbers. Use the library's formatting functions to prefix the national
    prefix when required.

    Arguments:
    region_code -- The region that we want to get the dialling prefix for.
    strip_non_digits -- whether to strip non-digits from the national
               dialling prefix.

    Returns the dialling prefix for the region denoted by region_code, or
    None if the region is None, has no metadata, or has no national prefix.
    """
    if region_code is None:
        return None
    metadata = PhoneMetadata.metadata_for_region(region_code.upper(), None)
    if metadata is None:
        return None
    prefix = metadata.national_prefix
    if prefix is None or len(prefix) == 0:
        return None
    if not strip_non_digits:
        return prefix
    # Note: if any other non-numeric symbols are ever used in national
    # prefixes, these would have to be removed here as well.
    return re.sub(U_TILDE, U_EMPTY_STRING, prefix)
2156
2157
def is_nanpa_country(region_code):
    """Checks if this region is a NANPA region.

    Arguments:
    region_code -- The region code to check.

    Returns True if region_code is one of the regions under the North American
    Numbering Plan Administration (NANPA).
    """
    in_nanpa = region_code in _NANPA_REGIONS
    return in_nanpa
2165
2166
def _is_leading_zero_possible(country_code):
    """Checks whether the country calling code is from a region whose national
    significant number could contain a leading zero. An example of such a
    region is Italy.

    Arguments:
    country_code -- The country calling code to check.

    Returns the leading_zero_possible flag from the metadata, or False if no
    metadata for the country is found.
    """
    main_region = region_code_for_country_code(country_code)
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, main_region)
    if metadata is not None:
        return metadata.leading_zero_possible
    return False
2176
2177
def is_alpha_number(number):
    """Checks if the number is a valid vanity (alpha) number such as 800
    MICROSOFT.

    A valid vanity number will start with at least 3 digits and will have
    three or more alpha characters. No region-specific checks are done here;
    to work out if the number is actually valid for a region, it should be
    parsed and methods such as is_possible_number_with_reason() and
    is_valid_number() should be used.

    Arguments:
    number -- the number that needs to be checked

    Returns True if the number is a valid vanity number
    """
    if _is_viable_phone_number(number):
        # Only the part before any extension is tested against the alpha pattern.
        unused_extension, without_extension = _maybe_strip_extension(number)
        return bool(fullmatch(_VALID_ALPHA_PHONE_PATTERN, without_extension))
    # Number is too short, or doesn't match the basic phone number pattern.
    return False
2196
2197
def is_possible_number(numobj):
    """Convenience wrapper around is_possible_number_with_reason.

    Rather than reporting the reason for failure, this returns True when the
    number is either a possible fully-qualified number (containing the area
    code and country code), or a possible local number (with a country code,
    but missing an area code). Local numbers are considered possible if they
    could be possibly dialled in this format: if the area code is needed for
    a call to connect, the number is not considered possible without it.

    Arguments:
    numobj -- the number object that needs to be checked

    Returns True if the number is possible
    """
    acceptable = (ValidationResult.IS_POSSIBLE, ValidationResult.IS_POSSIBLE_LOCAL_ONLY)
    return is_possible_number_with_reason(numobj) in acceptable
2218
2219
def is_possible_number_for_type(numobj, numtype):
    """Convenience wrapper around is_possible_number_for_type_with_reason.

    Rather than reporting the reason for failure, this returns True when the
    number is either a possible fully-qualified number (containing the area
    code and country code), or a possible local number (with a country code,
    but missing an area code). Local numbers are considered possible if they
    could be possibly dialled in this format: if the area code is needed for
    a call to connect, the number is not considered possible without it.

    Arguments:
    numobj -- the number object that needs to be checked
    numtype -- the type we are interested in

    Returns True if the number is possible
    """
    acceptable = (ValidationResult.IS_POSSIBLE, ValidationResult.IS_POSSIBLE_LOCAL_ONLY)
    return is_possible_number_for_type_with_reason(numobj, numtype) in acceptable
2241
2242
def _test_number_length(national_number, metadata, numtype=PhoneNumberType.UNKNOWN):
    """Helper method to check a number against possible lengths for this number
    type, and determine whether it matches, or is too short or too long.

    A length that lies strictly between the shortest and longest possible
    lengths without being one of them is reported as INVALID_LENGTH: if
    lengths 7 and 10 are possible, a number of length 8 gives INVALID_LENGTH.

    Arguments:
    national_number -- The national significant number, as a string.
    metadata -- The metadata holding the possible lengths to check against.
    numtype -- The PhoneNumberType whose lengths are used; defaults to
              PhoneNumberType.UNKNOWN.

    Returns a ValidationResult value: IS_POSSIBLE, IS_POSSIBLE_LOCAL_ONLY,
    TOO_SHORT, TOO_LONG or INVALID_LENGTH.
    """
    desc_for_type = _number_desc_by_type(metadata, numtype)
    if desc_for_type is None:
        possible_lengths = metadata.general_desc.possible_length
        local_lengths = ()
    else:
        # There should always be "possibleLengths" set for every element. This is declared in the XML
        # schema which is verified by PhoneNumberMetadataSchemaTest.
        # For size efficiency, where a sub-description (e.g. fixed-line) has the same possibleLengths
        # as the parent, this is missing, so we fall back to the general desc (where no numbers of the
        # type exist at all, there is one possible length (-1) which is guaranteed not to match the
        # length of any real phone number).
        possible_lengths = desc_for_type.possible_length
        if len(possible_lengths) == 0:  # pragma no cover: Python sub-descs all have possible_length
            possible_lengths = metadata.general_desc.possible_length
        local_lengths = desc_for_type.possible_length_local_only

    if numtype == PhoneNumberType.FIXED_LINE_OR_MOBILE:
        if not _desc_has_possible_number_data(_number_desc_by_type(metadata, PhoneNumberType.FIXED_LINE)):
            # The rare case has been encountered where no fixedLine data is available (true for some
            # non-geographical entities), so we just check mobile.
            return _test_number_length(national_number, metadata, PhoneNumberType.MOBILE)
        else:
            mobile_desc = _number_desc_by_type(metadata, PhoneNumberType.MOBILE)
            if _desc_has_possible_number_data(mobile_desc):
                # Merge the mobile data in if there was any. We have to make a copy to do this.
                possible_lengths = list(possible_lengths)
                # Note that when adding the possible lengths from mobile, we have to again check they
                # aren't empty since if they are this indicates they are the same as the general desc and
                # should be obtained from there.
                if len(mobile_desc.possible_length) == 0:  # pragma no cover: Python sub-descs all have possible_length
                    possible_lengths += metadata.general_desc.possible_length
                else:
                    possible_lengths += mobile_desc.possible_length
                # The current list is sorted; we need to merge in the new list and re-sort (duplicates
                # are okay). Sorting isn't so expensive because the lists are very small.
                list.sort(possible_lengths)

                # Merge the local-only lengths the same way, copying before extending.
                if len(local_lengths) == 0:
                    local_lengths = mobile_desc.possible_length_local_only
                else:
                    local_lengths = list(local_lengths)
                    local_lengths += mobile_desc.possible_length_local_only
                    list.sort(local_lengths)

    # If the type is not supported at all (indicated by a missing PhoneNumberDesc) we return invalid length.
    if desc_for_type is None:
        return ValidationResult.INVALID_LENGTH

    actual_length = len(national_number)
    # This is safe because there is never an overlap between the possible lengths and the local-only
    # lengths; this is checked at build time.
    if actual_length in local_lengths:
        return ValidationResult.IS_POSSIBLE_LOCAL_ONLY

    # possible_lengths is sorted, so the first entry is the shortest and the last the longest.
    minimum_length = possible_lengths[0]
    if minimum_length == actual_length:
        return ValidationResult.IS_POSSIBLE
    elif minimum_length > actual_length:
        return ValidationResult.TOO_SHORT
    elif possible_lengths[-1] < actual_length:
        return ValidationResult.TOO_LONG
    # We skip the first element; we've already checked it.
    if actual_length in possible_lengths[1:]:
        return ValidationResult.IS_POSSIBLE
    else:
        return ValidationResult.INVALID_LENGTH
2316
2317
def is_possible_number_with_reason(numobj):
    """Check whether a phone number is a possible number.

    This is is_possible_number_for_type_with_reason called with a number
    type of PhoneNumberType.UNKNOWN; see that function for details of the
    check.

    Arguments:
    numobj -- The number object that needs to be checked

    Returns a value from ValidationResult which indicates whether the number
    is possible
    """
    unspecified_type = PhoneNumberType.UNKNOWN
    return is_possible_number_for_type_with_reason(numobj, unspecified_type)
2320
2321
def is_possible_number_for_type_with_reason(numobj, numtype):
    """Check whether a phone number is a possible number of a particular type.

    For types that don't exist in a particular region, the result isn't very
    useful; it is recommended that you use supported_types_for_region or
    supported_types_for_non_geo_entity respectively before calling this
    method to determine whether you should call it for this number at all.

    This is a more lenient check than is_valid_number in the following sense:

     - Only the length of the phone number is checked. In particular, the
       starting digits of the number are not checked.

     - For some numbers (particularly fixed-line), many regions have the
       concept of area code, which together with subscriber number constitute
       the national significant number. It is sometimes okay to dial only the
       subscriber number when dialing in the same area. This function will
       return IS_POSSIBLE_LOCAL_ONLY if the subscriber-number-only version is
       passed in. On the other hand, because is_valid_number validates using
       information on both starting digits (for fixed line numbers, that would
       most likely be area codes) and length (obviously includes the length of
       area codes for fixed line numbers), it will return false for the
       subscriber-number-only version.

    Arguments:
    numobj -- The number object that needs to be checked
    numtype -- The type we are interested in

    Returns a value from ValidationResult which indicates whether the number
    is possible
    """
    nsn = national_significant_number(numobj)
    calling_code = numobj.country_code
    if not _has_valid_country_calling_code(calling_code):
        return ValidationResult.INVALID_COUNTRY_CODE
    # Note: For regions that share a country calling code, like NANPA numbers,
    # the rules of the default region (US in this case) are used, since
    # region_code_for_number will not work if the number is possible but not
    # valid. There is in fact one country calling code (290) where the possible
    # number pattern differs between various regions (Saint Helena and Tristan
    # da Cuñha), but this is handled by putting all possible lengths for any
    # country with this country calling code in the metadata for the default
    # region in this case.
    default_region = region_code_for_country_code(calling_code)
    # Metadata cannot be None because the country calling code is valid.
    metadata = PhoneMetadata.metadata_for_region_or_calling_code(calling_code, default_region)
    return _test_number_length(nsn, metadata, numtype)
2370
2371
def is_possible_number_string(number, region_dialing_from):
    """Check whether a phone number string is a possible number.

    Takes a number in the form of a string, and the region where the number
    could be dialed from. It provides a more lenient check than
    is_valid_number; see is_possible_number_with_reason() for details.

    The string is parsed first, and is_possible_number is then invoked on
    the resulting PhoneNumber object.

    Arguments:
    number -- The number that needs to be checked, in the form of a string.
    region_dialing_from -- The region that we are expecting the number to be
              dialed from.  Note this is different from the region where the
              number belongs.  For example, the number +1 650 253 0000 is a
              number that belongs to US. When written in this form, it can be
              dialed from any region. When it is written as 00 1 650 253 0000,
              it can be dialed from any region which uses an international
              dialling prefix of 00. When it is written as 650 253 0000, it
              can only be dialed from within the US, and when written as 253
              0000, it can only be dialed from within a smaller area in the US
              (Mountain View, CA, to be more specific).

    Returns True if the number is possible; False if it is not possible or
    could not be parsed.
    """
    try:
        parsed = parse(number, region_dialing_from)
        return is_possible_number(parsed)
    except NumberParseException:
        return False
2401
2402
def truncate_too_long_number(numobj):
    """Truncate a number object that is too long.

    Tries to extract a valid number from a phone number that is too long to
    be valid by dropping trailing digits, and updates the PhoneNumber object
    passed in to that valid version. If no valid number could be extracted,
    the PhoneNumber object passed in is left unmodified.

    Arguments:
    numobj -- A PhoneNumber object which contains a number that is too long to
              be valid.

    Returns True if a valid phone number can be successfully extracted.
    """
    if is_valid_number(numobj):
        return True

    # Work on a copy so the caller's object is untouched unless we succeed.
    candidate = PhoneNumber()
    candidate.merge_from(numobj)
    shortened = numobj.national_number

    while not is_valid_number(candidate):
        # Drop the rightmost digit and try again.
        shortened = shortened // 10
        candidate.national_number = shortened
        if (is_possible_number_with_reason(candidate) == ValidationResult.TOO_SHORT or
            shortened == 0):
            return False

    # The copy is now a valid number, so update the original object.
    numobj.national_number = shortened
    return True
2434
2435
2436def _extract_country_code(number):
2437    """Extracts country calling code from number.
2438
2439    Returns a 2-tuple of (country_calling_code, rest_of_number).  It assumes
2440    that the leading plus sign or IDD has already been removed.  Returns (0,
2441    number) if number doesn't start with a valid country calling code.
2442    """
2443
2444    if len(number) == 0 or number[0] == U_ZERO:
2445        # Country codes do not begin with a '0'.
2446        return (0, number)
2447    for ii in range(1, min(len(number), _MAX_LENGTH_COUNTRY_CODE) + 1):
2448        try:
2449            country_code = int(number[:ii])
2450            if country_code in COUNTRY_CODE_TO_REGION_CODE:
2451                return (country_code, number[ii:])
2452        except Exception:
2453            pass
2454    return (0, number)
2455
2456
def _maybe_extract_country_code(number, metadata, keep_raw_input, numobj):
    """Tries to extract a country calling code from a number.

    Zero is returned as the country calling code when none is considered to
    be present.  A country calling code can be found in three ways:

     - the international dialing prefix of the region the caller is dialing
       from is present at the start of the number; it is stripped and the
       digits that follow are examined

     - the number starts with a '+' sign; it is stripped and the digits that
       follow are examined

     - the number starts with the country calling code of the default
       region.  If the number as given does not match the default region's
       general national number pattern but does match once that country
       calling code (and any national prefix) is removed, or if the number
       as given is too long for the region, the leading digits are treated
       as the country calling code and removed.

    Raises a NumberParseException if the number starts with a '+' or IDD but
    the digits after it are too short to be a number, or do not start with
    the country calling code of any known region.

    Arguments:
    number -- non-normalized telephone number that we wish to extract a
              country calling code from; may begin with '+'
    metadata -- metadata about the region this number may be from, or None
    keep_raw_input -- True if the country_code_source field of numobj
              should be populated.
    numobj -- The PhoneNumber object whose country_code (always) and
              country_code_source (only when keep_raw_input is True) are
              populated.

    Returns a 2-tuple containing:
      - the country calling code extracted, or 0 if none could be extracted
      - a string holding the national significant number when a country
        calling code was extracted; otherwise the empty string.
    """
    if len(number) == 0:
        return (0, U_EMPTY_STRING)

    # Without a usable IDD for the region, use a prefix that never matches.
    if metadata is None or metadata.international_prefix is None:
        idd_prefix = unicod("NonMatch")
    else:
        idd_prefix = metadata.international_prefix

    source, stripped_number = _maybe_strip_i18n_prefix_and_normalize(number, idd_prefix)
    if keep_raw_input:
        numobj.country_code_source = source

    if source != CountryCodeSource.FROM_DEFAULT_COUNTRY:
        # The number was written in international format ('+' or IDD), so a
        # recognisable country calling code must follow.
        if len(stripped_number) <= _MIN_LENGTH_FOR_NSN:
            raise NumberParseException(NumberParseException.TOO_SHORT_AFTER_IDD,
                                       "Phone number had an IDD, but after this was not " +
                                       "long enough to be a viable phone number.")
        calling_code, remainder = _extract_country_code(stripped_number)
        if calling_code == 0:
            # A strange country calling code that we don't recognize, or
            # that doesn't exist.
            raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE,
                                       "Country calling code supplied was not recognised.")
        numobj.country_code = calling_code
        return (calling_code, remainder)

    if metadata is not None:
        # See whether the number begins with the default region's country
        # calling code, and whether removing it gives a better result.
        region_calling_code = metadata.country_code
        region_calling_code_digits = str(metadata.country_code)
        if stripped_number.startswith(region_calling_code_digits):
            candidate = stripped_number[len(region_calling_code_digits):]
            nsn_pattern = re.compile(metadata.general_desc.national_number_pattern or U_EMPTY_STRING)
            candidate = _maybe_strip_national_prefix_carrier_code(candidate, metadata)[1]

            # Prefer the stripped form if the number was not valid before
            # but is valid now, or if it was too long before.
            became_valid = (fullmatch(nsn_pattern, stripped_number) is None and
                            fullmatch(nsn_pattern, candidate))
            if (became_valid or
                _test_number_length(stripped_number, metadata) == ValidationResult.TOO_LONG):
                if keep_raw_input:
                    numobj.country_code_source = CountryCodeSource.FROM_NUMBER_WITHOUT_PLUS_SIGN
                numobj.country_code = region_calling_code
                return (region_calling_code, candidate)

    # No country calling code present.
    numobj.country_code = 0
    return (0, U_EMPTY_STRING)
2555
2556
def _parse_prefix_as_idd(idd_pattern, number):
    """Removes a leading IDD from number when one is present.

    Helper function used by _maybe_strip_i18n_prefix_and_normalize().

    Arguments:
    idd_pattern -- compiled regular expression for the IDD
    number -- the number to examine

    Returns a 2-tuple:
      - Boolean indicating if IDD was stripped
      - Number with IDD stripped (or the unchanged number if it was not)
    """
    idd_match = idd_pattern.match(number)
    if not idd_match:
        return (False, number)
    remainder = number[idd_match.end():]
    # Country calling codes cannot begin with 0, so if the first digit after
    # the matched prefix is a 0 then this was not really an IDD.
    first_digit = _CAPTURING_DIGIT_PATTERN.search(remainder)
    if first_digit and normalize_digits_only(first_digit.group(1)) == U_ZERO:
        return (False, number)
    return (True, remainder)
2578
2579
def _maybe_strip_i18n_prefix_and_normalize(number, possible_idd_prefix):
    """Removes an international prefix (such as +, 00, 011) from the start
    of number if there is one, normalizes what is left, and reports which
    kind of prefix (if any) was found.

    Arguments:
    number -- The non-normalized telephone number that we wish to strip any
              international dialing prefix from.
    possible_idd_prefix -- The international direct dialing prefix (a
              regular expression string) from the region we think this
              number may be dialed in.

    Returns a 2-tuple containing:
      - The CountryCodeSource describing the prefix that was removed, or
        CountryCodeSource.FROM_DEFAULT_COUNTRY if the number did not seem to
        be in international format.
      - The number with the prefix stripped.
    """
    if len(number) == 0:
        return (CountryCodeSource.FROM_DEFAULT_COUNTRY, number)

    # One or more leading plus signs: consume them, then the rest of the
    # number can be normalized.
    plus_match = _PLUS_CHARS_PATTERN.match(number)
    if plus_match:
        without_plus = number[plus_match.end():]
        return (CountryCodeSource.FROM_NUMBER_WITH_PLUS_SIGN, _normalize(without_plus))

    # Otherwise try to read the first digits as an international prefix.
    was_stripped, remainder = _parse_prefix_as_idd(re.compile(possible_idd_prefix),
                                                   _normalize(number))
    source = (CountryCodeSource.FROM_NUMBER_WITH_IDD if was_stripped
              else CountryCodeSource.FROM_DEFAULT_COUNTRY)
    return (source, remainder)
2617
2618
def _maybe_strip_national_prefix_carrier_code(number, metadata):
    """Strips any national prefix (such as 0, 1) present in a number.

    The prefix is only removed if doing so does not turn a number that
    matched the region's general national number pattern into one that does
    not.

    Arguments:
    number -- The normalized telephone number that we wish to strip any
              national dialing prefix from
    metadata -- The metadata for the region that we think this number
              is from.

    Returns a 3-tuple of
     - The carrier code extracted if it is present, otherwise an empty string.
     - The number with the prefix stripped.
     - Boolean indicating if a national prefix or carrier code (or both) could be extracted.
     """
    carrier_code = U_EMPTY_STRING
    possible_national_prefix = metadata.national_prefix_for_parsing
    if (len(number) == 0 or
        possible_national_prefix is None or
        len(possible_national_prefix) == 0):
        # Early return for numbers of zero length, or regions with no
        # national prefix to look for.
        return (U_EMPTY_STRING, number, False)

    # Attempt to parse the first digits as a national prefix.
    prefix_pattern = re.compile(possible_national_prefix)
    prefix_match = prefix_pattern.match(number)
    if not prefix_match:
        return (carrier_code, number, False)

    national_number_pattern = re.compile(metadata.general_desc.national_number_pattern or U_EMPTY_STRING)
    # Check if the original number is viable.
    is_viable_original_number = fullmatch(national_number_pattern, number)
    num_groups = len(prefix_match.groups())
    transform_rule = metadata.national_prefix_transform_rule
    # group(num_groups) is the last capturing group, or the whole match when
    # the pattern has no capturing groups.  If it is None, nothing was
    # captured for the transform rule to use; therefore, no transformation
    # is necessary, and we just remove the national prefix.
    if (transform_rule is None or
        len(transform_rule) == 0 or
        prefix_match.group(num_groups) is None):
        stripped_number = number[prefix_match.end():]
        # If the original number was viable, and the resultant number is
        # not, leave the number alone.
        if (is_viable_original_number and
            not fullmatch(national_number_pattern, stripped_number)):
            return (U_EMPTY_STRING, number, False)

        # Note Match.group(n) returns None for a group that did not
        # participate, whereas Match.groups(default) always returns a tuple.
        if (num_groups > 0 and
            prefix_match.group(num_groups) is not None):
            carrier_code = prefix_match.group(1) or U_EMPTY_STRING
        return (carrier_code, stripped_number, True)

    # Check that the resultant number is still viable. If not, return.
    # Check this by making the transformation on a copy first.
    transformed_number = prefix_pattern.sub(transform_rule, number, count=1)
    if (is_viable_original_number and
        not fullmatch(national_number_pattern, transformed_number)):
        return (U_EMPTY_STRING, number, False)
    if num_groups > 1:
        # With more than one group, the first one holds the carrier code.
        carrier_code = prefix_match.group(1) or U_EMPTY_STRING
    return (carrier_code, transformed_number, True)
2681
2682
def _maybe_strip_extension(number):
    """Split a trailing extension off a number string.

    An extension is the part of the number dialled after the call is
    connected, usually indicated with extn, ext, x or similar.  It is only
    treated as an extension when the text in front of it is itself a viable
    phone number.

    Arguments:
    number -- the non-normalized telephone number that we wish to strip the extension from.

    Returns a 2-tuple of:
     - the phone extension (or "" if not present)
     - the number before the extension (the whole number if no extension
       was found).
    """
    extn_match = _EXTN_PATTERN.search(number)
    if not extn_match:
        return ("", number)
    base_number = number[:extn_match.start()]
    if not _is_viable_phone_number(base_number):
        return ("", number)
    # The extension digits are captured by one of several alternative
    # groups in the regular expression; use the first group that took part
    # in the match.
    captured = [group for group in extn_match.groups() if group is not None]
    if captured:
        return (captured[0], base_number)
    return ("", number)
2709
2710
def _check_region_for_parsing(number, default_region):
    """Decides whether parsing can go ahead with the region supplied.

    Returns True if default_region is a valid region code, or if it is not
    but the number to parse starts with a + symbol so that the region can be
    inferred from the number.  Returns False if the region provided cannot
    be used and the region cannot be inferred.
    """
    if _is_valid_region_code(default_region):
        return True
    # If the number is None or empty, we can't infer the region.
    if number is None or len(number) == 0:
        return False
    return _PLUS_CHARS_PATTERN.match(number) is not None
2725
2726
def _set_italian_leading_zeros_for_phone_number(national_number, numobj):
    """Record on numobj any leading zeros found in national_number.

    Sets numobj.italian_leading_zero when the string (of more than one
    character) starts with a zero, and numobj.number_of_leading_zeros when
    there is more than one leading zero."""
    if len(national_number) <= 1 or national_number[0] != U_ZERO:
        return
    numobj.italian_leading_zero = True
    # The final character is never counted as a leading zero, so a number
    # that is all "0"s has one fewer leading zero than it has digits.
    zero_count = 0
    for char in national_number[:-1]:
        if char != U_ZERO:
            break
        zero_count += 1
    if zero_count > 1:
        numobj.number_of_leading_zeros = zero_count
2740
2741
def parse(number, region=None, keep_raw_input=False,
          numobj=None, _check_region=True):
    """Parse a string and return a corresponding PhoneNumber object.

    The method is quite lenient and looks for a number in the input text
    (raw input) and does not check whether the string is definitely only a
    phone number. To do this, it ignores punctuation and white-space, as
    well as any text before the number (e.g. a leading "Tel: ") and trims
    the non-number bits.  It will accept a number in any format (E164,
    national, international etc), assuming it can be interpreted with the
    region supplied. It also attempts to convert any alpha characters
    into digits if it thinks this is a vanity number of the type "1800
    MICROSOFT".

    This method will raise a NumberParseException if the number is not
    considered to be a possible number. Note that validation of whether the
    number is actually a valid number for a particular region is not
    performed. This can be done separately with is_valid_number.

    Note if any new field is added to this method that should always be filled
    in, even when keep_raw_input is False, it should also be handled in the
    _copy_core_fields_only() function.

    Arguments:
    number -- The number that we are attempting to parse. This can
              contain formatting such as +, ( and -, as well as a phone
              number extension. It can also be provided in RFC3966 format.
    region -- The region that we are expecting the number to be from. This
              is only used if the number being parsed is not written in
              international format. The country_code for the number in
              this case would be stored as that of the default region
              supplied. If the number is guaranteed to start with a '+'
              followed by the country calling code, then None or
              UNKNOWN_REGION can be supplied.
    keep_raw_input -- Whether to populate the raw_input field of the
              PhoneNumber object with number (as well as the
              country_code_source and preferred_domestic_carrier_code
              fields).
    numobj -- An optional existing PhoneNumber object to receive the
              parsing results
    _check_region -- Whether to check the supplied region parameter;
              should always be True for external callers.

    Returns a PhoneNumber object filled with the parsed number; this is
    numobj itself if one was supplied.

    Raises:
    NumberParseException if the string is not considered to be a viable
    phone number (e.g.  too few or too many digits) or if no default
    region was supplied and the number is not in international format
    (does not start with +).

    """
    if numobj is None:
        numobj = PhoneNumber()
    if number is None:
        raise NumberParseException(NumberParseException.NOT_A_NUMBER,
                                   "The phone number supplied was None.")
    elif len(number) > _MAX_INPUT_STRING_LENGTH:
        raise NumberParseException(NumberParseException.TOO_LONG,
                                   "The string supplied was too long to parse.")

    # Pull the candidate number text out of the input (handling RFC3966
    # input as well as free text).
    national_number = _build_national_number_for_parsing(number)

    if not _is_viable_phone_number(national_number):
        raise NumberParseException(NumberParseException.NOT_A_NUMBER,
                                   "The string supplied did not seem to be a phone number.")

    # Check the region supplied is valid, or that the extracted number starts
    # with some sort of + sign so the number's region can be determined.
    if _check_region and not _check_region_for_parsing(national_number, region):
        raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE,
                                   "Missing or invalid default region.")
    if keep_raw_input:
        numobj.raw_input = number

    # Attempt to parse extension first, since it doesn't require
    # region-specific data and we want to have the non-normalised number here.
    extension, national_number = _maybe_strip_extension(national_number)
    if len(extension) > 0:
        numobj.extension = extension
    if region is None:
        metadata = None
    else:
        metadata = PhoneMetadata.metadata_for_region(region.upper(), None)

    country_code = 0
    try:
        country_code, normalized_national_number = _maybe_extract_country_code(national_number,
                                                                               metadata,
                                                                               keep_raw_input,
                                                                               numobj)
    except NumberParseException:
        # Fetch the exception via sys.exc_info() rather than binding it in
        # the except clause.
        _, e, _ = sys.exc_info()
        matchobj = _PLUS_CHARS_PATTERN.match(national_number)
        if (e.error_type == NumberParseException.INVALID_COUNTRY_CODE and
            matchobj is not None):
            # Strip the plus-char, and try again.
            country_code, normalized_national_number = _maybe_extract_country_code(national_number[matchobj.end():],
                                                                                   metadata,
                                                                                   keep_raw_input,
                                                                                   numobj)
            if country_code == 0:
                raise NumberParseException(NumberParseException.INVALID_COUNTRY_CODE,
                                           "Could not interpret numbers after plus-sign.")
        else:
            raise

    if country_code != 0:
        # A country calling code was found in the number itself; switch to
        # the metadata for that code if it differs from the region supplied.
        number_region = region_code_for_country_code(country_code)
        if number_region != region:
            # Metadata cannot be None because the country calling code is valid.
            metadata = PhoneMetadata.metadata_for_region_or_calling_code(country_code, number_region)
    else:
        # If no extracted country calling code, use the region supplied
        # instead. The national number is just the normalized version of the
        # number we were given to parse.
        national_number = _normalize(national_number)
        normalized_national_number += national_number
        if region is not None:
            country_code = metadata.country_code
            numobj.country_code = country_code
        elif keep_raw_input:
            numobj.country_code_source = None

    if len(normalized_national_number) < _MIN_LENGTH_FOR_NSN:
        raise NumberParseException(NumberParseException.TOO_SHORT_NSN,
                                   "The string supplied is too short to be a phone number.")
    if metadata is not None:
        potential_national_number = normalized_national_number
        carrier_code, potential_national_number, _ = _maybe_strip_national_prefix_carrier_code(potential_national_number,
                                                                                               metadata)
        # We require that the NSN remaining after stripping the national
        # prefix and carrier code be long enough to be a possible length for
        # the region. Otherwise, we don't do the stripping, since the original
        # number could be a valid short number.
        if _test_number_length(potential_national_number, metadata) != ValidationResult.TOO_SHORT:
            normalized_national_number = potential_national_number
            if keep_raw_input and carrier_code is not None and len(carrier_code) > 0:
                numobj.preferred_domestic_carrier_code = carrier_code
    len_national_number = len(normalized_national_number)
    if len_national_number < _MIN_LENGTH_FOR_NSN:  # pragma no cover
        # Check of _is_viable_phone_number() at the top of this function makes
        # this effectively unhittable.
        raise NumberParseException(NumberParseException.TOO_SHORT_NSN,
                                   "The string supplied is too short to be a phone number.")
    if len_national_number > _MAX_LENGTH_FOR_NSN:
        raise NumberParseException(NumberParseException.TOO_LONG,
                                   "The string supplied is too long to be a phone number.")
    # Leading zeros would be lost by the numeric conversion below, so record
    # them on the object first.
    _set_italian_leading_zeros_for_phone_number(normalized_national_number, numobj)
    numobj.national_number = to_long(normalized_national_number)
    return numobj
2892
2893
def _build_national_number_for_parsing(number):
    """Converts number to a form that we can parse and returns it.

    If number is written in RFC3966 form (it contains a phone-context
    parameter), the result is any '+'-prefixed phone-context value followed
    by the part of the number before the phone-context.  Otherwise a
    possible number is extracted from the text.  In both cases any
    isdn-subaddress, and everything after it, is removed.

    Arguments:
    number -- the string to extract a number from.

    Returns the string that should be parsed as the phone number.
    """
    index_of_phone_context = number.find(_RFC3966_PHONE_CONTEXT)
    if index_of_phone_context > 0:
        phone_context_start = index_of_phone_context + len(_RFC3966_PHONE_CONTEXT)
        # If the phone context contains a phone number prefix, we need to
        # capture it, whereas domains will be ignored.  The bounds check
        # copes with an empty phone context at the very end of the input,
        # which would otherwise raise IndexError.
        if (phone_context_start < len(number) and
            number[phone_context_start] == _PLUS_SIGN):
            # Additional parameters might follow the phone context. If so, we
            # will remove them here because the parameters after phone context
            # are not important for parsing the phone number.
            phone_context_end = number.find(U_SEMICOLON, phone_context_start)
            if phone_context_end > 0:
                national_number = number[phone_context_start:phone_context_end]
            else:
                national_number = number[phone_context_start:]
        else:
            national_number = U_EMPTY_STRING
        # Now append everything between the "tel:" prefix and the
        # phone-context. This should include the national number, an optional
        # extension or isdn-subaddress component. Note we also handle the case
        # when "tel:" is missing, as we have seen in some of the phone number
        # inputs.  In that case we append everything from the beginning.
        index_of_rfc3966_prefix = number.find(_RFC3966_PREFIX)
        if index_of_rfc3966_prefix >= 0:
            index_of_national_number = index_of_rfc3966_prefix + len(_RFC3966_PREFIX)
        else:
            index_of_national_number = 0
        national_number += number[index_of_national_number:index_of_phone_context]
    else:
        # Extract a possible number from the string passed in (this strips leading characters that
        # could not be the start of a phone number.)
        national_number = _extract_possible_number(number)

    # Delete the isdn-subaddress and everything after it if it is
    # present. Note extension won't appear at the same time with
    # isdn-subaddress according to paragraph 5.3 of the RFC3966 spec.
    index_of_isdn = national_number.find(_RFC3966_ISDN_SUBADDRESS)
    if index_of_isdn > 0:
        national_number = national_number[:index_of_isdn]
    # If both phone context and isdn-subaddress are absent but other
    # parameters are present, the parameters are left in national_number. This
    # is because we are concerned about deleting content from a potential
    # number string when there is no strong evidence that the number is
    # actually written in RFC3966.
    return national_number
2939
2940
def _copy_core_fields_only(inobj):
    """Builds a fresh PhoneNumber holding just the fields that uniquely
    identify a phone number (country code, national number, extension and
    leading-zero information), leaving out the fields that capture the
    context in which the phone number was created.
    """
    core = PhoneNumber()
    core.country_code = inobj.country_code
    core.national_number = inobj.national_number
    extension = inobj.extension
    if extension is not None and len(extension) > 0:
        core.extension = extension
    if inobj.italian_leading_zero:
        core.italian_leading_zero = True
        # The count is only relevant if there are leading zeros at all; a
        # missing count is implicitly 1, which is made explicit here.
        zero_count = inobj.number_of_leading_zeros
        core.number_of_leading_zeros = 1 if zero_count is None else zero_count
    return core
2959
2960
def _is_number_match_OO(numobj1_in, numobj2_in):
    """Compares two PhoneNumber objects and returns the MatchType that
    describes how closely they agree."""
    # Only the fields that uniquely define a number matter, so work on
    # copies that hold just those.
    first = _copy_core_fields_only(numobj1_in)
    second = _copy_core_fields_only(numobj2_in)

    # Two different extensions can never match.
    ext1, ext2 = first.extension, second.extension
    if ext1 is not None and ext2 is not None and ext1 != ext2:
        return MatchType.NO_MATCH

    code1 = first.country_code
    code2 = second.country_code
    if code1 != 0 and code2 != 0:
        # Both had country_code specified.
        if first == second:
            return MatchType.EXACT_MATCH
        if code1 == code2 and _is_national_number_suffix_of_other(first, second):
            # The difference is down to the presence or absence of an
            # 'Italian leading zero' or of an extension, or to one NSN being
            # a shorter variant of the other.
            return MatchType.SHORT_NSN_MATCH
        return MatchType.NO_MATCH

    # At least one country_code was not specified.  Make the country_code
    # fields equal so that the remaining fields can be compared directly.
    first.country_code = code2
    if first == second:
        # Everything else was the same.
        return MatchType.NSN_MATCH
    if _is_national_number_suffix_of_other(first, second):
        return MatchType.SHORT_NSN_MATCH
    return MatchType.NO_MATCH
2999
3000
def _is_national_number_suffix_of_other(numobj1, numobj2):
    """Returns True when the decimal text of one national number ends with
    the decimal text of the other (which includes the case where the two
    are the same).
    """
    first = str(numobj1.national_number)
    second = str(numobj2.national_number)
    # Only the shorter text can be a suffix of the longer one; for equal
    # lengths the check reduces to equality.
    if len(first) >= len(second):
        return first.endswith(second)
    return second.endswith(first)
3009
3010
def _is_number_match_SS(number1, number2):
    """Compares two phone numbers given as strings and returns a MatchType.

    This is a convenience wrapper for _is_number_match_OO/_is_number_match_OS.
    No default region is known.  Each string in turn is parsed on its own;
    if neither carries a usable country calling code, both are parsed
    without a region and compared.
    """
    invalid_country_code = NumberParseException.INVALID_COUNTRY_CODE

    # First attempt: number1 carries its own country calling code.
    try:
        return _is_number_match_OS(parse(number1, UNKNOWN_REGION), number2)
    except NumberParseException:
        if sys.exc_info()[1].error_type != invalid_country_code:
            # Not a viable phone number.
            return MatchType.NOT_A_NUMBER

    # Second attempt: number2 carries its own country calling code.
    try:
        return _is_number_match_OS(parse(number2, UNKNOWN_REGION), number1)
    except NumberParseException:
        if sys.exc_info()[1].error_type != invalid_country_code:
            # Not a viable phone number.
            return MatchType.NOT_A_NUMBER

    # Neither has a country calling code: parse both without a region.
    try:
        first = parse(number1, None, keep_raw_input=False,
                      _check_region=False, numobj=None)
        second = parse(number2, None, keep_raw_input=False,
                       _check_region=False, numobj=None)
        return _is_number_match_OO(first, second)
    except NumberParseException:
        return MatchType.NOT_A_NUMBER
3041
3042
def _is_number_match_OS(numobj1, number2):
    """Variant of _is_number_match_OO that compares one PhoneNumber object
    with one number given as a string, returning a MatchType."""
    # See whether the string carries its own country calling code by
    # attempting to parse it without a region.
    try:
        return _is_number_match_OO(numobj1, parse(number2, UNKNOWN_REGION))
    except NumberParseException:
        if sys.exc_info()[1].error_type != NumberParseException.INVALID_COUNTRY_CODE:
            # The string is not a viable phone number.
            return MatchType.NOT_A_NUMBER

    # The second number has no country calling code, so EXACT_MATCH is no
    # longer possible.
    region1 = region_code_for_country_code(numobj1.country_code)
    try:
        if region1 == UNKNOWN_REGION:
            # The first number didn't have a valid country calling code
            # either, so parse the second number without one as well.
            second = parse(number2, None, keep_raw_input=False,
                           _check_region=False, numobj=None)
            return _is_number_match_OO(numobj1, second)
        # Parse the string as if it came from the first number's region.
        second = parse(number2, region1)
        result = _is_number_match_OO(numobj1, second)
    except NumberParseException:
        return MatchType.NOT_A_NUMBER
    # The country calling code was only assumed, so an exact match is
    # reported as an NSN_MATCH.
    if result == MatchType.EXACT_MATCH:
        return MatchType.NSN_MATCH
    return result
3079
3080
def is_number_match(num1, num2):
    """Takes two phone numbers and compares them for equality.

    For example, the numbers +1 345 657 1234 and 657 1234 are a SHORT_NSN_MATCH.
    The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.

    Each argument may be either a PhoneNumber object or a string; the
    comparison is dispatched to the helper for that combination of types.

    Arguments:
    num1 -- First number object or string to compare. Can contain formatting,
              and can have country calling code specified with + at the start.
    num2 -- Second number object or string to compare. Can contain formatting,
              and can have country calling code specified with + at the start.

    Returns:
     - EXACT_MATCH if the country_code, NSN, presence of a leading zero for
       Italian numbers and any extension present are the same.
     - NSN_MATCH if either or both has no region specified, and the NSNs and
       extensions are the same.
     - SHORT_NSN_MATCH if either or both has no region specified, or the
       region specified is the same, and one NSN could be a shorter version of
       the other number. This includes the case where one has an extension
       specified, and the other does not.
     - NOT_A_NUMBER if a number supplied as a string is not a viable phone
       number.
     - NO_MATCH otherwise.
     """
    if not isinstance(num1, PhoneNumber):
        if isinstance(num2, PhoneNumber):
            # Only the second is an object: the object/string helper expects
            # the object first, so swap the arguments.
            return _is_number_match_OS(num2, num1)
        return _is_number_match_SS(num1, num2)

    # From here on num1 is known to be a PhoneNumber object.
    if isinstance(num2, PhoneNumber):
        return _is_number_match_OO(num1, num2)
    return _is_number_match_OS(num1, num2)
3112
3113
def _can_be_internationally_dialled(numobj):
    """Returns True if the number can be dialled from outside its region, or
    if this is unknown.

    Returns False only when the national significant number matches the
    region's no_international_dialling description. Does not check the number
    is a valid number.

    TODO: Make this method public when we have enough metadata to make it
    worthwhile.

    Arguments:
    numobj -- the phone number object for which we want to know whether it is
              diallable from outside the region.
    """
    region = region_code_for_number(numobj)
    region_metadata = PhoneMetadata.metadata_for_region(region, None)
    if region_metadata is not None:
        significant = national_significant_number(numobj)
        restricted = _is_number_matching_desc(
            significant, region_metadata.no_international_dialling)
        return not restricted
    # No metadata for the region.  Note numbers belonging to
    # non-geographical entities (e.g. +800 numbers) are always
    # internationally diallable, and will be caught here.
    return True
3136
3137
def is_mobile_number_portable_region(region_code):
    """Reports whether the supplied region supports mobile number portability.

    Arguments:
    region_code -- the region for which we want to know whether it supports
                   mobile number portability or not.

    Returns the mobile_number_portable_region value from the region's
    metadata, or False when no metadata is found for region_code.
    """
    region_metadata = PhoneMetadata.metadata_for_region(region_code, None)
    return (False if region_metadata is None
            else region_metadata.mobile_number_portable_region)
3151
3152
class NumberParseException(UnicodeMixin, Exception):
    """Exception when attempting to parse a putative phone number.

    Attributes:
    error_type -- One of the error-type constants defined on this class,
                  giving the reason the string could not be interpreted as a
                  phone number.
    """

    # The reason a string could not be interpreted as a phone number.

    # The country code supplied did not belong to a supported country or
    # non-geographical entity.
    INVALID_COUNTRY_CODE = 0

    # This generally indicates the string passed in had fewer than 3 digits in
    # it.  The number failed to match the regular expression
    # _VALID_PHONE_NUMBER in phonenumberutil.py.
    NOT_A_NUMBER = 1

    # This indicates the string started with an international dialing prefix,
    # but after this was removed, it had fewer digits than any valid phone
    # number (including country code) could have.
    TOO_SHORT_AFTER_IDD = 2

    # This indicates the string, after any country code has been stripped,
    # had fewer digits than any valid phone number could have.
    TOO_SHORT_NSN = 3

    # This indicates the string had more digits than any valid phone number
    # could have
    TOO_LONG = 4

    def __init__(self, error_type, msg):
        """Create the exception.

        Arguments:
        error_type -- One of the error-type constants defined on this class.
        msg -- Human-readable description of the failure.
        """
        # Only msg is handed to the base class, so self.args is (msg,).
        Exception.__init__(self, msg)
        self.error_type = error_type
        self._msg = msg

    def __reduce__(self):
        """Describe how to rebuild this exception for pickle and copy.

        The default reduction re-creates an exception from self.args, which
        here holds only the message; that would call the two-argument
        constructor with a single argument and fail.  Supply both
        constructor arguments explicitly instead.
        """
        return (type(self), (self.error_type, self._msg))

    def __unicode__(self):
        """Return the error rendered as "(error_type) message" text."""
        return unicod("(%s) %s") % (self.error_type, self._msg)
3187