1# isin.py - functions for handling ISIN numbers
2#
3# Copyright (C) 2015-2017 Arthur de Jong
4#
5# This library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9#
10# This library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public
16# License along with this library; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18# 02110-1301 USA
19
20"""ISIN (International Securities Identification Number).
21
22The ISIN is a 12-character alpha-numerical code specified in ISO 6166 used to
23identify exchange listed securities such as bonds, commercial paper, stocks
24and warrants. The number is formed of a two-letter country code, a nine
25character national security identifier and a single check digit.
26
27This module does not currently separately validate the embedded national
28security identifier part (e.g. when it is a CUSIP).
29
30More information:
31
32* https://en.wikipedia.org/wiki/International_Securities_Identification_Number
33
34>>> validate('US0378331005')
35'US0378331005'
36>>> validate('US0378331003')
37Traceback (most recent call last):
38    ...
39InvalidChecksum: ...
40>>> from_natid('gb', 'BYXJL75')
41'GB00BYXJL758'
42"""
43
44from stdnum.exceptions import *
45from stdnum.util import clean
46
47
# ISO 3166-1 alpha-2 country codes. Every entry is accepted as the two-letter
# prefix of an ISIN (the list is merged into _country_codes below, which
# validate() checks the first two characters against).
# NOTE: the "3116" in the variable name is a typo for ISO 3166-1; the name is
# left untouched because other code refers to it.
_iso_3116_1_country_codes = [
    'AD', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ', 'AR', 'AS',
    'AT', 'AU', 'AW', 'AX', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH',
    'BI', 'BJ', 'BL', 'BM', 'BN', 'BO', 'BQ', 'BR', 'BS', 'BT', 'BV', 'BW',
    'BY', 'BZ', 'CA', 'CC', 'CD', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM',
    'CN', 'CO', 'CR', 'CS', 'CU', 'CV', 'CW', 'CX', 'CY', 'CZ', 'DE', 'DJ',
    'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'EH', 'ER', 'ES', 'ET', 'FI',
    'FJ', 'FK', 'FM', 'FO', 'FR', 'GA', 'GB', 'GD', 'GE', 'GF', 'GG', 'GH',
    'GI', 'GL', 'GM', 'GN', 'GP', 'GQ', 'GR', 'GS', 'GT', 'GU', 'GW', 'GY',
    'HK', 'HM', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IM', 'IN', 'IO',
    'IQ', 'IR', 'IS', 'IT', 'JE', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI',
    'KM', 'KN', 'KP', 'KR', 'KW', 'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK',
    'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MD', 'ME', 'MF', 'MG',
    'MH', 'MK', 'ML', 'MM', 'MN', 'MO', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU',
    'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NE', 'NF', 'NG', 'NI', 'NL',
    'NO', 'NP', 'NR', 'NU', 'NZ', 'OM', 'PA', 'PE', 'PF', 'PG', 'PH', 'PK',
    'PL', 'PM', 'PN', 'PR', 'PS', 'PT', 'PW', 'PY', 'QA', 'RE', 'RO', 'RS',
    'RU', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SG', 'SH', 'SI', 'SJ', 'SK',
    'SL', 'SM', 'SN', 'SO', 'SR', 'SS', 'ST', 'SV', 'SX', 'SY', 'SZ', 'TC',
    'TD', 'TF', 'TG', 'TH', 'TJ', 'TK', 'TL', 'TM', 'TN', 'TO', 'TR', 'TT',
    'TV', 'TW', 'TZ', 'UA', 'UG', 'UM', 'US', 'UY', 'UZ', 'VA', 'VC', 'VE',
    'VG', 'VI', 'VN', 'VU', 'WF', 'WS', 'YE', 'YT', 'ZA', 'ZM', 'ZW']
71
# The complete set of two-letter prefixes accepted by validate(): the ISO
# 3166-1 alpha-2 codes listed above plus these special codes that are also
# allowed for ISINs.
# NOTE(review): 'QS' is listed twice below. This is harmless because the
# result is a set, but confirm that neither entry was meant to be a
# different code.
_country_codes = set(_iso_3116_1_country_codes + [
    'EU',  # European Union
    'QS',  # internally used by Euroclear France
    'QS',  # temporarily assigned in Germany
    'QT',  # internally used in Switzerland
    'XA',  # CUSIP Global Services substitute agencies
    'XB',  # NSD Russia substitute agencies
    'XC',  # WM Datenservice Germany substitute agencies
    'XD',  # SIX Telekurs substitute agencies
    'XF',  # internally assigned, not unique numbers
    'XK',  # temporary country code for Kosovo
    'XS',  # international securities
])
86
# the characters (digits and upper case letters) allowed in an ISIN; the
# position of a character in this string is the numeric value that
# calc_check_digit() substitutes for it (so 'A' is 10 and 'Z' is 35)
_alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
89
90
def compact(number):
    """Return the minimal representation of the number. The number is
    passed through clean() with the space character as the separator to
    remove, surrounding whitespace is stripped and the result is converted
    to upper case."""
    without_separators = clean(number, ' ')
    trimmed = without_separators.strip()
    return trimmed.upper()
95
96
def calc_check_digit(number):
    """Calculate the check digit for the number. The number passed should
    not include the check digit itself. The result is returned as a
    single-character string. A character that is not in the ISIN alphabet
    results in a ValueError."""
    # replace every character by its decimal value (digits stay as they
    # are, 'A' becomes 10 up to 'Z' which becomes 35)
    expanded = ''.join(str(_alphabet.index(char)) for char in number)
    total = 0
    # walk the digits from right to left, doubling the rightmost digit and
    # every second digit after it
    for position, digit in enumerate(reversed(expanded)):
        weight = 1 if position % 2 else 2
        # a product is at most 18, so adding its quotient and remainder by
        # ten is the same as adding its individual decimal digits
        total += sum(divmod(weight * int(digit), 10))
    return str((10 - total) % 10)
104
105
def validate(number):
    """Check if the number provided is a valid ISIN and return it in
    compact form. This raises InvalidFormat if a character outside the ISIN
    alphabet is present, InvalidLength if the compact number is not 12
    characters long, InvalidComponent if the first two characters are not a
    known country code and InvalidChecksum if the last character does not
    match the calculated check digit."""
    number = compact(number)
    if any(char not in _alphabet for char in number):
        raise InvalidFormat()
    if len(number) != 12:
        raise InvalidLength()
    country_code = number[:2]
    if country_code not in _country_codes:
        raise InvalidComponent()
    # the check digit is calculated over everything but the last character
    body, check_digit = number[:-1], number[-1]
    if check_digit != calc_check_digit(body):
        raise InvalidChecksum()
    return number
119
120
def is_valid(number):
    """Return whether the number provided is a valid ISIN. This runs
    validate() on the number and returns False instead of raising when a
    ValidationError occurs."""
    try:
        validated = validate(number)
    except ValidationError:
        return False
    return bool(validated)
128
129
def from_natid(country_code, number):
    """Generate an ISIN from a national security identifier. The identifier
    is compacted and left-padded with zeros to nine characters, prefixed
    with the upper-cased country code and completed with the calculated
    check digit. The country code is not checked against the list of known
    codes. InvalidLength is raised if the compact identifier is longer than
    nine characters because it cannot fit in a 12-character ISIN."""
    number = compact(number)
    # without this check an over-long identifier would get no padding and
    # the function would silently return a string that is not an ISIN
    if len(number) > 9:
        raise InvalidLength()
    number = country_code.upper() + number.rjust(9, '0')
    return number + calc_check_digit(number)
135