1# hetu.py - functions for handling Finnish personal identity codes
2# coding: utf-8
3#
4# Copyright (C) 2011 Jussi Judin
5# Copyright (C) 2012, 2013 Arthur de Jong
6#
7# This library is free software; you can redistribute it and/or
8# modify it under the terms of the GNU Lesser General Public
9# License as published by the Free Software Foundation; either
10# version 2.1 of the License, or (at your option) any later version.
11#
12# This library is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15# Lesser General Public License for more details.
16#
17# You should have received a copy of the GNU Lesser General Public
18# License along with this library; if not, write to the Free Software
19# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20# 02110-1301 USA
21
22"""HETU (Henkilötunnus, Finnish personal identity code).
23
24Module for handling Finnish personal identity codes (HETU, Henkilötunnus).
25See http://www.vaestorekisterikeskus.fi/default.aspx?id=45 for checksum
26calculation details and http://tarkistusmerkit.teppovuori.fi/tarkmerk.htm#hetu1
27for historical details.
28
29>>> validate('131052-308T')
30'131052-308T'
31>>> validate('131052-308U')
32Traceback (most recent call last):
33    ...
34InvalidChecksum: ...
35>>> validate('310252-308Y')
36Traceback (most recent call last):
37    ...
38InvalidComponent: ...
39>>> compact('131052a308t')
40'131052A308T'
41"""
42
43import datetime
44import re
45
46from stdnum.exceptions import *
47from stdnum.util import clean
48
49
# Maps the century sign found in a HETU to the first year of that century;
# the two-digit year from the code is added to this value to obtain the
# full year of birth.
_century_codes = {
    '+': 1800,
    '-': 1900,
    'A': 2000,
}

# Finnish personal identity codes are composed of a date part, a century
# indicating sign, an individual number and a control character:
# ddmmyyciiiC
# Explicit [0-9] classes are used instead of \d because for Python 3 text
# patterns \d also matches non-ASCII Unicode decimal digits, which must not
# be accepted as part of an identity code.
_hetu_re = re.compile(r'^(?P<day>[0123][0-9])(?P<month>[01][0-9])'
                      r'(?P<year>[0-9][0-9])'
                      r'(?P<century>[-+A])(?P<individual>[0-9]{3})'
                      r'(?P<control>[0-9ABCDEFHJKLMNPRSTUVWXY])$')
62
63
def compact(number):
    """Return the minimal representation of the HETU.

    The value is passed through clean() (with no characters to delete),
    converted to upper case and stripped of surrounding whitespace."""
    cleaned = clean(number, '')
    uppercased = cleaned.upper()
    return uppercased.strip()
68
69
def _calc_checksum(number):
    """Return the control character for the given string of digits.

    The digits are interpreted as one integer; the remainder of dividing it
    by the size of the control alphabet (31) selects the character."""
    alphabet = '0123456789ABCDEFHJKLMNPRSTUVWXY'
    remainder = int(number) % len(alphabet)
    return alphabet[remainder]
72
73
def validate(number, allow_temporary=False):
    """Check if the number is a valid HETU. It checks the format, whether a
    valid date is given and whether the check digit is correct.

    The number is first passed through compact(). If allow_temporary is
    true, individual numbers in the range 900-999 (reserved for temporary
    identifiers) are accepted; by default they are rejected.

    Returns the compact form of the number. Raises InvalidFormat if the
    number does not have the expected structure, InvalidComponent if the
    birth date or the individual number is not acceptable and
    InvalidChecksum if the control character does not match."""
    number = compact(number)
    match = _hetu_re.search(number)
    if not match:
        raise InvalidFormat()
    day = int(match.group('day'))
    month = int(match.group('month'))
    year = int(match.group('year'))
    century = _century_codes[match.group('century')]
    individual = int(match.group('individual'))
    # check if birth date is valid (datetime.date rejects impossible dates)
    try:
        datetime.date(century + year, month, day)
    except ValueError:
        raise InvalidComponent()
    # for historical reasons individual IDs start from 002
    if individual < 2:
        raise InvalidComponent()
    # this range is for temporary identifiers, only accepted on request
    if 900 <= individual <= 999 and not allow_temporary:
        raise InvalidComponent()
    # the control character is calculated over the date and individual
    # number; the century sign is not part of the checksum
    checkable_number = '%02d%02d%02d%03d' % (day, month, year, individual)
    if match.group('control') != _calc_checksum(checkable_number):
        raise InvalidChecksum()
    return number
101
102
def is_valid(number):
    """Return True if the number is a valid HETU and False otherwise.

    Any ValidationError raised by validate() is turned into False."""
    try:
        validated = validate(number)
    except ValidationError:
        return False
    return bool(validated)
109
110
# This is here just for completeness as there are no different length forms
# of Finnish personal identity codes:
def format(number):
    """Reformat the HETU to the standard presentation format, which is the
    same as the compact representation."""
    return compact(number)
114