1# isbn.py - functions for handling ISBNs
2#
3# Copyright (C) 2010-2017 Arthur de Jong
4#
5# This library is free software; you can redistribute it and/or
6# modify it under the terms of the GNU Lesser General Public
7# License as published by the Free Software Foundation; either
8# version 2.1 of the License, or (at your option) any later version.
9#
10# This library is distributed in the hope that it will be useful,
11# but WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13# Lesser General Public License for more details.
14#
15# You should have received a copy of the GNU Lesser General Public
16# License along with this library; if not, write to the Free Software
17# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18# 02110-1301 USA
19
20"""ISBN (International Standard Book Number).
21
The ISBN is the International Standard Book Number, used to identify books
and similar publications. Numbers can either have 10 digits (in ISBN-10
format) or 13 digits (in ISBN-13, EAN compatible format).
25An ISBN has the following components:
26
27* 3-digit (only in ISBN-13) Bookland code
28* 1 to 5-digit group identifier (identifies country or language)
29* 1 to 7-digit publisher code
30* 1 to 8-digit item number (identifies the book)
31* a check digit
32
33More information:
34
35* https://en.wikipedia.org/wiki/International_Standard_Book_Number
36* https://www.isbn-international.org/range_file_generation
37
38This module also offers functions for converting to ISBN-13 and formatting
39based on how the number should be split into a bookland code, group
40identifier, publisher code, item number and check digit.
41
42>>> validate('978-9024538270')
43'9789024538270'
44>>> validate('978-9024538271')
45Traceback (most recent call last):
46    ...
47InvalidChecksum: ...
48>>> compact('1-85798-218-5')
49'1857982185'
50>>> format('9780471117094')
51'978-0-471-11709-4'
52>>> format('1857982185')
53'1-85798-218-5'
54>>> isbn_type('1-85798-218-5')
55'ISBN10'
56>>> isbn_type('978-0-471-11709-4')
57'ISBN13'
58>>> to_isbn13('1-85798-218-5')
59'978-1-85798-218-3'
60>>> to_isbn10('978-1-85798-218-3')
61'1-85798-218-5'
62"""
63
64from stdnum import ean
65from stdnum.exceptions import *
66from stdnum.util import clean, isdigits
67
68
def compact(number, convert=False):
    """Return the minimal representation of the ISBN. Spaces and dashes are
    removed, surrounding whitespace is stripped and the result is
    upper-cased. A number that is 9 characters long after cleaning is padded
    with a leading '0' to 10 characters. If the convert parameter is True
    the result is additionally passed through to_isbn13()."""
    minimal = clean(number, ' -').strip().upper()
    # nine characters: pad with a leading zero to get the 10 character form
    if len(minimal) == 9:
        minimal = '0' + minimal
    return to_isbn13(minimal) if convert else minimal
80
81
82def _calc_isbn10_check_digit(number):
83    """Calculate the ISBN check digit for 10-digit numbers. The number passed
84    should not have the check bit included."""
85    check = sum((i + 1) * int(n)
86                for i, n in enumerate(number)) % 11
87    return 'X' if check == 10 else str(check)
88
89
def validate(number, convert=False):
    """Check if the number provided is a valid ISBN (either a legacy 10-digit
    one or a 13-digit one) and return it in compact form. This checks the
    format, the length and the check digit; for 13-digit numbers the prefix
    must also be 978 or 979. It does not check whether the group and
    publisher are valid (use split() for that). If the convert parameter is
    True the validated number is returned in ISBN-13 format. Raises
    InvalidFormat, InvalidLength, InvalidChecksum or InvalidComponent when
    the corresponding check fails."""
    compacted = compact(number, convert=False)
    payload = compacted[:-1]
    # everything except the final character has to be numeric
    if not isdigits(payload):
        raise InvalidFormat()
    length = len(compacted)
    if length not in (10, 13):
        raise InvalidLength()
    if length == 10:
        if compacted[-1] != _calc_isbn10_check_digit(payload):
            raise InvalidChecksum()
    else:
        # 13-digit numbers are EANs that must carry a Bookland prefix
        ean.validate(compacted)
        if compacted[:3] not in ('978', '979'):
            raise InvalidComponent()
    return to_isbn13(compacted) if convert else compacted
109
110
def isbn_type(number):
    """Determine the type of the passed number. Returns 'ISBN10' or 'ISBN13'
    for a number that passes validate() and None for an invalid number."""
    try:
        validated = validate(number, convert=False)
    except ValidationError:
        return None
    # validate() only lets numbers of length 10 or 13 through
    return 'ISBN10' if len(validated) == 10 else 'ISBN13'
122
123
def is_valid(number):
    """Return True if the number provided is a valid ISBN (either a legacy
    10-digit one or a 13-digit one) and False otherwise. This performs the
    same checks as validate() but reports failure through the return value
    instead of raising an exception."""
    try:
        validated = validate(number)
    except ValidationError:
        return False
    return bool(validated)
132
133
def to_isbn13(number):
    """Convert the number to ISBN-13 format. The number may contain spaces
    or dashes as separators; the returned value uses the same kind of
    separator between the added 978 prefix and the rest of the number. A
    number that already has 13 digits is returned as is (with surrounding
    whitespace stripped). A 9-digit number is first padded with a leading
    zero to 10 digits. The number is not validated by this function."""
    number = number.strip()
    min_number = clean(number, ' -')
    if len(min_number) == 13:
        return number  # nothing to do, already ISBN-13
    if len(min_number) == 9:
        # convert from 9 to 10 digits; the compact form has to be padded as
        # well, otherwise the check digit below is calculated over too few
        # digits
        number = '0' + number
        min_number = '0' + min_number
    # replace the old check digit with the EAN check digit calculated over
    # the prefix and the digits without the old check digit
    number = number[:-1] + ean.calc_check_digit('978' + min_number[:-1])
    # add prefix, using the separator found in the number (if any)
    if ' ' in number:
        return '978 ' + number
    elif '-' in number:
        return '978-' + number
    else:
        return '978' + number
151
152
def to_isbn10(number):
    """Convert the number to ISBN-10 format. A number whose compact form is
    already 10 characters long is returned as is (with surrounding
    whitespace stripped). Otherwise the number has to be a valid ISBN-13
    (InvalidFormat is raised if not) that starts with 978 (InvalidComponent
    is raised if not). The separator style (space or dash) found in the
    number is used to attach the new check digit."""
    stripped = number.strip()
    minimal = compact(stripped, convert=False)
    if len(minimal) == 10:
        return stripped  # nothing to do, already ISBN-10
    if isbn_type(minimal) != 'ISBN13':
        raise InvalidFormat('Not a valid ISBN13.')
    if not stripped.startswith('978'):
        raise InvalidComponent('Does not use 978 Bookland prefix.')
    # drop the EAN prefix and the old check digit together with the
    # separators that surrounded them
    body = stripped[3:-1].strip().strip('-')
    check = _calc_isbn10_check_digit(minimal[3:-1])
    # attach the new check digit with the separator used in the number
    for sep in (' ', '-'):
        if sep in body:
            return body + sep + check
    return body + check
173
174
def split(number, convert=False):
    """Split the specified ISBN into a tuple of EAN.UCC prefix, group
    prefix, registrant, item number and check digit. Parts that cannot be
    determined are returned as empty strings. For a number in ISBN-10 format
    the lookup is done with a 978 prefix but the returned EAN.UCC prefix is
    an empty string. If the convert parameter is True the number is
    converted to ISBN-13 format first."""
    from stdnum import numdb
    compacted = compact(number, convert)
    # ISBN-10 numbers are looked up under the 978 prefix, which is left out
    # of the result again
    is_isbn10 = len(compacted) == 10
    if is_isbn10:
        compacted = '978' + compacted
    # split everything except the check digit
    parts = numdb.get('isbn').split(compacted[:-1])
    # the last part is the item number, the leading parts are taken in order
    itemnr = parts.pop() if parts else ''
    leading = []
    for _ in range(3):
        leading.append(parts.pop(0) if parts else '')
    prefix, group, publisher = leading
    if is_isbn10:
        prefix = ''
    return (prefix, group, publisher, itemnr, compacted[-1])
196
197
def format(number, separator='-', convert=False):
    """Reformat the number to the standard presentation format: the non-empty
    parts returned by split() (EAN.UCC prefix, group prefix, registrant,
    item number and check digit) joined by the specified separator. Passing
    an empty separator should equal compact() though this is less
    efficient. If the convert parameter is True the number is converted to
    ISBN-13 format first."""
    parts = [part for part in split(number, convert) if part]
    return separator.join(parts)
206