1#!/usr/local/bin/python2.7
2
3"""RFC 3548: Base16, Base32, Base64 Data Encodings"""
4
5# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
7
8import re
9import struct
10import string
11import binascii
12
13
# Names exported by ``from base64 import *``.  The command-line helpers
# test() and test1() are deliberately not listed.
__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodestring', 'decodestring',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
28
# Identity translation table: entry i is the one-character string chr(i).
# _translate() copies this list and overrides individual entries.
_translation = [chr(_x) for _x in range(256)]
# Separator used when joining the fragments produced by the Base32 codecs.
EMPTYSTRING = ''
31
32
def _translate(s, altchars):
    """Return s with characters remapped according to altchars.

    altchars is a mapping from single characters to their replacements.
    Characters that do not appear as keys are left unchanged.
    """
    # Start from a private copy of the identity table so the module-level
    # table is never modified.
    table = list(_translation)
    for source, replacement in altchars.items():
        table[ord(source)] = replacement
    return s.translate(''.join(table))
38
39
40
41# Base64 encoding/decoding uses binascii
42
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the string s.

    When altchars is given it must be a string of at least two characters;
    its first character replaces '+' and its second replaces '/' in the
    output (anything past the second character is ignored).  This lets a
    caller produce, for example, URL- or filesystem-safe Base64.
    """
    # binascii always terminates its output with a newline; drop it.
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is None:
        return encoded
    table = string.maketrans(b'+/', altchars[:2])
    return encoded.translate(table)
58
59
def b64decode(s, altchars=None):
    """Decode a Base64 encoded string.

    s is the string to decode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies the
    alternative alphabet used instead of the '+' and '/' characters.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded.  Characters that are neither in the normal base-64
    alphabet nor the alternative alphabet are discarded prior to the padding
    check.
    """
    if altchars is not None:
        # Map the caller's two substitute characters back to '+' and '/'
        # so that binascii sees the standard alphabet.
        s = s.translate(string.maketrans(altchars[:2], '+/'))
    try:
        return binascii.a2b_base64(s)
    except binascii.Error as msg:
        # Transform this exception for consistency: every decoder in this
        # module reports malformed input as TypeError.
        raise TypeError(msg)
79
80
def standard_b64encode(s):
    """Return s encoded with the standard Base64 alphabet.

    This is b64encode() called without an alternative alphabet.
    """
    encoded = b64encode(s)
    return encoded
87
def standard_b64decode(s):
    """Return the decoding of s, a string in the standard Base64 alphabet.

    This is b64decode() called without an alternative alphabet.  A
    TypeError is raised when s is incorrectly padded; characters outside the
    standard alphabet are dropped before the padding is checked.
    """
    decoded = b64decode(s)
    return decoded
97
# Translation tables, built once at import time, used by the urlsafe_*
# functions: the first swaps '+' -> '-' and '/' -> '_', the second is its
# inverse.
_urlsafe_encode_translation = string.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = string.maketrans(b'-_', b'+/')
100
def urlsafe_b64encode(s):
    """Return s encoded with the URL- and filesystem-safe Base64 alphabet.

    The output is ordinary Base64 in which '+' has been replaced by '-' and
    '/' by '_'.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
108
def urlsafe_b64decode(s):
    """Return the decoding of s, a URL- and filesystem-safe Base64 string.

    In this alphabet '-' stands for '+' and '_' stands for '/'; both are
    mapped back before s is handed to b64decode().  A TypeError is raised
    when s is incorrectly padded.  Characters that belong neither to the
    URL-safe alphabet nor to '+' and '/' are dropped before the padding is
    checked.
    """
    standard = s.translate(_urlsafe_decode_translation)
    return b64decode(standard)
120
121
122
123# Base32 encoding/decoding must be done in Python
# Base32 alphabet: maps each 5-bit value (0..31) to its digit character.
_b32alphabet = {
    0: 'A',  9: 'J', 18: 'S', 27: '3',
    1: 'B', 10: 'K', 19: 'T', 28: '4',
    2: 'C', 11: 'L', 20: 'U', 29: '5',
    3: 'D', 12: 'M', 21: 'V', 30: '6',
    4: 'E', 13: 'N', 22: 'W', 31: '7',
    5: 'F', 14: 'O', 23: 'X',
    6: 'G', 15: 'P', 24: 'Y',
    7: 'H', 16: 'Q', 25: 'Z',
    8: 'I', 17: 'R', 26: '2',
    }

# Encoding table: _b32tab[value] is the digit character for that value.
# sorted() orders the (value, char) pairs by value without relying on
# dict.items() returning a list that can be sorted in place.
_b32tab = [v for k, v in sorted(_b32alphabet.items())]
# Decoding table: digit character -> 5-bit value.  Plain ints suffice; an
# explicit long() is unnecessary because shifts promote automatically.
_b32rev = dict([(v, k) for k, v in _b32alphabet.items()])
140
141
def b32encode(s):
    """Return the Base32 encoding of the string s.

    The input is consumed in 5-byte (40-bit) quanta, each yielding eight
    Base32 digits.  A short final quantum is zero-filled for encoding and
    the digits that carry no input bits are replaced by '=' padding.
    """
    leftover = len(s) % 5
    # Zero-fill the final quantum so every slice below is exactly 5 bytes.
    if leftover:
        s += ('\0' * (5 - leftover))
    digits = []
    for start in range(0, len(s), 5):
        # Split the 40 bits into two 16-bit words and one 8-bit byte.  A
        # 16-bit word holds three 5-bit groups plus one spare bit, so the
        # spare low bit of `hi` is carried into `mid` (now 17 bits), and
        # the two spare low bits of `mid` are carried into `lo` (now 10
        # bits, i.e. exactly two groups).
        hi, mid, lo = struct.unpack('!HHB', s[start:start + 5])
        mid |= (hi & 1) << 16
        lo |= (mid & 3) << 8
        digits += [
            _b32tab[hi >> 11],            # bits 1 - 5
            _b32tab[(hi >> 6) & 0x1f],    # bits 6 - 10
            _b32tab[(hi >> 1) & 0x1f],    # bits 11 - 15
            _b32tab[mid >> 12],           # bits 16 - 20
            _b32tab[(mid >> 7) & 0x1f],   # bits 21 - 25
            _b32tab[(mid >> 2) & 0x1f],   # bits 26 - 30
            _b32tab[lo >> 5],             # bits 31 - 35
            _b32tab[lo & 0x1f],           # bits 36 - 40
        ]
    encoded = EMPTYSTRING.join(digits)
    # Number of trailing digits to replace with '=' for each possible count
    # of real bytes in the final quantum.
    pad = {1: 6, 2: 4, 3: 3, 4: 1}.get(leftover)
    if pad is None:
        return encoded
    return encoded[:-pad] + '=' * pad
182
183
def b32decode(s, casefold=False, map01=None):
    """Return the decoding of the Base32 string s.

    casefold, when true, allows lowercase digits in the input; the default
    of False rejects them for security reasons.

    RFC 3548 permits the digit 0 (zero) to be read as the letter O (oh) and
    the digit 1 (one) as either the letter I (eye) or the letter L (el).
    map01, when supplied, names the letter that 1 is mapped to, and 0 is
    then always mapped to O.  The default of None leaves 0 and 1 unmapped,
    so they are rejected as non-alphabet characters.

    A TypeError is raised when s is incorrectly padded or contains
    characters outside the Base32 alphabet.
    """
    # Encoded input always comes in whole 8-character quanta.
    if len(s) % 8:
        raise TypeError('Incorrect padding')
    # Section 2.4 zero/one mapping.
    if map01:
        table = string.maketrans(b'01', b'O' + map01)
        s = s.translate(table)
    if casefold:
        s = s.upper()
    # Count and remove the trailing pad characters; the count determines
    # how many bytes of the final quantum are real data.
    match = re.search('(?P<pad>[=]*)$', s)
    padchars = len(match.group('pad')) if match else 0
    if padchars > 0:
        s = s[:-padchars]
    # Accumulate eight 5-bit digits into one 40-bit integer, most
    # significant digit first, and emit it as 5 bytes.
    chunks = []
    acc = 0
    shift = 35
    for ch in s:
        digit = _b32rev.get(ch)
        if digit is None:
            raise TypeError('Non-base32 digit found')
        acc += digit << shift
        shift -= 5
        if shift < 0:
            chunks.append(binascii.unhexlify('%010x' % acc))
            acc = 0
            shift = 35
    # Whatever is left in the accumulator belongs to the final, partial
    # quantum; trim the bytes that only held zero fill.
    last = binascii.unhexlify('%010x' % acc)
    if padchars == 0:
        last = ''                       # No partial quantum
    else:
        # pad count -> number of trailing fill bytes to discard
        trim = {1: 1, 3: 2, 4: 3, 6: 4}.get(padchars)
        if trim is None:
            raise TypeError('Incorrect padding')
        last = last[:-trim]
    chunks.append(last)
    return EMPTYSTRING.join(chunks)
252
253
254
255# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
256# lowercase.  The RFC also recommends against accepting input case
257# insensitively.
def b16encode(s):
    """Return the Base16 (uppercase hexadecimal) encoding of the string s."""
    # binascii produces lowercase hex digits; Base16 is uppercase.
    hexed = binascii.b2a_hex(s)
    return hexed.upper()
264
265
def b16decode(s, casefold=False):
    """Return the decoding of the Base16 string s.

    casefold, when true, allows lowercase hex digits in the input; the
    default of False rejects them for security reasons.

    A TypeError is raised when s contains any character other than 0-9 and
    A-F (after optional case folding).
    """
    candidate = s.upper() if casefold else s
    if re.search('[^0-9A-F]', candidate) is not None:
        raise TypeError('Non-base16 digit found')
    return binascii.unhexlify(candidate)
282
283
284
285# Legacy interface.  This code could be cleaned up since I don't believe
286# binascii has any line length limitations.  It just doesn't seem worth it
287# though.
288
# Maximum number of encoded characters on one output line.
MAXLINESIZE = 76 # Excluding the CRLF
# Number of raw input bytes that encode to one full output line: Base64
# turns every 3 input bytes into 4 output characters.
MAXBINSIZE = (MAXLINESIZE//4)*3
291
def encode(input, output):
    """Base64-encode the file object input, writing lines to output.

    Data is read in blocks of up to MAXBINSIZE bytes; each block is written
    to output as one newline-terminated line of Base64.
    """
    chunk = input.read(MAXBINSIZE)
    while chunk:
        # A read may return fewer bytes than requested; keep reading until
        # the block is full or the input is exhausted.
        while len(chunk) < MAXBINSIZE:
            more = input.read(MAXBINSIZE - len(chunk))
            if not more:
                break
            chunk += more
        output.write(binascii.b2a_base64(chunk))
        chunk = input.read(MAXBINSIZE)
305
306
def decode(input, output):
    """Base64-decode the file object input line by line into output."""
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
315
316
def encodestring(s):
    """Return s encoded as newline-terminated lines of Base64.

    Each line encodes up to MAXBINSIZE bytes of s.
    """
    lines = [binascii.b2a_base64(s[start:start + MAXBINSIZE])
             for start in range(0, len(s), MAXBINSIZE)]
    return "".join(lines)
324
325
def decodestring(s):
    """Return the decoding of the Base64 string s."""
    decoded = binascii.a2b_base64(s)
    return decoded
329
330
331
332# Useable as a script...
def test():
    """Small test program: command-line driver for encode() and decode().

    Options are parsed from sys.argv with getopt:

        -e      encode (default)
        -d, -u  decode
        -t      run test1() and return

    An optional positional argument names the input file.  With no argument,
    or with '-', input is read from sys.stdin.  Output goes to sys.stdout.

    If the options cannot be parsed, the error message and a usage summary
    are written to sys.stderr and the process exits with status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr directly instead of rebinding sys.stdout, so the
        # redirection cannot outlive this function if SystemExit is caught.
        usage = """usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
        sys.stderr.write("%s\n" % msg)
        sys.stderr.write(usage + "\n")
        sys.exit(2)
    func = encode
    # When several mode options are given, the last one wins.
    for opt, _value in opts:
        if opt == '-e':
            func = encode
        elif opt in ('-d', '-u'):
            func = decode
        elif opt == '-t':
            test1()
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout)
    else:
        func(sys.stdin, sys.stdout)
357
358
def test1():
    """Round-trip a sample string through encodestring()/decodestring().

    Prints the original string, the repr of its encoding and the decoded
    result on one line, separated by spaces.
    """
    original = "Aladdin:open sesame"
    encoded = encodestring(original)
    decoded = decodestring(encoded)
    # A single parenthesised argument prints the same text whether print is
    # a statement or a function.
    print("%s %s %s" % (original, repr(encoded), decoded))
364
365
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()
368