1# Copyright 2016-present MongoDB, Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""An implementation of RFC4013 SASLprep."""
16
17from bson.py3compat import text_type as _text_type
18
19try:
20    import stringprep
21except ImportError:
22    HAVE_STRINGPREP = False
23    def saslprep(data):
24        """SASLprep dummy"""
25        if isinstance(data, _text_type):
26            raise TypeError(
27                "The stringprep module is not available. Usernames and "
28                "passwords must be ASCII strings.")
29        return data
30else:
31    HAVE_STRINGPREP = True
32    import unicodedata
    # RFC4013 section 2.3 prohibited output.
    # Each entry is a stringprep predicate taking a single character and
    # returning True when that character belongs to the named RFC 3454
    # table. saslprep() rejects a string if any predicate matches any of
    # its characters.
    _PROHIBITED = (
        # A strict reading of RFC 4013 requires table c12 here, but
        # characters from it are mapped to SPACE in the Map step. Can
        # normalization reintroduce them somehow?
        # C.1.2 - Non-ASCII space characters.
        stringprep.in_table_c12,
        # C.2.1 and C.2.2 - ASCII and non-ASCII control characters.
        stringprep.in_table_c21_c22,
        # C.3 - Private use.
        stringprep.in_table_c3,
        # C.4 - Non-character code points.
        stringprep.in_table_c4,
        # C.5 - Surrogate codes.
        stringprep.in_table_c5,
        # C.6 - Inappropriate for plain text.
        stringprep.in_table_c6,
        # C.7 - Inappropriate for canonical representation.
        stringprep.in_table_c7,
        # C.8 - Change display properties or are deprecated.
        stringprep.in_table_c8,
        # C.9 - Tagging characters.
        stringprep.in_table_c9)
47
48    def saslprep(data, prohibit_unassigned_code_points=True):
49        """An implementation of RFC4013 SASLprep.
50
51        :Parameters:
52          - `data`: The string to SASLprep. Unicode strings
53            (python 2.x unicode, 3.x str) are supported. Byte strings
54            (python 2.x str, 3.x bytes) are ignored.
55          - `prohibit_unassigned_code_points`: True / False. RFC 3454
56            and RFCs for various SASL mechanisms distinguish between
57            `queries` (unassigned code points allowed) and
58            `stored strings` (unassigned code points prohibited). Defaults
59            to ``True`` (unassigned code points are prohibited).
60
61        :Returns:
62        The SASLprep'ed version of `data`.
63        """
64        if not isinstance(data, _text_type):
65            return data
66
67        if prohibit_unassigned_code_points:
68            prohibited = _PROHIBITED + (stringprep.in_table_a1,)
69        else:
70            prohibited = _PROHIBITED
71
72        # RFC3454 section 2, step 1 - Map
73        # RFC4013 section 2.1 mappings
74        # Map Non-ASCII space characters to SPACE (U+0020). Map
75        # commonly mapped to nothing characters to, well, nothing.
76        in_table_c12 = stringprep.in_table_c12
77        in_table_b1 = stringprep.in_table_b1
78        data = u"".join(
79            [u"\u0020" if in_table_c12(elt) else elt
80             for elt in data if not in_table_b1(elt)])
81
82        # RFC3454 section 2, step 2 - Normalize
83        # RFC4013 section 2.2 normalization
84        data = unicodedata.ucd_3_2_0.normalize('NFKC', data)
85
86        in_table_d1 = stringprep.in_table_d1
87        if in_table_d1(data[0]):
88            if not in_table_d1(data[-1]):
89                # RFC3454, Section 6, #3. If a string contains any
90                # RandALCat character, the first and last characters
91                # MUST be RandALCat characters.
92                raise ValueError("SASLprep: failed bidirectional check")
93            # RFC3454, Section 6, #2. If a string contains any RandALCat
94            # character, it MUST NOT contain any LCat character.
95            prohibited = prohibited + (stringprep.in_table_d2,)
96        else:
97            # RFC3454, Section 6, #3. Following the logic of #3, if
98            # the first character is not a RandALCat, no other character
99            # can be either.
100            prohibited = prohibited + (in_table_d1,)
101
102        # RFC3454 section 2, step 3 and 4 - Prohibit and check bidi
103        for char in data:
104            if any(in_table(char) for in_table in prohibited):
105                raise ValueError(
106                    "SASLprep: failed prohibited character check")
107
108        return data
109