1"""
2"""
3
4# Created on 2016.08.09
5#
6# Author: Giovanni Cannata
7#
8# Copyright 2016 - 2020 Giovanni Cannata
9#
10# This file is part of ldap3.
11#
12# ldap3 is free software: you can redistribute it and/or modify
13# it under the terms of the GNU Lesser General Public License as published
14# by the Free Software Foundation, either version 3 of the License, or
15# (at your option) any later version.
16#
17# ldap3 is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU Lesser General Public License for more details.
21#
22# You should have received a copy of the GNU Lesser General Public License
23# along with ldap3 in the COPYING and COPYING.LESSER files.
24# If not, see <http://www.gnu.org/licenses/>.
25from binascii import a2b_hex, hexlify
26from datetime import datetime
27from calendar import timegm
28from uuid import UUID
29from struct import pack
30
31
32from ... import SEQUENCE_TYPES, STRING_TYPES, NUMERIC_TYPES, INTEGER_TYPES
33from .formatters import format_time, format_ad_timestamp
34from ...utils.conv import to_raw, to_unicode, ldap_escape_to_bytes, escape_bytes
35
# Validators return True if the value is valid, False if the value is not valid,
# or a value other than True or False that is a valid replacement for the input value
38
39
def check_backslash(value):
    """
    Return value with every backslash replaced by a backslash followed by 5C.

    Works on bytes, bytearray and string values; any other type, and any
    value that holds no backslash, is returned untouched.
    """
    if isinstance(value, (bytearray, bytes)):
        backslash, escaped = b'\\', b'\\5C'
    elif isinstance(value, STRING_TYPES):
        backslash, escaped = '\\', '\\5C'
    else:
        return value  # not a type that can hold a backslash

    if backslash in value:
        return value.replace(backslash, escaped)
    return value
48
49
def check_type(input_value, value_type):
    """
    Tell whether input_value, or every element of it, is of value_type.

    Returns True when input_value itself is an instance of value_type, or when
    it is one of the SEQUENCE_TYPES and all of its elements are instances of
    value_type (an empty sequence therefore yields True). Returns False in
    every other case.
    """
    if isinstance(input_value, value_type):
        return True

    if not isinstance(input_value, SEQUENCE_TYPES):
        return False

    return all(isinstance(item, value_type) for item in input_value)
61
62
# noinspection PyUnusedLocal
def always_valid(input_value):
    """
    Validator that accepts anything: input_value is ignored and True is returned.
    """
    return True
66
67
def validate_generic_single_value(input_value):
    """
    Return True if input_value is a single value.

    A value that is not one of the SEQUENCE_TYPES is single by definition.
    A sequence is accepted only when it holds exactly one element; a sequence
    whose length cannot be computed is rejected.
    """
    if isinstance(input_value, SEQUENCE_TYPES):
        try:  # the object may not have a __len__ method
            return len(input_value) == 1
        except Exception:
            return False

    return True
79
80
def validate_zero_and_minus_one_and_positive_int(input_value):
    """Accept a single integer value greater than or equal to -1 (used by pwdLastSet in AD)

    The value can be given directly or as the only element of a sequence, and
    can be a number or a string that int() is able to convert.

    Returns True when the value is acceptable. Returns False for sequences that
    do not hold exactly one element, for unsupported types, for values lower
    than -1 and for values that cannot be converted to an integer (non numeric
    text, NaN, infinity).
    """
    if isinstance(input_value, SEQUENCE_TYPES):
        if len(input_value) != 1:  # only single valued input is allowed
            return False
        input_value = input_value[0]

    if not (isinstance(input_value, NUMERIC_TYPES) or isinstance(input_value, STRING_TYPES)):
        return False

    try:
        return int(input_value) >= -1
    except (ValueError, OverflowError):  # non numeric text or NaN (ValueError), infinity (OverflowError)
        return False  # a validator reports invalid input instead of raising
93
94
def validate_integer(input_value):
    """
    Validate a value, or a sequence of values, that must represent integers.

    Returns False when check_type() reports the input as float/bool, True when
    it reports the input as INTEGER_TYPES. Anything else is converted element
    by element: the result is the int value (a list of ints when the input was
    a sequence), or False as soon as one element cannot be converted or is not
    a whole number.
    """
    if check_type(input_value, (float, bool)):
        return False
    if check_type(input_value, INTEGER_TYPES):
        return True

    from decimal import Decimal, InvalidOperation

    is_sequence = isinstance(input_value, SEQUENCE_TYPES)  # indicates if a sequence must be returned
    candidates = input_value if is_sequence else [input_value]

    converted = []  # int values built from the candidates
    for candidate in candidates:
        # an invalid conversion raises TypeError, ValueError or InvalidOperation;
        # the int() result is trusted only if it equals the Decimal() result
        try:
            text = to_unicode(candidate) if isinstance(candidate, bytes) else candidate
            as_decimal = Decimal(text)
            as_int = int(text)
            if as_decimal != as_int:  # e.g. a fractional part would be lost
                return False
            converted.append(as_int)
        except (ValueError, TypeError, InvalidOperation):
            return False

    return converted if is_sequence else converted[0]
125
126
def validate_bytes(input_value):
    """
    Return True if input_value is bytes, or a sequence of bytes, as reported by check_type().
    """
    is_bytes = check_type(input_value, bytes)
    return is_bytes
129
130
def validate_boolean(input_value):
    """
    Validate a boolean and normalize it to the string 'TRUE' or 'FALSE'.

    Only a single value (or a sequence with a single element) is allowed. The
    value can be a real bool or the text 'true'/'false' in any letter case
    (bytes are decoded first on Python 3). Returns False for anything else.
    """
    if not validate_generic_single_value(input_value):
        return False

    candidate = input_value[0] if isinstance(input_value, SEQUENCE_TYPES) else input_value

    if isinstance(candidate, bool):
        return 'TRUE' if candidate else 'FALSE'

    if str is not bytes and isinstance(candidate, bytes):  # python3: try to convert bytes to string
        candidate = to_unicode(candidate)

    if isinstance(candidate, STRING_TYPES):
        return {'true': 'TRUE', 'false': 'FALSE'}.get(candidate.lower(), False)

    return False
149
150
def validate_time_with_0_year(input_value):
    """
    Validate Generalized Time values, accepting a 0000 year too.

    Strings (bytes are decoded first on Python 3) are valid when they start
    with '0000' or when format_time() turns them into a datetime. datetime
    objects are rendered as Generalized Time strings: with their own offset
    when they carry a timezone, otherwise they are assumed to be local time and
    shifted to UTC using the local offset measured when the function runs.

    Returns False if any element is invalid, True if no element had to be
    converted, otherwise the resulting value (a list when the input was a
    sequence).
    """
    is_sequence = isinstance(input_value, SEQUENCE_TYPES)  # indicates if a sequence must be returned
    elements = input_value if is_sequence else [input_value]

    results = []
    converted = False
    for item in elements:
        if str is not bytes and isinstance(item, bytes):  # python3: try to convert bytes to string
            item = to_unicode(item)

        if isinstance(item, STRING_TYPES):  # could already be a Generalized Time
            if not item.startswith('0000') and not isinstance(format_time(to_raw(item)), datetime):
                return False
            results.append(item)
            continue

        if not isinstance(item, datetime):
            return False

        converted = True
        if item.tzinfo:  # a datetime with a timezone keeps its own offset
            stamp = item.strftime('%Y%m%d%H%M%S%z')
        else:  # datetime without timezone, assumed local and adjusted to UTC
            local_offset = datetime.now() - datetime.utcnow()
            stamp = (item - local_offset).strftime('%Y%m%d%H%M%SZ')
        results.append(stamp)

    if not converted:
        return True
    return results if is_sequence else results[0]
187
188
def validate_time(input_value):
    """
    Validate Generalized Time values.

    Strings (bytes are decoded first on Python 3) are valid when format_time()
    turns them into a datetime. datetime objects are rendered as Generalized
    Time strings: with their own offset when they carry a timezone, otherwise
    they are assumed to be local time and shifted to UTC using the local offset
    measured when the function runs.

    Returns False if any element is invalid, True if no element had to be
    converted, otherwise the resulting value (a list when the input was a
    sequence).
    """
    is_sequence = isinstance(input_value, SEQUENCE_TYPES)  # indicates if a sequence must be returned
    elements = input_value if is_sequence else [input_value]

    results = []
    converted = False
    for item in elements:
        if str is not bytes and isinstance(item, bytes):  # python3: try to convert bytes to string
            item = to_unicode(item)

        if isinstance(item, STRING_TYPES):  # could already be a Generalized Time
            if not isinstance(format_time(to_raw(item)), datetime):
                return False
            results.append(item)
            continue

        if not isinstance(item, datetime):
            return False

        converted = True
        if item.tzinfo:  # a datetime with a timezone keeps its own offset
            stamp = item.strftime('%Y%m%d%H%M%S%z')
        else:  # datetime without timezone, assumed local and adjusted to UTC
            local_offset = datetime.now() - datetime.utcnow()
            stamp = (item - local_offset).strftime('%Y%m%d%H%M%SZ')
        results.append(stamp)

    if not converted:
        return True
    return results if is_sequence else results[0]
224
225
def validate_ad_timestamp(input_value):
    """
    Active Directory stores date/time values as the number of 100-nanosecond intervals
    that have elapsed since the 0 hour on January 1, 1601 till the date/time that is being stored.
    The time is always stored in Greenwich Mean Time (GMT) in the Active Directory.

    Numbers are valid when they lie between 0 and 9223372036854775807, strings
    (bytes are decoded first on Python 3) when format_ad_timestamp() turns them
    into a datetime. datetime objects are converted to the AD interval count;
    those without a timezone are assumed to be local time and shifted to UTC
    using the local offset measured when the function runs.

    Returns False if any element is invalid, True if no element had to be
    converted, otherwise the resulting value (a list when the input was a
    sequence).
    """
    is_sequence = isinstance(input_value, SEQUENCE_TYPES)  # indicates if a sequence must be returned
    elements = input_value if is_sequence else [input_value]

    results = []
    converted = False
    for item in elements:
        if str is not bytes and isinstance(item, bytes):  # python3: try to convert bytes to string
            item = to_unicode(item)

        if isinstance(item, NUMERIC_TYPES):
            if not 0 <= item <= 9223372036854775807:  # min and max for the AD timestamp starting from January 1, 1601
                return False
            results.append(item)
        elif isinstance(item, STRING_TYPES):  # could already be an AD timestamp
            if not isinstance(format_ad_timestamp(to_raw(item)), datetime):
                return False
            results.append(item)
        elif isinstance(item, datetime):
            converted = True
            if item.tzinfo:  # a datetime with a timezone
                utc_tuple = item.utctimetuple()
            else:  # datetime without timezone, assumed local and adjusted to UTC
                local_offset = datetime.now() - datetime.utcnow()
                utc_tuple = (item - local_offset).timetuple()
            # seconds since 1970 -> seconds since 1601 -> 100-nanosecond intervals
            intervals = (timegm(utc_tuple) + 11644473600) * 10000000
            results.append(to_raw(intervals, encoding='ascii'))
        else:
            return False

    if not converted:
        return True
    return results if is_sequence else results[0]
270
271
def validate_ad_timedelta(input_value):
    """
    Should be validated like an AD timestamp except that since it is a time
    delta, it is stored as a negative number.

    Only an integer lower than or equal to zero is accepted; its absolute
    value is then checked by validate_ad_timestamp().
    """
    if isinstance(input_value, INTEGER_TYPES) and input_value <= 0:
        return validate_ad_timestamp(-input_value)
    return False
280
281
def validate_guid(input_value):
    """
    object guid in uuid format (Novell eDirectory)

    Strings are parsed as UUIDs (a second attempt is made after removing
    backslashes, for escaped byte sequences) and converted to their 16 bytes.
    bytes and bytearray elements are assumed valid and kept as they are.

    Returns False if any element is invalid, True if no element had to be
    converted, otherwise the resulting value (a list when the input was a
    sequence) with backslashes escaped by check_backslash().
    """
    is_sequence = isinstance(input_value, SEQUENCE_TYPES)  # indicates if a sequence must be returned
    elements = input_value if is_sequence else [input_value]

    results = []
    converted = False
    for item in elements:
        if isinstance(item, STRING_TYPES):
            try:
                guid = UUID(item)
            except ValueError:  # try if the value is an escaped byte sequence
                try:
                    guid = UUID(item.replace('\\', ''))
                except ValueError:
                    if str is not bytes:  # python 3: the string is not a guid
                        return False
                    results.append(item)  # python 2: the value is kept untouched
                    continue
            results.append(guid.bytes)
            converted = True
        elif isinstance(item, (bytes, bytearray)):  # assumes bytes are valid
            results.append(item)
        else:
            return False

    if not converted:
        return True

    results = [check_backslash(result) for result in results]
    return results if is_sequence else results[0]
324
325
def validate_uuid(input_value):
    """
    object entryUUID in uuid format

    Strings are parsed as UUIDs (a second attempt is made after removing
    backslashes, for escaped byte sequences) and converted to the canonical
    text form produced by str(UUID). bytes and bytearray elements are assumed
    valid and kept as they are.

    Returns False if any element is invalid, True if no element had to be
    converted, otherwise the resulting value (a list when the input was a
    sequence) with backslashes escaped by check_backslash().
    """
    is_sequence = isinstance(input_value, SEQUENCE_TYPES)  # indicates if a sequence must be returned
    elements = input_value if is_sequence else [input_value]

    results = []
    converted = False
    for item in elements:
        if isinstance(item, STRING_TYPES):
            try:
                parsed = UUID(item)
            except ValueError:  # try if the value is an escaped byte sequence
                try:
                    parsed = UUID(item.replace('\\', ''))
                except ValueError:
                    if str is not bytes:  # python 3: the string is not a uuid
                        return False
                    results.append(item)  # python 2: the value is kept untouched
                    continue
            results.append(str(parsed))
            converted = True
        elif isinstance(item, (bytes, bytearray)):  # assumes bytes are valid
            results.append(item)
        else:
            return False

    if not converted:
        return True

    results = [check_backslash(result) for result in results]
    return results if is_sequence else results[0]
368
369
def validate_uuid_le(input_value):
    r"""
    Active Directory stores objectGUID in uuid_le format, follows RFC4122 and MS-DTYP:
    "{07039e68-4373-264d-a0a7-07039e684373}": string representation big endian, converted to little endian (with or without curly braces)
    "689e030773434d26a7a007039e684373": packet representation, already in little endian
    "\68\9e\03\07\73\43\4d\26\a7\a0\07\03\9e\68\43\73": bytes representation, already in little endian
    byte sequence: already in little endian

    Returns False if any element is invalid, True if no element had to be
    converted, otherwise the resulting value (a list when the input was a
    sequence) with backslashes escaped by check_backslash().

    A string that cannot be parsed in any of the representations above is
    rejected on Python 3. On Python 2, where str is bytes, it is assumed to be
    a raw little endian byte sequence and kept untouched.
    """
    if not isinstance(input_value, SEQUENCE_TYPES):
        sequence = False
        input_value = [input_value]
    else:
        sequence = True  # indicates if a sequence must be returned

    valid_values = []
    changed = False
    for element in input_value:
        if isinstance(element, STRING_TYPES):
            try:
                # startswith/endswith are safe on an empty string, indexing is not
                if (element.startswith('{') and element.endswith('}')) or '-' in element:
                    uuid_bytes = UUID(hex=element).bytes_le  # string representation, value in big endian, converts to little endian
                elif '\\' in element:
                    uuid_bytes = UUID(bytes_le=ldap_escape_to_bytes(element)).bytes_le  # byte representation, value in little endian
                else:
                    uuid_bytes = UUID(bytes_le=a2b_hex(element)).bytes_le  # packet representation, value in little endian
            except (ValueError, TypeError):  # a2b_hex raises TypeError on python2 and binascii.Error (a ValueError) on python3
                if str is not bytes:  # python3: the string is not a uuid, never drop it silently
                    return False
                valid_values.append(element)  # python2 only: assume value is bytes and valid, must be in little endian
            else:
                valid_values.append(uuid_bytes)
                changed = True
        elif isinstance(element, (bytes, bytearray)):  # assumes bytes are valid uuid
            valid_values.append(element)  # value is untouched, must be in little endian
        else:
            return False

    if changed:
        valid_values = [check_backslash(value) for value in valid_values]
        if sequence:
            return valid_values
        else:
            return valid_values[0]
    else:
        return True
429
430
def validate_sid(input_value):
    """
    Convert SIDs given in string format ('S-1-5-21-...') to their binary representation.

    Elements that are not strings starting with 'S-' are kept as they are.
    Returns True if no element had to be converted, False if a string starting
    with 'S-' is malformed (missing or non numeric components, or components
    that do not fit in their binary field), otherwise the resulting value (a
    list when the input was a sequence) with backslashes escaped by
    check_backslash().

        SID= "S-1-" IdentifierAuthority 1*SubAuthority
               IdentifierAuthority= IdentifierAuthorityDec / IdentifierAuthorityHex
                  ; If the identifier authority is < 2^32, the
                  ; identifier authority is represented as a decimal
                  ; number
                  ; If the identifier authority is >= 2^32,
                  ; the identifier authority is represented in
                  ; hexadecimal
                IdentifierAuthorityDec =  1*10DIGIT
                  ; IdentifierAuthorityDec, top level authority of a
                  ; security identifier is represented as a decimal number
                IdentifierAuthorityHex = "0x" 12HEXDIG
                  ; IdentifierAuthorityHex, the top-level authority of a
                  ; security identifier is represented as a hexadecimal number
                SubAuthority= "-" 1*10DIGIT
                  ; Sub-Authority is always represented as a decimal number
                  ; No leading "0" characters are allowed when IdentifierAuthority
                  ; or SubAuthority is represented as a decimal number
                  ; All hexadecimal digits must be output in string format,
                  ; pre-pended by "0x"

        Revision (1 byte): An 8-bit unsigned integer that specifies the revision level of the SID. This value MUST be set to 0x01.
        SubAuthorityCount (1 byte): An 8-bit unsigned integer that specifies the number of elements in the SubAuthority array. The maximum number of elements allowed is 15.
        IdentifierAuthority (6 bytes): A SID_IDENTIFIER_AUTHORITY structure that indicates the authority under which the SID was created. It describes the entity that created the SID. The Identifier Authority value {0,0,0,0,0,5} denotes SIDs created by the NT SID authority.
        SubAuthority (variable): A variable length array of unsigned 32-bit integers that uniquely identifies a principal relative to the IdentifierAuthority. Its length is determined by SubAuthorityCount.

        If you have a SID like S-a-b-c-d-e-f-g-...

        Then the bytes are
        a       (revision)
        N       (number of dashes minus two)
        bbbbbb  (six bytes of "b" treated as a 48-bit number in big-endian format)
        cccc    (four bytes of "c" treated as a 32-bit number in little-endian format)
        dddd    (four bytes of "d" treated as a 32-bit number in little-endian format)
        eeee    (four bytes of "e" treated as a 32-bit number in little-endian format)
        ffff    (four bytes of "f" treated as a 32-bit number in little-endian format)

    """
    if not isinstance(input_value, SEQUENCE_TYPES):
        sequence = False
        input_value = [input_value]
    else:
        sequence = True  # indicates if a sequence must be returned

    valid_values = []
    changed = False
    for element in input_value:
        if isinstance(element, STRING_TYPES) and element.startswith('S-'):
            parts = element.split('-')
            try:
                revision = int(parts[1])
                if len(parts[2]) <= 10:
                    authority = int(parts[2])  # authority (in dec)
                else:
                    authority = int(parts[2], 16)  # authority (in hex)
                sub_authorities = [int(sub_auth) for sub_auth in parts[3:]]
            except (IndexError, ValueError):  # missing or non numeric component
                return False

            # every component must fit in its binary field, otherwise the SID would be silently truncated
            if not (0 <= revision <= 0xFF and 0 <= authority <= 0xFFFFFFFFFFFF and len(sub_authorities) <= 0xFF):
                return False
            if any(not 0 <= sub_auth <= 0xFFFFFFFF for sub_auth in sub_authorities):
                return False

            sid_bytes = pack('<B', revision)  # revision number
            sid_bytes += pack('<B', len(sub_authorities))  # number of sub authorities
            sid_bytes += pack('>Q', authority)[2:]  # authority, lower 48 bits in big endian
            sid_bytes += b''.join(pack('<I', sub_auth) for sub_auth in sub_authorities)  # sub-authorities in little endian
            valid_values.append(sid_bytes)
            changed = True
        else:
            valid_values.append(element)  # not a SID in string format: kept, so that no value of a sequence is lost

    if changed:
        valid_values = [check_backslash(value) for value in valid_values]
        if sequence:
            return valid_values
        else:
            return valid_values[0]
    else:
        return True
502