"""
"""

# Created on 2016.08.09
#
# Author: Giovanni Cannata
#
# Copyright 2016 - 2020 Giovanni Cannata
#
# This file is part of ldap3.
#
# ldap3 is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ldap3 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with ldap3 in the COPYING and COPYING.LESSER files.
# If not, see <http://www.gnu.org/licenses/>.
from binascii import a2b_hex, hexlify
from datetime import datetime
from calendar import timegm
from uuid import UUID
from struct import pack
from struct import error as struct_error


from ... import SEQUENCE_TYPES, STRING_TYPES, NUMERIC_TYPES, INTEGER_TYPES
from .formatters import format_time, format_ad_timestamp
from ...utils.conv import to_raw, to_unicode, ldap_escape_to_bytes, escape_bytes

# Validators return True if value is valid, False if value is not valid,
# or a value different from True and False that is a valid value to substitute to the input value


def _split_input(input_value):
    """Return ``(values, sequence)``: the values to iterate over and a flag
    telling whether the caller passed a sequence (so a sequence must be returned).
    """
    if isinstance(input_value, SEQUENCE_TYPES):
        return input_value, True
    return [input_value], False


def _substitution_or_true(valid_values, changed, sequence, escape=False):
    """Build the return value shared by the converting validators.

    Returns True when no element was converted; otherwise returns the
    converted values (a list when the input was a sequence, else the single
    value). When ``escape`` is set, each value goes through check_backslash first.
    """
    if not changed:
        return True
    if escape:
        valid_values = [check_backslash(value) for value in valid_values]
    return valid_values if sequence else valid_values[0]


def _naive_local_to_utc(value):
    """Shift a datetime without timezone, assumed to be local time, to UTC."""
    offset = datetime.now() - datetime.utcnow()
    return value - offset


def check_backslash(value):
    r"""Replace every backslash in a string or bytes value with ``\5C``.

    Values of any other type are returned untouched.
    """
    if isinstance(value, (bytearray, bytes)):
        if b'\\' in value:
            value = value.replace(b'\\', b'\\5C')
    elif isinstance(value, STRING_TYPES):
        if '\\' in value:
            value = value.replace('\\', '\\5C')
    return value


def check_type(input_value, value_type):
    """Return True if input_value is an instance of value_type, or is a
    sequence whose elements are all instances of value_type.
    """
    if isinstance(input_value, value_type):
        return True

    if isinstance(input_value, SEQUENCE_TYPES):
        for value in input_value:
            if not isinstance(value, value_type):
                return False
        return True

    return False


# noinspection PyUnusedLocal
def always_valid(input_value):
    """Accept any value."""
    return True


def validate_generic_single_value(input_value):
    """Return True for a non-sequence value or for a sequence holding exactly one element."""
    if not isinstance(input_value, SEQUENCE_TYPES):
        return True

    try:  # object couldn't have a __len__ method
        if len(input_value) == 1:
            return True
    except Exception:
        pass

    return False


def validate_zero_and_minus_one_and_positive_int(input_value):
    """Accept -1, 0 and any positive integer (used by pwdLastSet in AD).

    The value may be a number, a numeric string, or a sequence holding a single
    such element. Values that cannot be converted to int are not valid.
    """
    if isinstance(input_value, SEQUENCE_TYPES):
        if len(input_value) != 1:
            return False
        input_value = input_value[0]

    if not (isinstance(input_value, NUMERIC_TYPES) or isinstance(input_value, STRING_TYPES)):
        return False

    try:
        return int(input_value) >= -1
    except (ValueError, TypeError, OverflowError):  # non-numeric string, nan or infinity
        return False


def validate_integer(input_value):
    """Validate integers, converting integer-like values (strings, bytes) to int.

    Returns False for floats and bools, True when the input is already made of
    integers only, otherwise the converted int value(s) or False when any
    element is not an exact integer.
    """
    if check_type(input_value, (float, bool)):
        return False
    if check_type(input_value, INTEGER_TYPES):
        return True

    values, sequence = _split_input(input_value)

    valid_values = []  # builds a list of valid int values
    from decimal import Decimal, InvalidOperation
    for element in values:
        # try to convert any type to int, doublecheck with the Decimal type:
        # the int() value is used only if both conversions succeed and are equal
        try:
            value = to_unicode(element) if isinstance(element, bytes) else element
            decimal_value = Decimal(value)
            int_value = int(value)
        except (ValueError, TypeError, OverflowError, InvalidOperation):
            return False
        if decimal_value != int_value:
            return False
        valid_values.append(int_value)

    return valid_values if sequence else valid_values[0]


def validate_bytes(input_value):
    """Return True if the value is bytes or a sequence of bytes."""
    return check_type(input_value, bytes)


def validate_boolean(input_value):
    """Validate a boolean, returning the string 'TRUE' or 'FALSE' to substitute.

    The value can be a real bool or the string TRUE or FALSE (any case);
    only a single value, or a sequence with a single element, is allowed.
    """
    if validate_generic_single_value(input_value):  # valid only if a single value or a sequence with a single element
        if isinstance(input_value, SEQUENCE_TYPES):
            input_value = input_value[0]
        if isinstance(input_value, bool):
            if input_value:
                return 'TRUE'
            else:
                return 'FALSE'
        if str is not bytes and isinstance(input_value, bytes):  # python3 try to converts bytes to string
            input_value = to_unicode(input_value)
        if isinstance(input_value, STRING_TYPES):
            if input_value.lower() == 'true':
                return 'TRUE'
            elif input_value.lower() == 'false':
                return 'FALSE'
    return False


def _validate_generalized_time(input_value, accept_zero_year):
    """Shared implementation of validate_time and validate_time_with_0_year.

    Strings must parse with format_time (or, when accept_zero_year is set,
    start with '0000'); datetime objects are converted to Generalized Time strings.
    """
    values, sequence = _split_input(input_value)

    valid_values = []
    changed = False
    for element in values:
        if str is not bytes and isinstance(element, bytes):  # python3 try to converts bytes to string
            element = to_unicode(element)
        if isinstance(element, STRING_TYPES):  # checks if it is already a Generalized Time
            if accept_zero_year and element.startswith('0000'):
                valid_values.append(element)
            elif isinstance(format_time(to_raw(element)), datetime):  # valid Generalized Time string
                valid_values.append(element)
            else:
                return False
        elif isinstance(element, datetime):
            changed = True
            if element.tzinfo:  # a datetime with a timezone
                valid_values.append(element.strftime('%Y%m%d%H%M%S%z'))
            else:  # datetime without timezone, assumed local and adjusted to UTC
                valid_values.append(_naive_local_to_utc(element).strftime('%Y%m%d%H%M%SZ'))
        else:
            return False

    return _substitution_or_true(valid_values, changed, sequence)


def validate_time_with_0_year(input_value):
    """Validate a Generalized Time, accepting a 0000 year too.

    If a datetime object doesn't have a timezone it's considered local time and is adjusted to UTC.
    """
    return _validate_generalized_time(input_value, True)


def validate_time(input_value):
    """Validate a Generalized Time.

    If a datetime object doesn't have a timezone it's considered local time and is adjusted to UTC.
    """
    return _validate_generalized_time(input_value, False)


def validate_ad_timestamp(input_value):
    """
    Active Directory stores date/time values as the number of 100-nanosecond intervals
    that have elapsed since the 0 hour on January 1, 1601 till the date/time that is being stored.
    The time is always stored in Greenwich Mean Time (GMT) in the Active Directory.
    """
    values, sequence = _split_input(input_value)

    valid_values = []
    changed = False
    for element in values:
        if str is not bytes and isinstance(element, bytes):  # python3 try to converts bytes to string
            element = to_unicode(element)
        if isinstance(element, NUMERIC_TYPES):
            if 0 <= element <= 9223372036854775807:  # min and max for the AD timestamp starting from 12:00 AM January 1, 1601
                valid_values.append(element)
            else:
                return False
        elif isinstance(element, STRING_TYPES):  # checks if it is already an AD timestamp
            if isinstance(format_ad_timestamp(to_raw(element)), datetime):  # valid AD timestamp string
                valid_values.append(element)
            else:
                return False
        elif isinstance(element, datetime):
            changed = True
            if element.tzinfo:  # a datetime with a timezone
                utc_tuple = element.utctimetuple()
            else:  # datetime without timezone, assumed local and adjusted to UTC
                utc_tuple = _naive_local_to_utc(element).timetuple()
            # 11644473600 is the number of seconds between 1601-01-01 and 1970-01-01,
            # 10000000 is the number of 100-nanosecond intervals in a second
            valid_values.append(to_raw((timegm(utc_tuple) + 11644473600) * 10000000, encoding='ascii'))
        else:
            return False

    return _substitution_or_true(valid_values, changed, sequence)


def validate_ad_timedelta(input_value):
    """
    Should be validated like an AD timestamp except that since it is a time
    delta, it is stored as a negative number.
    """
    if not isinstance(input_value, INTEGER_TYPES) or input_value > 0:
        return False
    return validate_ad_timestamp(input_value * -1)


def _validate_uuid_values(input_value, convert):
    """Shared implementation of validate_guid and validate_uuid.

    Each string is parsed as a UUID (retrying with the backslashes removed, in
    case it is an escaped byte sequence) and replaced by ``convert(uuid)``.
    Bytes are assumed valid and left untouched.
    """
    values, sequence = _split_input(input_value)

    valid_values = []
    changed = False
    for element in values:
        if isinstance(element, STRING_TYPES):
            try:
                parsed = UUID(element)
            except ValueError:  # try if the value is an escaped byte sequence
                try:
                    parsed = UUID(element.replace('\\', ''))
                except ValueError:
                    if str is bytes:  # python 2 only: the str is assumed to be raw bytes and valid
                        valid_values.append(element)
                        continue
                    return False
            valid_values.append(convert(parsed))
            changed = True
        elif isinstance(element, (bytes, bytearray)):  # assumes bytes are valid
            valid_values.append(element)
        else:
            return False

    return _substitution_or_true(valid_values, changed, sequence, escape=True)


def validate_guid(input_value):
    """
    object guid in uuid format (Novell eDirectory)
    """
    return _validate_uuid_values(input_value, lambda parsed: parsed.bytes)


def validate_uuid(input_value):
    """
    object entryUUID in uuid format
    """
    return _validate_uuid_values(input_value, str)


def validate_uuid_le(input_value):
    r"""
    Active Directory stores objectGUID in uuid_le format, follows RFC4122 and MS-DTYP:
    "{07039e68-4373-264d-a0a7-07039e684373}": string representation big endian, converted to little endian (with or without brace curles)
    "689e030773434d26a7a007039e684373": packet representation, already in little endian
    "\68\9e\03\07\73\43\4d\26\a7\a0\07\03\9e\68\43\73": bytes representation, already in little endian
    byte sequence: already in little endian

    A string that cannot be parsed in any of these representations is not valid
    (in python 2 only, it is assumed to be a raw byte sequence and kept untouched).
    """
    values, sequence = _split_input(input_value)

    valid_values = []
    changed = False
    for element in values:
        if isinstance(element, STRING_TYPES):
            try:
                if (element.startswith('{') and element.endswith('}')) or '-' in element:
                    # string representation, value in big endian, converts to little endian
                    converted = UUID(hex=element).bytes_le
                elif '\\' in element:
                    # byte representation, value in little endian
                    converted = UUID(bytes_le=ldap_escape_to_bytes(element)).bytes_le
                else:
                    # packet representation, value in little endian
                    converted = UUID(bytes_le=a2b_hex(element)).bytes_le
            except (ValueError, TypeError):  # a2b_hex raises TypeError on bad input in python 2
                if str is bytes:  # python2 only assume value is bytes and valid
                    valid_values.append(element)  # value is untouched, must be in little endian
                    continue
                return False
            valid_values.append(converted)
            changed = True
        elif isinstance(element, (bytes, bytearray)):  # assumes bytes are valid uuid
            valid_values.append(element)  # value is untouched, must be in little endian
        else:
            return False

    return _substitution_or_true(valid_values, changed, sequence, escape=True)


def _sid_string_to_bytes(sid):
    """Pack a SID in its 'S-a-b-c-...' string form into its byte form.

    Raises IndexError, ValueError or struct.error when the string is malformed.
    """
    parts = sid.split('-')
    sid_bytes = pack('<q', int(parts[1]))[0:1]  # revision number
    sid_bytes += pack('<q', len(parts[3:]))[0:1]  # number of sub authorities
    if len(parts[2]) <= 10:
        sid_bytes += pack('>q', int(parts[2]))[2:]  # authority (in dec)
    else:
        sid_bytes += pack('>q', int(parts[2], 16))[2:]  # authority (in hex)
    for sub_auth in parts[3:]:
        sid_bytes += pack('<q', int(sub_auth))[0:4]  # sub-authorities
    return sid_bytes


def validate_sid(input_value):
    """
        SID= "S-1-" IdentifierAuthority 1*SubAuthority
               IdentifierAuthority= IdentifierAuthorityDec / IdentifierAuthorityHex
                  ; If the identifier authority is < 2^32, the
                  ; identifier authority is represented as a decimal
                  ; number
                  ; If the identifier authority is >= 2^32,
                  ; the identifier authority is represented in
                  ; hexadecimal
                IdentifierAuthorityDec =  1*10DIGIT
                  ; IdentifierAuthorityDec, top level authority of a
                  ; security identifier is represented as a decimal number
                IdentifierAuthorityHex = "0x" 12HEXDIG
                  ; IdentifierAuthorityHex, the top-level authority of a
                  ; security identifier is represented as a hexadecimal number
                SubAuthority= "-" 1*10DIGIT
                  ; Sub-Authority is always represented as a decimal number
                  ; No leading "0" characters are allowed when IdentifierAuthority
                  ; or SubAuthority is represented as a decimal number
                  ; All hexadecimal digits must be output in string format,
                  ; pre-pended by "0x"

        Revision (1 byte): An 8-bit unsigned integer that specifies the revision level of the SID. This value MUST be set to 0x01.
        SubAuthorityCount (1 byte): An 8-bit unsigned integer that specifies the number of elements in the SubAuthority array. The maximum number of elements allowed is 15.
        IdentifierAuthority (6 bytes): A SID_IDENTIFIER_AUTHORITY structure that indicates the authority under which the SID was created. It describes the entity that created the SID. The Identifier Authority value {0,0,0,0,0,5} denotes SIDs created by the NT SID authority.
        SubAuthority (variable): A variable length array of unsigned 32-bit integers that uniquely identifies a principal relative to the IdentifierAuthority. Its length is determined by SubAuthorityCount.

        If you have a SID like S-a-b-c-d-e-f-g-...

        Then the bytes are
        a 	(revision)
        N 	(number of dashes minus two)
        bbbbbb 	(six bytes of "b" treated as a 48-bit number in big-endian format)
        cccc 	(four bytes of "c" treated as a 32-bit number in little-endian format)
        dddd 	(four bytes of "d" treated as a 32-bit number in little-endian format)
        eeee 	(four bytes of "e" treated as a 32-bit number in little-endian format)
        ffff 	(four bytes of "f" treated as a 32-bit number in little-endian format)

    Strings starting with 'S-' are converted to the byte form (a malformed one
    is not valid); any other element is kept untouched.
    """
    values, sequence = _split_input(input_value)

    valid_values = []
    changed = False
    for element in values:
        if isinstance(element, STRING_TYPES) and element.startswith('S-'):
            try:
                valid_values.append(_sid_string_to_bytes(element))
            except (IndexError, ValueError, struct_error):  # missing parts, non numeric parts, number out of range
                return False
            changed = True
        else:
            # kept as is, so it is not lost when another element of the sequence is converted
            valid_values.append(element)

    return _substitution_or_true(valid_values, changed, sequence, escape=True)