# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# https://developers.google.com/protocol-buffers/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Provides type checking routines.

This module defines type checking utilities in the forms of dictionaries:

_VALUE_CHECKERS: A dictionary of C++ field types and a value validation
  object. Use GetTypeChecker() to pick the right checker for a field.
TYPE_TO_BYTE_SIZE_FN: A dictionary with field types and a size computing
  function.
TYPE_TO_ENCODER: A dictionary with field types and serialization
  (encoder constructor) function.
TYPE_TO_SIZER: A dictionary with field types and sizer constructor function.
FIELD_TYPE_TO_WIRE_TYPE: A dictionary with field types and their
  corresponding wire types.
TYPE_TO_DECODER: A dictionary with field types and deserialization
  (decoder constructor) function.
"""

__author__ = 'robinson@google.com (Will Robinson)'

try:
  import ctypes
except Exception:  # pylint: disable=broad-except
  ctypes = None
  # struct is only needed as the fallback when ctypes is unavailable.
  import struct
import math
import numbers
import six

if six.PY3:
  long = int

from google.protobuf.internal import api_implementation
from google.protobuf.internal import decoder
from google.protobuf.internal import encoder
from google.protobuf.internal import wire_format
from google.protobuf import descriptor

_FieldDescriptor = descriptor.FieldDescriptor


def TruncateToFourByteFloat(original):
  """Converts a number to 4-byte float precision and back.

  Args:
    original: The number to convert.

  Returns:
    The Python float obtained by storing original in a 4-byte float.
  """
  if ctypes:
    return ctypes.c_float(original).value
  else:
    return struct.unpack('<f', struct.pack('<f', original))[0]


def ToShortestFloat(original):
  """Returns the shortest float that has same value in wire.

  Args:
    original: The value of a 4-byte float field.

  Returns:
    The float with the fewest significant digits (trying 6 first) that
    still converts to the same 4-byte float as original. NaN is returned
    unchanged.
  """
  # NaN never compares equal to anything, itself included, so the search
  # loop below could never terminate for it.
  if math.isnan(original):
    return original
  # Compare against the 4-byte value of original rather than original
  # itself. For values that already are 4-byte floats this is the same
  # comparison; for values carrying extra double precision it guarantees
  # that the loop terminates.
  target = TruncateToFourByteFloat(original)
  # All 4 byte floats have between 6 and 9 significant digits, so we
  # start with 6 as the lower bound.
  # It has to be iterative because using '.9g' directly can not get rid
  # of the noise for most values. For example if float_field=0.9 is set,
  # using '.9g' will print 0.899999976.
  precision = 6
  rounded = float('{0:.{1}g}'.format(original, precision))
  while TruncateToFourByteFloat(rounded) != target:
    precision += 1
    rounded = float('{0:.{1}g}'.format(original, precision))
  return rounded


def SupportsOpenEnums(field_descriptor):
  """Returns True if the field's containing message uses proto3 syntax.

  Args:
    field_descriptor: FieldDescriptor object for the field.
  """
  return field_descriptor.containing_type.syntax == "proto3"


def GetTypeChecker(field):
  """Returns a type checker for a message field of the specified types.

  Args:
    field: FieldDescriptor object for this field.

  Returns:
    An instance of TypeChecker which can be used to verify the types
    of values assigned to a field of the specified type.
  """
  # TYPE_STRING and TYPE_BYTES share CPPTYPE_STRING; only real string fields
  # get the unicode checker, bytes fields fall through to _VALUE_CHECKERS.
  if (field.cpp_type == _FieldDescriptor.CPPTYPE_STRING and
      field.type == _FieldDescriptor.TYPE_STRING):
    return UnicodeValueChecker()
  if field.cpp_type == _FieldDescriptor.CPPTYPE_ENUM:
    if SupportsOpenEnums(field):
      # When open enums are supported, any int32 can be assigned.
      return _VALUE_CHECKERS[_FieldDescriptor.CPPTYPE_INT32]
    else:
      return EnumValueChecker(field.enum_type)
  return _VALUE_CHECKERS[field.cpp_type]


# None of the typecheckers below make any attempt to guard against people
# subclassing builtin types and doing weird things.  We're not trying to
# protect against malicious clients here, just people accidentally shooting
# themselves in the foot in obvious ways.

class TypeChecker(object):

  """Type checker used to catch type errors as early as possible
  when the client is setting scalar fields in protocol messages.
  """

  def __init__(self, *acceptable_types):
    """Stores the accepted types.

    Args:
      *acceptable_types: Types a value may be an instance of. When the first
        one is bool or float, accepted values are converted to it.
    """
    self._acceptable_types = acceptable_types

  def CheckValue(self, proposed_value):
    """Type check the provided value and return it.

    The returned value might have been normalized to another type.

    Args:
      proposed_value: The value to check.

    Returns:
      proposed_value, converted to the first acceptable type when that type
      is bool or float.

    Raises:
      TypeError: If proposed_value is not an instance of an acceptable type.
    """
    if not isinstance(proposed_value, self._acceptable_types):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), self._acceptable_types))
      raise TypeError(message)
    # Some field types(float, double and bool) accept other types, must
    # convert to the correct type in such cases.
    if self._acceptable_types:
      if self._acceptable_types[0] in (bool, float):
        return self._acceptable_types[0](proposed_value)
    return proposed_value


class TypeCheckerWithDefault(TypeChecker):

  """TypeChecker that also carries the default value of the field type."""

  def __init__(self, default_value, *acceptable_types):
    """Stores the default value and the accepted types.

    Args:
      default_value: Value returned by DefaultValue().
      *acceptable_types: Forwarded to TypeChecker.
    """
    TypeChecker.__init__(self, *acceptable_types)
    self._default_value = default_value

  def DefaultValue(self):
    """Returns the default value given at construction."""
    return self._default_value


# IntValueChecker and its subclasses perform integer type-checks
# and bounds-checks.
class IntValueChecker(object):

  """Checker used for integer fields.  Performs type-check and range check.

  Subclasses provide the _MIN and _MAX bounds (inclusive) and the _TYPE the
  accepted value is converted to.
  """

  def CheckValue(self, proposed_value):
    """Checks proposed_value and returns it converted to _TYPE.

    Raises:
      TypeError: If proposed_value is not a numbers.Integral.
      ValueError: If proposed_value is outside [_MIN, _MAX].
    """
    if not isinstance(proposed_value, numbers.Integral):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), six.integer_types))
      raise TypeError(message)
    if not self._MIN <= int(proposed_value) <= self._MAX:
      raise ValueError('Value out of range: %d' % proposed_value)
    # We force 32-bit values to int and 64-bit values to long to make
    # alternate implementations where the distinction is more significant
    # (e.g. the C++ implementation) simpler.
    proposed_value = self._TYPE(proposed_value)
    return proposed_value

  def DefaultValue(self):
    """Returns the default value of integer fields."""
    return 0


class EnumValueChecker(object):

  """Checker used for enum fields.  Performs type-check and range check."""

  def __init__(self, enum_type):
    """Stores the enum type whose numbers are the only accepted values.

    Args:
      enum_type: Object exposing values_by_number and values, as read by
        CheckValue() and DefaultValue().
    """
    self._enum_type = enum_type

  def CheckValue(self, proposed_value):
    """Checks that proposed_value is a known enum number and returns it.

    Raises:
      TypeError: If proposed_value is not a numbers.Integral.
      ValueError: If proposed_value is not a number of the enum type.
    """
    if not isinstance(proposed_value, numbers.Integral):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), six.integer_types))
      raise TypeError(message)
    if int(proposed_value) not in self._enum_type.values_by_number:
      raise ValueError('Unknown enum value: %d' % proposed_value)
    return proposed_value

  def DefaultValue(self):
    """Returns the number of the first value of the enum type."""
    return self._enum_type.values[0].number


class UnicodeValueChecker(object):

  """Checker used for string fields.

  Always returns a unicode value, even if the input is of type str.
  """

  def CheckValue(self, proposed_value):
    """Checks proposed_value and returns it as a unicode string.

    Raises:
      TypeError: If proposed_value is neither bytes nor a text string.
      ValueError: If bytes are not valid UTF-8, or a text string can not be
        encoded in UTF-8.
    """
    if not isinstance(proposed_value, (bytes, six.text_type)):
      message = ('%.1024r has type %s, but expected one of: %s' %
                 (proposed_value, type(proposed_value), (bytes, six.text_type)))
      raise TypeError(message)

    # If the value is of type 'bytes' make sure that it is valid UTF-8 data.
    if isinstance(proposed_value, bytes):
      try:
        proposed_value = proposed_value.decode('utf-8')
      except UnicodeDecodeError:
        raise ValueError('%.1024r has type bytes, but isn\'t valid UTF-8 '
                         'encoding. Non-UTF-8 strings must be converted to '
                         'unicode objects before being added.' %
                         (proposed_value))
    else:
      # The encoded result is discarded; this only verifies encodability.
      try:
        proposed_value.encode('utf8')
      except UnicodeEncodeError:
        raise ValueError('%.1024r isn\'t a valid unicode string and '
                         'can\'t be encoded in UTF-8.'%
                         (proposed_value))

    return proposed_value

  def DefaultValue(self):
    """Returns the default value of string fields."""
    return u""


class Int32ValueChecker(IntValueChecker):
  """Integer checker with signed 32-bit bounds."""
  # We're sure to use ints instead of longs here since comparison may be more
  # efficient.
  _MIN = -2147483648
  _MAX = 2147483647
  _TYPE = int


class Uint32ValueChecker(IntValueChecker):
  """Integer checker with unsigned 32-bit bounds."""
  _MIN = 0
  _MAX = (1 << 32) - 1
  _TYPE = int


class Int64ValueChecker(IntValueChecker):
  """Integer checker with signed 64-bit bounds."""
  _MIN = -(1 << 63)
  _MAX = (1 << 63) - 1
  _TYPE = long


class Uint64ValueChecker(IntValueChecker):
  """Integer checker with unsigned 64-bit bounds."""
  _MIN = 0
  _MAX = (1 << 64) - 1
  _TYPE = long


# The max 4 bytes float is about 3.4028234663852886e+38
_FLOAT_MAX = float.fromhex('0x1.fffffep+127')
_FLOAT_MIN = -_FLOAT_MAX
_INF = float('inf')
_NEG_INF = float('-inf')


class FloatValueChecker(object):

  """Checker used for float fields.  Performs type-check and range check.

  Values exceeding a 32-bit float will be converted to inf/-inf.
  """

  def CheckValue(self, proposed_value):
    """Check and convert proposed_value to float.

    Raises:
      TypeError: If proposed_value is not a numbers.Real.
    """
    if not isinstance(proposed_value, numbers.Real):
      message = ('%.1024r has type %s, but expected one of: numbers.Real' %
                 (proposed_value, type(proposed_value)))
      raise TypeError(message)
    converted_value = float(proposed_value)
    # This inf rounding matches the C++ proto SafeDoubleToFloat logic.
    if converted_value > _FLOAT_MAX:
      return _INF
    if converted_value < _FLOAT_MIN:
      return _NEG_INF

    return TruncateToFourByteFloat(converted_value)

  def DefaultValue(self):
    """Returns the default value of float fields."""
    return 0.0


# Type-checkers for all scalar CPPTYPEs.
_VALUE_CHECKERS = {
    # Integer types: type check plus range check.
    _FieldDescriptor.CPPTYPE_INT32: Int32ValueChecker(),
    _FieldDescriptor.CPPTYPE_INT64: Int64ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
    # Floating point types.
    _FieldDescriptor.CPPTYPE_DOUBLE: TypeCheckerWithDefault(
        0.0, float, numbers.Real),
    _FieldDescriptor.CPPTYPE_FLOAT: FloatValueChecker(),
    # Bool and byte-string types.
    _FieldDescriptor.CPPTYPE_BOOL: TypeCheckerWithDefault(
        False, bool, numbers.Integral),
    _FieldDescriptor.CPPTYPE_STRING: TypeCheckerWithDefault(b'', bytes),
}


# For every field type, a function F where F(field_num, value) is the
# total number of bytes needed for a value of that type.  The count
# covers the tag information as well as any other additional space
# associated with serializing "value".
TYPE_TO_BYTE_SIZE_FN = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.DoubleByteSize,
    _FieldDescriptor.TYPE_FLOAT: wire_format.FloatByteSize,
    _FieldDescriptor.TYPE_INT64: wire_format.Int64ByteSize,
    _FieldDescriptor.TYPE_UINT64: wire_format.UInt64ByteSize,
    _FieldDescriptor.TYPE_INT32: wire_format.Int32ByteSize,
    _FieldDescriptor.TYPE_FIXED64: wire_format.Fixed64ByteSize,
    _FieldDescriptor.TYPE_FIXED32: wire_format.Fixed32ByteSize,
    _FieldDescriptor.TYPE_BOOL: wire_format.BoolByteSize,
    _FieldDescriptor.TYPE_STRING: wire_format.StringByteSize,
    _FieldDescriptor.TYPE_GROUP: wire_format.GroupByteSize,
    _FieldDescriptor.TYPE_MESSAGE: wire_format.MessageByteSize,
    _FieldDescriptor.TYPE_BYTES: wire_format.BytesByteSize,
    _FieldDescriptor.TYPE_UINT32: wire_format.UInt32ByteSize,
    _FieldDescriptor.TYPE_ENUM: wire_format.EnumByteSize,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.SFixed32ByteSize,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.SFixed64ByteSize,
    _FieldDescriptor.TYPE_SINT32: wire_format.SInt32ByteSize,
    _FieldDescriptor.TYPE_SINT64: wire_format.SInt64ByteSize,
}


# For every field type, the encoder constructor used for it.
TYPE_TO_ENCODER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleEncoder,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatEncoder,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Encoder,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Encoder,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Encoder,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Encoder,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Encoder,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolEncoder,
    _FieldDescriptor.TYPE_STRING: encoder.StringEncoder,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupEncoder,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageEncoder,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesEncoder,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Encoder,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumEncoder,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Encoder,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Encoder,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Encoder,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Encoder,
}


# Maps from field types to sizer constructors.
TYPE_TO_SIZER = {
    # Lookup is by FieldDescriptor.TYPE_* constant; one entry per field type.
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleSizer,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatSizer,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Sizer,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Sizer,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Sizer,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Sizer,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Sizer,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolSizer,
    _FieldDescriptor.TYPE_STRING: encoder.StringSizer,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupSizer,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageSizer,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesSizer,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Sizer,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumSizer,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Sizer,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Sizer,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Sizer,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Sizer,
}


# Maps from field type to a decoder constructor.
TYPE_TO_DECODER = {
    # Lookup is by FieldDescriptor.TYPE_* constant; one entry per field type.
    _FieldDescriptor.TYPE_DOUBLE: decoder.DoubleDecoder,
    _FieldDescriptor.TYPE_FLOAT: decoder.FloatDecoder,
    _FieldDescriptor.TYPE_INT64: decoder.Int64Decoder,
    _FieldDescriptor.TYPE_UINT64: decoder.UInt64Decoder,
    _FieldDescriptor.TYPE_INT32: decoder.Int32Decoder,
    _FieldDescriptor.TYPE_FIXED64: decoder.Fixed64Decoder,
    _FieldDescriptor.TYPE_FIXED32: decoder.Fixed32Decoder,
    _FieldDescriptor.TYPE_BOOL: decoder.BoolDecoder,
    _FieldDescriptor.TYPE_STRING: decoder.StringDecoder,
    _FieldDescriptor.TYPE_GROUP: decoder.GroupDecoder,
    _FieldDescriptor.TYPE_MESSAGE: decoder.MessageDecoder,
    _FieldDescriptor.TYPE_BYTES: decoder.BytesDecoder,
    _FieldDescriptor.TYPE_UINT32: decoder.UInt32Decoder,
    _FieldDescriptor.TYPE_ENUM: decoder.EnumDecoder,
    _FieldDescriptor.TYPE_SFIXED32: decoder.SFixed32Decoder,
    _FieldDescriptor.TYPE_SFIXED64: decoder.SFixed64Decoder,
    _FieldDescriptor.TYPE_SINT32: decoder.SInt32Decoder,
    _FieldDescriptor.TYPE_SINT64: decoder.SInt64Decoder,
}

# Maps from field type to expected wiretype.
FIELD_TYPE_TO_WIRE_TYPE = {
    # Lookup is by FieldDescriptor.TYPE_* constant; the value is the
    # wire_format.WIRETYPE_* constant expected for that field type.
    _FieldDescriptor.TYPE_DOUBLE: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FLOAT: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_INT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_UINT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_INT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_FIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_BOOL: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_STRING: wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_GROUP: wire_format.WIRETYPE_START_GROUP,
    _FieldDescriptor.TYPE_MESSAGE: wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_BYTES: wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_UINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_ENUM: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_SINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SINT64: wire_format.WIRETYPE_VARINT,
}