1# Protocol Buffers - Google's data interchange format
2# Copyright 2008 Google Inc.  All rights reserved.
3# https://developers.google.com/protocol-buffers/
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9#     * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#     * Redistributions in binary form must reproduce the above
12# copyright notice, this list of conditions and the following disclaimer
13# in the documentation and/or other materials provided with the
14# distribution.
15#     * Neither the name of Google Inc. nor the names of its
16# contributors may be used to endorse or promote products derived from
17# this software without specific prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
"""Provides type checking routines.

This module defines type checking utilities in the form of dictionaries:

_VALUE_CHECKERS: A dictionary of C++ field types and value validation objects.
TYPE_TO_BYTE_SIZE_FN: A dictionary of field types and size-computing
  functions.
TYPE_TO_ENCODER: A dictionary of field types and encoder (serialization)
  constructors.
TYPE_TO_SIZER: A dictionary of field types and sizer constructors.
TYPE_TO_DECODER: A dictionary of field types and decoder (deserialization)
  constructors.
FIELD_TYPE_TO_WIRE_TYPE: A dictionary of field types and their
  corresponding wire types.
"""
45
46__author__ = 'robinson@google.com (Will Robinson)'
47
48import numbers
49import six
50
if six.PY3:
  # Python 3 has no separate `long` type.  Alias it to `int` so the 64-bit
  # checkers in this module can keep using `_TYPE = long` on both major
  # Python versions.
  long = int
53
54from google.protobuf.internal import api_implementation
55from google.protobuf.internal import decoder
56from google.protobuf.internal import encoder
57from google.protobuf.internal import wire_format
58from google.protobuf import descriptor
59
# Short module-local alias for descriptor.FieldDescriptor; its TYPE_* and
# CPPTYPE_* constants are the keys of the lookup tables defined in this module.
_FieldDescriptor = descriptor.FieldDescriptor
61
def SupportsOpenEnums(field_descriptor):
  """Tells whether a field's containing message is declared with proto3 syntax.

  Args:
    field_descriptor: Object exposing `containing_type.syntax`.

  Returns:
    True when the containing type's syntax equals "proto3", False otherwise.
  """
  message_syntax = field_descriptor.containing_type.syntax
  return message_syntax == "proto3"
64
def GetTypeChecker(field):
  """Picks the value checker matching a message field.

  Args:
    field: FieldDescriptor object for this field.

  Returns:
    A checker object (exposing CheckValue and DefaultValue) that validates
    values assigned to the field:
      * a new UnicodeValueChecker for text string fields,
      * the shared int32 checker for enum fields whose message supports
        open enums, or a new EnumValueChecker bound to the field's enum type
        otherwise,
      * the shared entry of _VALUE_CHECKERS for every other C++ type.
  """
  cpp_type = field.cpp_type

  # CPPTYPE_STRING covers more than TYPE_STRING; only TYPE_STRING fields are
  # routed to the unicode checker, the rest fall through to the table lookup.
  wants_text = (cpp_type == _FieldDescriptor.CPPTYPE_STRING and
                field.type == _FieldDescriptor.TYPE_STRING)
  if wants_text:
    return UnicodeValueChecker()

  if cpp_type != _FieldDescriptor.CPPTYPE_ENUM:
    return _VALUE_CHECKERS[cpp_type]

  if SupportsOpenEnums(field):
    # With open enums any int32 is an acceptable enum value.
    return _VALUE_CHECKERS[_FieldDescriptor.CPPTYPE_INT32]
  return EnumValueChecker(field.enum_type)
85
86
87# None of the typecheckers below make any attempt to guard against people
88# subclassing builtin types and doing weird things.  We're not trying to
89# protect against malicious clients here, just people accidentally shooting
90# themselves in the foot in obvious ways.
91
class TypeChecker(object):

  """Generic isinstance-based checker for scalar field values.

  Lets type errors surface at assignment time, when the client sets a scalar
  field on a protocol message.
  """

  def __init__(self, *acceptable_types):
    # Tuple of types accepted by CheckValue.  The first entry doubles as the
    # conversion target when it is bool or float.
    self._acceptable_types = acceptable_types

  def CheckValue(self, proposed_value):
    """Validates proposed_value against the acceptable types.

    Args:
      proposed_value: The value a client is trying to assign.

    Returns:
      proposed_value itself, or proposed_value converted to the first
      acceptable type when that type is bool or float.

    Raises:
      TypeError: if proposed_value is not an instance of any acceptable type.
    """
    allowed = self._acceptable_types
    if not isinstance(proposed_value, allowed):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value), allowed))

    # float/double and bool fields also accept other numeric types, so the
    # stored value is normalized to the field's canonical Python type.
    if allowed and allowed[0] in (bool, float):
      canonical_type = allowed[0]
      return canonical_type(proposed_value)
    return proposed_value
116
117
class TypeCheckerWithDefault(TypeChecker):

  """TypeChecker that additionally remembers a default value for the field."""

  def __init__(self, default_value, *acceptable_types):
    """Stores the default and forwards the acceptable types to TypeChecker.

    Args:
      default_value: Value handed back by DefaultValue().
      *acceptable_types: Types accepted by the inherited CheckValue().
    """
    self._default_value = default_value
    TypeChecker.__init__(self, *acceptable_types)

  def DefaultValue(self):
    """Returns the default value supplied at construction time."""
    return self._default_value
126
127
128# IntValueChecker and its subclasses perform integer type-checks
129# and bounds-checks.
class IntValueChecker(object):

  """Base checker for integer fields: type check plus range check.

  Subclasses supply the class attributes _MIN and _MAX (inclusive bounds) and
  _TYPE (the callable used to normalize accepted values).
  """

  def CheckValue(self, proposed_value):
    """Validates an integer value and normalizes it with _TYPE.

    Args:
      proposed_value: The value a client is trying to assign.

    Returns:
      self._TYPE(proposed_value).

    Raises:
      TypeError: if proposed_value is not a numbers.Integral.
      ValueError: if the value lies outside [_MIN, _MAX].
    """
    if not isinstance(proposed_value, numbers.Integral):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value),
                       six.integer_types))

    as_int = int(proposed_value)
    if self._MIN > as_int or as_int > self._MAX:
      raise ValueError('Value out of range: %d' % proposed_value)

    # 32-bit values are forced to int and 64-bit values to long so that
    # alternate implementations where the distinction matters more (e.g. the
    # C++ implementation) stay simpler.
    return self._TYPE(proposed_value)

  def DefaultValue(self):
    """Returns 0, the default for every integer field."""
    return 0
149
150
class EnumValueChecker(object):

  """Checker for enum fields: type check plus membership check."""

  def __init__(self, enum_type):
    # Enum descriptor; CheckValue reads its `values_by_number` mapping and
    # DefaultValue reads its `values` sequence.
    self._enum_type = enum_type

  def CheckValue(self, proposed_value):
    """Validates that proposed_value is a known number of the enum.

    Args:
      proposed_value: The value a client is trying to assign.

    Returns:
      proposed_value, unchanged.

    Raises:
      TypeError: if proposed_value is not a numbers.Integral.
      ValueError: if the number is not in the enum's values_by_number.
    """
    if not isinstance(proposed_value, numbers.Integral):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value),
                       six.integer_types))

    known_numbers = self._enum_type.values_by_number
    if int(proposed_value) not in known_numbers:
      raise ValueError('Unknown enum value: %d' % proposed_value)
    return proposed_value

  def DefaultValue(self):
    """Returns the number of the first value declared in the enum."""
    first_value = self._enum_type.values[0]
    return first_value.number
169
170
class UnicodeValueChecker(object):

  """Checker for text string fields.

  Accepts bytes or unicode text and always hands back unicode text; bytes
  input is decoded as UTF-8.
  """

  def CheckValue(self, proposed_value):
    """Validates a string value and normalizes it to unicode.

    Args:
      proposed_value: The value a client is trying to assign.

    Returns:
      The unicode text: the input itself if it already was text, otherwise
      the UTF-8 decoding of the input bytes.

    Raises:
      TypeError: if proposed_value is neither bytes nor unicode text.
      ValueError: if bytes input is not valid UTF-8, or text input cannot be
        encoded as UTF-8.
    """
    accepted = (bytes, six.text_type)
    if not isinstance(proposed_value, accepted):
      raise TypeError('%.1024r has type %s, but expected one of: %s' %
                      (proposed_value, type(proposed_value), accepted))

    if not isinstance(proposed_value, bytes):
      # Already text: encode only to prove it is representable in UTF-8; the
      # encoded bytes are thrown away.
      try:
        proposed_value.encode('utf8')
      except UnicodeEncodeError:
        raise ValueError('%.1024r isn\'t a valid unicode string and '
                         'can\'t be encoded in UTF-8.'%
                         (proposed_value))
      return proposed_value

    # Raw bytes must be valid UTF-8 data; decoding both checks and converts.
    try:
      return proposed_value.decode('utf-8')
    except UnicodeDecodeError:
      raise ValueError('%.1024r has type bytes, but isn\'t valid UTF-8 '
                       'encoding. Non-UTF-8 strings must be converted to '
                       'unicode objects before being added.' %
                       (proposed_value))

  def DefaultValue(self):
    """Returns the empty unicode string."""
    return u""
205
206
class Int32ValueChecker(IntValueChecker):
  """Integer checker bounded to the signed 32-bit range."""
  # The bounds are plain int literals rather than longs on purpose, since
  # comparing against ints may be more efficient.
  _MIN = -0x80000000
  _MAX = 0x7FFFFFFF
  _TYPE = int
213
214
class Uint32ValueChecker(IntValueChecker):
  """Integer checker bounded to the unsigned 32-bit range."""
  _MIN = 0
  _MAX = 2 ** 32 - 1
  _TYPE = int
219
220
class Int64ValueChecker(IntValueChecker):
  """Integer checker bounded to the signed 64-bit range."""
  _MIN = -(2 ** 63)
  _MAX = 2 ** 63 - 1
  _TYPE = long
225
226
class Uint64ValueChecker(IntValueChecker):
  """Integer checker bounded to the unsigned 64-bit range."""
  _MIN = 0
  _MAX = 2 ** 64 - 1
  _TYPE = long
231
232
# Largest finite value of a 4-byte float, about 3.4028234663852886e+38, and
# its negation; anything beyond them is reported as an infinity.
_FLOAT_MAX = float.fromhex('0x1.fffffep+127')
_FLOAT_MIN = -_FLOAT_MAX
_INF = float('inf')
_NEG_INF = float('-inf')


class FloatValueChecker(object):

  """Checker for float fields: type check plus range clamp.

  Magnitudes too large for a 32-bit float are turned into inf/-inf.
  """

  def CheckValue(self, proposed_value):
    """Validates proposed_value and converts it to a Python float.

    Args:
      proposed_value: The value a client is trying to assign.

    Returns:
      float(proposed_value), or inf / -inf when that float lies above
      _FLOAT_MAX / below _FLOAT_MIN.

    Raises:
      TypeError: if proposed_value is not a numbers.Real.
    """
    if not isinstance(proposed_value, numbers.Real):
      raise TypeError(
          '%.1024r has type %s, but expected one of: numbers.Real' %
          (proposed_value, type(proposed_value)))

    as_float = float(proposed_value)
    # This inf rounding matches the C++ proto SafeDoubleToFloat logic.
    if as_float > _FLOAT_MAX:
      result = _INF
    elif as_float < _FLOAT_MIN:
      result = _NEG_INF
    else:
      result = as_float
    return result
    # TODO(jieluo): convert to 4 bytes float (c style float) at setters:
    # return struct.unpack('f', struct.pack('f', converted_value))

  def DefaultValue(self):
    """Returns 0.0, the default for float fields."""
    return 0.0
266
267
# Type-checkers for all scalar CPPTYPEs.
#
# Keys are _FieldDescriptor.CPPTYPE_* constants; values are shared checker
# instances.  GetTypeChecker consults this table after handling text string
# and enum fields itself, so the CPPTYPE_STRING entry below is the bytes
# checker and there is no CPPTYPE_ENUM entry.
_VALUE_CHECKERS = {
    _FieldDescriptor.CPPTYPE_INT32: Int32ValueChecker(),
    _FieldDescriptor.CPPTYPE_INT64: Int64ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
    _FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
    # Any numbers.Real is accepted; because float is listed first,
    # TypeChecker.CheckValue converts the value to float.
    _FieldDescriptor.CPPTYPE_DOUBLE: TypeCheckerWithDefault(
        0.0, float, numbers.Real),
    _FieldDescriptor.CPPTYPE_FLOAT: FloatValueChecker(),
    # Any numbers.Integral is accepted; because bool is listed first,
    # TypeChecker.CheckValue converts the value to bool.
    _FieldDescriptor.CPPTYPE_BOOL: TypeCheckerWithDefault(
        False, bool, numbers.Integral),
    _FieldDescriptor.CPPTYPE_STRING: TypeCheckerWithDefault(b'', bytes),
    }
281
282
# Map from field type to a function F, such that F(field_num, value)
# gives the total byte size for a value of the given type.  This
# byte size includes tag information and any other additional space
# associated with serializing "value".
#
# Keys are _FieldDescriptor.TYPE_* constants; values are the matching
# *ByteSize functions of the wire_format module.
TYPE_TO_BYTE_SIZE_FN = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.DoubleByteSize,
    _FieldDescriptor.TYPE_FLOAT: wire_format.FloatByteSize,
    _FieldDescriptor.TYPE_INT64: wire_format.Int64ByteSize,
    _FieldDescriptor.TYPE_UINT64: wire_format.UInt64ByteSize,
    _FieldDescriptor.TYPE_INT32: wire_format.Int32ByteSize,
    _FieldDescriptor.TYPE_FIXED64: wire_format.Fixed64ByteSize,
    _FieldDescriptor.TYPE_FIXED32: wire_format.Fixed32ByteSize,
    _FieldDescriptor.TYPE_BOOL: wire_format.BoolByteSize,
    _FieldDescriptor.TYPE_STRING: wire_format.StringByteSize,
    _FieldDescriptor.TYPE_GROUP: wire_format.GroupByteSize,
    _FieldDescriptor.TYPE_MESSAGE: wire_format.MessageByteSize,
    _FieldDescriptor.TYPE_BYTES: wire_format.BytesByteSize,
    _FieldDescriptor.TYPE_UINT32: wire_format.UInt32ByteSize,
    _FieldDescriptor.TYPE_ENUM: wire_format.EnumByteSize,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.SFixed32ByteSize,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.SFixed64ByteSize,
    _FieldDescriptor.TYPE_SINT32: wire_format.SInt32ByteSize,
    _FieldDescriptor.TYPE_SINT64: wire_format.SInt64ByteSize
    }
307
308
# Maps from field types to encoder constructors.
#
# Keys are _FieldDescriptor.TYPE_* constants; values are the matching
# *Encoder callables of the encoder module.
TYPE_TO_ENCODER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleEncoder,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatEncoder,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Encoder,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Encoder,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Encoder,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Encoder,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Encoder,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolEncoder,
    _FieldDescriptor.TYPE_STRING: encoder.StringEncoder,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupEncoder,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageEncoder,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesEncoder,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Encoder,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumEncoder,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Encoder,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Encoder,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Encoder,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Encoder,
    }
330
331
# Maps from field types to sizer constructors.
#
# Keys are _FieldDescriptor.TYPE_* constants; values are the matching
# *Sizer callables of the encoder module.
TYPE_TO_SIZER = {
    _FieldDescriptor.TYPE_DOUBLE: encoder.DoubleSizer,
    _FieldDescriptor.TYPE_FLOAT: encoder.FloatSizer,
    _FieldDescriptor.TYPE_INT64: encoder.Int64Sizer,
    _FieldDescriptor.TYPE_UINT64: encoder.UInt64Sizer,
    _FieldDescriptor.TYPE_INT32: encoder.Int32Sizer,
    _FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Sizer,
    _FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Sizer,
    _FieldDescriptor.TYPE_BOOL: encoder.BoolSizer,
    _FieldDescriptor.TYPE_STRING: encoder.StringSizer,
    _FieldDescriptor.TYPE_GROUP: encoder.GroupSizer,
    _FieldDescriptor.TYPE_MESSAGE: encoder.MessageSizer,
    _FieldDescriptor.TYPE_BYTES: encoder.BytesSizer,
    _FieldDescriptor.TYPE_UINT32: encoder.UInt32Sizer,
    _FieldDescriptor.TYPE_ENUM: encoder.EnumSizer,
    _FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Sizer,
    _FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Sizer,
    _FieldDescriptor.TYPE_SINT32: encoder.SInt32Sizer,
    _FieldDescriptor.TYPE_SINT64: encoder.SInt64Sizer,
    }
353
354
# Maps from field type to a decoder constructor.
#
# Keys are _FieldDescriptor.TYPE_* constants; values are the matching
# *Decoder callables of the decoder module.
TYPE_TO_DECODER = {
    _FieldDescriptor.TYPE_DOUBLE: decoder.DoubleDecoder,
    _FieldDescriptor.TYPE_FLOAT: decoder.FloatDecoder,
    _FieldDescriptor.TYPE_INT64: decoder.Int64Decoder,
    _FieldDescriptor.TYPE_UINT64: decoder.UInt64Decoder,
    _FieldDescriptor.TYPE_INT32: decoder.Int32Decoder,
    _FieldDescriptor.TYPE_FIXED64: decoder.Fixed64Decoder,
    _FieldDescriptor.TYPE_FIXED32: decoder.Fixed32Decoder,
    _FieldDescriptor.TYPE_BOOL: decoder.BoolDecoder,
    _FieldDescriptor.TYPE_STRING: decoder.StringDecoder,
    _FieldDescriptor.TYPE_GROUP: decoder.GroupDecoder,
    _FieldDescriptor.TYPE_MESSAGE: decoder.MessageDecoder,
    _FieldDescriptor.TYPE_BYTES: decoder.BytesDecoder,
    _FieldDescriptor.TYPE_UINT32: decoder.UInt32Decoder,
    _FieldDescriptor.TYPE_ENUM: decoder.EnumDecoder,
    _FieldDescriptor.TYPE_SFIXED32: decoder.SFixed32Decoder,
    _FieldDescriptor.TYPE_SFIXED64: decoder.SFixed64Decoder,
    _FieldDescriptor.TYPE_SINT32: decoder.SInt32Decoder,
    _FieldDescriptor.TYPE_SINT64: decoder.SInt64Decoder,
    }
376
# Maps from field type to expected wiretype.
#
# Keys are _FieldDescriptor.TYPE_* constants; values are wire_format.WIRETYPE_*
# constants.  In this table double/fixed64/sfixed64 map to FIXED64,
# float/fixed32/sfixed32 map to FIXED32, string/message/bytes map to
# LENGTH_DELIMITED, group maps to START_GROUP, and the remaining integer,
# bool and enum types map to VARINT.
FIELD_TYPE_TO_WIRE_TYPE = {
    _FieldDescriptor.TYPE_DOUBLE: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FLOAT: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_INT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_UINT64: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_INT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_FIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_FIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_BOOL: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_STRING:
      wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_GROUP: wire_format.WIRETYPE_START_GROUP,
    _FieldDescriptor.TYPE_MESSAGE:
      wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_BYTES:
      wire_format.WIRETYPE_LENGTH_DELIMITED,
    _FieldDescriptor.TYPE_UINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_ENUM: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SFIXED32: wire_format.WIRETYPE_FIXED32,
    _FieldDescriptor.TYPE_SFIXED64: wire_format.WIRETYPE_FIXED64,
    _FieldDescriptor.TYPE_SINT32: wire_format.WIRETYPE_VARINT,
    _FieldDescriptor.TYPE_SINT64: wire_format.WIRETYPE_VARINT,
    }
401