1 /*
2  * Copyright (c) 2009-2021, Google LLC
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Google LLC nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29 ** Our memory representation for parsing tables and messages themselves.
30 ** Functions in this file are used by generated code and possibly reflection.
31 **
32 ** The definitions in this file are internal to upb.
33 **/
34 
35 #ifndef UPB_MSG_INT_H_
36 #define UPB_MSG_INT_H_
37 
38 #include <stdint.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include "upb/msg.h"
43 #include "upb/table_internal.h"
44 #include "upb/upb.h"
45 
46 /* Must be last. */
47 #include "upb/port_def.inc"
48 
49 #ifdef __cplusplus
50 extern "C" {
51 #endif
52 
53 /** upb_msglayout *************************************************************/
54 
55 /* upb_msglayout represents the memory layout of a given upb_msgdef.  The
56  * members are public so generated code can initialize them, but users MUST NOT
57  * read or write any of its members. */
58 
/* Table-only label values.  These aren't real labels according to
 * descriptor.proto, but in the table we use these for map/packed fields
 * instead of UPB_LABEL_REPEATED. */
enum {
  _UPB_LABEL_MAP = 4,
  _UPB_LABEL_PACKED = 7  /* Low 3 bits are common with UPB_LABEL_REPEATED. */
};
65 
/* Layout description of a single field.  upb_msglayout.fields points at an
 * array of these, and upb_msglayout_ext embeds one for an extension. */
typedef struct {
  uint32_t number;        /* Presumably the proto field number -- confirm. */
  uint16_t offset;        /* Presumably the byte offset of the field's storage
                             within the message -- confirm. */
  int16_t presence;       /* If >0, hasbit_index.  If <0, ~oneof_index. */
  uint16_t submsg_index;  /* undefined if descriptortype != MESSAGE or GROUP. */
  uint8_t descriptortype; /* Compared against UPB_DTYPE_* by _upb_issubmsg(). */
  uint8_t mode; /* upb_fieldmode | upb_labelflags |
                   (upb_rep << _UPB_REP_SHIFT) */
} upb_msglayout_field;
75 
/* Field mode, stored in the low bits of upb_msglayout_field.mode and extracted
 * with _UPB_MODE_MASK.  Note that the value 3 is deliberately unused:
 * _upb_repeated_or_map() relies on only SCALAR having bit 1 set. */
typedef enum {
  _UPB_MODE_MAP = 0,
  _UPB_MODE_ARRAY = 1,
  _UPB_MODE_SCALAR = 2,

  _UPB_MODE_MASK = 3,  /* Mask to isolate the mode from upb_rep. */
} upb_fieldmode;
83 
/* Extra flags on the mode field.  These are OR'ed into
 * upb_msglayout_field.mode alongside the upb_fieldmode value. */
enum upb_labelflags {
  _UPB_MODE_IS_PACKED = 4,
  _UPB_MODE_IS_EXTENSION = 8,
};
89 
/* Representation in the message.  Derivable from descriptortype and mode, but
 * fast access helps the serializer.  The value is stored in
 * upb_msglayout_field.mode shifted left by _UPB_REP_SHIFT. */
enum upb_rep {
  _UPB_REP_1BYTE = 0,
  _UPB_REP_4BYTE = 1,
  _UPB_REP_8BYTE = 2,
  _UPB_REP_STRVIEW = 3,

  /* Pointer representation aliases the 4- or 8-byte one, chosen by the width
   * of uintptr_t on the target. */
#if UINTPTR_MAX == 0xffffffff
  _UPB_REP_PTR = _UPB_REP_4BYTE,
#else
  _UPB_REP_PTR = _UPB_REP_8BYTE,
#endif

  _UPB_REP_SHIFT = 6,  /* Bit offset of the rep in upb_msglayout_field.mode */
};
106 
_upb_getmode(const upb_msglayout_field * field)107 UPB_INLINE upb_fieldmode _upb_getmode(const upb_msglayout_field *field) {
108   return (upb_fieldmode)(field->mode & 3);
109 }
110 
_upb_repeated_or_map(const upb_msglayout_field * field)111 UPB_INLINE bool _upb_repeated_or_map(const upb_msglayout_field *field) {
112   /* This works because upb_fieldmode has no value 3. */
113   return !(field->mode & _UPB_MODE_SCALAR);
114 }
115 
_upb_issubmsg(const upb_msglayout_field * field)116 UPB_INLINE bool _upb_issubmsg(const upb_msglayout_field *field) {
117   return field->descriptortype == UPB_DTYPE_MESSAGE ||
118          field->descriptortype == UPB_DTYPE_GROUP;
119 }
120 
121 struct upb_decstate;
122 struct upb_msglayout;
123 
/* Function type of a fast-table field parser; a pointer to one is stored in
 * each _upb_fasttable_entry.  NOTE(review): the parameter semantics (|table|,
 * |hasbits|, |data|) and the meaning of the returned pointer are defined by
 * the decoder, which is not visible here -- presumably the return value is the
 * updated parse position; confirm. */
typedef const char *_upb_field_parser(struct upb_decstate *d, const char *ptr,
                                      upb_msg *msg, intptr_t table,
                                      uint64_t hasbits, uint64_t data);
127 
/* One slot of upb_msglayout.fasttable: a parser function together with a
 * 64-bit data word (presumably passed to the parser as |data| -- confirm). */
typedef struct {
  uint64_t field_data;
  _upb_field_parser *field_parser;
} _upb_fasttable_entry;
132 
/* Sub-layout referenced by a field.  Currently only a sub-message layout is
 * representable; an enum layout alternative is planned (see TODO). */
typedef union {
  const struct upb_msglayout *submsg;
  // TODO: const upb_enumlayout *subenum;
} upb_msglayout_sub;
137 
/* Extension mode of a message; stored (as a uint8_t) in upb_msglayout.ext. */
typedef enum {
  _UPB_MSGEXT_NONE = 0,         // Non-extendable message.
  _UPB_MSGEXT_EXTENDABLE = 1,   // Normal extendable message.
  _UPB_MSGEXT_MSGSET = 2,       // MessageSet message.
  _UPB_MSGEXT_MSGSET_ITEM = 3,  // MessageSet item (temporary only, see decode.c)
} upb_msgext_mode;
144 
/* MessageSet wire format is:
 *   message MessageSet {
 *     repeated group Item = 1 {
 *       required int32 type_id = 2;
 *       required string message = 3;
 *     }
 *   }
 *
 * The enumerators below are the field numbers used in that format. */
typedef enum {
  _UPB_MSGSET_ITEM = 1,     /* The repeated "Item" group. */
  _UPB_MSGSET_TYPEID = 2,   /* Item.type_id */
  _UPB_MSGSET_MESSAGE = 3,  /* Item.message */
} upb_msgext_fieldnum;
158 
/* Memory layout of one message type: its fields, the sub-layouts they refer
 * to, the size of the user-visible data, and the trailing fast-parse table. */
struct upb_msglayout {
  const upb_msglayout_sub *subs;      /* Presumably indexed by
                                         upb_msglayout_field.submsg_index --
                                         confirm. */
  const upb_msglayout_field *fields;  /* Array of field_count entries. */
  /* Must be aligned to sizeof(void*).  Doesn't include internal members like
   * unknown fields, extension dict, pointer to msglayout, etc. */
  uint16_t size;
  uint16_t field_count;
  uint8_t ext;  // upb_msgext_mode, declared as uint8_t so sizeof(ext) == 1
  uint8_t dense_below;  /* NOTE(review): meaning not evident from this header;
                           see the decoder. */
  uint8_t table_mask;   /* NOTE(review): presumably masks an index into
                           fasttable -- confirm against the decoder. */
  /* To constant-initialize the tables of variable length, we need a flexible
   * array member, and we need to compile in C99 mode. */
  _upb_fasttable_entry fasttable[];
};
173 
/* Layout of a single extension: the field description, the layout of the
 * message being extended, and the extension's sub-layout (if any). */
typedef struct {
  upb_msglayout_field field;
  const upb_msglayout *extendee;
  upb_msglayout_sub sub;   /* NULL unless submessage or proto2 enum */
} upb_msglayout_ext;
179 
/* Collection of message and extension layouts, presumably those generated for
 * one .proto file (inferred from the type name -- confirm). */
typedef struct {
  const upb_msglayout **msgs;      /* msg_count message layout pointers. */
  const upb_msglayout_ext **exts;  /* ext_count extension layout pointers. */
  int msg_count;
  int ext_count;
} upb_msglayout_file;
186 
187 /** upb_extreg ****************************************************************/
188 
/* Adds the |count| extensions pointed to by the array |e| into the registry
 * |r|.  Returns false if an extension's message type and field number were
 * already in the map, or if memory allocation fails. */
bool _upb_extreg_add(upb_extreg *r, const upb_msglayout_ext **e, size_t count);
193 
/* Looks up the extension (if any) registered in |r| for message type |l| and
 * field number |num|.  Returns a pointer to the extension's layout if one was
 * found; presumably NULL otherwise (the implementation is not visible in this
 * header -- confirm). */
const upb_msglayout_ext *_upb_extreg_get(const upb_extreg *r,
                                         const upb_msglayout *l, uint32_t num);
199 
200 /** upb_msg *******************************************************************/
201 
/* Internal members of a upb_msg that track unknown fields and/or extensions.
 * We can change this without breaking binary compatibility.  We put these
 * before the user's data.  The user's upb_msg* points after the
 * upb_msg_internal. */

/* Header of the variable-sized buffer that holds a message's unknown-field
 * bytes and its extensions.  upb_msg_internal holds a pointer to it. */
typedef struct {
  /* Total size of this structure, including the data that follows.
   * Must be aligned to 8, which is alignof(upb_msg_ext) */
  uint32_t size;

  /* Offsets relative to the beginning of this structure.
   *
   * Unknown data grows forward from the beginning to unknown_end.
   * Extension data grows backward from size to ext_begin.
   * When the two meet, we're out of data and have to realloc.
   *
   * If we imagine that the final member of this struct is:
   *   char data[size - overhead];  // overhead = sizeof(upb_msg_internaldata)
   *
   * Then we have:
   *   unknown data: data[0 .. (unknown_end - overhead)]
   *   extensions data: data[(ext_begin - overhead) .. (size - overhead)] */
  uint32_t unknown_end;
  uint32_t ext_begin;
  /* Data follows, as if there were an array:
   *   char data[size - sizeof(upb_msg_internaldata)]; */
} upb_msg_internaldata;
229 
/* Per-message internal header located immediately before the user's upb_msg
 * data (see upb_msg_getinternal()).  A freshly created message has it zeroed,
 * so |internal| starts out NULL (see _upb_msg_new_inl()). */
typedef struct {
  upb_msg_internaldata *internal;
} upb_msg_internal;
233 
/* Maps upb_fieldtype_t -> memory size.  Only declared here; the table is
 * defined in another translation unit. */
extern char _upb_fieldtype_to_size[12];
236 
upb_msg_sizeof(const upb_msglayout * l)237 UPB_INLINE size_t upb_msg_sizeof(const upb_msglayout *l) {
238   return l->size + sizeof(upb_msg_internal);
239 }
240 
_upb_msg_new_inl(const upb_msglayout * l,upb_arena * a)241 UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) {
242   size_t size = upb_msg_sizeof(l);
243   void *mem = upb_arena_malloc(a, size);
244   upb_msg *msg;
245   if (UPB_UNLIKELY(!mem)) return NULL;
246   msg = UPB_PTR_AT(mem, sizeof(upb_msg_internal), upb_msg);
247   memset(mem, 0, size);
248   return msg;
249 }
250 
/* Creates a new message with the given layout on the given arena. */
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
253 
upb_msg_getinternal(upb_msg * msg)254 UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
255   ptrdiff_t size = sizeof(upb_msg_internal);
256   return (upb_msg_internal*)((char*)msg - size);
257 }
258 
/* Clears the given message.  |l| is the layout of |msg|. */
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);
261 
/* Discards the unknown fields for this message only ("shallow": presumably
 * sub-messages are left untouched -- confirm against the implementation). */
void _upb_msg_discardunknown_shallow(upb_msg *msg);
264 
/* Adds unknown data (serialized protobuf data) to the given message.  The
 * |len| bytes at |data| are copied into the message instance.  Returns a bool
 * status; presumably false means allocation from |arena| failed -- confirm. */
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                         upb_arena *arena);
269 
270 /** upb_msg_ext ***************************************************************/
271 
/* The internal representation of an extension is self-describing: it contains
 * enough information that we can serialize it to binary format without needing
 * to look it up in a registry. */
typedef struct {
  const upb_msglayout_ext *ext;  /* Layout describing this extension. */
  /* The extension's value; which member is active depends on the field
   * described by |ext|. */
  union {
    upb_strview str;
    void *ptr;
    double dbl;
    char scalar_data[8];
  } data;
} upb_msg_ext;
284 
/* Adds the given extension data to the given message. The returned extension
 * will have its "ext" member initialized according to |ext|.
 * NOTE(review): per the name, an already-present extension is presumably
 * returned instead of a new one, and NULL presumably signals allocation
 * failure -- confirm against the implementation. */
upb_msg_ext *_upb_msg_getorcreateext(upb_msg *msg, const upb_msglayout_ext *ext,
                                     upb_arena *arena);
289 
/* Returns an array of extensions for this message and stores its length in
 * |*count|. Note: the array is ordered in reverse relative to the order of
 * creation. */
const upb_msg_ext *_upb_msg_getexts(const upb_msg *msg, size_t *count);
293 
/* Returns the extension of |msg| identified by the extension layout |ext|, or
 * NULL if the message has no such extension. */
const upb_msg_ext *_upb_msg_getext(const upb_msg *msg,
                                   const upb_msglayout_ext *ext);
298 
/* NOTE(review): presumably removes the extension identified by |ext| from
 * |msg|; the implementation is not visible in this header -- confirm. */
void _upb_msg_clearext(upb_msg *msg, const upb_msglayout_ext *ext);
300 
301 /** Hasbit access *************************************************************/
302 
_upb_hasbit(const upb_msg * msg,size_t idx)303 UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) {
304   return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0;
305 }
306 
_upb_sethas(const upb_msg * msg,size_t idx)307 UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) {
308   (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8));
309 }
310 
_upb_clearhas(const upb_msg * msg,size_t idx)311 UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) {
312   (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8)));
313 }
314 
_upb_msg_hasidx(const upb_msglayout_field * f)315 UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) {
316   UPB_ASSERT(f->presence > 0);
317   return f->presence;
318 }
319 
_upb_hasbit_field(const upb_msg * msg,const upb_msglayout_field * f)320 UPB_INLINE bool _upb_hasbit_field(const upb_msg *msg,
321                                   const upb_msglayout_field *f) {
322   return _upb_hasbit(msg, _upb_msg_hasidx(f));
323 }
324 
_upb_sethas_field(const upb_msg * msg,const upb_msglayout_field * f)325 UPB_INLINE void _upb_sethas_field(const upb_msg *msg,
326                                   const upb_msglayout_field *f) {
327   _upb_sethas(msg, _upb_msg_hasidx(f));
328 }
329 
_upb_clearhas_field(const upb_msg * msg,const upb_msglayout_field * f)330 UPB_INLINE void _upb_clearhas_field(const upb_msg *msg,
331                                     const upb_msglayout_field *f) {
332   _upb_clearhas(msg, _upb_msg_hasidx(f));
333 }
334 
335 /** Oneof case access *********************************************************/
336 
_upb_oneofcase(upb_msg * msg,size_t case_ofs)337 UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) {
338   return UPB_PTR_AT(msg, case_ofs, uint32_t);
339 }
340 
_upb_getoneofcase(const void * msg,size_t case_ofs)341 UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) {
342   return *UPB_PTR_AT(msg, case_ofs, uint32_t);
343 }
344 
_upb_oneofcase_ofs(const upb_msglayout_field * f)345 UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) {
346   UPB_ASSERT(f->presence < 0);
347   return ~(ptrdiff_t)f->presence;
348 }
349 
_upb_oneofcase_field(upb_msg * msg,const upb_msglayout_field * f)350 UPB_INLINE uint32_t *_upb_oneofcase_field(upb_msg *msg,
351                                           const upb_msglayout_field *f) {
352   return _upb_oneofcase(msg, _upb_oneofcase_ofs(f));
353 }
354 
_upb_getoneofcase_field(const upb_msg * msg,const upb_msglayout_field * f)355 UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg,
356                                             const upb_msglayout_field *f) {
357   return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f));
358 }
359 
_upb_has_submsg_nohasbit(const upb_msg * msg,size_t ofs)360 UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) {
361   return *UPB_PTR_AT(msg, ofs, const upb_msg*) != NULL;
362 }
363 
364 /** upb_array *****************************************************************/
365 
/* Our internal representation for repeated fields.  */
typedef struct {
  uintptr_t data;   /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */
  size_t len;   /* Measured in elements. */
  size_t size;  /* Measured in elements. */
  uint64_t junk;  /* NOTE(review): nothing in this header reads or writes this
                     member; its purpose is not evident from here. */
} upb_array;
373 
_upb_array_constptr(const upb_array * arr)374 UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
375   UPB_ASSERT((arr->data & 7) <= 4);
376   return (void*)(arr->data & ~(uintptr_t)7);
377 }
378 
_upb_array_tagptr(void * ptr,int elem_size_lg2)379 UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) {
380   UPB_ASSERT(elem_size_lg2 <= 4);
381   return (uintptr_t)ptr | elem_size_lg2;
382 }
383 
_upb_array_ptr(upb_array * arr)384 UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
385   return (void*)_upb_array_constptr(arr);
386 }
387 
_upb_tag_arrptr(void * ptr,int elem_size_lg2)388 UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) {
389   UPB_ASSERT(elem_size_lg2 <= 4);
390   UPB_ASSERT(((uintptr_t)ptr & 7) == 0);
391   return (uintptr_t)ptr | (unsigned)elem_size_lg2;
392 }
393 
_upb_array_new(upb_arena * a,size_t init_size,int elem_size_lg2)394 UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size,
395                                      int elem_size_lg2) {
396   const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8);
397   const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2);
398   upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes);
399   if (!arr) return NULL;
400   arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2);
401   arr->len = 0;
402   arr->size = init_size;
403   return arr;
404 }
405 
/* Resizes the capacity of the array to be at least min_size (measured in
 * elements, like upb_array.size).  Returns a bool status; presumably false
 * means allocation from |arena| failed -- confirm. */
bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena);
408 
/* Fallback functions for when the accessors require a resize.  The inline
 * accessors (_upb_array_resize_accessor2 / _upb_array_append_accessor2) call
 * these when the array pointed to by |*arr_ptr| is NULL or lacks capacity, and
 * return their result unchanged. */
void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
                                 int elem_size_lg2, upb_arena *arena);
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
                                int elem_size_lg2, upb_arena *arena);
414 
_upb_array_reserve(upb_array * arr,size_t size,upb_arena * arena)415 UPB_INLINE bool _upb_array_reserve(upb_array *arr, size_t size,
416                                    upb_arena *arena) {
417   if (arr->size < size) return _upb_array_realloc(arr, size, arena);
418   return true;
419 }
420 
_upb_array_resize(upb_array * arr,size_t size,upb_arena * arena)421 UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size,
422                                   upb_arena *arena) {
423   if (!_upb_array_reserve(arr, size, arena)) return false;
424   arr->len = size;
425   return true;
426 }
427 
_upb_array_accessor(const void * msg,size_t ofs,size_t * size)428 UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs,
429                                            size_t *size) {
430   const upb_array *arr = *UPB_PTR_AT(msg, ofs, const upb_array*);
431   if (arr) {
432     if (size) *size = arr->len;
433     return _upb_array_constptr(arr);
434   } else {
435     if (size) *size = 0;
436     return NULL;
437   }
438 }
439 
_upb_array_mutable_accessor(void * msg,size_t ofs,size_t * size)440 UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs,
441                                              size_t *size) {
442   upb_array *arr = *UPB_PTR_AT(msg, ofs, upb_array*);
443   if (arr) {
444     if (size) *size = arr->len;
445     return _upb_array_ptr(arr);
446   } else {
447     if (size) *size = 0;
448     return NULL;
449   }
450 }
451 
_upb_array_resize_accessor2(void * msg,size_t ofs,size_t size,int elem_size_lg2,upb_arena * arena)452 UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size,
453                                              int elem_size_lg2,
454                                              upb_arena *arena) {
455   upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *);
456   upb_array *arr = *arr_ptr;
457   if (!arr || arr->size < size) {
458     return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena);
459   }
460   arr->len = size;
461   return _upb_array_ptr(arr);
462 }
463 
_upb_array_append_accessor2(void * msg,size_t ofs,int elem_size_lg2,const void * value,upb_arena * arena)464 UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs,
465                                             int elem_size_lg2,
466                                             const void *value,
467                                             upb_arena *arena) {
468   upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *);
469   size_t elem_size = 1 << elem_size_lg2;
470   upb_array *arr = *arr_ptr;
471   void *ptr;
472   if (!arr || arr->len == arr->size) {
473     return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena);
474   }
475   ptr = _upb_array_ptr(arr);
476   memcpy(UPB_PTR_AT(ptr, arr->len * elem_size, char), value, elem_size);
477   arr->len++;
478   return true;
479 }
480 
481 /* Used by old generated code, remove once all code has been regenerated. */
_upb_sizelg2(upb_fieldtype_t type)482 UPB_INLINE int _upb_sizelg2(upb_fieldtype_t type) {
483   switch (type) {
484     case UPB_TYPE_BOOL:
485       return 0;
486     case UPB_TYPE_FLOAT:
487     case UPB_TYPE_INT32:
488     case UPB_TYPE_UINT32:
489     case UPB_TYPE_ENUM:
490       return 2;
491     case UPB_TYPE_MESSAGE:
492       return UPB_SIZE(2, 3);
493     case UPB_TYPE_DOUBLE:
494     case UPB_TYPE_INT64:
495     case UPB_TYPE_UINT64:
496       return 3;
497     case UPB_TYPE_STRING:
498     case UPB_TYPE_BYTES:
499       return UPB_SIZE(3, 4);
500   }
501   UPB_UNREACHABLE();
502 }
_upb_array_resize_accessor(void * msg,size_t ofs,size_t size,upb_fieldtype_t type,upb_arena * arena)503 UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size,
504                                              upb_fieldtype_t type,
505                                              upb_arena *arena) {
506   return _upb_array_resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena);
507 }
_upb_array_append_accessor(void * msg,size_t ofs,size_t elem_size,upb_fieldtype_t type,const void * value,upb_arena * arena)508 UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs,
509                                             size_t elem_size, upb_fieldtype_t type,
510                                             const void *value,
511                                             upb_arena *arena) {
512   (void)elem_size;
513   return _upb_array_append_accessor2(msg, ofs, _upb_sizelg2(type), value,
514                                      arena);
515 }
516 
517 /** upb_map *******************************************************************/
518 
/* Our internal representation for map fields.  Right now we use strmaps for
 * everything.  We'll likely want to use integer-specific maps for
 * integer-keyed maps. */
typedef struct {
  /* Size of key and val, based on the map type.  Strings are represented as '0'
   * because they must be handled specially (the helpers in this header compare
   * sizes against UPB_MAPTYPE_STRING for that case). */
  char key_size;
  char val_size;

  upb_strtable table;  /* Backing table; keys are stored as byte strings. */
} upb_map;
529 
/* Map entries aren't actually stored, they are only used during parsing.  For
 * parsing, it helps a lot if all map entry messages have the same layout.
 * The compiler and def.c must ensure that all map entries have this layout. */
typedef struct {
  upb_msg_internal internal;
  /* Key. */
  union {
    upb_strview str;  /* For str/bytes. */
    upb_value val;    /* For all other types. */
  } k;
  /* Value. */
  union {
    upb_strview str;  /* For str/bytes. */
    upb_value val;    /* For all other types. */
  } v;
} upb_map_entry;
544 
/* Creates a new map on the given arena with this key/value type, expressed as
 * the key and value sizes.  Presumably returns NULL if allocation fails
 * (implementation not visible here -- confirm). */
upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size);
547 
548 /* Converting between internal table representation and user values.
549  *
550  * _upb_map_tokey() and _upb_map_fromkey() are inverses.
551  * _upb_map_tovalue() and _upb_map_fromvalue() are inverses.
552  *
553  * These functions account for the fact that strings are treated differently
554  * from other types when stored in a map.
555  */
556 
_upb_map_tokey(const void * key,size_t size)557 UPB_INLINE upb_strview _upb_map_tokey(const void *key, size_t size) {
558   if (size == UPB_MAPTYPE_STRING) {
559     return *(upb_strview*)key;
560   } else {
561     return upb_strview_make((const char*)key, size);
562   }
563 }
564 
_upb_map_fromkey(upb_strview key,void * out,size_t size)565 UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) {
566   if (size == UPB_MAPTYPE_STRING) {
567     memcpy(out, &key, sizeof(key));
568   } else {
569     memcpy(out, key.data, size);
570   }
571 }
572 
_upb_map_tovalue(const void * val,size_t size,upb_value * msgval,upb_arena * a)573 UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval,
574                                  upb_arena *a) {
575   if (size == UPB_MAPTYPE_STRING) {
576     upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
577     if (!strp) return false;
578     *strp = *(upb_strview*)val;
579     *msgval = upb_value_ptr(strp);
580   } else {
581     memcpy(msgval, val, size);
582   }
583   return true;
584 }
585 
_upb_map_fromvalue(upb_value val,void * out,size_t size)586 UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) {
587   if (size == UPB_MAPTYPE_STRING) {
588     const upb_strview *strp = (const upb_strview*)upb_value_getptr(val);
589     memcpy(out, strp, sizeof(upb_strview));
590   } else {
591     memcpy(out, &val, size);
592   }
593 }
594 
595 /* Map operations, shared by reflection and generated code. */
596 
_upb_map_size(const upb_map * map)597 UPB_INLINE size_t _upb_map_size(const upb_map *map) {
598   return map->table.t.count;
599 }
600 
_upb_map_get(const upb_map * map,const void * key,size_t key_size,void * val,size_t val_size)601 UPB_INLINE bool _upb_map_get(const upb_map *map, const void *key,
602                              size_t key_size, void *val, size_t val_size) {
603   upb_value tabval;
604   upb_strview k = _upb_map_tokey(key, key_size);
605   bool ret = upb_strtable_lookup2(&map->table, k.data, k.size, &tabval);
606   if (ret && val) {
607     _upb_map_fromvalue(tabval, val, val_size);
608   }
609   return ret;
610 }
611 
_upb_map_next(const upb_map * map,size_t * iter)612 UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) {
613   upb_strtable_iter it;
614   it.t = &map->table;
615   it.index = *iter;
616   upb_strtable_next(&it);
617   *iter = it.index;
618   if (upb_strtable_done(&it)) return NULL;
619   return (void*)str_tabent(&it);
620 }
621 
_upb_map_set(upb_map * map,const void * key,size_t key_size,void * val,size_t val_size,upb_arena * a)622 UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size,
623                              void *val, size_t val_size, upb_arena *a) {
624   upb_strview strkey = _upb_map_tokey(key, key_size);
625   upb_value tabval = {0};
626   if (!_upb_map_tovalue(val, val_size, &tabval, a)) return false;
627 
628   /* TODO(haberman): add overwrite operation to minimize number of lookups. */
629   upb_strtable_remove(&map->table, strkey.data, strkey.size, NULL);
630   return upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a);
631 }
632 
_upb_map_delete(upb_map * map,const void * key,size_t key_size)633 UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) {
634   upb_strview k = _upb_map_tokey(key, key_size);
635   return upb_strtable_remove(&map->table, k.data, k.size, NULL);
636 }
637 
_upb_map_clear(upb_map * map)638 UPB_INLINE void _upb_map_clear(upb_map *map) {
639   upb_strtable_clear(&map->table);
640 }
641 
642 /* Message map operations, these get the map from the message first. */
643 
_upb_msg_map_size(const upb_msg * msg,size_t ofs)644 UPB_INLINE size_t _upb_msg_map_size(const upb_msg *msg, size_t ofs) {
645   upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
646   return map ? _upb_map_size(map) : 0;
647 }
648 
_upb_msg_map_get(const upb_msg * msg,size_t ofs,const void * key,size_t key_size,void * val,size_t val_size)649 UPB_INLINE bool _upb_msg_map_get(const upb_msg *msg, size_t ofs,
650                                  const void *key, size_t key_size, void *val,
651                                  size_t val_size) {
652   upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
653   if (!map) return false;
654   return _upb_map_get(map, key, key_size, val, val_size);
655 }
656 
_upb_msg_map_next(const upb_msg * msg,size_t ofs,size_t * iter)657 UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs,
658                                    size_t *iter) {
659   upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
660   if (!map) return NULL;
661   return _upb_map_next(map, iter);
662 }
663 
_upb_msg_map_set(upb_msg * msg,size_t ofs,const void * key,size_t key_size,void * val,size_t val_size,upb_arena * arena)664 UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key,
665                                  size_t key_size, void *val, size_t val_size,
666                                  upb_arena *arena) {
667   upb_map **map = UPB_PTR_AT(msg, ofs, upb_map *);
668   if (!*map) {
669     *map = _upb_map_new(arena, key_size, val_size);
670   }
671   return _upb_map_set(*map, key, key_size, val, val_size, arena);
672 }
673 
_upb_msg_map_delete(upb_msg * msg,size_t ofs,const void * key,size_t key_size)674 UPB_INLINE bool _upb_msg_map_delete(upb_msg *msg, size_t ofs, const void *key,
675                                     size_t key_size) {
676   upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
677   if (!map) return false;
678   return _upb_map_delete(map, key, key_size);
679 }
680 
_upb_msg_map_clear(upb_msg * msg,size_t ofs)681 UPB_INLINE void _upb_msg_map_clear(upb_msg *msg, size_t ofs) {
682   upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
683   if (!map) return;
684   _upb_map_clear(map);
685 }
686 
687 /* Accessing map key/value from a pointer, used by generated code only. */
688 
_upb_msg_map_key(const void * msg,void * key,size_t size)689 UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) {
690   const upb_tabent *ent = (const upb_tabent*)msg;
691   uint32_t u32len;
692   upb_strview k;
693   k.data = upb_tabstr(ent->key, &u32len);
694   k.size = u32len;
695   _upb_map_fromkey(k, key, size);
696 }
697 
_upb_msg_map_value(const void * msg,void * val,size_t size)698 UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) {
699   const upb_tabent *ent = (const upb_tabent*)msg;
700   upb_value v = {ent->val.val};
701   _upb_map_fromvalue(v, val, size);
702 }
703 
_upb_msg_map_set_value(void * msg,const void * val,size_t size)704 UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) {
705   upb_tabent *ent = (upb_tabent*)msg;
706   /* This is like _upb_map_tovalue() except the entry already exists so we can
707    * reuse the allocated upb_strview for string fields. */
708   if (size == UPB_MAPTYPE_STRING) {
709     upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val;
710     memcpy(strp, val, sizeof(*strp));
711   } else {
712     memcpy(&ent->val.val, val, size);
713   }
714 }
715 
716 /** _upb_mapsorter *************************************************************/
717 
/* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
 * Since maps can be recursive (map values can be messages which contain other
 * maps), _upb_mapsorter can contain a stack of maps. */
721 
/* Sorter state: a heap-allocated array of table-entry pointers (released with
 * free() in _upb_mapsorter_destroy()). */
typedef struct {
  upb_tabent const**entries;
  int size;  /* Entries currently in use; _upb_mapsorter_popmap() rewinds this
                to the popped map's start. */
  int cap;   /* Presumably the allocated capacity of |entries| -- confirm. */
} _upb_mapsorter;
727 
/* Iteration state for one sorted map: indexes into _upb_mapsorter.entries.
 * _upb_sortedmap_next() reads entries[pos++] until pos reaches |end|. */
typedef struct {
  int start;  /* Value _upb_mapsorter.size is restored to when this map is
                 popped. */
  int pos;    /* Index of the next entry to return. */
  int end;    /* Index at which iteration stops. */
} _upb_sortedmap;
733 
_upb_mapsorter_init(_upb_mapsorter * s)734 UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) {
735   s->entries = NULL;
736   s->size = 0;
737   s->cap = 0;
738 }
739 
_upb_mapsorter_destroy(_upb_mapsorter * s)740 UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) {
741   if (s->entries) free(s->entries);
742 }
743 
/* NOTE(review): implementation not visible in this header.  Presumably sorts
 * the entries of |map| by key (interpreted according to |key_type|), pushes
 * them onto the sorter's entry stack and fills in |sorted| for iteration with
 * _upb_sortedmap_next(); presumably returns false on allocation failure.
 * Confirm against the definition.  Pair with _upb_mapsorter_popmap(). */
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
                            const upb_map *map, _upb_sortedmap *sorted);
746 
_upb_mapsorter_popmap(_upb_mapsorter * s,_upb_sortedmap * sorted)747 UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) {
748   s->size = sorted->start;
749 }
750 
_upb_sortedmap_next(_upb_mapsorter * s,const upb_map * map,_upb_sortedmap * sorted,upb_map_entry * ent)751 UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map,
752                                     _upb_sortedmap *sorted,
753                                     upb_map_entry *ent) {
754   if (sorted->pos == sorted->end) return false;
755   const upb_tabent *tabent = s->entries[sorted->pos++];
756   upb_strview key = upb_tabstrview(tabent->key);
757   _upb_map_fromkey(key, &ent->k, map->key_size);
758   upb_value val = {tabent->val.val};
759   _upb_map_fromvalue(val, &ent->v, map->val_size);
760   return true;
761 }
762 
763 #ifdef __cplusplus
764 }  /* extern "C" */
765 #endif
766 
767 #include "upb/port_undef.inc"
768 
769 #endif /* UPB_MSG_INT_H_ */
770