1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 ** Our memory representation for parsing tables and messages themselves.
30 ** Functions in this file are used by generated code and possibly reflection.
31 **
32 ** The definitions in this file are internal to upb.
33 **/
34
35 #ifndef UPB_MSG_INT_H_
36 #define UPB_MSG_INT_H_
37
38 #include <stdint.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 #include "upb/msg.h"
43 #include "upb/table_internal.h"
44 #include "upb/upb.h"
45
46 /* Must be last. */
47 #include "upb/port_def.inc"
48
49 #ifdef __cplusplus
50 extern "C" {
51 #endif
52
53 /** upb_msglayout *************************************************************/
54
55 /* upb_msglayout represents the memory layout of a given upb_msgdef. The
56 * members are public so generated code can initialize them, but users MUST NOT
57 * read or write any of its members. */
58
/* These aren't real labels according to descriptor.proto, but in the table we
 * use these for map/packed fields instead of UPB_LABEL_REPEATED. */
enum {
  _UPB_LABEL_MAP = 4,
  /* NOTE(review): the original comment said the low 3 bits are common with
   * UPB_LABEL_REPEATED.  If UPB_LABEL_REPEATED is 3 (as LABEL_REPEATED is in
   * descriptor.proto), only the low 2 bits of 7 (0b111) match -- confirm. */
  _UPB_LABEL_PACKED = 7
};
65
/* Per-field layout record.  The helpers below (_upb_getmode, _upb_issubmsg,
 * _upb_msg_hasidx, _upb_oneofcase_ofs) interpret its members. */
typedef struct {
  uint32_t number;        /* Presumably the proto field number -- TODO confirm. */
  uint16_t offset;        /* Presumably the byte offset of the field's storage
                           * within the message -- TODO confirm. */
  int16_t presence;       /* If >0, the hasbit index (see _upb_msg_hasidx).
                           * If <0, the bitwise complement of the oneof-case
                           * offset (see _upb_oneofcase_ofs). */
  uint16_t submsg_index;  /* undefined if descriptortype != MESSAGE or GROUP. */
  uint8_t descriptortype; /* Compared against UPB_DTYPE_* by _upb_issubmsg. */
  uint8_t mode;           /* upb_fieldmode | upb_labelflags |
                           * (upb_rep << _UPB_REP_SHIFT).  The low two bits
                           * (_UPB_MODE_MASK) hold the upb_fieldmode. */
} upb_msglayout_field;
75
/* How a field is stored, kept in the low two bits of
 * upb_msglayout_field.mode.  Value 3 is deliberately unused:
 * _upb_repeated_or_map() relies on that to test a single bit. */
typedef enum {
  _UPB_MODE_MAP = 0,
  _UPB_MODE_ARRAY = 1,
  _UPB_MODE_SCALAR = 2,

  _UPB_MODE_MASK = 3,  /* Mask to isolate the mode from upb_rep. */
} upb_fieldmode;
83
/* Extra flags on the mode field.  These occupy bits above _UPB_MODE_MASK and
 * are OR-ed into upb_msglayout_field.mode alongside the upb_fieldmode. */
enum upb_labelflags {
  _UPB_MODE_IS_PACKED = 4,
  _UPB_MODE_IS_EXTENSION = 8,
};
89
/* Representation in the message.  Derivable from descriptortype and mode, but
 * fast access helps the serializer.  The value is stored in
 * upb_msglayout_field.mode shifted left by _UPB_REP_SHIFT. */
enum upb_rep {
  _UPB_REP_1BYTE = 0,
  _UPB_REP_4BYTE = 1,
  _UPB_REP_8BYTE = 2,
  _UPB_REP_STRVIEW = 3,

  /* Pointers use the 4-byte or 8-byte representation depending on the
   * platform's pointer width. */
#if UINTPTR_MAX == 0xffffffff
  _UPB_REP_PTR = _UPB_REP_4BYTE,
#else
  _UPB_REP_PTR = _UPB_REP_8BYTE,
#endif

  _UPB_REP_SHIFT = 6,  /* Bit offset of the rep in upb_msglayout_field.mode */
};
106
_upb_getmode(const upb_msglayout_field * field)107 UPB_INLINE upb_fieldmode _upb_getmode(const upb_msglayout_field *field) {
108 return (upb_fieldmode)(field->mode & 3);
109 }
110
_upb_repeated_or_map(const upb_msglayout_field * field)111 UPB_INLINE bool _upb_repeated_or_map(const upb_msglayout_field *field) {
112 /* This works because upb_fieldmode has no value 3. */
113 return !(field->mode & _UPB_MODE_SCALAR);
114 }
115
_upb_issubmsg(const upb_msglayout_field * field)116 UPB_INLINE bool _upb_issubmsg(const upb_msglayout_field *field) {
117 return field->descriptortype == UPB_DTYPE_MESSAGE ||
118 field->descriptortype == UPB_DTYPE_GROUP;
119 }
120
/* Forward declarations: only pointers to these types are needed below. */
struct upb_decstate;
struct upb_msglayout;

/* Signature of a per-field parser function stored in the fast table (see
 * _upb_fasttable_entry).  It takes the decoder state, the current input
 * pointer, the message being populated, and three opaque words (|table|,
 * |hasbits|, |data|), and returns a const char pointer -- presumably the
 * updated input position; confirm against the decoder. */
typedef const char *_upb_field_parser(struct upb_decstate *d, const char *ptr,
                                      upb_msg *msg, intptr_t table,
                                      uint64_t hasbits, uint64_t data);
127
/* One slot of upb_msglayout.fasttable: a parser function together with a
 * 64-bit data word (presumably passed to that parser -- TODO confirm). */
typedef struct {
  uint64_t field_data;
  _upb_field_parser *field_parser;
} _upb_fasttable_entry;
132
/* Sub-layout reference for a field.  Currently only sub-message layouts are
 * representable; the enum variant is still a TODO. */
typedef union {
  const struct upb_msglayout *submsg;
  // TODO: const upb_enumlayout *subenum;
} upb_msglayout_sub;
137
/* Extension handling mode of a message; stored in upb_msglayout.ext as a
 * uint8_t. */
typedef enum {
  _UPB_MSGEXT_NONE = 0,         // Non-extendable message.
  _UPB_MSGEXT_EXTENDABLE = 1,   // Normal extendable message.
  _UPB_MSGEXT_MSGSET = 2,       // MessageSet message.
  _UPB_MSGEXT_MSGSET_ITEM = 3,  // MessageSet item (temporary only, see decode.c)
} upb_msgext_mode;
144
/* MessageSet wire format is:
 *   message MessageSet {
 *     repeated group Item = 1 {
 *       required int32 type_id = 2;
 *       required string message = 3;
 *     }
 *   }
 *
 * The constants below are the field numbers used in that schema. */
typedef enum {
  _UPB_MSGSET_ITEM = 1,
  _UPB_MSGSET_TYPEID = 2,
  _UPB_MSGSET_MESSAGE = 3,
} upb_msgext_fieldnum;
158
/* Memory layout description of one message type. */
struct upb_msglayout {
  const upb_msglayout_sub *subs;     /* Sub-layouts; presumably indexed by
                                      * upb_msglayout_field.submsg_index. */
  const upb_msglayout_field *fields; /* Field records; field_count entries
                                      * (presumably -- TODO confirm). */
  /* Must be aligned to sizeof(void*).  Doesn't include internal members like
   * unknown fields, extension dict, pointer to msglayout, etc. */
  uint16_t size;
  uint16_t field_count;
  uint8_t ext;  // upb_msgext_mode, declared as uint8_t so sizeof(ext) == 1
  uint8_t dense_below; /* NOTE(review): meaning not visible in this header. */
  uint8_t table_mask;  /* NOTE(review): presumably a mask used to index
                        * fasttable -- confirm against the decoder. */
  /* To constant-initialize the tables of variable length, we need a flexible
   * array member, and we need to compile in C99 mode. */
  _upb_fasttable_entry fasttable[];
};
173
/* Layout description of a single extension field. */
typedef struct {
  upb_msglayout_field field;     /* Layout record of the extension field. */
  const upb_msglayout *extendee; /* Layout of the message type being extended. */
  upb_msglayout_sub sub;   /* NULL unless submessage or proto2 enum */
} upb_msglayout_ext;
179
/* Collection of message and extension layouts; presumably everything defined
 * by a single .proto file -- TODO confirm. */
typedef struct {
  const upb_msglayout **msgs;     /* Array of message layout pointers. */
  const upb_msglayout_ext **exts; /* Array of extension layout pointers. */
  int msg_count;                  /* Presumably the length of |msgs|. */
  int ext_count;                  /* Presumably the length of |exts|. */
} upb_msglayout_file;
186
187 /** upb_extreg ****************************************************************/
188
/* Adds the |count| extensions in the array |e| to the registry |r|.
 * Per the original description, returns false if an extension for the same
 * message type and field number was already in the registry, or if memory
 * allocation fails (the implementation is not visible in this header). */
bool _upb_extreg_add(upb_extreg *r, const upb_msglayout_ext **e, size_t count);
193
/* Looks up the extension (if any) registered in |r| for message type |l| and
 * field number |num|.  Returns a pointer to the registered extension layout;
 * presumably NULL when no such extension exists -- TODO confirm against the
 * implementation. */
const upb_msglayout_ext *_upb_extreg_get(const upb_extreg *r,
                                         const upb_msglayout *l, uint32_t num);
199
200 /** upb_msg *******************************************************************/
201
202 /* Internal members of a upb_msg that track unknown fields and/or extensions.
203 * We can change this without breaking binary compatibility. We put these
204 * before the user's data. The user's upb_msg* points after the
205 * upb_msg_internal. */
206
/* Header of the out-of-line buffer that holds a message's unknown fields and
 * extensions.  The payload bytes follow this header directly. */
typedef struct {
  /* Total size of this structure, including the data that follows.
   * Must be aligned to 8, which is alignof(upb_msg_ext) */
  uint32_t size;

  /* Offsets relative to the beginning of this structure.
   *
   * Unknown data grows forward from the beginning to unknown_end.
   * Extension data grows backward from size to ext_begin.
   * When the two meet, we're out of data and have to realloc.
   *
   * If we imagine that the final member of this struct is:
   *   char data[size - overhead];  // overhead = sizeof(upb_msg_internaldata)
   *
   * Then we have:
   *   unknown data: data[0 .. (unknown_end - overhead)]
   *   extensions data: data[(ext_begin - overhead) .. (size - overhead)] */
  uint32_t unknown_end;
  uint32_t ext_begin;
  /* Data follows, as if there were an array:
   *   char data[size - sizeof(upb_msg_internaldata)]; */
} upb_msg_internaldata;
229
/* Internal header placed immediately before the user-visible upb_msg data
 * (see upb_msg_getinternal).  Zero-initialized by _upb_msg_new_inl, so
 * |internal| starts out NULL. */
typedef struct {
  upb_msg_internaldata *internal;
} upb_msg_internal;
233
/* Maps upb_fieldtype_t -> memory size.  The table has 12 entries and is
 * defined outside this header. */
extern char _upb_fieldtype_to_size[12];
236
upb_msg_sizeof(const upb_msglayout * l)237 UPB_INLINE size_t upb_msg_sizeof(const upb_msglayout *l) {
238 return l->size + sizeof(upb_msg_internal);
239 }
240
_upb_msg_new_inl(const upb_msglayout * l,upb_arena * a)241 UPB_INLINE upb_msg *_upb_msg_new_inl(const upb_msglayout *l, upb_arena *a) {
242 size_t size = upb_msg_sizeof(l);
243 void *mem = upb_arena_malloc(a, size);
244 upb_msg *msg;
245 if (UPB_UNLIKELY(!mem)) return NULL;
246 msg = UPB_PTR_AT(mem, sizeof(upb_msg_internal), upb_msg);
247 memset(mem, 0, size);
248 return msg;
249 }
250
/* Creates a new message with the given layout on the given arena. */
upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a);
253
upb_msg_getinternal(upb_msg * msg)254 UPB_INLINE upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
255 ptrdiff_t size = sizeof(upb_msg_internal);
256 return (upb_msg_internal*)((char*)msg - size);
257 }
258
/* Clears the given message, whose layout is |l|. */
void _upb_msg_clear(upb_msg *msg, const upb_msglayout *l);

/* Discards the unknown fields for this message only (sub-messages are not
 * visited, hence "shallow"). */
void _upb_msg_discardunknown_shallow(upb_msg *msg);

/* Adds unknown data (serialized protobuf data) to the given message.  The
 * |len| bytes at |data| are copied into the message instance.  Returns a bool
 * -- presumably false on allocation failure; confirm against the
 * implementation. */
bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
                         upb_arena *arena);
269
270 /** upb_msg_ext ***************************************************************/
271
/* The internal representation of an extension is self-describing: it contains
 * enough information that we can serialize it to binary format without needing
 * to look it up in a registry. */
typedef struct {
  const upb_msglayout_ext *ext; /* Layout of the extension this value is for. */
  /* The value itself; which member is meaningful depends on the extension's
   * field type. */
  union {
    upb_strview str;
    void *ptr;
    double dbl;
    char scalar_data[8];
  } data;
} upb_msg_ext;
284
/* Adds the given extension data to the given message.  The returned extension
 * will have its "ext" member initialized according to |ext|.  As the name
 * suggests, an existing entry for |ext| is presumably returned instead of
 * adding a duplicate -- confirm against the implementation. */
upb_msg_ext *_upb_msg_getorcreateext(upb_msg *msg, const upb_msglayout_ext *ext,
                                     upb_arena *arena);

/* Returns an array of extensions for this message and stores the number of
 * entries in |*count|.  Note: the array is ordered in reverse relative to the
 * order of creation. */
const upb_msg_ext *_upb_msg_getexts(const upb_msg *msg, size_t *count);

/* Returns the extension value stored in |msg| for the extension |ext|, or
 * NULL if no such extension is set on this message. */
const upb_msg_ext *_upb_msg_getext(const upb_msg *msg,
                                   const upb_msglayout_ext *ext);

/* Removes the extension |ext| from |msg| (judging by the name; the
 * implementation is not visible in this header). */
void _upb_msg_clearext(upb_msg *msg, const upb_msglayout_ext *ext);
300
301 /** Hasbit access *************************************************************/
302
_upb_hasbit(const upb_msg * msg,size_t idx)303 UPB_INLINE bool _upb_hasbit(const upb_msg *msg, size_t idx) {
304 return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0;
305 }
306
_upb_sethas(const upb_msg * msg,size_t idx)307 UPB_INLINE void _upb_sethas(const upb_msg *msg, size_t idx) {
308 (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8));
309 }
310
_upb_clearhas(const upb_msg * msg,size_t idx)311 UPB_INLINE void _upb_clearhas(const upb_msg *msg, size_t idx) {
312 (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8)));
313 }
314
_upb_msg_hasidx(const upb_msglayout_field * f)315 UPB_INLINE size_t _upb_msg_hasidx(const upb_msglayout_field *f) {
316 UPB_ASSERT(f->presence > 0);
317 return f->presence;
318 }
319
_upb_hasbit_field(const upb_msg * msg,const upb_msglayout_field * f)320 UPB_INLINE bool _upb_hasbit_field(const upb_msg *msg,
321 const upb_msglayout_field *f) {
322 return _upb_hasbit(msg, _upb_msg_hasidx(f));
323 }
324
_upb_sethas_field(const upb_msg * msg,const upb_msglayout_field * f)325 UPB_INLINE void _upb_sethas_field(const upb_msg *msg,
326 const upb_msglayout_field *f) {
327 _upb_sethas(msg, _upb_msg_hasidx(f));
328 }
329
_upb_clearhas_field(const upb_msg * msg,const upb_msglayout_field * f)330 UPB_INLINE void _upb_clearhas_field(const upb_msg *msg,
331 const upb_msglayout_field *f) {
332 _upb_clearhas(msg, _upb_msg_hasidx(f));
333 }
334
335 /** Oneof case access *********************************************************/
336
_upb_oneofcase(upb_msg * msg,size_t case_ofs)337 UPB_INLINE uint32_t *_upb_oneofcase(upb_msg *msg, size_t case_ofs) {
338 return UPB_PTR_AT(msg, case_ofs, uint32_t);
339 }
340
_upb_getoneofcase(const void * msg,size_t case_ofs)341 UPB_INLINE uint32_t _upb_getoneofcase(const void *msg, size_t case_ofs) {
342 return *UPB_PTR_AT(msg, case_ofs, uint32_t);
343 }
344
_upb_oneofcase_ofs(const upb_msglayout_field * f)345 UPB_INLINE size_t _upb_oneofcase_ofs(const upb_msglayout_field *f) {
346 UPB_ASSERT(f->presence < 0);
347 return ~(ptrdiff_t)f->presence;
348 }
349
_upb_oneofcase_field(upb_msg * msg,const upb_msglayout_field * f)350 UPB_INLINE uint32_t *_upb_oneofcase_field(upb_msg *msg,
351 const upb_msglayout_field *f) {
352 return _upb_oneofcase(msg, _upb_oneofcase_ofs(f));
353 }
354
_upb_getoneofcase_field(const upb_msg * msg,const upb_msglayout_field * f)355 UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_msg *msg,
356 const upb_msglayout_field *f) {
357 return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f));
358 }
359
_upb_has_submsg_nohasbit(const upb_msg * msg,size_t ofs)360 UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_msg *msg, size_t ofs) {
361 return *UPB_PTR_AT(msg, ofs, const upb_msg*) != NULL;
362 }
363
364 /** upb_array *****************************************************************/
365
/* Our internal representation for repeated fields. */
typedef struct {
  uintptr_t data;  /* Tagged ptr: low 3 bits of ptr are lg2(elem size). */
  size_t len;      /* Measured in elements. */
  size_t size;     /* Measured in elements. */
  uint64_t junk;   /* NOTE(review): never read or written in this header;
                    * purpose unknown (possibly padding) -- confirm. */
} upb_array;
373
_upb_array_constptr(const upb_array * arr)374 UPB_INLINE const void *_upb_array_constptr(const upb_array *arr) {
375 UPB_ASSERT((arr->data & 7) <= 4);
376 return (void*)(arr->data & ~(uintptr_t)7);
377 }
378
_upb_array_tagptr(void * ptr,int elem_size_lg2)379 UPB_INLINE uintptr_t _upb_array_tagptr(void* ptr, int elem_size_lg2) {
380 UPB_ASSERT(elem_size_lg2 <= 4);
381 return (uintptr_t)ptr | elem_size_lg2;
382 }
383
_upb_array_ptr(upb_array * arr)384 UPB_INLINE void *_upb_array_ptr(upb_array *arr) {
385 return (void*)_upb_array_constptr(arr);
386 }
387
_upb_tag_arrptr(void * ptr,int elem_size_lg2)388 UPB_INLINE uintptr_t _upb_tag_arrptr(void* ptr, int elem_size_lg2) {
389 UPB_ASSERT(elem_size_lg2 <= 4);
390 UPB_ASSERT(((uintptr_t)ptr & 7) == 0);
391 return (uintptr_t)ptr | (unsigned)elem_size_lg2;
392 }
393
_upb_array_new(upb_arena * a,size_t init_size,int elem_size_lg2)394 UPB_INLINE upb_array *_upb_array_new(upb_arena *a, size_t init_size,
395 int elem_size_lg2) {
396 const size_t arr_size = UPB_ALIGN_UP(sizeof(upb_array), 8);
397 const size_t bytes = sizeof(upb_array) + (init_size << elem_size_lg2);
398 upb_array *arr = (upb_array*)upb_arena_malloc(a, bytes);
399 if (!arr) return NULL;
400 arr->data = _upb_tag_arrptr(UPB_PTR_AT(arr, arr_size, void), elem_size_lg2);
401 arr->len = 0;
402 arr->size = init_size;
403 return arr;
404 }
405
/* Resizes the capacity of the array to be at least min_size.  Returns a bool
 * that callers in this header treat as success/failure. */
bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena);

/* Fallback functions for when the accessors require a resize.  The inline
 * accessors call these when the array pointed to by |*arr_ptr| is NULL or
 * does not have enough capacity. */
void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
                                 int elem_size_lg2, upb_arena *arena);
bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
                                int elem_size_lg2, upb_arena *arena);
414
_upb_array_reserve(upb_array * arr,size_t size,upb_arena * arena)415 UPB_INLINE bool _upb_array_reserve(upb_array *arr, size_t size,
416 upb_arena *arena) {
417 if (arr->size < size) return _upb_array_realloc(arr, size, arena);
418 return true;
419 }
420
_upb_array_resize(upb_array * arr,size_t size,upb_arena * arena)421 UPB_INLINE bool _upb_array_resize(upb_array *arr, size_t size,
422 upb_arena *arena) {
423 if (!_upb_array_reserve(arr, size, arena)) return false;
424 arr->len = size;
425 return true;
426 }
427
_upb_array_accessor(const void * msg,size_t ofs,size_t * size)428 UPB_INLINE const void *_upb_array_accessor(const void *msg, size_t ofs,
429 size_t *size) {
430 const upb_array *arr = *UPB_PTR_AT(msg, ofs, const upb_array*);
431 if (arr) {
432 if (size) *size = arr->len;
433 return _upb_array_constptr(arr);
434 } else {
435 if (size) *size = 0;
436 return NULL;
437 }
438 }
439
_upb_array_mutable_accessor(void * msg,size_t ofs,size_t * size)440 UPB_INLINE void *_upb_array_mutable_accessor(void *msg, size_t ofs,
441 size_t *size) {
442 upb_array *arr = *UPB_PTR_AT(msg, ofs, upb_array*);
443 if (arr) {
444 if (size) *size = arr->len;
445 return _upb_array_ptr(arr);
446 } else {
447 if (size) *size = 0;
448 return NULL;
449 }
450 }
451
_upb_array_resize_accessor2(void * msg,size_t ofs,size_t size,int elem_size_lg2,upb_arena * arena)452 UPB_INLINE void *_upb_array_resize_accessor2(void *msg, size_t ofs, size_t size,
453 int elem_size_lg2,
454 upb_arena *arena) {
455 upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *);
456 upb_array *arr = *arr_ptr;
457 if (!arr || arr->size < size) {
458 return _upb_array_resize_fallback(arr_ptr, size, elem_size_lg2, arena);
459 }
460 arr->len = size;
461 return _upb_array_ptr(arr);
462 }
463
_upb_array_append_accessor2(void * msg,size_t ofs,int elem_size_lg2,const void * value,upb_arena * arena)464 UPB_INLINE bool _upb_array_append_accessor2(void *msg, size_t ofs,
465 int elem_size_lg2,
466 const void *value,
467 upb_arena *arena) {
468 upb_array **arr_ptr = UPB_PTR_AT(msg, ofs, upb_array *);
469 size_t elem_size = 1 << elem_size_lg2;
470 upb_array *arr = *arr_ptr;
471 void *ptr;
472 if (!arr || arr->len == arr->size) {
473 return _upb_array_append_fallback(arr_ptr, value, elem_size_lg2, arena);
474 }
475 ptr = _upb_array_ptr(arr);
476 memcpy(UPB_PTR_AT(ptr, arr->len * elem_size, char), value, elem_size);
477 arr->len++;
478 return true;
479 }
480
481 /* Used by old generated code, remove once all code has been regenerated. */
_upb_sizelg2(upb_fieldtype_t type)482 UPB_INLINE int _upb_sizelg2(upb_fieldtype_t type) {
483 switch (type) {
484 case UPB_TYPE_BOOL:
485 return 0;
486 case UPB_TYPE_FLOAT:
487 case UPB_TYPE_INT32:
488 case UPB_TYPE_UINT32:
489 case UPB_TYPE_ENUM:
490 return 2;
491 case UPB_TYPE_MESSAGE:
492 return UPB_SIZE(2, 3);
493 case UPB_TYPE_DOUBLE:
494 case UPB_TYPE_INT64:
495 case UPB_TYPE_UINT64:
496 return 3;
497 case UPB_TYPE_STRING:
498 case UPB_TYPE_BYTES:
499 return UPB_SIZE(3, 4);
500 }
501 UPB_UNREACHABLE();
502 }
_upb_array_resize_accessor(void * msg,size_t ofs,size_t size,upb_fieldtype_t type,upb_arena * arena)503 UPB_INLINE void *_upb_array_resize_accessor(void *msg, size_t ofs, size_t size,
504 upb_fieldtype_t type,
505 upb_arena *arena) {
506 return _upb_array_resize_accessor2(msg, ofs, size, _upb_sizelg2(type), arena);
507 }
_upb_array_append_accessor(void * msg,size_t ofs,size_t elem_size,upb_fieldtype_t type,const void * value,upb_arena * arena)508 UPB_INLINE bool _upb_array_append_accessor(void *msg, size_t ofs,
509 size_t elem_size, upb_fieldtype_t type,
510 const void *value,
511 upb_arena *arena) {
512 (void)elem_size;
513 return _upb_array_append_accessor2(msg, ofs, _upb_sizelg2(type), value,
514 arena);
515 }
516
517 /** upb_map *******************************************************************/
518
/* Right now we use strmaps for everything.  We'll likely want to use
 * integer-specific maps for integer-keyed maps. */
typedef struct {
  /* Size of key and val, based on the map type.  Strings are represented as
   * '0' because they must be handled specially (the helpers below compare the
   * size against UPB_MAPTYPE_STRING to detect them). */
  char key_size;
  char val_size;

  upb_strtable table; /* Backing table; keys are stored as byte strings. */
} upb_map;
529
/* Map entries aren't actually stored, they are only used during parsing.  For
 * parsing, it helps a lot if all map entry messages have the same layout.
 * The compiler and def.c must ensure that all map entries have this layout. */
typedef struct {
  upb_msg_internal internal; /* Same header that precedes every upb_msg. */
  union {
    upb_strview str;  /* For str/bytes. */
    upb_value val;    /* For all other types. */
  } k;                /* Key. */
  union {
    upb_strview str;  /* For str/bytes. */
    upb_value val;    /* For all other types. */
  } v;                /* Value. */
} upb_map_entry;
544
/* Creates a new map on the given arena with this key/value type.  The type is
 * expressed through |key_size| and |value_size| (see upb_map).  Presumably
 * returns NULL if allocation fails -- confirm against the implementation. */
upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size);
547
548 /* Converting between internal table representation and user values.
549 *
550 * _upb_map_tokey() and _upb_map_fromkey() are inverses.
551 * _upb_map_tovalue() and _upb_map_fromvalue() are inverses.
552 *
553 * These functions account for the fact that strings are treated differently
554 * from other types when stored in a map.
555 */
556
_upb_map_tokey(const void * key,size_t size)557 UPB_INLINE upb_strview _upb_map_tokey(const void *key, size_t size) {
558 if (size == UPB_MAPTYPE_STRING) {
559 return *(upb_strview*)key;
560 } else {
561 return upb_strview_make((const char*)key, size);
562 }
563 }
564
_upb_map_fromkey(upb_strview key,void * out,size_t size)565 UPB_INLINE void _upb_map_fromkey(upb_strview key, void* out, size_t size) {
566 if (size == UPB_MAPTYPE_STRING) {
567 memcpy(out, &key, sizeof(key));
568 } else {
569 memcpy(out, key.data, size);
570 }
571 }
572
_upb_map_tovalue(const void * val,size_t size,upb_value * msgval,upb_arena * a)573 UPB_INLINE bool _upb_map_tovalue(const void *val, size_t size, upb_value *msgval,
574 upb_arena *a) {
575 if (size == UPB_MAPTYPE_STRING) {
576 upb_strview *strp = (upb_strview*)upb_arena_malloc(a, sizeof(*strp));
577 if (!strp) return false;
578 *strp = *(upb_strview*)val;
579 *msgval = upb_value_ptr(strp);
580 } else {
581 memcpy(msgval, val, size);
582 }
583 return true;
584 }
585
_upb_map_fromvalue(upb_value val,void * out,size_t size)586 UPB_INLINE void _upb_map_fromvalue(upb_value val, void* out, size_t size) {
587 if (size == UPB_MAPTYPE_STRING) {
588 const upb_strview *strp = (const upb_strview*)upb_value_getptr(val);
589 memcpy(out, strp, sizeof(upb_strview));
590 } else {
591 memcpy(out, &val, size);
592 }
593 }
594
595 /* Map operations, shared by reflection and generated code. */
596
_upb_map_size(const upb_map * map)597 UPB_INLINE size_t _upb_map_size(const upb_map *map) {
598 return map->table.t.count;
599 }
600
_upb_map_get(const upb_map * map,const void * key,size_t key_size,void * val,size_t val_size)601 UPB_INLINE bool _upb_map_get(const upb_map *map, const void *key,
602 size_t key_size, void *val, size_t val_size) {
603 upb_value tabval;
604 upb_strview k = _upb_map_tokey(key, key_size);
605 bool ret = upb_strtable_lookup2(&map->table, k.data, k.size, &tabval);
606 if (ret && val) {
607 _upb_map_fromvalue(tabval, val, val_size);
608 }
609 return ret;
610 }
611
_upb_map_next(const upb_map * map,size_t * iter)612 UPB_INLINE void* _upb_map_next(const upb_map *map, size_t *iter) {
613 upb_strtable_iter it;
614 it.t = &map->table;
615 it.index = *iter;
616 upb_strtable_next(&it);
617 *iter = it.index;
618 if (upb_strtable_done(&it)) return NULL;
619 return (void*)str_tabent(&it);
620 }
621
_upb_map_set(upb_map * map,const void * key,size_t key_size,void * val,size_t val_size,upb_arena * a)622 UPB_INLINE bool _upb_map_set(upb_map *map, const void *key, size_t key_size,
623 void *val, size_t val_size, upb_arena *a) {
624 upb_strview strkey = _upb_map_tokey(key, key_size);
625 upb_value tabval = {0};
626 if (!_upb_map_tovalue(val, val_size, &tabval, a)) return false;
627
628 /* TODO(haberman): add overwrite operation to minimize number of lookups. */
629 upb_strtable_remove(&map->table, strkey.data, strkey.size, NULL);
630 return upb_strtable_insert(&map->table, strkey.data, strkey.size, tabval, a);
631 }
632
_upb_map_delete(upb_map * map,const void * key,size_t key_size)633 UPB_INLINE bool _upb_map_delete(upb_map *map, const void *key, size_t key_size) {
634 upb_strview k = _upb_map_tokey(key, key_size);
635 return upb_strtable_remove(&map->table, k.data, k.size, NULL);
636 }
637
_upb_map_clear(upb_map * map)638 UPB_INLINE void _upb_map_clear(upb_map *map) {
639 upb_strtable_clear(&map->table);
640 }
641
642 /* Message map operations, these get the map from the message first. */
643
_upb_msg_map_size(const upb_msg * msg,size_t ofs)644 UPB_INLINE size_t _upb_msg_map_size(const upb_msg *msg, size_t ofs) {
645 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
646 return map ? _upb_map_size(map) : 0;
647 }
648
_upb_msg_map_get(const upb_msg * msg,size_t ofs,const void * key,size_t key_size,void * val,size_t val_size)649 UPB_INLINE bool _upb_msg_map_get(const upb_msg *msg, size_t ofs,
650 const void *key, size_t key_size, void *val,
651 size_t val_size) {
652 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
653 if (!map) return false;
654 return _upb_map_get(map, key, key_size, val, val_size);
655 }
656
_upb_msg_map_next(const upb_msg * msg,size_t ofs,size_t * iter)657 UPB_INLINE void *_upb_msg_map_next(const upb_msg *msg, size_t ofs,
658 size_t *iter) {
659 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
660 if (!map) return NULL;
661 return _upb_map_next(map, iter);
662 }
663
_upb_msg_map_set(upb_msg * msg,size_t ofs,const void * key,size_t key_size,void * val,size_t val_size,upb_arena * arena)664 UPB_INLINE bool _upb_msg_map_set(upb_msg *msg, size_t ofs, const void *key,
665 size_t key_size, void *val, size_t val_size,
666 upb_arena *arena) {
667 upb_map **map = UPB_PTR_AT(msg, ofs, upb_map *);
668 if (!*map) {
669 *map = _upb_map_new(arena, key_size, val_size);
670 }
671 return _upb_map_set(*map, key, key_size, val, val_size, arena);
672 }
673
_upb_msg_map_delete(upb_msg * msg,size_t ofs,const void * key,size_t key_size)674 UPB_INLINE bool _upb_msg_map_delete(upb_msg *msg, size_t ofs, const void *key,
675 size_t key_size) {
676 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
677 if (!map) return false;
678 return _upb_map_delete(map, key, key_size);
679 }
680
_upb_msg_map_clear(upb_msg * msg,size_t ofs)681 UPB_INLINE void _upb_msg_map_clear(upb_msg *msg, size_t ofs) {
682 upb_map *map = *UPB_PTR_AT(msg, ofs, upb_map *);
683 if (!map) return;
684 _upb_map_clear(map);
685 }
686
687 /* Accessing map key/value from a pointer, used by generated code only. */
688
_upb_msg_map_key(const void * msg,void * key,size_t size)689 UPB_INLINE void _upb_msg_map_key(const void* msg, void* key, size_t size) {
690 const upb_tabent *ent = (const upb_tabent*)msg;
691 uint32_t u32len;
692 upb_strview k;
693 k.data = upb_tabstr(ent->key, &u32len);
694 k.size = u32len;
695 _upb_map_fromkey(k, key, size);
696 }
697
_upb_msg_map_value(const void * msg,void * val,size_t size)698 UPB_INLINE void _upb_msg_map_value(const void* msg, void* val, size_t size) {
699 const upb_tabent *ent = (const upb_tabent*)msg;
700 upb_value v = {ent->val.val};
701 _upb_map_fromvalue(v, val, size);
702 }
703
_upb_msg_map_set_value(void * msg,const void * val,size_t size)704 UPB_INLINE void _upb_msg_map_set_value(void* msg, const void* val, size_t size) {
705 upb_tabent *ent = (upb_tabent*)msg;
706 /* This is like _upb_map_tovalue() except the entry already exists so we can
707 * reuse the allocated upb_strview for string fields. */
708 if (size == UPB_MAPTYPE_STRING) {
709 upb_strview *strp = (upb_strview*)(uintptr_t)ent->val.val;
710 memcpy(strp, val, sizeof(*strp));
711 } else {
712 memcpy(&ent->val.val, val, size);
713 }
714 }
715
716 /** _upb_mapsorter *************************************************************/
717
/* _upb_mapsorter sorts maps and provides ordered iteration over the entries.
 * Since maps can be recursive (map values can be messages which contain other
 * maps), _upb_mapsorter can contain a stack of maps. */
721
/* Scratch state shared by all maps that are currently being sorted. */
typedef struct {
  upb_tabent const**entries; /* Heap array of table-entry pointers; released
                              * with free() in _upb_mapsorter_destroy(). */
  int size;                  /* Number of entries currently in use (reset by
                              * _upb_mapsorter_popmap()). */
  int cap;                   /* Presumably the allocated capacity of
                              * |entries| -- TODO confirm. */
} _upb_mapsorter;
727
/* Iteration state for one sorted map: a window of indices into
 * _upb_mapsorter.entries. */
typedef struct {
  int start; /* Sorter size to restore on _upb_mapsorter_popmap(). */
  int pos;   /* Index of the next entry _upb_sortedmap_next() will return. */
  int end;   /* Iteration stops once pos reaches this index. */
} _upb_sortedmap;
733
_upb_mapsorter_init(_upb_mapsorter * s)734 UPB_INLINE void _upb_mapsorter_init(_upb_mapsorter *s) {
735 s->entries = NULL;
736 s->size = 0;
737 s->cap = 0;
738 }
739
_upb_mapsorter_destroy(_upb_mapsorter * s)740 UPB_INLINE void _upb_mapsorter_destroy(_upb_mapsorter *s) {
741 if (s->entries) free(s->entries);
742 }
743
/* Pushes |map| onto the sorter |s| and fills in |sorted| so that its entries
 * can be iterated with _upb_sortedmap_next().  Entries are presumably ordered
 * by key according to |key_type|, and false presumably signals allocation
 * failure -- the implementation is not visible here; confirm.  Undo with
 * _upb_mapsorter_popmap(). */
bool _upb_mapsorter_pushmap(_upb_mapsorter *s, upb_descriptortype_t key_type,
                            const upb_map *map, _upb_sortedmap *sorted);
746
_upb_mapsorter_popmap(_upb_mapsorter * s,_upb_sortedmap * sorted)747 UPB_INLINE void _upb_mapsorter_popmap(_upb_mapsorter *s, _upb_sortedmap *sorted) {
748 s->size = sorted->start;
749 }
750
_upb_sortedmap_next(_upb_mapsorter * s,const upb_map * map,_upb_sortedmap * sorted,upb_map_entry * ent)751 UPB_INLINE bool _upb_sortedmap_next(_upb_mapsorter *s, const upb_map *map,
752 _upb_sortedmap *sorted,
753 upb_map_entry *ent) {
754 if (sorted->pos == sorted->end) return false;
755 const upb_tabent *tabent = s->entries[sorted->pos++];
756 upb_strview key = upb_tabstrview(tabent->key);
757 _upb_map_fromkey(key, &ent->k, map->key_size);
758 upb_value val = {tabent->val.val};
759 _upb_map_fromvalue(val, &ent->v, map->val_size);
760 return true;
761 }
762
763 #ifdef __cplusplus
764 } /* extern "C" */
765 #endif
766
767 #include "upb/port_undef.inc"
768
769 #endif /* UPB_MSG_INT_H_ */
770