1 /* Flatbuffers parser attributes and symbols. */
2 
3 #ifndef SYMBOLS_H
4 #define SYMBOLS_H
5 
6 #include <stdint.h>
7 
8 #include "config.h"
9 #include "lex/tokens.h"
10 #include "hash/hash_table.h"
11 #include "hash/ptr_set.h"
12 
/*
 * Forward type names for the parser data model, declared up front so
 * the struct definitions below can refer to each other by pointer.
 */
typedef struct fb_token fb_token_t;
typedef struct fb_string fb_string_t;
typedef struct fb_value fb_value_t;
typedef struct fb_symbol fb_symbol_t;

typedef struct fb_metadata fb_metadata_t;

typedef struct fb_name fb_name_t;
/* Namespaces and references are represented by plain symbol nodes. */
typedef fb_symbol_t fb_namespace_t;
typedef fb_symbol_t fb_ref_t;
/* Doc is not strictly a symbol, just a chained token list, but close enough. */
typedef fb_symbol_t fb_doc_t;
/* An include entry is represented by a name node. */
typedef fb_name_t fb_include_t;
typedef struct fb_attribute fb_attribute_t;

typedef struct fb_member fb_member_t;
typedef struct fb_compound_type fb_compound_type_t;

typedef struct fb_scope fb_scope_t;
typedef struct fb_root_schema fb_root_schema_t;
typedef struct fb_root_type fb_root_type_t;
typedef struct fb_schema fb_schema_t;
35 
/*
 * Keyword token ids. Values are assigned consecutively starting at
 * LEX_TOK_KW_BASE; LEX_TOK_KW_END marks the end of the real keyword
 * range and the pseudo keywords are numbered after it.
 *
 * NOTE(review): the enumerator order presumably has to match a keyword
 * table in the lexer - confirm before inserting or reordering entries.
 */
enum {
    tok_kw_base = LEX_TOK_KW_BASE,
    tok_kw_bool,
    tok_kw_byte,
    tok_kw_char,
    tok_kw_enum,
    tok_kw_float32,
    tok_kw_float64,
    tok_kw_int,
    tok_kw_int8,
    tok_kw_int16,
    tok_kw_int32,
    tok_kw_int64,
    tok_kw_long,
    tok_kw_true,
    tok_kw_uint,
    tok_kw_false,
    tok_kw_float,
    tok_kw_short,
    tok_kw_table,
    tok_kw_ubyte,
    tok_kw_uint8,
    tok_kw_uint16,
    tok_kw_uint32,
    tok_kw_uint64,
    tok_kw_ulong,
    tok_kw_union,
    tok_kw_double,
    tok_kw_string,
    tok_kw_struct,
    tok_kw_ushort,
    tok_kw_include,
    tok_kw_attribute,
    tok_kw_namespace,
    tok_kw_root_type,
    tok_kw_rpc_service,
    tok_kw_file_extension,
    tok_kw_file_identifier,
    LEX_TOK_KW_END,
    /* Pseudo keywords. */
    tok_kw_doc_comment
};
78 
/*
 * A single lexed token.
 *
 * NOTE(review): field meanings below are inferred from their names and
 * from the remark further down that tokens point into the source
 * buffer - confirm against the lexer.
 */
struct fb_token {
    /* Start of the token text; presumably not zero terminated, see `len`. */
    const char *text;
    /* Length of the token text. */
    long len;
    /* Source line number of the token. */
    long linenum;
    /* Position of the token; presumably an offset within the line or buffer. */
    long pos;
    /* Token id; presumably a lexer token value such as tok_kw_*. */
    long id;
};
86 
/*
 * Scalar types known to the parser. `fb_missing_type` is deliberately 0
 * so that a zero-initialized value reads as "no type".
 *
 * The enumerator values are used as indices into the size table in
 * `sizeof_scalar_type`, so the two must be kept in the same order.
 */
enum fb_scalar_type {
    fb_missing_type = 0,
    fb_ulong,
    fb_uint,
    fb_ushort,
    fb_ubyte,
    fb_bool,
    fb_long,
    fb_int,
    fb_short,
    fb_byte,
    fb_double,
    fb_float,
    fb_char,
};

typedef enum fb_scalar_type fb_scalar_type_t;

/*
 * Returns the size in bytes of scalar type `st`.
 *
 * Returns 0 for `fb_missing_type` and for any value that is not a
 * member of `enum fb_scalar_type`, rather than reading past the end of
 * the lookup table.
 */
static inline size_t sizeof_scalar_type(fb_scalar_type_t st)
{
    static const size_t scalar_type_size[] = {
        0,          /* fb_missing_type */
        8, 4, 2, 1, /* fb_ulong, fb_uint, fb_ushort, fb_ubyte */
        1,          /* fb_bool */
        8, 4, 2, 1, /* fb_long, fb_int, fb_short, fb_byte */
        8, 4,       /* fb_double, fb_float */
        1           /* fb_char */
    };
    const size_t count = sizeof(scalar_type_size) / sizeof(scalar_type_size[0]);
    /* A negative value converts to a very large index and is rejected too. */
    const size_t index = (size_t)st;

    return index < count ? scalar_type_size[index] : 0;
}
113 
/*
 * Kinds of value a `fb_value` can carry: literal values (string, float,
 * int, uint, bool) and the various type designators used for fields
 * (scalar, string, references, vectors and fixed-length arrays).
 *
 * `vt_missing` is 0 so that a zero-initialized value reads as missing.
 *
 * NOTE(review): presumably stored in `fb_value.type` - confirm.
 */
enum fb_value_type {
    vt_missing = 0,
    vt_invalid = 1,
    vt_string,
    vt_float,
    vt_int,
    vt_uint,
    vt_bool,
    vt_vector_type,
    vt_scalar_type,
    vt_vector_string_type,
    vt_string_type,
    vt_vector_type_ref,
    vt_type_ref,
    vt_name_ref,
    vt_compound_type_ref,
    vt_vector_compound_type_ref,
    vt_fixed_array_type,
    vt_fixed_array_type_ref,
    vt_fixed_array_string_type,
    vt_fixed_array_compound_type_ref
};
136 
/* A string given as a pointer plus an explicit length. */
struct fb_string {
    char *s;
    /* printf statements rely on this being int. */
    int len;
};
142 
/*
 * A tagged value: an anonymous union holding the payload, plus `type`
 * telling which union member is meaningful.
 *
 * NOTE(review): `type` presumably holds an `enum fb_value_type`, and
 * `len` presumably holds the element count for fixed-length array
 * types - confirm against the parser.
 */
struct fb_value {
    union {
        fb_string_t s;
        double f;
        int64_t i;
        uint64_t u;
        uint8_t b;
        fb_token_t *t;
        fb_compound_type_t *ct;
        fb_scalar_type_t st;
        fb_ref_t *ref;
    };
    unsigned short type;
    uint32_t len;
};
158 
/*
 * Symbol kinds, compared against `fb_symbol.kind` to decide what a
 * symbol node actually is (see the get_*_if_visible helpers below,
 * which cast a symbol to `fb_compound_type_t` based on its kind).
 */
enum fb_kind {
    fb_is_table,
    fb_is_struct,
    fb_is_rpc_service,
    fb_is_enum,
    fb_is_union,
    fb_is_member
};
167 
/*
 * Used for white, gray, black graph coloring while detecting circular
 * references.
 *
 * Each value is a distinct bit so that flags can be combined in one
 * integer.
 *
 * NOTE(review): presumably stored in `fb_symbol.flags`; the coloring
 * remark above appears to apply to the two `fb_circular_*` flags only -
 * confirm the use of `fb_duplicate` and `fb_indexed`.
 */
enum fb_symbol_flags {
    fb_circular_open = 1,
    fb_circular_closed = 2,
    fb_duplicate = 4,
    fb_indexed = 8,
};
178 
/*
 * We keep the link first in all structs so that we can use a
 * generic list reverse function after all symbols have been pushed
 * within a scope.
 *
 * `fb_symbol` is also the leading member of `fb_member` and
 * `fb_compound_type`, which lets a symbol pointer be cast to the
 * containing type once `kind` has been inspected.
 */
struct fb_symbol {
    /* Next symbol in the list. */
    fb_symbol_t *link;
    /* Identifier token of the symbol. */
    fb_token_t *ident;
    /* One of `enum fb_kind`. */
    uint16_t kind;
    /* Flag bits; presumably a combination of `enum fb_symbol_flags`. */
    uint16_t flags;
};
190 
/* A list node carrying a name as a value; `link` comes first like in fb_symbol. */
struct fb_name {
    fb_name_t *link;
    fb_value_t name;
};
195 
/*
 * Map the hash table names to `__flatcc_` prefixed names before the
 * tables are declared, so the identifiers generated by
 * DECLARE_HASH_TABLE carry that prefix.
 *
 * NOTE(review): `fb_schema_table` has no such mapping, unlike the other
 * four tables - confirm whether that is intentional.
 */
#define fb_name_table __flatcc_fb_name_table
#define fb_value_set __flatcc_fb_value_set
#define fb_symbol_table __flatcc_fb_symbol_table
#define fb_scope_table __flatcc_fb_scope_table

/* Declares the `<name>_t` table types used by the structs below. */
DECLARE_HASH_TABLE(fb_name_table, fb_name_t *)
DECLARE_HASH_TABLE(fb_schema_table, fb_schema_t *)
DECLARE_HASH_TABLE(fb_value_set, fb_value_t *)
DECLARE_HASH_TABLE(fb_symbol_table, fb_symbol_t *)
DECLARE_HASH_TABLE(fb_scope_table, fb_scope_t *)
206 
/*
 * A member of a compound type: a struct or table field, or an rpc
 * method, as the per-field comments below describe.
 */
struct fb_member {
    /* Must stay first so the member can be handled as a generic symbol. */
    fb_symbol_t symbol;
    /* Struct or table field type, or method response type. */
    fb_value_t type;
    /* Method request type only used for methods. */
    fb_value_t req_type;
    /* NOTE(review): presumably the field's default or enum member value - confirm. */
    fb_value_t value;
    fb_metadata_t *metadata;
    fb_doc_t *doc;
    /* NOTE(review): presumably a mask of `enum fb_known_attribute_flags` - confirm. */
    uint16_t metadata_flags;
    /*
     * `align`, `offset` are for structs only.  64-bit allows for
     * dynamically configured 64-bit file offsets. Align is restricted to
     * at most 256 and must be a power of 2.
     */
    uint16_t align;
    uint64_t offset;
    uint64_t size;

    /* `id` is for table fields only. */
    uint64_t id;
    /*
     * Resolved `nested_flatbuffer` attribute type. Always a table if
     * set, and only on struct and table fields.
     */
    fb_compound_type_t *nest;
    /* Used to generate table fields in sorted order. */
    fb_member_t *order;

    /*
     * Used by code generators. Only valid during export and may hold
     * garbage from a previous export.
     */
    size_t export_index;
};
242 
/*
 * One metadata entry in a linked list: an identifier token with a
 * value. `link` and `ident` lead the struct in the same order as in
 * `fb_symbol`.
 */
struct fb_metadata {
    fb_metadata_t *link;
    fb_token_t *ident;
    fb_value_t value;
};
248 
/*
 * A compound declaration. The get_*_if_visible helpers below cast a
 * symbol to this type when its kind is struct, table, rpc service,
 * union or enum.
 */
struct fb_compound_type {
    /* Must stay first so the type can be handled as a generic symbol. */
    fb_symbol_t symbol;
    /* `scope` may span multiple input files, but has a unique namespace. */
    fb_scope_t *scope;
    /* Identifies the schema the symbol belongs to. */
    fb_schema_t *schema;
    /* List of members, linked through the symbol `link` field. */
    fb_symbol_t *members;
    /* NOTE(review): presumably the members chained through `fb_member.order` - confirm. */
    fb_member_t *ordered_members;
    fb_member_t *primary_key;
    fb_metadata_t *metadata;
    fb_doc_t *doc;
    fb_value_t type;
    /* NOTE(review): presumably indexes `members` by name - confirm. */
    fb_symbol_table_t index;
    /* Only for enums. */
    fb_value_set_t value_set;
    /* FNV-1a 32 bit hash of fully qualified name, accidental 0 maps to hash(""). */
    uint32_t type_hash;
    /* NOTE(review): presumably a mask of `enum fb_known_attribute_flags` - confirm. */
    uint16_t metadata_flags;
    /* `count` is for tables only. */
    uint64_t count;
    /* `align`, `size` is for structs only. */
    uint16_t align;
    uint64_t size;
    /* Sort structs with forward references. */
    fb_compound_type_t *order;
    /*
     * Used by code generators. Only valid during export and may hold
     * garbage from a previous export.
     */
    size_t export_index;
};
280 
/*
 * Indices of the attributes the parser knows about. Each index is also
 * the bit position of the matching flag in
 * `enum fb_known_attribute_flags`. `KNOWN_ATTR_COUNT` is one past the
 * last index.
 */
enum fb_known_attributes {
    fb_attr_unknown = 0,
    fb_attr_id = 1,
    fb_attr_deprecated = 2,
    fb_attr_original_order = 3,
    fb_attr_force_align = 4,
    fb_attr_bit_flags = 5,
    fb_attr_nested_flatbuffer = 6,
    fb_attr_key = 7,
    fb_attr_required = 8,
    fb_attr_hash = 9,
    fb_attr_base64 = 10,
    fb_attr_base64url = 11,
    fb_attr_primary_key = 12,
    fb_attr_sorted = 13,
    KNOWN_ATTR_COUNT
};
298 
/*
 * One bit per known attribute, at the bit position given by the
 * corresponding `fb_attr_*` index. The highest bit used is bit 13.
 *
 * NOTE(review): these are presumably the bits kept in the 16-bit
 * `metadata_flags` fields; if so, attribute indices must stay below
 * 16 - confirm before adding attributes.
 */
enum fb_known_attribute_flags {
    fb_f_unknown = 1 << fb_attr_unknown,
    fb_f_id = 1 << fb_attr_id,
    fb_f_deprecated = 1 << fb_attr_deprecated,
    fb_f_original_order = 1 << fb_attr_original_order,
    fb_f_force_align = 1 << fb_attr_force_align,
    fb_f_bit_flags = 1 << fb_attr_bit_flags,
    fb_f_nested_flatbuffer = 1 << fb_attr_nested_flatbuffer,
    fb_f_key = 1 << fb_attr_key,
    fb_f_required = 1 << fb_attr_required,
    fb_f_hash = 1 << fb_attr_hash,
    fb_f_base64 = 1 << fb_attr_base64,
    fb_f_base64url = 1 << fb_attr_base64url,
    fb_f_primary_key = 1 << fb_attr_primary_key,
    fb_f_sorted = 1 << fb_attr_sorted,
};
315 
/*
 * An attribute declaration. `name` comes first so the attribute can be
 * handled as a `fb_name_t`.
 */
struct fb_attribute {
    fb_name_t name;
    /* NOTE(review): presumably one of `enum fb_known_attributes` - confirm. */
    unsigned int known;
};
320 
/* A scope (namespace) with a table of the symbols it holds. */
struct fb_scope {
    /* Name of the scope, given as a reference. */
    fb_ref_t *name;
    fb_symbol_table_t symbol_index;
    /* NOTE(review): presumably the scope's prefix for generated names - confirm. */
    fb_string_t prefix;
};
326 
/*
 * State kept by the root schema; `fb_schema` both embeds one instance
 * and holds a pointer to one.
 *
 * NOTE(review): the counters presumably track include processing
 * (number of included files, current nesting depth, accumulated source
 * size) - confirm against the parser.
 */
struct fb_root_schema {
    fb_scope_table_t scope_index;
    fb_name_table_t attribute_index;
    fb_schema_table_t include_index;
    int include_count;
    int include_depth;
    size_t total_source_size;
};
335 
/* The root type declaration of a schema, before and after resolution. */
struct fb_root_type {
    /* Root decl. before symbol is visible. */
    fb_ref_t *name;
    /* Resolved symbol. */
    fb_symbol_t *type;
    /* NOTE(review): presumably the scope the declaration was made in - confirm. */
    fb_scope_t *scope;
};
343 
344 /*
345  * We store the parsed structure as token references. Tokens are stored
346  * in a single array pointing into the source buffer.
347  *
348  * Strings may contain multiple tokens when holding control characters
349  * and line breaks, but for our purposes the first string part is
350  * sufficient.
351  */
352 
/* The parsed representation of one schema file. */
struct fb_schema {
    fb_include_t *includes;
    fb_name_t *attributes;
    fb_value_t file_identifier;
    fb_value_t file_extension;
    fb_symbol_t *symbols;
    /* Topologically sorted structs. */
    fb_compound_type_t *ordered_structs;
    fb_root_type_t root_type;
    /* NOTE(review): presumably points at `root_schema_instance` of the root schema - confirm. */
    fb_root_schema_t *root_schema;
    /* Only used if schema is root. */
    fb_root_schema_t root_schema_instance;

    /* An optional scope prefix for generated code. */
    fb_string_t prefix;

    /* The `basenameup` string in a form that can be indexed. */
    fb_name_t name;

    /* These are allocated strings that must be freed. */

    /* Name of schema being processed without path or default extension. */
    char *basename;
    /* Uppercase basename for codegen and for case insensitive file inclusion check. */
    char *basenameup;
    /* Basename with extension. */
    char *errorname;

    /*
     * The dependency schemas visible to this schema (includes self).
     * Compound symbols have a link to schema which can be checked
     * against this set to see if the symbol is visible in this
     * context.
     */
    ptr_set_t visible_schema;
};
389 
/*
 * Helpers to ensure a symbol is actually visible because a scope
 * (namespace) may be extended when a parent includes another file
 * first.
 */
get_enum_if_visible(fb_schema_t * schema,fb_symbol_t * sym)395 static inline fb_compound_type_t *get_enum_if_visible(fb_schema_t *schema, fb_symbol_t *sym)
396 {
397     fb_compound_type_t *ct = 0;
398 
399     switch (sym->kind) {
400     case fb_is_union:
401         /* Fall through. */
402     case fb_is_enum:
403         ct = (fb_compound_type_t *)sym;
404         if (!ptr_set_exists(&schema->visible_schema, ct->schema)) {
405             ct = 0;
406         }
407         break;
408     default:
409         break;
410     }
411     return ct;
412 }
413 
get_compound_if_visible(fb_schema_t * schema,fb_symbol_t * sym)414 static inline fb_compound_type_t *get_compound_if_visible(fb_schema_t *schema, fb_symbol_t *sym)
415 {
416     fb_compound_type_t *ct = 0;
417 
418     switch (sym->kind) {
419     case fb_is_struct:
420     case fb_is_table:
421     case fb_is_rpc_service:
422     case fb_is_union:
423     case fb_is_enum:
424         ct = (fb_compound_type_t *)sym;
425         if (!ptr_set_exists(&schema->visible_schema, ct->schema)) {
426             ct = 0;
427         }
428         break;
429     default:
430         break;
431     }
432     return ct;
433 }
434 
/* Constants are specific to 32-bit FNV-1a hash. It is important to use unsigned integers. */

/*
 * Returns the value to start a 32-bit FNV-1a hash from. Pass it as the
 * `hash` argument of the first `fb_hash_fnv1a_32_append` call.
 *
 * Declared with `(void)` so the declaration is a proper prototype in C.
 */
static inline uint32_t fb_hash_fnv1a_32_init(void)
{
    return 2166136261UL;
}
440 
/*
 * Folds `len` bytes starting at `data` into the running 32-bit FNV-1a
 * hash `hash` and returns the updated hash. A `len` of 0 returns `hash`
 * unchanged without touching `data`.
 *
 * Hashing a buffer in several consecutive calls gives the same result
 * as hashing it in one call.
 */
static inline uint32_t fb_hash_fnv1a_32_append(uint32_t hash, const char *data, size_t len)
{
    /* Bytes are read as unsigned so values above 0x7f do not sign extend. */
    const uint8_t *bytes = (const uint8_t *)data;

    while (len--) {
        hash ^= *bytes++;
        /* The UL suffix keeps the multiplication unsigned on every platform. */
        hash = hash * 16777619UL;
    }
    return hash;
}
449 
450 #endif /* SYMBOLS_H */
451