1 /*
2 **      cdecl -- C gibberish translator
3 **      src/c_ast.h
4 **
5 **      Copyright (C) 2017-2021  Paul J. Lucas
6 **
7 **      This program is free software: you can redistribute it and/or modify
8 **      it under the terms of the GNU General Public License as published by
9 **      the Free Software Foundation, either version 3 of the License, or
10 **      (at your option) any later version.
11 **
12 **      This program is distributed in the hope that it will be useful,
13 **      but WITHOUT ANY WARRANTY; without even the implied warranty of
14 **      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 **      GNU General Public License for more details.
16 **
17 **      You should have received a copy of the GNU General Public License
18 **      along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 #ifndef cdecl_c_ast_H
22 #define cdecl_c_ast_H
23 
24 /**
25  * @file
26  * Declares types to represent an _Abstract Syntax Tree_ (AST) for parsed C/C++
27  * declarations as well as functions for traversing and manipulating an AST.
28  *
29  * @sa [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
30  */
31 
32 // local
33 #include "pjl_config.h"                 /* must go first */
34 #include "c_kind.h"
35 #include "c_operator.h"
36 #include "c_sname.h"
37 #include "c_type.h"
38 #include "slist.h"
39 #include "types.h"
40 #include "util.h"
41 
42 /// @cond DOXYGEN_IGNORE
43 
44 // standard
45 #include <stdbool.h>
46 #include <stddef.h>                     /* for size_t */
47 
48 _GL_INLINE_HEADER_BEGIN
49 #ifndef C_AST_INLINE
50 # define C_AST_INLINE _GL_INLINE
51 #endif /* C_AST_INLINE */
52 
53 /// @endcond
54 
55 /**
56  * For c_array_ast.size, denotes `array[]`.
57  *
58  * @sa #C_ARRAY_SIZE_VARIABLE
59  * @sa c_array_size_t
60  */
61 #define C_ARRAY_SIZE_NONE     (-1)
62 
63 /**
64  * For c_array_ast.size, denotes `array[*]`.
65  *
66  * @sa #C_ARRAY_SIZE_NONE
67  * @sa c_array_size_t
68  */
69 #define C_ARRAY_SIZE_VARIABLE (-2)
70 
71 /**
72  * For c_function_ast.flags, denotes that the function is unspecified (unknown
73  * whether it's a member or non-member).
74  *
75  * @sa #C_FN_MASK_MEMBER
76  * @sa #C_FN_MEMBER
77  * @sa #C_FN_NON_MEMBER
78  */
79 #define C_FN_UNSPECIFIED      0u
80 
81 /**
82  * For c_function_ast.flags, denotes that the function is a member.
83  *
84  * @sa #C_FN_MASK_MEMBER
85  * @sa #C_FN_NON_MEMBER
86  * @sa #C_FN_UNSPECIFIED
87  */
88 #define C_FN_MEMBER           (1u << 0)
89 
90 /**
91  * For c_function_ast.flags, denotes that the function is a non-member.
92  *
93  * @sa #C_FN_MASK_MEMBER
94  * @sa #C_FN_MEMBER
95  * @sa #C_FN_UNSPECIFIED
96  */
97 #define C_FN_NON_MEMBER       (1u << 1)
98 
99 /**
100  * For c_function_ast.flags, member bitmask.
101  *
102  * @sa #C_FN_MEMBER
103  * @sa #C_FN_NON_MEMBER
104  * @sa #C_FN_UNSPECIFIED
105  */
106 #define C_FN_MASK_MEMBER      0x3u
107 
108 /**
109  * Convenience macro for iterating over all parameters of a function-like AST.
110  *
111  * @param VAR The \ref c_ast_param_t loop variable.
112  * @param AST The AST to iterate the function-like parameters of.
113  *
114  * @sa c_ast_params()
115  */
116 #define FOREACH_AST_FUNC_PARAM(VAR,AST) \
117   FOREACH_SLIST_NODE( VAR, &(AST)->as.func.param_ast_list )
118 
119 ///////////////////////////////////////////////////////////////////////////////
120 
121 /**
122  * The argument type for the `alignas` specifier.
123  */
124 enum c_alignas_arg {
125   C_ALIGNAS_NONE,                       ///< No `alignas` specifier.
126   C_ALIGNAS_EXPR,                       ///< `alignas(` _expr_ `)`
127   C_ALIGNAS_TYPE                        ///< `alignas(` _type_ `)`
128 };
129 
130 /**
131  * Data for the `alignas` specifier.
132  */
133 struct c_alignas {
134   union {
135     unsigned      expr;                 ///< Aligned to this number of bytes.
136     c_ast_t      *type_ast;             ///< Aligned the same as this type.
137   } as;                                 ///< Union discriminator.
138   c_alignas_arg_t kind;                 ///< Kind of `alignas` argument.
139   c_loc_t         loc;                  ///< Source location.
140 };
141 
142 /**
143  * The direction to traverse an AST using c_ast_visit().
144  */
145 enum c_visit_dir {
146   C_VISIT_DOWN,                         ///< Root to leaves.
147   C_VISIT_UP                            ///< Leaf to root.
148 };
149 
150 ///////////////////////////////////////////////////////////////////////////////
151 
152 /**
153  * Type of optional data passed to c_ast_visit().
154  *
155  * @note This is `uint64_t` so it can hold either the largest possible integer
156  * or a pointer.  It is _not_ `uintptr_t` because that can't hold a 64-bit
157  * integer on a 32-bit pointer platform.
158  */
159 typedef uint64_t c_ast_visit_data_t;
160 
161 /**
162  * The signature for functions passed to c_ast_visit().
163  *
164  * @param ast The AST to visit.
165  * @param v_data Optional data passed to c_ast_visit().
166  * @return Returning `true` will cause traversal to stop and \a ast to be
167  * returned to the caller of c_ast_visit().
168  */
169 typedef bool (*c_ast_visit_fn_t)( c_ast_t *ast, c_ast_visit_data_t v_data );
170 
171 /**
172  * @defgroup ast-nodes-group AST Nodes
173  *
174  * ## Layout
175  *
176  * The AST node `struct`s contain data specific to each \ref c_ast_kind_t.  In
177  * all cases where an AST node contains:
178  *
179  *  1. A pointer to another, that pointer is always declared first.
180  *  2. Parameters, they are always declared second.
181  *  3. Flags, they are always declared third.
182  *
183  * Since all the different kinds of AST nodes are declared within a `union`,
184  * these `struct` members are at the same offsets.  This makes traversing and
185  * manipulating an AST easy.
186  *
187  * ## Memory Management
188  *
189  * Typically, nodes of a tree data structure are freed by freeing the root node
190  * followed by its child nodes in turn, recursively.  This is _not_ done for
191  * AST nodes.  Instead, AST nodes created via c_ast_new() or c_ast_dup() are
192  * appended to a \ref c_ast_list_t.  Nodes are later freed by traversing the
193  * list and calling c_ast_free() on each node individually.  It's done this way
194  * to simplify node memory management.
195  *
196  * As an AST is being built, sometimes \ref K_PLACEHOLDER nodes are created
197  * temporarily.  Later, once an actual node is created, the \ref K_PLACEHOLDER
198  * node is replaced.  Rather than freeing a \ref K_PLACEHOLDER node immediately
199  * (and, for a parent node, set its "of" node to NULL just prior to being freed
200  * so as not to free its child node also), it's simply left on the list.  Once
201  * parsing is complete, the entire list is freed effectively "garbage
202  * collecting" all nodes.
203  * @{
204  */
205 
206 /**
207  * Generic "parent" AST node.
208  *
209  * @note All parent nodes have an AST pointer to what they're a parent of as
210  * their first `struct` member: this is taken advantage of.
211  */
212 struct c_parent_ast {
213   c_ast_t        *of_ast;               ///< What it's a parent of.
214 };
215 
216 /**
217  * AST node for a C/C++ array.
218  */
219 struct c_array_ast {
220   c_ast_t        *of_ast;               ///< What it's an array of.
221   c_array_size_t  size;                 ///< The array size.
222   c_tid_t         stids;                ///< E.g., `array[static const 10]`
223 };
224 
225 /**
226  * AST node for a C/C++ block (Apple extension).
227  *
228  * @note Members are laid out in the same order as c_function_ast: this is
229  * taken advantage of.
230  *
231  * @sa [Apple's Extensions to C](http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1370.pdf)
232  * @sa [Blocks Programming Topics](https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/Blocks)
233  */
234 struct c_apple_block_ast {
235   c_ast_t        *ret_ast;              ///< Return type.
236   c_ast_list_t    param_ast_list;       ///< Block parameter(s), if any.
237 };
238 
239 /**
240  * AST node for a built-in type.
241  *
242  * @note Members are laid out in the same order as c_typedef_ast: this is taken
243  * advantage of.
244  */
245 struct c_builtin_ast {
246   /// So `bit_width` is at the same offset as in c_typedef_ast.
247   void           *not_used;
248 
249   c_bit_width_t   bit_width;            ///< Bit-field width when &gt; 0.
250 };
251 
252 /**
253  * AST node for a C++ constructor.
254  *
255  * @note Members are laid out in the same order as c_function_ast: this is
256  * taken advantage of.
257  */
258 struct c_constructor_ast {
259   /// Constructors don't have a return type, but we need an unused pointer so
260   /// `param_ast_list` is at the same offset as in c_function_ast.
261   void           *not_used;
262 
263   c_ast_list_t    param_ast_list;       ///< Constructor parameter(s), if any.
264 };
265 
266 /**
267  * AST node for a C/C++ `enum`, `class`, `struct`, or `union` type.
268  *
269  * @note Members are laid out in the same order as `c_ptr_mbr_ast`: this is
270  * taken advantage of.
271  */
272 struct c_ecsu_ast {
273   c_ast_t        *of_ast;               ///< For `enum`, the fixed type, if any.
274   c_sname_t       ecsu_sname;           ///< enum/class/struct/union name
275 };
276 
277 /**
278  * AST node for a C/C++ function.
279  */
280 struct c_function_ast {
281   c_ast_t        *ret_ast;              ///< Return type.
282   c_ast_list_t    param_ast_list;       ///< Function parameter(s), if any.
283 
284   /**
285    * Bitwise-or of flags currently specifying whether the function is a member,
286    * non-member, or unspecified function.
287    *
288    * @sa #C_FN_UNSPECIFIED
289    * @sa #C_FN_MEMBER
290    * @sa #C_FN_NON_MEMBER
291    * @sa #C_FN_MASK_MEMBER
292    */
293   unsigned        flags;
294 };
295 
296 /**
297  * AST node for a C++ overloaded operator.
298  *
299  * @note Members are laid out in the same order as c_function_ast: this is
300  * taken advantage of.
301  */
302 struct c_operator_ast {
303   c_ast_t        *ret_ast;              ///< Return type.
304   c_ast_list_t    param_ast_list;       ///< Operator parameter(s), if any.
305 
306   /**
307    * Bitwise-or of flags specifying whether the user specified an operator as a
308    * member, non-member, or neither.  This is a mostly a subset of \ref
309    * c_operator.flags except this can be #C_OP_UNSPECIFIED.
310    *
311    * @sa #C_OP_UNSPECIFIED
312    * @sa #C_OP_MEMBER
313    * @sa #C_OP_NON_MEMBER
314    * @sa #C_OP_MASK_OVERLOAD
315    */
316   unsigned        flags;
317 
318   c_oper_id_t     oper_id;              ///< Which operator it is.
319 };
320 
321 /**
322  * AST node for a C++ pointer-to-member of a class.
323  *
324  * @note Members are laid out in the same order as c_ecsu_ast: this is taken
325  * advantage of.
326  */
327 struct c_ptr_mbr_ast {
328   c_ast_t        *of_ast;               ///< Member type.
329   c_sname_t       class_sname;          ///< When a member function; or empty.
330 };
331 
332 /**
333  * AST node for a C/C++ pointer, or a C++ reference or rvalue reference.
334  */
335 struct c_ptr_ref_ast {
336   c_ast_t        *to_ast;               ///< What it's a pointer/reference to.
337 };
338 
339 /**
340  * AST node for a C/C++ `typedef`.
341  *
342  * @note Even though %c_typedef_ast has an AST pointer as its
343  * first `struct` member, it is _not_ a parent "of" the underlying type, but
344  * instead a synonym "for" it; hence, it's _not_ included in #K_ANY_PARENT, but
345  * it is, however, included in #K_ANY_REFERRER.
346  *
347  * @note C++ `using` declarations are stored as their equivalent `typedef`
348  * declarations.
349  */
350 struct c_typedef_ast {
351   c_ast_t const  *for_ast;              ///< What it's a `typedef` for.
352   c_bit_width_t   bit_width;            ///< Bit-field width when &gt; 0.
353 };
354 
355 /**
356  * AST node for a C++ user-defined conversion operator.
357  */
358 struct c_udef_conv_ast {
359   c_ast_t        *conv_ast;             ///< Conversion type.
360 };
361 
362 /**
363  * AST node for a C++ user-defined literal.
364  *
365  * @note Members are laid out in the same order as c_function_ast: this is
366  * taken advantage of.
367  */
368 struct c_udef_lit_ast {
369   c_ast_t        *ret_ast;              ///< Return type.
370   c_ast_list_t    param_ast_list;       ///< Literal parameter(s).
371 };
372 
373 /**
374  * AST node for a parsed C/C++ declaration.
375  */
376 struct c_ast {
377   c_alignas_t           align;          ///< Alignment, if any.
378   c_ast_depth_t         depth;          ///< How many `()` deep.
379   c_ast_kind_t          kind;           ///< AST kind.
380   c_cast_kind_t         cast_kind;      ///< Cast kind, if any.
381   c_loc_t               loc;            ///< Source location.
382   c_sname_t             sname;          ///< Scoped name, if any.
383   c_type_t              type;           ///< Type, if any.
384   c_ast_t              *parent_ast;     ///< Parent AST node, if any.
385   c_ast_id_t            unique_id;      ///< Unique id (starts at 1).
386 
387   union {
388     c_parent_ast_t      parent;         ///< "Parent" member(s).
389     c_array_ast_t       array;          ///< Array member(s).
390     c_apple_block_ast_t block;          ///< Block member(s).
391     c_builtin_ast_t     builtin;        ///< Built-in type member(s).
392     c_constructor_ast_t constructor;    ///< Constructor member(s).
393     // nothing needed for K_DESTRUCTOR
394     c_ecsu_ast_t        ecsu;           ///< `enum`, `class`, `struct`, `union`
395     c_function_ast_t    func;           ///< Function member(s).
396     // nothing needed for K_NAME
397     c_operator_ast_t    oper;           ///< Operator member(s).
398     c_ptr_mbr_ast_t     ptr_mbr;        ///< Pointer-to-member member(s).
399     c_ptr_ref_ast_t     ptr_ref;        ///< Pointer or reference member(s).
400     c_typedef_ast_t     tdef;           ///< `typedef` member(s).
401     c_udef_conv_ast_t   udef_conv;      ///< User-defined conversion member(s).
402     c_udef_lit_ast_t    udef_lit;       ///< User-defined literal member(s).
403     // nothing needed for K_VARIADIC
404   } as;                                 ///< Union discriminator.
405 };
406 
407 /** @} */
408 
409 ////////// extern functions ///////////////////////////////////////////////////
410 
411 /**
412  * @defgroup ast-functions-group AST Functions
413  * Functions for accessing and manipulating AST nodes.
414  * @{
415  */
416 
417 /**
418  * Cleans up all AST data.
419  *
420  * @remarks Currently, this only checks that the number of AST nodes freed
421  * equals the number allocated.
422  *
423  * @sa c_ast_free()
424  * @sa c_ast_new()
425  */
426 void c_ast_cleanup( void );
427 
428 /**
429  * Duplicates the entire AST starting at \a ast.
430  *
431  * @param ast The AST to duplicate.
432  * @param ast_list If not NULL, the duplicated AST nodes are appended to the
433  * list.
434  * @return Returns the duplicated AST.
435  *
436  * @sa c_ast_free()
437  * @sa c_ast_new()
438  */
439 PJL_WARN_UNUSED_RESULT
440 c_ast_t* c_ast_dup( c_ast_t const *ast, c_ast_list_t *ast_list );
441 
442 /**
443  * Checks whether two ASTs are equal.
444  *
445  * @param i_ast The first AST.  May be NULL.
446  * @param j_ast The second AST.  May be NULL.
447  * @return Returns `true` only if the two ASTs are equal.
448  */
449 PJL_WARN_UNUSED_RESULT
450 bool c_ast_equal( c_ast_t const *i_ast, c_ast_t const *j_ast );
451 
452 /**
453  * Frees all memory used by \a ast _including_ \a ast itself.
454  *
455  * @param ast The AST to free.  If NULL, does nothing.
456  *
457  * @note Even though \a ast invariably is part of a larger abstract syntax
458  * tree, this function frees _only_ \a ast and _not_ any child AST node \a ast
459  * may have.  Hence to free all AST nodes, they all be kept track of
460  * independently via some other data structure, e.g., a \ref
461  * c_ast_list_t.
462  *
463  * @sa c_ast_cleanup()
464  * @sa c_ast_dup()
465  * @sa c_ast_list_cleanup()
466  * @sa c_ast_new()
467  */
468 void c_ast_free( c_ast_t *ast );
469 
470 /**
471  * Checks whether \a ast is an AST of one of \a kinds.
472  *
473  * @param ast The AST to check.
474  * @param kinds The bitwise-or of the kinds(s) \a ast can be.
475  * @return Returns `true` only if \a ast is one of \a kinds.
476  *
477  * @sa c_ast_is_parent()
478  * @sa c_ast_is_referrer()
479  */
480 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_is_kind_any(c_ast_t const * ast,c_ast_kind_t kinds)481 bool c_ast_is_kind_any( c_ast_t const *ast, c_ast_kind_t kinds ) {
482   return (ast->kind & kinds) != 0;
483 }
484 
485 /**
486  * Checks whether \a ast is an "orphan," that is: either has no parent AST or
487  * its parent no longer points to \a ast.
488  *
489  * @param ast The AST to check.
490  * @return Returns `true` only if \a ast is an orphan.
491  *
492  * @sa c_ast_is_parent()
493  * @sa c_ast_is_referrer()
494  */
495 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_is_orphan(c_ast_t const * ast)496 bool c_ast_is_orphan( c_ast_t const *ast ) {
497   return ast->parent_ast == NULL || ast->parent_ast->as.parent.of_ast != ast;
498 }
499 
500 /**
501  * Checks whether \a ast is a #K_ANY_PARENT.
502  *
503  * @param ast The AST to check.  May be NULL.
504  * @return Returns `true` only if it is.
505  *
506  * @sa c_ast_is_kind_any()
507  * @sa c_ast_is_orphan()
508  * @sa c_ast_is_referrer()
509  */
510 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_is_parent(c_ast_t const * ast)511 bool c_ast_is_parent( c_ast_t const *ast ) {
512   return ast != NULL && c_ast_is_kind_any( ast, K_ANY_PARENT );
513 }
514 
515 /**
516  * Checks whether \a ast is a #K_ANY_REFERRER.
517  *
518  * @param ast The AST to check.  May be NULL.
519  * @return Returns `true` only if it is.
520  *
521  * @sa c_ast_is_kind_any()
522  * @sa c_ast_is_orphan()
523  * @sa c_ast_is_parent()
524  */
525 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_is_referrer(c_ast_t const * ast)526 bool c_ast_is_referrer( c_ast_t const *ast ) {
527   return ast != NULL && c_ast_is_kind_any( ast, K_ANY_REFERRER );
528 }
529 
530 /**
531  * Cleans-up \a list by freeing only its nodes but _not_ \a list itself.
532  *
533  * @param list The AST list to free the list nodes of.
534  *
535  * @sa c_ast_free()
536  */
537 void c_ast_list_cleanup( c_ast_list_t *list );
538 
539 /**
540  * Creates a new AST node.
541  *
542  * @param kind The kind of AST to create.
543  * @param depth How deep within `()` it is.
544  * @param loc A pointer to the token location data.
545  * @param ast_list If not NULL, the new AST is appended to the list.
546  * @return Returns a pointer to a new AST.
547  *
548  * @sa c_ast_cleanup()
549  * @sa c_ast_dup()
550  * @sa c_ast_free()
551  */
552 PJL_WARN_UNUSED_RESULT
553 c_ast_t* c_ast_new( c_ast_kind_t kind, c_ast_depth_t depth,
554                     c_loc_t const *loc, c_ast_list_t *ast_list );
555 
556 /**
557  * Convenience function for getting function-like parameters.
558  *
559  * @param ast The AST to get the parameters of.
560  * @return Returns a pointer to the first parameter or NULL if none.
561  *
562  * @sa c_ast_params_count()
563  * @sa c_param_ast()
564  * @sa #FOREACH_AST_FUNC_PARAM
565  */
566 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_params(c_ast_t const * ast)567 c_ast_param_t const* c_ast_params( c_ast_t const *ast ) {
568   return ast->as.func.param_ast_list.head;
569 }
570 
571 /**
572  * Convenience function for getting the number of function-like parameters.
573  *
574  * @param ast The AST to get the number of parameters of.
575  * @return Returns said number of parameters.
576  *
577  * @sa c_ast_params()
578  * @sa c_param_ast()
579  * @sa #FOREACH_AST_FUNC_PARAM
580  */
581 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_params_count(c_ast_t const * ast)582 size_t c_ast_params_count( c_ast_t const *ast ) {
583   return slist_len( &ast->as.func.param_ast_list );
584 }
585 
586 /**
587  * Checks whether the parent AST of \a ast (if any) is \a kind.
588  *
589  * @param ast The AST to check the parent of.
590  * @param kind The kind to check for.
591  * @return Returns `true` only if the parent of \a ast is \a kind.
592  */
593 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_ast_parent_is_kind(c_ast_t const * ast,c_ast_kind_t kind)594 bool c_ast_parent_is_kind( c_ast_t const *ast, c_ast_kind_t kind ) {
595   return ast->parent_ast != NULL && ast->parent_ast->kind == kind;
596 }
597 
598 /**
599  * Sets the two-way pointer links between parent/child AST nodes.
600  *
601  * @param child_ast The "child" AST node to set the parent of.
602  * @param parent_ast The "parent" AST node whose child node is set.
603  */
604 void c_ast_set_parent( c_ast_t *child_ast, c_ast_t *parent_ast );
605 
606 /**
607  * Does a depth-first, post-order traversal of an AST.
608  *
609  * @param ast The AST to start from.  If NULL, does nothing.
610  * @param dir The direction to visit.
611  * @param visit_fn The visitor function to use.
612  * @param v_data Optional data passed to \a visit_fn.
613  * @return Returns a pointer to the AST the visitor stopped on or NULL.
614  *
615  * @note Function-like parameters are _not_ traversed into.  They're considered
616  * distinct ASTs.
617  */
618 PJL_NOWARN_UNUSED_RESULT
619 c_ast_t* c_ast_visit( c_ast_t *ast, c_visit_dir_t dir,
620                       c_ast_visit_fn_t visit_fn, c_ast_visit_data_t v_data );
621 
622 /**
623  * Convenience function to get the AST given a \ref c_ast_param_t.
624  *
625  * @param param A pointer to a \ref c_ast_param_t.
626  * @return Returns a pointer to the AST.
627  *
628  * @sa c_ast_params()
629  * @sa c_ast_params_count()
630  * @sa #FOREACH_AST_FUNC_PARAM
631  */
632 C_AST_INLINE PJL_WARN_UNUSED_RESULT
c_param_ast(c_ast_param_t const * param)633 c_ast_t const* c_param_ast( c_ast_param_t const *param ) {
634   return param->data;
635 }
636 
637 /** @} */
638 
639 ///////////////////////////////////////////////////////////////////////////////
640 
641 _GL_INLINE_HEADER_END
642 
643 #endif /* cdecl_c_ast_H */
644 /* vim:set et sw=2 ts=2: */
645