1 /*
2  * Copyright (c) 2013 Hugh Bailey <obs.jim@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 #pragma once
18 
19 #include "lexer.h"
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 EXPORT char *cf_literal_to_str(const char *literal, size_t count);
26 
27 /* ------------------------------------------------------------------------- */
28 /*
29  * A C-family lexer token is defined as:
30  *   1.) A generic 'name' token.  (abc123_def456)
31  *   2.) A numeric sequence (usually starting with a number)
32  *   3.) A sequence of generic whitespace defined as spaces and tabs
33  *   4.) A newline
34  *   5.) A string or character sequence (surrounded by single or double quotes)
35  *   6.) A single character of a type not specified above
36  */
/* Classification assigned to each token produced by the c-family lexer. */
enum cf_token_type {
	CFTOKEN_NONE = 0, /* zero value; what a memset-cleared token holds */
	CFTOKEN_NAME,     /* generic 'name' token, e.g. abc123_def456 */
	CFTOKEN_NUM,      /* numeric sequence */
	CFTOKEN_SPACETAB, /* run of spaces and/or tabs */
	CFTOKEN_NEWLINE,  /* a newline */
	CFTOKEN_STRING,   /* single- or double-quoted sequence */
	CFTOKEN_OTHER     /* any single character not covered above */
};
46 
47 struct cf_token {
48 	const struct cf_lexer *lex;
49 	struct strref str;
50 	struct strref unmerged_str;
51 	enum cf_token_type type;
52 };
53 
cf_token_clear(struct cf_token * t)54 static inline void cf_token_clear(struct cf_token *t)
55 {
56 	memset(t, 0, sizeof(struct cf_token));
57 }
58 
cf_token_copy(struct cf_token * dst,const struct cf_token * src)59 static inline void cf_token_copy(struct cf_token *dst,
60 				 const struct cf_token *src)
61 {
62 	memcpy(dst, src, sizeof(struct cf_token));
63 }
64 
cf_token_add(struct cf_token * dst,const struct cf_token * add)65 static inline void cf_token_add(struct cf_token *dst,
66 				const struct cf_token *add)
67 {
68 	strref_add(&dst->str, &add->str);
69 	strref_add(&dst->unmerged_str, &add->unmerged_str);
70 }
71 
72 /* ------------------------------------------------------------------------- */
73 /*
74  *   The c-family lexer is a base lexer for generating a list of string
75  * reference tokens to be used with c-style languages.
76  *
77  *   This base lexer is meant to be used as a stepping stone for an actual
78  * language lexer/parser.
79  *
80  *   It reformats the text in the two following ways:
81  *     1.) Spliced lines (escaped newlines) are merged
82  *     2.) All comments are converted to a single space
83  */
84 
85 struct cf_lexer {
86 	char *file;
87 	struct lexer base_lexer;
88 	char *reformatted, *write_offset;
89 	DARRAY(struct cf_token) tokens;
90 	bool unexpected_eof; /* unexpected multi-line comment eof */
91 };
92 
93 EXPORT void cf_lexer_init(struct cf_lexer *lex);
94 EXPORT void cf_lexer_free(struct cf_lexer *lex);
95 
cf_lexer_get_tokens(struct cf_lexer * lex)96 static inline struct cf_token *cf_lexer_get_tokens(struct cf_lexer *lex)
97 {
98 	return lex->tokens.array;
99 }
100 
101 EXPORT bool cf_lexer_lex(struct cf_lexer *lex, const char *str,
102 			 const char *file);
103 
104 /* ------------------------------------------------------------------------- */
105 /* c-family preprocessor definition */
106 
107 struct cf_def {
108 	struct cf_token name;
109 	DARRAY(struct cf_token) params;
110 	DARRAY(struct cf_token) tokens;
111 	bool macro;
112 };
113 
cf_def_init(struct cf_def * cfd)114 static inline void cf_def_init(struct cf_def *cfd)
115 {
116 	cf_token_clear(&cfd->name);
117 	da_init(cfd->params);
118 	da_init(cfd->tokens);
119 	cfd->macro = false;
120 }
121 
cf_def_addparam(struct cf_def * cfd,struct cf_token * param)122 static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
123 {
124 	da_push_back(cfd->params, param);
125 }
126 
cf_def_addtoken(struct cf_def * cfd,struct cf_token * token)127 static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
128 {
129 	da_push_back(cfd->tokens, token);
130 }
131 
cf_def_getparam(const struct cf_def * cfd,size_t idx)132 static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
133 					       size_t idx)
134 {
135 	return cfd->params.array + idx;
136 }
137 
cf_def_free(struct cf_def * cfd)138 static inline void cf_def_free(struct cf_def *cfd)
139 {
140 	cf_token_clear(&cfd->name);
141 	da_free(cfd->params);
142 	da_free(cfd->tokens);
143 }
144 
145 /* ------------------------------------------------------------------------- */
146 /*
147  * C-family preprocessor
148  *
149  *   This preprocessor allows for standard c-style preprocessor directives
150  * to be applied to source text, such as:
151  *
152  *   + #include
153  *   + #define/#undef
154  *   + #ifdef/#ifndef/#else/#endif
155  *
156  *   Still left to implement (TODO):
157  *   + #if/#elif
158  *   + "defined" preprocessor keyword
159  *   + system includes
160  *   + variadic macros
161  *   + custom callbacks (for things like pragma)
162  *   + option to exclude features such as #import, variadic macros, and other
163  *     features for certain language implementations
164  *   + macro parameter string operator #
165  *   + macro parameter token concatenation operator ##
166  *   + predefined macros
167  *   + restricted macros
168  */
169 
170 struct cf_preprocessor {
171 	struct cf_lexer *lex;
172 	struct error_data *ed;
173 	DARRAY(struct cf_def) defines;
174 	DARRAY(char *) sys_include_dirs;
175 	DARRAY(struct cf_lexer) dependencies;
176 	DARRAY(struct cf_token) tokens;
177 	bool ignore_state;
178 };
179 
180 EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp);
181 EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp);
182 
183 EXPORT bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
184 			  struct error_data *ed);
185 
186 static inline void
cf_preprocessor_add_sys_include_dir(struct cf_preprocessor * pp,const char * include_dir)187 cf_preprocessor_add_sys_include_dir(struct cf_preprocessor *pp,
188 				    const char *include_dir)
189 {
190 	char *str = bstrdup(include_dir);
191 	if (include_dir)
192 		da_push_back(pp->sys_include_dirs, &str);
193 }
194 
195 EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
196 				    struct cf_def *def);
197 EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
198 				       const char *def_name);
199 
200 static inline struct cf_token *
cf_preprocessor_get_tokens(struct cf_preprocessor * pp)201 cf_preprocessor_get_tokens(struct cf_preprocessor *pp)
202 {
203 	return pp->tokens.array;
204 }
205 
206 #ifdef __cplusplus
207 }
208 #endif
209