1 /*
2 * Copyright (c) 2013 Hugh Bailey <obs.jim@gmail.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17 #pragma once
18
19 #include "lexer.h"
20
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24
25 EXPORT char *cf_literal_to_str(const char *literal, size_t count);
26
27 /* ------------------------------------------------------------------------- */
28 /*
29 * A C-family lexer token is defined as:
30 * 1.) A generic 'name' token. (abc123_def456)
31 * 2.) A numeric sequence (usually starting with a number)
32 * 3.) A sequence of generic whitespace defined as spaces and tabs
33 * 4.) A newline
34 * 5.) A string or character sequence (surrounded by single or double quotes)
35 * 6.) A single character of a type not specified above
36 */
/*
 * Classification of a c-family lexer token.  The enumerators follow the
 * token classes listed in the comment above, in the same order.
 */
enum cf_token_type {
	CFTOKEN_NONE,     /* value 0; the type a zero-filled token carries */
	CFTOKEN_NAME,     /* generic 'name' token, e.g. abc123_def456 */
	CFTOKEN_NUM,      /* numeric sequence */
	CFTOKEN_SPACETAB, /* run of spaces and tabs */
	CFTOKEN_NEWLINE,  /* newline */
	CFTOKEN_STRING,   /* string or character sequence in quotes */
	CFTOKEN_OTHER     /* single character of no other class */
};
46
/*
 * One token of the c-family lexer: two string references plus a type tag
 * and a pointer to a lexer.
 */
struct cf_token {
	/* NOTE(review): presumably the lexer that produced this token -- confirm */
	const struct cf_lexer *lex;
	/* token text */
	struct strref str;
	/* NOTE(review): presumably the text as it appears before the lexer's
	 * reformatting (line splices merged, comments replaced) -- confirm */
	struct strref unmerged_str;
	/* token class, see enum cf_token_type */
	enum cf_token_type type;
};
53
cf_token_clear(struct cf_token * t)54 static inline void cf_token_clear(struct cf_token *t)
55 {
56 memset(t, 0, sizeof(struct cf_token));
57 }
58
cf_token_copy(struct cf_token * dst,const struct cf_token * src)59 static inline void cf_token_copy(struct cf_token *dst,
60 const struct cf_token *src)
61 {
62 memcpy(dst, src, sizeof(struct cf_token));
63 }
64
cf_token_add(struct cf_token * dst,const struct cf_token * add)65 static inline void cf_token_add(struct cf_token *dst,
66 const struct cf_token *add)
67 {
68 strref_add(&dst->str, &add->str);
69 strref_add(&dst->unmerged_str, &add->unmerged_str);
70 }
71
72 /* ------------------------------------------------------------------------- */
73 /*
74 * The c-family lexer is a base lexer for generating a list of string
75 * reference tokens to be used with c-style languages.
76 *
77 * This base lexer is meant to be used as a stepping stone for an actual
78 * language lexer/parser.
79 *
 * It reformats the text in the following two ways:
81 * 1.) Spliced lines (escaped newlines) are merged
82 * 2.) All comments are converted to a single space
83 */
84
/*
 * State of the c-family base lexer.
 */
struct cf_lexer {
	/* NOTE(review): presumably the file name given to cf_lexer_lex() -- confirm */
	char *file;
	/* underlying generic lexer (see lexer.h) */
	struct lexer base_lexer;
	/* NOTE(review): presumably the buffer holding the reformatted text and
	 * the current write position inside it -- confirm in the .c file */
	char *reformatted, *write_offset;
	/* token list; its storage is what cf_lexer_get_tokens() returns */
	DARRAY(struct cf_token) tokens;
	bool unexpected_eof; /* unexpected multi-line comment eof */
};
92
93 EXPORT void cf_lexer_init(struct cf_lexer *lex);
94 EXPORT void cf_lexer_free(struct cf_lexer *lex);
95
cf_lexer_get_tokens(struct cf_lexer * lex)96 static inline struct cf_token *cf_lexer_get_tokens(struct cf_lexer *lex)
97 {
98 return lex->tokens.array;
99 }
100
101 EXPORT bool cf_lexer_lex(struct cf_lexer *lex, const char *str,
102 const char *file);
103
104 /* ------------------------------------------------------------------------- */
105 /* c-family preprocessor definition */
106
/*
 * A single preprocessor definition.
 */
struct cf_def {
	/* token holding the definition's name */
	struct cf_token name;
	/* parameter tokens, appended with cf_def_addparam() */
	DARRAY(struct cf_token) params;
	/* body tokens, appended with cf_def_addtoken() */
	DARRAY(struct cf_token) tokens;
	/* NOTE(review): presumably true for function-like definitions that
	 * take parameters -- confirm; cf_def_init() sets it to false */
	bool macro;
};
113
cf_def_init(struct cf_def * cfd)114 static inline void cf_def_init(struct cf_def *cfd)
115 {
116 cf_token_clear(&cfd->name);
117 da_init(cfd->params);
118 da_init(cfd->tokens);
119 cfd->macro = false;
120 }
121
/*
 * Appends a parameter token to the definition's params array through
 * da_push_back(), passing param as the item pointer.
 * NOTE(review): presumably da_push_back() copies the pointed-to token
 * into the array -- confirm against darray.h.
 */
static inline void cf_def_addparam(struct cf_def *cfd, struct cf_token *param)
{
	da_push_back(cfd->params, param);
}
126
/*
 * Appends a token to the definition's tokens array through
 * da_push_back(), passing token as the item pointer.
 * NOTE(review): presumably da_push_back() copies the pointed-to token
 * into the array -- confirm against darray.h.
 */
static inline void cf_def_addtoken(struct cf_def *cfd, struct cf_token *token)
{
	da_push_back(cfd->tokens, token);
}
131
cf_def_getparam(const struct cf_def * cfd,size_t idx)132 static inline struct cf_token *cf_def_getparam(const struct cf_def *cfd,
133 size_t idx)
134 {
135 return cfd->params.array + idx;
136 }
137
cf_def_free(struct cf_def * cfd)138 static inline void cf_def_free(struct cf_def *cfd)
139 {
140 cf_token_clear(&cfd->name);
141 da_free(cfd->params);
142 da_free(cfd->tokens);
143 }
144
145 /* ------------------------------------------------------------------------- */
146 /*
147 * C-family preprocessor
148 *
149 * This preprocessor allows for standard c-style preprocessor directives
150 * to be applied to source text, such as:
151 *
152 * + #include
153 * + #define/#undef
 *   + #ifdef/#ifndef/#if/#elif/#else/#endif (#if/#elif are still TODO,
 *     see the list below)
155 *
156 * Still left to implement (TODO):
157 * + #if/#elif
158 * + "defined" preprocessor keyword
159 * + system includes
160 * + variadic macros
161 * + custom callbacks (for things like pragma)
162 * + option to exclude features such as #import, variadic macros, and other
163 * features for certain language implementations
164 * + macro parameter string operator #
165 * + macro parameter token concatenation operator ##
166 * + predefined macros
167 * + restricted macros
168 */
169
/*
 * State of the c-family preprocessor.
 */
struct cf_preprocessor {
	/* NOTE(review): presumably the lexer passed to cf_preprocess() -- confirm */
	struct cf_lexer *lex;
	/* NOTE(review): presumably the error sink passed to cf_preprocess() -- confirm */
	struct error_data *ed;
	/* preprocessor definitions */
	DARRAY(struct cf_def) defines;
	/* duplicated directory strings added by
	 * cf_preprocessor_add_sys_include_dir() */
	DARRAY(char *) sys_include_dirs;
	/* NOTE(review): presumably lexers created for included files -- confirm */
	DARRAY(struct cf_lexer) dependencies;
	/* token list; its storage is what cf_preprocessor_get_tokens() returns */
	DARRAY(struct cf_token) tokens;
	/* NOTE(review): presumably set while inside an inactive conditional
	 * block -- confirm in the .c file */
	bool ignore_state;
};
179
180 EXPORT void cf_preprocessor_init(struct cf_preprocessor *pp);
181 EXPORT void cf_preprocessor_free(struct cf_preprocessor *pp);
182
183 EXPORT bool cf_preprocess(struct cf_preprocessor *pp, struct cf_lexer *lex,
184 struct error_data *ed);
185
186 static inline void
cf_preprocessor_add_sys_include_dir(struct cf_preprocessor * pp,const char * include_dir)187 cf_preprocessor_add_sys_include_dir(struct cf_preprocessor *pp,
188 const char *include_dir)
189 {
190 char *str = bstrdup(include_dir);
191 if (include_dir)
192 da_push_back(pp->sys_include_dirs, &str);
193 }
194
195 EXPORT void cf_preprocessor_add_def(struct cf_preprocessor *pp,
196 struct cf_def *def);
197 EXPORT void cf_preprocessor_remove_def(struct cf_preprocessor *pp,
198 const char *def_name);
199
200 static inline struct cf_token *
cf_preprocessor_get_tokens(struct cf_preprocessor * pp)201 cf_preprocessor_get_tokens(struct cf_preprocessor *pp)
202 {
203 return pp->tokens.array;
204 }
205
206 #ifdef __cplusplus
207 }
208 #endif
209