1 /*
2  * Copyright (c) 2013 Hugh Bailey <obs.jim@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 #pragma once
18 
19 #include "c99defs.h"
20 #include "dstr.h"
21 #include "darray.h"
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
27 /* ------------------------------------------------------------------------- */
28 /* string reference (string segment within an already existing array) */
29 
/*
 * Non-owning view of a run of characters inside an existing buffer.
 * None of the inline helpers in this header allocate or free `array`.
 */
struct strref {
	const char *array; /* first character of the segment; NULL after strref_clear() */
	size_t len;        /* number of characters in the segment */
};
34 
strref_clear(struct strref * dst)35 static inline void strref_clear(struct strref *dst)
36 {
37 	dst->array = NULL;
38 	dst->len = 0;
39 }
40 
strref_set(struct strref * dst,const char * array,size_t len)41 static inline void strref_set(struct strref *dst, const char *array, size_t len)
42 {
43 	dst->array = array;
44 	dst->len = len;
45 }
46 
strref_copy(struct strref * dst,const struct strref * src)47 static inline void strref_copy(struct strref *dst, const struct strref *src)
48 {
49 	dst->array = src->array;
50 	dst->len = src->len;
51 }
52 
strref_add(struct strref * dst,const struct strref * t)53 static inline void strref_add(struct strref *dst, const struct strref *t)
54 {
55 	if (!dst->array)
56 		strref_copy(dst, t);
57 	else
58 		dst->len += t->len;
59 }
60 
strref_is_empty(const struct strref * str)61 static inline bool strref_is_empty(const struct strref *str)
62 {
63 	return !str || !str->array || !str->len || !*str->array;
64 }
65 
/*
 * Comparison helpers for string references; the implementations live outside
 * this header.  The first two compare a reference against a C string, the
 * last two compare two references.
 * NOTE(review): presumably these return <0 / 0 / >0 like strcmp, with the
 * "cmpi" variants being case-insensitive -- confirm against the definitions.
 */
EXPORT int strref_cmp(const struct strref *str1, const char *str2);
EXPORT int strref_cmpi(const struct strref *str1, const char *str2);
EXPORT int strref_cmp_strref(const struct strref *str1,
			     const struct strref *str2);
EXPORT int strref_cmpi_strref(const struct strref *str1,
			      const struct strref *str2);
72 
73 /* ------------------------------------------------------------------------- */
74 
/*
 * Number-format validators, implemented outside this header.  The strref
 * wrappers below pass a segment's array and length as (str, n).
 * NOTE(review): presumably n is the number of characters to examine --
 * confirm against the definitions.
 */
EXPORT bool valid_int_str(const char *str, size_t n);
EXPORT bool valid_float_str(const char *str, size_t n);
77 
valid_int_strref(const struct strref * str)78 static inline bool valid_int_strref(const struct strref *str)
79 {
80 	return valid_int_str(str->array, str->len);
81 }
82 
valid_float_strref(const struct strref * str)83 static inline bool valid_float_strref(const struct strref *str)
84 {
85 	return valid_float_str(str->array, str->len);
86 }
87 
/*
 * Returns true for space, tab, carriage return and line feed.
 * Other characters, including vertical tab and form feed, are not treated
 * as whitespace.
 */
static inline bool is_whitespace(char ch)
{
	switch (ch) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
92 
/* Returns true when ch is a line feed or a carriage return. */
static inline bool is_newline(char ch)
{
	if (ch == '\n')
		return true;

	return ch == '\r';
}
97 
/* Returns true for horizontal whitespace only: a space or a tab. */
static inline bool is_space_or_tab(const char ch)
{
	switch (ch) {
	case '\t':
	case ' ':
		return true;
	default:
		return false;
	}
}
102 
/*
 * Returns true when (ch1, ch2) is a two-character newline: "\r\n" or "\n\r".
 * Two identical newline characters do not form a pair.
 */
static inline bool is_newline_pair(char ch1, char ch2)
{
	if (ch1 == '\r')
		return ch2 == '\n';

	if (ch1 == '\n')
		return ch2 == '\r';

	return false;
}
107 
/*
 * Returns how many characters the newline sequence at the start of `array`
 * occupies:
 *   2 for "\r\n" or "\n\r",
 *   1 for a lone '\r' or '\n',
 *   0 when array does not start with a newline character.
 *
 * array must be readable up to its NUL terminator.
 */
static inline int newline_size(const char *array)
{
	const char first = array[0];

	if (first != '\r' && first != '\n')
		return 0;

	/*
	 * first is a newline character, hence not the terminator, so the
	 * following character may be inspected.
	 */
	if (first == '\r')
		return array[1] == '\n' ? 2 : 1;

	return array[1] == '\r' ? 2 : 1;
}
117 
118 /* ------------------------------------------------------------------------- */
119 
120 /*
121  * A "base" token is one of four things:
122  *   1.) A sequence of alpha characters
123  *   2.) A sequence of numeric characters
124  *   3.) A single whitespace character if whitespace is not ignored
125  *   4.) A single character that does not fall into the above 3 categories
126  */
127 
/* Category of a base token; see the list of base token kinds above. */
enum base_token_type {
	BASETOKEN_NONE,       /* value 0, i.e. what base_token_clear() leaves behind */
	BASETOKEN_ALPHA,      /* sequence of alpha characters */
	BASETOKEN_DIGIT,      /* sequence of numeric characters */
	BASETOKEN_WHITESPACE, /* single whitespace character (when not ignored) */
	BASETOKEN_OTHER,      /* single character of any other kind */
};
135 
/* One token as filled in by lexer_getbasetoken(). */
struct base_token {
	struct strref text;        /* segment of text that makes up the token */
	enum base_token_type type; /* category of the token */
	/* NOTE(review): presumably set when whitespace was skipped before this
	 * token -- confirm in lexer_getbasetoken() */
	bool passed_whitespace;
};
141 
base_token_clear(struct base_token * t)142 static inline void base_token_clear(struct base_token *t)
143 {
144 	memset(t, 0, sizeof(struct base_token));
145 }
146 
base_token_copy(struct base_token * dst,struct base_token * src)147 static inline void base_token_copy(struct base_token *dst,
148 				   struct base_token *src)
149 {
150 	memcpy(dst, src, sizeof(struct base_token));
151 }
152 
153 /* ------------------------------------------------------------------------- */
154 
/*
 * Values stored in error_item.level.  error_data_has_errors() reports true
 * only for items whose level is LEX_ERROR.
 */
#define LEX_ERROR 0
#define LEX_WARNING 1
157 
/* A single diagnostic entry. */
struct error_item {
	char *error;          /* message text; released with bfree() in error_item_free() */
	const char *file;     /* file name; not freed by error_item_free() */
	uint32_t row, column; /* position associated with the diagnostic */
	int level;            /* compared against LEX_ERROR / LEX_WARNING */
};
164 
error_item_init(struct error_item * ei)165 static inline void error_item_init(struct error_item *ei)
166 {
167 	memset(ei, 0, sizeof(struct error_item));
168 }
169 
error_item_free(struct error_item * ei)170 static inline void error_item_free(struct error_item *ei)
171 {
172 	bfree(ei->error);
173 	error_item_init(ei);
174 }
175 
error_item_array_free(struct error_item * array,size_t num)176 static inline void error_item_array_free(struct error_item *array, size_t num)
177 {
178 	size_t i;
179 	for (i = 0; i < num; i++)
180 		error_item_free(array + i);
181 }
182 
183 /* ------------------------------------------------------------------------- */
184 
/* Collection of diagnostics gathered during processing. */
struct error_data {
	/* dynamic array of items; the helpers below use its .array and .num */
	DARRAY(struct error_item) errors;
};
188 
/*
 * Initialises the error list via da_init().  Nothing is released here, so
 * use error_data_free() to dispose of a list that already holds items.
 */
static inline void error_data_init(struct error_data *data)
{
	da_init(data->errors);
}
193 
error_data_free(struct error_data * data)194 static inline void error_data_free(struct error_data *data)
195 {
196 	error_item_array_free(data->errors.array, data->errors.num);
197 	da_free(data->errors);
198 }
199 
error_data_item(struct error_data * ed,size_t idx)200 static inline const struct error_item *error_data_item(struct error_data *ed,
201 						       size_t idx)
202 {
203 	return ed->errors.array + idx;
204 }
205 
/*
 * Implemented outside this header.
 * NOTE(review): presumably builds one string out of all stored items and
 * returns memory the caller must release with bfree() -- confirm.
 */
EXPORT char *error_data_buildstring(struct error_data *ed);

/*
 * Implemented outside this header; records a diagnostic in ed.
 * NOTE(review): presumably `level` is LEX_ERROR or LEX_WARNING and `msg` is
 * copied into the new item -- confirm against the definition.
 */
EXPORT void error_data_add(struct error_data *ed, const char *file,
			   uint32_t row, uint32_t column, const char *msg,
			   int level);
211 
error_data_type_count(struct error_data * ed,int type)212 static inline size_t error_data_type_count(struct error_data *ed, int type)
213 {
214 	size_t count = 0, i;
215 	for (i = 0; i < ed->errors.num; i++) {
216 		if (ed->errors.array[i].level == type)
217 			count++;
218 	}
219 
220 	return count;
221 }
222 
error_data_has_errors(struct error_data * ed)223 static inline bool error_data_has_errors(struct error_data *ed)
224 {
225 	size_t i;
226 	for (i = 0; i < ed->errors.num; i++)
227 		if (ed->errors.array[i].level == LEX_ERROR)
228 			return true;
229 
230 	return false;
231 }
232 
233 /* ------------------------------------------------------------------------- */
234 
/* Lexer state: an owned text buffer plus a position inside it. */
struct lexer {
	char *text;         /* buffer owned by the lexer; released with bfree() in lexer_free() */
	const char *offset; /* set to the start of text by lexer_start*() and lexer_reset() */
};
239 
lexer_init(struct lexer * lex)240 static inline void lexer_init(struct lexer *lex)
241 {
242 	memset(lex, 0, sizeof(struct lexer));
243 }
244 
lexer_free(struct lexer * lex)245 static inline void lexer_free(struct lexer *lex)
246 {
247 	bfree(lex->text);
248 	lexer_init(lex);
249 }
250 
lexer_start(struct lexer * lex,const char * text)251 static inline void lexer_start(struct lexer *lex, const char *text)
252 {
253 	lexer_free(lex);
254 	lex->text = bstrdup(text);
255 	lex->offset = lex->text;
256 }
257 
lexer_start_move(struct lexer * lex,char * text)258 static inline void lexer_start_move(struct lexer *lex, char *text)
259 {
260 	lexer_free(lex);
261 	lex->text = text;
262 	lex->offset = lex->text;
263 }
264 
lexer_reset(struct lexer * lex)265 static inline void lexer_reset(struct lexer *lex)
266 {
267 	lex->offset = lex->text;
268 }
269 
/*
 * Whitespace mode passed to lexer_getbasetoken().
 * NOTE(review): presumably PARSE_WHITESPACE yields whitespace as tokens and
 * IGNORE_WHITESPACE skips it -- confirm against the implementation.
 */
enum ignore_whitespace { PARSE_WHITESPACE, IGNORE_WHITESPACE };
271 
/*
 * Implemented outside this header; fills in *t from the lexer.
 * NOTE(review): presumably returns false once no further token is
 * available -- confirm against the definition.
 */
EXPORT bool lexer_getbasetoken(struct lexer *lex, struct base_token *t,
			       enum ignore_whitespace iws);

/*
 * Implemented outside this header; writes a row and a column through the
 * two output pointers.
 * NOTE(review): presumably `str` points into the lexer's text and row/col
 * describe its position there -- confirm (including whether counting
 * starts at 0 or 1).
 */
EXPORT void lexer_getstroffset(const struct lexer *lex, const char *str,
			       uint32_t *row, uint32_t *col);
277 
278 #ifdef __cplusplus
279 }
280 #endif
281