1 /*
2  * libtu/tokenizer.h
3  *
4  * Copyright (c) Tuomo Valkonen 1999-2002.
5  *
6  * You may distribute and modify this library under the terms of either
7  * the Clarified Artistic License or the GNU LGPL, version 2.1 or later.
8  */
9 
10 #ifndef LIBTU_TOKENIZER_H
11 #define LIBTU_TOKENIZER_H
12 
13 #include <stdio.h>
14 #include "types.h"
15 
16 
/*
 * Token setters: store the type code and the matching value in the token
 * that TOK points to.
 *
 * Each macro expands to exactly one statement (do{...}while(0)), so it can
 * be used as the body of an unbraced if/else; a plain {...} block followed
 * by the caller's semicolon would detach the else. Terminate every
 * invocation with a semicolon.
 *
 * TOK is evaluated twice and must therefore be free of side effects.
 * TOK_SET_STRING, TOK_SET_IDENT and TOK_SET_COMMENT all store VAL in the
 * same union member (u.sval); only the type code differs.
 */
#define TOK_SET_BOOL(TOK, VAL) \
	do{ (TOK)->type=TOK_BOOL; (TOK)->u.bval=(VAL); }while(0)
#define TOK_SET_LONG(TOK, VAL) \
	do{ (TOK)->type=TOK_LONG; (TOK)->u.lval=(VAL); }while(0)
#define TOK_SET_DOUBLE(TOK, VAL) \
	do{ (TOK)->type=TOK_DOUBLE; (TOK)->u.dval=(VAL); }while(0)
#define TOK_SET_CHAR(TOK, VAL) \
	do{ (TOK)->type=TOK_CHAR; (TOK)->u.cval=(VAL); }while(0)
#define TOK_SET_STRING(TOK, VAL) \
	do{ (TOK)->type=TOK_STRING; (TOK)->u.sval=(VAL); }while(0)
#define TOK_SET_IDENT(TOK, VAL) \
	do{ (TOK)->type=TOK_IDENT; (TOK)->u.sval=(VAL); }while(0)
#define TOK_SET_COMMENT(TOK, VAL) \
	do{ (TOK)->type=TOK_COMMENT; (TOK)->u.sval=(VAL); }while(0)
#define TOK_SET_OP(TOK, VAL) \
	do{ (TOK)->type=TOK_OP; (TOK)->u.opval=(VAL); }while(0)
25 
/*
 * Raw accessors for the token that TOK points to. Every one expands to a
 * parenthesised member access, so it may be read or assigned to. None of
 * them checks the token's type code.
 */
#define TOK_TYPE(TOK)         ((TOK)->type)

#define TOK_BOOL_VAL(TOK)     ((TOK)->u.bval)
#define TOK_LONG_VAL(TOK)     ((TOK)->u.lval)
#define TOK_DOUBLE_VAL(TOK)   ((TOK)->u.dval)
#define TOK_CHAR_VAL(TOK)     ((TOK)->u.cval)
#define TOK_OP_VAL(TOK)       ((TOK)->u.opval)

/* Strings, identifiers and comments share the u.sval member. */
#define TOK_STRING_VAL(TOK)   ((TOK)->u.sval)
#define TOK_IDENT_VAL(TOK)    TOK_STRING_VAL(TOK)
#define TOK_COMMENT_VAL(TOK)  TOK_STRING_VAL(TOK)
35 
/*
 * Type predicates: non-zero when the token that TOK points to carries the
 * named type code.
 */
#define TOK_IS_INVALID(TOK)   (TOK_INVALID==(TOK)->type)
#define TOK_IS_BOOL(TOK)      (TOK_BOOL==(TOK)->type)
#define TOK_IS_LONG(TOK)      (TOK_LONG==(TOK)->type)
#define TOK_IS_DOUBLE(TOK)    (TOK_DOUBLE==(TOK)->type)
#define TOK_IS_CHAR(TOK)      (TOK_CHAR==(TOK)->type)
#define TOK_IS_STRING(TOK)    (TOK_STRING==(TOK)->type)
#define TOK_IS_IDENT(TOK)     (TOK_IDENT==(TOK)->type)
#define TOK_IS_COMMENT(TOK)   (TOK_COMMENT==(TOK)->type)
#define TOK_IS_OP(TOK)        (TOK_OP==(TOK)->type)

/*
 * Non-zero when the token is an operator token whose operator code equals
 * OP. The operator code is only inspected when the type matches.
 */
#define TOK_OP_IS(TOK, OP)    (TOK_IS_OP(TOK) && (OP)==(TOK)->u.opval)
47 
/*
 * "Take" the string held by the token that TOK points to: the type code is
 * reset to TOK_INVALID and the expression yields the u.sval pointer.
 * Presumably this hands ownership of the string to the caller so that a
 * later tok_free() leaves it alone -- confirm against tok_free().
 * TOK is evaluated twice.
 */
#define TOK_TAKE_STRING_VAL(TOK)   ((TOK)->type=TOK_INVALID, (TOK)->u.sval)
#define TOK_TAKE_IDENT_VAL(TOK)    TOK_TAKE_STRING_VAL(TOK)
#define TOK_TAKE_COMMENT_VAL(TOK)  TOK_TAKE_STRING_VAL(TOK)
51 
52 
/*
 * Token type codes, as stored in the `type` member of Token. Apart from
 * TOK_INVALID (zero) every code is a character constant.
 */
enum{
	TOK_INVALID = 0,

	/* Scalar values */
	TOK_BOOL    = 'b',
	TOK_LONG    = 'l',
	TOK_DOUBLE  = 'd',
	TOK_CHAR    = 'c',

	/* Values kept in the string member of the token union */
	TOK_STRING  = 's',
	TOK_IDENT   = 'i',
	TOK_COMMENT = '#',

	/* Operator; the operator code itself is one of the OP_* values */
	TOK_OP      = '+'
};
64 
65 
/*
 * Operator codes. A single-character operator is encoded as its character
 * constant; a multi-character operator packs its characters one per byte,
 * first character in the lowest byte. OP2/OP3 build those packed values
 * and exist only for the duration of this enum.
 */
#define OP2(X,Y)   ((X)|((Y)<<8))
#define OP3(X,Y,Z) ((X)|((Y)<<8)|((Z)<<16))

enum{
	/* Brackets and separators */
	OP_L_PAR    = '(',
	OP_R_PAR    = ')',
	OP_L_BRK    = '[',
	OP_R_BRK    = ']',
	OP_L_BRC    = '{',
	OP_R_BRC    = '}',
	OP_COMMA    = ',',
	OP_SCOLON   = ';',

	/* Single-character operators ('~' was once considered for OP_NOT) */
	OP_PLUS     = '+',
	OP_MINUS    = '-',
	OP_MUL      = '*',
	OP_DIV      = '/',
	OP_MOD      = '%',
	OP_POW      = '^',
	OP_OR       = '|',
	OP_AND      = '&',
	OP_NOT      = '!',
	OP_ASGN     = '=',
	OP_LT       = '<',
	OP_GT       = '>',
	OP_DOT      = '.',
	OP_COLON    = ':',
	OP_QMARK    = '?',
	OP_AT       = '@',

	/* End of line and end of input */
	OP_NEXTLINE = '\n',
	OP_EOF      = -1,

	/* Two-character operators */
	OP_INC      = OP2('+','+'),
	OP_DEC      = OP2('-','-'),
	OP_LSHIFT   = OP2('<','<'),
	OP_RSHIFT   = OP2('>','>'),
	OP_AS_INC   = OP2('+','='),
	OP_AS_DEC   = OP2('-','='),
	OP_AS_MUL   = OP2('*','='),
	OP_AS_DIV   = OP2('/','='),
	OP_AS_MOD   = OP2('%','='),
	OP_AS_POW   = OP2('^','='),
	OP_EQ       = OP2('=','='),
	OP_NE       = OP2('!','='),
	OP_LE       = OP2('<','='),
	OP_GE       = OP2('>','=')

	/*
	 * Disabled in this header:
	 *   AS_OR    = OP2('|','='),      AS_AND   = OP2('&','='),
	 *   L_AND    = OP2('&','&'),      L_OR     = OP2('|','|'),
	 *   L_XOR    = OP2('^','^'),
	 *   AsLShift = OP3('<','<','='),  AsRShift = OP3('>','>','=')
	 */
};

#undef OP2
#undef OP3
99 
100 
/*
 * A single token: a type code, a line number and a value slot. Which
 * member of `u` is meaningful is determined by `type`; the TOK_SET_* and
 * TOK_*_VAL macros pair each type code with its member.
 */
typedef struct {
	int type;            /* TOK_* type code; zero is TOK_INVALID */
	int line;            /* presumably the input line of the token -- confirm */
	union {
		bool bval;       /* TOK_BOOL */
		long lval;       /* TOK_LONG */
		double dval;     /* TOK_DOUBLE */
		char cval;       /* TOK_CHAR */
		char *sval;      /* TOK_STRING, TOK_IDENT and TOK_COMMENT */
		int opval;       /* TOK_OP; one of the OP_* codes */
	} u;
} Token;

/*
 * Static initialiser for a Token: type 0 (TOK_INVALID), line 0, and the
 * first member of the union (bval) set to 0.
 */
#define TOK_INIT {0, 0, {0}}
115 
116 
117 extern void tok_free(Token*tok);
118 extern void tok_init(Token*tok);
119 
120 
121 /* */
122 
123 
/*
 * Tokenizer option bits. Each constant occupies its own bit, so they can
 * be OR-ed together; presumably they are kept in Tokenizer.flags --
 * confirm in the implementation.
 */
enum{
	TOKZ_IGNORE_NEXTLINE    = 1<<0,
	TOKZ_READ_COMMENTS      = 1<<1,
	TOKZ_PARSER_INDENT_MODE = 1<<2,
	TOKZ_ERROR_TOLERANT     = 1<<3,
	TOKZ_READ_FROM_BUFFER   = 1<<4,
	TOKZ_DEFAULT_OPTION     = 1<<5
};
132 
133 
/*
 * Tokenizer/parser error codes, numbered consecutively from 1. Presumably
 * these are the values accepted as `e` by tokz_warn_error() -- confirm in
 * the implementation. The numbering is spelled out so that it stays
 * stable if entries are ever rearranged.
 */
enum{
	E_TOKZ_UNEXPECTED_EOF      = 1,
	E_TOKZ_UNEXPECTED_EOL      = 2,
	E_TOKZ_EOL_EXPECTED        = 3,
	E_TOKZ_INVALID_CHAR        = 4,
	E_TOKZ_TOOBIG              = 5,
	E_TOKZ_NUMFMT              = 6,
	E_TOKZ_NUM_JUNK            = 7,
	E_TOKZ_NOTINT              = 8,
	E_TOKZ_RANGE               = 9,
	E_TOKZ_MULTICHAR           = 10,

	E_TOKZ_TOKEN_LIMIT         = 11,
	E_TOKZ_UNKNOWN_OPTION      = 12,
	E_TOKZ_SYNTAX              = 13,
	E_TOKZ_INVALID_ARGUMENT    = 14,
	E_TOKZ_EOS_EXPECTED        = 15,
	E_TOKZ_TOO_FEW_ARGS        = 16,
	E_TOKZ_TOO_MANY_ARGS       = 17,
	E_TOKZ_MAX_NEST            = 18,
	E_TOKZ_IDENTIFIER_EXPECTED = 19,

	E_TOKZ_LBRACE_EXPECTED     = 20
};
158 
159 
160 struct _ConfOpt;
161 
162 typedef struct _Tokenizer_FInfo{
163 	FILE *file;
164 	char *name;
165 	int line;
166 	int ungetc;
167 	Token ungettok;
168 } Tokenizer_FInfo;
169 
/*
 * In-memory input: a data pointer plus a length and a position.
 * Presumably `pos` is the read offset into `data` and `len` the number of
 * bytes available -- confirm in the implementation.
 */
typedef struct _Tokenizer_Buffer {
	char *data;
	int len;
	int pos;
} Tokenizer_Buffer;
175 
176 typedef struct _Tokenizer{
177 	FILE *file;
178 	char *name;
179 	int line;
180 	int ungetc;
181 	Token ungettok;
182 
183 	Tokenizer_Buffer buffer;
184 
185 	int flags;
186 	const struct _ConfOpt **optstack;
187 	int nest_lvl;
188 	void *user_data;
189 
190 	int filestack_n;
191 	Tokenizer_FInfo *filestack;
192 
193 	char **includepaths;
194 } Tokenizer;
195 
196 
197 extern Tokenizer *tokz_open(const char *fname);
198 extern Tokenizer *tokz_open_file(FILE *file, const char *fname);
199 extern Tokenizer *tokz_prepare_buffer(char *buffer, int len);
200 extern void tokz_close(Tokenizer *tokz);
201 extern bool tokz_get_token(Tokenizer *tokz, Token *tok);
202 extern void tokz_unget_token(Tokenizer *tokz, Token *tok);
203 extern void tokz_warn_error(const Tokenizer *tokz, int line, int e);
204 extern void tokz_warn(const Tokenizer *tokz, int line, const char *fmt, ...);
205 
206 extern bool tokz_pushf(Tokenizer *tokz, const char *fname);
207 extern bool tokz_pushf_file(Tokenizer *tokz, FILE *file, const char *fname);
208 extern bool tokz_popf(Tokenizer *tokz);
209 
210 extern void tokz_set_includepaths(Tokenizer *tokz, char **paths);
211 
212 #endif /* LIBTU_TOKENIZER_H */
213