1 /*
2 * Copyright (C) 2014 Christian Heckendorf <heckendorfc@gmail.com>
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "lex_dfa.h"
19 #include "lex.h"
20 #include "edit_shell.h"
21
/*
 * Release every node of a token list together with the word string each
 * node owns. Passing NULL is allowed and does nothing.
 */
void free_tokens(TokenList *t){
	while(t!=NULL){
		/* Grab the successor before the node itself goes away */
		TokenList *following=t->next;

		free(t->token.word);	/* free(NULL) is a no-op */
		free(t);
		t=following;
	}
}
33
34 STATIC
split_token(TokenList * token,const int start,const int word_i)35 int split_token(TokenList *token, const int start, const int word_i){
36 const int len=strlen(token->token.word);
37 int count=0;
38 TokenList *orig,*ptr;
39 orig=token;
40
41 if(start==0 && word_i==len)
42 return 0;
43
44 if(start>0){
45 count|=SPLIT_BEFORE;
46 ptr=token->next;
47 INIT_MEM(token->next,1);
48 token->next->next=ptr;
49 INIT_MEM(token->next->token.word,(word_i-start)+1);
50 memcpy(token->next->token.word,orig->token.word+start,word_i-start);
51 token->next->token.word[word_i-start]=0;
52 token=token->next;
53 }
54 if(word_i<len){
55 count|=SPLIT_AFTER;
56 ptr=token->next;
57 INIT_MEM(token->next,1);
58 token->token.type=orig->token.type;
59 token->next->token.type=TOK_NULL;
60 token->next->next=ptr;
61 INIT_MEM(token->next->token.word,(len-word_i)+1);
62 memcpy(token->next->token.word,orig->token.word+word_i,len-word_i);
63 token->next->token.word[len-word_i]=0;
64 }
65
66 if(start>0){
67 orig->token.word[start]=0;
68 orig->next->token.type=orig->token.type;
69 orig->token.type=TOK_NULL;
70 }
71 else{
72 orig->token.word[word_i]=0;
73 }
74
75 return count;
76 }
77
78 STATIC
identify(TokenList * token,State * q)79 int identify(TokenList *token,State *q){
80 int i,c=0;
81 char *str=token->token.word;
82 State *test=q;
83 int candidate[NUM_CANDIDATE];
84 int tok[NUM_CANDIDATE];
85 int tok_start=-1;
86 //int l_tok=TOK_NULL;
87
88 if(!str || !*str)
89 return 0;
90
91 for(i=0;str[i];i++){
92 if(str[i]=='\\'){
93 if(c>0) /* We don't look for TEXT tokens */
94 break;
95 else{
96 i++;
97 continue;
98 }
99 }
100
101 test=test->out[(int)str[i]].state;
102
103 if(!test)break;
104
105 if(tok_start<0 && test!=q)
106 tok_start=i;
107
108 if(test->final){
109 candidate[c]=i;
110 tok[c]=test->final;
111 c++;
112
113 assert(c<NUM_CANDIDATE);
114 }
115 if(test->out==NULL){
116 /* No more processing can be done on this segment */
117 break;
118 }
119 }
120 if(c>0){
121 token->token.type=tok[c-1];
122 int split=split_token(token,tok_start,candidate[c-1]+1);
123 if(split==(SPLIT_BEFORE|SPLIT_AFTER)){
124 identify(token->next->next,q);
125 }
126 else if(split&SPLIT_AFTER){
127 identify(token->next,q);
128 }
129 return split;
130 }
131 return 0;
132 }
133
134 #if 0
135 STATIC
136 int identify_full(TokenList *token, State *q){
137 State *test=q;
138 char *str=token->token.word;
139 int i;
140
141 if(!str || !*str)
142 return 0;
143
144 for(i=0;str[i];i++){
145 if(!test->out)
146 return 0;
147
148 test=test->out[(int)str[i]].state;
149
150 if(!test)
151 return 0;
152 }
153 if(test->final)
154 token->token.type=test->final;
155
156 return test->final;
157 }
158
159 STATIC
160 void strip_backslash(Token *token){
161 int i,j;
162 /*TODO: replace with special backslash chars (\n) */
163 for(i=0;token->word[i];i++){
164 if(token->word[i]=='\\'){
165 for(j=i;token->word[j];j++)
166 token->word[j]=token->word[j+1];
167 }
168 }
169 }
170 #endif
171
172 STATIC
create_tokens(char * str)173 TokenList* create_tokens(char *str){
174 TokenList *list,*t;
175 char *start,*ptr=str;
176 char quote=0;
177 int escape=0;
178
179 INIT_MEM(list,1);
180 t=list;
181 start=ptr;
182 for(;*ptr;ptr++){
183 if(!escape && *ptr=='\\'){
184 escape=1;
185 ptr++;
186 if(*ptr==0)break;
187 }
188
189 if(!escape && quote==0 && (*ptr=='"' || *ptr=='\'')){ /* Found initial quote */
190 quote=*ptr;
191 *ptr=0;
192 ptr++;
193
194 if(*start){
195 t->token.word=strdup(start);
196 t->token.type=TOK_NULL;
197 INIT_MEM(t->next,1);
198 t=t->next;
199 }
200
201 INIT_MEM(t->token.word,2);
202 sprintf(t->token.word,"%c",quote);
203 t->token.type=TOK_QUOTE;
204 INIT_MEM(t->next,1);
205 t=t->next;
206
207 if(*ptr==0)break;
208
209 start=ptr;
210
211 continue;
212 }
213
214 if(!escape && quote && *ptr==quote){ /* Found matching quote */
215 *ptr=0;
216 if(*start){
217 t->token.word=strdup(start);
218 t->token.type=TOK_QUOTE_STR;
219 INIT_MEM(t->next,1);
220 t=t->next;
221 }
222
223 INIT_MEM(t->token.word,2);
224 sprintf(t->token.word,"%c",quote);
225 t->token.type=TOK_QUOTE;
226 INIT_MEM(t->next,1);
227 t=t->next;
228
229 start=ptr+1;
230 quote=0;
231
232 continue;
233 }
234
235 if(!escape && quote==0 && (*ptr==' ' || *ptr=='\t' || *ptr=='\n')){
236 *ptr=0;
237
238 if(*start){
239 t->token.word=strdup(start);
240 t->token.type=TOK_NULL;
241 INIT_MEM(t->next,1);
242 t=t->next;
243 }
244
245 t->token.word=strdup(" ");
246 t->token.type=TOK_WHITESPACE;
247 INIT_MEM(t->next,1);
248 t=t->next;
249
250 while(ptr[1]==' ' || ptr[1]=='\t' || ptr[1]=='\n'){
251 ptr++;
252 }
253 start=ptr+1;
254 }
255
256 if(escape) escape=0;
257 }
258
259 if(*start){
260 t->token.word=strdup(start);
261 t->token.type=TOK_NULL;
262 t->next=NULL;
263 }
264 else{
265 TokenList* tp=list;
266 while(tp->next!=t)tp=tp->next;
267 free(tp->next);
268 tp->next=NULL;
269 }
270
271 return list;
272 }
273
lex(const char * str)274 TokenList* lex(const char *str){
275 char *tofree,*strp;
276 static State *op_dfa=NULL;
277 //static State *reserved_dfa=NULL;
278 TokenList *tokens,*tptr;
279
280 if(!op_dfa)op_dfa=generate_operator_dfa();
281 //if(!reserved_dfa)reserved_dfa=generate_reserved_dfa();
282
283 tofree=strp=strdup(str);
284
285 INIT_MEM(tptr,1);
286 tokens=tptr;
287 tptr->token.word=NULL;
288 tptr->token.type=TOK_NULL;
289 tptr->next=NULL;
290
291 tptr=tptr->next=create_tokens(strp);
292
293 for(tptr=tokens->next;tptr!=NULL;tptr=tptr->next){
294 if(tptr->token.type!=TOK_NULL)continue;
295 identify(tptr,op_dfa);
296 }
297 /*
298 for(tptr=tokens->next;tptr!=NULL;tptr=tptr->next){
299 if(tptr->token.type!=TOK_NULL)continue;
300 identify_full(tptr,reserved_dfa);
301 }
302 */
303 for(tptr=tokens->next;tptr!=NULL;tptr=tptr->next){
304 if(tptr->next && tptr->next->token.type==TOK_WHITESPACE &&
305 tptr->next->next && tptr->next->next->token.type&(TOK_OPERATOR|TOK_RESERVED)){
306 TokenList *temp=tptr->next->next;
307 free(tptr->next->token.word);
308 free(tptr->next);
309 tptr->next=temp;
310 }
311 if(tptr->next && tptr->token.type&(TOK_OPERATOR|TOK_RESERVED) && tptr->next->token.type==(TOK_WHITESPACE)){
312 TokenList *temp=tptr->next->next;
313 free(tptr->next->token.word);
314 free(tptr->next);
315 tptr->next=temp;
316 }
317 }
318 for(tptr=tokens->next;tptr!=NULL;tptr=tptr->next){
319 if(tptr->token.type==TOK_NULL)
320 tptr->token.type=TOK_TEXT;
321 }
322
323 free(tofree);
324
325 return tokens;
326 }
327
/*
 * Parser error callback: write `str` followed by a newline to stderr.
 * Always returns 0.
 */
int yyerror(const char *str){
	fputs(str,stderr);
	fputc('\n',stderr);
	return 0;
}
332
yylex()333 int yylex(){
334 int ret;
335
336 if(!tlist || !tlist->next)return 0;
337
338 for(;tlist->next && tlist->token.type==TOK_WHITESPACE && tlist->next->token.type==TOK_WHITESPACE;tlist=tlist->next);
339
340 tlist=tlist->next;
341
342 if(tlist->token.type==TOK_WHITESPACE && tlist->next==NULL)
343 return 0;
344
345 ret=TOKEN_MASK&tlist->token.type;
346 //printf("YYLEX|%d|%s\n",ret,tlist->token.word);
347 yylval.word=tlist->token.word;
348
349 return ret;
350 }
351