1 %option noinput
2 %option nounput
3 %top {
4 #ifdef HAVE_CONFIG_H
5 # include <config.h>
6 #endif
7 }
8 %{
9 /* This file is part of Grecs.
10    Copyright (C) 2012-2016 Sergey Poznyakoff.
11 
12    Grecs is free software; you can redistribute it and/or modify
13    it under the terms of the GNU General Public License as published by
14    the Free Software Foundation; either version 3, or (at your option)
15    any later version.
16 
17    Grecs is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20    GNU General Public License for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with Grecs.  If not, see <http://www.gnu.org/licenses/>. */
24 
25 #include "grecs.h"
26 #include "json-gram.h"
27 #include "grecs/json.h"
28 
29 static char const *input_ptr;
30 static size_t input_size;
31 struct grecs_locus_point json_current_locus_point;   /* Input location */
32 
33 char const *json_err_diag;
34 struct grecs_locus json_err_locus;
35 
/* Feed the scanner from the memory buffer described by input_ptr and
   input_size instead of a stdio stream.  At most max_size bytes are
   copied into buf; result receives the number of bytes copied, which
   is 0 once the buffer is exhausted. */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size)					\
	do {								\
		size_t n = input_size;					\
		if ((max_size) <= input_size)				\
			n = (max_size);					\
		if (n != 0) {						\
			memcpy((buf), input_ptr, n);			\
			input_size -= n;				\
			input_ptr += n;					\
		}							\
		(result) = n;						\
	} while(0)
47 
/* Location bookkeeping performed for every matched rule.
   A match made in start condition 0 opens a new token, so yylloc.beg is
   set to the column following the current point.  In any other start
   condition yylloc.beg is left alone, so that a string assembled from
   several matches keeps the location of its opening quote.  The current
   point is then moved past the matched text and recorded as yylloc.end.
   NOTE(review): the trailing semicolon is kept on purpose; presumably
   flex expands this macro without supplying one - confirm before
   removing it. */
#define YY_USER_ACTION do {						\
		if (YYSTATE == 0) {					\
			yylloc.beg = json_current_locus_point;		\
			yylloc.beg.col += 1;				\
		}							\
		json_current_locus_point.col += yyleng;			\
		yylloc.end = json_current_locus_point;			\
	} while (0);
56 
57 void
jsonlex_diag(const char * s)58 jsonlex_diag(const char *s)
59 {
60 	if (!json_err_diag) {
61 		json_err_diag = s;
62 		json_err_locus = yylloc;
63 	}
64 }
65 
/* Defined further down in this file; declared here because utf8_wctomb
   must append to the same accumulator as the string rules. */
static void json_line_grow(char const *text, size_t len);

/* Parse the hexadecimal code point at U and append its UTF-8 encoding
   to the string being assembled.

   U must point to NUL-terminated hexadecimal digits (the scanner passes
   the last four characters of yytext).  Returns the number of bytes
   appended (1 to 6), or -1 if the value is above 0x7fffffff, in which
   case nothing is appended.

   The bytes are appended with json_line_grow, so they end up in
   json_line_acc, the accumulator that json_line_finish later turns into
   the token value.

   NOTE(review): UTF-16 surrogate pairs are not combined; each half is
   encoded separately. */
static int
utf8_wctomb(char *u)
{
	unsigned int wc = strtoul(u, NULL, 16);
	int count;
	char r[6];

	/* Number of bytes the encoding needs. */
	if (wc < 0x80)
		count = 1;
	else if (wc < 0x800)
		count = 2;
	else if (wc < 0x10000)
		count = 3;
	else if (wc < 0x200000)
		count = 4;
	else if (wc < 0x4000000)
		count = 5;
	else if (wc <= 0x7fffffff)
		count = 6;
	else
		return -1;

	/* Fill the buffer from the last byte backwards.  Each case emits
	   one continuation byte and ORs in the marker bit that, after the
	   remaining shifts, becomes part of the lead byte prefix. */
	switch (count) {
	case 6:
		r[5] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x4000000;
		/* fallthrough */
	case 5:
		r[4] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x200000;
		/* fallthrough */
	case 4:
		r[3] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x10000;
		/* fallthrough */
	case 3:
		r[2] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x800;
		/* fallthrough */
	case 2:
		r[1] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0xc0;
		/* fallthrough */
	case 1:
		r[0] = wc;
		break;
	default:
		/* Not reached: count is always in the range 1..6 here. */
		break;
	}
	json_line_grow(r, count);
	return count;
}
116 
/* Translate the character C, which followed a backslash in a JSON
   string, into the character it stands for.

   On success the translated character is stored in *O and 0 is
   returned.  If C is not a recognized single-character escape, *O is
   left untouched and -1 is returned.

   Recognized escapes: \\ \" \/ \b \f \n \r \t.  The \uXXXX form is not
   handled here. */
int
json_unescape(char c, char *o)
{
	/* Pairs of (escape letter, resulting character). */
	static char const transtab[] = "\\\\\"\"//b\bf\fn\nr\rt\t";
	char const *p;

	for (p = transtab; *p; p += 2) {
		if (p[0] == c) {
			*o = p[1];
			return 0;
		}
	}
	return -1;
}
131 
/* Local end-of-input hook: always returns 1.
   NOTE(review): presumably YY_SKIP_YYWRAP keeps flex from emitting its
   own yywrap declaration, and 1 tells the scanner there is no further
   input - confirm against the flex manual. */
#define YY_SKIP_YYWRAP 1
static int
yywrap(void)
{
	return 1;
}
138 
/* Accumulator for the text of the string token being scanned.  It is
   created on first use and released by jsonlex_cleanup. */
static struct grecs_txtacc *json_line_acc;

/* Make sure the accumulator exists before text is appended to it.
   An existing accumulator is reused as is. */
static void
json_line_begin(void)
{
	if (json_line_acc == NULL)
		json_line_acc = grecs_txtacc_create();
}
147 
148 static char *
json_line_finish()149 json_line_finish()
150 {
151 	if (json_line_acc) {
152 		char nil = 0;
153 		grecs_txtacc_grow(json_line_acc, &nil, 1);
154 		return grecs_txtacc_finish(json_line_acc, 1);
155 	}
156 	return NULL;
157 }
158 
159 static void
json_line_grow(char const * text,size_t len)160 json_line_grow(char const *text, size_t len)
161 {
162 	grecs_txtacc_grow(json_line_acc, text, len);
163 }
164 %}
165 D [0-9]
166 X [0-9a-fA-F]
167 %x STR
168 %%
"-"?{D}+(\.{D}+)?([eE][-+]?{D}+)?  {
	/* Number: optional sign, integer part, optional fraction and
	   optional exponent.  The decimal point is escaped so that it
	   matches a literal '.' only; unescaped it matches any
	   character, which made input such as 1,2 scan as a single
	   number. */
	yylval.n = strtod(yytext, NULL);
	return T_NUMBER;
      }
\"[^\\\"]*\"       { /* String without escapes: the text between the
                        quotes is the token value. */
                     json_line_begin();
                     json_line_grow(yytext + 1, yyleng - 2);
                     yylval.s = json_line_finish();
                     return T_STRING; }
\"[^\\\"]*\\u{X}{4} { /* Opening quote, plain text, then the first escape,
                        which is \uXXXX.  Of the yyleng characters
                        matched, 1 is the quote and 6 are the escape,
                        so the plain text is yyleng - 7 long.  The
                        rest of the string is scanned in STR. */
                     BEGIN(STR);
                     json_line_begin();
                     json_line_grow(yytext + 1, yyleng - 7);
                     utf8_wctomb(yytext + yyleng - 4);
                   }
\"[^\\\"]*\\.      { /* Opening quote, plain text, then a
                        single-character escape. */
                     char c;
                     BEGIN(STR);
                     json_line_begin();
                     json_line_grow(yytext + 1, yyleng - 3);
                     if (json_unescape(yytext[yyleng - 1], &c)) {
                             jsonlex_diag("invalid UTF-8 codepoint");
                             return T_ERR;
                     }
                     json_line_grow(&c, 1);
                   }
<STR>[^\\\"]*\"       { /* Closing quote: append the remaining text,
                           if any, and return the assembled string. */
                        BEGIN(INITIAL);
                        if (yyleng > 1)
                                json_line_grow(yytext, yyleng - 1);
                        yylval.s = json_line_finish();
                        return T_STRING; }
<STR>[^\\\"]*\\u{X}{4} {
                     /* Plain text followed by \uXXXX: the escape takes
                        the last 6 characters of the match. */
                     json_line_grow(yytext, yyleng - 6);
                     utf8_wctomb(yytext + yyleng - 4);
}
<STR>[^\\\"]*\\.      {
                     /* Plain text followed by a single-character
                        escape. */
                     char c;
                     json_line_grow(yytext, yyleng - 2);
                     if (json_unescape(yytext[yyleng - 1], &c)) {
                             jsonlex_diag("invalid UTF-8 codepoint");
                             return T_ERR;
                     }
                     json_line_grow(&c, 1); }
209 
null      { return T_NULL; }
true      { yylval.b = 1; return T_BOOL; }
false     { yylval.b = 0; return T_BOOL; }
"{"|"}"|"["|"]"|":"|","  return yytext[0];
[ \t\r]+  { /* Skip blanks.  Carriage return is accepted too, so that
               input with CRLF line endings is not rejected as a bogus
               character. */ }
\n        grecs_locus_point_advance_line(json_current_locus_point);
.         { jsonlex_diag("bogus character");
            return T_ERR; }
218 %%
219 void
220 jsonlex_setup(char const *s, size_t l)
221 {
222 	input_ptr = s;
223 	input_size = l;
224 	json_current_locus_point.file = "input";
225 	json_current_locus_point.line = 1;
226 	json_current_locus_point.col = 0;
227 	json_err_diag = NULL;
228         yy_flex_debug = 0;
229         BEGIN(INITIAL);
230         yyrestart(NULL);
231 }
232 
233 void
234 jsonlex_cleanup(void)
235 {
236         if (json_line_acc) {
237 	        grecs_txtacc_free(json_line_acc);
238                 json_line_acc = NULL;
239 	}
240 }
241