1 %option noinput
2 %option nounput
3 %top {
4 #ifdef HAVE_CONFIG_H
5 # include <config.h>
6 #endif
7 }
8 %{
9 /* This file is part of Grecs.
10 Copyright (C) 2012-2016 Sergey Poznyakoff.
11
12 Grecs is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 3, or (at your option)
15 any later version.
16
17 Grecs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with Grecs. If not, see <http://www.gnu.org/licenses/>. */
24
25 #include "grecs.h"
26 #include "json-gram.h"
27 #include "grecs/json.h"
28
/* Unread part of the in-memory input text: both are initialized by
   jsonlex_setup and advanced by YY_INPUT as flex pulls data. */
static char const *input_ptr;
static size_t input_size;
struct grecs_locus_point json_current_locus_point; /* Input location */

/* Text of the first diagnostic stored by jsonlex_diag (NULL when none has
   been recorded) and the token location captured together with it. */
char const *json_err_diag;
struct grecs_locus json_err_locus;
35
/* Feed the scanner from the in-memory buffer described by input_ptr and
   input_size instead of a stdio stream.  At most max_size bytes are copied
   into buf; result receives the number of bytes delivered, which is 0 once
   the input has been exhausted. */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size)					\
	do {								\
		size_t yy_in_n_ = input_size;				\
		if ((max_size) <= input_size)				\
			yy_in_n_ = (max_size);				\
		if (yy_in_n_ != 0) {					\
			memcpy((buf), input_ptr, yy_in_n_);		\
			input_size -= yy_in_n_;				\
			input_ptr += yy_in_n_;				\
		}							\
		(result) = yy_in_n_;					\
	} while (0)
47
/* Location tracking hook that flex runs before the action of every matched
   rule.

   When the scanner is in start condition 0 (INITIAL) the match starts a new
   token, so yylloc.beg is set to the column right after the current input
   point.  In any other start condition (the STR continuation of a string)
   the beginning recorded earlier is kept, so the location spans the whole
   token.  In both cases the current point is advanced by the length of the
   match and stored as the end of the token.

   NOTE(review): the trailing semicolon after `while (0)' looks deliberate --
   the flex skeleton expands this macro as a statement of its own without
   appending one; confirm before removing it. */
#define YY_USER_ACTION do { \
 if (YYSTATE == 0) { \
 yylloc.beg = json_current_locus_point; \
 yylloc.beg.col++; \
 } \
 json_current_locus_point.col += yyleng; \
 yylloc.end = json_current_locus_point; \
 } while (0);
56
57 void
jsonlex_diag(const char * s)58 jsonlex_diag(const char *s)
59 {
60 if (!json_err_diag) {
61 json_err_diag = s;
62 json_err_locus = yylloc;
63 }
64 }
65
/* Defined further below in this file; declared here so that the encoder can
   append to the accumulator of the JSON string being scanned. */
static void json_line_grow(char const *text, size_t len);

/* Convert a hexadecimal code point to UTF-8 and append the resulting bytes
   to the JSON string being accumulated.

   U points to the NUL-terminated hexadecimal digits of the code point (the
   scanner passes the last four characters of a \uXXXX match).

   Returns the number of bytes appended (1..6), or -1 if the value is greater
   than 0x7fffffff, in which case nothing is appended.

   The bytes must go to the same accumulator that the string rules fill and
   finish (via json_line_grow); sending them anywhere else drops the decoded
   character from the resulting token.

   NOTE(review): UTF-16 surrogate pairs (\uD83D\uDE00) are encoded as two
   separate three-byte sequences rather than combined into one code point. */
static int
utf8_wctomb(char *u)
{
	unsigned long wc = strtoul(u, NULL, 16);
	int count;
	char r[6];

	if (wc < 0x80)
		count = 1;
	else if (wc < 0x800)
		count = 2;
	else if (wc < 0x10000)
		count = 3;
	else if (wc < 0x200000)
		count = 4;
	else if (wc < 0x4000000)
		count = 5;
	else if (wc <= 0x7fffffff)
		count = 6;
	else
		return -1;

	/* Emit continuation bytes from the last one backwards.  After each
	   shift the marker bits of the next shorter form are or-ed in, so
	   that by the time case 1 is reached WC holds the complete lead
	   byte. */
	switch (count) {
	case 6:
		r[5] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x4000000;
		/* fallthrough */
	case 5:
		r[4] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x200000;
		/* fallthrough */
	case 4:
		r[3] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x10000;
		/* fallthrough */
	case 3:
		r[2] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0x800;
		/* fallthrough */
	case 2:
		r[1] = 0x80 | (wc & 0x3f);
		wc = wc >> 6;
		wc |= 0xc0;
		/* fallthrough */
	case 1:
		r[0] = wc;
	}
	json_line_grow(r, count);
	return count;
}
116
/* Translate the character C that follows a backslash in a JSON string into
   the character it denotes.

   On success the result is stored in *O and 0 is returned.  If C does not
   introduce a single-character escape, -1 is returned and *O is left
   untouched.

   The recognized escapes are the two-character sequences defined by
   RFC 8259: \\ \" \/ \b \f \n \r \t.  (\uXXXX is not handled here.) */
int
json_unescape(char c, char *o)
{
	/* Pairs of (escape letter, resulting character). */
	static char const transtab[] = "\\\\\"\"//b\bf\fn\nr\rt\t";
	char const *p;

	for (p = transtab; *p; p += 2) {
		if (p[0] == c) {
			*o = p[1];
			return 0;
		}
	}
	return -1;
}
131
/* A private yywrap is supplied below, so ask flex to skip its own
   declaration of it. */
#define YY_SKIP_YYWRAP 1

/* End-of-input hook: always returns 1, i.e. there is never another input
   source to continue with. */
static int
yywrap(void)
{
	return 1;
}
138
/* Accumulator in which the text of the string token being scanned is
   collected.  Created on first use, released by jsonlex_cleanup. */
static struct grecs_txtacc *json_line_acc;

/* Make sure the string accumulator exists before text is appended to it.
   An already existing accumulator is reused as is. */
static void
json_line_begin(void)
{
	if (json_line_acc == NULL)
		json_line_acc = grecs_txtacc_create();
}
147
148 static char *
json_line_finish()149 json_line_finish()
150 {
151 if (json_line_acc) {
152 char nil = 0;
153 grecs_txtacc_grow(json_line_acc, &nil, 1);
154 return grecs_txtacc_finish(json_line_acc, 1);
155 }
156 return NULL;
157 }
158
159 static void
json_line_grow(char const * text,size_t len)160 json_line_grow(char const *text, size_t len)
161 {
162 grecs_txtacc_grow(json_line_acc, text, len);
163 }
164 %}
/* Decimal digit. */
D [0-9]
/* Hexadecimal digit, either case. */
X [0-9a-fA-F]
/* Exclusive start condition, entered after the first escape sequence of a
   string; its rules consume the remainder of that string. */
%x STR
168 %%
"-"?{D}+(\.{D}+)?([eE][-+]?{D}+)?	{
		   /* Optional sign, integer part, optional fraction and
		      optional exponent.  The decimal point is escaped: a
		      bare `.' matches any character, which would make input
		      such as `1,2' or `1x5' scan as a single number. */
		   yylval.n = strtod(yytext, NULL);
		   return T_NUMBER;
		 }
\"[^\\\"]*\"	{
		   /* Complete string without escapes: strip both quotes. */
		   json_line_begin();
		   json_line_grow(yytext + 1, yyleng - 2);
		   yylval.s = json_line_finish();
		   return T_STRING;
		 }
\"[^\\\"]*\\u{X}{4}	{
		   /* String start up to and including its first \uXXXX
		      escape.  The opening quote plus the six characters of
		      the escape are not part of the literal text. */
		   BEGIN(STR);
		   json_line_begin();
		   json_line_grow(yytext + 1, yyleng - 7);
		   utf8_wctomb(yytext + yyleng - 4);
		 }
\"[^\\\"]*\\.	{
		   /* String start up to and including its first
		      single-character escape.  A malformed \u escape ends up
		      here as well and is reported through the same
		      diagnostic. */
		   char c;
		   BEGIN(STR);
		   json_line_begin();
		   json_line_grow(yytext + 1, yyleng - 3);
		   if (json_unescape(yytext[yyleng - 1], &c)) {
			   jsonlex_diag("invalid UTF-8 codepoint");
			   return T_ERR;
		   }
		   json_line_grow(&c, 1);
		 }
<STR>[^\\\"]*\"	{
		   /* Rest of the string up to the closing quote. */
		   BEGIN(INITIAL);
		   if (yyleng > 1)
			   json_line_grow(yytext, yyleng - 1);
		   yylval.s = json_line_finish();
		   return T_STRING;
		 }
<STR>[^\\\"]*\\u{X}{4}	{
		   /* Literal text followed by another \uXXXX escape
		      (six characters). */
		   json_line_grow(yytext, yyleng - 6);
		   utf8_wctomb(yytext + yyleng - 4);
		 }
<STR>[^\\\"]*\\.	{
		   /* Literal text followed by another single-character
		      escape. */
		   char c;
		   json_line_grow(yytext, yyleng - 2);
		   if (json_unescape(yytext[yyleng - 1], &c)) {
			   jsonlex_diag("invalid UTF-8 codepoint");
			   return T_ERR;
		   }
		   json_line_grow(&c, 1);
		 }

209
null		 { return T_NULL; }
true		 { yylval.b = 1; return T_BOOL; }
false		 { yylval.b = 0; return T_BOOL; }
"{"|"}"|"["|"]"|":"|","	 { /* structural characters stand for themselves */
		   return yytext[0]; }
[ \t\r]+	 { /* Insignificant whitespace.  Carriage return is
		      included so that CRLF-terminated input is accepted;
		      RFC 8259 lists it among the whitespace characters. */ }
\n		 { grecs_locus_point_advance_line(json_current_locus_point); }
.		 { jsonlex_diag("bogus character");
		   return T_ERR; }
218 %%
219 void
220 jsonlex_setup(char const *s, size_t l)
221 {
222 input_ptr = s;
223 input_size = l;
224 json_current_locus_point.file = "input";
225 json_current_locus_point.line = 1;
226 json_current_locus_point.col = 0;
227 json_err_diag = NULL;
228 yy_flex_debug = 0;
229 BEGIN(INITIAL);
230 yyrestart(NULL);
231 }
232
233 void
234 jsonlex_cleanup(void)
235 {
236 if (json_line_acc) {
237 grecs_txtacc_free(json_line_acc);
238 json_line_acc = NULL;
239 }
240 }
241