1 #include "lib/mlr_globals.h"
2 #include "lib/mlrutil.h"
3 #include "cli/json_array_ingest.h"
4 #include "input/mlr_json_adapter.h"
5
6 static lrec_t* validate_millerable_object(json_value_t* pjson_object, char* flatten_sep,
7 json_array_ingest_t json_array_ingest);
8 static int populate_from_nested_object(lrec_t* prec, json_value_t* pjson_object, char* prefix, char* flatten_sep,
9 json_array_ingest_t json_array_ingest);
10 static int populate_from_nested_array(lrec_t* prec, json_value_t* pjson_array, char* prefix, char* flatten_sep,
11 json_array_ingest_t json_array_ingest);
12
13 // ----------------------------------------------------------------
// Converts a parsed top-level JSON value into lrecs and appends them to precords.
//
// * A top-level object yields one lrec.
// * A top-level array must contain only objects; each element yields one lrec, in array order.
// * Any other top-level type is rejected with a message on stderr.
//
// flatten_sep and json_array_ingest are passed through unchanged to validate_millerable_object.
//
// Returns TRUE on success, FALSE on the first value that cannot be converted. Records appended
// before a failure are left in precords.
int reference_json_objects_as_lrecs(sllv_t* precords, json_value_t* ptop_level_json, char* flatten_sep,
	json_array_ingest_t json_array_ingest)
{
	if (ptop_level_json->type == JSON_ARRAY) {
		int n = ptop_level_json->u.array.length;
		for (int i = 0; i < n; i++) {
			json_value_t* pnext_level_json = ptop_level_json->u.array.values[i];
			if (pnext_level_json->type != JSON_OBJECT) {
				// Describe the offending element's type; the enclosing value is always an array here.
				fprintf(stderr,
					"%s: found non-object (type %s) within top-level array. This is valid but unmillerable JSON.\n",
					MLR_GLOBALS.bargv0, json_describe_type(pnext_level_json->type));
				return FALSE;
			}
			lrec_t* prec = validate_millerable_object(pnext_level_json, flatten_sep, json_array_ingest);
			if (prec == NULL)
				return FALSE;
			sllv_append(precords, prec);
		}
	} else if (ptop_level_json->type == JSON_OBJECT) {
		lrec_t* prec = validate_millerable_object(ptop_level_json, flatten_sep, json_array_ingest);
		if (prec == NULL)
			return FALSE;
		sllv_append(precords, prec);
	} else {
		fprintf(stderr,
			"%s: found non-terminal (type %s) at top level. This is valid but unmillerable JSON.\n",
			MLR_GLOBALS.bargv0, json_describe_type(ptop_level_json->type));
		return FALSE;
	}
	return TRUE;
}
45
46 // ----------------------------------------------------------------
47 // Returns NULL if the JSON object is not millerable, else returns a new lrec with string pointers
48 // backed by the JSON object.
49 //
50 // Precondition: the JSON value is assumed to have already been checked to be of type JSON_OBJECT.
51
validate_millerable_object(json_value_t * pjson,char * flatten_sep,json_array_ingest_t json_array_ingest)52 lrec_t* validate_millerable_object(json_value_t* pjson, char* flatten_sep, json_array_ingest_t json_array_ingest) {
53 lrec_t* prec = lrec_unbacked_alloc();
54 int n = pjson->u.array.length;
55 for (int i = 0; i < n; i++) {
56 json_object_entry_t* pobject_entry = &pjson->u.object.p.values[i];
57 char* key = (char*)pobject_entry->name;
58 char* prefix = NULL;
59
60 json_value_t* pjson_value = pobject_entry->pvalue;
61 switch (pjson_value->type) {
62
63 case JSON_NONE:
64 lrec_put(prec, key, "", NO_FREE);
65 break;
66 case JSON_NULL:
67 lrec_put(prec, key, "", NO_FREE);
68 break;
69
70 case JSON_OBJECT:
71 // This could be made more efficient ... the string length is in the json_value_t.
72 prefix = mlr_paste_2_strings(key, flatten_sep);
73 if (!populate_from_nested_object(prec, pjson_value, prefix, flatten_sep, json_array_ingest))
74 return NULL;
75 free(prefix);
76 break;
77 case JSON_ARRAY:
78 switch (json_array_ingest) {
79 case JSON_ARRAY_INGEST_FATAL:
80 fprintf(stderr,
81 "%s: found array item within JSON object. This is valid but unmillerable JSON.\n"
82 "Use --json-skip-arrays-on-input to exclude these from input without fataling.\n"
83 "Or, --json-map-arrays-on-input to convert them to integer-indexed maps.\n",
84 MLR_GLOBALS.bargv0);
85 return NULL;
86 break;
87 case JSON_ARRAY_INGEST_AS_MAP:
88 prefix = mlr_paste_2_strings(key, flatten_sep);
89 if (!populate_from_nested_array(prec, pjson_value, prefix, flatten_sep, json_array_ingest)) {
90 free(prefix);
91 return NULL;
92 }
93 free(prefix);
94 break;
95 // xxx other cases!
96 default:
97 break;
98 }
99 break;
100
101 case JSON_STRING:
102 lrec_put(prec, key, pjson_value->u.string.ptr, NO_FREE);
103 break;
104
105 case JSON_BOOLEAN:
106 lrec_put(prec, key, pjson_value->u.boolean.sval, NO_FREE);
107 break;
108 case JSON_INTEGER:
109 lrec_put(prec, key, pjson_value->u.integer.sval, NO_FREE);
110 break;
111 case JSON_DOUBLE:
112 lrec_put(prec, key, pjson_value->u.dbl.sval, NO_FREE);
113 break;
114 default:
115 MLR_INTERNAL_CODING_ERROR();
116 break;
117 }
118
119 }
120 return prec;
121 }
122
123 // ----------------------------------------------------------------
124 // Example: the JSON object has { "a": { "b" : 1, "c" : 2 } }. Then we add "a:b" => "1" and "a:c" => "2"
125 // to the lrec.
126
populate_from_nested_object(lrec_t * prec,json_value_t * pjson_object,char * prefix,char * flatten_sep,json_array_ingest_t json_array_ingest)127 static int populate_from_nested_object(lrec_t* prec, json_value_t* pjson_object, char* prefix, char* flatten_sep,
128 json_array_ingest_t json_array_ingest)
129 {
130 int n = pjson_object->u.object.length;
131 for (int i = 0; i < n; i++) {
132 json_object_entry_t* pobject_entry = &pjson_object->u.object.p.values[i];
133 char* json_key = (char*)pobject_entry->name;
134 json_value_t* pjson_value = pobject_entry->pvalue;
135 char* lrec_key = mlr_paste_2_strings(prefix, json_key);
136 char* next_prefix = NULL;
137
138 switch (pjson_value->type) {
139 case JSON_NONE:
140 lrec_put(prec, lrec_key, "", FREE_ENTRY_KEY);
141 break;
142 case JSON_NULL:
143 lrec_put(prec, lrec_key, "", FREE_ENTRY_KEY);
144 break;
145 case JSON_STRING:
146 lrec_put(prec, lrec_key, pjson_value->u.string.ptr, FREE_ENTRY_KEY);
147 break;
148 case JSON_BOOLEAN:
149 lrec_put(prec, lrec_key, pjson_value->u.boolean.sval, FREE_ENTRY_KEY);
150 break;
151 case JSON_OBJECT:
152 next_prefix = mlr_paste_2_strings(lrec_key, flatten_sep);
153 if (!populate_from_nested_object(prec, pjson_value, next_prefix, flatten_sep, json_array_ingest))
154 return FALSE;
155 free(next_prefix);
156 free(lrec_key);
157 break;
158 case JSON_ARRAY:
159 switch (json_array_ingest) {
160 case JSON_ARRAY_INGEST_FATAL:
161 fprintf(stderr,
162 "%s: found array item within JSON object. This is valid but unmillerable JSON.\n"
163 "Use --json-skip-arrays-on-input to exclude these from input without fataling.\n"
164 "Or, --json-map-arrays-on-input to convert them to integer-indexed maps.\n",
165 MLR_GLOBALS.bargv0);
166 free(lrec_key);
167 return FALSE;
168 break;
169 case JSON_ARRAY_INGEST_AS_MAP:
170 next_prefix = mlr_paste_2_strings(lrec_key, flatten_sep);
171 if (!populate_from_nested_array(prec, pjson_value, next_prefix, flatten_sep, json_array_ingest)) {
172 free(next_prefix);
173 free(lrec_key);
174 return FALSE;
175 }
176 free(next_prefix);
177 free(lrec_key);
178 break;
179 // xxx other cases!
180 default:
181 free(lrec_key);
182 break;
183 }
184 break;
185 case JSON_INTEGER:
186 lrec_put(prec, lrec_key, pjson_value->u.integer.sval, FREE_ENTRY_KEY);
187 break;
188 case JSON_DOUBLE:
189 lrec_put(prec, lrec_key, pjson_value->u.dbl.sval, FREE_ENTRY_KEY);
190 break;
191 default:
192 MLR_INTERNAL_CODING_ERROR();
193 break;
194 }
195 }
196 return TRUE;
197 }
198
// Flattens the elements of a nested JSON array into prec, treating the array as a map keyed by
// element position.
//
// For the element at position i, the lrec key is prefix + low_int_to_string(i) (presumably the
// decimal rendering of i -- confirm against that helper), freshly allocated here:
// * For scalar values (and JSON_NONE/JSON_NULL, stored as "") the key is handed to the lrec with
//   FREE_ENTRY_KEY; the value string is not copied.
// * For nested objects and arrays the key is only used to form the next prefix (key + flatten_sep)
//   and is freed here, along with that prefix, whether or not the nested call succeeds.
//
// Nested arrays follow json_array_ingest: JSON_ARRAY_INGEST_FATAL prints a message to stderr and
// fails; JSON_ARRAY_INGEST_AS_MAP recurses; any other setting leaves the array out of the lrec.
//
// The caller retains ownership of prefix. Returns TRUE on success, FALSE on failure.
static int populate_from_nested_array(lrec_t* prec, json_value_t* pjson_array, char* prefix, char* flatten_sep,
	json_array_ingest_t json_array_ingest)
{
	int n = pjson_array->u.array.length;
	for (int i = 0; i < n; i++) {
		json_value_t* pjson_value = pjson_array->u.array.values[i];

		// low_int_to_string reports through free_flags whether the string it returns must be freed.
		char free_flags = NO_FREE;
		char* json_key = low_int_to_string(i, &free_flags);
		char* lrec_key = mlr_paste_2_strings(prefix, json_key);
		if (free_flags)
			free(json_key);

		switch (pjson_value->type) {

		case JSON_NONE:
		case JSON_NULL:
			lrec_put(prec, lrec_key, "", FREE_ENTRY_KEY);
			break;
		case JSON_STRING:
			lrec_put(prec, lrec_key, pjson_value->u.string.ptr, FREE_ENTRY_KEY);
			break;
		case JSON_BOOLEAN:
			lrec_put(prec, lrec_key, pjson_value->u.boolean.sval, FREE_ENTRY_KEY);
			break;
		case JSON_INTEGER:
			lrec_put(prec, lrec_key, pjson_value->u.integer.sval, FREE_ENTRY_KEY);
			break;
		case JSON_DOUBLE:
			lrec_put(prec, lrec_key, pjson_value->u.dbl.sval, FREE_ENTRY_KEY);
			break;

		case JSON_OBJECT: {
			char* next_prefix = mlr_paste_2_strings(lrec_key, flatten_sep);
			int ok = populate_from_nested_object(prec, pjson_value, next_prefix, flatten_sep, json_array_ingest);
			// Neither string was given to the lrec, so release both before acting on the result.
			free(next_prefix);
			free(lrec_key);
			if (!ok)
				return FALSE;
			break;
		}

		case JSON_ARRAY:
			switch (json_array_ingest) {
			case JSON_ARRAY_INGEST_FATAL:
				fprintf(stderr,
					"%s: found array item within JSON object. This is valid but unmillerable JSON.\n"
					"Use --json-skip-arrays-on-input to exclude these from input without fataling.\n"
					"Or, --json-map-arrays-on-input to convert them to integer-indexed maps.\n",
					MLR_GLOBALS.bargv0);
				// The key was never handed to the lrec; release it before bailing out.
				free(lrec_key);
				return FALSE;
			case JSON_ARRAY_INGEST_AS_MAP: {
				char* next_prefix = mlr_paste_2_strings(lrec_key, flatten_sep);
				int ok = populate_from_nested_array(prec, pjson_value, next_prefix, flatten_sep, json_array_ingest);
				free(next_prefix);
				free(lrec_key);
				if (!ok)
					return FALSE;
				break;
			}
			default:
				// Any other ingest mode: the array contributes nothing, so the key is unused.
				free(lrec_key);
				break;
			}
			break;

		default:
			MLR_INTERNAL_CODING_ERROR();
			break;
		}
	}
	return TRUE;
}
274
275 // ----------------------------------------------------------------
276 // * The buffer is an entire JSON blob, e.g. contents from stdio read; peof-psof is the file size so peof is one
277 // byte *after* the last valid file byte.
278 // * The buffer is not assumed to be null-terminated.
279 // * Any lines beginning with comment_string are modified by poking space characters up to line_term.
mlr_json_strip_comments(char * psof,char * peof,comment_handling_t comment_handling,char * comment_string,char * line_term)280 void mlr_json_strip_comments(char* psof, char* peof, comment_handling_t comment_handling, char* comment_string, char* line_term) {
281 int comment_string_len = strlen(comment_string);
282 int line_term_len = strlen(line_term);
283 int at_line_start = TRUE;
284 for (char* p = psof; p < peof; /* increment in loop */) {
285 if (streqn(p, line_term, line_term_len)) {
286 p += line_term_len;
287 at_line_start = TRUE;
288 } else if (at_line_start && streqn(p, comment_string, comment_string_len)) {
289 // Fill with spaces to end of line
290 while (p < peof && !streqn(p, line_term, line_term_len)) {
291 if (comment_handling == PASS_COMMENTS)
292 fputc(*p, stdout);
293 *p = ' ';
294 p++;
295 }
296 if (comment_handling == PASS_COMMENTS)
297 fputs(line_term, stdout);
298 at_line_start = TRUE;
299 } else {
300 at_line_start = FALSE;
301 p++;
302 }
303 }
304 }
305
306 // ----------------------------------------------------------------
// I'm using a 3rd-party JSON parser, and it's easier to strip all trailing whitespace here
// than to tweak the parser to handle it.
//
// psof points at the first byte of the buffer; on entry *ppeof is one past the last valid byte.
// On return *ppeof has been moved back past any trailing spaces, tabs, carriage returns, and
// newlines, so it is one past the last non-whitespace byte (or equal to psof if the buffer is
// empty or all whitespace). The buffer contents are not modified.
void mlr_json_end_strip(char* psof, char** ppeof) {
	char* peof = *ppeof;

	// Inspect the byte just before peof rather than tracking a last-valid-byte pointer, so that no
	// pointer before psof is ever formed.
	while (peof > psof) {
		char c = peof[-1];
		if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
			break;
		peof--;
	}

	*ppeof = peof;
}
321