1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 2010-2012 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22 * xml method
23 *
24 * Glenn Fowler
25 * AT&T Research
26 */
27
/*
 * option/usage description for the xml method
 * xmlmeth() copies this text, appends the schema tag descriptions
 * generated by tagusage() and then the closing "}\n"
 */

static const char usage[] =
"[+DESCRIPTION?The \bdss\b xml method reads XML data in two formats: "
"pure XML (\aname=value\a attributes within tags ignored) and JSON. In "
"general XML data provides field names but not type information, so by "
"default all fields are treated as strings. Only fields specified in the "
"\bdss\b(1) \aexpression\a are parsed from the data. Fields are named "
"using \b.\b'd notation, where each prefix name represents XML tag "
"nesting. For XML data, if all records have the same XML tag prefix then "
"that prefix may be omitted, except that all field names must have at "
"least one prefix component. For example, \busers.user.name\b and "
"\buser.name\b are valid, but \bname\b is not.]"
"[+?The xml method schema is an XML document that specifies the type of "
"one or more fields, and any libraries required to support those types.]"
"[T:test?Enable implementation-specific tests and tracing.]#[mask]"
"[+TAGS?The supported tags are:]{"
;
44
45 #include <dsslib.h>
46 #include <ctype.h>
47
/* forward declarations for the method-private types defined below */

struct Field_s; typedef struct Field_s Field_t;
struct File_s; typedef struct File_s File_t;
struct Value_s; typedef struct Value_s Value_t;
struct Library_s; typedef struct Library_s Library_t;
struct Xml_s; typedef struct Xml_s Xml_t;
53
/*
 * one schema LIBRARY entry
 * xml_library_dat() allocates each node with strlen(name) extra bytes,
 * so name[1] only accounts for the terminating NUL
 */

struct Library_s			/* library list			*/
{
	Library_t*	next;		/* next in list			*/
	char		name[1];	/* library name			*/
};
59
/*
 * per-variable value state, allocated by xmlvar() right after the
 * Cxvariable_t it belongs to
 * op_get() treats the value as empty unless record matches the
 * File_t.record of the record being evaluated
 */

struct Value_s				/* value in current record	*/
{
	uintmax_t	record;		/* record number for value	*/
	size_t		offset;		/* File_t.value offset		*/
	size_t		size;		/* string value size		*/
	int		number;		/* value is a number		*/
	Cxinternal_f	internalf;	/* convert to internal value	*/
};
68
/*
 * per-file parse state
 * xmlfopen() allocates this with room for maxlevel+1 additional part[]
 * entries followed by the maxname+1 byte buffer that name points to
 * the byte at *end is overwritten with a 0 sentinel while the buffer is
 * being scanned; its real value is kept in save
 */

struct File_s				/* file read state		*/
{
	uintmax_t	record;		/* current record number	*/
	unsigned char*	buf;		/* input buffer			*/
	unsigned char*	rec;		/* input record position	*/
	unsigned char*	cur;		/* input buffer position	*/
	unsigned char*	end;		/* input buffer end		*/
	char*		name;		/* current .'d name		*/
	char*		root;		/* root path			*/
	char*		value;		/* current record tag values	*/
	int		image;		/* keep current record image	*/
	int		level;		/* part[] index			*/
	int		maxlevel;	/* max part[] index		*/
	int		maxname;	/* max .'d name length		*/
	int		prefix;		/* implied .'d prefix		*/
	int		save;		/* real char at *f->end		*/
	size_t		maxvalue;	/* size of value		*/
	unsigned char*	prv;		/* previous buffer chunk	*/
	size_t		prvsize;	/* max previous buffer size	*/
	size_t		prvlen;		/* current previous buffer size	*/
	char*		part[1];	/* .'d part stack		*/
};
91
/*
 * one schema FIELD entry collected by the tag callbacks
 * xmlopen() turns each entry into a variable via xmlvar() and then
 * frees the entry
 */

struct Field_s				/* current proto schema field	*/
{
	Field_t*	next;		/* next in list			*/
	char*		name;		/* qualified field name		*/
	char*		type;		/* field type name		*/
	Cxformat_t	format;		/* field output format		*/
};
99
/*
 * method state shared by all files opened with this method
 * the schema tag callbacks cast their Tagdisc_t* argument to Xml_t*, so
 * dsstagdisc must remain the first member
 * NOTE(review): that cast presumably also relies on tagdisc being the
 * first member of Dsstagdisc_t -- confirm against dsslib.h
 */

struct Xml_s				/* Dssmeth_t.data		*/
{
	Dsstagdisc_t	dsstagdisc;	/* schema tag discipline	*/
	Dssmeth_t	meth;		/* private copy of base method	*/
	Dssmeth_t*	basemeth;	/* method passed to xmlmeth()	*/
	Library_t*	libraries;	/* schema LIBRARY list head	*/
	Library_t*	lastlibrary;	/* schema LIBRARY list tail	*/
	Field_t*	fields;		/* schema FIELD list head	*/
	Field_t*	lastfield;	/* schema FIELD list tail	*/
	Cxflags_t	test;		/* -T/--test mask		*/
	char*		root;		/* first .'d name component	*/
	int		image;		/* copied to File_t.image	*/
	int		maxname;	/* copied to File_t.maxname	*/
	int		maxlevel;	/* copied to File_t.maxlevel	*/
	int		prefix;		/* <0 until xmlfopen() sets it	*/
};
116
/* empty string: stands in for an empty input buffer and for unset values */

static const char	null[1];

/*
 * character class tables indexed by unsigned char
 * all entries are 0 until xmlopen() marks the delimiter characters
 */

static char		xml_beg_tag[UCHAR_MAX+1];
static char		xml_end_tag[UCHAR_MAX+1];
static char		xml_end_att[UCHAR_MAX+1];

/* in the json tables xmlopen() uses 2 for delimiters and 1 for white space */

static char		json_beg_tag[UCHAR_MAX+1];
static char		json_end_val[UCHAR_MAX+1];

extern Dsslib_t		dss_lib_xml;
127
128 /*
129 * xml var create/lookup
130 * type==0 for prefix components
131 */
132
133 static Cxvariable_t*
xmlvar(Cx_t * cx,char * name,const char * type,Cxdisc_t * disc)134 xmlvar(Cx_t* cx, char* name, const char* type, Cxdisc_t* disc)
135 {
136 Xml_t* xml = (Xml_t*)DSS(cx)->meth->data;
137 Cxvariable_t* var;
138 Value_t* val;
139 char* s;
140 int n;
141 int i;
142
143 if (*name == '.')
144 var = cxvariable(cx, name, NiL, disc);
145 else if (!(var = dtmatch(cx->variables, name)))
146 {
147 n = strlen(name) + 1;
148 if (!(var = vmnewof(cx->vm, 0, Cxvariable_t, 1, sizeof(Value_t) + n)))
149 {
150 if (disc->errorf)
151 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
152 return 0;
153 }
154 var->data = val = (Value_t*)(var + 1);
155 strcpy((char*)(var->name = (const char*)(var + 1) + sizeof(Value_t)), name);
156 var->type = (Cxtype_t*)(type ? type : "number");
157 if (cxaddvariable(cx, var, disc))
158 return 0;
159 if ((val->number = cxisnumber(var->type)) && !(val->internalf = var->type->internalf) && var->type->base)
160 val->internalf = var->type->base->internalf;
161 if (type)
162 {
163 if (xml->maxname < ++n)
164 xml->maxname = n;
165 n = 0;
166 for (s = name; *s; s++)
167 if (*s == '.')
168 {
169 *s = 0;
170 i = !xmlvar(cx, name, NiL, disc);
171 *s = '.';
172 if (i)
173 return 0;
174 n++;
175 }
176 if (xml->maxlevel < n)
177 xml->maxlevel = n;
178 if (n && !xml->root && (s = strchr(var->name, '.')))
179 {
180 if (xml->root = vmnewof(cx->vm, 0, char, s - var->name, 1))
181 memcpy(xml->root, var->name, s - var->name);
182 else
183 {
184 if (disc->errorf)
185 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
186 return 0;
187 }
188 }
189 }
190 }
191 return var;
192 }
193
194 /*
195 * xml identf
196 */
197
198 static int
xmlident(Dssfile_t * file,void * buf,size_t n,Dssdisc_t * disc)199 xmlident(Dssfile_t* file, void* buf, size_t n, Dssdisc_t* disc)
200 {
201 static const char magic[] = "<?xml";
202
203 return (n > (sizeof(magic) - 1) && !memcmp(buf, magic, sizeof(magic) - 1));
204 }
205
206 /*
207 * refill the input buffer and return the next char, -1 on error
208 */
209
210 static int
refill(Dssfile_t * file,register File_t * f,int c,Dssdisc_t * disc)211 refill(Dssfile_t* file, register File_t* f, int c, Dssdisc_t* disc)
212 {
213 size_t n;
214
215 if (f->cur >= f->end)
216 {
217 if (f->rec)
218 {
219 if ((n = f->cur - f->rec + f->prvlen) > f->prvsize)
220 {
221 f->prvsize = roundof(f->prvsize + n, 1024);
222 if (!(f->prv = vmnewof(file->vm, f->prv, unsigned char, f->prvsize, 0)))
223 {
224 if (disc->errorf)
225 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
226 return -1;
227 }
228 }
229 if (n > 1)
230 memcpy(f->prv + f->prvlen, f->rec, n - 1);
231 f->prv[n-1] = f->save;
232 f->prvlen += n;
233 }
234 if (!(f->buf = (unsigned char*)sfreserve(file->io, SF_UNBOUND, 0)))
235 return -1;
236 if (f->rec)
237 f->rec = f->buf;
238 f->cur = f->buf;
239 c = f->save;
240 f->end = f->buf + sfvalue(file->io) - 1;
241 f->save = *f->end;
242 *f->end = 0;
243 }
244 return c;
245 }
246
/*
 * REFILL: c = refill(); statement r is executed if that failed
 * r must be a real statement such as a goto -- a bare value does nothing
 * requires file and disc in the caller's scope
 */

#define REFILL(f,c,r)	do { if ((c = refill(file, f, c, disc)) < 0) r; } while (0)

/*
 * RESIZE: grow f->value by 1024 bytes and rebase vb, vp and ve on it
 * requires f, file, disc, o, vb, vp and ve in the caller's scope
 * returns -1 from the calling function if the allocation fails
 */

#define RESIZE() \
	do \
	{ \
		o = vp - f->value; \
		f->maxvalue += 1024; \
		if (!(f->value = vmnewof(file->vm, f->value, char, f->maxvalue, 0))) \
		{ \
			if (disc->errorf) \
				(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space"); \
			return -1; \
		} \
		vb = f->value; \
		vp = vb + o; \
		ve = vb + f->maxvalue - 1; \
	} while (0)
264
265 /*
266 * xml readf -- consume 1 xml record and retain field values of interest
267 */
268
269 static int
xmlread(Dssfile_t * file,Dssrecord_t * record,Dssdisc_t * disc)270 xmlread(Dssfile_t* file, Dssrecord_t* record, Dssdisc_t* disc)
271 {
272 register File_t* f = file->data;
273 register char* np;
274 register char* ne;
275 register char* vp;
276 register char* ve;
277 register int c;
278 char* vb;
279 Cxvariable_t* v;
280 ssize_t o;
281 int q;
282
283 f->record++;
284 f->rec = 0;
285 f->prvlen = 0;
286 vp = vb = f->value;
287 ve = f->value + f->maxvalue - 1;
288 ne = f->name + f->maxname;
289 np = f->level >= f->maxlevel ? ne : f->level ? f->part[f->level] : f->name;
290 for (;;)
291 {
292 /* find the next tag */
293
294 for (;;)
295 {
296 while (!xml_beg_tag[*f->cur++]);
297 if (*(f->cur - 1))
298 break;
299 REFILL(f, c, goto done);
300 if (c == '<')
301 break;
302 }
303 tag:
304 if (!(c = *f->cur++))
305 REFILL(f, c, -1);
306 switch (c)
307 {
308 case '/':
309 if (f->level)
310 f->level--;
311 if (f->level <= f->maxlevel)
312 np = f->part[f->level];
313 /*FALLTHROUGH*/
314 case '?':
315 for (;;)
316 {
317 while (!xml_end_tag[*f->cur++]);
318 if (*(f->cur - 1))
319 break;
320 REFILL(f, o, goto incomplete);
321 if (o == '>')
322 break;
323 }
324 if (c == '/' && f->level == f->prefix)
325 {
326 record->data = f;
327 file->count = f->record;
328 return 1;
329 }
330 break;
331 default:
332 if (np <= ne && f->level > f->prefix)
333 {
334 *np = 0;
335 if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
336 {
337 if (f->image && !f->rec)
338 f->rec = f->cur - 1;
339 ((Value_t*)v->data)->record = f->record;
340 ((Value_t*)v->data)->offset = vp - vb;
341 ((Value_t*)v->data)->size = 1;
342 if (vp >= ve)
343 RESIZE();
344 *vp++ = '1';
345 if (vp >= ve)
346 RESIZE();
347 *vp++ = 0;
348 }
349 }
350 if (f->level <= f->maxlevel)
351 f->part[f->level] = np;
352 else
353 np = ne + 1;
354 f->level++;
355 if (np < ne)
356 *np++ = '.';
357 if (np < ne)
358 *np++ = c;
359 q = 0;
360 for (;;)
361 {
362 while (!xml_end_tag[c = *f->cur++])
363 if (np < ne)
364 *np++ = c;
365 else
366 q = c;
367 if (c)
368 break;
369 REFILL(f, c, goto incomplete);
370 if (c == '>')
371 break;
372 if (np < ne)
373 *np++ = c;
374 else
375 q = c;
376 }
377 if (!q && *(np - 1) == '/' || q == '/')
378 {
379 /* null tag */
380
381 if (f->level)
382 f->level--;
383 if (f->level <= f->maxlevel)
384 np = f->part[f->level];
385 }
386 else
387 {
388 /* ignore tag name=value attributes -- why did they allow them */
389
390 if (c == ' ')
391 {
392 q = 0;
393 for (;;)
394 {
395 while (!xml_end_att[c = *f->cur++]);
396 if (!c)
397 REFILL(f, c, goto incomplete);
398 if (c == '"')
399 q = !q;
400 else if (!q && c == '>')
401 break;
402 }
403 }
404 if (np < ne && f->level > f->prefix)
405 {
406 *np = 0;
407 if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
408 {
409 if (f->image && !f->rec)
410 f->rec = f->cur - 1;
411 ((Value_t*)v->data)->record = f->record;
412 ((Value_t*)v->data)->offset = vp - vb;
413 for (;;)
414 {
415 while (!xml_beg_tag[c = *f->cur++])
416 {
417 if (vp >= ve)
418 RESIZE();
419 *vp++ = c;
420 }
421 if (*(f->cur - 1))
422 break;
423 REFILL(f, c, goto incomplete);
424 if (c == '<')
425 break;
426 if (vp >= ve)
427 RESIZE();
428 *vp++ = c;
429 }
430 ((Value_t*)v->data)->size = vp - (vb + ((Value_t*)v->data)->offset);
431 if (vp >= ve)
432 RESIZE();
433 *vp++ = 0;
434 goto tag;
435 }
436 }
437 }
438 break;
439 }
440 }
441 done:
442 if (f->level <= f->prefix)
443 return 0;
444 incomplete:
445 if (disc->errorf)
446 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: record %I*u incomplete", file->path, sizeof(f->record), f->record);
447 return -1;
448 }
449
450 /*
451 * xml writef -- output current record
452 */
453
454 static int
xmlwrite(Dssfile_t * file,Dssrecord_t * record,Dssdisc_t * disc)455 xmlwrite(Dssfile_t* file, Dssrecord_t* record, Dssdisc_t* disc)
456 {
457 register File_t* r = (File_t*)record->data;
458 size_t n;
459
460 sfprintf(file->io, "<%s", r->root);
461 if (r->prvlen && sfwrite(file->io, r->prv, r->prvlen) != r->prvlen)
462 {
463 if (disc->errorf)
464 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
465 return -1;
466 }
467 if ((n = r->cur - r->rec) && sfwrite(file->io, r->rec, n) != n)
468 {
469 if (disc->errorf)
470 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
471 return -1;
472 }
473 sfputc(file->io, '\n');
474 return 0;
475 }
476
477 /*
478 * xml fopenf
479 */
480
481 static int
xmlfopen(Dssfile_t * file,Dssdisc_t * disc)482 xmlfopen(Dssfile_t* file, Dssdisc_t* disc)
483 {
484 register Xml_t* xml = (Xml_t*)file->dss->meth->data;
485 register unsigned char* s;
486 register unsigned char* t;
487 register int n;
488 File_t* f;
489 int c;
490 int m;
491 int x;
492 unsigned char* buf;
493 unsigned char* end;
494
495 if (file->flags & DSS_FILE_WRITE)
496 buf = 0;
497 else if (buf = (unsigned char*)sfreserve(file->io, SF_UNBOUND, 0))
498 {
499 end = buf + sfvalue(file->io) - 1;
500 if (xml->prefix < 0)
501 {
502 xml->image = !!(file->dss->flags & DSS_WRITE);
503 if (file->format->readf == xmlread)
504 {
505 if (!xml->root)
506 {
507 if (disc->errorf)
508 (*disc->errorf)(NiL, disc, 2, "%s variable names must be qualified by at least the immediate containing tag", file->format->name);
509 return -1;
510 }
511 x = *end;
512 *end = 0;
513 s = buf;
514 n = 0;
515 m = -1;
516 for (;;)
517 {
518 while (!xml_beg_tag[*s++]);
519 if (*(s - 1))
520 {
521 t = s;
522 while (!xml_end_tag[*s++]);
523 if (*t == '/')
524 {
525 if (m > 0)
526 {
527 m--;
528 n -= s - t - 2;
529 }
530 }
531 else if (*t != '?')
532 {
533 m++;
534 n += (c = s - t - 1);
535 if (!memcmp(xml->root, t, c) && !*(xml->root + c))
536 break;
537 }
538 }
539 else if (s >= end)
540 break;
541 }
542 *end = x;
543 xml->prefix = m;
544 xml->maxlevel += m;
545 xml->maxname += n;
546 }
547 else
548 {
549 xml->prefix = 0;
550 xml->maxname += 1024; /*XXX*/
551 }
552 }
553 }
554 if (!(f = vmnewof(file->vm, 0, File_t, 1, (xml->maxlevel + 1) * sizeof(char*) + xml->maxname + 1)))
555 {
556 if (disc->errorf)
557 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
558 return -1;
559 }
560 file->data = f;
561 f->name = (char*)(f + 1) + (xml->maxlevel + 1) * sizeof(char*);
562 if (!(file->flags & DSS_FILE_WRITE))
563 {
564 if (buf)
565 {
566 f->cur = f->buf = buf;
567 f->end = end;
568 f->save = *end;
569 *end = 0;
570 }
571 else
572 f->buf = f->cur = f->end = (unsigned char*)null;
573 f->image = xml->image;
574 f->prefix = xml->prefix;
575 f->maxlevel = xml->maxlevel;
576 f->maxname = xml->maxname;
577 f->maxvalue = 1024;
578 f->root = xml->root;
579 if (!(f->value = vmnewof(file->vm, 0, char, f->maxvalue, 0)))
580 {
581 vmfree(file->vm, f);
582 if (disc->errorf)
583 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
584 return -1;
585 }
586 }
587 return 0;
588 }
589
590 /*
591 * xml fclosef
592 */
593
594 static int
xmlfclose(Dssfile_t * file,Dssdisc_t * disc)595 xmlfclose(Dssfile_t* file, Dssdisc_t* disc)
596 {
597 if (!file || !file->data)
598 return -1;
599 return 0;
600 }
601
/*
 * xml format descriptor, added to meth->formats by xmlmeth()
 * NOTE(review): the slots left 0 are not used by this file; their
 * meaning is defined by Dssformat_t in dsslib.h
 */

static Dssformat_t xml_format =
{
	"xml",
	"xml format (2010-05-19)",
	CXH,
	xmlident,	/* identf	*/
	xmlfopen,	/* fopenf	*/
	xmlread,	/* readf	*/
	xmlwrite,	/* writef	*/
	0,
	xmlfclose,	/* fclosef	*/
	0,
	0,
	0
};
617
618 /*
619 * json identf
620 */
621
622 static int
jsonident(Dssfile_t * file,void * buf,size_t n,Dssdisc_t * disc)623 jsonident(Dssfile_t* file, void* buf, size_t n, Dssdisc_t* disc)
624 {
625 register char* s;
626 register char* e;
627 register const char* m;
628
629 static const char magic[] = "{\"";
630
631 s = buf;
632 e = s + n;
633 for (m = magic; s < e; s++)
634 if (isspace(*s))
635 ;
636 else if (*s != *m)
637 return 0;
638 else if (!*++m)
639 return 1;
640 return 0;
641 }
642
643 /*
644 * json readf
645 */
646
647 static int
jsonread(register Dssfile_t * file,register Dssrecord_t * record,Dssdisc_t * disc)648 jsonread(register Dssfile_t* file, register Dssrecord_t* record, Dssdisc_t* disc)
649 {
650 register File_t* f = file->data;
651 register char* np;
652 register char* ne;
653 register char* vp;
654 register char* ve;
655 register int c;
656 char* vb;
657 Cxvariable_t* v;
658 size_t o;
659 int a;
660 int e;
661 int q;
662 int n;
663
664 f->record++;
665 error(-1, "AHA jsonread file=%p record.file=%p", file, record->file);
666 f->rec = 0;
667 f->prvlen = 0;
668 vp = vb = f->value;
669 ve = f->value + f->maxvalue - 1;
670 ne = f->name + f->maxname;
671 np = f->level > f->maxlevel ? ne : f->level ? f->part[f->level] : f->name;
672 for (;;)
673 {
674 beg:
675 do
676 {
677 while (!json_beg_tag[c = *f->cur++]);
678 if (!c)
679 REFILL(f, c, goto done);
680 if (c == '}')
681 {
682 if (!f->level)
683 {
684 record->data = f;
685 file->count = f->record;
686 return 1;
687 }
688 if (--f->level <= f->maxlevel)
689 np = f->part[f->level];
690 }
691 } while (c != '{' && c != ',');
692 if (f->image && !f->rec)
693 f->rec = f->cur - 1;
694 tag:
695 do
696 {
697 while (json_end_val[c = *f->cur++] == 1);
698 if (!c)
699 REFILL(f, c, goto incomplete);
700 } while (json_end_val[c] == 1);
701 if (f->level <= f->maxlevel)
702 f->part[f->level] = np;
703 else
704 np = ne + 1;
705 f->level++;
706 if (np < ne)
707 *np++ = '.';
708 if (!(q = c == '"') && np < ne)
709 *np++ = c;
710 for (;;)
711 {
712 while (!json_end_val[c = *f->cur++])
713 if (np < ne)
714 *np++ = c;
715 if (!c)
716 REFILL(f, c, goto incomplete);
717 if (c == '"')
718 {
719 q = !q;
720 continue;
721 }
722 else if (c == '\\')
723 {
724 if (!(c = *f->cur++))
725 REFILL(f, c, goto incomplete);
726 }
727 else if (!q)
728 {
729 if (c == '}')
730 {
731 if (!f->level)
732 {
733 record->data = f;
734 file->count = f->record;
735 return 1;
736 }
737 if (--f->level <= f->maxlevel)
738 np = f->part[f->level];
739 break;
740 }
741 else if (c == ':')
742 {
743 do
744 {
745 while (json_end_val[c = *f->cur++] == 1);
746 if (!c)
747 REFILL(f, c, goto incomplete);
748 } while (json_end_val[c] == 1);
749 if (c == '{')
750 {
751 if (np <= ne)
752 {
753 *np = 0;
754 if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
755 {
756 ((Value_t*)v->data)->record = f->record;
757 ((Value_t*)v->data)->offset = vp - vb;
758 if (vp >= ve)
759 RESIZE();
760 *vp++ = '1';
761 ((Value_t*)v->data)->size = 1;
762 if (vp >= ve)
763 RESIZE();
764 *vp++ = 0;
765 }
766 }
767 goto tag;
768 }
769 a = 0;
770 q = 0;
771 if (np < ne && f->level > f->prefix)
772 {
773 *np = 0;
774 if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
775 {
776 ((Value_t*)v->data)->record = f->record;
777 ((Value_t*)v->data)->offset = vp - vb;
778 e = c == 'n';
779 for (;;)
780 {
781 if (c == '"')
782 {
783 q = !q;
784 goto ignore;
785 }
786 else if (c == '\\')
787 {
788 if (!(c = *f->cur++))
789 REFILL(f, c, goto incomplete);
790 if (c != '\\' && c != '"' && c != ',' && c != '}')
791 {
792 if (vp >= ve)
793 RESIZE();
794 *vp++ = '\\';
795 }
796 }
797 else if (!q)
798 {
799 if (c == '[')
800 {
801 a++;
802 goto ignore;
803 }
804 else if (c == ']')
805 {
806 if (a)
807 a--;
808 goto ignore;
809 }
810 else if (json_end_val[c] == 1)
811 goto ignore;
812 else if (a)
813 /*array*/;
814 else if (c == '}')
815 {
816 if (!f->level)
817 {
818 record->data = f;
819 file->count = f->record;
820 return 1;
821 }
822 if (--f->level <= f->maxlevel)
823 np = f->part[f->level];
824 break;
825 }
826 else if (c == ',')
827 break;
828 }
829 if (vp >= ve)
830 RESIZE();
831 *vp++ = c;
832 ignore:
833 while (!json_end_val[c = *f->cur++])
834 {
835 if (vp >= ve)
836 RESIZE();
837 *vp++ = c;
838 }
839 if (!c)
840 REFILL(f, c, goto incomplete);
841 }
842 if (e)
843 vp = vb + ((Value_t*)v->data)->offset;
844 ((Value_t*)v->data)->size = vp - (vb + ((Value_t*)v->data)->offset);
845 *vp++ = 0;
846 if (!f->level)
847 {
848 record->data = f;
849 file->count = f->record;
850 return 1;
851 }
852 if (--f->level <= f->maxlevel)
853 np = f->part[f->level];
854 if (c == ',')
855 goto tag;
856 goto beg;
857 }
858 }
859 n = 1;
860 for (;;)
861 {
862 if (c == '"')
863 q = !q;
864 else if (c == '\\')
865 {
866 if (!(c = *f->cur++))
867 REFILL(f, c, goto incomplete);
868 }
869 else if (!q)
870 {
871 if (c == '[')
872 a++;
873 else if (c == ']')
874 {
875 if (a)
876 a--;
877 }
878 else if (a)
879 /*array*/;
880 else if (c == '{')
881 n++;
882 else if (c == '}' && !--n)
883 {
884 if (!f->level)
885 {
886 record->data = f;
887 file->count = f->record;
888 return 1;
889 }
890 if (--f->level <= f->maxlevel)
891 np = f->part[f->level];
892 break;
893 }
894 else if (c == ',' && n == 1)
895 break;
896 }
897 while (!json_end_val[c = *f->cur++]);
898 if (!c)
899 REFILL(f, c, goto done);
900 }
901 if (!f->level)
902 {
903 record->data = f;
904 file->count = f->record;
905 return 1;
906 }
907 if (--f->level <= f->maxlevel)
908 np = f->part[f->level];
909 if (c == ',')
910 goto tag;
911 goto beg;
912 }
913 else if (json_end_val[c] == 1)
914 continue;
915 }
916 if (np < ne)
917 *np++ = c;
918 }
919 }
920 done:
921 if (!f->level)
922 return 0;
923 incomplete:
924 if (disc->errorf)
925 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: record %I*u incomplete", file->path, sizeof(f->record), f->record);
926 return -1;
927 }
928
929 /*
930 * xml writef -- output current record
931 */
932
933 static int
jsonwrite(Dssfile_t * file,Dssrecord_t * record,Dssdisc_t * disc)934 jsonwrite(Dssfile_t* file, Dssrecord_t* record, Dssdisc_t* disc)
935 {
936 register File_t* r = (File_t*)record->data;
937 size_t n;
938
939 if (r->prvlen && sfwrite(file->io, r->prv, r->prvlen) != r->prvlen)
940 {
941 if (disc->errorf)
942 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
943 return -1;
944 }
945 if ((n = r->cur - r->rec) && sfwrite(file->io, r->rec, n) != n)
946 {
947 if (disc->errorf)
948 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
949 return -1;
950 }
951 sfputc(file->io, '\n');
952 return 0;
953 }
954
/*
 * json format descriptor, added to meth->formats by xmlmeth()
 * open and close are shared with the xml format
 * NOTE(review): the slots left 0 are not used by this file; their
 * meaning is defined by Dssformat_t in dsslib.h
 */

static Dssformat_t json_format =
{
	"json",
	"json format (2010-05-19)",
	CXH,
	jsonident,	/* identf	*/
	xmlfopen,	/* fopenf	*/
	jsonread,	/* readf	*/
	jsonwrite,	/* writef	*/
	0,
	xmlfclose,	/* fclosef	*/
	0,
	0,
	0
};
970
971 static int
op_get(Cx_t * cx,Cxinstruction_t * pc,Cxoperand_t * r,Cxoperand_t * a,Cxoperand_t * b,void * data,Cxdisc_t * disc)972 op_get(Cx_t* cx, Cxinstruction_t* pc, Cxoperand_t* r, Cxoperand_t* a, Cxoperand_t* b, void* data, Cxdisc_t* disc)
973 {
974 File_t* f = (File_t*)DSSDATA(data);
975 Value_t* v = (Value_t*)pc->data.variable->data;
976 char* s;
977
978 if (v)
979 {
980 if (v->record == f->record)
981 s = f->value + v->offset;
982 else
983 {
984 s = (char*)null;
985 v->size = 0;
986 }
987 if (!v->internalf)
988 {
989 r->value.string.data = s;
990 r->value.string.size = v->size;
991 }
992 else if ((*v->internalf)(cx, pc->data.variable->type, NiL, &pc->data.variable->format, r, s, v->size, cx->rm, disc) < 0)
993 return -1;
994 }
995 return 0;
996 }
997
998 static int
op_ref(Cx_t * cx,Cxinstruction_t * pc,Cxoperand_t * r,Cxoperand_t * a,Cxoperand_t * b,void * data,Cxdisc_t * disc)999 op_ref(Cx_t* cx, Cxinstruction_t* pc, Cxoperand_t* r, Cxoperand_t* a, Cxoperand_t* b, void* data, Cxdisc_t* disc)
1000 {
1001 return (r->value.variable = xmlvar(cx, b->value.string.data, "string", disc)) ? 0 : -1;
1002 }
1003
/* callouts registered with cxaddcallout() by xmlopen() */

static Cxcallout_t local_callouts[] =
{
CXC(CX_GET,	"void",		"void",		op_get,		0)
CXC(CX_REF,	"string",	"void",		op_ref,		0)
};
1009
1010 static int
xml_field_name_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1011 xml_field_name_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1012 {
1013 register Xml_t* xml = (Xml_t*)disc;
1014
1015 if (!(xml->lastfield->name = strdup(data)))
1016 {
1017 if (disc->errorf)
1018 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1019 return -1;
1020 }
1021 return 0;
1022 }
1023
1024 static int
xml_field_type_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1025 xml_field_type_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1026 {
1027 register Xml_t* xml = (Xml_t*)disc;
1028 char* s;
1029
1030 memset(&xml->lastfield->format, 0, sizeof(xml->lastfield->format));
1031 (void)cxattr(NiL, data, &s, &xml->lastfield->format, NiL);
1032 if (!*s)
1033 s = "number";
1034 if (!(xml->lastfield->type = strdup(s)))
1035 {
1036 if (disc->errorf)
1037 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1038 return -1;
1039 }
1040 if (xml->lastfield->format.flags & CX_FLOAT)
1041 xml->lastfield->format.flags &= ~(CX_STRING|CX_BUFFER|CX_UNSIGNED|CX_INTEGER);
1042 else if (xml->lastfield->format.flags & CX_UNSIGNED)
1043 {
1044 xml->lastfield->format.flags &= ~(CX_STRING|CX_BUFFER);
1045 xml->lastfield->format.flags |= CX_UNSIGNED|CX_INTEGER;
1046 }
1047 else if (!(xml->lastfield->format.flags & (CX_STRING|CX_BUFFER|CX_INTEGER)))
1048 {
1049 if (streq(s, "string"))
1050 xml->lastfield->format.flags |= CX_STRING;
1051 else if (streq(s, "buffer"))
1052 xml->lastfield->format.flags |= CX_BUFFER;
1053 }
1054 return 0;
1055 }
1056
/*
 * tags accepted inside FIELD
 * each entry is name, description, then four slots of which this file
 * fills the second (begin), third (data) and fourth (end) with callbacks
 * NOTE(review): the first slot is always 0 here; see Tags_t in the tag
 * library header for its meaning
 */

static Tags_t	tags_xml_field[] =
{
	"NAME",		"Field name.",
			0,0,xml_field_name_dat,0,
	"TYPE",		"Field type. The intrinsic types are number and"
			" string. Other types are defined in optional"
			" method and schema libraries.",
			0,0,xml_field_type_dat,0,
	0
};
1067
1068 static int
xml_name_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1069 xml_name_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1070 {
1071 register Xml_t* xml = (Xml_t*)disc;
1072
1073 if (!(xml->meth.name = strdup(data)))
1074 {
1075 if (disc->errorf)
1076 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1077 return -1;
1078 }
1079 return 0;
1080 }
1081
1082 static int
xml_description_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1083 xml_description_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1084 {
1085 register Xml_t* xml = (Xml_t*)disc;
1086
1087 if (!(xml->meth.description = strdup(data)))
1088 {
1089 if (disc->errorf)
1090 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1091 return -1;
1092 }
1093 return 0;
1094 }
1095
1096 static int
xml_library_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1097 xml_library_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1098 {
1099 register Xml_t* xml = (Xml_t*)disc;
1100 register Library_t* p;
1101
1102 if (!(p = newof(0, Library_t, 1, strlen(data))))
1103 {
1104 if (disc->errorf)
1105 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1106 return -1;
1107 }
1108 strcpy(p->name, data);
1109 if (!xml->lastlibrary)
1110 xml->libraries = xml->lastlibrary = p;
1111 else
1112 xml->lastlibrary = xml->lastlibrary->next = p;
1113 return 0;
1114 }
1115
1116 static Tags_t*
xml_field_beg(Tag_t * tag,Tagframe_t * fp,const char * name,Tagdisc_t * disc)1117 xml_field_beg(Tag_t* tag, Tagframe_t* fp, const char* name, Tagdisc_t* disc)
1118 {
1119 register Xml_t* xml = (Xml_t*)disc;
1120 Field_t* f;
1121
1122 if (name)
1123 {
1124 if (!(f = newof(0, Field_t, 1, 0)))
1125 {
1126 if (disc->errorf)
1127 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1128 return 0;
1129 }
1130 if (!xml->lastfield)
1131 xml->fields = f;
1132 else
1133 xml->lastfield->next = f;
1134 xml->lastfield = f;
1135 }
1136 return &tags_xml_field[0];
1137 }
1138
1139 static int
xml_field_end(Tag_t * tag,Tagframe_t * fp,Tagdisc_t * disc)1140 xml_field_end(Tag_t* tag, Tagframe_t* fp, Tagdisc_t* disc)
1141 {
1142 register Xml_t* xml = (Xml_t*)disc;
1143
1144 if (xml->lastfield && (!xml->lastfield->name || !xml->lastfield->type))
1145 {
1146 if (disc->errorf)
1147 (*disc->errorf)(NiL, disc, 2, "field name and type must be specified");
1148 return -1;
1149 }
1150 return 0;
1151 }
1152
/*
 * tags accepted inside XML; same entry layout as tags_xml_field
 * FIELD is the only one with begin and end callbacks, the others only
 * take data
 */

static Tags_t	tags_xml[] =
{
	"NAME",		"Schema name.",
			0,0,xml_name_dat,0,
	"DESCRIPTION",	"Schema description.",
			0,0,xml_description_dat,0,
	"LIBRARY",	"Required type/map library name;"
			" more than one library may be specified.",
			0,0,xml_library_dat,0,
	"FIELD",	"Field info.",
			0,xml_field_beg,0,xml_field_end,
	0
};
1166
1167 static Tags_t*
xml_beg(Tag_t * tag,Tagframe_t * fp,const char * name,Tagdisc_t * disc)1168 xml_beg(Tag_t* tag, Tagframe_t* fp, const char* name, Tagdisc_t* disc)
1169 {
1170 return &tags_xml[0];
1171 }
1172
/*
 * top level schema tags, passed to tagopen() and tagusage() by xmlmeth()
 * METHOD has no callbacks here; XML descends into tags_xml
 */

static Tags_t	tags[] =
{
	"METHOD",	"Method name; must be xml.",
			0,0,0,0,
	"XML",		"xml method schema.",
			0,xml_beg,0,0,
	0
};
1181
1182 /*
1183 * methf
1184 */
1185
1186 static Dssmeth_t*
xmlmeth(const char * name,const char * options,const char * schema,Dssdisc_t * disc,Dssmeth_t * meth)1187 xmlmeth(const char* name, const char* options, const char* schema, Dssdisc_t* disc, Dssmeth_t* meth)
1188 {
1189 register Xml_t* xml;
1190 Tag_t* tag;
1191 Sfio_t* sp;
1192 Library_t* p;
1193 char* s;
1194 char path[PATH_MAX];
1195
1196 if (!(xml = newof(0, Xml_t, 1, 0)))
1197 {
1198 if (disc->errorf)
1199 (*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1200 return 0;
1201 }
1202 xml->basemeth = meth;
1203 xml->meth = *meth;
1204 meth = &xml->meth;
1205 meth->data = xml;
1206 taginit(&xml->dsstagdisc.tagdisc, disc->errorf);
1207 xml->dsstagdisc.tagdisc.id = DSS_ID;
1208 xml->dsstagdisc.disc = disc;
1209 xml->dsstagdisc.meth = meth;
1210 sp = 0;
1211 if (options)
1212 {
1213 if (!(sp = sfstropen()))
1214 goto drop;
1215 sfprintf(sp, "%s", usage);
1216 if (tagusage(tags, sp, &xml->dsstagdisc.tagdisc))
1217 goto drop;
1218 sfprintf(sp, "}\n");
1219 if (dssoptlib(meth->cx->buf, &dss_lib_xml, sfstruse(sp), disc))
1220 goto drop;
1221 sfclose(sp);
1222 sp = 0;
1223 s = sfstruse(meth->cx->buf);
1224 for (;;)
1225 {
1226 switch (optstr(options, s))
1227 {
1228 case 'T':
1229 xml->test = opt_info.num;
1230 continue;
1231 case '?':
1232 if (disc->errorf)
1233 (*disc->errorf)(NiL, disc, ERROR_USAGE|4, "%s", opt_info.arg);
1234 goto drop;
1235 case ':':
1236 if (disc->errorf)
1237 (*disc->errorf)(NiL, disc, 2, "%s", opt_info.arg);
1238 goto drop;
1239 }
1240 break;
1241 }
1242 }
1243 if (schema && *schema)
1244 {
1245 if (!(sp = dssfind(schema, NiL, DSS_VERBOSE, path, sizeof(path), disc)))
1246 return 0;
1247 if (!(tag = tagopen(sp, path, 1, &tags[0], &xml->dsstagdisc.tagdisc)) || tagclose(tag))
1248 goto drop;
1249 sfclose(sp);
1250 sp = 0;
1251 }
1252 dtinsert(meth->formats, &xml_format);
1253 dtinsert(meth->formats, &json_format);
1254 for (p = xml->libraries; p; p = p->next)
1255 if (!dssload(p->name, disc))
1256 return 0;
1257 return meth;
1258 drop:
1259 free(xml);
1260 if (sp)
1261 sfclose(sp);
1262 return 0;
1263 }
1264
1265 /*
1266 * openf
1267 */
1268
1269 static int
xmlopen(Dss_t * dss,Dssdisc_t * disc)1270 xmlopen(Dss_t* dss, Dssdisc_t* disc)
1271 {
1272 Xml_t* xml = (Xml_t*)dss->meth->data;
1273 Field_t* f;
1274 Field_t* g;
1275 Cxvariable_t* v;
1276 int i;
1277
1278 if (xml)
1279 {
1280 dss->cx->ctype['.'] |= CX_CTYPE_ALPHA;
1281 for (i = 0; i < elementsof(local_callouts); i++)
1282 if (cxaddcallout(dss->cx, &local_callouts[i], disc))
1283 return -1;
1284
1285 xml_beg_tag[0] = 1;
1286 xml_beg_tag['<'] = 1;
1287
1288 xml_end_tag[0] = 1;
1289 xml_end_tag['>'] = 1;
1290 xml_end_tag[' '] = 1;
1291
1292 xml_end_att[0] = 1;
1293 xml_end_att['"'] = 1;
1294 xml_end_att['>'] = 1;
1295
1296 json_beg_tag[0] = 2;
1297 json_beg_tag[','] = 2;
1298 json_beg_tag['{'] = 2;
1299 json_beg_tag['}'] = 2;
1300
1301 json_end_val[0] = 2;
1302 json_end_val['\\'] = 2;
1303 json_end_val['"'] = 2;
1304 json_end_val[':'] = 2;
1305 json_end_val[','] = 2;
1306 json_end_val['{'] = 2;
1307 json_end_val['}'] = 2;
1308 json_end_val['['] = 2;
1309 json_end_val[']'] = 2;
1310 json_end_val[' '] = 1;
1311 json_end_val['\n'] = 1;
1312 json_end_val['\r'] = 1;
1313 json_end_val['\t'] = 1;
1314 json_end_val['\v'] = 1;
1315
1316 xml->prefix = -1;
1317 for (f = xml->fields; f; f = g)
1318 {
1319 g = f->next;
1320 if (!(v = xmlvar(dss->cx, f->name, f->type, disc)))
1321 return -1;
1322 v->format = f->format;
1323 free(f->name);
1324 free(f->type);
1325 free(f);
1326 }
1327 }
1328 return 0;
1329 }
1330
/*
 * method descriptor exported through dss_lib_xml
 * NOTE(review): the slots left 0 are not used by this file; their
 * meaning is defined by Dssmeth_t in dsslib.h
 */

static Dssmeth_t method =
{
	"xml",
	"xml and json method",
	CXH,
	xmlmeth,	/* methf	*/
	xmlopen,	/* openf	*/
	0,
	0
};
1341
/*
 * library descriptor -- the only external symbol defined by this file
 * xmlmeth() also passes it to dssoptlib() when parsing options
 */

Dsslib_t dss_lib_xml =
{
	"xml",
	"xml method"
	"[-1ls5Pp0?\n@(#)$Id: dss xml method (AT&T Research) 2010-04-22 $\n]"
	USAGE_LICENSE,
	CXH,
	0,
	&method,
};
1352