1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 2010-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 /*
22  * xml method
23  *
24  * Glenn Fowler
25  * AT&T Research
26  */
27 
/*
 * method description in the option-usage markup handed to dssoptlib()
 * the final "[+TAGS?...]{" list is deliberately left open: xmlmeth()
 * appends the tagusage() output for the schema tags and then the
 * closing "}"
 */

static const char usage[] =
"[+DESCRIPTION?The \bdss\b xml method reads XML data in two formats: "
    "pure XML (\aname=value\a attributes within tags ignored) and JSON. In "
    "general XML data provides field names but not type information, so by "
    "default all fields are treated as strings. Only fields specified in the "
    "\bdss\b(1) \aexpression\a are parsed from the data. Fields are named "
    "using \b.\b'd notation, where each prefix name represents XML tag "
    "nesting. For XML data, if all records have the same XML tag prefix then "
    "that prefix may be omitted, except that all field names must have at "
    "least one prefix component. For example, \busers.user.name\b and "
    "\buser.name\b are valid, but \bname\b is not.]"
"[+?The xml method schema is an XML document that specifies the type of "
    "one or more fields, and any libraries required to support those types.]"
"[T:test?Enable implementation-specific tests and tracing.]#[mask]"
"[+TAGS?The supported tags are:]{"
;
44 
45 #include <dsslib.h>
46 #include <ctype.h>
47 
/*
 * forward declarations so the structs below can refer to each other
 * (and to themselves) by typedef name
 */

struct Field_s; typedef struct Field_s Field_t;
struct File_s; typedef struct File_s File_t;
struct Value_s; typedef struct Value_s Value_t;
struct Library_s; typedef struct Library_s Library_t;
struct Xml_s; typedef struct Xml_s Xml_t;
53 
/*
 * one schema LIBRARY entry, kept on a singly linked list in order of
 * appearance; xml_library_dat() allocates each node with strlen(name)
 * extra bytes so that name[] holds the whole 0-terminated string
 */

struct Library_s			/* library list			*/
{
	Library_t*	next;		/* next in list			*/
	char		name[1];	/* library name			*/
};
59 
/*
 * per variable value state, allocated by xmlvar() directly after the
 * Cxvariable_t and reached through Cxvariable_t.data
 * the read functions stamp record/offset/size when the variable's tag
 * is seen; op_get() treats the value as empty unless record matches
 * the File_t record number
 */

struct Value_s				/* value in current record	*/
{
	uintmax_t	record;		/* record number for value	*/
	size_t		offset;		/* File_t.value offset		*/
	size_t		size;		/* string value size		*/
	int		number;		/* value is a number		*/
	Cxinternal_f	internalf;	/* convert to internal value	*/
};
68 
/*
 * per file scanner state, allocated by xmlfopen() as one chunk:
 * the struct, then maxlevel+1 additional part[] slots, then the
 * maxname+1 byte name buffer that File_t.name points to
 * the input buffer is 0-terminated in place: the byte at *end is
 * replaced by 0 and its real value is kept in save
 */

struct File_s				/* file read state		*/
{
	uintmax_t	record;		/* current record number	*/
	unsigned char*	buf;		/* input buffer 		*/
	unsigned char*	rec;		/* input record position	*/
	unsigned char*	cur;		/* input buffer position	*/
	unsigned char*	end;		/* input buffer end		*/
	char*		name;		/* current .'d name		*/
	char*		root;		/* root path			*/
	char*		value;		/* current record tag values	*/
	int		image;		/* keep current record image	*/
	int		level;		/* part[] index			*/
	int		maxlevel;	/* max part[] index		*/
	int		maxname;	/* max .'d name length		*/
	int		prefix;		/* implied .'d prefix		*/
	int		save;		/* real char at *f->end		*/
	size_t		maxvalue;	/* size of value		*/
	unsigned char*	prv;		/* previous buffer chunk	*/
	size_t		prvsize;	/* max previous buffer size	*/
	size_t		prvlen;		/* current previous buffer size	*/
	char*		part[1];	/* .'d part stack		*/
};
91 
/*
 * one schema FIELD entry; the node is created by xml_field_beg() and
 * name, type and format are filled in by the NAME and TYPE tag data
 * callbacks
 */

struct Field_s				/* current proto schema field	*/
{
	Field_t*	next;		/* next in list			*/
	char*		name;		/* qualified field name		*/
	char*		type;		/* field type name		*/
	Cxformat_t	format;		/* field output format		*/
};
99 
/*
 * method instance state
 * dsstagdisc must stay the first member: the tag callbacks receive a
 * Tagdisc_t* and cast it straight to Xml_t*
 */

struct Xml_s				/* Dssmeth_t.data		*/
{
	Dsstagdisc_t	dsstagdisc;	/* tag parse discipline		*/
	Dssmeth_t	meth;		/* private copy of the method	*/
	Dssmeth_t*	basemeth;	/* method the copy was made from*/
	Library_t*	libraries;	/* schema library list head	*/
	Library_t*	lastlibrary;	/* schema library list tail	*/
	Field_t*	fields;		/* schema field list head	*/
	Field_t*	lastfield;	/* schema field list tail	*/
	Cxflags_t	test;		/* NOTE(review): presumably the	*/
					/* T:test option mask -- confirm*/
	char*		root;		/* first component of the first	*/
					/* qualified variable name	*/
	int		image;		/* keep record images for write	*/
	int		maxname;	/* max .'d name length		*/
	int		maxlevel;	/* max .'d name nesting		*/
	int		prefix;		/* implied prefix levels, <0	*/
					/* until xmlfopen() computes it	*/
};
116 
/* shared empty string; also the empty input buffer in xmlfopen() */

static const char	null[1];

/*
 * scanner stop tables indexed by input byte (as unsigned char)
 * a non-zero entry ends the corresponding scan loop; jsonread() also
 * distinguishes entries equal to 1 from other non-zero entries
 * NOTE(review): the tables are filled in outside the code visible
 * here; the scan loops rely on entry 0 (the in place end of buffer
 * sentinel) being non-zero
 */

static char		xml_beg_tag[UCHAR_MAX+1];
static char		xml_end_tag[UCHAR_MAX+1];
static char		xml_end_att[UCHAR_MAX+1];

static char		json_beg_tag[UCHAR_MAX+1];
static char		json_end_val[UCHAR_MAX+1];

/* library descriptor handed to dssoptlib() by xmlmeth() */

extern Dsslib_t		dss_lib_xml;
127 
128 /*
129  * xml var create/lookup
130  * type==0 for prefix components
131  */
132 
133 static Cxvariable_t*
xmlvar(Cx_t * cx,char * name,const char * type,Cxdisc_t * disc)134 xmlvar(Cx_t* cx, char* name, const char* type, Cxdisc_t* disc)
135 {
136 	Xml_t*		xml = (Xml_t*)DSS(cx)->meth->data;
137 	Cxvariable_t*	var;
138 	Value_t*	val;
139 	char*		s;
140 	int		n;
141 	int		i;
142 
143 	if (*name == '.')
144 		var = cxvariable(cx, name, NiL, disc);
145 	else if (!(var = dtmatch(cx->variables, name)))
146 	{
147 		n = strlen(name) + 1;
148 		if (!(var = vmnewof(cx->vm, 0, Cxvariable_t, 1, sizeof(Value_t) + n)))
149 		{
150 			if (disc->errorf)
151 				(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
152 			return 0;
153 		}
154 		var->data = val = (Value_t*)(var + 1);
155 		strcpy((char*)(var->name = (const char*)(var + 1) + sizeof(Value_t)), name);
156 		var->type = (Cxtype_t*)(type ? type : "number");
157 		if (cxaddvariable(cx, var, disc))
158 			return 0;
159 		if ((val->number = cxisnumber(var->type)) && !(val->internalf = var->type->internalf) && var->type->base)
160 			val->internalf = var->type->base->internalf;
161 		if (type)
162 		{
163 			if (xml->maxname < ++n)
164 				xml->maxname = n;
165 			n = 0;
166 			for (s = name; *s; s++)
167 				if (*s == '.')
168 				{
169 					*s = 0;
170 					i = !xmlvar(cx, name, NiL, disc);
171 					*s = '.';
172 					if (i)
173 						return 0;
174 					n++;
175 				}
176 			if (xml->maxlevel < n)
177 				xml->maxlevel = n;
178 			if (n && !xml->root && (s = strchr(var->name, '.')))
179 			{
180 				if (xml->root = vmnewof(cx->vm, 0, char, s - var->name, 1))
181 					memcpy(xml->root, var->name, s - var->name);
182 				else
183 				{
184 					if (disc->errorf)
185 						(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
186 					return 0;
187 				}
188 			}
189 		}
190 	}
191 	return var;
192 }
193 
194 /*
195  * xml identf
196  */
197 
198 static int
xmlident(Dssfile_t * file,void * buf,size_t n,Dssdisc_t * disc)199 xmlident(Dssfile_t* file, void* buf, size_t n, Dssdisc_t* disc)
200 {
201 	static const char	magic[] = "<?xml";
202 
203 	return (n > (sizeof(magic) - 1) && !memcmp(buf, magic, sizeof(magic) - 1));
204 }
205 
206 /*
207  * refill the input buffer and return the next char, -1 on error
208  */
209 
210 static int
refill(Dssfile_t * file,register File_t * f,int c,Dssdisc_t * disc)211 refill(Dssfile_t* file, register File_t* f, int c, Dssdisc_t* disc)
212 {
213 	size_t	n;
214 
215 	if (f->cur >= f->end)
216 	{
217 		if (f->rec)
218 		{
219 			if ((n = f->cur - f->rec + f->prvlen) > f->prvsize)
220 			{
221 				f->prvsize = roundof(f->prvsize + n, 1024);
222 				if (!(f->prv = vmnewof(file->vm, f->prv, unsigned char, f->prvsize, 0)))
223 				{
224 					if (disc->errorf)
225 						(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
226 					return -1;
227 				}
228 			}
229 			if (n > 1)
230 				memcpy(f->prv + f->prvlen, f->rec, n - 1);
231 			f->prv[n-1] = f->save;
232 			f->prvlen += n;
233 		}
234 		if (!(f->buf = (unsigned char*)sfreserve(file->io, SF_UNBOUND, 0)))
235 			return -1;
236 		if (f->rec)
237 			f->rec = f->buf;
238 		f->cur = f->buf;
239 		c = f->save;
240 		f->end = f->buf + sfvalue(file->io) - 1;
241 		f->save = *f->end;
242 		*f->end = 0;
243 	}
244 	return c;
245 }
246 
/*
 * REFILL(f,c,r): c = refill(file, f, c, disc) and execute statement r
 * if that fails; r must transfer control (return/goto) for the failure
 * to have any effect
 * file and disc are taken from the calling scope
 */

#define REFILL(f,c,r)	do { if ((c = refill(file, f, c, disc)) < 0) r; } while (0)

/*
 * RESIZE(): grow f->value by 1024 bytes and rebase the value cursors
 * f, file, disc, o, vb, vp and ve are taken from the calling scope
 * on allocation failure an error is reported and the *calling*
 * function returns -1
 * ve is left one short of the buffer end
 */

#define RESIZE() \
	do \
	{ \
		o = vp - f->value; \
		f->maxvalue += 1024; \
		if (!(f->value = vmnewof(file->vm, f->value, char, f->maxvalue, 0))) \
		{ \
			if (disc->errorf) \
				(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space"); \
			return -1; \
		} \
		vb = f->value; \
		vp = vb + o; \
		ve = vb + f->maxvalue - 1; \
	} while (0)
264 
265 /*
266  * xml readf -- consume 1 xml record and retain field values of interest
267  */
268 
269 static int
xmlread(Dssfile_t * file,Dssrecord_t * record,Dssdisc_t * disc)270 xmlread(Dssfile_t* file, Dssrecord_t* record, Dssdisc_t* disc)
271 {
272 	register File_t*	f = file->data;
273 	register char*		np;
274 	register char*		ne;
275 	register char*		vp;
276 	register char*		ve;
277 	register int		c;
278 	char*			vb;
279 	Cxvariable_t*		v;
280 	ssize_t			o;
281 	int			q;
282 
283 	f->record++;
284 	f->rec = 0;
285 	f->prvlen = 0;
286 	vp = vb = f->value;
287 	ve = f->value + f->maxvalue - 1;
288 	ne = f->name + f->maxname;
289 	np = f->level >= f->maxlevel ? ne : f->level ? f->part[f->level] : f->name;
290 	for (;;)
291 	{
292 		/* find the next tag */
293 
294 		for (;;)
295 		{
296 			while (!xml_beg_tag[*f->cur++]);
297 			if (*(f->cur - 1))
298 				break;
299 			REFILL(f, c, goto done);
300 			if (c == '<')
301 				break;
302 		}
303 	tag:
304 		if (!(c = *f->cur++))
305 			REFILL(f, c, -1);
306 		switch (c)
307 		{
308 		case '/':
309 			if (f->level)
310 				f->level--;
311 			if (f->level <= f->maxlevel)
312 				np = f->part[f->level];
313 			/*FALLTHROUGH*/
314 		case '?':
315 			for (;;)
316 			{
317 				while (!xml_end_tag[*f->cur++]);
318 				if (*(f->cur - 1))
319 					break;
320 				REFILL(f, o, goto incomplete);
321 				if (o == '>')
322 					break;
323 			}
324 			if (c == '/' && f->level == f->prefix)
325 			{
326 				record->data = f;
327 				file->count = f->record;
328 				return 1;
329 			}
330 			break;
331 		default:
332 			if (np <= ne && f->level > f->prefix)
333 			{
334 				*np = 0;
335 				if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
336 				{
337 					if (f->image && !f->rec)
338 						f->rec = f->cur - 1;
339 					((Value_t*)v->data)->record = f->record;
340 					((Value_t*)v->data)->offset = vp - vb;
341 					((Value_t*)v->data)->size = 1;
342 					if (vp >= ve)
343 						RESIZE();
344 					*vp++ = '1';
345 					if (vp >= ve)
346 						RESIZE();
347 					*vp++ = 0;
348 				}
349 			}
350 			if (f->level <= f->maxlevel)
351 				f->part[f->level] = np;
352 			else
353 				np = ne + 1;
354 			f->level++;
355 			if (np < ne)
356 				*np++ = '.';
357 			if (np < ne)
358 				*np++ = c;
359 			q = 0;
360 			for (;;)
361 			{
362 				while (!xml_end_tag[c = *f->cur++])
363 					if (np < ne)
364 						*np++ = c;
365 					else
366 						q = c;
367 				if (c)
368 					break;
369 				REFILL(f, c, goto incomplete);
370 				if (c == '>')
371 					break;
372 				if (np < ne)
373 					*np++ = c;
374 				else
375 					q = c;
376 			}
377 			if (!q && *(np - 1) == '/' || q == '/')
378 			{
379 				/* null tag */
380 
381 				if (f->level)
382 					f->level--;
383 				if (f->level <= f->maxlevel)
384 					np = f->part[f->level];
385 			}
386 			else
387 			{
388 				/* ignore tag name=value attributes -- why did they allow them */
389 
390 				if (c == ' ')
391 				{
392 					q = 0;
393 					for (;;)
394 					{
395 						while (!xml_end_att[c = *f->cur++]);
396 						if (!c)
397 							REFILL(f, c, goto incomplete);
398 						if (c == '"')
399 							q = !q;
400 						else if (!q && c == '>')
401 							break;
402 					}
403 				}
404 				if (np < ne && f->level > f->prefix)
405 				{
406 					*np = 0;
407 					if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
408 					{
409 						if (f->image && !f->rec)
410 							f->rec = f->cur - 1;
411 						((Value_t*)v->data)->record = f->record;
412 						((Value_t*)v->data)->offset = vp - vb;
413 						for (;;)
414 						{
415 							while (!xml_beg_tag[c = *f->cur++])
416 							{
417 								if (vp >= ve)
418 									RESIZE();
419 								*vp++ = c;
420 							}
421 							if (*(f->cur - 1))
422 								break;
423 							REFILL(f, c, goto incomplete);
424 							if (c == '<')
425 								break;
426 							if (vp >= ve)
427 								RESIZE();
428 							*vp++ = c;
429 						}
430 						((Value_t*)v->data)->size = vp - (vb + ((Value_t*)v->data)->offset);
431 						if (vp >= ve)
432 							RESIZE();
433 						*vp++ = 0;
434 						goto tag;
435 					}
436 				}
437 			}
438 			break;
439 		}
440 	}
441  done:
442 	if (f->level <= f->prefix)
443 		return 0;
444  incomplete:
445 	if (disc->errorf)
446 		(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: record %I*u incomplete", file->path, sizeof(f->record), f->record);
447 	return -1;
448 }
449 
450 /*
451  * xml writef -- output current record
452  */
453 
454 static int
xmlwrite(Dssfile_t * file,Dssrecord_t * record,Dssdisc_t * disc)455 xmlwrite(Dssfile_t* file, Dssrecord_t* record, Dssdisc_t* disc)
456 {
457 	register File_t*	r = (File_t*)record->data;
458 	size_t			n;
459 
460 	sfprintf(file->io, "<%s", r->root);
461 	if (r->prvlen && sfwrite(file->io, r->prv, r->prvlen) != r->prvlen)
462 	{
463 		if (disc->errorf)
464 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
465 		return -1;
466 	}
467 	if ((n = r->cur - r->rec) && sfwrite(file->io, r->rec, n) != n)
468 	{
469 		if (disc->errorf)
470 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
471 		return -1;
472 	}
473 	sfputc(file->io, '\n');
474 	return 0;
475 }
476 
477 /*
478  * xml fopenf
479  */
480 
481 static int
xmlfopen(Dssfile_t * file,Dssdisc_t * disc)482 xmlfopen(Dssfile_t* file, Dssdisc_t* disc)
483 {
484 	register Xml_t*		xml = (Xml_t*)file->dss->meth->data;
485 	register unsigned char*	s;
486 	register unsigned char*	t;
487 	register int		n;
488 	File_t*			f;
489 	int			c;
490 	int			m;
491 	int			x;
492 	unsigned char*		buf;
493 	unsigned char*		end;
494 
495 	if (file->flags & DSS_FILE_WRITE)
496 		buf = 0;
497 	else if (buf = (unsigned char*)sfreserve(file->io, SF_UNBOUND, 0))
498 	{
499 		end = buf + sfvalue(file->io) - 1;
500 		if (xml->prefix < 0)
501 		{
502 			xml->image = !!(file->dss->flags & DSS_WRITE);
503 			if (file->format->readf == xmlread)
504 			{
505 				if (!xml->root)
506 				{
507 					if (disc->errorf)
508 						(*disc->errorf)(NiL, disc, 2, "%s variable names must be qualified by at least the immediate containing tag", file->format->name);
509 					return -1;
510 				}
511 				x = *end;
512 				*end = 0;
513 				s = buf;
514 				n = 0;
515 				m = -1;
516 				for (;;)
517 				{
518 					while (!xml_beg_tag[*s++]);
519 					if (*(s - 1))
520 					{
521 						t = s;
522 						while (!xml_end_tag[*s++]);
523 						if (*t == '/')
524 						{
525 							if (m > 0)
526 							{
527 								m--;
528 								n -= s - t - 2;
529 							}
530 						}
531 						else if (*t != '?')
532 						{
533 							m++;
534 							n += (c = s - t - 1);
535 							if (!memcmp(xml->root, t, c) && !*(xml->root + c))
536 								break;
537 						}
538 					}
539 					else if (s >= end)
540 						break;
541 				}
542 				*end = x;
543 				xml->prefix = m;
544 				xml->maxlevel += m;
545 				xml->maxname += n;
546 			}
547 			else
548 			{
549 				xml->prefix = 0;
550 				xml->maxname += 1024; /*XXX*/
551 			}
552 		}
553 	}
554 	if (!(f = vmnewof(file->vm, 0, File_t, 1, (xml->maxlevel + 1) * sizeof(char*) + xml->maxname + 1)))
555 	{
556 		if (disc->errorf)
557 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
558 		return -1;
559 	}
560 	file->data = f;
561 	f->name = (char*)(f + 1) + (xml->maxlevel + 1) * sizeof(char*);
562 	if (!(file->flags & DSS_FILE_WRITE))
563 	{
564 		if (buf)
565 		{
566 			f->cur = f->buf = buf;
567 			f->end = end;
568 			f->save = *end;
569 			*end = 0;
570 		}
571 		else
572 			f->buf = f->cur = f->end = (unsigned char*)null;
573 		f->image = xml->image;
574 		f->prefix = xml->prefix;
575 		f->maxlevel = xml->maxlevel;
576 		f->maxname = xml->maxname;
577 		f->maxvalue = 1024;
578 		f->root = xml->root;
579 		if (!(f->value = vmnewof(file->vm, 0, char, f->maxvalue, 0)))
580 		{
581 			vmfree(file->vm, f);
582 			if (disc->errorf)
583 				(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
584 			return -1;
585 		}
586 	}
587 	return 0;
588 }
589 
590 /*
591  * xml fclosef
592  */
593 
594 static int
xmlfclose(Dssfile_t * file,Dssdisc_t * disc)595 xmlfclose(Dssfile_t* file, Dssdisc_t* disc)
596 {
597 	if (!file || !file->data)
598 		return -1;
599 	return 0;
600 }
601 
/*
 * xml format descriptor
 * the function slots below are labelled after the header comments of
 * the functions placed in them
 * NOTE(review): the meaning of the 0 slots is defined by Dssformat_t,
 * which is not visible here
 */

static Dssformat_t xml_format =
{
	"xml",
	"xml format (2010-05-19)",
	CXH,
	xmlident,			/* identf			*/
	xmlfopen,			/* fopenf			*/
	xmlread,			/* readf			*/
	xmlwrite,			/* writef			*/
	0,
	xmlfclose,			/* fclosef			*/
	0,
	0,
	0
};
617 
618 /*
619  * json identf
620  */
621 
622 static int
jsonident(Dssfile_t * file,void * buf,size_t n,Dssdisc_t * disc)623 jsonident(Dssfile_t* file, void* buf, size_t n, Dssdisc_t* disc)
624 {
625 	register char*		s;
626 	register char*		e;
627 	register const char*	m;
628 
629 	static const char	magic[] = "{\"";
630 
631 	s = buf;
632 	e = s + n;
633 	for (m = magic; s < e; s++)
634 		if (isspace(*s))
635 			;
636 		else if (*s != *m)
637 			return 0;
638 		else if (!*++m)
639 			return 1;
640 	return 0;
641 }
642 
643 /*
644  * json readf
645  */
646 
647 static int
jsonread(register Dssfile_t * file,register Dssrecord_t * record,Dssdisc_t * disc)648 jsonread(register Dssfile_t* file, register Dssrecord_t* record, Dssdisc_t* disc)
649 {
650 	register File_t*	f = file->data;
651 	register char*		np;
652 	register char*		ne;
653 	register char*		vp;
654 	register char*		ve;
655 	register int		c;
656 	char*			vb;
657 	Cxvariable_t*		v;
658 	size_t			o;
659 	int			a;
660 	int			e;
661 	int			q;
662 	int			n;
663 
664 	f->record++;
665 	error(-1, "AHA jsonread file=%p record.file=%p", file, record->file);
666 	f->rec = 0;
667 	f->prvlen = 0;
668 	vp = vb = f->value;
669 	ve = f->value + f->maxvalue - 1;
670 	ne = f->name + f->maxname;
671 	np = f->level > f->maxlevel ? ne : f->level ? f->part[f->level] : f->name;
672 	for (;;)
673 	{
674 	beg:
675 		do
676 		{
677 			while (!json_beg_tag[c = *f->cur++]);
678 			if (!c)
679 				REFILL(f, c, goto done);
680 			if (c == '}')
681 			{
682 				if (!f->level)
683 				{
684 					record->data = f;
685 					file->count = f->record;
686 					return 1;
687 				}
688 				if (--f->level <= f->maxlevel)
689 					np = f->part[f->level];
690 			}
691 		} while (c != '{' && c != ',');
692 		if (f->image && !f->rec)
693 			f->rec = f->cur - 1;
694 	tag:
695 		do
696 		{
697 			while (json_end_val[c = *f->cur++] == 1);
698 			if (!c)
699 				REFILL(f, c, goto incomplete);
700 		} while (json_end_val[c] == 1);
701 		if (f->level <= f->maxlevel)
702 			f->part[f->level] = np;
703 		else
704 			np = ne + 1;
705 		f->level++;
706 		if (np < ne)
707 			*np++ = '.';
708 		if (!(q = c == '"') && np < ne)
709 			*np++ = c;
710 		for (;;)
711 		{
712 			while (!json_end_val[c = *f->cur++])
713 				if (np < ne)
714 					*np++ = c;
715 			if (!c)
716 				REFILL(f, c, goto incomplete);
717 			if (c == '"')
718 			{
719 				q = !q;
720 				continue;
721 			}
722 			else if (c == '\\')
723 			{
724 				if (!(c = *f->cur++))
725 					REFILL(f, c, goto incomplete);
726 			}
727 			else if (!q)
728 			{
729 				if (c == '}')
730 				{
731 					if (!f->level)
732 					{
733 						record->data = f;
734 						file->count = f->record;
735 						return 1;
736 					}
737 					if (--f->level <= f->maxlevel)
738 						np = f->part[f->level];
739 					break;
740 				}
741 				else if (c == ':')
742 				{
743 					do
744 					{
745 						while (json_end_val[c = *f->cur++] == 1);
746 						if (!c)
747 							REFILL(f, c, goto incomplete);
748 					} while (json_end_val[c] == 1);
749 					if (c == '{')
750 					{
751 						if (np <= ne)
752 						{
753 							*np = 0;
754 							if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
755 							{
756 								((Value_t*)v->data)->record = f->record;
757 								((Value_t*)v->data)->offset = vp - vb;
758 								if (vp >= ve)
759 									RESIZE();
760 								*vp++ = '1';
761 								((Value_t*)v->data)->size = 1;
762 								if (vp >= ve)
763 									RESIZE();
764 								*vp++ = 0;
765 							}
766 						}
767 						goto tag;
768 					}
769 					a = 0;
770 					q = 0;
771 					if (np < ne && f->level > f->prefix)
772 					{
773 						*np = 0;
774 						if (v = dtmatch(file->dss->cx->variables, f->part[f->prefix] + 1))
775 						{
776 							((Value_t*)v->data)->record = f->record;
777 							((Value_t*)v->data)->offset = vp - vb;
778 							e = c == 'n';
779 							for (;;)
780 							{
781 								if (c == '"')
782 								{
783 									q = !q;
784 									goto ignore;
785 								}
786 								else if (c == '\\')
787 								{
788 									if (!(c = *f->cur++))
789 										REFILL(f, c, goto incomplete);
790 									if (c != '\\' && c != '"' && c != ',' && c != '}')
791 									{
792 										if (vp >= ve)
793 											RESIZE();
794 										*vp++ = '\\';
795 									}
796 								}
797 								else if (!q)
798 								{
799 									if (c == '[')
800 									{
801 										a++;
802 										goto ignore;
803 									}
804 									else if (c == ']')
805 									{
806 										if (a)
807 											a--;
808 										goto ignore;
809 									}
810 									else if (json_end_val[c] == 1)
811 										goto ignore;
812 									else if (a)
813 										/*array*/;
814 									else if (c == '}')
815 									{
816 										if (!f->level)
817 										{
818 											record->data = f;
819 											file->count = f->record;
820 											return 1;
821 										}
822 										if (--f->level <= f->maxlevel)
823 											np = f->part[f->level];
824 										break;
825 									}
826 									else if (c == ',')
827 										break;
828 								}
829 								if (vp >= ve)
830 									RESIZE();
831 								*vp++ = c;
832 							ignore:
833 								while (!json_end_val[c = *f->cur++])
834 								{
835 									if (vp >= ve)
836 										RESIZE();
837 									*vp++ = c;
838 								}
839 								if (!c)
840 									REFILL(f, c, goto incomplete);
841 							}
842 							if (e)
843 								vp = vb + ((Value_t*)v->data)->offset;
844 							((Value_t*)v->data)->size = vp - (vb + ((Value_t*)v->data)->offset);
845 							*vp++ = 0;
846 							if (!f->level)
847 							{
848 								record->data = f;
849 								file->count = f->record;
850 								return 1;
851 							}
852 							if (--f->level <= f->maxlevel)
853 								np = f->part[f->level];
854 							if (c == ',')
855 								goto tag;
856 							goto beg;
857 						}
858 					}
859 					n = 1;
860 					for (;;)
861 					{
862 						if (c == '"')
863 							q = !q;
864 						else if (c == '\\')
865 						{
866 							if (!(c = *f->cur++))
867 								REFILL(f, c, goto incomplete);
868 						}
869 						else if (!q)
870 						{
871 							if (c == '[')
872 								a++;
873 							else if (c == ']')
874 							{
875 								if (a)
876 									a--;
877 							}
878 							else if (a)
879 								/*array*/;
880 							else if (c == '{')
881 								n++;
882 							else if (c == '}' && !--n)
883 							{
884 								if (!f->level)
885 								{
886 									record->data = f;
887 									file->count = f->record;
888 									return 1;
889 								}
890 								if (--f->level <= f->maxlevel)
891 									np = f->part[f->level];
892 								break;
893 							}
894 							else if (c == ',' && n == 1)
895 								break;
896 						}
897 						while (!json_end_val[c = *f->cur++]);
898 						if (!c)
899 							REFILL(f, c, goto done);
900 					}
901 					if (!f->level)
902 					{
903 						record->data = f;
904 						file->count = f->record;
905 						return 1;
906 					}
907 					if (--f->level <= f->maxlevel)
908 						np = f->part[f->level];
909 					if (c == ',')
910 						goto tag;
911 					goto beg;
912 				}
913 				else if (json_end_val[c] == 1)
914 					continue;
915 			}
916 			if (np < ne)
917 				*np++ = c;
918 		}
919 	}
920  done:
921 	if (!f->level)
922 		return 0;
923  incomplete:
924 	if (disc->errorf)
925 		(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: record %I*u incomplete", file->path, sizeof(f->record), f->record);
926 	return -1;
927 }
928 
929 /*
930  * xml writef -- output current record
931  */
932 
933 static int
jsonwrite(Dssfile_t * file,Dssrecord_t * record,Dssdisc_t * disc)934 jsonwrite(Dssfile_t* file, Dssrecord_t* record, Dssdisc_t* disc)
935 {
936 	register File_t*	r = (File_t*)record->data;
937 	size_t			n;
938 
939 	if (r->prvlen && sfwrite(file->io, r->prv, r->prvlen) != r->prvlen)
940 	{
941 		if (disc->errorf)
942 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
943 		return -1;
944 	}
945 	if ((n = r->cur - r->rec) && sfwrite(file->io, r->rec, n) != n)
946 	{
947 		if (disc->errorf)
948 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "%s: write error", file->path);
949 		return -1;
950 	}
951 	sfputc(file->io, '\n');
952 	return 0;
953 }
954 
/*
 * json format descriptor; open and close are shared with the xml format
 * the function slots below are labelled after the header comments of
 * the functions placed in them
 * NOTE(review): the meaning of the 0 slots is defined by Dssformat_t,
 * which is not visible here
 */

static Dssformat_t json_format =
{
	"json",
	"json format (2010-05-19)",
	CXH,
	jsonident,			/* identf			*/
	xmlfopen,			/* fopenf			*/
	jsonread,			/* readf			*/
	jsonwrite,			/* writef			*/
	0,
	xmlfclose,			/* fclosef			*/
	0,
	0,
	0
};
970 
971 static int
op_get(Cx_t * cx,Cxinstruction_t * pc,Cxoperand_t * r,Cxoperand_t * a,Cxoperand_t * b,void * data,Cxdisc_t * disc)972 op_get(Cx_t* cx, Cxinstruction_t* pc, Cxoperand_t* r, Cxoperand_t* a, Cxoperand_t* b, void* data, Cxdisc_t* disc)
973 {
974 	File_t*		f = (File_t*)DSSDATA(data);
975 	Value_t*	v = (Value_t*)pc->data.variable->data;
976 	char*		s;
977 
978 	if (v)
979 	{
980 		if (v->record == f->record)
981 			s = f->value + v->offset;
982 		else
983 		{
984 			s = (char*)null;
985 			v->size = 0;
986 		}
987 		if (!v->internalf)
988 		{
989 			r->value.string.data = s;
990 			r->value.string.size = v->size;
991 		}
992 		else if ((*v->internalf)(cx, pc->data.variable->type, NiL, &pc->data.variable->format, r, s, v->size, cx->rm, disc) < 0)
993 			return -1;
994 	}
995 	return 0;
996 }
997 
998 static int
op_ref(Cx_t * cx,Cxinstruction_t * pc,Cxoperand_t * r,Cxoperand_t * a,Cxoperand_t * b,void * data,Cxdisc_t * disc)999 op_ref(Cx_t* cx, Cxinstruction_t* pc, Cxoperand_t* r, Cxoperand_t* a, Cxoperand_t* b, void* data, Cxdisc_t* disc)
1000 {
1001 	return (r->value.variable = xmlvar(cx, b->value.string.data, "string", disc)) ? 0 : -1;
1002 }
1003 
/*
 * cx callouts supplied by this method: op_get for CX_GET and op_ref
 * for CX_REF
 * NOTE(review): the two type name arguments and the trailing 0 are
 * interpreted by the CXC() macro, which is not visible here
 */

static Cxcallout_t local_callouts[] =
{
CXC(CX_GET, "void", "void", op_get, 0)
CXC(CX_REF, "string", "void", op_ref, 0)
};
1009 
1010 static int
xml_field_name_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1011 xml_field_name_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1012 {
1013 	register Xml_t*		xml = (Xml_t*)disc;
1014 
1015 	if (!(xml->lastfield->name = strdup(data)))
1016 	{
1017 		if (disc->errorf)
1018 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1019 		return -1;
1020 	}
1021 	return 0;
1022 }
1023 
1024 static int
xml_field_type_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1025 xml_field_type_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1026 {
1027 	register Xml_t*		xml = (Xml_t*)disc;
1028 	char*			s;
1029 
1030 	memset(&xml->lastfield->format, 0, sizeof(xml->lastfield->format));
1031 	(void)cxattr(NiL, data, &s, &xml->lastfield->format, NiL);
1032 	if (!*s)
1033 		s = "number";
1034 	if (!(xml->lastfield->type = strdup(s)))
1035 	{
1036 		if (disc->errorf)
1037 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1038 		return -1;
1039 	}
1040 	if (xml->lastfield->format.flags & CX_FLOAT)
1041 		xml->lastfield->format.flags &= ~(CX_STRING|CX_BUFFER|CX_UNSIGNED|CX_INTEGER);
1042 	else if (xml->lastfield->format.flags & CX_UNSIGNED)
1043 	{
1044 		xml->lastfield->format.flags &= ~(CX_STRING|CX_BUFFER);
1045 		xml->lastfield->format.flags |= CX_UNSIGNED|CX_INTEGER;
1046 	}
1047 	else if (!(xml->lastfield->format.flags & (CX_STRING|CX_BUFFER|CX_INTEGER)))
1048 	{
1049 		if (streq(s, "string"))
1050 			xml->lastfield->format.flags |= CX_STRING;
1051 		else if (streq(s, "buffer"))
1052 			xml->lastfield->format.flags |= CX_BUFFER;
1053 	}
1054 	return 0;
1055 }
1056 
/*
 * tags accepted inside FIELD
 * each entry is: tag name, description, then four slots of which the
 * third is used in this file for the tag data callback
 * NOTE(review): the exact slot meanings are defined by Tags_t, which
 * is not visible here
 */

static Tags_t	tags_xml_field[] =
{
	"NAME",		"Field name.",
			0,0,xml_field_name_dat,0,
	"TYPE",		"Field type. The intrinsic types are number and"
			" string. Other types are defined in optional"
			" method and schema libraries.",
			0,0,xml_field_type_dat,0,
	0
};
1067 
1068 static int
xml_name_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1069 xml_name_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1070 {
1071 	register Xml_t*	xml = (Xml_t*)disc;
1072 
1073 	if (!(xml->meth.name = strdup(data)))
1074 	{
1075 		if (disc->errorf)
1076 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1077 		return -1;
1078 	}
1079 	return 0;
1080 }
1081 
1082 static int
xml_description_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1083 xml_description_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1084 {
1085 	register Xml_t*	xml = (Xml_t*)disc;
1086 
1087 	if (!(xml->meth.description = strdup(data)))
1088 	{
1089 		if (disc->errorf)
1090 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1091 		return -1;
1092 	}
1093 	return 0;
1094 }
1095 
1096 static int
xml_library_dat(Tag_t * tag,Tagframe_t * fp,const char * data,Tagdisc_t * disc)1097 xml_library_dat(Tag_t* tag, Tagframe_t* fp, const char* data, Tagdisc_t* disc)
1098 {
1099 	register Xml_t*		xml = (Xml_t*)disc;
1100 	register Library_t*	p;
1101 
1102 	if (!(p = newof(0, Library_t, 1, strlen(data))))
1103 	{
1104 		if (disc->errorf)
1105 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1106 		return -1;
1107 	}
1108 	strcpy(p->name, data);
1109 	if (!xml->lastlibrary)
1110 		xml->libraries = xml->lastlibrary = p;
1111 	else
1112 		xml->lastlibrary = xml->lastlibrary->next = p;
1113 	return 0;
1114 }
1115 
1116 static Tags_t*
xml_field_beg(Tag_t * tag,Tagframe_t * fp,const char * name,Tagdisc_t * disc)1117 xml_field_beg(Tag_t* tag, Tagframe_t* fp, const char* name, Tagdisc_t* disc)
1118 {
1119 	register Xml_t*		xml = (Xml_t*)disc;
1120 	Field_t*		f;
1121 
1122 	if (name)
1123 	{
1124 		if (!(f = newof(0, Field_t, 1, 0)))
1125 		{
1126 			if (disc->errorf)
1127 				(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1128 			return 0;
1129 		}
1130 		if (!xml->lastfield)
1131 			xml->fields = f;
1132 		else
1133 			xml->lastfield->next = f;
1134 		xml->lastfield = f;
1135 	}
1136 	return &tags_xml_field[0];
1137 }
1138 
1139 static int
xml_field_end(Tag_t * tag,Tagframe_t * fp,Tagdisc_t * disc)1140 xml_field_end(Tag_t* tag, Tagframe_t* fp, Tagdisc_t* disc)
1141 {
1142 	register Xml_t*		xml = (Xml_t*)disc;
1143 
1144 	if (xml->lastfield && (!xml->lastfield->name || !xml->lastfield->type))
1145 	{
1146 		if (disc->errorf)
1147 			(*disc->errorf)(NiL, disc, 2, "field name and type must be specified");
1148 		return -1;
1149 	}
1150 	return 0;
1151 }
1152 
/*
 * tags accepted inside XML
 * each entry is: tag name, description, then four slots; in this file
 * the second slot holds a tag begin callback, the third a tag data
 * callback and the fourth a tag end callback
 * NOTE(review): the exact slot meanings are defined by Tags_t, which
 * is not visible here
 */

static Tags_t	tags_xml[] =
{
	"NAME",		"Schema name.",
			0,0,xml_name_dat,0,
	"DESCRIPTION",	"Schema description.",
			0,0,xml_description_dat,0,
	"LIBRARY",	"Required type/map library name;"
			" more than one library may be specified.",
			0,0,xml_library_dat,0,
	"FIELD",	"Field info.",
			0,xml_field_beg,0,xml_field_end,
	0
};
1166 
1167 static Tags_t*
xml_beg(Tag_t * tag,Tagframe_t * fp,const char * name,Tagdisc_t * disc)1168 xml_beg(Tag_t* tag, Tagframe_t* fp, const char* name, Tagdisc_t* disc)
1169 {
1170 	return &tags_xml[0];
1171 }
1172 
/*
 * top level schema tag table
 *
 * xmlmeth() passes this table to tagusage() when building the option
 * usage text and to tagopen() when reading a schema file
 * the list ends with a 0 entry
 */

static Tags_t	tags[] =
{
	/* METHOD has no handlers installed: all four trailing slots are 0 */
	"METHOD",	"Method name; must be xml.",
			0,0,0,0,
	/* XML installs xml_beg, which returns the tags_xml table */
	"XML",		"xml method schema.",
			0,xml_beg,0,0,
	0
};
1181 
1182 /*
1183  * methf
1184  */
1185 
1186 static Dssmeth_t*
xmlmeth(const char * name,const char * options,const char * schema,Dssdisc_t * disc,Dssmeth_t * meth)1187 xmlmeth(const char* name, const char* options, const char* schema, Dssdisc_t* disc, Dssmeth_t* meth)
1188 {
1189 	register Xml_t*		xml;
1190 	Tag_t*			tag;
1191 	Sfio_t*			sp;
1192 	Library_t*		p;
1193 	char*			s;
1194 	char			path[PATH_MAX];
1195 
1196 	if (!(xml = newof(0, Xml_t, 1, 0)))
1197 	{
1198 		if (disc->errorf)
1199 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
1200 		return 0;
1201 	}
1202 	xml->basemeth = meth;
1203 	xml->meth = *meth;
1204 	meth = &xml->meth;
1205 	meth->data = xml;
1206 	taginit(&xml->dsstagdisc.tagdisc, disc->errorf);
1207 	xml->dsstagdisc.tagdisc.id = DSS_ID;
1208 	xml->dsstagdisc.disc = disc;
1209 	xml->dsstagdisc.meth = meth;
1210 	sp = 0;
1211 	if (options)
1212 	{
1213 		if (!(sp = sfstropen()))
1214 			goto drop;
1215 		sfprintf(sp, "%s", usage);
1216 		if (tagusage(tags, sp, &xml->dsstagdisc.tagdisc))
1217 			goto drop;
1218 		sfprintf(sp, "}\n");
1219 		if (dssoptlib(meth->cx->buf, &dss_lib_xml, sfstruse(sp), disc))
1220 			goto drop;
1221 		sfclose(sp);
1222 		sp = 0;
1223 		s = sfstruse(meth->cx->buf);
1224 		for (;;)
1225 		{
1226 			switch (optstr(options, s))
1227 			{
1228 			case 'T':
1229 				xml->test = opt_info.num;
1230 				continue;
1231 			case '?':
1232 				if (disc->errorf)
1233 					(*disc->errorf)(NiL, disc, ERROR_USAGE|4, "%s", opt_info.arg);
1234 				goto drop;
1235 			case ':':
1236 				if (disc->errorf)
1237 					(*disc->errorf)(NiL, disc, 2, "%s", opt_info.arg);
1238 				goto drop;
1239 			}
1240 			break;
1241 		}
1242 	}
1243 	if (schema && *schema)
1244 	{
1245 		if (!(sp = dssfind(schema, NiL, DSS_VERBOSE, path, sizeof(path), disc)))
1246 			return 0;
1247 		if (!(tag = tagopen(sp, path, 1, &tags[0], &xml->dsstagdisc.tagdisc)) || tagclose(tag))
1248 			goto drop;
1249 		sfclose(sp);
1250 		sp = 0;
1251 	}
1252 	dtinsert(meth->formats, &xml_format);
1253 	dtinsert(meth->formats, &json_format);
1254 	for (p = xml->libraries; p; p = p->next)
1255 		if (!dssload(p->name, disc))
1256 			return 0;
1257 	return meth;
1258  drop:
1259 	free(xml);
1260 	if (sp)
1261 		sfclose(sp);
1262 	return 0;
1263 }
1264 
1265 /*
1266  * openf
1267  */
1268 
1269 static int
xmlopen(Dss_t * dss,Dssdisc_t * disc)1270 xmlopen(Dss_t* dss, Dssdisc_t* disc)
1271 {
1272 	Xml_t*		xml = (Xml_t*)dss->meth->data;
1273 	Field_t*	f;
1274 	Field_t*	g;
1275 	Cxvariable_t*	v;
1276 	int		i;
1277 
1278 	if (xml)
1279 	{
1280 		dss->cx->ctype['.'] |= CX_CTYPE_ALPHA;
1281 		for (i = 0; i < elementsof(local_callouts); i++)
1282 			if (cxaddcallout(dss->cx, &local_callouts[i], disc))
1283 				return -1;
1284 
1285 		xml_beg_tag[0] = 1;
1286 		xml_beg_tag['<'] = 1;
1287 
1288 		xml_end_tag[0] = 1;
1289 		xml_end_tag['>'] = 1;
1290 		xml_end_tag[' '] = 1;
1291 
1292 		xml_end_att[0] = 1;
1293 		xml_end_att['"'] = 1;
1294 		xml_end_att['>'] = 1;
1295 
1296 		json_beg_tag[0] = 2;
1297 		json_beg_tag[','] = 2;
1298 		json_beg_tag['{'] = 2;
1299 		json_beg_tag['}'] = 2;
1300 
1301 		json_end_val[0] = 2;
1302 		json_end_val['\\'] = 2;
1303 		json_end_val['"'] = 2;
1304 		json_end_val[':'] = 2;
1305 		json_end_val[','] = 2;
1306 		json_end_val['{'] = 2;
1307 		json_end_val['}'] = 2;
1308 		json_end_val['['] = 2;
1309 		json_end_val[']'] = 2;
1310 		json_end_val[' '] = 1;
1311 		json_end_val['\n'] = 1;
1312 		json_end_val['\r'] = 1;
1313 		json_end_val['\t'] = 1;
1314 		json_end_val['\v'] = 1;
1315 
1316 		xml->prefix = -1;
1317 		for (f = xml->fields; f; f = g)
1318 		{
1319 			g = f->next;
1320 			if (!(v = xmlvar(dss->cx, f->name, f->type, disc)))
1321 				return -1;
1322 			v->format = f->format;
1323 			free(f->name);
1324 			free(f->type);
1325 			free(f);
1326 		}
1327 	}
1328 	return 0;
1329 }
1330 
/*
 * the xml method descriptor, referenced by dss_lib_xml below
 *
 * positional initializer: name, description, CXH, then the xmlmeth and
 * xmlopen functions defined above; the two trailing slots are left 0
 * NOTE(review): confirm what the two 0 slots are against the Dssmeth_t
 * declaration
 */

static Dssmeth_t method =
{
	"xml",
	"xml and json method",
	CXH,
	xmlmeth,
	xmlopen,
	0,
	0
};
1341 
/*
 * library descriptor for the xml method
 *
 * this is the only definition in this part of the file that is not
 * static; xmlmeth() also hands its address to dssoptlib()
 * the description is one string literal built by concatenation: the
 * text "xml method", an embedded id string and USAGE_LICENSE
 */

Dsslib_t dss_lib_xml =
{
	"xml",
	"xml method"
	"[-1ls5Pp0?\n@(#)$Id: dss xml method (AT&T Research) 2010-04-22 $\n]"
	USAGE_LICENSE,
	CXH,
	0,
	/* the method descriptor defined just above */
	&method,
};
1352