1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 2003-2011 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *               Glenn Fowler <glenn.s.fowler@gmail.com>                *
18 *                                                                      *
19 ***********************************************************************/
20 #pragma prototyped
21 
/*
 * optget() usage text for the validate query
 *
 * validate_beg() appends this text to the bracketed part of the
 * library description before parsing the query options
 */

static const char validate_usage[] =
"[+PLUGIN?\findex\f]"
"[+DESCRIPTION?The validate query validates the constraints of the"
"\t\afield\a operands. If no operands are specified then all"
"\tfields with constraints or maps are validated. A warning is"
"\tprinted if there are no fields with constraints or maps.]"
"[d:discard?Discard records containing invalid fields.]"
"[l:list?List the field constraints and exit.]"
"[s:summary?Print a summary after all records have been read.]"
"[r:repair?Repair invalid fields if possible.]"
"[v:verbose?Warn about each invalid field and the action taken.]"
"\n\n[ field ... ]\n\n";
37 
38 #include <dsslib.h>
39 #include <ast_float.h>
40 
/* type names for the structures defined below */
typedef struct Field_s Field_t;
typedef struct Invalid_s Invalid_t;
typedef struct State_s State_t;
44 
45 struct Field_s
46 {
47 	Field_t*	next;
48 	Cxvariable_t*	variable;
49 	Cxinteger_t	invalid;
50 	Cxinteger_t	discarded;
51 	Cxinteger_t	repaired;
52 };
53 
54 struct Invalid_s
55 {
56 	Dtlink_t	link;
57 	Cxvalue_t	value;
58 	Cxvariable_t*	variable;
59 	Cxunsigned_t	count;
60 };
61 
62 struct State_s
63 {
64 	Field_t*	field;
65 	Cxcallout_f	getf;
66 	Cxcallout_f	setf;
67 	Dt_t*		invalid;
68 	Dtdisc_t	invaliddisc;
69 	Vmalloc_t*	vm;
70 	unsigned char	discard;
71 	unsigned char	summary;
72 	unsigned char	verbose;
73 };
74 
75 extern Dsslib_t		dss_lib_validate;
76 
77 static void
number(Sfio_t * op,const char * label,Cxnumber_t n,Cxformat_t * format)78 number(Sfio_t* op, const char* label, Cxnumber_t n, Cxformat_t* format)
79 {
80 	sfprintf(op, " %s=", label);
81 	if (format->details)
82 	{
83 		if (((n >= 0) ? n : -n) < 1)
84 			n = 0;
85 		else if (n > FLTMAX_INTMAX_MAX)
86 			n = FLTMAX_INTMAX_MAX;
87 		else if (n < FLTMAX_INTMAX_MIN)
88 			n = FLTMAX_INTMAX_MIN;
89 		sfprintf(op, format->details, (Cxinteger_t)n);
90 	}
91 	else if (n == 0 || ((n >= 0) ? n : -n) >= 1 && n >= FLTMAX_INTMAX_MIN && n <= FLTMAX_UINTMAX_MAX && n == (Cxinteger_t)n)
92 		sfprintf(op, (format->flags & CX_UNSIGNED) ? "%llu" : "%lld", (Cxinteger_t)n);
93 	else
94 		sfprintf(op, "%1.15Lg", n);
95 }
96 
97 static int
invalidcmp(Dt_t * dict,void * a,void * b,Dtdisc_t * disc)98 invalidcmp(Dt_t* dict, void* a, void* b, Dtdisc_t* disc)
99 {
100 	Invalid_t*	ap = (Invalid_t*)a;
101 	Invalid_t*	bp = (Invalid_t*)b;
102 	size_t		az;
103 	size_t		bz;
104 	int		r;
105 
106 	if (!(r = strcmp(ap->variable->name, bp->variable->name)))
107 	{
108 		if (cxisstring(ap->variable->type) || cxisbuffer(ap->variable->type))
109 		{
110 			az = ap->value.buffer.size;
111 			bz = bp->value.buffer.size;
112 			if (!(r = memcmp(ap->value.buffer.data, bp->value.buffer.data, az < bz ? az : bz)))
113 			{
114 				if (az < bz)
115 					r = -1;
116 				if (az > bz)
117 					r = 1;
118 			}
119 		}
120 		else if (ap->value.number < bp->value.number)
121 			r = -1;
122 		else if (ap->value.number > bp->value.number)
123 			r = 1;
124 	}
125 	return r;
126 }
127 
128 static int
validate_beg(Cx_t * cx,Cxexpr_t * expr,void * data,Cxdisc_t * disc)129 validate_beg(Cx_t* cx, Cxexpr_t* expr, void* data, Cxdisc_t* disc)
130 {
131 	char**			argv = (char**)data;
132 	int			errors = error_info.errors;
133 	char*			s;
134 	State_t*		state;
135 	Cxvariable_t*		variable;
136 	register Field_t*	field;
137 	Field_t*		lastfield;
138 	Cxconstraint_t*		constraint;
139 	int			all;
140 	int			list;
141 	Vmalloc_t*		vm;
142 
143 	if (!(vm = vmopen(Vmdcheap, Vmlast, 0)) || !(state = vmnewof(vm, 0, State_t, 1, 0)))
144 	{
145 		if (vm)
146 			vmclose(vm);
147 		if (disc->errorf)
148 			(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
149 		return -1;
150 	}
151 	state->vm = vm;
152 	list = 0;
153 	sfprintf(cx->buf, "%s%s", strchr(dss_lib_validate.description, '['), validate_usage);
154 	s = sfstruse(cx->buf);
155 	for (;;)
156 	{
157 		switch (optget(argv, s))
158 		{
159 		case 'd':
160 			state->discard = 1;
161 			continue;
162 		case 'l':
163 			list = 1;
164 			continue;
165 		case 'r':
166 			if (!(state->setf = cxcallout(cx, CX_SET, cx->state->type_void, cx->state->type_void, cx->disc)))
167 			{
168 				if (cx->disc->errorf)
169 					(*cx->disc->errorf)(NiL, cx->disc, 3, "reair requires CX_SET callout");
170 				return -1;
171 			}
172 			continue;
173 		case 's':
174 			state->summary = 1;
175 			continue;
176 		case 'v':
177 			state->summary = state->verbose = 1;
178 			continue;
179 		case '?':
180 			if (disc->errorf)
181 				(*disc->errorf)(NiL, disc, ERROR_USAGE|4, "%s", opt_info.arg);
182 			else
183 				return -1;
184 			continue;
185 		case ':':
186 			if (disc->errorf)
187 				(*disc->errorf)(NiL, disc, 2, "%s", opt_info.arg);
188 			else
189 				return -1;
190 			continue;
191 		}
192 		break;
193 	}
194 	if (error_info.errors > errors)
195 		goto bad;
196 	argv += opt_info.index;
197 	if (all = !*argv)
198 		variable = 0;
199 	do
200 	{
201 		if (all)
202 		{
203 			if (!(variable = (Cxvariable_t*)(variable ? dtnext(cx->fields, variable) : dtfirst(cx->fields))))
204 				break;
205 		}
206 		else if (!(variable = cxvariable(cx, *argv, NiL, disc)))
207 			goto bad;
208 		if (variable->format.constraint || variable->format.map)
209 		{
210 			if (!(field = vmnewof(vm, 0, Field_t, 1, 0)))
211 			{
212 				if (disc->errorf)
213 					(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
214 				goto bad;
215 			}
216 			field->variable = variable;
217 			if (state->field)
218 				lastfield = lastfield->next = field;
219 			else
220 				lastfield = state->field = field;
221 		}
222 	} while (all || *++argv);
223 	if (!state->field && disc->errorf)
224 		(*disc->errorf)(NiL, disc, 1, "no field has constraints or maps");
225 	if (list)
226 	{
227 		for (field = state->field; field; field = field->next)
228 		{
229 			sfprintf(expr->op, "%16s", field->variable->name);
230 			if (field->variable->format.map)
231 				sfprintf(expr->op, " map");
232 			if (constraint = field->variable->format.constraint)
233 			{
234 				if (constraint->name)
235 					sfprintf(expr->op, " name=%s", constraint->name);
236 				if (constraint->constraintf)
237 					sfprintf(expr->op, " external");
238 				if (cxisnumber(field->variable->type))
239 				{
240 					if (constraint->def)
241 						number(expr->op, "default", constraint->def->number, &field->variable->format);
242 					if (constraint->min)
243 						number(expr->op, "min", constraint->min->number, &field->variable->format);
244 					if (constraint->max)
245 						number(expr->op, "max", constraint->max->number, &field->variable->format);
246 				}
247 				else if (cxisstring(field->variable->type) && constraint->def)
248 					sfprintf(expr->op, " default=\"%-.*s\"", constraint->def->string.size, constraint->def->string.data);
249 				if (constraint->expression)
250 					sfprintf(expr->op, " expression=\"%s\"", constraint->expression);
251 				if (constraint->pattern)
252 					sfprintf(expr->op, " pattern=\"%s\"", constraint->pattern);
253 			}
254 			sfprintf(expr->op, "\n");
255 		}
256 		goto bad;
257 	}
258 	if (!(state->getf = cxcallout(cx, CX_GET, cx->state->type_void, cx->state->type_void, cx->disc)))
259 	{
260 		if (cx->disc->errorf)
261 			(*cx->disc->errorf)(NiL, cx->disc, 3, "validation requires CX_GET callout");
262 		goto bad;
263 	}
264 	if (!state->verbose)
265 	{
266 		state->invaliddisc.comparf = invalidcmp;
267 		if (!(state->invalid = dtnew(vm, &state->invaliddisc, Dtoset)))
268 		{
269 			if (cx->disc->errorf)
270 				(*cx->disc->errorf)(NiL, cx->disc, 3, "validation requires CX_GET callout");
271 			goto bad;
272 		}
273 	}
274 	expr->data = state;
275 	return 0;
276  bad:
277 	vmclose(vm);
278 	return -1;
279 }
280 
281 static int
validate_sel(Cx_t * cx,Cxexpr_t * expr,void * data,Cxdisc_t * disc)282 validate_sel(Cx_t* cx, Cxexpr_t* expr, void* data, Cxdisc_t* disc)
283 {
284 	register State_t*	state = (State_t*)expr->data;
285 	register Field_t*	field;
286 	register Cxconstraint_t*constraint;
287 	Cxoperand_t		o;
288 	Cxinstruction_t		x;
289 	Invalid_t		key;
290 	Invalid_t*		ip;
291 	size_t			n;
292 
293 	for (field = state->field; field; field = field->next)
294 	{
295 		x.data.variable = field->variable;
296 		if ((*state->getf)(cx, &x, &o, NiL, NiL, data, disc))
297 			return -1;
298 		if (field->variable->format.map)
299 		{
300 			if (cxisstring(field->variable->type))
301 			{
302 				if (cxstr2num(cx, &field->variable->format, o.value.string.data, o.value.string.size, NiL))
303 				{
304 					if (state->verbose && disc->errorf)
305 						(*disc->errorf)(NiL, disc, 1, "%s%s: %-.*s: unknown map name", cxlocation(cx, data), field->variable->name, o.value.string.size, o.value.string.data);
306 					goto invalid;
307 				}
308 			}
309 			else if (cxisnumber(field->variable->type))
310 			{
311 				if (cxnum2str(cx, &field->variable->format, (Cxinteger_t)o.value.number, NiL))
312 				{
313 					if (state->verbose && disc->errorf)
314 						(*disc->errorf)(NiL, disc, 1, "%s%s: %I*d: unknown map value", cxlocation(cx, data), field->variable->name, sizeof(Cxinteger_t), (Cxinteger_t)o.value.number);
315 					goto invalid;
316 				}
317 			}
318 		}
319 		if (constraint = field->variable->format.constraint)
320 		{
321 			if (constraint->constraintf)
322 				;
323 			if (cxisnumber(field->variable->type))
324 			{
325 				if (constraint->min && o.value.number < constraint->min->number)
326 				{
327 					if (state->verbose && disc->errorf)
328 						(*disc->errorf)(NiL, disc, 1, "%s%s: %1.15Lg violates min constraint %1.15Lg", cxlocation(cx, data), field->variable->name, o.value.number, constraint->min->number);
329 					goto invalid;
330 				}
331 				if (constraint->max && o.value.number > constraint->max->number)
332 				{
333 					if (state->verbose && disc->errorf)
334 						(*disc->errorf)(NiL, disc, 1, "%s%s: %1.15Lg violates max constraint %1.15Lg", cxlocation(cx, data), field->variable->name, o.value.number, constraint->max->number);
335 					goto invalid;
336 				}
337 			}
338 			if (constraint->expression)
339 				;
340 			if (constraint->pattern)
341 				;
342 		}
343 		continue;
344 	invalid:
345 		if (state->invalid)
346 		{
347 			key.variable = field->variable;
348 			key.value = o.value;
349 			if (!(ip = (Invalid_t*)dtsearch(state->invalid, &key)))
350 			{
351 				n = cxsize(field->variable->type, &o.value);
352 				if (!(ip = vmnewof(state->vm, 0, Invalid_t, 1, n)))
353 				{
354 					if (disc->errorf)
355 						(*disc->errorf)(NiL, disc, ERROR_SYSTEM|2, "out of space");
356 					return -1;
357 				}
358 				*ip = key;
359 				ip->value = o.value;
360 				if (n)
361 				{
362 					ip->value.buffer.data = (void*)(ip + 1);
363 					memcpy(ip->value.buffer.data, o.value.buffer.data, n);
364 				}
365 				dtinsert(state->invalid, ip);
366 				ip->count = 0;
367 			}
368 			ip->count++;
369 		}
370 		if (state->setf && constraint && constraint->def)
371 		{
372 			o.type = field->variable->type;
373 			o.value = *constraint->def;
374 			if ((*state->setf)(cx, &x, &o, &o, NiL, data, disc))
375 				return -1;
376 			field->repaired++;
377 		}
378 		else if (state->discard)
379 		{
380 			field->discarded++;
381 			return 0;
382 		}
383 		else
384 			field->invalid++;
385 	}
386 	return 1;
387 }
388 
389 static int
validate_end(Cx_t * cx,Cxexpr_t * expr,void * data,Cxdisc_t * disc)390 validate_end(Cx_t* cx, Cxexpr_t* expr, void* data, Cxdisc_t* disc)
391 {
392 	register State_t*	state = (State_t*)expr->data;
393 	register Field_t*	field;
394 	Invalid_t*		ip;
395 	Cxoperand_t		val;
396 	int			heading;
397 
398 	if (state->summary)
399 	{
400 		heading = 1;
401 		if (state->invalid && dtsize(state->invalid))
402 		{
403 			heading = 0;
404 			sfprintf(expr->op, "%16s  %11s  %s\n", "FIELD", "COUNT", "VALUE");
405 			for (ip = (Invalid_t*)dtfirst(state->invalid); ip; ip = (Invalid_t*)dtnext(state->invalid, ip))
406 			{
407 				val.type = ip->variable->type;
408 				val.value = ip->value;
409 				if (!cxcast(cx, &val, NiL, cx->state->type_string, NiL, NiL))
410 					sfprintf(expr->op, "%16s  %11I*u  %*.*s\n", ip->variable->name, sizeof(ip->count), ip->count, val.value.string.size, val.value.string.size, val.value.string.data);
411 			}
412 		}
413 		if (!heading)
414 		{
415 			heading = 1;
416 			sfprintf(expr->op, "\n");
417 		}
418 		for (field = state->field; field; field = field->next)
419 			if (field->invalid || field->discarded || field->repaired)
420 			{
421 				if (heading)
422 				{
423 					heading = 0;
424 					sfprintf(expr->op, "%16s  %11s %11s %11s\n", "FIELD", "INVALID", "DISCARDED", "REPAIRED");
425 				}
426 				sfprintf(expr->op, "%16s  %11I*u %11I*u %11I*u\n", field->variable->name, sizeof(field->invalid), field->invalid, sizeof(field->discarded), field->discarded, sizeof(field->repaired), field->repaired);
427 			}
428 	}
429 	vmclose(state->vm);
430 	return 0;
431 }
432 
433 static Cxquery_t	queries[] =
434 {
435 	{
436 		"validate",
437 		"validate field value constraints",
438 		CXH,
439 		validate_beg,
440 		validate_sel,
441 		0,
442 		validate_end
443 	},
444 	{0}
445 };
446 
447 Dsslib_t		dss_lib_validate =
448 {
449 	"validate",
450 	"validate query"
451 	"[-1lms5P?\n@(#)$Id: dss validate query (AT&T Research) 2003-04-05 $\n]"
452 	USAGE_LICENSE,
453 	CXH,
454 	0,
455 	0,
456 	0,
457 	0,
458 	0,
459 	0,
460 	&queries[0]
461 };
462