1 /*
2  * rwarray.c - Builtin functions to binary read / write arrays to a file.
3  *
4  * Arnold Robbins
5  * May 2009
6  * Redone June 2012
7  * Improved September 2017
8  */
9 
10 /*
11  * Copyright (C) 2009-2014, 2017, 2018, 2020 the Free Software Foundation, Inc.
12  *
13  * This file is part of GAWK, the GNU implementation of the
14  * AWK Programming Language.
15  *
16  * GAWK is free software; you can redistribute it and/or modify
17  * it under the terms of the GNU General Public License as published by
18  * the Free Software Foundation; either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * GAWK is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License
27  * along with this program; if not, write to the Free Software
28  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include <config.h>
33 #endif
34 
35 #include <stdio.h>
36 #include <assert.h>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 
43 #ifdef __MINGW32__
44 #include <winsock2.h>
45 #include <stdint.h>
46 #else
47 #include <arpa/inet.h>
48 #endif
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 
52 #include "gawkapi.h"
53 
54 #include "gettext.h"
55 #define _(msgid)  gettext(msgid)
56 #define N_(msgid) msgid
57 
58 #define MAGIC "awkrulz\n"
59 #define MAJOR 3
60 #define MINOR 1
61 
62 static const gawk_api_t *api;	/* for convenience macros to work */
63 static awk_ext_id_t ext_id;
64 static const char *ext_version = "rwarray extension: version 1.2";
65 static awk_bool_t (*init_func)(void) = NULL;
66 
67 int plugin_is_GPL_compatible;
68 
69 static awk_bool_t write_array(FILE *fp, awk_array_t array);
70 static awk_bool_t write_elem(FILE *fp, awk_element_t *element);
71 static awk_bool_t write_value(FILE *fp, awk_value_t *val);
72 
73 static awk_bool_t read_array(FILE *fp, awk_array_t array);
74 static awk_bool_t read_elem(FILE *fp, awk_element_t *element);
75 static awk_bool_t read_value(FILE *fp, awk_value_t *value);
76 
/*
 * Format of array info:
 *
 * MAGIC		8 bytes
 * Major version	4 bytes - network order
 * Minor version	4 bytes - network order
 * Element count	4 bytes - network order
 * Elements
 *
 * For each element:
 * Length of index val:	4 bytes - network order
 * Index val as characters (N bytes)
 * Value type		4 bytes - network order
 *			(0 = string, 1 = number, 2 = array, 3 = regex, 4 = strnum, 5 = undefined)
 * IF number:
 * 	8 bytes as native double (written as-is, NOT converted to network order)
 * ELSE IF array:
 * 	Element count	4 bytes - network order
 * 	Elements
 * ELSE (string, regex, strnum, undefined):
 * 	Length of value	4 bytes - network order
 * 	Value as characters (N bytes)
 * END IF
 */
100 
101 /* do_writea --- write an array */
102 
103 static awk_value_t *
do_writea(int nargs,awk_value_t * result,struct awk_ext_func * unused)104 do_writea(int nargs, awk_value_t *result, struct awk_ext_func *unused)
105 {
106 	awk_value_t filename, array;
107 	FILE *fp = NULL;
108 	uint32_t major = MAJOR;
109 	uint32_t minor = MINOR;
110 
111 	assert(result != NULL);
112 	make_number(0.0, result);
113 
114 	if (nargs < 2)
115 		goto out;
116 
117 	/* filename is first arg, array to dump is second */
118 	if (! get_argument(0, AWK_STRING, & filename)) {
119 		warning(ext_id, _("do_writea: first argument is not a string"));
120 		errno = EINVAL;
121 		goto done1;
122 	}
123 
124 	if (! get_argument(1, AWK_ARRAY, & array)) {
125 		warning(ext_id, _("do_writea: second argument is not an array"));
126 		errno = EINVAL;
127 		goto done1;
128 	}
129 
130 	/* open the file, if error, set ERRNO and return */
131 	fp = fopen(filename.str_value.str, "wb");
132 	if (fp == NULL)
133 		goto done1;
134 
135 	if (fwrite(MAGIC, 1, strlen(MAGIC), fp) != strlen(MAGIC))
136 		goto done1;
137 
138 	major = htonl(major);
139 	if (fwrite(& major, 1, sizeof(major), fp) != sizeof(major))
140 		goto done1;
141 
142 	minor = htonl(minor);
143 	if (fwrite(& minor, 1, sizeof(minor), fp) != sizeof(minor))
144 		goto done1;
145 
146 	if (write_array(fp, array.array_cookie)) {
147 		make_number(1.0, result);
148 		goto done0;
149 	}
150 
151 done1:
152 	update_ERRNO_int(errno);
153 	unlink(filename.str_value.str);
154 
155 done0:
156 	fclose(fp);
157 out:
158 	return result;
159 }
160 
161 
162 /* write_array --- write out an array or a sub-array */
163 
164 static awk_bool_t
write_array(FILE * fp,awk_array_t array)165 write_array(FILE *fp, awk_array_t array)
166 {
167 	uint32_t i;
168 	uint32_t count;
169 	awk_flat_array_t *flat_array;
170 
171 	if (! flatten_array(array, & flat_array)) {
172 		warning(ext_id, _("write_array: could not flatten array"));
173 		return awk_false;
174 	}
175 
176 	count = htonl(flat_array->count);
177 	if (fwrite(& count, 1, sizeof(count), fp) != sizeof(count))
178 		return awk_false;
179 
180 	for (i = 0; i < flat_array->count; i++) {
181 		if (! write_elem(fp, & flat_array->elements[i])) {
182 			(void) release_flattened_array(array, flat_array);
183 			return awk_false;
184 		}
185 	}
186 
187 	if (! release_flattened_array(array, flat_array)) {
188 		warning(ext_id, _("write_array: could not release flattened array"));
189 		return awk_false;
190 	}
191 
192 	return awk_true;
193 }
194 
195 /* write_elem --- write out a single element */
196 
197 static awk_bool_t
write_elem(FILE * fp,awk_element_t * element)198 write_elem(FILE *fp, awk_element_t *element)
199 {
200 	uint32_t indexval_len;
201 	ssize_t write_count;
202 
203 	indexval_len = htonl(element->index.str_value.len);
204 	if (fwrite(& indexval_len, 1, sizeof(indexval_len), fp) != sizeof(indexval_len))
205 		return awk_false;
206 
207 	if (element->index.str_value.len > 0) {
208 		write_count = fwrite(element->index.str_value.str,
209 				1, element->index.str_value.len, fp);
210 		if (write_count != (ssize_t) element->index.str_value.len)
211 			return awk_false;
212 	}
213 
214 	return write_value(fp, & element->value);
215 }
216 
217 /* write_value --- write a number or a string or a strnum or a regex or an array */
218 
219 static awk_bool_t
write_value(FILE * fp,awk_value_t * val)220 write_value(FILE *fp, awk_value_t *val)
221 {
222 	uint32_t code, len;
223 
224 	if (val->val_type == AWK_ARRAY) {
225 		code = htonl(2);
226 		if (fwrite(& code, 1, sizeof(code), fp) != sizeof(code))
227 			return awk_false;
228 		return write_array(fp, val->array_cookie);
229 	}
230 
231 	if (val->val_type == AWK_NUMBER) {
232 		code = htonl(1);
233 		if (fwrite(& code, 1, sizeof(code), fp) != sizeof(code))
234 			return awk_false;
235 
236 		if (fwrite(& val->num_value, 1, sizeof(val->num_value), fp) != sizeof(val->num_value))
237 			return awk_false;
238 	} else {
239 		switch (val->val_type) {
240 		case AWK_STRING:
241 			code = htonl(0);
242 			break;
243 		case AWK_STRNUM:
244 			code = htonl(4);
245 			break;
246 		case AWK_REGEX:
247 			code = htonl(3);
248 			break;
249 		case AWK_UNDEFINED:
250 			code = htonl(5);
251 			break;
252 		default:
253 			/* XXX can this happen? */
254 			code = htonl(0);
255 			warning(ext_id, _("array value has unknown type %d"), val->val_type);
256 			break;
257 		}
258 		if (fwrite(& code, 1, sizeof(code), fp) != sizeof(code))
259 			return awk_false;
260 
261 		len = htonl(val->str_value.len);
262 		if (fwrite(& len, 1, sizeof(len), fp) != sizeof(len))
263 			return awk_false;
264 
265 		if (fwrite(val->str_value.str, 1, val->str_value.len, fp)
266 				!= (ssize_t) val->str_value.len)
267 			return awk_false;
268 	}
269 
270 	return awk_true;
271 }
272 
273 /* do_reada --- read an array */
274 
275 static awk_value_t *
do_reada(int nargs,awk_value_t * result,struct awk_ext_func * unused)276 do_reada(int nargs, awk_value_t *result, struct awk_ext_func *unused)
277 {
278 	awk_value_t filename, array;
279 	FILE *fp = NULL;
280 	uint32_t major;
281 	uint32_t minor;
282 	char magic_buf[30];
283 
284 	assert(result != NULL);
285 	make_number(0.0, result);
286 
287 	if (nargs < 2)
288 		goto out;
289 
290 	/* directory is first arg, array to read is second */
291 	if (! get_argument(0, AWK_STRING, & filename)) {
292 		warning(ext_id, _("do_reada: first argument is not a string"));
293 		errno = EINVAL;
294 		goto done1;
295 	}
296 
297 	if (! get_argument(1, AWK_ARRAY, & array)) {
298 		warning(ext_id, _("do_reada: second argument is not an array"));
299 		errno = EINVAL;
300 		goto done1;
301 	}
302 
303 	fp = fopen(filename.str_value.str, "rb");
304 	if (fp == NULL)
305 		goto done1;
306 
307 	memset(magic_buf, '\0', sizeof(magic_buf));
308 	if (fread(magic_buf, 1, strlen(MAGIC), fp) != strlen(MAGIC)) {
309 		errno = EBADF;
310 		goto done1;
311 	}
312 
313 	if (strcmp(magic_buf, MAGIC) != 0) {
314 		errno = EBADF;
315 		goto done1;
316 	}
317 
318 	if (fread(& major, 1, sizeof(major), fp) != sizeof(major)) {
319 		errno = EBADF;
320 		goto done1;
321 	}
322 	major = ntohl(major);
323 
324 	if (major != MAJOR) {
325 		errno = EBADF;
326 		goto done1;
327 	}
328 
329 	if (fread(& minor, 1, sizeof(minor), fp) != sizeof(minor)) {
330 		/* read() sets errno */
331 		goto done1;
332 	}
333 
334 	minor = ntohl(minor);
335 	if (minor != MINOR) {
336 		errno = EBADF;
337 		goto done1;
338 	}
339 
340 	if (! clear_array(array.array_cookie)) {
341 		errno = ENOMEM;
342 		warning(ext_id, _("do_reada: clear_array failed"));
343 		goto done1;
344 	}
345 
346 	if (read_array(fp, array.array_cookie)) {
347 		make_number(1.0, result);
348 		goto done0;
349 	}
350 
351 done1:
352 	update_ERRNO_int(errno);
353 done0:
354 	if (fp != NULL)
355 		fclose(fp);
356 out:
357 	return result;
358 }
359 
360 
361 /* read_array --- read in an array or sub-array */
362 
363 static awk_bool_t
read_array(FILE * fp,awk_array_t array)364 read_array(FILE *fp, awk_array_t array)
365 {
366 	uint32_t i;
367 	uint32_t count;
368 	awk_element_t new_elem;
369 
370 	if (fread(& count, 1, sizeof(count), fp) != sizeof(count))
371 		return awk_false;
372 
373 	count = ntohl(count);
374 
375 	for (i = 0; i < count; i++) {
376 		if (read_elem(fp, & new_elem)) {
377 			/* add to array */
378 			if (! set_array_element_by_elem(array, & new_elem)) {
379 				warning(ext_id, _("read_array: set_array_element failed"));
380 				return awk_false;
381 			}
382 		} else
383 			break;
384 	}
385 
386 	if (i != count)
387 		return awk_false;
388 
389 	return awk_true;
390 }
391 
392 /* read_elem --- read in a single element */
393 
394 static awk_bool_t
read_elem(FILE * fp,awk_element_t * element)395 read_elem(FILE *fp, awk_element_t *element)
396 {
397 	uint32_t index_len;
398 	static char *buffer;
399 	static uint32_t buflen;
400 	ssize_t ret;
401 
402 	if ((ret = fread(& index_len, 1, sizeof(index_len), fp)) != sizeof(index_len)) {
403 		return awk_false;
404 	}
405 	index_len = ntohl(index_len);
406 
407 	memset(element, 0, sizeof(*element));
408 
409 	if (index_len > 0) {
410 		if (buffer == NULL) {
411 			/* allocate buffer */
412 			emalloc(buffer, char *, index_len, "read_elem");
413 			buflen = index_len;
414 		} else if (buflen < index_len) {
415 			/* reallocate buffer */
416 			char *cp = gawk_realloc(buffer, index_len);
417 
418 			if (cp == NULL)
419 				return awk_false;
420 
421 			buffer = cp;
422 			buflen = index_len;
423 		}
424 
425 		if (fread(buffer, 1, index_len, fp) != (ssize_t) index_len) {
426 			return awk_false;
427 		}
428 		make_const_string(buffer, index_len, & element->index);
429 	} else {
430 		make_null_string(& element->index);
431 	}
432 
433 	if (! read_value(fp, & element->value))
434 		return awk_false;
435 
436 	return awk_true;
437 }
438 
439 /* read_value --- read a number or a string */
440 
441 static awk_bool_t
read_value(FILE * fp,awk_value_t * value)442 read_value(FILE *fp, awk_value_t *value)
443 {
444 	uint32_t code, len;
445 
446 	if (fread(& code, 1, sizeof(code), fp) != sizeof(code))
447 		return awk_false;
448 
449 	code = ntohl(code);
450 
451 	if (code == 2) {
452 		awk_array_t array = create_array();
453 
454 		if (! read_array(fp, array))
455 			return awk_false;
456 
457 		/* hook into value */
458 		value->val_type = AWK_ARRAY;
459 		value->array_cookie = array;
460 	} else if (code == 1) {
461 		double d;
462 
463 		if (fread(& d, 1, sizeof(d), fp) != sizeof(d))
464 			return awk_false;
465 
466 		/* hook into value */
467 		value->val_type = AWK_NUMBER;
468 		value->num_value = d;
469 	} else {
470 		if (fread(& len, 1, sizeof(len), fp) != sizeof(len)) {
471 			return awk_false;
472 		}
473 		len = ntohl(len);
474 		switch (code) {
475 		case 0:
476 			value->val_type = AWK_STRING;
477 			break;
478 		case 3:
479 			value->val_type = AWK_REGEX;
480 			break;
481 		case 4:
482 			value->val_type = AWK_STRNUM;
483 			break;
484 		case 5:
485 			value->val_type = AWK_UNDEFINED;
486 			break;
487 		default:
488 			/* this cannot happen! */
489 			warning(ext_id, _("treating recovered value with unknown type code %d as a string"), code);
490 			value->val_type = AWK_STRING;
491 			break;
492 		}
493 		value->str_value.len = len;
494 		value->str_value.str = gawk_malloc(len + 1);
495 
496 		if (fread(value->str_value.str, 1, len, fp) != (ssize_t) len) {
497 			gawk_free(value->str_value.str);
498 			return awk_false;
499 		}
500 		value->str_value.str[len] = '\0';
501 	}
502 
503 	return awk_true;
504 }
505 
506 static awk_ext_func_t func_table[] = {
507 	{ "writea", do_writea, 2, 2, awk_false, NULL },
508 	{ "reada", do_reada, 2, 2, awk_false, NULL },
509 };
510 
511 
512 /* define the dl_load function using the boilerplate macro */
513 
514 dl_load_func(func_table, rwarray, "")
515