1 /*
2  * gawkapi.h -- Definitions for use by extension functions calling into gawk.
3  */
4 
5 /*
6  * Copyright (C) 2012-2019, 2021 the Free Software Foundation, Inc.
7  *
8  * This file is part of GAWK, the GNU implementation of the
9  * AWK Programming Language.
10  *
11  * GAWK is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 3 of the License, or
14  * (at your option) any later version.
15  *
16  * GAWK is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
24  */
25 
26 /*
27  * The following types and/or macros and/or functions are referenced
28  * in this file.  For correct use, you must therefore include the
29  * corresponding standard header file BEFORE including this file.
30  *
31  * FILE			- <stdio.h>
32  * NULL			- <stddef.h>
33  * memset(), memcpy()	- <string.h>
34  * size_t, ssize_t	- <sys/types.h>
35  * struct stat		- <sys/stat.h>
36  *
37  * Due to portability concerns, especially to systems that are not
38  * fully standards-compliant, it is your responsibility to include
39  * the correct files in the correct way. This requirement is necessary
40  * in order to keep this file clean, instead of becoming a portability
41  * hodge-podge as can be seen in the gawk source code.
42  *
43  * To pass reasonable integer values for ERRNO, you will also need to
44  * include <errno.h>.
45  */
46 
47 #ifndef _GAWK_API_H
48 #define _GAWK_API_H
49 
50 /*
51  * General introduction:
52  *
53  * This API purposely restricts itself to ISO C 90 features.  In particular, no
54  * bool, no // comments, no use of the restrict keyword, or anything else,
55  * in order to provide maximal portability.
56  *
57  * Exception: the "inline" keyword is used below in the "constructor"
58  * functions. If your compiler doesn't support it, you should either
59  * -Dinline='' on your command line, or use the autotools and include a
60  * config.h in your extensions.
61  *
62  * Additional important information:
63  *
64  * 1. ALL string values in awk_value_t objects need to come from api_malloc().
65  * Gawk will handle releasing the storage if necessary.  This is slightly
66  * awkward, in that you can't take an awk_value_t that you got from gawk
67  * and reuse it directly, even for something that is conceptually pass
68  * by value.
69  *
70  * 2. Due to gawk internals, after using sym_update() to install an array
71  * into gawk, you have to retrieve the array cookie from the value
72  * passed in to sym_update().  Like so:
73  *
74  *	new_array = create_array();
75  *	val.val_type = AWK_ARRAY;
76  *	val.array_cookie = new_array;
77  *	sym_update("array", & val);	// install array in the symbol table
78  *
79  *	new_array = val.array_cookie;	// MUST DO THIS
80  *
81  *	// fill in new array with lots of subscripts and values
82  *
83  * Similarly, if installing a new array as a subarray of an existing
84  * array, you must add the new array to its parent before adding any
85  * elements to it.
86  *
87  * You must also retrieve the value of the array_cookie after the call
88  * to set_element().
89  *
90  * Thus, the correct way to build an array is to work "top down".
91  * Create the array, and immediately install it in gawk's symbol table
92  * using sym_update(), or install it as an element in a previously
93  * existing array using set_element().
94  *
95  * Thus the new array must ultimately be rooted in a global symbol. This is
96  * necessary before installing any subarrays in it, due to gawk's
97  * internal implementation.  Strictly speaking, this is required only
98  * for arrays that will have subarrays as elements; however it is
99  * a good idea to always do this.  This restriction may be relaxed
100  * in a subsequent revision of the API.
101  */
102 
103 /* Allow use in C++ code.  */
104 #ifdef __cplusplus
105 extern "C" {
106 #endif
107 
108 /* Extensions see awk_const as const, so they cannot modify the fields it marks; gawk itself (GAWK defined) can. */
109 #ifdef GAWK
110 #define awk_const
111 #else
112 #define awk_const const
113 #endif
114 
115 typedef enum awk_bool {
116 	awk_false = 0,
117 	awk_true
118 } awk_bool_t;	/* deliberately not <stdbool.h>: this API restricts itself to ISO C 90 */
119 
120 /*
121  * If an input parser would like to specify the field positions in the input
122  * record, it may populate an awk_fieldwidth_info_t structure to indicate
123  * the location of each field. The use_chars boolean controls whether the
124  * field lengths are specified in terms of bytes or potentially multi-byte
125  * characters. Performance will be better if the values are supplied in
126  * terms of bytes. The fields[0].skip value indicates how many bytes (or
127  * characters) to skip before $1, and fields[0].len is the length of $1, etc.
128  */
129 
130 typedef struct {
131 	awk_bool_t	use_chars;	/* false ==> use bytes; true ==> use characters */
132 	size_t		nf;		/* number of entries in fields[] */
133 	struct awk_field_info {
134 		size_t	skip;	/* amount to skip before field starts */
135 		size_t	len;	/* length of field */
136 	} fields[1];		/* actual dimension should be nf */
137 } awk_fieldwidth_info_t;
138 
139 /*
140  * This macro calculates the total struct size needed for NF fields; the
141  * first one is already counted in the sizeof. Useful with malloc or realloc.
142  */
143 #define awk_fieldwidth_info_size(NF) (sizeof(awk_fieldwidth_info_t) + \
144 			(((NF)-1) * sizeof(struct awk_field_info)))
145 
146 /* The information about input files that input parsers need to know: */
147 typedef struct awk_input {
148 	const char *name;	/* filename */
149 	int fd;			/* file descriptor */
150 #define INVALID_HANDLE (-1)	/* close_func stores this in fd to keep gawk from closing it */
151 	void *opaque;           /* private data for input parsers */
152 
153 	/*
154 	 * The get_record function is called to read the next record of data.
155 	 *
156 	 * It should return the length of the input record or EOF, and it
157 	 * should set *out to point to the contents of $0. The rt_start
158 	 * and rt_len arguments should be used to return RT to gawk.
159 	 * If EOF is not returned, the parser must set *rt_len (and
160 	 * *rt_start if *rt_len is non-zero).
161 	 *
162 	 * Note that gawk will make a copy of the record in *out, so the
163 	 * parser is responsible for managing its own memory buffer.
164 	 * Similarly, gawk will make its own copy of RT, so the parser
165 	 * is also responsible for managing this memory.
166 	 *
167 	 * It is guaranteed that errcode is a valid pointer, so there is
168 	 * no need to test for a NULL value.  Gawk sets *errcode to 0,
169 	 * so there is no need to set it unless an error occurs.
170 	 *
171 	 * If an error does occur, the function should return EOF and set
172 	 * *errcode to a positive value.  Whenever *errcode is greater than
173 	 * zero, gawk will automatically update the ERRNO variable based
174 	 * on the value of *errcode (e.g., setting *errcode = errno should do
175 	 * the right thing).
176 	 *
177 	 * If field_width is non-NULL, then *field_width will be initialized
178 	 * to NULL, and the function may set it to point to a structure
179 	 * supplying field width information to override the default
180 	 * gawk field parsing mechanism. Note that this structure will not
181 	 * be copied by gawk; it must persist at least until the next call
182 	 * to get_record or close_func. Note also that field_width will
183 	 * be NULL when getline is assigning the results to a variable, thus
184 	 * field parsing is not needed.
185 	 */
186 	int (*get_record)(char **out, struct awk_input *iobuf, int *errcode,
187 			char **rt_start, size_t *rt_len,
188 			const awk_fieldwidth_info_t **field_width);
189 
190 	/*
191 	 * This replaces the POSIX read() system call. Use it if you want to
192 	 * manage reading raw bytes yourself, and let gawk parse the record.
193 	 */
194 	ssize_t (*read_func)(int, void *, size_t);
195 
196 	/*
197 	 * The close_func is called to allow the parser to free private data.
198 	 * Gawk itself will close the fd unless close_func first sets it to
199 	 * INVALID_HANDLE.
200 	 */
201 	void (*close_func)(struct awk_input *iobuf);
202 
203 	/* put last, for alignment. bleah */
204 	struct stat sbuf;       /* stat buf */
205 
206 } awk_input_buf_t;
207 
208 typedef struct awk_input_parser {
209 	const char *name;	/* name of parser */
210 
211 	/*
212 	 * The can_take_file function should return true if the parser
213 	 * would like to parse this file.  It should not change any gawk
214 	 * state!
215 	 */
216 	awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
217 
218 	/*
219 	 * If this parser is selected, then take_control_of will be called.
220 	 * It can assume that a previous call to can_take_file was successful,
221 	 * and no gawk state has changed since that call.  It should populate
222 	 * the awk_input_buf_t's get_record, close_func, and opaque values as needed.
223 	 * It should return true if successful.
224 	 */
225 	awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
226 
227 	awk_const struct awk_input_parser *awk_const next;	/* link to another parser; for use by gawk */
228 } awk_input_parser_t;
229 
230 /*
231  * Similar for output wrapper.
232  */
233 
234 /* First the data structure */
235 typedef struct awk_output_buf {
236 	const char *name;	/* name of output file */
237 	const char *mode;	/* mode argument to fopen */
238 	FILE *fp;		/* stdio file pointer */
239 	awk_bool_t redirected;	/* true if a wrapper is active */
240 	void *opaque;		/* for use by output wrapper */
241 
242 	/*
243 	 * Replacement functions for I/O.  Just like the regular versions
244 	 * (fwrite, fflush, ferror, fclose) but also take the opaque pointer argument.
245 	 */
246 	size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
247 				FILE *fp, void *opaque);
248 	int (*gawk_fflush)(FILE *fp, void *opaque);
249 	int (*gawk_ferror)(FILE *fp, void *opaque);
250 	int (*gawk_fclose)(FILE *fp, void *opaque);
251 } awk_output_buf_t;
252 
253 /* Next the output wrapper registered with gawk */
254 typedef struct awk_output_wrapper {
255 	const char *name;	/* name of the wrapper */
256 
257 	/*
258 	 * The can_take_file function should return true if the wrapper
259 	 * would like to process this file.  It should not change any gawk
260 	 * state!
261 	 */
262 	awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
263 
264 	/*
265 	 * If this wrapper is selected, then take_control_of will be called.
266 	 * It can assume that a previous call to can_take_file was successful,
267 	 * and no gawk state has changed since that call.  It should populate
268 	 * the awk_output_buf_t function pointers and opaque pointer as needed.
269 	 * It should return true if successful.
270 	 */
271 	awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
272 
273 	awk_const struct awk_output_wrapper *awk_const next;  /* link to another wrapper; for use by gawk */
274 } awk_output_wrapper_t;
275 
276 /* A two-way processor combines an input parser and an output wrapper. */
277 typedef struct awk_two_way_processor {
278 	const char *name;	/* name of the two-way processor */
279 
280 	/*
281 	 * The can_take_two_way function should return true if the two-way
282 	 * processor would like to parse this file.  It should not change
283 	 * any gawk state!
284 	 */
285 	awk_bool_t (*can_take_two_way)(const char *name);
286 
287 	/*
288 	 * If this processor is selected, then take_control_of will be called.
289 	 * It can assume that a previous call to can_take_two_way was successful,
290 	 * and no gawk state has changed since that call.  It should populate
291 	 * the awk_input_buf_t and awk_output_buf_t structures as needed.
292 	 * It should return true if successful.
293 	 */
294 	awk_bool_t (*take_control_of)(const char *name, awk_input_buf_t *inbuf,
295 					awk_output_buf_t *outbuf);
296 
297 	awk_const struct awk_two_way_processor *awk_const next;  /* link to another processor; for use by gawk */
298 } awk_two_way_processor_t;
299 
300 #define gawk_api_major_version 3
301 #define gawk_api_minor_version 1
302 
303 /* Current version of the API; bump when struct gawk_api changes. */
304 enum {
305 	GAWK_API_MAJOR_VERSION = gawk_api_major_version,
306 	GAWK_API_MINOR_VERSION = gawk_api_minor_version
307 };
308 
309 /* A number of typedefs related to different types of values. */
310 
311 /*
312  * A mutable string. Gawk owns the memory pointed to if it supplied
313  * the value. Otherwise, it takes ownership of the memory pointed to.
314  *
315  * The API deals exclusively with regular chars; these strings may
316  * be multibyte encoded in the current locale's encoding and character
317  * set. Gawk will convert internally to wide characters if necessary.
318  *
319  * Note that a string provided by gawk will always be terminated
320  * with a '\0' character.
321  */
322 typedef struct awk_string {
323 	char *str;	/* data; may be multibyte encoded */
324 	size_t len;	/* length thereof, in chars (bytes), not multibyte characters */
325 } awk_string_t;
326 
327 enum AWK_NUMBER_TYPE {
328 	AWK_NUMBER_TYPE_DOUBLE,	/* plain double */
329 	AWK_NUMBER_TYPE_MPFR,	/* presumably ptr is an mpfr_ptr -- confirm */
330 	AWK_NUMBER_TYPE_MPZ	/* presumably ptr is an mpz_ptr -- confirm */
331 };
332 
333 /*
334  * When type is AWK_NUMBER_TYPE_MPFR or AWK_NUMBER_TYPE_MPZ, the memory
335  * pointed to by the ptr member belongs to gawk if it came from gawk.  Otherwise
336  * the memory belongs to the extension and gawk copies it when it's received.
337  * See the manual for further discussion.
338  */
339 
340 typedef struct awk_number {
341 	double d;	/* always populated in data received from gawk */
342 	enum AWK_NUMBER_TYPE type;	/* representation; see enum AWK_NUMBER_TYPE */
343 	void *ptr;	/* either NULL or mpfr_ptr or mpz_ptr */
344 } awk_number_t;
345 
346 /* Arrays are represented as an opaque type (the "array cookie"). */
347 typedef void *awk_array_t;
348 
349 /* Scalars can be represented as an opaque type (the "scalar cookie"). */
350 typedef void *awk_scalar_t;
351 
352 /* Any value can be stored as a cookie; see api_create_value. */
353 typedef void *awk_value_cookie_t;
354 
355 /*
356  * This tag defines the type of a value.
357  *
358  * Values are associated with regular variables and with array elements.
359  * Since arrays can be multidimensional (as can regular variables)
360  * it's valid to have a "value" that is actually an array.
361  */
362 typedef enum {
363 	AWK_UNDEFINED,
364 	AWK_NUMBER,
365 	AWK_STRING,
366 	AWK_REGEX,
367 	AWK_STRNUM,
368 	AWK_ARRAY,		/* an array, handled through an array cookie */
369 	AWK_SCALAR,		/* opaque access to a variable (scalar cookie) */
370 	AWK_VALUE_COOKIE	/* for updating a previously created value (see api_create_value) */
371 } awk_valtype_t;
372 
373 /*
374  * An awk value. The val_type tag indicates what
375  * is in the union.
376  */
377 typedef struct awk_value {
378 	awk_valtype_t	val_type;	/* tag: indicates what is in the union */
379 	union {
380 		awk_string_t	s;
381 		awk_number_t	n;
382 		awk_array_t	a;
383 		awk_scalar_t	scl;
384 		awk_value_cookie_t vc;
385 	} u;
386 #define str_value	u.s
387 #define strnum_value	str_value	/* strnums and regexes share the string member */
388 #define regex_value	str_value
389 #define num_value	u.n.d
390 #define num_type	u.n.type
391 #define num_ptr		u.n.ptr
392 #define array_cookie	u.a
393 #define scalar_cookie	u.scl
394 #define value_cookie	u.vc
395 } awk_value_t;
396 
397 /*
398  * A "flattened" array element. Gawk produces an array of these
399  * inside the awk_flat_array_t.
400  * ALL memory pointed to belongs to gawk. Individual elements may
401  * be marked for deletion. New elements must be added individually,
402  * one at a time, using the separate API for that purpose.
403  */
404 
405 typedef struct awk_element {
406 	/* convenience linked list pointer, not used by gawk */
407 	struct awk_element *next;
408 	enum {
409 		AWK_ELEMENT_DEFAULT = 0,	/* set by gawk */
410 		AWK_ELEMENT_DELETE = 1		/* set by extension if
411 						   should be deleted */
412 	} flags;
413 	awk_value_t	index;	/* subscript of this element */
414 	awk_value_t	value;	/* value stored under that subscript */
415 } awk_element_t;
416 
417 /*
418  * A "flattened" array. See the description above for how
419  * to use the elements contained herein.
420  */
421 typedef struct awk_flat_array {
422 	awk_const void *awk_const opaque1;	/* private data for use by gawk */
423 	awk_const void *awk_const opaque2;	/* private data for use by gawk */
424 	awk_const size_t count;			/* how many elements */
425 	awk_element_t elements[1];		/* will be extended; count says how many */
426 } awk_flat_array_t;
427 
428 /*
429  * A record describing an extension function. Upon being
430  * loaded, the extension should pass in one of these to gawk for
431  * each C function.
432  *
433  * Each called function must fill in the result with a scalar
434  * (number, string, or regex). Gawk takes ownership of any string memory.
435  *
436  * The called function must return the value of `result'.
437  * This is for the convenience of the calling code inside gawk.
438  *
439  * Each extension function may decide what to do if the number of
440  * arguments isn't what it expected.  Following awk functions, it
441  * is likely OK to ignore extra arguments.
442  *
443  * 'min_required_args' indicates how many arguments MUST be passed.
444  * The API will throw a fatal error if not enough are passed.
445  *
446  * 'max_expected_args' is more benign; if more than that are passed,
447  * the API prints a lint message (IFF lint is enabled, of course).
448  *
449  * In any case, the extension function itself need not compare the
450  * actual number of arguments passed to those two values if it does
451  * not want to.
452  */
453 typedef struct awk_ext_func {
454 	const char *name;
455 	awk_value_t *(*const function)(int num_actual_args,
456 					awk_value_t *result,
457 					struct awk_ext_func *finfo);
458 	const size_t max_expected_args;	/* passing more draws a lint message */
459 	const size_t min_required_args;	/* passing fewer is a fatal error */
460 	awk_bool_t suppress_lint;	/* NOTE(review): presumably silences that lint message -- confirm */
461 	void *data;		/* opaque pointer to any extra state */
462 } awk_ext_func_t;
463 
464 typedef void *awk_ext_id_t;	/* opaque extension id; first argument of most API functions */
465 
466 /*
467  * The API into gawk. Lots of functions here. We hope that they are
468  * logically organized.
469  *
470  * !!! If you make any changes to this structure, please remember to bump !!!
471  * !!! gawk_api_major_version and/or gawk_api_minor_version.              !!!
472  */
473 typedef struct gawk_api {
474 	/* First, data fields. */
475 
476 	/* These are what gawk thinks the API version is. */
477 	awk_const int major_version;
478 	awk_const int minor_version;
479 
480 	/* GMP/MPFR versions, if extended-precision is available */
481 	awk_const int gmp_major_version;
482 	awk_const int gmp_minor_version;
483 	awk_const int mpfr_major_version;
484 	awk_const int mpfr_minor_version;
485 
486 	/*
487 	 * These can change on the fly as things happen within gawk.
488 	 * Currently only do_lint is prone to change, but we reserve
489 	 * the right to allow the others to do so also.
490 	 */
491 #define DO_FLAGS_SIZE	6
492 	awk_const int do_flags[DO_FLAGS_SIZE];
493 /* Use these as indices into do_flags[] array to check the values */
494 #define gawk_do_lint		0
495 #define gawk_do_traditional	1
496 #define gawk_do_profile		2
497 #define gawk_do_sandbox		3
498 #define gawk_do_debug		4
499 #define gawk_do_mpfr		5
500 
501 	/* Next, registration functions: */
502 
503 	/*
504 	 * Add a function to the interpreter, returns true upon success.
505 	 * Gawk does not modify what func points to, but the extension
506 	 * function itself receives this pointer and can modify what it
507 	 * points to, thus it's not const.
508 	 */
509 	awk_bool_t (*api_add_ext_func)(awk_ext_id_t id, const char *name_space,
510 			awk_ext_func_t *func);
511 
512 	/* Register an input parser; for opening files read-only */
513 	void (*api_register_input_parser)(awk_ext_id_t id,
514 					awk_input_parser_t *input_parser);
515 
516 	/* Register an output wrapper, for writing files */
517 	void (*api_register_output_wrapper)(awk_ext_id_t id,
518 					awk_output_wrapper_t *output_wrapper);
519 
520 	/* Register a processor for two way I/O */
521 	void (*api_register_two_way_processor)(awk_ext_id_t id,
522 				awk_two_way_processor_t *two_way_processor);
523 
524 	/*
525 	 * Add an exit call back.
526 	 *
527 	 * arg0 is a private data pointer for use by the extension;
528 	 * gawk saves it and passes it into the function pointed
529 	 * to by funcp at exit.
530 	 *
531 	 * Exit callback functions are called in LIFO order.
532 	 */
533 	void (*api_awk_atexit)(awk_ext_id_t id,
534 			void (*funcp)(void *data, int exit_status),
535 			void *arg0);
536 
537 	/* Register a version string for this extension with gawk. */
538 	void (*api_register_ext_version)(awk_ext_id_t id, const char *version);
539 
540 	/* Functions to print messages */
541 	void (*api_fatal)(awk_ext_id_t id, const char *format, ...);
542 	void (*api_warning)(awk_ext_id_t id, const char *format, ...);
543 	void (*api_lintwarn)(awk_ext_id_t id, const char *format, ...);
544 	void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...);
545 
546 	/* Functions to update ERRNO */
547 	void (*api_update_ERRNO_int)(awk_ext_id_t id, int errno_val);
548 	void (*api_update_ERRNO_string)(awk_ext_id_t id, const char *string);
549 	void (*api_unset_ERRNO)(awk_ext_id_t id);
550 
551 	/*
552 	 * All of the functions that return a value from inside gawk
553 	 * (get a parameter, get a global variable, get an array element)
554 	 * behave in the same way.
555 	 *
556 	 * For a function parameter, the return is false if the argument
557 	 * count is out of range, or if the actual parameter does not match
558 	 * what is specified in wanted. In that case, result->val_type
559 	 * will hold the actual type of what was passed.
560 	 *
561 	 * Similarly for symbol table access to variables and array elements,
562 	 * the return is false if the actual variable or array element does
563 	 * not match what was requested, and result->val_type will hold
564 	 * the actual type.
565 
566 	Table entry is type returned:
567 
568 
569 	                        +-------------------------------------------------------+
570 	                        |                   Type of Actual Value:               |
571 	                        +--------+--------+--------+--------+-------+-----------+
572 	                        | String | Strnum | Number | Regex  | Array | Undefined |
573 	+-----------+-----------+--------+--------+--------+--------+-------+-----------+
574 	|           | String    | String | String | String | String | false | false     |
575 	|           +-----------+--------+--------+--------+--------+-------+-----------+
576 	|           | Strnum    | false  | Strnum | Strnum | false  | false | false     |
577 	|           +-----------+--------+--------+--------+--------+-------+-----------+
578 	|           | Number    | Number | Number | Number | false  | false | false     |
579 	|           +-----------+--------+--------+--------+--------+-------+-----------+
580 	|           | Regex     | false  | false  | false  | Regex  | false | false     |
581 	|           +-----------+--------+--------+--------+--------+-------+-----------+
582 	|   Type    | Array     | false  | false  | false  | false  | Array | false     |
583 	| Requested +-----------+--------+--------+--------+--------+-------+-----------+
584 	|           | Scalar    | Scalar | Scalar | Scalar | Scalar | false | false     |
585 	|           +-----------+--------+--------+--------+--------+-------+-----------+
586 	|           | Undefined | String | Strnum | Number | Regex  | Array | Undefined |
587 	|           +-----------+--------+--------+--------+--------+-------+-----------+
588 	|           | Value     | false  | false  | false  | false  | false | false     |
589 	|           | Cookie    |        |        |        |        |       |           |
590 	+-----------+-----------+--------+--------+--------+--------+-------+-----------+
591 	*/
592 
593 	/* Functions to handle parameters passed to the extension. */
594 
595 	/*
596 	 * Get the count'th parameter, zero-based.
597 	 * Returns false if count is out of range, or if actual parameter
598 	 * does not match what is specified in wanted. In that case,
599 	 * result->val_type is as described above.
600 	 */
601 	awk_bool_t (*api_get_argument)(awk_ext_id_t id, size_t count,
602 					  awk_valtype_t wanted,
603 					  awk_value_t *result);
604 
605 	/*
606 	 * Convert a parameter that was undefined into an array
607 	 * (provide call-by-reference for arrays).  Returns false
608 	 * if count is too big, or if the argument's type is
609 	 * not undefined.
610 	 */
611 	awk_bool_t (*api_set_argument)(awk_ext_id_t id,
612 					size_t count,
613 					awk_array_t array);
614 
615 	/*
616 	 * Symbol table access:
617 	 * 	- Read-only access to special variables (NF, etc.)
618 	 * 	- One special exception: PROCINFO.
619 	 *	- Use sym_update() to change a value, including from UNDEFINED
620 	 *	  to scalar or array.
621 	 */
622 	/*
623 	 * Lookup a variable, fill in value. No messing with the value
624 	 * returned.
625 	 * Returns false if the variable doesn't exist or if the wrong type
626 	 * was requested.  In the latter case, result->val_type will have
627 	 * the real type, as described above.
628 	 *
629 	 * 	awk_value_t val;
630 	 * 	if (! api->sym_lookup(id, name, wanted, & val))
631 	 * 		error_code_here();
632 	 *	else {
633 	 *		// safe to use val
634 	 *	}
635 	 */
636 	awk_bool_t (*api_sym_lookup)(awk_ext_id_t id,
637 				const char *name_space,
638 				const char *name,
639 				awk_valtype_t wanted,
640 				awk_value_t *result);
641 
642 	/*
643 	 * Update a value. Adds it to the symbol table if not there.
644 	 * Changing types (scalar <--> array) is not allowed.
645 	 * In fact, using this to update an array is not allowed, either.
646 	 * Such an attempt returns false.
647 	 */
648 	awk_bool_t (*api_sym_update)(awk_ext_id_t id,
649 				const char *name_space,
650 				const char *name,
651 				awk_value_t *value);
652 
653 	/*
654 	 * A ``scalar cookie'' is an opaque handle that provides access
655 	 * to a global variable or array. It is an optimization that
656 	 * avoids looking up variables in gawk's symbol table every time
657 	 * access is needed.
658 	 *
659 	 * This function retrieves the current value of a scalar cookie.
660 	 * Once you have obtained a scalar_cookie using sym_lookup, you can
661 	 * use this function to get its value more efficiently.
662 	 *
663 	 * Return will be false if the value cannot be retrieved.
664 	 *
665 	 * Flow is thus
666 	 *	awk_value_t val;
667 	 * 	awk_scalar_t cookie;
668 	 * 	api->sym_lookup(id, "variable", AWK_SCALAR, & val);	// get the cookie
669 	 *	cookie = val.scalar_cookie;
670 	 *	...
671 	 *	api->sym_lookup_scalar(id, cookie, wanted, & val);	// get the value
672 	 */
673 	awk_bool_t (*api_sym_lookup_scalar)(awk_ext_id_t id,
674 				awk_scalar_t cookie,
675 				awk_valtype_t wanted,
676 				awk_value_t *result);
677 
678 	/*
679 	 * Update the value associated with a scalar cookie.
680 	 * Flow is
681 	 * 	sym_lookup with wanted == AWK_SCALAR
682 	 * 	if returns false
683 	 * 		sym_update with real initial value to install it
684 	 * 		sym_lookup again with AWK_SCALAR
685 	 *	else
686 	 *		use the scalar cookie
687 	 *
688 	 * Return will be false if the new value is not one of
689 	 * AWK_STRING, AWK_NUMBER, AWK_REGEX.
690 	 *
691 	 * Here too, the built-in variables may not be updated.
692 	 */
693 	awk_bool_t (*api_sym_update_scalar)(awk_ext_id_t id,
694 				awk_scalar_t cookie, awk_value_t *value);
695 
696 	/* Cached values */
697 
698 	/*
699 	 * Create a cached string, regex, or numeric value for efficient later
700 	 * assignment. This improves performance when you want to assign
701 	 * the same value to one or more variables repeatedly.  Only
702 	 * AWK_NUMBER, AWK_STRING, AWK_REGEX and AWK_STRNUM values are allowed.
703 	 * Any other type is rejected.  We disallow AWK_UNDEFINED since that
704 	 * case would result in inferior performance.
705 	 */
706 	awk_bool_t (*api_create_value)(awk_ext_id_t id, awk_value_t *value,
707 		    awk_value_cookie_t *result);
708 
709 	/*
710 	 * Release the memory associated with a cookie from api_create_value.
711 	 * Please call this to free memory when the value is no longer needed.
712 	 */
713 	awk_bool_t (*api_release_value)(awk_ext_id_t id, awk_value_cookie_t vc);
714 
715 	/* Array management */
716 
717 	/*
718 	 * Retrieve total number of elements in array.
719 	 * Returns false if some kind of error.
720 	 */
721 	awk_bool_t (*api_get_element_count)(awk_ext_id_t id,
722 			awk_array_t a_cookie, size_t *count);
723 
724 	/*
725 	 * Return the value of an element - read only!
726 	 * Use set_array_element() to change it.
727 	 * Behavior for value and return is same as for api_get_argument
728 	 * and sym_lookup.
729 	 */
730 	awk_bool_t (*api_get_array_element)(awk_ext_id_t id,
731 			awk_array_t a_cookie,
732 			const awk_value_t *const index,
733 			awk_valtype_t wanted,
734 			awk_value_t *result);
735 
736 	/*
737 	 * Change (or create) element in existing array with
738 	 * index and value.
739 	 *
740 	 * ARGV and ENVIRON may not be updated.
741 	 */
742 	awk_bool_t (*api_set_array_element)(awk_ext_id_t id, awk_array_t a_cookie,
743 					const awk_value_t *const index,
744 					const awk_value_t *const value);
745 
746 	/*
747 	 * Remove the element with the given index.
748 	 * Returns true if removed or false if element did not exist.
749 	 */
750 	awk_bool_t (*api_del_array_element)(awk_ext_id_t id,
751 			awk_array_t a_cookie, const awk_value_t* const index);
752 
753 	/* Create a new array cookie to which elements may be added. */
754 	awk_array_t (*api_create_array)(awk_ext_id_t id);
755 
756 	/* Clear out an array. */
757 	awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie);
758 
759 	/*
760 	 * Flatten out an array with type conversions as requested.
761 	 * This supersedes the earlier api_flatten_array function that
762 	 * did not allow the caller to specify the requested types.
763 	 * (That API is still available as a macro, defined below.)
764 	 */
765 	awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id,
766 			awk_array_t a_cookie,
767 			awk_flat_array_t **data,
768 			awk_valtype_t index_type, awk_valtype_t value_type);
769 
770 	/* When done, delete any marked elements, release the memory. */
771 	awk_bool_t (*api_release_flattened_array)(awk_ext_id_t id,
772 			awk_array_t a_cookie,
773 			awk_flat_array_t *data);
774 
775 	/*
776 	 * Hooks to provide access to gawk's memory allocation functions.
777 	 * This ensures that memory passed between gawk and the extension
778 	 * is allocated and released by the same library.
779 	 */
780 	void *(*api_malloc)(size_t size);
781 	void *(*api_calloc)(size_t nmemb, size_t size);
782 	void *(*api_realloc)(void *ptr, size_t size);
783 	void (*api_free)(void *ptr);
784 
785 	/*
786 	 * Obsolete function, should not be used. It remains only
787 	 * for binary compatibility.  Any value it returns should be
788 	 * freed via api_free.
789 	 */
790 	void *(*api_get_mpfr)(awk_ext_id_t id);
791 
792 	/*
793 	 * Obsolete function, should not be used. It remains only
794 	 * for binary compatibility.  Any value it returns should be
795 	 * freed via api_free.
796 	 */
797 	void *(*api_get_mpz)(awk_ext_id_t id);
798 
799         /*
800 	 * Look up a file.  If the name is NULL or name_len is 0, it returns
801 	 * data for the currently open input file corresponding to FILENAME
802 	 * (and it will not access the filetype argument, so that may be
803 	 * undefined).
804 	 *
805 	 * If the file is not already open, try to open it.
806 	 *
807 	 * The "filetype" argument should be one of:
808 	 *
809 	 *    ">", ">>", "<", "|>", "|<", and "|&"
810 	 *
811 	 * If the file is not already open, and the fd argument is non-negative,
812 	 * gawk will use that file descriptor instead of opening the file
813 	 * in the usual way.
814 	 *
815 	 * If the fd is non-negative, but the file exists already, gawk
816 	 * ignores the fd and returns the existing file.  It is the caller's
817 	 * responsibility to notice that the fd in the returned
818 	 * awk_input_buf_t does not match the requested value.
819 	 *
820 	 * Note that supplying a file descriptor is currently NOT supported
821 	 * for pipes. It should work for input, output, append, and two-way
822 	 * (coprocess) sockets.  If the filetype is two-way, we assume that
823 	 * it is a socket!
824 	 *
825 	 * Note that in the two-way case, the input and output file descriptors
826 	 * may differ.  To check for success, one must check that either of
827 	 * them matches.
828 	 *
829 	 * ibufp and obufp point at gawk's internal copies of the
830 	 * awk_input_buf_t and awk_output_buf_t associated with the open
831 	 * file.  Treat these data structures as read-only!
832 	 */
833 	awk_bool_t (*api_get_file)(awk_ext_id_t id,
834 			const char *name,
835 			size_t name_len,
836 			const char *filetype,
837 			int fd,
838 			/*
839 			 * Return values (on success, one or both should
840 			 * be non-NULL):
841 			 */
842 			const awk_input_buf_t **ibufp,
843 			const awk_output_buf_t **obufp);
844 } gawk_api_t;
845 
846 #ifndef GAWK	/* these are not for the gawk code itself! */
/*
 * Use these if you want to define "global" variables named api
 * and ext_id to make the code a little easier to read.
 * See the sample boilerplate code, below.
 *
 * Each do_xxx macro reads the api->do_flags[] entry selected by the
 * matching gawk_do_xxx index, so a pointer named 'api' must be in
 * scope wherever one of them is used.
 */
#define do_lint		(api->do_flags[gawk_do_lint])
#define do_traditional	(api->do_flags[gawk_do_traditional])
#define do_profile	(api->do_flags[gawk_do_profile])
#define do_sandbox	(api->do_flags[gawk_do_sandbox])
#define do_debug	(api->do_flags[gawk_do_debug])
#define do_mpfr		(api->do_flags[gawk_do_mpfr])
858 
/*
 * Argument access.  Both macros forward to the api hook of the same
 * name and supply ext_id as the first argument, so 'api' and 'ext_id'
 * must be in scope at the point of use.
 */
#define get_argument(count, wanted, result) \
	(api->api_get_argument(ext_id, count, wanted, result))
#define set_argument(count, new_array) \
	(api->api_set_argument(ext_id, count, new_array))
863 
/*
 * Message functions.  Unlike the other convenience macros, these expand
 * to the bare function pointer and do NOT add ext_id, so the caller
 * writes the complete argument list, e.g. fatal(ext_id, "format", ...).
 */
#define fatal		api->api_fatal
#define nonfatal	api->api_nonfatal
#define warning		api->api_warning
#define lintwarn	api->api_lintwarn
868 
/*
 * Registration hooks for input parsers, output wrappers and two-way
 * processors.  Each forwards to the matching api_register_xxx hook
 * with ext_id as the first argument.
 */
#define register_input_parser(parser)	(api->api_register_input_parser(ext_id, parser))
#define register_output_wrapper(wrapper) (api->api_register_output_wrapper(ext_id, wrapper))
#define register_two_way_processor(processor) \
	(api->api_register_two_way_processor(ext_id, processor))

/*
 * ERRNO handling: forward to the api_update_ERRNO_int,
 * api_update_ERRNO_string and api_unset_ERRNO hooks.
 */
#define update_ERRNO_int(e)	(api->api_update_ERRNO_int(ext_id, e))
#define update_ERRNO_string(str) \
	(api->api_update_ERRNO_string(ext_id, str))
#define unset_ERRNO()	(api->api_unset_ERRNO(ext_id))

/*
 * add_ext_func passes a namespace and a function descriptor to
 * api_add_ext_func; awk_atexit passes a function pointer and its
 * argument to api_awk_atexit.
 */
#define add_ext_func(ns, func)	(api->api_add_ext_func(ext_id, ns, func))
#define awk_atexit(funcp, arg0)	(api->api_awk_atexit(ext_id, funcp, arg0))
881 
/*
 * Symbol table access.
 *
 * sym_lookup and sym_update are shorthands for the _ns variants with
 * an empty string passed as the namespace.  The _ns variants and the
 * _scalar variants forward to the corresponding api hook, supplying
 * ext_id as the first argument.
 *
 * Every expansion is wrapped in one pair of parentheses around the
 * whole call, so all of these macros behave as a single primary
 * expression in any context.
 */
#define sym_lookup(name, wanted, result) \
	sym_lookup_ns("", name, wanted, result)
#define sym_update(name, value) \
	sym_update_ns("", name, value)

#define sym_lookup_ns(name_space, name, wanted, result) \
	(api->api_sym_lookup(ext_id, name_space, name, wanted, result))
#define sym_update_ns(name_space, name, value) \
	(api->api_sym_update(ext_id, name_space, name, value))

#define sym_lookup_scalar(scalar_cookie, wanted, result) \
	(api->api_sym_lookup_scalar(ext_id, scalar_cookie, wanted, result))
#define sym_update_scalar(scalar_cookie, value) \
	(api->api_sym_update_scalar(ext_id, scalar_cookie, value))
896 
/*
 * Array access.  Unless noted otherwise, each macro forwards to the
 * api hook of the same name, with ext_id supplied as the first argument.
 */
#define get_array_element(array, index, wanted, result) \
	(api->api_get_array_element(ext_id, array, index, wanted, result))

#define set_array_element(array, index, value) \
	(api->api_set_array_element(ext_id, array, index, value))

/* Same hook as set_array_element; index and value are taken from *elem. */
#define set_array_element_by_elem(array, elem) \
	(api->api_set_array_element(ext_id, array, & (elem)->index, & (elem)->value))

#define del_array_element(array, index) \
	(api->api_del_array_element(ext_id, array, index))

#define get_element_count(array, count_p) \
	(api->api_get_element_count(ext_id, array, count_p))

#define create_array()		(api->api_create_array(ext_id))

#define clear_array(array)	(api->api_clear_array(ext_id, array))

#define flatten_array_typed(array, data, index_type, value_type) \
	(api->api_flatten_array_typed(ext_id, array, data, index_type, value_type))

/*
 * The older flatten call without type arguments: it asks
 * flatten_array_typed for AWK_STRING indices and AWK_UNDEFINED values.
 */
#define flatten_array(array, data) \
	flatten_array_typed(array, data, AWK_STRING, AWK_UNDEFINED)

#define release_flattened_array(array, data) \
	(api->api_release_flattened_array(ext_id, array, data))
924 
/*
 * Memory allocation through the hooks gawk provides.  Unlike most of
 * the other macros, these do not pass ext_id.
 */
#define gawk_malloc(size)		(api->api_malloc(size))
#define gawk_calloc(nmemb, size)	(api->api_calloc(nmemb, size))
#define gawk_realloc(ptr, size)		(api->api_realloc(ptr, size))
#define gawk_free(ptr)			(api->api_free(ptr))

/* Forward to api_create_value / api_release_value with ext_id. */
#define create_value(value, result) \
	(api->api_create_value(ext_id, value,result))

#define release_value(value) \
	(api->api_release_value(ext_id, value))

/* Forward to api_get_file; the arguments are passed through in order. */
#define get_file(name, namelen, filetype, fd, ibuf, obuf) \
	(api->api_get_file(ext_id, name, namelen, filetype, fd, ibuf, obuf))

/* These two are obsolete and should not be used. */
#define get_mpfr_ptr() (api->api_get_mpfr(ext_id))
#define get_mpz_ptr() (api->api_get_mpz(ext_id))

/* Forward the version string to api_register_ext_version. */
#define register_ext_version(version) \
	(api->api_register_ext_version(ext_id, version))
945 
/*
 * emalloc, ezalloc, erealloc --- allocate through gawk_malloc,
 * gawk_calloc or gawk_realloc and call fatal() if the result is null.
 *
 *	pointer	lvalue that receives the new memory
 *	type	pointer type the allocator's result is cast to
 *	size	number of bytes requested
 *	message	text placed at the front of the failure message
 *
 * The expansions use ext_id, fatal and the gawk_xxx allocation macros,
 * so 'api' and 'ext_id' must be in scope at the point of use.
 *
 * The byte count is normally a size_t expression, which must not be
 * handed to a %d conversion.  Because this API restricts itself to
 * ISO C 90 (which has no %zu), the size is cast to unsigned long and
 * printed with %lu.
 */
#define emalloc(pointer, type, size, message) \
	do { \
		if ((pointer = (type) gawk_malloc(size)) == 0) \
			fatal(ext_id, "%s: malloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while(0)

#define ezalloc(pointer, type, size, message) \
	do { \
		if ((pointer = (type) gawk_calloc(1, size)) == 0) \
			fatal(ext_id, "%s: calloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while(0)

#define erealloc(pointer, type, size, message) \
	do { \
		if ((pointer = (type) gawk_realloc(pointer, size)) == 0) \
			fatal(ext_id, "%s: realloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while(0)
963 
964 /* Constructor functions */
965 
966 /* r_make_string_type --- make a string or strnum or regexp value in result from the passed-in string */
967 
968 static inline awk_value_t *
r_make_string_type(const gawk_api_t * api,awk_ext_id_t ext_id,const char * string,size_t length,awk_bool_t duplicate,awk_value_t * result,awk_valtype_t val_type)969 r_make_string_type(const gawk_api_t *api,	/* needed for emalloc */
970 		   awk_ext_id_t ext_id,		/* ditto */
971 		   const char *string,
972 		   size_t length,
973 		   awk_bool_t duplicate,
974 		   awk_value_t *result,
975 		   awk_valtype_t val_type)
976 {
977 	char *cp = NULL;
978 
979 	memset(result, 0, sizeof(*result));
980 
981 	result->val_type = val_type;
982 	result->str_value.len = length;
983 
984 	if (duplicate) {
985 		emalloc(cp, char *, length + 1, "r_make_string");
986 		memcpy(cp, string, length);
987 		cp[length] = '\0';
988 		result->str_value.str = cp;
989 	} else {
990 		result->str_value.str = (char *) string;
991 	}
992 
993 	return result;
994 }
995 
996 /* r_make_string --- make a string value in result from the passed-in string */
997 
998 static inline awk_value_t *
r_make_string(const gawk_api_t * api,awk_ext_id_t ext_id,const char * string,size_t length,awk_bool_t duplicate,awk_value_t * result)999 r_make_string(const gawk_api_t *api,	/* needed for emalloc */
1000 	      awk_ext_id_t ext_id,	/* ditto */
1001 	      const char *string,
1002 	      size_t length,
1003 	      awk_bool_t duplicate,
1004 	      awk_value_t *result)
1005 {
1006 	return r_make_string_type(api, ext_id, string, length, duplicate, result, AWK_STRING);
1007 }
1008 
/*
 * Convenience constructors.  The make_const_xxx forms pass awk_true for
 * the 'duplicate' argument, so the text is copied; the make_malloced_xxx
 * forms pass awk_false, so the passed-in pointer is stored as is.
 *
 * All of them spell the flag as awk_true / awk_false rather than as an
 * integer literal, so the argument has the declared awk_bool_t type.
 */
#define make_const_string(str, len, result)	r_make_string(api, ext_id, str, len, awk_true, result)
#define make_malloced_string(str, len, result)	r_make_string(api, ext_id, str, len, awk_false, result)

#define make_const_regex(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_REGEX)
#define make_malloced_regex(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_REGEX)

/*
 * Note: The caller may not create a STRNUM, but it can create a string that is
 * flagged as user input that MAY be a STRNUM. Gawk will decide whether it's a
 * STRNUM or a string by checking whether the string is numeric.
 */
#define make_const_user_input(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_STRNUM)
#define make_malloced_user_input(str, len, result)	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_STRNUM)
1022 
1023 /* make_null_string --- make a null string value */
1024 
1025 static inline awk_value_t *
make_null_string(awk_value_t * result)1026 make_null_string(awk_value_t *result)
1027 {
1028 	memset(result, 0, sizeof(*result));
1029 	result->val_type = AWK_UNDEFINED;
1030 
1031 	return result;
1032 }
1033 
1034 /* make_number --- make a number value in result */
1035 
1036 static inline awk_value_t *
make_number(double num,awk_value_t * result)1037 make_number(double num, awk_value_t *result)
1038 {
1039 	result->val_type = AWK_NUMBER;
1040 	result->num_value = num;
1041 	result->num_type = AWK_NUMBER_TYPE_DOUBLE;
1042 	return result;
1043 }
1044 
1045 /*
1046  * make_number_mpz --- make an mpz number value in result.
1047  * The mpz_ptr must be from a call to get_mpz_ptr.
1048  */
1049 
1050 static inline awk_value_t *
make_number_mpz(void * mpz_ptr,awk_value_t * result)1051 make_number_mpz(void *mpz_ptr, awk_value_t *result)
1052 {
1053 	result->val_type = AWK_NUMBER;
1054 	result->num_type = AWK_NUMBER_TYPE_MPZ;
1055 	result->num_ptr = mpz_ptr;
1056 	return result;
1057 }
1058 
1059 /*
1060  * make_number_mpfr --- make an mpfr number value in result.
1061  * The mpfr_ptr must be from a call to get_mpfr_ptr.
1062  */
1063 
1064 static inline awk_value_t *
make_number_mpfr(void * mpfr_ptr,awk_value_t * result)1065 make_number_mpfr(void *mpfr_ptr, awk_value_t *result)
1066 {
1067 	result->val_type = AWK_NUMBER;
1068 	result->num_type = AWK_NUMBER_TYPE_MPFR;
1069 	result->num_ptr = mpfr_ptr;
1070 	return result;
1071 }
1072 
1073 
/*
 * Each extension must define a function with this prototype:
 *
 *	int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)
 *
 * The return value should be zero on failure and non-zero on success.
 *
 * For the macros to work, the function should save api_p in a global
 * variable named 'api' and save id in a global variable named 'ext_id'.
 * In addition, a global function pointer named 'init_func' should be
 * defined and set to either NULL or an initialization function that
 * returns non-zero on success and zero upon failure.
 *
 * The dl_load_func macro, below, generates such a function.  It also
 * expects a global pointer named 'ext_version', which may be NULL;
 * when it is not NULL it is passed to register_ext_version.
 */

extern int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id);
1089 
#if 0
/* Boilerplate code: */
int plugin_is_GPL_compatible;

/*
 * dl_load_func assigns its 'const gawk_api_t *const api_p' parameter
 * to api, so api must be a modifiable pointer to a const structure.
 */
static const gawk_api_t *api;
static awk_ext_id_t ext_id;
static const char *ext_version = NULL; /* or ... = "some string" */

static awk_ext_func_t func_table[] = {
	{ "name", do_name, 1 },
	/* ... */
};

/* EITHER: */

static awk_bool_t (*init_func)(void) = NULL;

/* OR: */

static awk_bool_t
init_my_extension(void)
{
	...
}

static awk_bool_t (*init_func)(void) = init_my_extension;

dl_load_func(func_table, some_name, "name_space_in_quotes")
#endif
1119 
/*
 * dl_load_func --- expand to the complete definition of dl_load().
 *
 *	func_table	array of function descriptors; each entry is passed
 *			to add_ext_func, stopping early at an entry whose
 *			name is NULL
 *	extension	bare token; it is stringified for use in messages
 *	name_space	namespace passed to add_ext_func
 *
 * The generated function relies on these names being in scope where the
 * macro is used: api, ext_id, init_func and ext_version, plus fprintf,
 * stderr and exit.  It declares locals named i, j and errors, so the
 * func_table argument must not be spelled as one of those identifiers.
 *
 * Steps, in order: save api_p and id; exit(1) if the API major version
 * differs or gawk's minor version is lower than the one compiled
 * against; run check_mpfr_version; add the functions; call init_func if
 * it is not NULL; register ext_version if it is not NULL.  A failure to
 * add a function or a failing init_func produces a warning and is
 * counted; the function returns non-zero only if nothing was counted.
 */
#define dl_load_func(func_table, extension, name_space) \
int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)  \
{ \
	size_t i, j; \
	int errors = 0; \
\
	api = api_p; \
	ext_id = (void **) id; \
\
	/* version mismatch is reported on stderr and ends the process */ \
	if (api->major_version != GAWK_API_MAJOR_VERSION \
	    || api->minor_version < GAWK_API_MINOR_VERSION) { \
		fprintf(stderr, #extension ": version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (API %d.%d), gawk version (API %d.%d)\n", \
			GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION, \
			api->major_version, api->minor_version); \
		exit(1); \
	} \
\
	check_mpfr_version(extension); \
\
	/* load functions */ \
	for (i = 0, j = sizeof(func_table) / sizeof(func_table[0]); i < j; i++) { \
		if (func_table[i].name == NULL) \
			break; \
		if (! add_ext_func(name_space, & func_table[i])) { \
			warning(ext_id, #extension ": could not add %s", \
					func_table[i].name); \
			errors++; \
		} \
	} \
\
	/* optional initialization hook */ \
	if (init_func != NULL) { \
		if (! init_func()) { \
			warning(ext_id, #extension ": initialization function failed"); \
			errors++; \
		} \
	} \
\
	/* optional version string */ \
	if (ext_version != NULL) \
		register_ext_version(ext_version); \
\
	return (errors == 0); \
}
1163 
/*
 * check_mpfr_version --- GMP/MPFR version check used by dl_load_func.
 *
 * If both __GNU_MP_VERSION and MPFR_VERSION_MAJOR are defined when this
 * header is processed (NOTE(review): presumably because the extension
 * included the GMP and MPFR headers first -- confirm), the macro
 * compares the versions compiled against with those recorded in *api.
 * For each library, a different major version, or a gawk minor version
 * lower than the compiled-against one, prints a message on stderr and
 * calls exit(1).  GMP is checked before MPFR.
 *
 * Otherwise the macro expands to nothing.
 */
#if defined __GNU_MP_VERSION && defined MPFR_VERSION_MAJOR
#define check_mpfr_version(extension) do { \
	if (api->gmp_major_version != __GNU_MP_VERSION \
	    || api->gmp_minor_version < __GNU_MP_VERSION_MINOR) { \
		fprintf(stderr, #extension ": GMP version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, \
			api->gmp_major_version, api->gmp_minor_version); \
		exit(1); \
	} \
	if (api->mpfr_major_version != MPFR_VERSION_MAJOR \
	    || api->mpfr_minor_version < MPFR_VERSION_MINOR) { \
		fprintf(stderr, #extension ": MPFR version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			MPFR_VERSION_MAJOR, MPFR_VERSION_MINOR, \
			api->mpfr_major_version, api->mpfr_minor_version); \
		exit(1); \
	} \
} while (0)
#else
#define check_mpfr_version(extension) /* nothing */
#endif
1186 
1187 #endif /* GAWK */
1188 
1189 #ifdef __cplusplus
1190 }
1191 #endif	/* C++ */
1192 
1193 #endif /* _GAWK_API_H */
1194