1 /*
2 * gawkapi.h -- Definitions for use by extension functions calling into gawk.
3 */
4
5 /*
6 * Copyright (C) 2012-2019, 2021 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
26 /*
27 * The following types and/or macros and/or functions are referenced
28 * in this file. For correct use, you must therefore include the
29 * corresponding standard header file BEFORE including this file.
30 *
31 * FILE - <stdio.h>
32 * NULL - <stddef.h>
33 * memset(), memcpy() - <string.h>
 * size_t, ssize_t - <sys/types.h>
35 * struct stat - <sys/stat.h>
36 *
37 * Due to portability concerns, especially to systems that are not
38 * fully standards-compliant, it is your responsibility to include
39 * the correct files in the correct way. This requirement is necessary
40 * in order to keep this file clean, instead of becoming a portability
41 * hodge-podge as can be seen in the gawk source code.
42 *
43 * To pass reasonable integer values for ERRNO, you will also need to
44 * include <errno.h>.
45 */
46
47 #ifndef _GAWK_API_H
48 #define _GAWK_API_H
49
50 /*
51 * General introduction:
52 *
53 * This API purposely restricts itself to ISO C 90 features. In particular, no
54 * bool, no // comments, no use of the restrict keyword, or anything else,
55 * in order to provide maximal portability.
56 *
57 * Exception: the "inline" keyword is used below in the "constructor"
58 * functions. If your compiler doesn't support it, you should either
59 * -Dinline='' on your command line, or use the autotools and include a
60 * config.h in your extensions.
61 *
62 * Additional important information:
63 *
64 * 1. ALL string values in awk_value_t objects need to come from api_malloc().
65 * Gawk will handle releasing the storage if necessary. This is slightly
66 * awkward, in that you can't take an awk_value_t that you got from gawk
67 * and reuse it directly, even for something that is conceptually pass
68 * by value.
69 *
70 * 2. Due to gawk internals, after using sym_update() to install an array
71 * into gawk, you have to retrieve the array cookie from the value
72 * passed in to sym_update(). Like so:
73 *
74 * new_array = create_array();
75 * val.val_type = AWK_ARRAY;
76 * val.array_cookie = new_array;
77 * sym_update("array", & val); // install array in the symbol table
78 *
79 * new_array = val.array_cookie; // MUST DO THIS
80 *
81 * // fill in new array with lots of subscripts and values
82 *
83 * Similarly, if installing a new array as a subarray of an existing
84 * array, you must add the new array to its parent before adding any
85 * elements to it.
86 *
87 * You must also retrieve the value of the array_cookie after the call
88 * to set_element().
89 *
90 * Thus, the correct way to build an array is to work "top down".
91 * Create the array, and immediately install it in gawk's symbol table
92 * using sym_update(), or install it as an element in a previously
93 * existing array using set_element().
94 *
95 * Thus the new array must ultimately be rooted in a global symbol. This is
96 * necessary before installing any subarrays in it, due to gawk's
97 * internal implementation. Strictly speaking, this is required only
98 * for arrays that will have subarrays as elements; however it is
99 * a good idea to always do this. This restriction may be relaxed
100 * in a subsequent revision of the API.
101 */
102
103 /* Allow use in C++ code. */
104 #ifdef __cplusplus
105 extern "C" {
106 #endif
107
/*
 * awk_const keeps extensions from modifying certain fields in some structs:
 * it is `const' for everybody except gawk itself (GAWK defined), for which
 * it expands to nothing.
 */
#ifndef GAWK
#define awk_const const
#else
#define awk_const
#endif
114
/* The API's own boolean type; <stdbool.h> is not used, on purpose. */
typedef enum awk_bool {
	awk_false = 0,
	awk_true = 1
} awk_bool_t;
119
120 /*
121 * If an input parser would like to specify the field positions in the input
122 * record, it may populate an awk_fieldwidth_info_t structure to indicate
123 * the location of each field. The use_chars boolean controls whether the
124 * field lengths are specified in terms of bytes or potentially multi-byte
125 * characters. Performance will be better if the values are supplied in
126 * terms of bytes. The fields[0].skip value indicates how many bytes (or
127 * characters) to skip before $1, and fields[0].len is the length of $1, etc.
128 */
129
130 typedef struct {
131 awk_bool_t use_chars; /* false ==> use bytes */
132 size_t nf;
133 struct awk_field_info {
134 size_t skip; /* amount to skip before field starts */
135 size_t len; /* length of field */
136 } fields[1]; /* actual dimension should be nf */
137 } awk_fieldwidth_info_t;
138
/*
 * Total size in bytes of an awk_fieldwidth_info_t with room for NF fields,
 * for handing to malloc or realloc.  The struct itself already accounts
 * for one element of fields[], which is why only NF - 1 more are added.
 */
#define awk_fieldwidth_info_size(NF) \
	(sizeof(awk_fieldwidth_info_t) + \
	 ((NF) - 1) * sizeof(struct awk_field_info))
145
146 /* The information about input files that input parsers need to know: */
147 typedef struct awk_input {
148 const char *name; /* filename */
149 int fd; /* file descriptor */
150 #define INVALID_HANDLE (-1)
151 void *opaque; /* private data for input parsers */
152
153 /*
154 * The get_record function is called to read the next record of data.
155 *
156 * It should return the length of the input record or EOF, and it
157 * should set *out to point to the contents of $0. The rt_start
158 * and rt_len arguments should be used to return RT to gawk.
159 * If EOF is not returned, the parser must set *rt_len (and
160 * *rt_start if *rt_len is non-zero).
161 *
162 * Note that gawk will make a copy of the record in *out, so the
163 * parser is responsible for managing its own memory buffer.
164 * Similarly, gawk will make its own copy of RT, so the parser
165 * is also responsible for managing this memory.
166 *
167 * It is guaranteed that errcode is a valid pointer, so there is
168 * no need to test for a NULL value. Gawk sets *errcode to 0,
169 * so there is no need to set it unless an error occurs.
170 *
171 * If an error does occur, the function should return EOF and set
172 * *errcode to a positive value. In that case, if *errcode is greater
173 * than zero, gawk will automatically update the ERRNO variable based
174 * on the value of *errcode (e.g., setting *errcode = errno should do
175 * the right thing).
176 *
177 * If field_width is non-NULL, then *field_width will be initialized
178 * to NULL, and the function may set it to point to a structure
179 * supplying field width information to override the default
180 * gawk field parsing mechanism. Note that this structure will not
181 * be copied by gawk; it must persist at least until the next call
182 * to get_record or close_func. Note also that field_width will
183 * be NULL when getline is assigning the results to a variable, thus
184 * field parsing is not needed.
185 */
186 int (*get_record)(char **out, struct awk_input *iobuf, int *errcode,
187 char **rt_start, size_t *rt_len,
188 const awk_fieldwidth_info_t **field_width);
189
190 /*
191 * This replaces the POSIX read() system call. Use it if you want to
192 * manage reading raw bytes yourself, and let gawk parse the record.
193 */
194 ssize_t (*read_func)(int, void *, size_t);
195
196 /*
197 * The close_func is called to allow the parser to free private data.
198 * Gawk itself will close the fd unless close_func first sets it to
199 * INVALID_HANDLE.
200 */
201 void (*close_func)(struct awk_input *iobuf);
202
203 /* put last, for alignment. bleah */
204 struct stat sbuf; /* stat buf */
205
206 } awk_input_buf_t;
207
208 typedef struct awk_input_parser {
209 const char *name; /* name of parser */
210
211 /*
212 * The can_take_file function should return true if the parser
213 * would like to parse this file. It should not change any gawk
214 * state!
215 */
216 awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
217
218 /*
219 * If this parser is selected, then take_control_of will be called.
220 * It can assume that a previous call to can_take_file was successful,
221 * and no gawk state has changed since that call. It should populate
222 * the awk_input_buf_t's get_record, close_func, and opaque values as needed.
223 * It should return true if successful.
224 */
225 awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
226
227 awk_const struct awk_input_parser *awk_const next; /* for use by gawk */
228 } awk_input_parser_t;
229
230 /*
231 * Similar for output wrapper.
232 */
233
234 /* First the data structure */
235 typedef struct awk_output_buf {
236 const char *name; /* name of output file */
237 const char *mode; /* mode argument to fopen */
238 FILE *fp; /* stdio file pointer */
239 awk_bool_t redirected; /* true if a wrapper is active */
240 void *opaque; /* for use by output wrapper */
241
242 /*
243 * Replacement functions for I/O. Just like the regular
244 * versions but also take the opaque pointer argument.
245 */
246 size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
247 FILE *fp, void *opaque);
248 int (*gawk_fflush)(FILE *fp, void *opaque);
249 int (*gawk_ferror)(FILE *fp, void *opaque);
250 int (*gawk_fclose)(FILE *fp, void *opaque);
251 } awk_output_buf_t;
252
253 /* Next the output wrapper registered with gawk */
254 typedef struct awk_output_wrapper {
255 const char *name; /* name of the wrapper */
256
257 /*
258 * The can_take_file function should return true if the wrapper
259 * would like to process this file. It should not change any gawk
260 * state!
261 */
262 awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
263
264 /*
265 * If this wrapper is selected, then take_control_of will be called.
266 * It can assume that a previous call to can_take_file was successful,
267 * and no gawk state has changed since that call. It should populate
268 * the awk_output_buf_t function pointers and opaque pointer as needed.
269 * It should return true if successful.
270 */
271 awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
272
273 awk_const struct awk_output_wrapper *awk_const next; /* for use by gawk */
274 } awk_output_wrapper_t;
275
276 /* A two-way processor combines an input parser and an output wrapper. */
277 typedef struct awk_two_way_processor {
278 const char *name; /* name of the two-way processor */
279
280 /*
281 * The can_take_file function should return true if the two-way
282 * processor would like to parse this file. It should not change
283 * any gawk state!
284 */
285 awk_bool_t (*can_take_two_way)(const char *name);
286
287 /*
288 * If this processor is selected, then take_control_of will be called.
289 * It can assume that a previous call to can_take_file was successful,
290 * and no gawk state has changed since that call. It should populate
291 * the awk_input_buf_t and awk_otuput_buf_t structures as needed.
292 * It should return true if successful.
293 */
294 awk_bool_t (*take_control_of)(const char *name, awk_input_buf_t *inbuf,
295 awk_output_buf_t *outbuf);
296
297 awk_const struct awk_two_way_processor *awk_const next; /* for use by gawk */
298 } awk_two_way_processor_t;
299
/* The API version, as preprocessor constants ... */
#define gawk_api_major_version	3
#define gawk_api_minor_version	1

/* ... and the same current version again, as enumeration constants. */
enum {
	GAWK_API_MAJOR_VERSION = gawk_api_major_version,
	GAWK_API_MINOR_VERSION = gawk_api_minor_version
};
308
309 /* A number of typedefs related to different types of values. */
310
/*
 * A mutable string.  If gawk supplied the value, gawk owns the memory
 * pointed to; otherwise gawk takes ownership of that memory.
 *
 * Only regular chars are used in the API.  A string may be multibyte
 * encoded, in the encoding and character set of the current locale; if
 * wide characters are needed, gawk converts to them internally.
 *
 * Every string that gawk provides is terminated with a '\0' character.
 */
typedef struct awk_string {
	char *str;	/* the data */
	size_t len;	/* its length, in chars */
} awk_string_t;
326
/* Says which representation an awk_number_t carries. */
enum AWK_NUMBER_TYPE {
	AWK_NUMBER_TYPE_DOUBLE = 0,
	AWK_NUMBER_TYPE_MPFR = 1,
	AWK_NUMBER_TYPE_MPZ = 2
};
332
333 /*
 * When type is AWK_NUMBER_TYPE_MPFR or AWK_NUMBER_TYPE_MPZ, the memory
 * pointed to by the ptr member belongs to gawk if it came from gawk.
 * Otherwise the memory belongs to the extension and gawk copies it when
 * it is received.
337 * See the manual for further discussion.
338 */
339
340 typedef struct awk_number {
341 double d; /* always populated in data received from gawk */
342 enum AWK_NUMBER_TYPE type;
343 void *ptr; /* either NULL or mpfr_ptr or mpz_ptr */
344 } awk_number_t;
345
/* An array is represented by an opaque type. */
typedef void *awk_array_t;

/* A scalar can be represented by an opaque type, too. */
typedef void *awk_scalar_t;

/* A cookie can store any value. */
typedef void *awk_value_cookie_t;
354
/*
 * The tag that says what type a value has.
 *
 * Both regular variables and array elements have values associated
 * with them.  Since arrays can be multidimensional (as can regular
 * variables), a "value" is allowed to actually be an array.
 */
typedef enum {
	AWK_UNDEFINED = 0,
	AWK_NUMBER = 1,
	AWK_STRING = 2,
	AWK_REGEX = 3,
	AWK_STRNUM = 4,
	AWK_ARRAY = 5,
	AWK_SCALAR = 6,		/* opaque access to a variable */
	AWK_VALUE_COOKIE = 7	/* for updating a previously created value */
} awk_valtype_t;
372
373 /*
374 * An awk value. The val_type tag indicates what
375 * is in the union.
376 */
377 typedef struct awk_value {
378 awk_valtype_t val_type;
379 union {
380 awk_string_t s;
381 awk_number_t n;
382 awk_array_t a;
383 awk_scalar_t scl;
384 awk_value_cookie_t vc;
385 } u;
386 #define str_value u.s
387 #define strnum_value str_value
388 #define regex_value str_value
389 #define num_value u.n.d
390 #define num_type u.n.type
391 #define num_ptr u.n.ptr
392 #define array_cookie u.a
393 #define scalar_cookie u.scl
394 #define value_cookie u.vc
395 } awk_value_t;
396
397 /*
398 * A "flattened" array element. Gawk produces an array of these
399 * inside the awk_flat_array_t.
400 * ALL memory pointed to belongs to gawk. Individual elements may
401 * be marked for deletion. New elements must be added individually,
402 * one at a time, using the separate API for that purpose.
403 */
404
405 typedef struct awk_element {
406 /* convenience linked list pointer, not used by gawk */
407 struct awk_element *next;
408 enum {
409 AWK_ELEMENT_DEFAULT = 0, /* set by gawk */
410 AWK_ELEMENT_DELETE = 1 /* set by extension if
411 should be deleted */
412 } flags;
413 awk_value_t index;
414 awk_value_t value;
415 } awk_element_t;
416
417 /*
418 * A "flattened" array. See the description above for how
419 * to use the elements contained herein.
420 */
421 typedef struct awk_flat_array {
422 awk_const void *awk_const opaque1; /* private data for use by gawk */
423 awk_const void *awk_const opaque2; /* private data for use by gawk */
424 awk_const size_t count; /* how many elements */
425 awk_element_t elements[1]; /* will be extended */
426 } awk_flat_array_t;
427
428 /*
429 * A record describing an extension function. Upon being
430 * loaded, the extension should pass in one of these to gawk for
431 * each C function.
432 *
433 * Each called function must fill in the result with either a scalar
434 * (number, string, or regex). Gawk takes ownership of any string memory.
435 *
436 * The called function must return the value of `result'.
437 * This is for the convenience of the calling code inside gawk.
438 *
439 * Each extension function may decide what to do if the number of
440 * arguments isn't what it expected. Following awk functions, it
441 * is likely OK to ignore extra arguments.
442 *
443 * 'min_required_args' indicates how many arguments MUST be passed.
444 * The API will throw a fatal error if not enough are passed.
445 *
446 * 'max_expected_args' is more benign; if more than that are passed,
447 * the API prints a lint message (IFF lint is enabled, of course).
448 *
449 * In any case, the extension function itself need not compare the
450 * actual number of arguments passed to those two values if it does
451 * not want to.
452 */
453 typedef struct awk_ext_func {
454 const char *name;
455 awk_value_t *(*const function)(int num_actual_args,
456 awk_value_t *result,
457 struct awk_ext_func *finfo);
458 const size_t max_expected_args;
459 const size_t min_required_args;
460 awk_bool_t suppress_lint;
461 void *data; /* opaque pointer to any extra state */
462 } awk_ext_func_t;
463
/* An extension id is an opaque type. */
typedef void *awk_ext_id_t;
465
466 /*
467 * The API into gawk. Lots of functions here. We hope that they are
468 * logically organized.
469 *
470 * !!! If you make any changes to this structure, please remember to bump !!!
471 * !!! gawk_api_major_version and/or gawk_api_minor_version. !!!
472 */
473 typedef struct gawk_api {
474 /* First, data fields. */
475
476 /* These are what gawk thinks the API version is. */
477 awk_const int major_version;
478 awk_const int minor_version;
479
480 /* GMP/MPFR versions, if extended-precision is available */
481 awk_const int gmp_major_version;
482 awk_const int gmp_minor_version;
483 awk_const int mpfr_major_version;
484 awk_const int mpfr_minor_version;
485
486 /*
487 * These can change on the fly as things happen within gawk.
488 * Currently only do_lint is prone to change, but we reserve
489 * the right to allow the others to do so also.
490 */
491 #define DO_FLAGS_SIZE 6
492 awk_const int do_flags[DO_FLAGS_SIZE];
493 /* Use these as indices into do_flags[] array to check the values */
494 #define gawk_do_lint 0
495 #define gawk_do_traditional 1
496 #define gawk_do_profile 2
497 #define gawk_do_sandbox 3
498 #define gawk_do_debug 4
499 #define gawk_do_mpfr 5
500
501 /* Next, registration functions: */
502
503 /*
504 * Add a function to the interpreter, returns true upon success.
505 * Gawk does not modify what func points to, but the extension
506 * function itself receives this pointer and can modify what it
507 * points to, thus it's not const.
508 */
509 awk_bool_t (*api_add_ext_func)(awk_ext_id_t id, const char *name_space,
510 awk_ext_func_t *func);
511
512 /* Register an input parser; for opening files read-only */
513 void (*api_register_input_parser)(awk_ext_id_t id,
514 awk_input_parser_t *input_parser);
515
516 /* Register an output wrapper, for writing files */
517 void (*api_register_output_wrapper)(awk_ext_id_t id,
518 awk_output_wrapper_t *output_wrapper);
519
520 /* Register a processor for two way I/O */
521 void (*api_register_two_way_processor)(awk_ext_id_t id,
522 awk_two_way_processor_t *two_way_processor);
523
524 /*
525 * Add an exit call back.
526 *
527 * arg0 is a private data pointer for use by the extension;
528 * gawk saves it and passes it into the function pointed
529 * to by funcp at exit.
530 *
531 * Exit callback functions are called in LIFO order.
532 */
533 void (*api_awk_atexit)(awk_ext_id_t id,
534 void (*funcp)(void *data, int exit_status),
535 void *arg0);
536
537 /* Register a version string for this extension with gawk. */
538 void (*api_register_ext_version)(awk_ext_id_t id, const char *version);
539
540 /* Functions to print messages */
541 void (*api_fatal)(awk_ext_id_t id, const char *format, ...);
542 void (*api_warning)(awk_ext_id_t id, const char *format, ...);
543 void (*api_lintwarn)(awk_ext_id_t id, const char *format, ...);
544 void (*api_nonfatal)(awk_ext_id_t id, const char *format, ...);
545
546 /* Functions to update ERRNO */
547 void (*api_update_ERRNO_int)(awk_ext_id_t id, int errno_val);
548 void (*api_update_ERRNO_string)(awk_ext_id_t id, const char *string);
549 void (*api_unset_ERRNO)(awk_ext_id_t id);
550
551 /*
552 * All of the functions that return a value from inside gawk
553 * (get a parameter, get a global variable, get an array element)
554 * behave in the same way.
555 *
556 * For a function parameter, the return is false if the argument
557 * count is out of range, or if the actual parameter does not match
558 * what is specified in wanted. In that case, result->val_type
559 * will hold the actual type of what was passed.
560 *
561 * Similarly for symbol table access to variables and array elements,
562 * the return is false if the actual variable or array element does
563 * not match what was requested, and result->val_type will hold
564 * the actual type.
565
566 Table entry is type returned:
567
568
569 +-------------------------------------------------------+
570 | Type of Actual Value: |
571 +--------+--------+--------+--------+-------+-----------+
572 | String | Strnum | Number | Regex | Array | Undefined |
573 +-----------+-----------+--------+--------+--------+--------+-------+-----------+
574 | | String | String | String | String | String | false | false |
575 | +-----------+--------+--------+--------+--------+-------+-----------+
576 | | Strnum | false | Strnum | Strnum | false | false | false |
577 | +-----------+--------+--------+--------+--------+-------+-----------+
578 | | Number | Number | Number | Number | false | false | false |
579 | +-----------+--------+--------+--------+--------+-------+-----------+
580 | | Regex | false | false | false | Regex | false | false |
581 | +-----------+--------+--------+--------+--------+-------+-----------+
582 | Type | Array | false | false | false | false | Array | false |
583 | Requested +-----------+--------+--------+--------+--------+-------+-----------+
584 | | Scalar | Scalar | Scalar | Scalar | Scalar | false | false |
585 | +-----------+--------+--------+--------+--------+-------+-----------+
586 | | Undefined | String | Strnum | Number | Regex | Array | Undefined |
587 | +-----------+--------+--------+--------+--------+-------+-----------+
588 | | Value | false | false | false | false | false | false |
589 | | Cookie | | | | | | |
590 +-----------+-----------+--------+--------+--------+--------+-------+-----------+
591 */
592
593 /* Functions to handle parameters passed to the extension. */
594
595 /*
596 * Get the count'th parameter, zero-based.
597 * Returns false if count is out of range, or if actual parameter
598 * does not match what is specified in wanted. In that case,
599 * result->val_type is as described above.
600 */
601 awk_bool_t (*api_get_argument)(awk_ext_id_t id, size_t count,
602 awk_valtype_t wanted,
603 awk_value_t *result);
604
605 /*
606 * Convert a parameter that was undefined into an array
607 * (provide call-by-reference for arrays). Returns false
608 * if count is too big, or if the argument's type is
609 * not undefined.
610 */
611 awk_bool_t (*api_set_argument)(awk_ext_id_t id,
612 size_t count,
613 awk_array_t array);
614
615 /*
616 * Symbol table access:
617 * - Read-only access to special variables (NF, etc.)
618 * - One special exception: PROCINFO.
619 * - Use sym_update() to change a value, including from UNDEFINED
620 * to scalar or array.
621 */
622 /*
623 * Lookup a variable, fill in value. No messing with the value
624 * returned.
625 * Returns false if the variable doesn't exist or if the wrong type
626 * was requested. In the latter case, vaule->val_type will have
627 * the real type, as described above.
628 *
629 * awk_value_t val;
630 * if (! api->sym_lookup(id, name, wanted, & val))
631 * error_code_here();
632 * else {
633 * // safe to use val
634 * }
635 */
636 awk_bool_t (*api_sym_lookup)(awk_ext_id_t id,
637 const char *name_space,
638 const char *name,
639 awk_valtype_t wanted,
640 awk_value_t *result);
641
642 /*
643 * Update a value. Adds it to the symbol table if not there.
644 * Changing types (scalar <--> array) is not allowed.
645 * In fact, using this to update an array is not allowed, either.
646 * Such an attempt returns false.
647 */
648 awk_bool_t (*api_sym_update)(awk_ext_id_t id,
649 const char *name_space,
650 const char *name,
651 awk_value_t *value);
652
653 /*
654 * A ``scalar cookie'' is an opaque handle that provide access
655 * to a global variable or array. It is an optimization that
656 * avoids looking up variables in gawk's symbol table every time
657 * access is needed.
658 *
659 * This function retrieves the current value of a scalar cookie.
660 * Once you have obtained a scalar_cookie using sym_lookup, you can
661 * use this function to get its value more efficiently.
662 *
663 * Return will be false if the value cannot be retrieved.
664 *
665 * Flow is thus
666 * awk_value_t val;
667 * awk_scalar_t cookie;
668 * api->sym_lookup(id, "variable", AWK_SCALAR, & val); // get the cookie
669 * cookie = val.scalar_cookie;
670 * ...
671 * api->sym_lookup_scalar(id, cookie, wanted, & val); // get the value
672 */
673 awk_bool_t (*api_sym_lookup_scalar)(awk_ext_id_t id,
674 awk_scalar_t cookie,
675 awk_valtype_t wanted,
676 awk_value_t *result);
677
678 /*
679 * Update the value associated with a scalar cookie.
680 * Flow is
681 * sym_lookup with wanted == AWK_SCALAR
682 * if returns false
683 * sym_update with real initial value to install it
684 * sym_lookup again with AWK_SCALAR
685 * else
686 * use the scalar cookie
687 *
688 * Return will be false if the new value is not one of
689 * AWK_STRING, AWK_NUMBER, AWK_REGEX.
690 *
691 * Here too, the built-in variables may not be updated.
692 */
693 awk_bool_t (*api_sym_update_scalar)(awk_ext_id_t id,
694 awk_scalar_t cookie, awk_value_t *value);
695
696 /* Cached values */
697
698 /*
699 * Create a cached string,regex, or numeric value for efficient later
700 * assignment. This improves performance when you want to assign
701 * the same value to one or more variables repeatedly. Only
702 * AWK_NUMBER, AWK_STRING, AWK_REGEX and AWK_STRNUM values are allowed.
703 * Any other type is rejected. We disallow AWK_UNDEFINED since that
704 * case would result in inferior performance.
705 */
706 awk_bool_t (*api_create_value)(awk_ext_id_t id, awk_value_t *value,
707 awk_value_cookie_t *result);
708
709 /*
710 * Release the memory associated with a cookie from api_create_value.
711 * Please call this to free memory when the value is no longer needed.
712 */
713 awk_bool_t (*api_release_value)(awk_ext_id_t id, awk_value_cookie_t vc);
714
715 /* Array management */
716
717 /*
718 * Retrieve total number of elements in array.
719 * Returns false if some kind of error.
720 */
721 awk_bool_t (*api_get_element_count)(awk_ext_id_t id,
722 awk_array_t a_cookie, size_t *count);
723
724 /*
725 * Return the value of an element - read only!
726 * Use set_array_element() to change it.
727 * Behavior for value and return is same as for api_get_argument
728 * and sym_lookup.
729 */
730 awk_bool_t (*api_get_array_element)(awk_ext_id_t id,
731 awk_array_t a_cookie,
732 const awk_value_t *const index,
733 awk_valtype_t wanted,
734 awk_value_t *result);
735
736 /*
737 * Change (or create) element in existing array with
738 * index and value.
739 *
740 * ARGV and ENVIRON may not be updated.
741 */
742 awk_bool_t (*api_set_array_element)(awk_ext_id_t id, awk_array_t a_cookie,
743 const awk_value_t *const index,
744 const awk_value_t *const value);
745
746 /*
747 * Remove the element with the given index.
748 * Returns true if removed or false if element did not exist.
749 */
750 awk_bool_t (*api_del_array_element)(awk_ext_id_t id,
751 awk_array_t a_cookie, const awk_value_t* const index);
752
753 /* Create a new array cookie to which elements may be added. */
754 awk_array_t (*api_create_array)(awk_ext_id_t id);
755
756 /* Clear out an array. */
757 awk_bool_t (*api_clear_array)(awk_ext_id_t id, awk_array_t a_cookie);
758
759 /*
760 * Flatten out an array with type conversions as requested.
761 * This supersedes the earlier api_flatten_array function that
762 * did not allow the caller to specify the requested types.
763 * (That API is still available as a macro, defined below.)
764 */
765 awk_bool_t (*api_flatten_array_typed)(awk_ext_id_t id,
766 awk_array_t a_cookie,
767 awk_flat_array_t **data,
768 awk_valtype_t index_type, awk_valtype_t value_type);
769
770 /* When done, delete any marked elements, release the memory. */
771 awk_bool_t (*api_release_flattened_array)(awk_ext_id_t id,
772 awk_array_t a_cookie,
773 awk_flat_array_t *data);
774
775 /*
776 * Hooks to provide access to gawk's memory allocation functions.
777 * This ensures that memory passed between gawk and the extension
778 * is allocated and released by the same library.
779 */
780 void *(*api_malloc)(size_t size);
781 void *(*api_calloc)(size_t nmemb, size_t size);
782 void *(*api_realloc)(void *ptr, size_t size);
783 void (*api_free)(void *ptr);
784
785 /*
786 * Obsolete function, should not be used. It remains only
787 * for binary compatibility. Any value it returns should be
788 * freed via api_free.
789 */
790 void *(*api_get_mpfr)(awk_ext_id_t id);
791
792 /*
793 * Obsolete function, should not be used. It remains only
794 * for binary compatibility. Any value it returns should be
795 * freed via api_free.
796 */
797 void *(*api_get_mpz)(awk_ext_id_t id);
798
799 /*
800 * Look up a file. If the name is NULL or name_len is 0, it returns
801 * data for the currently open input file corresponding to FILENAME
802 * (and it will not access the filetype argument, so that may be
803 * undefined).
804 *
805 * If the file is not already open, try to open it.
806 *
807 * The "filetype" argument should be one of:
808 *
809 * ">", ">>", "<", "|>", "|<", and "|&"
810 *
811 * If the file is not already open, and the fd argument is non-negative,
812 * gawk will use that file descriptor instead of opening the file
813 * in the usual way.
814 *
815 * If the fd is non-negative, but the file exists already, gawk
816 * ignores the fd and returns the existing file. It is the caller's
817 * responsibility to notice that the fd in the returned
818 * awk_input_buf_t does not match the requested value.
819 *
820 * Note that supplying a file descriptor is currently NOT supported
821 * for pipes. It should work for input, output, append, and two-way
822 * (coprocess) sockets. If the filetype is two-way, we assume that
823 * it is a socket!
824 *
825 * Note that in the two-way case, the input and output file descriptors
826 * may differ. To check for success, one must check that either of
827 * them matches.
828 *
829 * ibufp and obufp point at gawk's internal copies of the
830 * awk_input_buf_t and awk_output_t associated with the open
831 * file. Treat these data structures as read-only!
832 */
833 awk_bool_t (*api_get_file)(awk_ext_id_t id,
834 const char *name,
835 size_t name_len,
836 const char *filetype,
837 int fd,
838 /*
839 * Return values (on success, one or both should
840 * be non-NULL):
841 */
842 const awk_input_buf_t **ibufp,
843 const awk_output_buf_t **obufp);
844 } gawk_api_t;
845
846 #ifndef GAWK /* these are not for the gawk code itself! */
847 /*
848 * Use these if you want to define "global" variables named api
849 * and ext_id to make the code a little easier to read.
850 * See the sample boilerplate code, below.
851 */
/* One name per entry of api->do_flags[]; each expects `api' to be in scope. */
#define do_lint			(api->do_flags[gawk_do_lint])
#define do_traditional		(api->do_flags[gawk_do_traditional])
#define do_profile		(api->do_flags[gawk_do_profile])
#define do_sandbox		(api->do_flags[gawk_do_sandbox])
#define do_debug		(api->do_flags[gawk_do_debug])
#define do_mpfr			(api->do_flags[gawk_do_mpfr])
858
859 #define get_argument(count, wanted, result) \
860 (api->api_get_argument(ext_id, count, wanted, result))
861 #define set_argument(count, new_array) \
862 (api->api_set_argument(ext_id, count, new_array))
863
864 #define fatal api->api_fatal
865 #define nonfatal api->api_nonfatal
866 #define warning api->api_warning
867 #define lintwarn api->api_lintwarn
868
869 #define register_input_parser(parser) (api->api_register_input_parser(ext_id, parser))
870 #define register_output_wrapper(wrapper) (api->api_register_output_wrapper(ext_id, wrapper))
871 #define register_two_way_processor(processor) \
872 (api->api_register_two_way_processor(ext_id, processor))
873
874 #define update_ERRNO_int(e) (api->api_update_ERRNO_int(ext_id, e))
875 #define update_ERRNO_string(str) \
876 (api->api_update_ERRNO_string(ext_id, str))
877 #define unset_ERRNO() (api->api_unset_ERRNO(ext_id))
878
879 #define add_ext_func(ns, func) (api->api_add_ext_func(ext_id, ns, func))
880 #define awk_atexit(funcp, arg0) (api->api_awk_atexit(ext_id, funcp, arg0))
881
/*
 * Symbol table access.
 *
 * sym_lookup() and sym_update() are shorthands for the *_ns variants
 * with an empty ("") namespace string.  The *_ns and *_scalar macros
 * forward to the corresponding api_sym_* function pointer, passing
 * ext_id first.
 *
 * Every expansion is a single parenthesized expression, so each macro
 * can be used anywhere a function call could be.
 */
#define sym_lookup(name, wanted, result) \
	sym_lookup_ns("", name, wanted, result)
#define sym_update(name, value) \
	sym_update_ns("", name, value)

#define sym_lookup_ns(name_space, name, wanted, result) \
	(api->api_sym_lookup(ext_id, name_space, name, wanted, result))
#define sym_update_ns(name_space, name, value) \
	(api->api_sym_update(ext_id, name_space, name, value))

#define sym_lookup_scalar(scalar_cookie, wanted, result) \
	(api->api_sym_lookup_scalar(ext_id, scalar_cookie, wanted, result))
#define sym_update_scalar(scalar_cookie, value) \
	(api->api_sym_update_scalar(ext_id, scalar_cookie, value))
896
/*
 * Array manipulation.  Each macro forwards to the like-named
 * api_* function pointer and supplies ext_id as the first argument.
 */
#define get_array_element(arr, idx, want_type, out) \
	(api->api_get_array_element(ext_id, arr, idx, want_type, out))

#define set_array_element(arr, idx, val) \
	(api->api_set_array_element(ext_id, arr, idx, val))

/* Same as set_array_element(), taking index and value from *elem. */
#define set_array_element_by_elem(arr, elem) \
	set_array_element(arr, & (elem)->index, & (elem)->value)

#define del_array_element(arr, idx) \
	(api->api_del_array_element(ext_id, arr, idx))

#define get_element_count(arr, nelems_p) \
	(api->api_get_element_count(ext_id, arr, nelems_p))

#define create_array() \
	(api->api_create_array(ext_id))

#define clear_array(arr) \
	(api->api_clear_array(ext_id, arr))

#define flatten_array_typed(arr, flat, idx_type, val_type) \
	(api->api_flatten_array_typed(ext_id, arr, flat, idx_type, val_type))

/* flatten_array() requests AWK_STRING indices and AWK_UNDEFINED values. */
#define flatten_array(arr, flat) \
	flatten_array_typed(arr, flat, AWK_STRING, AWK_UNDEFINED)

#define release_flattened_array(arr, flat) \
	(api->api_release_flattened_array(ext_id, arr, flat))
924
/*
 * Memory management.  These forward to the api_malloc family of
 * function pointers; note that they take no ext_id argument.
 */
#define gawk_malloc(nbytes)		(api->api_malloc(nbytes))
#define gawk_calloc(count, nbytes)	(api->api_calloc(count, nbytes))
#define gawk_realloc(old, nbytes)	(api->api_realloc(old, nbytes))
#define gawk_free(mem)			(api->api_free(mem))

/* Cached values. */
#define create_value(val, out) \
	(api->api_create_value(ext_id, val, out))

#define release_value(val) \
	(api->api_release_value(ext_id, val))

/* File lookup; see the description of api_get_file. */
#define get_file(fname, fname_len, ftype, fdesc, ibufp, obufp) \
	(api->api_get_file(ext_id, fname, fname_len, ftype, fdesc, ibufp, obufp))

/* These two are obsolete and should not be used. */
#define get_mpfr_ptr()	(api->api_get_mpfr(ext_id))
#define get_mpz_ptr()	(api->api_get_mpz(ext_id))

/* Register the extension's version string. */
#define register_ext_version(vers) \
	(api->api_register_ext_version(ext_id, vers))
945
/*
 * emalloc, ezalloc, erealloc --- checked allocation.
 *
 *	pointer	lvalue that receives the new block
 *	type	pointer type the result is cast to
 *	size	number of bytes requested
 *	message	string placed at the front of the failure message
 *
 * Each macro allocates through gawk_malloc(), gawk_calloc() or
 * gawk_realloc() and calls fatal() if the result is a null pointer.
 * They expand to references to 'api' and 'ext_id', so both names must
 * be in scope at the point of use.
 *
 * The size is reported with %lu and an explicit cast to unsigned long:
 * sizes are normally size_t, for which %d is the wrong conversion, and
 * this header restricts itself to C90, which has no %zu.
 *
 * Note that 'size' is evaluated a second time when the failure message
 * is produced.  erealloc() stores the result straight back into
 * 'pointer'; this presumes fatal() does not return -- confirm.
 */
#define emalloc(pointer, type, size, message) \
	do { \
		if (((pointer) = (type) gawk_malloc(size)) == NULL) \
			fatal(ext_id, "%s: malloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while (0)

#define ezalloc(pointer, type, size, message) \
	do { \
		if (((pointer) = (type) gawk_calloc(1, size)) == NULL) \
			fatal(ext_id, "%s: calloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while (0)

#define erealloc(pointer, type, size, message) \
	do { \
		if (((pointer) = (type) gawk_realloc(pointer, size)) == NULL) \
			fatal(ext_id, "%s: realloc of %lu bytes failed", \
				message, (unsigned long) (size)); \
	} while (0)
963
964 /* Constructor functions */
965
966 /* r_make_string_type --- make a string or strnum or regexp value in result from the passed-in string */
967
968 static inline awk_value_t *
r_make_string_type(const gawk_api_t * api,awk_ext_id_t ext_id,const char * string,size_t length,awk_bool_t duplicate,awk_value_t * result,awk_valtype_t val_type)969 r_make_string_type(const gawk_api_t *api, /* needed for emalloc */
970 awk_ext_id_t ext_id, /* ditto */
971 const char *string,
972 size_t length,
973 awk_bool_t duplicate,
974 awk_value_t *result,
975 awk_valtype_t val_type)
976 {
977 char *cp = NULL;
978
979 memset(result, 0, sizeof(*result));
980
981 result->val_type = val_type;
982 result->str_value.len = length;
983
984 if (duplicate) {
985 emalloc(cp, char *, length + 1, "r_make_string");
986 memcpy(cp, string, length);
987 cp[length] = '\0';
988 result->str_value.str = cp;
989 } else {
990 result->str_value.str = (char *) string;
991 }
992
993 return result;
994 }
995
996 /* r_make_string --- make a string value in result from the passed-in string */
997
998 static inline awk_value_t *
r_make_string(const gawk_api_t * api,awk_ext_id_t ext_id,const char * string,size_t length,awk_bool_t duplicate,awk_value_t * result)999 r_make_string(const gawk_api_t *api, /* needed for emalloc */
1000 awk_ext_id_t ext_id, /* ditto */
1001 const char *string,
1002 size_t length,
1003 awk_bool_t duplicate,
1004 awk_value_t *result)
1005 {
1006 return r_make_string_type(api, ext_id, string, length, duplicate, result, AWK_STRING);
1007 }
1008
/*
 * String constructors.  The make_const_* forms pass awk_true for the
 * 'duplicate' argument, so the value gets its own copy of str.  The
 * make_malloced_* forms pass awk_false, so the value stores the str
 * pointer itself.
 */
#define make_const_string(str, len, result) \
	r_make_string(api, ext_id, str, len, awk_true, result)
#define make_malloced_string(str, len, result) \
	r_make_string(api, ext_id, str, len, awk_false, result)

#define make_const_regex(str, len, result) \
	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_REGEX)
#define make_malloced_regex(str, len, result) \
	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_REGEX)

/*
 * Note: The caller may not create a STRNUM, but it can create a string that is
 * flagged as user input that MAY be a STRNUM. Gawk will decide whether it's a
 * STRNUM or a string by checking whether the string is numeric.
 *
 * The 'duplicate' argument is spelled awk_true / awk_false, not 1 / 0,
 * so that these macros also compile as C++, where an int does not
 * convert implicitly to the awk_bool_t enumeration.
 */
#define make_const_user_input(str, len, result) \
	r_make_string_type(api, ext_id, str, len, awk_true, result, AWK_STRNUM)
#define make_malloced_user_input(str, len, result) \
	r_make_string_type(api, ext_id, str, len, awk_false, result, AWK_STRNUM)
1022
1023 /* make_null_string --- make a null string value */
1024
1025 static inline awk_value_t *
make_null_string(awk_value_t * result)1026 make_null_string(awk_value_t *result)
1027 {
1028 memset(result, 0, sizeof(*result));
1029 result->val_type = AWK_UNDEFINED;
1030
1031 return result;
1032 }
1033
1034 /* make_number --- make a number value in result */
1035
1036 static inline awk_value_t *
make_number(double num,awk_value_t * result)1037 make_number(double num, awk_value_t *result)
1038 {
1039 result->val_type = AWK_NUMBER;
1040 result->num_value = num;
1041 result->num_type = AWK_NUMBER_TYPE_DOUBLE;
1042 return result;
1043 }
1044
1045 /*
1046 * make_number_mpz --- make an mpz number value in result.
1047 * The mpz_ptr must be from a call to get_mpz_ptr.
1048 */
1049
1050 static inline awk_value_t *
make_number_mpz(void * mpz_ptr,awk_value_t * result)1051 make_number_mpz(void *mpz_ptr, awk_value_t *result)
1052 {
1053 result->val_type = AWK_NUMBER;
1054 result->num_type = AWK_NUMBER_TYPE_MPZ;
1055 result->num_ptr = mpz_ptr;
1056 return result;
1057 }
1058
1059 /*
1060 * make_number_mpfr --- make an mpfr number value in result.
1061 * The mpfr_ptr must be from a call to get_mpfr_ptr.
1062 */
1063
1064 static inline awk_value_t *
make_number_mpfr(void * mpfr_ptr,awk_value_t * result)1065 make_number_mpfr(void *mpfr_ptr, awk_value_t *result)
1066 {
1067 result->val_type = AWK_NUMBER;
1068 result->num_type = AWK_NUMBER_TYPE_MPFR;
1069 result->num_ptr = mpfr_ptr;
1070 return result;
1071 }
1072
1073
/*
 * Each extension must define a function with this prototype:
 *
 *	int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id)
 *
 * The return value should be zero on failure and non-zero on success.
 *
 * For the macros to work, the function should save api_p in a global
 * variable named 'api' and save id in a global variable named 'ext_id'.
 * In addition, a global function pointer named 'init_func' should be
 * defined and set to either NULL or an initialization function that
 * returns non-zero on success and zero upon failure.
 *
 * The dl_load_func() macro generates a suitable definition of
 * dl_load().  Besides 'api', 'ext_id' and 'init_func', it also expects
 * a global string pointer named 'ext_version', set to either NULL or
 * the version string to register.
 */

extern int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id);
1089
#if 0
/* Boilerplate code: */
int plugin_is_GPL_compatible;

/*
 * dl_load() assigns to 'api', so the pointer itself must not be
 * const; what it points to is const, matching dl_load()'s api_p.
 */
static const gawk_api_t *api;
static awk_ext_id_t ext_id;
static const char *ext_version = NULL;	/* or ... = "some string" */

static awk_ext_func_t func_table[] = {
	{ "name", do_name, 1 },
	/* ... */
};

/* EITHER: */

static awk_bool_t (*init_func)(void) = NULL;

/* OR: */

static awk_bool_t
init_my_extension(void)
{
	...
}

static awk_bool_t (*init_func)(void) = init_my_extension;

/* Expands to the definition of dl_load(); no trailing semicolon needed. */
dl_load_func(func_table, some_name, "name_space_in_quotes")
#endif
1119
/*
 * dl_load_func --- expand to a complete definition of dl_load().
 *
 *	func_table	a real array (sizeof is applied to it) of function
 *			descriptions; an entry whose name is NULL ends
 *			registration early
 *	extension	bare token, stringified as the prefix of messages
 *	name_space	namespace passed to add_ext_func()
 *
 * The generated function uses the file-scope names 'api', 'ext_id',
 * 'init_func' and 'ext_version', and calls fprintf() and exit(), so
 * <stdio.h> and <stdlib.h> must be included by the extension.
 *
 * It exits with status 1 when the major API version differs or gawk's
 * minor API version is older than the one compiled against.  Otherwise
 * it returns non-zero only if every function was added and init_func
 * (when not NULL) succeeded.
 */
#define dl_load_func(func_table, extension, name_space) \
int dl_load(const gawk_api_t *const api_p, awk_ext_id_t id) \
{ \
	size_t i, j; \
	int errors = 0; \
\
	/* publish the API handles that the convenience macros rely on */ \
	api = api_p; \
	ext_id = (void **) id; \
\
	if (api->major_version != GAWK_API_MAJOR_VERSION \
	    || api->minor_version < GAWK_API_MINOR_VERSION) { \
		fprintf(stderr, #extension ": version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (API %d.%d), gawk version (API %d.%d)\n", \
			GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION, \
			api->major_version, api->minor_version); \
		exit(1); \
	} \
\
	check_mpfr_version(extension); \
\
	/* load functions, stopping at the first entry without a name */ \
	j = sizeof(func_table) / sizeof(func_table[0]); \
	for (i = 0; i < j && func_table[i].name != NULL; i++) { \
		if (! add_ext_func(name_space, & func_table[i])) { \
			warning(ext_id, #extension ": could not add %s", \
				func_table[i].name); \
			errors++; \
		} \
	} \
\
	/* optional one-time initialization */ \
	if (init_func != NULL && ! init_func()) { \
		warning(ext_id, #extension ": initialization function failed"); \
		errors++; \
	} \
\
	if (ext_version != NULL) \
		register_ext_version(ext_version); \
\
	return (errors == 0); \
}
1163
/*
 * check_mpfr_version --- verify the GMP and MPFR versions reported by
 * gawk against the ones this extension was compiled with.
 *
 * The check is only compiled in when both __GNU_MP_VERSION and
 * MPFR_VERSION_MAJOR are defined; otherwise the macro expands to
 * nothing.  For each library, a different major version, or a gawk
 * minor version older than the compiled-in one, prints a diagnostic
 * on stderr and exits with status 1.
 */
#if ! (defined __GNU_MP_VERSION && defined MPFR_VERSION_MAJOR)
#define check_mpfr_version(extension)	/* nothing */
#else
#define check_mpfr_version(extension)	do { \
	/* GMP first ... */ \
	if (api->gmp_major_version != __GNU_MP_VERSION \
	    || api->gmp_minor_version < __GNU_MP_VERSION_MINOR) { \
		fprintf(stderr, #extension ": GMP version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			__GNU_MP_VERSION, __GNU_MP_VERSION_MINOR, \
			api->gmp_major_version, api->gmp_minor_version); \
		exit(1); \
	} \
	/* ... then MPFR */ \
	if (api->mpfr_major_version != MPFR_VERSION_MAJOR \
	    || api->mpfr_minor_version < MPFR_VERSION_MINOR) { \
		fprintf(stderr, #extension ": MPFR version mismatch with gawk!\n"); \
		fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n", \
			MPFR_VERSION_MAJOR, MPFR_VERSION_MINOR, \
			api->mpfr_major_version, api->mpfr_minor_version); \
		exit(1); \
	} \
} while (0)
#endif
1186
1187 #endif /* GAWK */
1188
1189 #ifdef __cplusplus
1190 }
1191 #endif /* C++ */
1192
1193 #endif /* _GAWK_API_H */
1194