1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
#include <config.h>

#include <ctype.h>
#include <errno.h>
#include <float.h>
#include <getopt.h>
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
#include "libpspp/float-format.h"
#include "libpspp/hash-functions.h"
#include "libpspp/hmap.h"
#include "libpspp/integer-format.h"

#include "gl/c-ctype.h"
#include "gl/error.h"
#include "gl/intprops.h"
#include "gl/progname.h"
#include "gl/xalloc.h"
#include "gl/xbinary-io.h"
42
/* A growable byte array that accumulates the program's output. */
struct buffer
  {
    uint8_t *data;              /* Bytes stored so far. */
    size_t size;                /* Number of bytes in use in DATA. */
    size_t allocated;           /* Number of bytes allocated for DATA. */
  };

/* Appending to a buffer: copy bytes in, or reserve space to be filled
   in by the caller. */
static void buffer_put (struct buffer *, const void *, size_t);
static void *buffer_put_uninit (struct buffer *, size_t);
52
/* Kinds of tokens recognized by get_token(). */
enum token_type
  {
    T_EOF,                      /* End of input. */
    T_INTEGER,                  /* Integer literal, or ENDIAN. */
    T_FLOAT,                    /* Number with a `.', or SYSMIS, LOWEST,
                                   HIGHEST. */
    T_PCSYSMIS,                 /* PCSYSMIS keyword. */
    T_STRING,                   /* Double-quoted string. */
    T_SEMICOLON,                /* `;' */
    T_ASTERISK,                 /* `*' */
    T_LPAREN,                   /* `(' */
    T_RPAREN,                   /* `)' */
    T_I8,                       /* i8 keyword. */
    T_I16,                      /* i16 keyword. */
    T_I64,                      /* i64 keyword. */
    T_S,                        /* s<number> keyword. */
    T_COUNT,                    /* COUNT keyword. */
    T_COUNT8,                   /* COUNT8 keyword. */
    T_HEX,                      /* hex keyword. */
    T_LABEL,                    /* Identifier immediately followed by `:'. */
    T_AT,                       /* Identifier that starts with `@'. */
    T_MINUS,                    /* `-' standing alone. */
    T_PLUS,                     /* `+' */
  };
76
77 static enum token_type token;
78 static unsigned long long int tok_integer;
79 static double tok_float;
80 static char *tok_string;
81 static size_t tok_strlen, tok_allocated;
82
83 /* Symbol table. */
84 struct symbol
pwait(int pid,int * status,int flags ATTRIBUTE_UNUSED)85 {
86 struct hmap_node hmap_node;
87 const char *name;
88 unsigned int offset;
89 };
90
91 static struct hmap symbol_table = HMAP_INITIALIZER (symbol_table);
92
93 /* --be, --le: Integer and floating-point formats. */
94 static enum float_format float_format = FLOAT_IEEE_DOUBLE_BE;
95 static enum integer_format integer_format = INTEGER_MSB_FIRST;
96
97 /* Input file and current position. */
98 static FILE *input;
99 static const char *input_file_name;
100 static int line_number;
101
102 static void PRINTF_FORMAT (1, 2)
103 fatal (const char *message, ...)
104 {
105 va_list args;
106
107 fprintf (stderr, "%s:%d: ", input_file_name, line_number);
108 va_start (args, message);
109 vfprintf (stderr, message, args);
110 va_end (args);
111 putc ('\n', stderr);
112
113 exit (EXIT_FAILURE);
114 }
115
116 static void
117 add_char__ (int c)
118 {
119 if (tok_strlen >= tok_allocated)
120 tok_string = x2realloc (tok_string, &tok_allocated);
121
122 tok_string[tok_strlen] = c;
123 }
124
125 static void
126 add_char (int c)
127 {
128 add_char__ (c);
129 tok_strlen++;
130 }
131
132 static void
133 get_token (void)
134 {
135 int c;
136
137 do
138 {
139 c = getc (input);
140 if (c == '#')
141 {
142 while ((c = getc (input)) != '\n' && c != EOF)
143 continue;
144 }
145 if (c == '\n')
146 line_number++;
147 }
148 while (isspace (c) || c == '<' || c == '>');
149
150 tok_strlen = 0;
151 if (c == EOF)
152 {
153 if (token == T_EOF)
154 fatal ("unexpected end of input");
155 token = T_EOF;
156 }
157 else if (isdigit (c) || c == '-')
158 {
159 do
160 {
161 add_char (c);
162 c = getc (input);
163 }
164 while (isdigit (c) || isalpha (c) || c == '.');
165 add_char__ ('\0');
166 ungetc (c, input);
167
168 if (!strcmp (tok_string, "-"))
169 token = T_MINUS;
170 else
171 {
172 char *tail;
173
174 errno = 0;
175 if (strchr (tok_string, '.') == NULL)
176 {
177 token = T_INTEGER;
178 tok_integer = strtoull (tok_string, &tail, 0);
179 }
180 else
181 {
182 token = T_FLOAT;
183 tok_float = strtod (tok_string, &tail);
184 }
185 if (errno || *tail)
186 fatal ("invalid numeric syntax \"%s\"", tok_string);
187 }
188 }
189 else if (c == '"')
190 {
191 token = T_STRING;
192 while ((c = getc (input)) != '"')
193 {
194 if (c == '\n')
195 fatal ("new-line inside string");
196 add_char (c);
197 }
198 add_char__ ('\0');
199 }
200 else if (c == ';')
201 token = T_SEMICOLON;
202 else if (c == '*')
203 token = T_ASTERISK;
204 else if (c == '+')
205 token = T_PLUS;
206 else if (c == '(')
207 token = T_LPAREN;
208 else if (c == ')')
209 token = T_RPAREN;
210 else if (isalpha (c) || c == '@' || c == '_')
211 {
212 do
213 {
214 add_char (c);
215 c = getc (input);
216 }
217 while (isdigit (c) || isalpha (c) || c == '.' || c == '_');
218 add_char ('\0');
219
220 if (c == ':')
221 {
222 token = T_LABEL;
223 return;
224 }
225 ungetc (c, input);
226 if (tok_string[0] == '@')
227 {
228 token = T_AT;
229 return;
230 }
231
232 if (!strcmp (tok_string, "i8"))
233 token = T_I8;
234 else if (!strcmp (tok_string, "i16"))
235 token = T_I16;
236 else if (!strcmp (tok_string, "i64"))
237 token = T_I64;
238 else if (tok_string[0] == 's')
239 {
240 token = T_S;
241 tok_integer = atoi (tok_string + 1);
242 }
243 else if (!strcmp (tok_string, "SYSMIS"))
244 {
245 token = T_FLOAT;
246 tok_float = -DBL_MAX;
247 }
248 else if (!strcmp (tok_string, "PCSYSMIS"))
249 token = T_PCSYSMIS;
250 else if (!strcmp (tok_string, "LOWEST"))
251 {
252 token = T_FLOAT;
253 tok_float = float_get_lowest ();
254 }
255 else if (!strcmp (tok_string, "HIGHEST"))
256 {
257 token = T_FLOAT;
258 tok_float = DBL_MAX;
259 }
260 else if (!strcmp (tok_string, "ENDIAN"))
261 {
262 token = T_INTEGER;
263 tok_integer = integer_format == INTEGER_MSB_FIRST ? 1 : 2;
264 }
265 else if (!strcmp (tok_string, "COUNT"))
266 token = T_COUNT;
267 else if (!strcmp (tok_string, "COUNT8"))
268 token = T_COUNT8;
269 else if (!strcmp (tok_string, "hex"))
270 token = T_HEX;
271 else
272 fatal ("invalid token `%s'", tok_string);
273 }
274 else
275 fatal ("invalid input byte `%c'", c);
276 }
277
/* Appends the N bytes starting at DATA to BUFFER.

   Does nothing when N is 0.  This keeps memcpy() from being called
   with a null destination when BUFFER has not allocated any storage
   yet, as happens when the first data item is an empty string. */
static void
buffer_put (struct buffer *buffer, const void *data, size_t n)
{
  if (n > 0)
    memcpy (buffer_put_uninit (buffer, n), data, n);
}
283
284 static void *
285 buffer_put_uninit (struct buffer *buffer, size_t n)
286 {
287 buffer->size += n;
288 if (buffer->size > buffer->allocated)
289 {
290 buffer->allocated = buffer->size * 2;
291 buffer->data = xrealloc (buffer->data, buffer->allocated);
292 }
293 return &buffer->data[buffer->size - n];
294 }
295
/* Returns the integer value of hex digit C, in the range 0...15.
   Upper and lower case letters are both accepted.

   C must be a hex digit; anything else, including a null character,
   trips the assertion.  The explicit null check is needed because
   strchr() would otherwise match the table's own terminator. */
static int
hexit_value (int c)
{
  static const char digits[] = "0123456789abcdefABCDEF";
  const char *cp = c != '\0' ? strchr (digits, c) : NULL;
  int pos;

  assert (cp != NULL);

  /* Positions 16...21 hold `A'...`F', which share the values of
     `a'...`f' at positions 10...15. */
  pos = cp - digits;
  return pos < 16 ? pos : pos - 6;
}
306
307 static void
308 usage (void)
309 {
310 printf ("\
311 %s, SAv Construction Kit\n\
312 usage: %s [OPTIONS] INPUT\n\
313 \nOptions:\n\
314 --be big-endian output format (default)\n\
315 --le little-endian output format\n\
316 --help print this help message and exit\n\
317 \n\
318 The input is a sequence of data items, each followed by a semicolon.\n\
319 Each data item is converted to the output format and written on\n\
320 stdout. A data item is one of the following\n\
321 \n\
322 - An integer in decimal, in hexadecimal prefixed by 0x, or in octal\n\
323 prefixed by 0. Output as a 32-bit binary integer.\n\
324 \n\
325 - A floating-point number. Output in 64-bit IEEE 754 format.\n\
326 \n\
327 - A string enclosed in double quotes. Output literally. There is\n\
328 no syntax for \"escapes\". Strings may not contain new-lines.\n\
329 \n\
330 - A literal of the form s<number> followed by a quoted string as\n\
331 above. Output as the string's contents followed by enough spaces\n\
332 to fill up <number> bytes. For example, s8 \"foo\" is output as\n\
333 the \"foo\" followed by 5 spaces.\n\
334 \n\
335 - The literal \"i8\", \"i16\", or \"i64\" followed by an integer. Output\n\
336 as a binary integer with the specified number of bits.\n\
337 \n\
338 - One of the literals SYSMIS, LOWEST, or HIGHEST. Output as a\n\
339 64-bit IEEE 754 float of the appropriate PSPP value.\n\
340 \n\
341 - PCSYSMIS. Output as SPSS/PC+ system-missing value.\n\
342 \n\
343 - The literal ENDIAN. Output as a 32-bit binary integer, either\n\
344 with value 1 if --be is in effect or 2 if --le is in effect.\n\
345 \n\
346 - A pair of parentheses enclosing a sequence of data items, each\n\
347 followed by a semicolon (the last semicolon is optional).\n\
348 Output as the enclosed data items in sequence.\n\
349 \n\
350 - The literal COUNT or COUNT8 followed by a sequence of parenthesized\n\
351 data items, as above. Output as a 32-bit or 8-bit binary integer whose\n\
352 value is the number of bytes enclosed within the parentheses, followed\n\
353 by the enclosed data items themselves.\n\
354 \n\
355 optionally followed by an asterisk and a positive integer, which\n\
356 specifies a repeat count for the data item.\n",
357 program_name, program_name);
358 exit (EXIT_SUCCESS);
359 }
360
361 static const char *
362 parse_options (int argc, char **argv)
363 {
364 for (;;)
365 {
366 enum {
367 OPT_BE = UCHAR_MAX + 1,
368 OPT_LE,
369 OPT_HELP
370 };
371 static const struct option options[] =
372 {
373 {"be", no_argument, NULL, OPT_BE},
374 {"le", no_argument, NULL, OPT_LE},
375 {"help", no_argument, NULL, OPT_HELP},
376 {NULL, 0, NULL, 0},
377 };
378
379 int c = getopt_long (argc, argv, "", options, NULL);
380 if (c == -1)
381 break;
382
383 switch (c)
384 {
385 case OPT_BE:
386 float_format = FLOAT_IEEE_DOUBLE_BE;
387 integer_format = INTEGER_MSB_FIRST;
388 break;
389
390 case OPT_LE:
391 float_format = FLOAT_IEEE_DOUBLE_LE;
392 integer_format = INTEGER_LSB_FIRST;
393 break;
394
395 case OPT_HELP:
396 usage ();
397
398 case 0:
399 break;
400
401 case '?':
402 exit (EXIT_FAILURE);
403 break;
404
405 default:
406 NOT_REACHED ();
407 }
408
409 }
410
411 if (optind + 1 != argc)
412 error (1, 0, "exactly one non-option argument required; "
413 "use --help for help");
414 return argv[optind];
415 }
416
417 static struct symbol *
418 symbol_find (const char *name)
419 {
420 struct symbol *symbol;
421 unsigned int hash;
422
423 if (name[0] == '@')
424 name++;
425 hash = hash_string (name, 0);
426 HMAP_FOR_EACH_WITH_HASH (symbol, struct symbol, hmap_node,
427 hash, &symbol_table)
428 if (!strcmp (name, symbol->name))
429 return symbol;
430
431 symbol = xmalloc (sizeof *symbol);
432 hmap_insert (&symbol_table, &symbol->hmap_node, hash);
433 symbol->name = xstrdup (name);
434 symbol->offset = UINT_MAX;
435 return symbol;
436 }
437
438 static void
439 parse_data_item (struct buffer *output)
440 {
441 size_t old_size = output->size;
442
443 if (token == T_INTEGER)
444 {
445 integer_put (tok_integer, integer_format,
446 buffer_put_uninit (output, 4), 4);
447 get_token ();
448 }
449 else if (token == T_FLOAT)
450 {
451 float_convert (FLOAT_NATIVE_DOUBLE, &tok_float,
452 float_format, buffer_put_uninit (output, 8));
453 get_token ();
454 }
455 else if (token == T_PCSYSMIS)
456 {
457 static const uint8_t pcsysmis[] =
458 { 0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff, };
459 buffer_put (output, pcsysmis, sizeof pcsysmis);
460 get_token ();
461 }
462 else if (token == T_I8)
463 {
464 uint8_t byte;
465
466 get_token ();
467 do
468 {
469 if (token != T_INTEGER)
470 fatal ("integer expected after `i8'");
471 byte = tok_integer;
472 buffer_put (output, &byte, 1);
473 get_token ();
474 }
475 while (token == T_INTEGER);
476 }
477 else if (token == T_I16)
478 {
479 get_token ();
480 do
481 {
482 if (token != T_INTEGER)
483 fatal ("integer expected after `i16'");
484 integer_put (tok_integer, integer_format,
485 buffer_put_uninit (output, 2), 2);
486 get_token ();
487 }
488 while (token == T_INTEGER);
489 }
490 else if (token == T_I64)
491 {
492 get_token ();
493 do
494 {
495 if (token != T_INTEGER)
496 fatal ("integer expected after `i64'");
497 integer_put (tok_integer, integer_format,
498 buffer_put_uninit (output, 8), 8);
499 get_token ();
500 }
501 while (token == T_INTEGER);
502 }
503 else if (token == T_STRING)
504 {
505 buffer_put (output, tok_string, tok_strlen);
506 get_token ();
507 }
508 else if (token == T_S)
509 {
510 int n;
511
512 n = tok_integer;
513 get_token ();
514
515 if (token != T_STRING)
516 fatal ("string expected");
517 if (tok_strlen > n)
518 fatal ("%zu-byte string is longer than pad length %d",
519 tok_strlen, n);
520
521 buffer_put (output, tok_string, tok_strlen);
522 memset (buffer_put_uninit (output, n - tok_strlen), ' ',
523 n - tok_strlen);
524 get_token ();
525 }
526 else if (token == T_LPAREN)
527 {
528 get_token ();
529
530 while (token != T_RPAREN)
531 parse_data_item (output);
532
533 get_token ();
534 }
535 else if (token == T_COUNT)
536 {
537 buffer_put_uninit (output, 4);
538
539 get_token ();
540 if (token != T_LPAREN)
541 fatal ("`(' expected after COUNT");
542 get_token ();
543
544 while (token != T_RPAREN)
545 parse_data_item (output);
546 get_token ();
547
548 integer_put (output->size - old_size - 4, integer_format,
549 output->data + old_size, 4);
550 }
551 else if (token == T_COUNT8)
552 {
553 buffer_put_uninit (output, 1);
554
555 get_token ();
556 if (token != T_LPAREN)
557 fatal ("`(' expected after COUNT8");
558 get_token ();
559
560 while (token != T_RPAREN)
561 parse_data_item (output);
562 get_token ();
563
564 integer_put (output->size - old_size - 1, integer_format,
565 output->data + old_size, 1);
566 }
567 else if (token == T_HEX)
568 {
569 const char *p;
570
571 get_token ();
572
573 if (token != T_STRING)
574 fatal ("string expected");
575
576 for (p = tok_string; *p; p++)
577 {
578 if (isspace ((unsigned char) *p))
579 continue;
580 else if (isxdigit ((unsigned char) p[0])
581 && isxdigit ((unsigned char) p[1]))
582 {
583 int high = hexit_value (p[0]);
584 int low = hexit_value (p[1]);
585 uint8_t byte = high * 16 + low;
586 buffer_put (output, &byte, 1);
587 p++;
588 }
589 else
590 fatal ("invalid format in hex string");
591 }
592 get_token ();
593 }
594 else if (token == T_LABEL)
595 {
596 struct symbol *sym = symbol_find (tok_string);
597 if (sym->offset == UINT_MAX)
598 sym->offset = output->size;
599 else if (sym->offset != output->size)
600 fatal ("%s: can't redefine label for offset %u with offset %zu",
601 tok_string, sym->offset, output->size);
602 get_token ();
603 return;
604 }
605 else if (token == T_AT)
606 {
607 unsigned int value = symbol_find (tok_string)->offset;
608 get_token ();
609
610 while (token == T_MINUS || token == T_PLUS)
611 {
612 enum token_type op = token;
613 unsigned int operand;
614 get_token ();
615 if (token == T_AT)
616 operand = symbol_find (tok_string)->offset;
617 else if (token == T_INTEGER)
618 operand = tok_integer;
619 else
620 fatal ("expecting @label");
621 get_token ();
622
623 if (op == T_PLUS)
624 value += operand;
625 else
626 value -= operand;
627 }
628 integer_put (value, integer_format, buffer_put_uninit (output, 4), 4);
629 }
630 else
631 fatal ("syntax error");
632
633 if (token == T_ASTERISK)
634 {
635 size_t n = output->size - old_size;
636 char *p;
637
638 get_token ();
639
640 if (token != T_INTEGER || tok_integer < 1)
641 fatal ("positive integer expected after `*'");
642 p = buffer_put_uninit (output, (tok_integer - 1) * n);
643 while (--tok_integer > 0)
644 {
645 memcpy (p, output->data + old_size, n);
646 p += n;
647 }
648
649 get_token ();
650 }
651
652 if (token == T_SEMICOLON)
653 get_token ();
654 else if (token != T_RPAREN)
655 fatal ("`;' expected");
656 }
657
658 int
659 main (int argc, char **argv)
660 {
661 struct buffer output;
662
663 set_program_name (argv[0]);
664 input_file_name = parse_options (argc, argv);
665
666 if (!strcmp (input_file_name, "-"))
667 input = stdin;
668 else
669 {
670 input = fopen (input_file_name, "r");
671 if (input == NULL)
672 error (1, errno, "%s: open failed", input_file_name);
673 }
674
675 if (isatty (STDOUT_FILENO))
676 error (1, 0, "not writing binary data to a terminal; redirect to a file");
677
678 output.data = NULL;
679 output.size = 0;
680 output.allocated = 0;
681
682 line_number = 1;
683 get_token ();
684 while (token != T_EOF)
685 parse_data_item (&output);
686
687 if (!hmap_is_empty (&symbol_table))
688 {
689 struct symbol *symbol;
690
691 HMAP_FOR_EACH (symbol, struct symbol, hmap_node, &symbol_table)
692 if (symbol->offset == UINT_MAX)
693 error (1, 0, "label %s used but never defined", symbol->name);
694
695 output.size = 0;
696 if (fseek (input, 0, SEEK_SET) != 0)
697 error (1, 0, "failed to rewind stdin for second pass");
698
699 line_number = 1;
700 get_token ();
701 while (token != T_EOF)
702 parse_data_item (&output);
703 }
704
705 if (input != stdin)
706 fclose (input);
707
708 xset_binary_mode (fileno (stdout), O_BINARY);
709 fwrite (output.data, output.size, 1, stdout);
710 free (output.data);
711
712 return 0;
713 }
714