1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012, 2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include <config.h>
18 
19 #include "language/lexer/variable-parser.h"
20 
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdbool.h>
24 #include <stdlib.h>
25 
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/variable.h"
29 #include "language/lexer/lexer.h"
30 #include "libpspp/assertion.h"
31 #include "libpspp/cast.h"
32 #include "libpspp/hash-functions.h"
33 #include "libpspp/i18n.h"
34 #include "libpspp/hmapx.h"
35 #include "libpspp/message.h"
36 #include "libpspp/misc.h"
37 #include "libpspp/pool.h"
38 #include "libpspp/str.h"
39 #include "libpspp/stringi-set.h"
40 
41 #include "math/interaction.h"
42 
43 #include "gl/c-ctype.h"
44 #include "gl/xalloc.h"
45 
46 #include "gettext.h"
47 #define _(msgid) gettext (msgid)
48 
/* Forward declarations of the var_set accessors that the parsing
   routines below call before the definitions appear later in this
   file. */
static struct variable *var_set_get_var (const struct var_set *vs,
                                         size_t idx);
static struct variable *var_set_lookup_var (const struct var_set *vs,
                                            const char *name);
static bool var_set_lookup_var_idx (const struct var_set *vs,
                                    const char *name, size_t *idx);
static bool var_set_get_names_must_be_ids (const struct var_set *vs);
55 
56 static bool
is_name_token(const struct lexer * lexer,bool names_must_be_ids)57 is_name_token (const struct lexer *lexer, bool names_must_be_ids)
58 {
59   return (lex_token (lexer) == T_ID
60           || (!names_must_be_ids && lex_token (lexer) == T_STRING));
61 }
62 
/* Returns true if LEXER's current token may be used as a variable
   name under the naming rule recorded in VS. */
static bool
is_vs_name_token (const struct lexer *lexer, const struct var_set *vs)
{
  bool ids_only = var_set_get_names_must_be_ids (vs);

  return is_name_token (lexer, ids_only);
}
68 
/* Returns true if LEXER's current token may be used as a variable
   name under the naming rule reported by dictionary D. */
static bool
is_dict_name_token (const struct lexer *lexer, const struct dictionary *d)
{
  bool ids_only = dict_get_names_must_be_ids (d);

  return is_name_token (lexer, ids_only);
}
74 
75 /* Parses a name as a variable within VS.  Sets *IDX to the
76    variable's index and returns true if successful.  On failure
77    emits an error message and returns false. */
78 static bool
parse_vs_variable_idx(struct lexer * lexer,const struct var_set * vs,size_t * idx)79 parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
80                        size_t *idx)
81 {
82   assert (idx != NULL);
83 
84   if (!is_vs_name_token (lexer, vs))
85     {
86       lex_error (lexer, _("expecting variable name"));
87       return false;
88     }
89   else if (var_set_lookup_var_idx (vs, lex_tokcstr (lexer), idx))
90     {
91       lex_get (lexer);
92       return true;
93     }
94   else
95     {
96       msg (SE, _("%s is not a variable name."), lex_tokcstr (lexer));
97       return false;
98     }
99 }
100 
/* Parses the current token as the name of a variable in VS and
   returns that variable.  Returns a null pointer, after an error
   message has been issued, if parsing fails. */
static struct variable *
parse_vs_variable (struct lexer *lexer, const struct var_set *vs)
{
  size_t position;

  if (!parse_vs_variable_idx (lexer, vs, &position))
    return NULL;

  return var_set_get_var (vs, position);
}
110 
/* Parses the current token as the name of a variable in
   dictionary D and returns that variable.  Returns a null pointer,
   after an error message has been issued, if parsing fails.

   A temporary variable set wrapping D is created for the lookup and
   destroyed before returning. */
struct variable *
parse_variable (struct lexer *lexer, const struct dictionary *d)
{
  struct var_set *dict_set;
  struct variable *result;

  dict_set = var_set_create_from_dict (d);
  result = parse_vs_variable (lexer, dict_set);
  var_set_destroy (dict_set);

  return result;
}
122 
123 /* Parses a set of variables from dictionary D given options
124    OPTS.  Resulting list of variables stored in *VAR and the
125    number of variables into *CNT.  Returns true only if
126    successful.  The dictionary D must contain at least one
127    variable.  */
128 bool
parse_variables(struct lexer * lexer,const struct dictionary * d,struct variable *** var,size_t * cnt,int opts)129 parse_variables (struct lexer *lexer, const struct dictionary *d,
130 			struct variable ***var,
131 			size_t *cnt, int opts)
132 {
133   struct var_set *vs;
134   int success;
135 
136   assert (d != NULL);
137   assert (var != NULL);
138   assert (cnt != NULL);
139 
140   vs = var_set_create_from_dict (d);
141   if (var_set_get_cnt (vs) == 0)
142     {
143       *cnt = 0;
144       var_set_destroy (vs);
145       return false;
146     }
147   success = parse_var_set_vars (lexer, vs, var, cnt, opts);
148   var_set_destroy (vs);
149   return success;
150 }
151 
152 /* Parses a set of variables from dictionary D given options
153    OPTS.  Resulting list of variables stored in *VARS and the
154    number of variables into *VAR_CNT.  Returns true only if
155    successful.  Same behavior as parse_variables, except that all
156    allocations are taken from the given POOL. */
157 bool
parse_variables_pool(struct lexer * lexer,struct pool * pool,const struct dictionary * dict,struct variable *** vars,size_t * var_cnt,int opts)158 parse_variables_pool (struct lexer *lexer, struct pool *pool,
159 		const struct dictionary *dict,
160 		struct variable ***vars, size_t *var_cnt, int opts)
161 {
162   int retval;
163 
164   /* PV_APPEND is unsafe because parse_variables would free the
165      existing names on failure, but those names are presumably
166      already in the pool, which would attempt to re-free it
167      later. */
168   assert (!(opts & PV_APPEND));
169 
170   retval = parse_variables (lexer, dict, vars, var_cnt, opts);
171   if (retval)
172     pool_register (pool, free, *vars);
173   return retval;
174 }
175 
176 /* Parses a variable name from VS.  If successful, sets *IDX to
177    the variable's index in VS, *CLASS to the variable's
178    dictionary class, and returns true.  Returns false on
179    failure. */
180 static bool
parse_var_idx_class(struct lexer * lexer,const struct var_set * vs,size_t * idx,enum dict_class * class)181 parse_var_idx_class (struct lexer *lexer, const struct var_set *vs,
182 			size_t *idx,
183 			enum dict_class *class)
184 {
185   if (!parse_vs_variable_idx (lexer, vs, idx))
186     return false;
187 
188   *class = dict_class_from_id (var_get_name (var_set_get_var (vs, *idx)));
189   return true;
190 }
191 
192 /* Add the variable from VS with index IDX to the list of
193    variables V that has *NV elements and room for *MV.
194    Uses and updates INCLUDED to avoid duplicates if indicated by
195    PV_OPTS, which also affects what variables are allowed in
196    appropriate ways. */
197 static void
add_variable(struct variable *** v,size_t * nv,size_t * mv,char * included,int pv_opts,const struct var_set * vs,size_t idx)198 add_variable (struct variable ***v, size_t *nv, size_t *mv,
199               char *included, int pv_opts,
200               const struct var_set *vs, size_t idx)
201 {
202   struct variable *add = var_set_get_var (vs, idx);
203   const char *add_name = var_get_name (add);
204 
205   if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add))
206     msg (SW, _("%s is not a numeric variable.  It will not be "
207                "included in the variable list."), add_name);
208   else if ((pv_opts & PV_STRING) && !var_is_alpha (add))
209     msg (SE, _("%s is not a string variable.  It will not be "
210                "included in the variable list."), add_name);
211   else if ((pv_opts & PV_NO_SCRATCH)
212            && dict_class_from_id (add_name) == DC_SCRATCH)
213     msg (SE, _("Scratch variables (such as %s) are not allowed "
214                "here."), add_name);
215   else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv
216            && var_get_type (add) != var_get_type ((*v)[0]))
217     msg (SE, _("%s and %s are not the same type.  All variables in "
218                "this variable list must be of the same type.  %s "
219                "will be omitted from the list."),
220          var_get_name ((*v)[0]), add_name, add_name);
221   else if ((pv_opts & PV_SAME_WIDTH) && *nv
222            && var_get_width (add) != var_get_width ((*v)[0]))
223     msg (SE, _("%s and %s are string variables with different widths.  "
224                "All variables in this variable list must have the "
225                "same width.  %s will be omitted from the list."),
226          var_get_name ((*v)[0]), add_name, add_name);
227   else if ((pv_opts & PV_NO_DUPLICATE) && included && included[idx])
228     msg (SE, _("Variable %s appears twice in variable list."), add_name);
229   else if ((pv_opts & PV_DUPLICATE) || !included || !included[idx])
230     {
231       if (*nv >= *mv)
232         {
233           *mv = 2 * (*nv + 1);
234           *v = xnrealloc (*v, *mv, sizeof **v);
235         }
236       (*v)[(*nv)++] = add;
237       if (included != NULL)
238         included[idx] = 1;
239     }
240 }
241 
242 /* Adds the variables in VS with indexes FIRST_IDX through
243    LAST_IDX, inclusive, to the list of variables V that has *NV
244    elements and room for *MV.  Uses and updates INCLUDED to avoid
245    duplicates if indicated by PV_OPTS, which also affects what
246    variables are allowed in appropriate ways. */
247 static void
add_variables(struct variable *** v,size_t * nv,size_t * mv,char * included,int pv_opts,const struct var_set * vs,int first_idx,int last_idx,enum dict_class class)248 add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included,
249                int pv_opts,
250                const struct var_set *vs, int first_idx, int last_idx,
251                enum dict_class class)
252 {
253   size_t i;
254 
255   for (i = first_idx; i <= last_idx; i++)
256     if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class)
257       add_variable (v, nv, mv, included, pv_opts, vs, i);
258 }
259 
260 /* Note that if parse_variables() returns false, *v is free()'d.
261    Conversely, if parse_variables() returns true, then *nv is
262    nonzero and *v is non-NULL. */
263 bool
parse_var_set_vars(struct lexer * lexer,const struct var_set * vs,struct variable *** v,size_t * nv,int pv_opts)264 parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
265                     struct variable ***v, size_t *nv,
266                     int pv_opts)
267 {
268   size_t mv;
269   char *included;
270 
271   assert (vs != NULL);
272   assert (v != NULL);
273   assert (nv != NULL);
274 
275   /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE,
276      PV_SAME_WIDTH may be specified. */
277   assert (((pv_opts & PV_NUMERIC) != 0)
278           + ((pv_opts & PV_STRING) != 0)
279           + ((pv_opts & PV_SAME_TYPE) != 0)
280           + ((pv_opts & PV_SAME_WIDTH) != 0) <= 1);
281 
282   /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */
283   assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE));
284 
285   if (!(pv_opts & PV_APPEND))
286     {
287       *v = NULL;
288       *nv = 0;
289       mv = 0;
290     }
291   else
292     mv = *nv;
293 
294   if (!(pv_opts & PV_DUPLICATE))
295     {
296       size_t i;
297 
298       included = xcalloc (var_set_get_cnt (vs), sizeof *included);
299       for (i = 0; i < *nv; i++)
300         {
301           size_t index;
302           if (!var_set_lookup_var_idx (vs, var_get_name ((*v)[i]), &index))
303             NOT_REACHED ();
304           included[index] = 1;
305         }
306     }
307   else
308     included = NULL;
309 
310   do
311     {
312       if (lex_match (lexer, T_ALL))
313         add_variables (v, nv, &mv, included, pv_opts,
314                        vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY);
315       else
316         {
317           enum dict_class class;
318           size_t first_idx;
319 
320           if (!parse_var_idx_class (lexer, vs, &first_idx, &class))
321             goto fail;
322 
323           if (!lex_match (lexer, T_TO))
324             add_variable (v, nv, &mv, included, pv_opts, vs, first_idx);
325           else
326             {
327               size_t last_idx;
328               enum dict_class last_class;
329               struct variable *first_var, *last_var;
330 
331               if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class))
332                 goto fail;
333 
334               first_var = var_set_get_var (vs, first_idx);
335               last_var = var_set_get_var (vs, last_idx);
336 
337               if (last_idx < first_idx)
338                 {
339                   const char *first_name = var_get_name (first_var);
340                   const char *last_name = var_get_name (last_var);
341                   msg (SE, _("%s TO %s is not valid syntax since %s "
342                              "precedes %s in the dictionary."),
343                        first_name, last_name, first_name, last_name);
344                   goto fail;
345                 }
346 
347               if (class != last_class)
348                 {
349                   msg (SE, _("When using the TO keyword to specify several "
350                              "variables, both variables must be from "
351                              "the same variable dictionaries, of either "
352                              "ordinary, scratch, or system variables.  "
353                              "%s is a %s variable, whereas %s is %s."),
354                        var_get_name (first_var), dict_class_to_name (class),
355                        var_get_name (last_var),
356                        dict_class_to_name (last_class));
357                   goto fail;
358                 }
359 
360               add_variables (v, nv, &mv, included, pv_opts,
361                              vs, first_idx, last_idx, class);
362             }
363         }
364 
365       if (pv_opts & PV_SINGLE)
366         break;
367       lex_match (lexer, T_COMMA);
368     }
369   while (lex_token (lexer) == T_ALL
370          || (is_vs_name_token (lexer, vs)
371              && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL));
372 
373   if (*nv == 0)
374     goto fail;
375 
376   free (included);
377   return 1;
378 
379 fail:
380   free (included);
381   free (*v);
382   *v = NULL;
383   *nv = 0;
384   return 0;
385 }
386 
/* Parses the current token as a single variable name under the
   naming rule of dictionary D; the variable need not exist.

   On success, consumes the token and returns a newly allocated copy
   of the name, which the caller must free.  On failure, returns a
   null pointer without consuming the token.  A token that cannot be
   a name is reported here; a name rejected by dict_id_is_valid() is
   presumably reported by that function. */
char *
parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d)
{
  if (!is_dict_name_token (lexer, d))
    {
      /* Marked for translation like the same message in
         parse_vs_variable_idx(). */
      lex_error (lexer, _("expecting variable name"));
      return NULL;
    }
  if (!dict_id_is_valid (d, lex_tokcstr (lexer), true))
    return NULL;

  char *name = xstrdup (lex_tokcstr (lexer));
  lex_get (lexer);
  return name;
}
402 
403 /* Attempts to break UTF-8 encoded NAME into a root (whose contents are
404    arbitrary except that it does not end in a digit) followed by an integer
405    numeric suffix.  On success, stores the value of the suffix into *NUMBERP,
406    the number of digits in the suffix into *N_DIGITSP, and returns the number
407    of bytes in the root.  On failure, returns 0. */
408 static int
extract_numeric_suffix(const char * name,unsigned long int * numberp,int * n_digitsp)409 extract_numeric_suffix (const char *name,
410                         unsigned long int *numberp, int *n_digitsp)
411 {
412   size_t root_len, n_digits;
413   size_t i;
414 
415   /* Count length of root. */
416   root_len = 1;                 /* Valid identifier never starts with digit. */
417   for (i = 1; name[i] != '\0'; i++)
418     if (!c_isdigit (name[i]))
419       root_len = i + 1;
420   n_digits = i - root_len;
421 
422   if (n_digits == 0)
423     {
424       msg (SE, _("`%s' cannot be used with TO because it does not end in "
425                  "a digit."), name);
426       return 0;
427     }
428 
429   *numberp = strtoull (name + root_len, NULL, 10);
430   if (*numberp == ULONG_MAX)
431     {
432       msg (SE, _("Numeric suffix on `%s' is larger than supported with TO."),
433            name);
434       return 0;
435     }
436   *n_digitsp = n_digits;
437   return root_len;
438 }
439 
440 static bool
add_var_name(char * name,char *** names,size_t * n_vars,size_t * allocated_vars,struct stringi_set * set,int pv_opts)441 add_var_name (char *name,
442               char ***names, size_t *n_vars, size_t *allocated_vars,
443               struct stringi_set *set, int pv_opts)
444 {
445   if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (set, name))
446     {
447       msg (SE, _("Variable %s appears twice in variable list."),
448            name);
449       return false;
450     }
451 
452   if (*n_vars >= *allocated_vars)
453     *names = x2nrealloc (*names, allocated_vars, sizeof **names);
454   (*names)[(*n_vars)++] = name;
455   return true;
456 }
457 
458 /* Parses a list of variable names according to the DATA LIST version
459    of the TO convention.  */
460 bool
parse_DATA_LIST_vars(struct lexer * lexer,const struct dictionary * dict,char *** namesp,size_t * n_varsp,int pv_opts)461 parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict,
462                       char ***namesp, size_t *n_varsp, int pv_opts)
463 {
464   char **names;
465   size_t n_vars;
466   size_t allocated_vars;
467 
468   struct stringi_set set;
469 
470   char *name1 = NULL;
471   char *name2 = NULL;
472 
473   bool ok = false;
474 
475   assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
476                        | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
477   stringi_set_init (&set);
478 
479   if (pv_opts & PV_APPEND)
480     {
481       n_vars = allocated_vars = *n_varsp;
482       names = *namesp;
483 
484       if (pv_opts & PV_NO_DUPLICATE)
485         {
486           size_t i;
487 
488           for (i = 0; i < n_vars; i++)
489             stringi_set_insert (&set, names[i]);
490         }
491     }
492   else
493     {
494       n_vars = allocated_vars = 0;
495       names = NULL;
496     }
497 
498   do
499     {
500       name1 = parse_DATA_LIST_var (lexer, dict);
501       if (!name1)
502         goto exit;
503       if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH)
504 	{
505 	  msg (SE, _("Scratch variables not allowed here."));
506 	  goto exit;
507 	}
508       if (lex_match (lexer, T_TO))
509 	{
510 	  unsigned long int num1, num2;
511           int n_digits1, n_digits2;
512           int root_len1, root_len2;
513           unsigned long int number;
514 
515           name2 = parse_DATA_LIST_var (lexer, dict);
516           if (!name2)
517             goto exit;
518 
519           root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1);
520           if (root_len1 == 0)
521             goto exit;
522 
523           root_len2 = extract_numeric_suffix (name2, &num2, &n_digits2);
524           if (root_len2 == 0)
525 	    goto exit;
526 
527 	  if (root_len1 != root_len2 || memcasecmp (name1, name2, root_len1))
528 	    {
529 	      msg (SE, _("Prefixes don't match in use of TO convention."));
530 	      goto exit;
531 	    }
532 	  if (num1 > num2)
533 	    {
534 	      msg (SE, _("Bad bounds in use of TO convention."));
535 	      goto exit;
536 	    }
537 
538 	  for (number = num1; number <= num2; number++)
539 	    {
540               char *name = xasprintf ("%.*s%0*lu",
541                                       root_len1, name1,
542                                       n_digits1, number);
543               if (!add_var_name (name, &names, &n_vars, &allocated_vars,
544                                  &set, pv_opts))
545                 {
546                   free (name);
547                   goto exit;
548                 }
549 	    }
550 
551           free (name1);
552           name1 = NULL;
553           free (name2);
554           name2 = NULL;
555 	}
556       else
557 	{
558           if (!add_var_name (name1, &names, &n_vars, &allocated_vars,
559                              &set, pv_opts))
560             goto exit;
561           name1 = NULL;
562 	}
563 
564       lex_match (lexer, T_COMMA);
565 
566       if (pv_opts & PV_SINGLE)
567 	break;
568     }
569   while (lex_token (lexer) == T_ID);
570   ok = true;
571 
572 exit:
573   stringi_set_destroy (&set);
574   if (ok)
575     {
576       *namesp = names;
577       *n_varsp = n_vars;
578     }
579   else
580     {
581       int i;
582       for (i = 0; i < n_vars; i++)
583 	free (names[i]);
584       free (names);
585       *namesp = NULL;
586       *n_varsp = 0;
587 
588       free (name1);
589       free (name2);
590     }
591   return ok;
592 }
593 
/* Registers with POOL, using free() as the release function, every
   string NAMES[0] through NAMES[NNAMES - 1] and then the NAMES
   array itself. */
static void
register_vars_pool (struct pool *pool, char **names, size_t nnames)
{
  char **p;

  for (p = names; p < names + nnames; p++)
    pool_register (pool, free, *p);

  pool_register (pool, free, names);
}
605 
606 /* Parses a list of variable names according to the DATA LIST
607    version of the TO convention.  Same args as
608    parse_DATA_LIST_vars(), except that all allocations are taken
609    from the given POOL. */
610 bool
parse_DATA_LIST_vars_pool(struct lexer * lexer,const struct dictionary * dict,struct pool * pool,char *** names,size_t * nnames,int pv_opts)611 parse_DATA_LIST_vars_pool (struct lexer *lexer, const struct dictionary *dict,
612                            struct pool *pool,
613                            char ***names, size_t *nnames, int pv_opts)
614 {
615   int retval;
616 
617   /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free
618      the existing names on failure, but those names are
619      presumably already in the pool, which would attempt to
620      re-free it later. */
621   assert (!(pv_opts & PV_APPEND));
622 
623   retval = parse_DATA_LIST_vars (lexer, dict, names, nnames, pv_opts);
624   if (retval)
625     register_vars_pool (pool, *names, *nnames);
626   return retval;
627 }
628 
629 /* Parses a list of variables where some of the variables may be
630    existing and the rest are to be created.  Same args as
631    parse_DATA_LIST_vars(). */
632 bool
parse_mixed_vars(struct lexer * lexer,const struct dictionary * dict,char *** names,size_t * nnames,int pv_opts)633 parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
634 		  char ***names, size_t *nnames, int pv_opts)
635 {
636   size_t i;
637 
638   assert (names != NULL);
639   assert (nnames != NULL);
640 
641   if (!(pv_opts & PV_APPEND))
642     {
643       *names = NULL;
644       *nnames = 0;
645     }
646   while (is_dict_name_token (lexer, dict) || lex_token (lexer) == T_ALL)
647     {
648       if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL)
649 	{
650 	  struct variable **v;
651 	  size_t nv;
652 
653 	  if (!parse_variables (lexer, dict, &v, &nv, pv_opts))
654 	    goto fail;
655 	  *names = xnrealloc (*names, *nnames + nv, sizeof **names);
656 	  for (i = 0; i < nv; i++)
657 	    (*names)[*nnames + i] = xstrdup (var_get_name (v[i]));
658 	  free (v);
659 	  *nnames += nv;
660 	}
661       else if (!parse_DATA_LIST_vars (lexer, dict, names, nnames, PV_APPEND | pv_opts))
662 	goto fail;
663     }
664   if (*nnames == 0)
665     goto fail;
666 
667   return true;
668 
669 fail:
670   for (i = 0; i < *nnames; i++)
671     free ((*names)[i]);
672   free (*names);
673   *names = NULL;
674   *nnames = 0;
675   return false;
676 }
677 
678 /* Parses a list of variables where some of the variables may be
679    existing and the rest are to be created.  Same args as
680    parse_mixed_vars(), except that all allocations are taken
681    from the given POOL. */
682 bool
parse_mixed_vars_pool(struct lexer * lexer,const struct dictionary * dict,struct pool * pool,char *** names,size_t * nnames,int pv_opts)683 parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool,
684                        char ***names, size_t *nnames, int pv_opts)
685 {
686   int retval;
687 
688   /* PV_APPEND is unsafe because parse_mixed_vars_pool would free
689      the existing names on failure, but those names are
690      presumably already in the pool, which would attempt to
691      re-free it later. */
692   assert (!(pv_opts & PV_APPEND));
693 
694   retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts);
695   if (retval)
696     register_vars_pool (pool, *names, *nnames);
697   return retval;
698 }
699 
/* A set of variables, accessed through a table of callbacks so that
   different backing stores can share the parsing code.  This file
   provides two: one backed by a dictionary and one backed by an
   array. */
struct var_set
{
  /* If true, only identifier tokens are accepted as variable names;
     otherwise string tokens are accepted too. */
  bool names_must_be_ids;

  /* Returns the number of variables in the set. */
  size_t (*get_cnt) (const struct var_set *);

  /* Returns the variable at index IDX. */
  struct variable *(*get_var) (const struct var_set *, size_t idx);

  /* Looks up a variable by name; on success stores its index and
     returns true. */
  bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *);

  /* Releases the set. */
  void (*destroy) (struct var_set *);

  /* Data private to the backing store. */
  void *aux;
};
710 
711 /* Returns the number of variables in VS. */
712 size_t
var_set_get_cnt(const struct var_set * vs)713 var_set_get_cnt (const struct var_set *vs)
714 {
715   assert (vs != NULL);
716 
717   return vs->get_cnt (vs);
718 }
719 
720 /* Return variable with index IDX in VS.
721    IDX must be less than the number of variables in VS. */
722 static struct variable *
var_set_get_var(const struct var_set * vs,size_t idx)723 var_set_get_var (const struct var_set *vs, size_t idx)
724 {
725   assert (vs != NULL);
726   assert (idx < var_set_get_cnt (vs));
727 
728   return vs->get_var (vs, idx);
729 }
730 
/* Looks up NAME in VS.  Returns the matching variable, or a null
   pointer if VS has no variable with that name. */
struct variable *
var_set_lookup_var (const struct var_set *vs, const char *name)
{
  size_t position;

  if (!var_set_lookup_var_idx (vs, name, &position))
    return NULL;

  return var_set_get_var (vs, position);
}
741 
742 /* If VS contains a variable named NAME, sets *IDX to its index
743    and returns true.  Otherwise, returns false. */
744 bool
var_set_lookup_var_idx(const struct var_set * vs,const char * name,size_t * idx)745 var_set_lookup_var_idx (const struct var_set *vs, const char *name,
746                         size_t *idx)
747 {
748   assert (vs != NULL);
749   assert (name != NULL);
750 
751   return vs->lookup_var_idx (vs, name, idx);
752 }
753 
754 /* Destroys VS. */
755 void
var_set_destroy(struct var_set * vs)756 var_set_destroy (struct var_set *vs)
757 {
758   if (vs != NULL)
759     vs->destroy (vs);
760 }
761 
762 static bool
var_set_get_names_must_be_ids(const struct var_set * vs)763 var_set_get_names_must_be_ids (const struct var_set *vs)
764 {
765   return vs->names_must_be_ids;
766 }
767 
768 /* Returns the number of variables in VS. */
769 static size_t
dict_var_set_get_cnt(const struct var_set * vs)770 dict_var_set_get_cnt (const struct var_set *vs)
771 {
772   struct dictionary *d = vs->aux;
773 
774   return dict_get_var_cnt (d);
775 }
776 
777 /* Return variable with index IDX in VS.
778    IDX must be less than the number of variables in VS. */
779 static struct variable *
dict_var_set_get_var(const struct var_set * vs,size_t idx)780 dict_var_set_get_var (const struct var_set *vs, size_t idx)
781 {
782   struct dictionary *d = vs->aux;
783 
784   return dict_get_var (d, idx);
785 }
786 
787 /* If VS contains a variable named NAME, sets *IDX to its index
788    and returns true.  Otherwise, returns false. */
789 static bool
dict_var_set_lookup_var_idx(const struct var_set * vs,const char * name,size_t * idx)790 dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
791                              size_t *idx)
792 {
793   struct dictionary *d = vs->aux;
794   struct variable *v = dict_lookup_var (d, name);
795   if (v != NULL)
796     {
797       *idx = var_get_dict_index (v);
798       return true;
799     }
800   else
801     return false;
802 }
803 
/* destroy callback for a dictionary-backed set.  Only the var_set
   structure is freed; the dictionary it refers to is left alone. */
static void
dict_var_set_destroy (struct var_set *vs)
{
  free (vs);
}
810 
811 /* Returns a variable set based on D. */
812 struct var_set *
var_set_create_from_dict(const struct dictionary * d)813 var_set_create_from_dict (const struct dictionary *d)
814 {
815   struct var_set *vs = xmalloc (sizeof *vs);
816   vs->names_must_be_ids = dict_get_names_must_be_ids (d);
817   vs->get_cnt = dict_var_set_get_cnt;
818   vs->get_var = dict_var_set_get_var;
819   vs->lookup_var_idx = dict_var_set_lookup_var_idx;
820   vs->destroy = dict_var_set_destroy;
821   vs->aux = (void *) d;
822   return vs;
823 }
824 
825 /* A variable set based on an array. */
826 struct array_var_set
827   {
828     struct variable *const *var;/* Array of variables. */
829     size_t var_cnt;             /* Number of elements in var. */
830     struct hmapx vars_by_name;  /* Variables hashed by name. */
831   };
832 
833 /* Returns the number of variables in VS. */
834 static size_t
array_var_set_get_cnt(const struct var_set * vs)835 array_var_set_get_cnt (const struct var_set *vs)
836 {
837   struct array_var_set *avs = vs->aux;
838 
839   return avs->var_cnt;
840 }
841 
842 /* Return variable with index IDX in VS.
843    IDX must be less than the number of variables in VS. */
844 static struct variable *
array_var_set_get_var(const struct var_set * vs,size_t idx)845 array_var_set_get_var (const struct var_set *vs, size_t idx)
846 {
847   struct array_var_set *avs = vs->aux;
848 
849   return CONST_CAST (struct variable *, avs->var[idx]);
850 }
851 
852 /* If VS contains a variable named NAME, sets *IDX to its index
853    and returns true.  Otherwise, returns false. */
854 static bool
array_var_set_lookup_var_idx(const struct var_set * vs,const char * name,size_t * idx)855 array_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
856                               size_t *idx)
857 {
858   struct array_var_set *avs = vs->aux;
859   struct hmapx_node *node;
860   struct variable **varp;
861 
862   HMAPX_FOR_EACH_WITH_HASH (varp, node, utf8_hash_case_string (name, 0),
863                             &avs->vars_by_name)
864     if (!utf8_strcasecmp (name, var_get_name (*varp)))
865       {
866         *idx = varp - avs->var;
867         return true;
868       }
869 
870   return false;
871 }
872 
873 /* Destroys VS. */
874 static void
array_var_set_destroy(struct var_set * vs)875 array_var_set_destroy (struct var_set *vs)
876 {
877   struct array_var_set *avs = vs->aux;
878 
879   hmapx_destroy (&avs->vars_by_name);
880   free (avs);
881   free (vs);
882 }
883 
884 /* Returns a variable set based on the VAR_CNT variables in VAR. */
885 struct var_set *
var_set_create_from_array(struct variable * const * var,size_t var_cnt)886 var_set_create_from_array (struct variable *const *var, size_t var_cnt)
887 {
888   struct var_set *vs;
889   struct array_var_set *avs;
890   size_t i;
891 
892   vs = xmalloc (sizeof *vs);
893   vs->names_must_be_ids = true;
894   vs->get_cnt = array_var_set_get_cnt;
895   vs->get_var = array_var_set_get_var;
896   vs->lookup_var_idx = array_var_set_lookup_var_idx;
897   vs->destroy = array_var_set_destroy;
898   vs->aux = avs = xmalloc (sizeof *avs);
899   avs->var = var;
900   avs->var_cnt = var_cnt;
901   hmapx_init (&avs->vars_by_name);
902   for (i = 0; i < var_cnt; i++)
903     {
904       const char *name = var_get_name (var[i]);
905       size_t idx;
906 
907       if (array_var_set_lookup_var_idx (vs, name, &idx))
908         {
909           var_set_destroy (vs);
910           return NULL;
911         }
912       hmapx_insert (&avs->vars_by_name, CONST_CAST (void *, &avs->var[i]),
913                     utf8_hash_case_string (name, 0));
914     }
915 
916   return vs;
917 }
918 
919 
920 /* Match a variable.
921    If the match succeeds, the variable will be placed in VAR.
922    Returns true if successful */
923 bool
lex_match_variable(struct lexer * lexer,const struct dictionary * dict,const struct variable ** var)924 lex_match_variable (struct lexer *lexer, const struct dictionary *dict, const struct variable **var)
925 {
926   if (lex_token (lexer) !=  T_ID)
927     return false;
928 
929   *var = parse_variable_const  (lexer, dict);
930 
931   if (*var == NULL)
932     return false;
933   return true;
934 }
935 
936 /* An interaction is a variable followed by {*, BY} followed by an interaction */
937 static bool
parse_internal_interaction(struct lexer * lexer,const struct dictionary * dict,struct interaction ** iact,struct interaction ** it)938 parse_internal_interaction (struct lexer *lexer, const struct dictionary *dict, struct interaction **iact, struct interaction **it)
939 {
940   const struct variable *v = NULL;
941   assert (iact);
942 
943   switch  (lex_next_token (lexer, 1))
944     {
945     case T_ENDCMD:
946     case T_SLASH:
947     case T_COMMA:
948     case T_ID:
949     case T_BY:
950     case T_ASTERISK:
951       break;
952     default:
953       return false;
954       break;
955     }
956 
957   if (! lex_match_variable (lexer, dict, &v))
958     {
959       if (it)
960 	interaction_destroy (*it);
961       *iact = NULL;
962       return false;
963     }
964 
965   assert (v);
966 
967   if (*iact == NULL)
968     *iact = interaction_create (v);
969   else
970     interaction_add_variable (*iact, v);
971 
972   if (lex_match (lexer, T_ASTERISK) || lex_match (lexer, T_BY))
973     {
974       return parse_internal_interaction (lexer, dict, iact, iact);
975     }
976 
977   return true;
978 }
979 
/* Parses an interaction, that is, one or more variables from DICT
   joined by `*' or BY, into *IACT.  Returns true only if
   successful.  This is the outermost entry to
   parse_internal_interaction(), so it passes a null pointer as the
   final argument. */
bool
parse_design_interaction (struct lexer *lexer, const struct dictionary *dict,
                          struct interaction **iact)
{
  bool parsed = parse_internal_interaction (lexer, dict, iact, NULL);

  return parsed;
}
985 
986