1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013, 2014,
3    2015, 2020 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
17 
18 #include <config.h>
19 
20 #include "data/dictionary.h"
21 
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <unistr.h>
26 
27 #include "data/attributes.h"
28 #include "data/case.h"
29 #include "data/identifier.h"
30 #include "data/mrset.h"
31 #include "data/settings.h"
32 #include "data/value-labels.h"
33 #include "data/vardict.h"
34 #include "data/variable.h"
35 #include "data/vector.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/compiler.h"
39 #include "libpspp/hash-functions.h"
40 #include "libpspp/hmap.h"
41 #include "libpspp/i18n.h"
42 #include "libpspp/message.h"
43 #include "libpspp/misc.h"
44 #include "libpspp/pool.h"
45 #include "libpspp/str.h"
46 #include "libpspp/string-array.h"
47 #include "libpspp/ll.h"
48 
49 #include "gl/intprops.h"
50 #include "gl/minmax.h"
51 #include "gl/xalloc.h"
52 #include "gl/xmemdup0.h"
53 
54 #include "gettext.h"
55 #define _(msgid) gettext (msgid)
56 
57 /* A dictionary. */
58 struct dictionary
59   {
60     int ref_cnt;
61     struct vardict_info *var;	/* Variables. */
62     size_t var_cnt, var_cap;    /* Number of variables, capacity. */
63     struct caseproto *proto;    /* Prototype for dictionary cases
64                                    (updated lazily). */
65     struct hmap name_map;	/* Variable index by name. */
66     int next_value_idx;         /* Index of next `union value' to allocate. */
67     const struct variable **split;    /* SPLIT FILE vars. */
68     size_t split_cnt;           /* SPLIT FILE count. */
69     struct variable *weight;    /* WEIGHT variable. */
70     struct variable *filter;    /* FILTER variable. */
71     casenumber case_limit;      /* Current case limit (N command). */
72     char *label;		/* File label. */
73     struct string_array documents; /* Documents. */
74     struct vector **vector;     /* Vectors of variables. */
75     size_t vector_cnt;          /* Number of vectors. */
76     struct attrset attributes;  /* Custom attributes. */
77     struct mrset **mrsets;      /* Multiple response sets. */
78     size_t n_mrsets;            /* Number of multiple response sets. */
79 
80     /* Whether variable names must be valid identifiers.  Normally, this is
81        true, but sometimes a dictionary is prepared for external use
82        (e.g. output to a CSV file) where names don't have to be valid. */
83     bool names_must_be_ids;
84 
85     char *encoding;             /* Character encoding of string data */
86 
87     const struct dict_callbacks *callbacks; /* Callbacks on dictionary
88 					       modification */
89     void *cb_data ;                  /* Data passed to callbacks */
90 
91     void (*changed) (struct dictionary *, void *); /* Generic change callback */
92     void *changed_data;
93   };
94 
95 static void dict_unset_split_var (struct dictionary *, struct variable *, bool);
96 static void dict_unset_mrset_var (struct dictionary *, struct variable *);
97 
98 /* Compares two double pointers to variables, which should point
99    to elements of a struct dictionary's `var' member array. */
100 static int
compare_var_ptrs(const void * a_,const void * b_,const void * aux UNUSED)101 compare_var_ptrs (const void *a_, const void *b_, const void *aux UNUSED)
102 {
103   struct variable *const *a = a_;
104   struct variable *const *b = b_;
105 
106   return *a < *b ? -1 : *a > *b;
107 }
108 
109 static void
unindex_var(struct dictionary * d,struct vardict_info * vardict)110 unindex_var (struct dictionary *d, struct vardict_info *vardict)
111 {
112   hmap_delete (&d->name_map, &vardict->name_node);
113 }
114 
115 /* This function assumes that vardict->name_node.hash is valid, that is, that
116    its name has not changed since it was hashed (rename_var() updates this
117    hash along with the name itself). */
118 static void
reindex_var(struct dictionary * d,struct vardict_info * vardict,bool skip_callbacks)119 reindex_var (struct dictionary *d, struct vardict_info *vardict, bool skip_callbacks)
120 {
121   struct variable *old = (d->callbacks && d->callbacks->var_changed
122                           ? var_clone (vardict->var)
123                           : NULL);
124 
125   struct variable *var = vardict->var;
126   var_set_vardict (var, vardict);
127   hmap_insert_fast (&d->name_map, &vardict->name_node,
128                     vardict->name_node.hash);
129 
130   if (! skip_callbacks)
131     {
132       if (d->changed) d->changed (d, d->changed_data);
133       if (old)
134         {
135           d->callbacks->var_changed (d, var_get_dict_index (var), VAR_TRAIT_POSITION, old, d->cb_data);
136           var_unref (old);
137         }
138     }
139 }
140 
141 /* Sets the case_index in V's vardict to CASE_INDEX. */
142 static void
set_var_case_index(struct variable * v,int case_index)143 set_var_case_index (struct variable *v, int case_index)
144 {
145   var_get_vardict (v)->case_index = case_index;
146 }
147 
148 /* Removes the dictionary variables with indexes from FROM to TO (exclusive)
149    from name_map. */
150 static void
unindex_vars(struct dictionary * d,size_t from,size_t to)151 unindex_vars (struct dictionary *d, size_t from, size_t to)
152 {
153   size_t i;
154 
155   for (i = from; i < to; i++)
156     unindex_var (d, &d->var[i]);
157 }
158 
159 /* Re-sets the dict_index in the dictionary variables with
160    indexes from FROM to TO (exclusive). */
161 static void
reindex_vars(struct dictionary * d,size_t from,size_t to,bool skip_callbacks)162 reindex_vars (struct dictionary *d, size_t from, size_t to, bool skip_callbacks)
163 {
164   size_t i;
165 
166   for (i = from; i < to; i++)
167     reindex_var (d, &d->var[i], skip_callbacks);
168 }
169 
170 
171 
172 /* Returns the encoding for data in dictionary D.  The return value is a
173    nonnull string that contains an IANA character set name. */
174 const char *
dict_get_encoding(const struct dictionary * d)175 dict_get_encoding (const struct dictionary *d)
176 {
177   return d->encoding ;
178 }
179 
180 /* Returns true if UTF-8 string ID is an acceptable identifier in DICT's
181    encoding, false otherwise.  If ISSUE_ERROR is true, issues an explanatory
182    error message on failure. */
183 bool
dict_id_is_valid(const struct dictionary * dict,const char * id,bool issue_error)184 dict_id_is_valid (const struct dictionary *dict, const char *id,
185                   bool issue_error)
186 {
187   return (!dict->names_must_be_ids
188           || id_is_valid (id, dict->encoding, issue_error));
189 }
190 
191 void
dict_set_change_callback(struct dictionary * d,void (* changed)(struct dictionary *,void *),void * data)192 dict_set_change_callback (struct dictionary *d,
193 			  void (*changed) (struct dictionary *, void*),
194 			  void *data)
195 {
196   d->changed = changed;
197   d->changed_data = data;
198 }
199 
200 /* Discards dictionary D's caseproto.  (It will be regenerated
201    lazily, on demand.) */
202 static void
invalidate_proto(struct dictionary * d)203 invalidate_proto (struct dictionary *d)
204 {
205   caseproto_unref (d->proto);
206   d->proto = NULL;
207 }
208 
209 /* Print a representation of dictionary D to stdout, for
210    debugging purposes. */
211 void
dict_dump(const struct dictionary * d)212 dict_dump (const struct dictionary *d)
213 {
214   int i;
215   for (i = 0 ; i < d->var_cnt ; ++i)
216     {
217       const struct variable *v = d->var[i].var;
218       printf ("Name: %s;\tdict_idx: %zu; case_idx: %zu\n",
219 	      var_get_name (v),
220 	      var_get_dict_index (v),
221 	      var_get_case_index (v));
222 
223     }
224 }
225 
226 /* Associate CALLBACKS with DICT.  Callbacks will be invoked whenever
227    the dictionary or any of the variables it contains are modified.
228    Each callback will get passed CALLBACK_DATA.
229    Any callback may be NULL, in which case it'll be ignored.
230 */
231 void
dict_set_callbacks(struct dictionary * dict,const struct dict_callbacks * callbacks,void * callback_data)232 dict_set_callbacks (struct dictionary *dict,
233 		    const struct dict_callbacks *callbacks,
234 		    void *callback_data)
235 {
236   dict->callbacks = callbacks;
237   dict->cb_data = callback_data;
238 }
239 
240 /* Shallow copy the callbacks from SRC to DEST */
241 void
dict_copy_callbacks(struct dictionary * dest,const struct dictionary * src)242 dict_copy_callbacks (struct dictionary *dest,
243 		     const struct dictionary *src)
244 {
245   dest->callbacks = src->callbacks;
246   dest->cb_data = src->cb_data;
247 }
248 
249 /* Creates and returns a new dictionary with the specified ENCODING. */
250 struct dictionary *
dict_create(const char * encoding)251 dict_create (const char *encoding)
252 {
253   struct dictionary *d = xzalloc (sizeof *d);
254 
255   d->encoding = xstrdup (encoding);
256   d->names_must_be_ids = true;
257   hmap_init (&d->name_map);
258   attrset_init (&d->attributes);
259   d->ref_cnt = 1;
260 
261   return d;
262 }
263 
264 /* Creates and returns a (deep) copy of an existing
265    dictionary.
266 
267    The new dictionary's case indexes are copied from the old
268    dictionary.  If the new dictionary won't be used to access
269    cases produced with the old dictionary, then the new
270    dictionary's case indexes should be compacted with
271    dict_compact_values to save space.
272 
273    Callbacks are not cloned. */
274 struct dictionary *
dict_clone(const struct dictionary * s)275 dict_clone (const struct dictionary *s)
276 {
277   struct dictionary *d;
278   size_t i;
279 
280   d = dict_create (s->encoding);
281   dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s));
282 
283   for (i = 0; i < s->var_cnt; i++)
284     {
285       struct variable *sv = s->var[i].var;
286       struct variable *dv = dict_clone_var_assert (d, sv);
287       size_t i;
288 
289       for (i = 0; i < var_get_short_name_cnt (sv); i++)
290         var_set_short_name (dv, i, var_get_short_name (sv, i));
291 
292       var_get_vardict (dv)->case_index = var_get_vardict (sv)->case_index;
293     }
294 
295   d->next_value_idx = s->next_value_idx;
296 
297   d->split_cnt = s->split_cnt;
298   if (d->split_cnt > 0)
299     {
300        d->split = xnmalloc (d->split_cnt, sizeof *d->split);
301       for (i = 0; i < d->split_cnt; i++)
302         d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
303     }
304 
305   if (s->weight != NULL)
306     dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight)));
307 
308   if (s->filter != NULL)
309     dict_set_filter (d, dict_lookup_var_assert (d, var_get_name (s->filter)));
310 
311   d->case_limit = s->case_limit;
312   dict_set_label (d, dict_get_label (s));
313   dict_set_documents (d, dict_get_documents (s));
314 
315   d->vector_cnt = s->vector_cnt;
316   d->vector = xnmalloc (d->vector_cnt, sizeof *d->vector);
317   for (i = 0; i < s->vector_cnt; i++)
318     d->vector[i] = vector_clone (s->vector[i], s, d);
319 
320   dict_set_attributes (d, dict_get_attributes (s));
321 
322   for (i = 0; i < s->n_mrsets; i++)
323     {
324       const struct mrset *old = s->mrsets[i];
325       struct mrset *new;
326       size_t j;
327 
328       /* Clone old mrset, then replace vars from D by vars from S. */
329       new = mrset_clone (old);
330       for (j = 0; j < new->n_vars; j++)
331         new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
332 
333       dict_add_mrset (d, new);
334     }
335 
336   return d;
337 }
338 
339 
340 
341 /* Returns the SPLIT FILE vars (see cmd_split_file()).  Call
342    dict_get_split_cnt() to determine how many SPLIT FILE vars
343    there are.  Returns a null pointer if and only if there are no
344    SPLIT FILE vars. */
345 const struct variable *const *
dict_get_split_vars(const struct dictionary * d)346 dict_get_split_vars (const struct dictionary *d)
347 {
348   return d->split;
349 }
350 
351 /* Returns the number of SPLIT FILE vars. */
352 size_t
dict_get_split_cnt(const struct dictionary * d)353 dict_get_split_cnt (const struct dictionary *d)
354 {
355   return d->split_cnt;
356 }
357 
358 /* Removes variable V, which must be in D, from D's set of split
359    variables. */
360 static void
dict_unset_split_var(struct dictionary * d,struct variable * v,bool skip_callbacks)361 dict_unset_split_var (struct dictionary *d, struct variable *v, bool skip_callbacks)
362 {
363   int orig_count;
364 
365   assert (dict_contains_var (d, v));
366 
367   orig_count = d->split_cnt;
368   d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
369                                &v, compare_var_ptrs, NULL);
370   if (orig_count != d->split_cnt && !skip_callbacks)
371     {
372       if (d->changed) d->changed (d, d->changed_data);
373       /* We changed the set of split variables so invoke the
374          callback. */
375       if (d->callbacks &&  d->callbacks->split_changed)
376         d->callbacks->split_changed (d, d->cb_data);
377     }
378 }
379 
380 
381 /* Sets CNT split vars SPLIT in dictionary D. */
382 static void
dict_set_split_vars__(struct dictionary * d,struct variable * const * split,size_t cnt,bool skip_callbacks)383 dict_set_split_vars__ (struct dictionary *d,
384                        struct variable *const *split, size_t cnt, bool skip_callbacks)
385 {
386   assert (cnt == 0 || split != NULL);
387 
388   d->split_cnt = cnt;
389   if (cnt > 0)
390    {
391     d->split = xnrealloc (d->split, cnt, sizeof *d->split) ;
392     memcpy (d->split, split, cnt * sizeof *d->split);
393    }
394   else
395    {
396     free (d->split);
397     d->split = NULL;
398    }
399 
400  if (!skip_callbacks)
401     {
402       if (d->changed) d->changed (d, d->changed_data);
403       if (d->callbacks &&  d->callbacks->split_changed)
404         d->callbacks->split_changed (d, d->cb_data);
405     }
406 }
407 
/* Makes the CNT variables in SPLIT the split vars of dictionary D, with
   change callbacks enabled. */
void
dict_set_split_vars (struct dictionary *d,
                     struct variable *const *split, size_t cnt)
{
  const bool skip_callbacks = false;

  dict_set_split_vars__ (d, split, cnt, skip_callbacks);
}
415 
416 
417 
418 /* Deletes variable V from dictionary D and frees V.
419 
420    This is a very bad idea if there might be any pointers to V
421    from outside D.  In general, no variable in the active dataset's
422    dictionary should be deleted when any transformations are
423    active on the dictionary's dataset, because those
424    transformations might reference the deleted variable.  The
425    safest time to delete a variable is just after a procedure has
426    been executed, as done by DELETE VARIABLES.
427 
428    Pointers to V within D are not a problem, because
429    dict_delete_var() knows to remove V from split variables,
430    weights, filters, etc. */
431 static void
dict_delete_var__(struct dictionary * d,struct variable * v,bool skip_callbacks)432 dict_delete_var__ (struct dictionary *d, struct variable *v, bool skip_callbacks)
433 {
434   int dict_index = var_get_dict_index (v);
435   const int case_index = var_get_case_index (v);
436 
437   assert (dict_contains_var (d, v));
438 
439   dict_unset_split_var (d, v, skip_callbacks);
440   dict_unset_mrset_var (d, v);
441 
442   if (d->weight == v)
443     dict_set_weight (d, NULL);
444 
445   if (d->filter == v)
446     dict_set_filter (d, NULL);
447 
448   dict_clear_vectors (d);
449 
450   /* Remove V from var array. */
451   unindex_vars (d, dict_index, d->var_cnt);
452   remove_element (d->var, d->var_cnt, sizeof *d->var, dict_index);
453   d->var_cnt--;
454 
455   /* Update dict_index for each affected variable. */
456   reindex_vars (d, dict_index, d->var_cnt, skip_callbacks);
457 
458   /* Free memory. */
459   var_clear_vardict (v);
460 
461   if (! skip_callbacks)
462     {
463       if (d->changed) d->changed (d, d->changed_data);
464       if (d->callbacks &&  d->callbacks->var_deleted)
465         d->callbacks->var_deleted (d, v, dict_index, case_index, d->cb_data);
466     }
467 
468   invalidate_proto (d);
469   var_unref (v);
470 }
471 
/* Deletes variable V from dictionary D and frees V, with change
   callbacks enabled.

   This is a very bad idea if there might be any pointers to V
   from outside D.  In general, no variable in the active dataset's
   dictionary should be deleted when any transformations are
   active on the dictionary's dataset, because those
   transformations might reference the deleted variable.  The
   safest time to delete a variable is just after a procedure has
   been executed, as done by DELETE VARIABLES.

   Pointers to V within D are not a problem, because
   dict_delete_var() knows to remove V from split variables,
   weights, filters, etc. */
void
dict_delete_var (struct dictionary *d, struct variable *v)
{
  const bool skip_callbacks = false;

  dict_delete_var__ (d, v, skip_callbacks);
}
490 
491 
/* Deletes the COUNT variables listed in VARS from D, one at a time and in
   array order.  This is unsafe; see the comment on dict_delete_var() for
   details. */
void
dict_delete_vars (struct dictionary *d,
                  struct variable *const *vars, size_t count)
{
  /* FIXME: this can be done in O(count) time, but this algorithm
     is O(count**2). */
  assert (count == 0 || vars != NULL);

  for (size_t i = 0; i < count; i++)
    dict_delete_var (d, vars[i]);
}
505 
506 /* Deletes the COUNT variables in D starting at index IDX.  This
507    is unsafe; see the comment on dict_delete_var() for
508    details. Deleting consecutive vars will result in less callbacks
509    compared to iterating over dict_delete_var.
510    A simple while loop over dict_delete_var will
511    produce (d->var_cnt - IDX) * COUNT variable changed callbacks
512    plus COUNT variable delete callbacks.
513    This here produces d->var_cnt - IDX variable changed callbacks
514    plus COUNT variable delete callbacks. */
515 void
dict_delete_consecutive_vars(struct dictionary * d,size_t idx,size_t count)516 dict_delete_consecutive_vars (struct dictionary *d, size_t idx, size_t count)
517 {
518   assert (idx + count <= d->var_cnt);
519 
520   /* We need to store the variable and the corresponding case_index
521      for the delete callbacks later. We store them in a linked list.*/
522   struct delvar {
523     struct ll ll;
524     struct variable *var;
525     int case_index;
526   };
527   struct ll_list list = LL_INITIALIZER (list);
528 
529   for (size_t i = idx; i < idx + count; i++)
530     {
531       struct delvar *dv = xmalloc (sizeof (struct delvar));
532       assert (dv);
533       struct variable *v = d->var[i].var;
534 
535       dict_unset_split_var (d, v, false);
536       dict_unset_mrset_var (d, v);
537 
538       if (d->weight == v)
539 	dict_set_weight (d, NULL);
540 
541       if (d->filter == v)
542 	dict_set_filter (d, NULL);
543 
544       dv->var = v;
545       dv->case_index = var_get_case_index (v);
546       ll_push_tail (&list, (struct ll *)dv);
547     }
548 
549   dict_clear_vectors (d);
550 
551   /* Remove variables from var array. */
552   unindex_vars (d, idx, d->var_cnt);
553   remove_range (d->var, d->var_cnt, sizeof *d->var, idx, count);
554   d->var_cnt -= count;
555 
556   /* Reindexing will result variable-changed callback */
557   reindex_vars (d, idx, d->var_cnt, false);
558 
559   invalidate_proto (d);
560   if (d->changed) d->changed (d, d->changed_data);
561 
562   /* Now issue the variable delete callbacks and delete
563      the variables. The vardict is not valid at this point
564      anymore. That is the reason why we stored the
565      caseindex before reindexing. */
566   for (size_t vi = idx; vi < idx + count; vi++)
567     {
568       struct delvar *dv = (struct delvar *) ll_pop_head (&list);
569       var_clear_vardict (dv->var);
570       if (d->callbacks &&  d->callbacks->var_deleted)
571         d->callbacks->var_deleted (d, dv->var, vi, dv->case_index, d->cb_data);
572       var_unref (dv->var);
573       free (dv);
574     }
575 }
576 
577 /* Deletes scratch variables from dictionary D. */
578 void
dict_delete_scratch_vars(struct dictionary * d)579 dict_delete_scratch_vars (struct dictionary *d)
580 {
581   int i;
582 
583   /* FIXME: this can be done in O(count) time, but this algorithm
584      is O(count**2). */
585   for (i = 0; i < d->var_cnt;)
586     if (var_get_dict_class (d->var[i].var) == DC_SCRATCH)
587       dict_delete_var (d, d->var[i].var);
588     else
589       i++;
590 }
591 
592 
593 
594 /* Clears the contents from a dictionary without destroying the
595    dictionary itself. */
596 static void
dict_clear__(struct dictionary * d,bool skip_callbacks)597 dict_clear__ (struct dictionary *d, bool skip_callbacks)
598 {
599   /* FIXME?  Should we really clear case_limit, label, documents?
600      Others are necessarily cleared by deleting all the variables.*/
601   while (d->var_cnt > 0)
602     {
603       dict_delete_var__ (d, d->var[d->var_cnt - 1].var, skip_callbacks);
604     }
605 
606   free (d->var);
607   d->var = NULL;
608   d->var_cnt = d->var_cap = 0;
609   invalidate_proto (d);
610   hmap_clear (&d->name_map);
611   d->next_value_idx = 0;
612   dict_set_split_vars__ (d, NULL, 0, skip_callbacks);
613 
614   if (skip_callbacks)
615     {
616       d->weight = NULL;
617       d->filter = NULL;
618     }
619   else
620     {
621       dict_set_weight (d, NULL);
622       dict_set_filter (d, NULL);
623     }
624   d->case_limit = 0;
625   free (d->label);
626   d->label = NULL;
627   string_array_clear (&d->documents);
628   dict_clear_vectors (d);
629   attrset_clear (&d->attributes);
630 }
631 
/* Clears the contents from dictionary D without destroying the
   dictionary itself, with change callbacks enabled. */
void
dict_clear (struct dictionary *d)
{
  const bool skip_callbacks = false;

  dict_clear__ (d, skip_callbacks);
}
639 
640 /* Clears a dictionary and destroys it. */
641 static void
_dict_destroy(struct dictionary * d)642 _dict_destroy (struct dictionary *d)
643 {
644   /* In general, we don't want callbacks occurring, if the dictionary
645      is being destroyed */
646   d->callbacks  = NULL ;
647 
648   dict_clear__ (d, true);
649   string_array_destroy (&d->documents);
650   hmap_destroy (&d->name_map);
651   attrset_destroy (&d->attributes);
652   dict_clear_mrsets (d);
653   free (d->encoding);
654   free (d);
655 }
656 
657 struct dictionary *
dict_ref(struct dictionary * d)658 dict_ref (struct dictionary *d)
659 {
660   d->ref_cnt++;
661   return d;
662 }
663 
664 void
dict_unref(struct dictionary * d)665 dict_unref (struct dictionary *d)
666 {
667   if (d == NULL)
668     return;
669   d->ref_cnt--;
670   assert (d->ref_cnt >= 0);
671   if (d->ref_cnt == 0)
672     _dict_destroy (d);
673 }
674 
675 /* Returns the number of variables in D. */
676 size_t
dict_get_var_cnt(const struct dictionary * d)677 dict_get_var_cnt (const struct dictionary *d)
678 {
679   return d->var_cnt;
680 }
681 
682 /* Returns the variable in D with dictionary index IDX, which
683    must be between 0 and the count returned by
684    dict_get_var_cnt(), exclusive. */
685 struct variable *
dict_get_var(const struct dictionary * d,size_t idx)686 dict_get_var (const struct dictionary *d, size_t idx)
687 {
688   assert (idx < d->var_cnt);
689 
690   return d->var[idx].var;
691 }
692 
693 /* Sets *VARS to an array of pointers to variables in D and *CNT
694    to the number of variables in *D.  All variables are returned
695    except for those, if any, in the classes indicated by EXCLUDE.
696    (There is no point in putting DC_SYSTEM in EXCLUDE as
697    dictionaries never include system variables.) */
698 void
dict_get_vars(const struct dictionary * d,const struct variable *** vars,size_t * cnt,enum dict_class exclude)699 dict_get_vars (const struct dictionary *d, const struct variable ***vars,
700                size_t *cnt, enum dict_class exclude)
701 {
702   dict_get_vars_mutable (d, (struct variable ***) vars, cnt, exclude);
703 }
704 
705 /* Sets *VARS to an array of pointers to variables in D and *CNT
706    to the number of variables in *D.  All variables are returned
707    except for those, if any, in the classes indicated by EXCLUDE.
708    (There is no point in putting DC_SYSTEM in EXCLUDE as
709    dictionaries never include system variables.) */
710 void
dict_get_vars_mutable(const struct dictionary * d,struct variable *** vars,size_t * cnt,enum dict_class exclude)711 dict_get_vars_mutable (const struct dictionary *d, struct variable ***vars,
712                        size_t *cnt, enum dict_class exclude)
713 {
714   size_t count;
715   size_t i;
716 
717   assert (exclude == (exclude & DC_ALL));
718 
719   count = 0;
720   for (i = 0; i < d->var_cnt; i++)
721     {
722       enum dict_class class = var_get_dict_class (d->var[i].var);
723       if (!(class & exclude))
724         count++;
725     }
726 
727   *vars = xnmalloc (count, sizeof **vars);
728   *cnt = 0;
729   for (i = 0; i < d->var_cnt; i++)
730     {
731       enum dict_class class = var_get_dict_class (d->var[i].var);
732       if (!(class & exclude))
733         (*vars)[(*cnt)++] = d->var[i].var;
734     }
735   assert (*cnt == count);
736 }
737 
738 static struct variable *
add_var_with_case_index(struct dictionary * d,struct variable * v,int case_index)739 add_var_with_case_index (struct dictionary *d, struct variable *v,
740                          int case_index)
741 {
742   struct vardict_info *vardict;
743 
744   assert (case_index >= d->next_value_idx);
745 
746   /* Update dictionary. */
747   if (d->var_cnt >= d->var_cap)
748     {
749       size_t i;
750 
751       d->var = x2nrealloc (d->var, &d->var_cap, sizeof *d->var);
752       hmap_clear (&d->name_map);
753       for (i = 0; i < d->var_cnt; i++)
754         {
755           var_set_vardict (d->var[i].var, &d->var[i]);
756           hmap_insert_fast (&d->name_map, &d->var[i].name_node,
757                             d->var[i].name_node.hash);
758         }
759     }
760 
761   vardict = &d->var[d->var_cnt++];
762   vardict->dict = d;
763   vardict->var = v;
764   hmap_insert (&d->name_map, &vardict->name_node,
765                utf8_hash_case_string (var_get_name (v), 0));
766   vardict->case_index = case_index;
767   var_set_vardict (v, vardict);
768 
769   if (d->changed) d->changed (d, d->changed_data);
770   if (d->callbacks &&  d->callbacks->var_added)
771     d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data);
772 
773   invalidate_proto (d);
774   d->next_value_idx = case_index + 1;
775 
776   return v;
777 }
778 
779 static struct variable *
add_var(struct dictionary * d,struct variable * v)780 add_var (struct dictionary *d, struct variable *v)
781 {
782   return add_var_with_case_index (d, v, d->next_value_idx);
783 }
784 
/* Creates and returns a new variable in D with the given NAME
   and WIDTH.  Returns a null pointer, without creating anything, if the
   given NAME would duplicate that of an existing variable in the
   dictionary. */
struct variable *
dict_create_var (struct dictionary *d, const char *name, int width)
{
  if (dict_lookup_var (d, name) != NULL)
    return NULL;

  return dict_create_var_assert (d, name, width);
}
795 
/* Creates and returns a new variable in D with the given NAME
   and WIDTH.  Assert-fails if the given NAME would duplicate
   that of an existing variable in the dictionary. */
struct variable *
dict_create_var_assert (struct dictionary *d, const char *name, int width)
{
  assert (dict_lookup_var (d, name) == NULL);

  struct variable *created = var_create (name, width);
  return add_var (d, created);
}
805 
/* Creates and returns a new variable in D, as a copy of existing variable
   OLD_VAR, which need not be in D or in any dictionary.  The copy keeps
   OLD_VAR's name.  Returns a null pointer if that name would duplicate
   that of an existing variable in the dictionary. */
struct variable *
dict_clone_var (struct dictionary *d, const struct variable *old_var)
{
  const char *name = var_get_name (old_var);

  return dict_clone_var_as (d, old_var, name);
}
815 
/* Creates and returns a new variable in D, as a copy of existing variable
   OLD_VAR, which need not be in D or in any dictionary.  The copy keeps
   OLD_VAR's name.  Assert-fails if that name would duplicate that of an
   existing variable in the dictionary. */
struct variable *
dict_clone_var_assert (struct dictionary *d, const struct variable *old_var)
{
  const char *name = var_get_name (old_var);

  return dict_clone_var_as_assert (d, old_var, name);
}
825 
/* Creates and returns a new variable in D with name NAME, as a copy of
   existing variable OLD_VAR, which need not be in D or in any dictionary.
   Returns a null pointer, without creating anything, if the given NAME
   would duplicate that of an existing variable in the dictionary. */
struct variable *
dict_clone_var_as (struct dictionary *d, const struct variable *old_var,
                   const char *name)
{
  if (dict_lookup_var (d, name) != NULL)
    return NULL;

  return dict_clone_var_as_assert (d, old_var, name);
}
838 
/* Creates and returns a new variable in D with name NAME, as a copy of
   existing variable OLD_VAR, which need not be in D or in any dictionary.
   Assert-fails if the given NAME would duplicate that of an existing variable
   in the dictionary. */
struct variable *
dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var,
                          const char *name)
{
  struct variable *copy = var_clone (old_var);

  assert (dict_lookup_var (d, name) == NULL);

  /* Rename the copy before it is added. */
  var_set_name (copy, name);
  return add_var (d, copy);
}
852 
/* Creates and returns a new variable in D as a copy of OLD_VAR that keeps
   both OLD_VAR's name and OLD_VAR's case index.  Assert-fails if D already
   has a variable with OLD_VAR's name. */
struct variable *
dict_clone_var_in_place_assert (struct dictionary *d,
                                const struct variable *old_var)
{
  assert (dict_lookup_var (d, var_get_name (old_var)) == NULL);

  struct variable *copy = var_clone (old_var);
  return add_var_with_case_index (d, copy, var_get_case_index (old_var));
}
861 
862 /* Returns the variable named NAME in D, or a null pointer if no
863    variable has that name. */
864 struct variable *
dict_lookup_var(const struct dictionary * d,const char * name)865 dict_lookup_var (const struct dictionary *d, const char *name)
866 {
867   struct vardict_info *vardict;
868 
869   HMAP_FOR_EACH_WITH_HASH (vardict, struct vardict_info, name_node,
870                            utf8_hash_case_string (name, 0), &d->name_map)
871     {
872       struct variable *var = vardict->var;
873       if (!utf8_strcasecmp (var_get_name (var), name))
874         return var;
875     }
876 
877   return NULL;
878 }
879 
/* Like dict_lookup_var(), but the caller guarantees that D has a variable
   named NAME.  Assert-fails if it does not. */
struct variable *
dict_lookup_var_assert (const struct dictionary *d, const char *name)
{
  struct variable *found = dict_lookup_var (d, name);

  assert (found != NULL);
  return found;
}
889 
/* Returns true if V currently belongs to dictionary D, false if it belongs
   to another dictionary or to none at all. */
bool
dict_contains_var (const struct dictionary *d, const struct variable *v)
{
  if (!var_has_vardict (v))
    return false;

  return vardict_get_dictionary (var_get_vardict (v)) == d;
}
898 
899 /* Moves V to 0-based position IDX in D.  Other variables in D,
900    if any, retain their relative positions.  Runs in time linear
901    in the distance moved. */
902 void
dict_reorder_var(struct dictionary * d,struct variable * v,size_t new_index)903 dict_reorder_var (struct dictionary *d, struct variable *v, size_t new_index)
904 {
905   size_t old_index = var_get_dict_index (v);
906 
907   assert (new_index < d->var_cnt);
908 
909   unindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
910   move_element (d->var, d->var_cnt, sizeof *d->var, old_index, new_index);
911   reindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1, false);
912 }
913 
914 /* Reorders the variables in D, placing the COUNT variables
915    listed in ORDER in that order at the beginning of D.  The
916    other variables in D, if any, retain their relative
917    positions. */
918 void
dict_reorder_vars(struct dictionary * d,struct variable * const * order,size_t count)919 dict_reorder_vars (struct dictionary *d,
920                    struct variable *const *order, size_t count)
921 {
922   struct vardict_info *new_var;
923   size_t i;
924 
925   assert (count == 0 || order != NULL);
926   assert (count <= d->var_cnt);
927 
928   new_var = xnmalloc (d->var_cap, sizeof *new_var);
929 
930   /* Add variables in ORDER to new_var. */
931   for (i = 0; i < count; i++)
932     {
933       struct vardict_info *old_var;
934 
935       assert (dict_contains_var (d, order[i]));
936 
937       old_var = var_get_vardict (order[i]);
938       new_var[i] = *old_var;
939       old_var->dict = NULL;
940     }
941 
942   /* Add remaining variables to new_var. */
943   for (i = 0; i < d->var_cnt; i++)
944     if (d->var[i].dict != NULL)
945       new_var[count++] = d->var[i];
946   assert (count == d->var_cnt);
947 
948   /* Replace old vardicts by new ones. */
949   free (d->var);
950   d->var = new_var;
951 
952   hmap_clear (&d->name_map);
953   reindex_vars (d, 0, d->var_cnt, false);
954 }
955 
956 /* Changes the name of variable V that is currently in a dictionary to
957    NEW_NAME. */
958 static void
rename_var(struct variable * v,const char * new_name)959 rename_var (struct variable *v, const char *new_name)
960 {
961   struct vardict_info *vardict = var_get_vardict (v);
962   var_clear_vardict (v);
963   var_set_name (v, new_name);
964   vardict->name_node.hash = utf8_hash_case_string (new_name, 0);
965   var_set_vardict (v, vardict);
966 }
967 
968 /* Tries to changes the name of V in D to name NEW_NAME.  Returns true if
969    successful, false if a variable (other than V) with the given name already
970    exists in D. */
971 bool
dict_try_rename_var(struct dictionary * d,struct variable * v,const char * new_name)972 dict_try_rename_var (struct dictionary *d, struct variable *v,
973                      const char *new_name)
974 {
975   struct variable *conflict = dict_lookup_var (d, new_name);
976   if (conflict && v != conflict)
977     return false;
978 
979   struct variable *old = var_clone (v);
980   unindex_var (d, var_get_vardict (v));
981   rename_var (v, new_name);
982   reindex_var (d, var_get_vardict (v), false);
983 
984   if (settings_get_algorithm () == ENHANCED)
985     var_clear_short_names (v);
986 
987   if (d->changed) d->changed (d, d->changed_data);
988   if (d->callbacks &&  d->callbacks->var_changed)
989     d->callbacks->var_changed (d, var_get_dict_index (v), VAR_TRAIT_NAME, old, d->cb_data);
990 
991   var_unref (old);
992 
993   return true;
994 }
995 
996 /* Changes the name of V in D to name NEW_NAME.  Assert-fails if
997    a variable named NEW_NAME is already in D, except that
998    NEW_NAME may be the same as V's existing name. */
999 void
dict_rename_var(struct dictionary * d,struct variable * v,const char * new_name)1000 dict_rename_var (struct dictionary *d, struct variable *v,
1001                  const char *new_name)
1002 {
1003   bool ok UNUSED = dict_try_rename_var (d, v, new_name);
1004   assert (ok);
1005 }
1006 
1007 /* Renames COUNT variables specified in VARS to the names given
1008    in NEW_NAMES within dictionary D.  If the renaming would
1009    result in a duplicate variable name, returns false and stores a
1010    name that would be duplicated into *ERR_NAME (if ERR_NAME is
1011    non-null).  Otherwise, the renaming is successful, and true
1012    is returned. */
1013 bool
dict_rename_vars(struct dictionary * d,struct variable ** vars,char ** new_names,size_t count,char ** err_name)1014 dict_rename_vars (struct dictionary *d,
1015                   struct variable **vars, char **new_names, size_t count,
1016                   char **err_name)
1017 {
1018   struct pool *pool;
1019   char **old_names;
1020   size_t i;
1021 
1022   assert (count == 0 || vars != NULL);
1023   assert (count == 0 || new_names != NULL);
1024 
1025   /* Save the names of the variables to be renamed. */
1026   pool = pool_create ();
1027   old_names = pool_nalloc (pool, count, sizeof *old_names);
1028   for (i = 0; i < count; i++)
1029     old_names[i] = pool_strdup (pool, var_get_name (vars[i]));
1030 
1031   /* Remove the variables to be renamed from the name hash,
1032      and rename them. */
1033   for (i = 0; i < count; i++)
1034     {
1035       unindex_var (d, var_get_vardict (vars[i]));
1036       rename_var (vars[i], new_names[i]);
1037     }
1038 
1039   /* Add the renamed variables back into the name hash,
1040      checking for conflicts. */
1041   for (i = 0; i < count; i++)
1042     {
1043       if (dict_lookup_var (d, var_get_name (vars[i])) != NULL)
1044         {
1045           /* There is a name conflict.
1046              Back out all the name changes that have already
1047              taken place, and indicate failure. */
1048           size_t fail_idx = i;
1049           if (err_name != NULL)
1050             *err_name = new_names[i];
1051 
1052           for (i = 0; i < fail_idx; i++)
1053             unindex_var (d, var_get_vardict (vars[i]));
1054 
1055           for (i = 0; i < count; i++)
1056             {
1057               rename_var (vars[i], old_names[i]);
1058               reindex_var (d, var_get_vardict (vars[i]), false);
1059             }
1060 
1061           pool_destroy (pool);
1062           return false;
1063         }
1064       reindex_var (d, var_get_vardict (vars[i]), false);
1065     }
1066 
1067   /* Clear short names. */
1068   if (settings_get_algorithm () == ENHANCED)
1069     for (i = 0; i < count; i++)
1070       var_clear_short_names (vars[i]);
1071 
1072   pool_destroy (pool);
1073   return true;
1074 }
1075 
1076 /* Returns true if a variable named NAME may be inserted in DICT;
1077    that is, if there is not already a variable with that name in
1078    DICT and if NAME is not a reserved word.  (The caller's checks
1079    have already verified that NAME is otherwise acceptable as a
1080    variable name.) */
1081 static bool
var_name_is_insertable(const struct dictionary * dict,const char * name)1082 var_name_is_insertable (const struct dictionary *dict, const char *name)
1083 {
1084   return (dict_lookup_var (dict, name) == NULL
1085           && lex_id_to_token (ss_cstr (name)) == T_ID);
1086 }
1087 
1088 static char *
make_hinted_name(const struct dictionary * dict,const char * hint)1089 make_hinted_name (const struct dictionary *dict, const char *hint)
1090 {
1091   size_t hint_len = strlen (hint);
1092   bool dropped = false;
1093   char *root, *rp;
1094   size_t ofs;
1095   int mblen;
1096 
1097   /* The allocation size here is OK: characters that are copied directly fit
1098      OK, and characters that are not copied directly are replaced by a single
1099      '_' byte.  If u8_mbtouc() replaces bad input by 0xfffd, then that will get
1100      replaced by '_' too.  */
1101   root = rp = xmalloc (hint_len + 1);
1102   for (ofs = 0; ofs < hint_len; ofs += mblen)
1103     {
1104       ucs4_t uc;
1105 
1106       mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs),
1107                          hint_len - ofs);
1108       if (rp == root
1109           ? lex_uc_is_id1 (uc) && uc != '$'
1110           : lex_uc_is_idn (uc))
1111         {
1112           if (dropped)
1113             {
1114               *rp++ = '_';
1115               dropped = false;
1116             }
1117           rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6);
1118         }
1119       else if (rp != root)
1120         dropped = true;
1121     }
1122   *rp = '\0';
1123 
1124   if (root[0] != '\0')
1125     {
1126       unsigned long int i;
1127 
1128       if (var_name_is_insertable (dict, root))
1129         return root;
1130 
1131       for (i = 0; i < ULONG_MAX; i++)
1132         {
1133           char suffix[INT_BUFSIZE_BOUND (i) + 1];
1134           char *name;
1135 
1136           suffix[0] = '_';
1137           if (!str_format_26adic (i + 1, true, &suffix[1], sizeof suffix - 1))
1138             NOT_REACHED ();
1139 
1140           name = utf8_encoding_concat (root, suffix, dict->encoding, 64);
1141           if (var_name_is_insertable (dict, name))
1142             {
1143               free (root);
1144               return name;
1145             }
1146           free (name);
1147         }
1148     }
1149 
1150   free (root);
1151 
1152   return NULL;
1153 }
1154 
/* Returns a newly allocated name of the form "VAR" followed by a number
   (zero-padded to at least three digits) that does not name any variable in
   DICT.

   Numbers are tried in increasing order, starting from *NUM_START (or from 1
   if NUM_START is null or *NUM_START is 0).  If NUM_START is non-null, it is
   updated to one more than the number that was used.

   The caller owns the returned string and must free it with free(). */
static char *
make_numeric_name (const struct dictionary *dict, unsigned long int *num_start)
{
  unsigned long int n = 1;

  if (num_start != NULL && *num_start > 1)
    n = *num_start;

  for (; n < ULONG_MAX; n++)
    {
      char name[3 + INT_STRLEN_BOUND (n) + 1];

      sprintf (name, "VAR%03lu", n);
      if (dict_lookup_var (dict, name) != NULL)
        continue;

      if (num_start != NULL)
        *num_start = n + 1;
      return xstrdup (name);
    }

  NOT_REACHED ();
}
1177 
1178 
/* Devises and returns a variable name that is not yet used in DICT.  The
   caller owns the returned string and must free it with free() when it is no
   longer needed.

   If HINT is non-null, it is treated as a suggestion: it is adjusted as
   needed to make it suitable as a variable name and unique within DICT.

   If HINT is null or nothing usable can be derived from it, the name has
   the form "VAR" followed by a number zero-padded to at least three digits,
   using the smallest number that yields an unused name.  If NUM_START is
   non-null, then its value is the smallest number considered, and it is
   updated to the next number to try. */
char *
dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
                           unsigned long int *num_start)
{
  char *name = hint != NULL ? make_hinted_name (dict, hint) : NULL;

  return name != NULL ? name : make_numeric_name (dict, num_start);
}
1205 
1206 /* Returns whether variable names must be valid identifiers.  Normally, this is
1207    true, but sometimes a dictionary is prepared for external use (e.g. output
1208    to a CSV file) where names don't have to be valid. */
1209 bool
dict_get_names_must_be_ids(const struct dictionary * d)1210 dict_get_names_must_be_ids (const struct dictionary *d)
1211 {
1212   return d->names_must_be_ids;
1213 }
1214 
1215 /* Sets whether variable names must be valid identifiers.  Normally, this is
1216    true, but sometimes a dictionary is prepared for external use (e.g. output
1217    to a CSV file) where names don't have to be valid.
1218 
1219    Changing this setting from false to true doesn't make the dictionary check
1220    all the existing variable names, so it can cause an invariant violation. */
1221 void
dict_set_names_must_be_ids(struct dictionary * d,bool names_must_be_ids)1222 dict_set_names_must_be_ids (struct dictionary *d, bool names_must_be_ids)
1223 {
1224   d->names_must_be_ids = names_must_be_ids;
1225 }
1226 
1227 /* Returns the weighting variable in dictionary D, or a null
1228    pointer if the dictionary is unweighted. */
1229 struct variable *
dict_get_weight(const struct dictionary * d)1230 dict_get_weight (const struct dictionary *d)
1231 {
1232   assert (d->weight == NULL || dict_contains_var (d, d->weight));
1233 
1234   return d->weight;
1235 }
1236 
1237 /* Returns the value of D's weighting variable in case C, except
1238    that a negative weight is returned as 0.  Returns 1 if the
1239    dictionary is unweighted.  Will warn about missing, negative,
1240    or zero values if *WARN_ON_INVALID is true.  The function will
1241    set *WARN_ON_INVALID to false if an invalid weight is
1242    found. */
1243 double
dict_get_case_weight(const struct dictionary * d,const struct ccase * c,bool * warn_on_invalid)1244 dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
1245 		      bool *warn_on_invalid)
1246 {
1247   assert (c != NULL);
1248 
1249   if (d->weight == NULL)
1250     return 1.0;
1251   else
1252     {
1253       double w = case_num (c, d->weight);
1254 
1255       return var_force_valid_weight (d->weight, w, warn_on_invalid);
1256     }
1257 }
1258 
1259 /* Returns the format to use for weights. */
1260 const struct fmt_spec *
dict_get_weight_format(const struct dictionary * d)1261 dict_get_weight_format (const struct dictionary *d)
1262 {
1263   return d->weight ? var_get_print_format (d->weight) : &F_8_0;
1264 }
1265 
1266 /* Sets the weighting variable of D to V, or turning off
1267    weighting if V is a null pointer. */
1268 void
dict_set_weight(struct dictionary * d,struct variable * v)1269 dict_set_weight (struct dictionary *d, struct variable *v)
1270 {
1271   assert (v == NULL || dict_contains_var (d, v));
1272   assert (v == NULL || var_is_numeric (v));
1273 
1274   d->weight = v;
1275 
1276   if (d->changed) d->changed (d, d->changed_data);
1277   if (d->callbacks &&  d->callbacks->weight_changed)
1278     d->callbacks->weight_changed (d,
1279                                   v ? var_get_dict_index (v) : -1,
1280                                   d->cb_data);
1281 }
1282 
1283 /* Returns the filter variable in dictionary D (see cmd_filter())
1284    or a null pointer if the dictionary is unfiltered. */
1285 struct variable *
dict_get_filter(const struct dictionary * d)1286 dict_get_filter (const struct dictionary *d)
1287 {
1288   assert (d->filter == NULL || dict_contains_var (d, d->filter));
1289 
1290   return d->filter;
1291 }
1292 
1293 /* Sets V as the filter variable for dictionary D.  Passing a
1294    null pointer for V turn off filtering. */
1295 void
dict_set_filter(struct dictionary * d,struct variable * v)1296 dict_set_filter (struct dictionary *d, struct variable *v)
1297 {
1298   assert (v == NULL || dict_contains_var (d, v));
1299   assert (v == NULL || var_is_numeric (v));
1300 
1301   d->filter = v;
1302 
1303   if (d->changed) d->changed (d, d->changed_data);
1304   if (d->callbacks && d->callbacks->filter_changed)
1305     d->callbacks->filter_changed (d,
1306                                   v ? var_get_dict_index (v) : -1,
1307                                       d->cb_data);
1308 }
1309 
1310 /* Returns the case limit for dictionary D, or zero if the number
1311    of cases is unlimited. */
1312 casenumber
dict_get_case_limit(const struct dictionary * d)1313 dict_get_case_limit (const struct dictionary *d)
1314 {
1315   return d->case_limit;
1316 }
1317 
1318 /* Sets CASE_LIMIT as the case limit for dictionary D.  Use
1319    0 for CASE_LIMIT to indicate no limit. */
1320 void
dict_set_case_limit(struct dictionary * d,casenumber case_limit)1321 dict_set_case_limit (struct dictionary *d, casenumber case_limit)
1322 {
1323   d->case_limit = case_limit;
1324 }
1325 
1326 /* Returns the prototype used for cases created by dictionary D. */
1327 const struct caseproto *
dict_get_proto(const struct dictionary * d_)1328 dict_get_proto (const struct dictionary *d_)
1329 {
1330   struct dictionary *d = CONST_CAST (struct dictionary *, d_);
1331   if (d->proto == NULL)
1332     {
1333       size_t i;
1334 
1335       d->proto = caseproto_create ();
1336       d->proto = caseproto_reserve (d->proto, d->var_cnt);
1337       for (i = 0; i < d->var_cnt; i++)
1338         d->proto = caseproto_set_width (d->proto,
1339                                         var_get_case_index (d->var[i].var),
1340                                         var_get_width (d->var[i].var));
1341     }
1342   return d->proto;
1343 }
1344 
1345 /* Returns the case index of the next value to be added to D.
1346    This value is the number of `union value's that need to be
1347    allocated to store a case for dictionary D. */
1348 int
dict_get_next_value_idx(const struct dictionary * d)1349 dict_get_next_value_idx (const struct dictionary *d)
1350 {
1351   return d->next_value_idx;
1352 }
1353 
1354 /* Returns the number of bytes needed to store a case for
1355    dictionary D. */
1356 size_t
dict_get_case_size(const struct dictionary * d)1357 dict_get_case_size (const struct dictionary *d)
1358 {
1359   return sizeof (union value) * dict_get_next_value_idx (d);
1360 }
1361 
1362 /* Reassigns values in dictionary D so that fragmentation is
1363    eliminated. */
1364 void
dict_compact_values(struct dictionary * d)1365 dict_compact_values (struct dictionary *d)
1366 {
1367   size_t i;
1368 
1369   d->next_value_idx = 0;
1370   for (i = 0; i < d->var_cnt; i++)
1371     {
1372       struct variable *v = d->var[i].var;
1373       set_var_case_index (v, d->next_value_idx++);
1374     }
1375   invalidate_proto (d);
1376 }
1377 
1378 /* Returns the number of values occupied by the variables in
1379    dictionary D.  All variables are considered if EXCLUDE_CLASSES
1380    is 0, or it may contain one or more of (1u << DC_ORDINARY),
1381    (1u << DC_SYSTEM), or (1u << DC_SCRATCH) to exclude the
1382    corresponding type of variable.
1383 
1384    The return value may be less than the number of values in one
1385    of dictionary D's cases (as returned by
1386    dict_get_next_value_idx) even if E is 0, because there may be
1387    gaps in D's cases due to deleted variables. */
1388 size_t
dict_count_values(const struct dictionary * d,unsigned int exclude_classes)1389 dict_count_values (const struct dictionary *d, unsigned int exclude_classes)
1390 {
1391   size_t i;
1392   size_t cnt;
1393 
1394   assert ((exclude_classes & ~((1u << DC_ORDINARY)
1395                                | (1u << DC_SYSTEM)
1396                                | (1u << DC_SCRATCH))) == 0);
1397 
1398   cnt = 0;
1399   for (i = 0; i < d->var_cnt; i++)
1400     {
1401       enum dict_class class = var_get_dict_class (d->var[i].var);
1402       if (!(exclude_classes & (1u << class)))
1403         cnt++;
1404     }
1405   return cnt;
1406 }
1407 
1408 /* Returns the case prototype that would result after deleting
1409    all variables from D that are not in one of the
1410    EXCLUDE_CLASSES and compacting the dictionary with
1411    dict_compact().
1412 
1413    The caller must unref the returned caseproto when it is no
1414    longer needed. */
1415 struct caseproto *
dict_get_compacted_proto(const struct dictionary * d,unsigned int exclude_classes)1416 dict_get_compacted_proto (const struct dictionary *d,
1417                           unsigned int exclude_classes)
1418 {
1419   struct caseproto *proto;
1420   size_t i;
1421 
1422   assert ((exclude_classes & ~((1u << DC_ORDINARY)
1423                                | (1u << DC_SYSTEM)
1424                                | (1u << DC_SCRATCH))) == 0);
1425 
1426   proto = caseproto_create ();
1427   for (i = 0; i < d->var_cnt; i++)
1428     {
1429       struct variable *v = d->var[i].var;
1430       if (!(exclude_classes & (1u << var_get_dict_class (v))))
1431         proto = caseproto_add_width (proto, var_get_width (v));
1432     }
1433   return proto;
1434 }
1435 /* Returns the file label for D, or a null pointer if D is
1436    unlabeled (see cmd_file_label()). */
1437 const char *
dict_get_label(const struct dictionary * d)1438 dict_get_label (const struct dictionary *d)
1439 {
1440   return d->label;
1441 }
1442 
1443 /* Sets D's file label to LABEL, truncating it to at most 60 bytes in D's
1444    encoding.
1445 
1446    Removes D's label if LABEL is null or the empty string. */
1447 void
dict_set_label(struct dictionary * d,const char * label)1448 dict_set_label (struct dictionary *d, const char *label)
1449 {
1450   free (d->label);
1451   if (label == NULL || label[0] == '\0')
1452     d->label = NULL;
1453   else
1454     d->label = utf8_encoding_trunc (label, d->encoding, 60);
1455 }
1456 
1457 /* Returns the documents for D, as an UTF-8 encoded string_array.  The
1458    return value is always nonnull; if there are no documents then the
1459    string_arary is empty.*/
1460 const struct string_array *
dict_get_documents(const struct dictionary * d)1461 dict_get_documents (const struct dictionary *d)
1462 {
1463   return &d->documents;
1464 }
1465 
1466 /* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */
1467 void
dict_set_documents(struct dictionary * d,const struct string_array * new_docs)1468 dict_set_documents (struct dictionary *d, const struct string_array *new_docs)
1469 {
1470   size_t i;
1471 
1472   dict_clear_documents (d);
1473 
1474   for (i = 0; i < new_docs->n; i++)
1475     dict_add_document_line (d, new_docs->strings[i], false);
1476 }
1477 
/* Replaces the documents for D by the UTF-8 encoded string NEW_DOCS, which
   is split into individual lines at new-line characters.  A new-line at the
   very end of NEW_DOCS does not produce an extra empty line.  Each line is
   truncated to at most DOC_LINE_LENGTH bytes in D's encoding, without any
   warning. */
void
dict_set_documents_string (struct dictionary *d, const char *new_docs)
{
  const char *p = new_docs;

  dict_clear_documents (d);
  while (*p != '\0')
    {
      const char *newline = strchr (p, '\n');
      size_t line_len = newline != NULL ? (size_t) (newline - p) : strlen (p);
      char *line = xmemdup0 (p, line_len);

      dict_add_document_line (d, line, false);
      free (line);

      /* Step over the line and over the new-line that ended it, if any. */
      p += line_len;
      if (newline != NULL)
        p++;
    }
}
1499 
1500 /* Drops the documents from dictionary D. */
1501 void
dict_clear_documents(struct dictionary * d)1502 dict_clear_documents (struct dictionary *d)
1503 {
1504   string_array_clear (&d->documents);
1505 }
1506 
1507 /* Appends the UTF-8 encoded LINE to the documents in D.  LINE will be
1508    truncated so that it is no more than 80 bytes in the dictionary's
1509    encoding.  If this causes some text to be lost, and ISSUE_WARNING is true,
1510    then a warning will be issued. */
1511 bool
dict_add_document_line(struct dictionary * d,const char * line,bool issue_warning)1512 dict_add_document_line (struct dictionary *d, const char *line,
1513                         bool issue_warning)
1514 {
1515   size_t trunc_len;
1516   bool truncated;
1517 
1518   trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH);
1519   truncated = line[trunc_len] != '\0';
1520   if (truncated && issue_warning)
1521     {
1522       /* Note to translators: "bytes" is correct, not characters */
1523       msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH);
1524     }
1525 
1526   string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len));
1527 
1528   return !truncated;
1529 }
1530 
1531 /* Returns the number of document lines in dictionary D. */
1532 size_t
dict_get_document_line_cnt(const struct dictionary * d)1533 dict_get_document_line_cnt (const struct dictionary *d)
1534 {
1535   return d->documents.n;
1536 }
1537 
1538 /* Returns document line number IDX in dictionary D.  The caller must not
1539    modify or free the returned string. */
1540 const char *
dict_get_document_line(const struct dictionary * d,size_t idx)1541 dict_get_document_line (const struct dictionary *d, size_t idx)
1542 {
1543   assert (idx < d->documents.n);
1544   return d->documents.strings[idx];
1545 }
1546 
1547 /* Creates in D a vector named NAME that contains the CNT
1548    variables in VAR.  Returns true if successful, or false if a
1549    vector named NAME already exists in D. */
1550 bool
dict_create_vector(struct dictionary * d,const char * name,struct variable ** var,size_t cnt)1551 dict_create_vector (struct dictionary *d,
1552                     const char *name,
1553                     struct variable **var, size_t cnt)
1554 {
1555   size_t i;
1556 
1557   assert (cnt > 0);
1558   for (i = 0; i < cnt; i++)
1559     assert (dict_contains_var (d, var[i]));
1560 
1561   if (dict_lookup_vector (d, name) == NULL)
1562     {
1563       d->vector = xnrealloc (d->vector, d->vector_cnt + 1, sizeof *d->vector);
1564       d->vector[d->vector_cnt++] = vector_create (name, var, cnt);
1565       return true;
1566     }
1567   else
1568     return false;
1569 }
1570 
/* Creates in D a vector named NAME that contains the CNT variables in VAR.
   The caller must ensure that D has no vector named NAME yet; this is
   checked with an assertion. */
void
dict_create_vector_assert (struct dictionary *d,
                           const char *name,
                           struct variable **var, size_t cnt)
{
  assert (dict_lookup_vector (d, name) == NULL);

  dict_create_vector (d, name, var, cnt);
}
1582 
1583 /* Returns the vector in D with index IDX, which must be less
1584    than dict_get_vector_cnt (D). */
1585 const struct vector *
dict_get_vector(const struct dictionary * d,size_t idx)1586 dict_get_vector (const struct dictionary *d, size_t idx)
1587 {
1588   assert (idx < d->vector_cnt);
1589 
1590   return d->vector[idx];
1591 }
1592 
1593 /* Returns the number of vectors in D. */
1594 size_t
dict_get_vector_cnt(const struct dictionary * d)1595 dict_get_vector_cnt (const struct dictionary *d)
1596 {
1597   return d->vector_cnt;
1598 }
1599 
1600 /* Looks up and returns the vector within D with the given
1601    NAME. */
1602 const struct vector *
dict_lookup_vector(const struct dictionary * d,const char * name)1603 dict_lookup_vector (const struct dictionary *d, const char *name)
1604 {
1605   size_t i;
1606   for (i = 0; i < d->vector_cnt; i++)
1607     if (!utf8_strcasecmp (vector_get_name (d->vector[i]), name))
1608       return d->vector[i];
1609   return NULL;
1610 }
1611 
1612 /* Deletes all vectors from D. */
1613 void
dict_clear_vectors(struct dictionary * d)1614 dict_clear_vectors (struct dictionary *d)
1615 {
1616   size_t i;
1617 
1618   for (i = 0; i < d->vector_cnt; i++)
1619     vector_destroy (d->vector[i]);
1620   free (d->vector);
1621 
1622   d->vector = NULL;
1623   d->vector_cnt = 0;
1624 }
1625 
1626 /* Multiple response sets. */
1627 
1628 /* Returns the multiple response set in DICT with index IDX, which must be
1629    between 0 and the count returned by dict_get_n_mrsets(), exclusive. */
1630 const struct mrset *
dict_get_mrset(const struct dictionary * dict,size_t idx)1631 dict_get_mrset (const struct dictionary *dict, size_t idx)
1632 {
1633   assert (idx < dict->n_mrsets);
1634   return dict->mrsets[idx];
1635 }
1636 
1637 /* Returns the number of multiple response sets in DICT. */
1638 size_t
dict_get_n_mrsets(const struct dictionary * dict)1639 dict_get_n_mrsets (const struct dictionary *dict)
1640 {
1641   return dict->n_mrsets;
1642 }
1643 
1644 /* Looks for a multiple response set named NAME in DICT.  If it finds one,
1645    returns its index; otherwise, returns SIZE_MAX. */
1646 static size_t
dict_lookup_mrset_idx(const struct dictionary * dict,const char * name)1647 dict_lookup_mrset_idx (const struct dictionary *dict, const char *name)
1648 {
1649   size_t i;
1650 
1651   for (i = 0; i < dict->n_mrsets; i++)
1652     if (!utf8_strcasecmp (name, dict->mrsets[i]->name))
1653       return i;
1654 
1655   return SIZE_MAX;
1656 }
1657 
1658 /* Looks for a multiple response set named NAME in DICT.  If it finds one,
1659    returns it; otherwise, returns NULL. */
1660 const struct mrset *
dict_lookup_mrset(const struct dictionary * dict,const char * name)1661 dict_lookup_mrset (const struct dictionary *dict, const char *name)
1662 {
1663   size_t idx = dict_lookup_mrset_idx (dict, name);
1664   return idx != SIZE_MAX ? dict->mrsets[idx] : NULL;
1665 }
1666 
1667 /* Adds MRSET to DICT, replacing any existing set with the same name.  Returns
1668    true if a set was replaced, false if none existed with the specified name.
1669 
1670    Ownership of MRSET is transferred to DICT. */
1671 bool
dict_add_mrset(struct dictionary * dict,struct mrset * mrset)1672 dict_add_mrset (struct dictionary *dict, struct mrset *mrset)
1673 {
1674   size_t idx;
1675 
1676   assert (mrset_ok (mrset, dict));
1677 
1678   idx = dict_lookup_mrset_idx (dict, mrset->name);
1679   if (idx == SIZE_MAX)
1680     {
1681       dict->mrsets = xrealloc (dict->mrsets,
1682                                (dict->n_mrsets + 1) * sizeof *dict->mrsets);
1683       dict->mrsets[dict->n_mrsets++] = mrset;
1684       return true;
1685     }
1686   else
1687     {
1688       mrset_destroy (dict->mrsets[idx]);
1689       dict->mrsets[idx] = mrset;
1690       return false;
1691     }
1692 }
1693 
1694 /* Looks for a multiple response set in DICT named NAME.  If found, removes it
1695    from DICT and returns true.  If none is found, returns false without
1696    modifying DICT.
1697 
1698    Deleting one multiple response set causes the indexes of other sets within
1699    DICT to change. */
1700 bool
dict_delete_mrset(struct dictionary * dict,const char * name)1701 dict_delete_mrset (struct dictionary *dict, const char *name)
1702 {
1703   size_t idx = dict_lookup_mrset_idx (dict, name);
1704   if (idx != SIZE_MAX)
1705     {
1706       mrset_destroy (dict->mrsets[idx]);
1707       dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets];
1708       return true;
1709     }
1710   else
1711     return false;
1712 }
1713 
1714 /* Deletes all multiple response sets from DICT. */
1715 void
dict_clear_mrsets(struct dictionary * dict)1716 dict_clear_mrsets (struct dictionary *dict)
1717 {
1718   size_t i;
1719 
1720   for (i = 0; i < dict->n_mrsets; i++)
1721     mrset_destroy (dict->mrsets[i]);
1722   free (dict->mrsets);
1723   dict->mrsets = NULL;
1724   dict->n_mrsets = 0;
1725 }
1726 
1727 /* Removes VAR, which must be in DICT, from DICT's multiple response sets. */
1728 static void
dict_unset_mrset_var(struct dictionary * dict,struct variable * var)1729 dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
1730 {
1731   size_t i;
1732 
1733   assert (dict_contains_var (dict, var));
1734 
1735   for (i = 0; i < dict->n_mrsets;)
1736     {
1737       struct mrset *mrset = dict->mrsets[i];
1738       size_t j;
1739 
1740       for (j = 0; j < mrset->n_vars;)
1741         if (mrset->vars[j] == var)
1742           remove_element (mrset->vars, mrset->n_vars--,
1743                           sizeof *mrset->vars, j);
1744         else
1745           j++;
1746 
1747       if (mrset->n_vars < 2)
1748         {
1749           mrset_destroy (mrset);
1750           dict->mrsets[i] = dict->mrsets[--dict->n_mrsets];
1751         }
1752       else
1753         i++;
1754     }
1755 }
1756 
1757 /* Returns D's attribute set.  The caller may examine or modify
1758    the attribute set, but must not destroy it.  Destroying D or
1759    calling dict_set_attributes for D will also destroy D's
1760    attribute set. */
1761 struct attrset *
dict_get_attributes(const struct dictionary * d)1762 dict_get_attributes (const struct dictionary *d)
1763 {
1764   return CONST_CAST (struct attrset *, &d->attributes);
1765 }
1766 
1767 /* Replaces D's attributes set by a copy of ATTRS. */
1768 void
dict_set_attributes(struct dictionary * d,const struct attrset * attrs)1769 dict_set_attributes (struct dictionary *d, const struct attrset *attrs)
1770 {
1771   attrset_destroy (&d->attributes);
1772   attrset_clone (&d->attributes, attrs);
1773 }
1774 
1775 /* Returns true if D has at least one attribute in its attribute
1776    set, false if D's attribute set is empty. */
1777 bool
dict_has_attributes(const struct dictionary * d)1778 dict_has_attributes (const struct dictionary *d)
1779 {
1780   return attrset_count (&d->attributes) > 0;
1781 }
1782 
1783 /* Called from variable.c to notify the dictionary that some property (indicated
1784    by WHAT) of the variable has changed.  OLDVAR is a copy of V as it existed
1785    prior to the change.  OLDVAR is destroyed by this function.
1786 */
1787 void
dict_var_changed(const struct variable * v,unsigned int what,struct variable * oldvar)1788 dict_var_changed (const struct variable *v, unsigned int what, struct variable *oldvar)
1789 {
1790   if (var_has_vardict (v))
1791     {
1792       const struct vardict_info *vardict = var_get_vardict (v);
1793       struct dictionary *d = vardict->dict;
1794 
1795       if (NULL == d)
1796 	return;
1797 
1798       if (what & (VAR_TRAIT_WIDTH | VAR_TRAIT_POSITION))
1799         invalidate_proto (d);
1800 
1801       if (d->changed) d->changed (d, d->changed_data);
1802       if (d->callbacks && d->callbacks->var_changed)
1803         d->callbacks->var_changed (d, var_get_dict_index (v), what, oldvar, d->cb_data);
1804     }
1805   var_unref (oldvar);
1806 }
1807 
1808 
1809 
/* Dictionary used to contain "internal variables".  It is created on demand
   by dict_create_internal_var() and released by dict_destroy_internal_var()
   once it has no variables left; it is NULL whenever it does not exist. */
static struct dictionary *internal_dict;
1812 
1813 /* Create a variable of the specified WIDTH to be used for internal
1814    calculations only.  The variable is assigned case index CASE_IDX. */
1815 struct variable *
dict_create_internal_var(int case_idx,int width)1816 dict_create_internal_var (int case_idx, int width)
1817 {
1818   if (internal_dict == NULL)
1819     internal_dict = dict_create ("UTF-8");
1820 
1821   for (;;)
1822     {
1823       static int counter = INT_MAX / 2;
1824       struct variable *var;
1825       char name[64];
1826 
1827       if (++counter == INT_MAX)
1828         counter = INT_MAX / 2;
1829 
1830       sprintf (name, "$internal%d", counter);
1831       var = dict_create_var (internal_dict, name, width);
1832       if (var != NULL)
1833         {
1834           set_var_case_index (var, case_idx);
1835           return var;
1836         }
1837     }
1838 }
1839 
1840 /* Destroys VAR, which must have been created with
1841    dict_create_internal_var(). */
1842 void
dict_destroy_internal_var(struct variable * var)1843 dict_destroy_internal_var (struct variable *var)
1844 {
1845   if (var != NULL)
1846     {
1847       dict_delete_var (internal_dict, var);
1848 
1849       /* Destroy internal_dict if it has no variables left, just so that
1850          valgrind --leak-check --show-reachable won't show internal_dict. */
1851       if (dict_get_var_cnt (internal_dict) == 0)
1852         {
1853           dict_unref (internal_dict);
1854           internal_dict = NULL;
1855         }
1856     }
1857 }
1858 
1859 int
vardict_get_dict_index(const struct vardict_info * vardict)1860 vardict_get_dict_index (const struct vardict_info *vardict)
1861 {
1862   return vardict - vardict->dict->var;
1863 }
1864