1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011, 2012, 2013, 2014,
3 2015, 2020 Free Software Foundation, Inc.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17
18 #include <config.h>
19
20 #include "data/dictionary.h"
21
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <unistr.h>
26
27 #include "data/attributes.h"
28 #include "data/case.h"
29 #include "data/identifier.h"
30 #include "data/mrset.h"
31 #include "data/settings.h"
32 #include "data/value-labels.h"
33 #include "data/vardict.h"
34 #include "data/variable.h"
35 #include "data/vector.h"
36 #include "libpspp/array.h"
37 #include "libpspp/assertion.h"
38 #include "libpspp/compiler.h"
39 #include "libpspp/hash-functions.h"
40 #include "libpspp/hmap.h"
41 #include "libpspp/i18n.h"
42 #include "libpspp/message.h"
43 #include "libpspp/misc.h"
44 #include "libpspp/pool.h"
45 #include "libpspp/str.h"
46 #include "libpspp/string-array.h"
47 #include "libpspp/ll.h"
48
49 #include "gl/intprops.h"
50 #include "gl/minmax.h"
51 #include "gl/xalloc.h"
52 #include "gl/xmemdup0.h"
53
54 #include "gettext.h"
55 #define _(msgid) gettext (msgid)
56
57 /* A dictionary. */
58 struct dictionary
59 {
60 int ref_cnt;
61 struct vardict_info *var; /* Variables. */
62 size_t var_cnt, var_cap; /* Number of variables, capacity. */
63 struct caseproto *proto; /* Prototype for dictionary cases
64 (updated lazily). */
65 struct hmap name_map; /* Variable index by name. */
66 int next_value_idx; /* Index of next `union value' to allocate. */
67 const struct variable **split; /* SPLIT FILE vars. */
68 size_t split_cnt; /* SPLIT FILE count. */
69 struct variable *weight; /* WEIGHT variable. */
70 struct variable *filter; /* FILTER variable. */
71 casenumber case_limit; /* Current case limit (N command). */
72 char *label; /* File label. */
73 struct string_array documents; /* Documents. */
74 struct vector **vector; /* Vectors of variables. */
75 size_t vector_cnt; /* Number of vectors. */
76 struct attrset attributes; /* Custom attributes. */
77 struct mrset **mrsets; /* Multiple response sets. */
78 size_t n_mrsets; /* Number of multiple response sets. */
79
80 /* Whether variable names must be valid identifiers. Normally, this is
81 true, but sometimes a dictionary is prepared for external use
82 (e.g. output to a CSV file) where names don't have to be valid. */
83 bool names_must_be_ids;
84
85 char *encoding; /* Character encoding of string data */
86
87 const struct dict_callbacks *callbacks; /* Callbacks on dictionary
88 modification */
89 void *cb_data ; /* Data passed to callbacks */
90
91 void (*changed) (struct dictionary *, void *); /* Generic change callback */
92 void *changed_data;
93 };
94
/* Helpers defined later in this file that are needed before their
   definitions. */
static void dict_unset_split_var (struct dictionary *d, struct variable *v,
                                  bool skip_callbacks);
static void dict_unset_mrset_var (struct dictionary *d, struct variable *v);
97
98 /* Compares two double pointers to variables, which should point
99 to elements of a struct dictionary's `var' member array. */
100 static int
compare_var_ptrs(const void * a_,const void * b_,const void * aux UNUSED)101 compare_var_ptrs (const void *a_, const void *b_, const void *aux UNUSED)
102 {
103 struct variable *const *a = a_;
104 struct variable *const *b = b_;
105
106 return *a < *b ? -1 : *a > *b;
107 }
108
109 static void
unindex_var(struct dictionary * d,struct vardict_info * vardict)110 unindex_var (struct dictionary *d, struct vardict_info *vardict)
111 {
112 hmap_delete (&d->name_map, &vardict->name_node);
113 }
114
115 /* This function assumes that vardict->name_node.hash is valid, that is, that
116 its name has not changed since it was hashed (rename_var() updates this
117 hash along with the name itself). */
118 static void
reindex_var(struct dictionary * d,struct vardict_info * vardict,bool skip_callbacks)119 reindex_var (struct dictionary *d, struct vardict_info *vardict, bool skip_callbacks)
120 {
121 struct variable *old = (d->callbacks && d->callbacks->var_changed
122 ? var_clone (vardict->var)
123 : NULL);
124
125 struct variable *var = vardict->var;
126 var_set_vardict (var, vardict);
127 hmap_insert_fast (&d->name_map, &vardict->name_node,
128 vardict->name_node.hash);
129
130 if (! skip_callbacks)
131 {
132 if (d->changed) d->changed (d, d->changed_data);
133 if (old)
134 {
135 d->callbacks->var_changed (d, var_get_dict_index (var), VAR_TRAIT_POSITION, old, d->cb_data);
136 var_unref (old);
137 }
138 }
139 }
140
141 /* Sets the case_index in V's vardict to CASE_INDEX. */
142 static void
set_var_case_index(struct variable * v,int case_index)143 set_var_case_index (struct variable *v, int case_index)
144 {
145 var_get_vardict (v)->case_index = case_index;
146 }
147
148 /* Removes the dictionary variables with indexes from FROM to TO (exclusive)
149 from name_map. */
150 static void
unindex_vars(struct dictionary * d,size_t from,size_t to)151 unindex_vars (struct dictionary *d, size_t from, size_t to)
152 {
153 size_t i;
154
155 for (i = from; i < to; i++)
156 unindex_var (d, &d->var[i]);
157 }
158
159 /* Re-sets the dict_index in the dictionary variables with
160 indexes from FROM to TO (exclusive). */
161 static void
reindex_vars(struct dictionary * d,size_t from,size_t to,bool skip_callbacks)162 reindex_vars (struct dictionary *d, size_t from, size_t to, bool skip_callbacks)
163 {
164 size_t i;
165
166 for (i = from; i < to; i++)
167 reindex_var (d, &d->var[i], skip_callbacks);
168 }
169
170
171
172 /* Returns the encoding for data in dictionary D. The return value is a
173 nonnull string that contains an IANA character set name. */
174 const char *
dict_get_encoding(const struct dictionary * d)175 dict_get_encoding (const struct dictionary *d)
176 {
177 return d->encoding ;
178 }
179
180 /* Returns true if UTF-8 string ID is an acceptable identifier in DICT's
181 encoding, false otherwise. If ISSUE_ERROR is true, issues an explanatory
182 error message on failure. */
183 bool
dict_id_is_valid(const struct dictionary * dict,const char * id,bool issue_error)184 dict_id_is_valid (const struct dictionary *dict, const char *id,
185 bool issue_error)
186 {
187 return (!dict->names_must_be_ids
188 || id_is_valid (id, dict->encoding, issue_error));
189 }
190
191 void
dict_set_change_callback(struct dictionary * d,void (* changed)(struct dictionary *,void *),void * data)192 dict_set_change_callback (struct dictionary *d,
193 void (*changed) (struct dictionary *, void*),
194 void *data)
195 {
196 d->changed = changed;
197 d->changed_data = data;
198 }
199
200 /* Discards dictionary D's caseproto. (It will be regenerated
201 lazily, on demand.) */
202 static void
invalidate_proto(struct dictionary * d)203 invalidate_proto (struct dictionary *d)
204 {
205 caseproto_unref (d->proto);
206 d->proto = NULL;
207 }
208
209 /* Print a representation of dictionary D to stdout, for
210 debugging purposes. */
211 void
dict_dump(const struct dictionary * d)212 dict_dump (const struct dictionary *d)
213 {
214 int i;
215 for (i = 0 ; i < d->var_cnt ; ++i)
216 {
217 const struct variable *v = d->var[i].var;
218 printf ("Name: %s;\tdict_idx: %zu; case_idx: %zu\n",
219 var_get_name (v),
220 var_get_dict_index (v),
221 var_get_case_index (v));
222
223 }
224 }
225
226 /* Associate CALLBACKS with DICT. Callbacks will be invoked whenever
227 the dictionary or any of the variables it contains are modified.
228 Each callback will get passed CALLBACK_DATA.
229 Any callback may be NULL, in which case it'll be ignored.
230 */
231 void
dict_set_callbacks(struct dictionary * dict,const struct dict_callbacks * callbacks,void * callback_data)232 dict_set_callbacks (struct dictionary *dict,
233 const struct dict_callbacks *callbacks,
234 void *callback_data)
235 {
236 dict->callbacks = callbacks;
237 dict->cb_data = callback_data;
238 }
239
240 /* Shallow copy the callbacks from SRC to DEST */
241 void
dict_copy_callbacks(struct dictionary * dest,const struct dictionary * src)242 dict_copy_callbacks (struct dictionary *dest,
243 const struct dictionary *src)
244 {
245 dest->callbacks = src->callbacks;
246 dest->cb_data = src->cb_data;
247 }
248
249 /* Creates and returns a new dictionary with the specified ENCODING. */
250 struct dictionary *
dict_create(const char * encoding)251 dict_create (const char *encoding)
252 {
253 struct dictionary *d = xzalloc (sizeof *d);
254
255 d->encoding = xstrdup (encoding);
256 d->names_must_be_ids = true;
257 hmap_init (&d->name_map);
258 attrset_init (&d->attributes);
259 d->ref_cnt = 1;
260
261 return d;
262 }
263
264 /* Creates and returns a (deep) copy of an existing
265 dictionary.
266
267 The new dictionary's case indexes are copied from the old
268 dictionary. If the new dictionary won't be used to access
269 cases produced with the old dictionary, then the new
270 dictionary's case indexes should be compacted with
271 dict_compact_values to save space.
272
273 Callbacks are not cloned. */
274 struct dictionary *
dict_clone(const struct dictionary * s)275 dict_clone (const struct dictionary *s)
276 {
277 struct dictionary *d;
278 size_t i;
279
280 d = dict_create (s->encoding);
281 dict_set_names_must_be_ids (d, dict_get_names_must_be_ids (s));
282
283 for (i = 0; i < s->var_cnt; i++)
284 {
285 struct variable *sv = s->var[i].var;
286 struct variable *dv = dict_clone_var_assert (d, sv);
287 size_t i;
288
289 for (i = 0; i < var_get_short_name_cnt (sv); i++)
290 var_set_short_name (dv, i, var_get_short_name (sv, i));
291
292 var_get_vardict (dv)->case_index = var_get_vardict (sv)->case_index;
293 }
294
295 d->next_value_idx = s->next_value_idx;
296
297 d->split_cnt = s->split_cnt;
298 if (d->split_cnt > 0)
299 {
300 d->split = xnmalloc (d->split_cnt, sizeof *d->split);
301 for (i = 0; i < d->split_cnt; i++)
302 d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
303 }
304
305 if (s->weight != NULL)
306 dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight)));
307
308 if (s->filter != NULL)
309 dict_set_filter (d, dict_lookup_var_assert (d, var_get_name (s->filter)));
310
311 d->case_limit = s->case_limit;
312 dict_set_label (d, dict_get_label (s));
313 dict_set_documents (d, dict_get_documents (s));
314
315 d->vector_cnt = s->vector_cnt;
316 d->vector = xnmalloc (d->vector_cnt, sizeof *d->vector);
317 for (i = 0; i < s->vector_cnt; i++)
318 d->vector[i] = vector_clone (s->vector[i], s, d);
319
320 dict_set_attributes (d, dict_get_attributes (s));
321
322 for (i = 0; i < s->n_mrsets; i++)
323 {
324 const struct mrset *old = s->mrsets[i];
325 struct mrset *new;
326 size_t j;
327
328 /* Clone old mrset, then replace vars from D by vars from S. */
329 new = mrset_clone (old);
330 for (j = 0; j < new->n_vars; j++)
331 new->vars[j] = dict_lookup_var_assert (d, var_get_name (new->vars[j]));
332
333 dict_add_mrset (d, new);
334 }
335
336 return d;
337 }
338
339
340
341 /* Returns the SPLIT FILE vars (see cmd_split_file()). Call
342 dict_get_split_cnt() to determine how many SPLIT FILE vars
343 there are. Returns a null pointer if and only if there are no
344 SPLIT FILE vars. */
345 const struct variable *const *
dict_get_split_vars(const struct dictionary * d)346 dict_get_split_vars (const struct dictionary *d)
347 {
348 return d->split;
349 }
350
351 /* Returns the number of SPLIT FILE vars. */
352 size_t
dict_get_split_cnt(const struct dictionary * d)353 dict_get_split_cnt (const struct dictionary *d)
354 {
355 return d->split_cnt;
356 }
357
358 /* Removes variable V, which must be in D, from D's set of split
359 variables. */
360 static void
dict_unset_split_var(struct dictionary * d,struct variable * v,bool skip_callbacks)361 dict_unset_split_var (struct dictionary *d, struct variable *v, bool skip_callbacks)
362 {
363 int orig_count;
364
365 assert (dict_contains_var (d, v));
366
367 orig_count = d->split_cnt;
368 d->split_cnt = remove_equal (d->split, d->split_cnt, sizeof *d->split,
369 &v, compare_var_ptrs, NULL);
370 if (orig_count != d->split_cnt && !skip_callbacks)
371 {
372 if (d->changed) d->changed (d, d->changed_data);
373 /* We changed the set of split variables so invoke the
374 callback. */
375 if (d->callbacks && d->callbacks->split_changed)
376 d->callbacks->split_changed (d, d->cb_data);
377 }
378 }
379
380
381 /* Sets CNT split vars SPLIT in dictionary D. */
382 static void
dict_set_split_vars__(struct dictionary * d,struct variable * const * split,size_t cnt,bool skip_callbacks)383 dict_set_split_vars__ (struct dictionary *d,
384 struct variable *const *split, size_t cnt, bool skip_callbacks)
385 {
386 assert (cnt == 0 || split != NULL);
387
388 d->split_cnt = cnt;
389 if (cnt > 0)
390 {
391 d->split = xnrealloc (d->split, cnt, sizeof *d->split) ;
392 memcpy (d->split, split, cnt * sizeof *d->split);
393 }
394 else
395 {
396 free (d->split);
397 d->split = NULL;
398 }
399
400 if (!skip_callbacks)
401 {
402 if (d->changed) d->changed (d, d->changed_data);
403 if (d->callbacks && d->callbacks->split_changed)
404 d->callbacks->split_changed (d, d->cb_data);
405 }
406 }
407
/* Makes the CNT variables in SPLIT the split variables of dictionary D,
   with callbacks enabled. */
void
dict_set_split_vars (struct dictionary *d,
                     struct variable *const *split, size_t cnt)
{
  const bool skip_callbacks = false;

  dict_set_split_vars__ (d, split, cnt, skip_callbacks);
}
415
416
417
418 /* Deletes variable V from dictionary D and frees V.
419
420 This is a very bad idea if there might be any pointers to V
421 from outside D. In general, no variable in the active dataset's
422 dictionary should be deleted when any transformations are
423 active on the dictionary's dataset, because those
424 transformations might reference the deleted variable. The
425 safest time to delete a variable is just after a procedure has
426 been executed, as done by DELETE VARIABLES.
427
428 Pointers to V within D are not a problem, because
429 dict_delete_var() knows to remove V from split variables,
430 weights, filters, etc. */
431 static void
dict_delete_var__(struct dictionary * d,struct variable * v,bool skip_callbacks)432 dict_delete_var__ (struct dictionary *d, struct variable *v, bool skip_callbacks)
433 {
434 int dict_index = var_get_dict_index (v);
435 const int case_index = var_get_case_index (v);
436
437 assert (dict_contains_var (d, v));
438
439 dict_unset_split_var (d, v, skip_callbacks);
440 dict_unset_mrset_var (d, v);
441
442 if (d->weight == v)
443 dict_set_weight (d, NULL);
444
445 if (d->filter == v)
446 dict_set_filter (d, NULL);
447
448 dict_clear_vectors (d);
449
450 /* Remove V from var array. */
451 unindex_vars (d, dict_index, d->var_cnt);
452 remove_element (d->var, d->var_cnt, sizeof *d->var, dict_index);
453 d->var_cnt--;
454
455 /* Update dict_index for each affected variable. */
456 reindex_vars (d, dict_index, d->var_cnt, skip_callbacks);
457
458 /* Free memory. */
459 var_clear_vardict (v);
460
461 if (! skip_callbacks)
462 {
463 if (d->changed) d->changed (d, d->changed_data);
464 if (d->callbacks && d->callbacks->var_deleted)
465 d->callbacks->var_deleted (d, v, dict_index, case_index, d->cb_data);
466 }
467
468 invalidate_proto (d);
469 var_unref (v);
470 }
471
/* Deletes variable V from dictionary D and frees V, with callbacks
   enabled.

   This is a very bad idea if there might be any pointers to V
   from outside D.  In general, no variable in the active dataset's
   dictionary should be deleted when any transformations are
   active on the dictionary's dataset, because those
   transformations might reference the deleted variable.  The
   safest time to delete a variable is just after a procedure has
   been executed, as done by DELETE VARIABLES.

   Pointers to V within D are not a problem, because
   dict_delete_var() knows to remove V from split variables,
   weights, filters, etc. */
void
dict_delete_var (struct dictionary *d, struct variable *v)
{
  const bool skip_callbacks = false;

  dict_delete_var__ (d, v, skip_callbacks);
}
490
491
/* Deletes from D each of the COUNT variables listed in VARS, in order.
   VARS may be null only if COUNT is 0.  This is unsafe; see the comment on
   dict_delete_var() for details. */
void
dict_delete_vars (struct dictionary *d,
                  struct variable *const *vars, size_t count)
{
  assert (count == 0 || vars != NULL);

  /* FIXME: this can be done in O(count) time, but this algorithm
     is O(count**2). */
  for (size_t i = 0; i < count; i++)
    dict_delete_var (d, vars[i]);
}
505
506 /* Deletes the COUNT variables in D starting at index IDX. This
507 is unsafe; see the comment on dict_delete_var() for
508 details. Deleting consecutive vars will result in less callbacks
509 compared to iterating over dict_delete_var.
510 A simple while loop over dict_delete_var will
511 produce (d->var_cnt - IDX) * COUNT variable changed callbacks
512 plus COUNT variable delete callbacks.
513 This here produces d->var_cnt - IDX variable changed callbacks
514 plus COUNT variable delete callbacks. */
515 void
dict_delete_consecutive_vars(struct dictionary * d,size_t idx,size_t count)516 dict_delete_consecutive_vars (struct dictionary *d, size_t idx, size_t count)
517 {
518 assert (idx + count <= d->var_cnt);
519
520 /* We need to store the variable and the corresponding case_index
521 for the delete callbacks later. We store them in a linked list.*/
522 struct delvar {
523 struct ll ll;
524 struct variable *var;
525 int case_index;
526 };
527 struct ll_list list = LL_INITIALIZER (list);
528
529 for (size_t i = idx; i < idx + count; i++)
530 {
531 struct delvar *dv = xmalloc (sizeof (struct delvar));
532 assert (dv);
533 struct variable *v = d->var[i].var;
534
535 dict_unset_split_var (d, v, false);
536 dict_unset_mrset_var (d, v);
537
538 if (d->weight == v)
539 dict_set_weight (d, NULL);
540
541 if (d->filter == v)
542 dict_set_filter (d, NULL);
543
544 dv->var = v;
545 dv->case_index = var_get_case_index (v);
546 ll_push_tail (&list, (struct ll *)dv);
547 }
548
549 dict_clear_vectors (d);
550
551 /* Remove variables from var array. */
552 unindex_vars (d, idx, d->var_cnt);
553 remove_range (d->var, d->var_cnt, sizeof *d->var, idx, count);
554 d->var_cnt -= count;
555
556 /* Reindexing will result variable-changed callback */
557 reindex_vars (d, idx, d->var_cnt, false);
558
559 invalidate_proto (d);
560 if (d->changed) d->changed (d, d->changed_data);
561
562 /* Now issue the variable delete callbacks and delete
563 the variables. The vardict is not valid at this point
564 anymore. That is the reason why we stored the
565 caseindex before reindexing. */
566 for (size_t vi = idx; vi < idx + count; vi++)
567 {
568 struct delvar *dv = (struct delvar *) ll_pop_head (&list);
569 var_clear_vardict (dv->var);
570 if (d->callbacks && d->callbacks->var_deleted)
571 d->callbacks->var_deleted (d, dv->var, vi, dv->case_index, d->cb_data);
572 var_unref (dv->var);
573 free (dv);
574 }
575 }
576
577 /* Deletes scratch variables from dictionary D. */
578 void
dict_delete_scratch_vars(struct dictionary * d)579 dict_delete_scratch_vars (struct dictionary *d)
580 {
581 int i;
582
583 /* FIXME: this can be done in O(count) time, but this algorithm
584 is O(count**2). */
585 for (i = 0; i < d->var_cnt;)
586 if (var_get_dict_class (d->var[i].var) == DC_SCRATCH)
587 dict_delete_var (d, d->var[i].var);
588 else
589 i++;
590 }
591
592
593
594 /* Clears the contents from a dictionary without destroying the
595 dictionary itself. */
596 static void
dict_clear__(struct dictionary * d,bool skip_callbacks)597 dict_clear__ (struct dictionary *d, bool skip_callbacks)
598 {
599 /* FIXME? Should we really clear case_limit, label, documents?
600 Others are necessarily cleared by deleting all the variables.*/
601 while (d->var_cnt > 0)
602 {
603 dict_delete_var__ (d, d->var[d->var_cnt - 1].var, skip_callbacks);
604 }
605
606 free (d->var);
607 d->var = NULL;
608 d->var_cnt = d->var_cap = 0;
609 invalidate_proto (d);
610 hmap_clear (&d->name_map);
611 d->next_value_idx = 0;
612 dict_set_split_vars__ (d, NULL, 0, skip_callbacks);
613
614 if (skip_callbacks)
615 {
616 d->weight = NULL;
617 d->filter = NULL;
618 }
619 else
620 {
621 dict_set_weight (d, NULL);
622 dict_set_filter (d, NULL);
623 }
624 d->case_limit = 0;
625 free (d->label);
626 d->label = NULL;
627 string_array_clear (&d->documents);
628 dict_clear_vectors (d);
629 attrset_clear (&d->attributes);
630 }
631
/* Empties dictionary D without destroying the dictionary itself, with
   callbacks enabled. */
void
dict_clear (struct dictionary *d)
{
  const bool skip_callbacks = false;

  dict_clear__ (d, skip_callbacks);
}
639
640 /* Clears a dictionary and destroys it. */
641 static void
_dict_destroy(struct dictionary * d)642 _dict_destroy (struct dictionary *d)
643 {
644 /* In general, we don't want callbacks occurring, if the dictionary
645 is being destroyed */
646 d->callbacks = NULL ;
647
648 dict_clear__ (d, true);
649 string_array_destroy (&d->documents);
650 hmap_destroy (&d->name_map);
651 attrset_destroy (&d->attributes);
652 dict_clear_mrsets (d);
653 free (d->encoding);
654 free (d);
655 }
656
657 struct dictionary *
dict_ref(struct dictionary * d)658 dict_ref (struct dictionary *d)
659 {
660 d->ref_cnt++;
661 return d;
662 }
663
664 void
dict_unref(struct dictionary * d)665 dict_unref (struct dictionary *d)
666 {
667 if (d == NULL)
668 return;
669 d->ref_cnt--;
670 assert (d->ref_cnt >= 0);
671 if (d->ref_cnt == 0)
672 _dict_destroy (d);
673 }
674
675 /* Returns the number of variables in D. */
676 size_t
dict_get_var_cnt(const struct dictionary * d)677 dict_get_var_cnt (const struct dictionary *d)
678 {
679 return d->var_cnt;
680 }
681
682 /* Returns the variable in D with dictionary index IDX, which
683 must be between 0 and the count returned by
684 dict_get_var_cnt(), exclusive. */
685 struct variable *
dict_get_var(const struct dictionary * d,size_t idx)686 dict_get_var (const struct dictionary *d, size_t idx)
687 {
688 assert (idx < d->var_cnt);
689
690 return d->var[idx].var;
691 }
692
693 /* Sets *VARS to an array of pointers to variables in D and *CNT
694 to the number of variables in *D. All variables are returned
695 except for those, if any, in the classes indicated by EXCLUDE.
696 (There is no point in putting DC_SYSTEM in EXCLUDE as
697 dictionaries never include system variables.) */
698 void
dict_get_vars(const struct dictionary * d,const struct variable *** vars,size_t * cnt,enum dict_class exclude)699 dict_get_vars (const struct dictionary *d, const struct variable ***vars,
700 size_t *cnt, enum dict_class exclude)
701 {
702 dict_get_vars_mutable (d, (struct variable ***) vars, cnt, exclude);
703 }
704
705 /* Sets *VARS to an array of pointers to variables in D and *CNT
706 to the number of variables in *D. All variables are returned
707 except for those, if any, in the classes indicated by EXCLUDE.
708 (There is no point in putting DC_SYSTEM in EXCLUDE as
709 dictionaries never include system variables.) */
710 void
dict_get_vars_mutable(const struct dictionary * d,struct variable *** vars,size_t * cnt,enum dict_class exclude)711 dict_get_vars_mutable (const struct dictionary *d, struct variable ***vars,
712 size_t *cnt, enum dict_class exclude)
713 {
714 size_t count;
715 size_t i;
716
717 assert (exclude == (exclude & DC_ALL));
718
719 count = 0;
720 for (i = 0; i < d->var_cnt; i++)
721 {
722 enum dict_class class = var_get_dict_class (d->var[i].var);
723 if (!(class & exclude))
724 count++;
725 }
726
727 *vars = xnmalloc (count, sizeof **vars);
728 *cnt = 0;
729 for (i = 0; i < d->var_cnt; i++)
730 {
731 enum dict_class class = var_get_dict_class (d->var[i].var);
732 if (!(class & exclude))
733 (*vars)[(*cnt)++] = d->var[i].var;
734 }
735 assert (*cnt == count);
736 }
737
738 static struct variable *
add_var_with_case_index(struct dictionary * d,struct variable * v,int case_index)739 add_var_with_case_index (struct dictionary *d, struct variable *v,
740 int case_index)
741 {
742 struct vardict_info *vardict;
743
744 assert (case_index >= d->next_value_idx);
745
746 /* Update dictionary. */
747 if (d->var_cnt >= d->var_cap)
748 {
749 size_t i;
750
751 d->var = x2nrealloc (d->var, &d->var_cap, sizeof *d->var);
752 hmap_clear (&d->name_map);
753 for (i = 0; i < d->var_cnt; i++)
754 {
755 var_set_vardict (d->var[i].var, &d->var[i]);
756 hmap_insert_fast (&d->name_map, &d->var[i].name_node,
757 d->var[i].name_node.hash);
758 }
759 }
760
761 vardict = &d->var[d->var_cnt++];
762 vardict->dict = d;
763 vardict->var = v;
764 hmap_insert (&d->name_map, &vardict->name_node,
765 utf8_hash_case_string (var_get_name (v), 0));
766 vardict->case_index = case_index;
767 var_set_vardict (v, vardict);
768
769 if (d->changed) d->changed (d, d->changed_data);
770 if (d->callbacks && d->callbacks->var_added)
771 d->callbacks->var_added (d, var_get_dict_index (v), d->cb_data);
772
773 invalidate_proto (d);
774 d->next_value_idx = case_index + 1;
775
776 return v;
777 }
778
779 static struct variable *
add_var(struct dictionary * d,struct variable * v)780 add_var (struct dictionary *d, struct variable *v)
781 {
782 return add_var_with_case_index (d, v, d->next_value_idx);
783 }
784
/* Creates a new variable in D with the given NAME and WIDTH and returns
   it.  Returns a null pointer, without changing D, if NAME would
   duplicate that of an existing variable in the dictionary. */
struct variable *
dict_create_var (struct dictionary *d, const char *name, int width)
{
  if (dict_lookup_var (d, name) != NULL)
    return NULL;

  return dict_create_var_assert (d, name, width);
}
795
/* Creates a new variable in D with the given NAME and WIDTH and returns
   it.  Assert-fails if NAME would duplicate that of an existing variable
   in the dictionary. */
struct variable *
dict_create_var_assert (struct dictionary *d, const char *name, int width)
{
  assert (dict_lookup_var (d, name) == NULL);

  struct variable *new_var = var_create (name, width);
  return add_var (d, new_var);
}
805
/* Adds to D a copy of existing variable OLD_VAR, under OLD_VAR's own name,
   and returns the copy.  OLD_VAR need not be in D or in any dictionary.
   Returns a null pointer if OLD_VAR's name would duplicate that of an
   existing variable in the dictionary. */
struct variable *
dict_clone_var (struct dictionary *d, const struct variable *old_var)
{
  const char *name = var_get_name (old_var);

  return dict_clone_var_as (d, old_var, name);
}
815
/* Adds to D a copy of existing variable OLD_VAR, under OLD_VAR's own name,
   and returns the copy.  OLD_VAR need not be in D or in any dictionary.
   Assert-fails if OLD_VAR's name would duplicate that of an existing
   variable in the dictionary. */
struct variable *
dict_clone_var_assert (struct dictionary *d, const struct variable *old_var)
{
  const char *name = var_get_name (old_var);

  return dict_clone_var_as_assert (d, old_var, name);
}
825
/* Adds to D a copy of existing variable OLD_VAR, renamed to NAME, and
   returns the copy.  OLD_VAR need not be in D or in any dictionary.
   Returns a null pointer, without changing D, if NAME would duplicate
   that of an existing variable in the dictionary. */
struct variable *
dict_clone_var_as (struct dictionary *d, const struct variable *old_var,
                   const char *name)
{
  if (dict_lookup_var (d, name) != NULL)
    return NULL;

  return dict_clone_var_as_assert (d, old_var, name);
}
838
/* Adds to D a copy of existing variable OLD_VAR, renamed to NAME, and
   returns the copy.  OLD_VAR need not be in D or in any dictionary.
   Assert-fails if NAME would duplicate that of an existing variable
   in the dictionary. */
struct variable *
dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var,
                          const char *name)
{
  struct variable *copy = var_clone (old_var);

  assert (dict_lookup_var (d, name) == NULL);

  /* The copy is renamed before it is added because add_var() hashes the
     variable's name into D's name map. */
  var_set_name (copy, name);
  return add_var (d, copy);
}
852
/* Adds to D a copy of existing variable OLD_VAR, under OLD_VAR's own name
   and with OLD_VAR's case index, and returns the copy.  Assert-fails if
   OLD_VAR's name would duplicate that of an existing variable in the
   dictionary. */
struct variable *
dict_clone_var_in_place_assert (struct dictionary *d,
                                const struct variable *old_var)
{
  assert (dict_lookup_var (d, var_get_name (old_var)) == NULL);

  struct variable *copy = var_clone (old_var);
  return add_var_with_case_index (d, copy, var_get_case_index (old_var));
}
861
862 /* Returns the variable named NAME in D, or a null pointer if no
863 variable has that name. */
864 struct variable *
dict_lookup_var(const struct dictionary * d,const char * name)865 dict_lookup_var (const struct dictionary *d, const char *name)
866 {
867 struct vardict_info *vardict;
868
869 HMAP_FOR_EACH_WITH_HASH (vardict, struct vardict_info, name_node,
870 utf8_hash_case_string (name, 0), &d->name_map)
871 {
872 struct variable *var = vardict->var;
873 if (!utf8_strcasecmp (var_get_name (var), name))
874 return var;
875 }
876
877 return NULL;
878 }
879
/* Looks up the variable named NAME in D and returns it.  Assert-fails if
   no variable has that name. */
struct variable *
dict_lookup_var_assert (const struct dictionary *d, const char *name)
{
  struct variable *found = dict_lookup_var (d, name);

  assert (found != NULL);
  return found;
}
889
/* Returns true if variable V is in dictionary D, that is, if V has a
   vardict and that vardict belongs to D; false otherwise. */
bool
dict_contains_var (const struct dictionary *d, const struct variable *v)
{
  if (!var_has_vardict (v))
    return false;

  return vardict_get_dictionary (var_get_vardict (v)) == d;
}
898
899 /* Moves V to 0-based position IDX in D. Other variables in D,
900 if any, retain their relative positions. Runs in time linear
901 in the distance moved. */
902 void
dict_reorder_var(struct dictionary * d,struct variable * v,size_t new_index)903 dict_reorder_var (struct dictionary *d, struct variable *v, size_t new_index)
904 {
905 size_t old_index = var_get_dict_index (v);
906
907 assert (new_index < d->var_cnt);
908
909 unindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1);
910 move_element (d->var, d->var_cnt, sizeof *d->var, old_index, new_index);
911 reindex_vars (d, MIN (old_index, new_index), MAX (old_index, new_index) + 1, false);
912 }
913
914 /* Reorders the variables in D, placing the COUNT variables
915 listed in ORDER in that order at the beginning of D. The
916 other variables in D, if any, retain their relative
917 positions. */
918 void
dict_reorder_vars(struct dictionary * d,struct variable * const * order,size_t count)919 dict_reorder_vars (struct dictionary *d,
920 struct variable *const *order, size_t count)
921 {
922 struct vardict_info *new_var;
923 size_t i;
924
925 assert (count == 0 || order != NULL);
926 assert (count <= d->var_cnt);
927
928 new_var = xnmalloc (d->var_cap, sizeof *new_var);
929
930 /* Add variables in ORDER to new_var. */
931 for (i = 0; i < count; i++)
932 {
933 struct vardict_info *old_var;
934
935 assert (dict_contains_var (d, order[i]));
936
937 old_var = var_get_vardict (order[i]);
938 new_var[i] = *old_var;
939 old_var->dict = NULL;
940 }
941
942 /* Add remaining variables to new_var. */
943 for (i = 0; i < d->var_cnt; i++)
944 if (d->var[i].dict != NULL)
945 new_var[count++] = d->var[i];
946 assert (count == d->var_cnt);
947
948 /* Replace old vardicts by new ones. */
949 free (d->var);
950 d->var = new_var;
951
952 hmap_clear (&d->name_map);
953 reindex_vars (d, 0, d->var_cnt, false);
954 }
955
956 /* Changes the name of variable V that is currently in a dictionary to
957 NEW_NAME. */
958 static void
rename_var(struct variable * v,const char * new_name)959 rename_var (struct variable *v, const char *new_name)
960 {
961 struct vardict_info *vardict = var_get_vardict (v);
962 var_clear_vardict (v);
963 var_set_name (v, new_name);
964 vardict->name_node.hash = utf8_hash_case_string (new_name, 0);
965 var_set_vardict (v, vardict);
966 }
967
968 /* Tries to changes the name of V in D to name NEW_NAME. Returns true if
969 successful, false if a variable (other than V) with the given name already
970 exists in D. */
971 bool
dict_try_rename_var(struct dictionary * d,struct variable * v,const char * new_name)972 dict_try_rename_var (struct dictionary *d, struct variable *v,
973 const char *new_name)
974 {
975 struct variable *conflict = dict_lookup_var (d, new_name);
976 if (conflict && v != conflict)
977 return false;
978
979 struct variable *old = var_clone (v);
980 unindex_var (d, var_get_vardict (v));
981 rename_var (v, new_name);
982 reindex_var (d, var_get_vardict (v), false);
983
984 if (settings_get_algorithm () == ENHANCED)
985 var_clear_short_names (v);
986
987 if (d->changed) d->changed (d, d->changed_data);
988 if (d->callbacks && d->callbacks->var_changed)
989 d->callbacks->var_changed (d, var_get_dict_index (v), VAR_TRAIT_NAME, old, d->cb_data);
990
991 var_unref (old);
992
993 return true;
994 }
995
996 /* Changes the name of V in D to name NEW_NAME. Assert-fails if
997 a variable named NEW_NAME is already in D, except that
998 NEW_NAME may be the same as V's existing name. */
999 void
dict_rename_var(struct dictionary * d,struct variable * v,const char * new_name)1000 dict_rename_var (struct dictionary *d, struct variable *v,
1001 const char *new_name)
1002 {
1003 bool ok UNUSED = dict_try_rename_var (d, v, new_name);
1004 assert (ok);
1005 }
1006
1007 /* Renames COUNT variables specified in VARS to the names given
1008 in NEW_NAMES within dictionary D. If the renaming would
1009 result in a duplicate variable name, returns false and stores a
1010 name that would be duplicated into *ERR_NAME (if ERR_NAME is
1011 non-null). Otherwise, the renaming is successful, and true
1012 is returned. */
1013 bool
dict_rename_vars(struct dictionary * d,struct variable ** vars,char ** new_names,size_t count,char ** err_name)1014 dict_rename_vars (struct dictionary *d,
1015 struct variable **vars, char **new_names, size_t count,
1016 char **err_name)
1017 {
1018 struct pool *pool;
1019 char **old_names;
1020 size_t i;
1021
1022 assert (count == 0 || vars != NULL);
1023 assert (count == 0 || new_names != NULL);
1024
1025 /* Save the names of the variables to be renamed. */
1026 pool = pool_create ();
1027 old_names = pool_nalloc (pool, count, sizeof *old_names);
1028 for (i = 0; i < count; i++)
1029 old_names[i] = pool_strdup (pool, var_get_name (vars[i]));
1030
1031 /* Remove the variables to be renamed from the name hash,
1032 and rename them. */
1033 for (i = 0; i < count; i++)
1034 {
1035 unindex_var (d, var_get_vardict (vars[i]));
1036 rename_var (vars[i], new_names[i]);
1037 }
1038
1039 /* Add the renamed variables back into the name hash,
1040 checking for conflicts. */
1041 for (i = 0; i < count; i++)
1042 {
1043 if (dict_lookup_var (d, var_get_name (vars[i])) != NULL)
1044 {
1045 /* There is a name conflict.
1046 Back out all the name changes that have already
1047 taken place, and indicate failure. */
1048 size_t fail_idx = i;
1049 if (err_name != NULL)
1050 *err_name = new_names[i];
1051
1052 for (i = 0; i < fail_idx; i++)
1053 unindex_var (d, var_get_vardict (vars[i]));
1054
1055 for (i = 0; i < count; i++)
1056 {
1057 rename_var (vars[i], old_names[i]);
1058 reindex_var (d, var_get_vardict (vars[i]), false);
1059 }
1060
1061 pool_destroy (pool);
1062 return false;
1063 }
1064 reindex_var (d, var_get_vardict (vars[i]), false);
1065 }
1066
1067 /* Clear short names. */
1068 if (settings_get_algorithm () == ENHANCED)
1069 for (i = 0; i < count; i++)
1070 var_clear_short_names (vars[i]);
1071
1072 pool_destroy (pool);
1073 return true;
1074 }
1075
1076 /* Returns true if a variable named NAME may be inserted in DICT;
1077 that is, if there is not already a variable with that name in
1078 DICT and if NAME is not a reserved word. (The caller's checks
1079 have already verified that NAME is otherwise acceptable as a
1080 variable name.) */
1081 static bool
var_name_is_insertable(const struct dictionary * dict,const char * name)1082 var_name_is_insertable (const struct dictionary *dict, const char *name)
1083 {
1084 return (dict_lookup_var (dict, name) == NULL
1085 && lex_id_to_token (ss_cstr (name)) == T_ID);
1086 }
1087
1088 static char *
make_hinted_name(const struct dictionary * dict,const char * hint)1089 make_hinted_name (const struct dictionary *dict, const char *hint)
1090 {
1091 size_t hint_len = strlen (hint);
1092 bool dropped = false;
1093 char *root, *rp;
1094 size_t ofs;
1095 int mblen;
1096
1097 /* The allocation size here is OK: characters that are copied directly fit
1098 OK, and characters that are not copied directly are replaced by a single
1099 '_' byte. If u8_mbtouc() replaces bad input by 0xfffd, then that will get
1100 replaced by '_' too. */
1101 root = rp = xmalloc (hint_len + 1);
1102 for (ofs = 0; ofs < hint_len; ofs += mblen)
1103 {
1104 ucs4_t uc;
1105
1106 mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs),
1107 hint_len - ofs);
1108 if (rp == root
1109 ? lex_uc_is_id1 (uc) && uc != '$'
1110 : lex_uc_is_idn (uc))
1111 {
1112 if (dropped)
1113 {
1114 *rp++ = '_';
1115 dropped = false;
1116 }
1117 rp += u8_uctomb (CHAR_CAST (uint8_t *, rp), uc, 6);
1118 }
1119 else if (rp != root)
1120 dropped = true;
1121 }
1122 *rp = '\0';
1123
1124 if (root[0] != '\0')
1125 {
1126 unsigned long int i;
1127
1128 if (var_name_is_insertable (dict, root))
1129 return root;
1130
1131 for (i = 0; i < ULONG_MAX; i++)
1132 {
1133 char suffix[INT_BUFSIZE_BOUND (i) + 1];
1134 char *name;
1135
1136 suffix[0] = '_';
1137 if (!str_format_26adic (i + 1, true, &suffix[1], sizeof suffix - 1))
1138 NOT_REACHED ();
1139
1140 name = utf8_encoding_concat (root, suffix, dict->encoding, 64);
1141 if (var_name_is_insertable (dict, name))
1142 {
1143 free (root);
1144 return name;
1145 }
1146 free (name);
1147 }
1148 }
1149
1150 free (root);
1151
1152 return NULL;
1153 }
1154
/* Returns a malloc()'d name of the form "VAR" followed by a number
   zero-padded to at least three digits, using the smallest number
   for which DICT has no variable of that name.

   The search starts at 1, or at *NUM_START if NUM_START is non-null
   and *NUM_START is greater than 1.  If NUM_START is non-null,
   *NUM_START is set to one more than the number used. */
static char *
make_numeric_name (const struct dictionary *dict, unsigned long int *num_start)
{
  unsigned long int number = 1;

  if (num_start != NULL)
    number = MAX (*num_start, 1);

  for (; number < ULONG_MAX; number++)
    {
      char name[3 + INT_STRLEN_BOUND (number) + 1];

      sprintf (name, "VAR%03lu", number);
      if (dict_lookup_var (dict, name) != NULL)
        continue;

      if (num_start != NULL)
        *num_start = number + 1;
      return xstrdup (name);
    }

  NOT_REACHED ();
}
1177
1178
/* Devises and returns a variable name unique within DICT.  The
   caller owns the returned name and must free it with free() when
   it is no longer needed.

   HINT, if it is non-null, is used as a suggestion that is modified
   as necessary for suitability as a variable name and for
   uniqueness.

   If HINT is null or nothing usable can be derived from it, a name
   of the form "VAR" plus a zero-padded number is generated instead,
   using the smallest unused number.  If NUM_START is non-null, its
   value is the minimum number to check, and it is updated to the
   next number to be checked. */
char *
dict_make_unique_var_name (const struct dictionary *dict, const char *hint,
                           unsigned long int *num_start)
{
  char *name = hint != NULL ? make_hinted_name (dict, hint) : NULL;

  if (name == NULL)
    name = make_numeric_name (dict, num_start);
  return name;
}
1205
1206 /* Returns whether variable names must be valid identifiers. Normally, this is
1207 true, but sometimes a dictionary is prepared for external use (e.g. output
1208 to a CSV file) where names don't have to be valid. */
1209 bool
dict_get_names_must_be_ids(const struct dictionary * d)1210 dict_get_names_must_be_ids (const struct dictionary *d)
1211 {
1212 return d->names_must_be_ids;
1213 }
1214
1215 /* Sets whether variable names must be valid identifiers. Normally, this is
1216 true, but sometimes a dictionary is prepared for external use (e.g. output
1217 to a CSV file) where names don't have to be valid.
1218
1219 Changing this setting from false to true doesn't make the dictionary check
1220 all the existing variable names, so it can cause an invariant violation. */
1221 void
dict_set_names_must_be_ids(struct dictionary * d,bool names_must_be_ids)1222 dict_set_names_must_be_ids (struct dictionary *d, bool names_must_be_ids)
1223 {
1224 d->names_must_be_ids = names_must_be_ids;
1225 }
1226
1227 /* Returns the weighting variable in dictionary D, or a null
1228 pointer if the dictionary is unweighted. */
1229 struct variable *
dict_get_weight(const struct dictionary * d)1230 dict_get_weight (const struct dictionary *d)
1231 {
1232 assert (d->weight == NULL || dict_contains_var (d, d->weight));
1233
1234 return d->weight;
1235 }
1236
1237 /* Returns the value of D's weighting variable in case C, except
1238 that a negative weight is returned as 0. Returns 1 if the
1239 dictionary is unweighted. Will warn about missing, negative,
1240 or zero values if *WARN_ON_INVALID is true. The function will
1241 set *WARN_ON_INVALID to false if an invalid weight is
1242 found. */
1243 double
dict_get_case_weight(const struct dictionary * d,const struct ccase * c,bool * warn_on_invalid)1244 dict_get_case_weight (const struct dictionary *d, const struct ccase *c,
1245 bool *warn_on_invalid)
1246 {
1247 assert (c != NULL);
1248
1249 if (d->weight == NULL)
1250 return 1.0;
1251 else
1252 {
1253 double w = case_num (c, d->weight);
1254
1255 return var_force_valid_weight (d->weight, w, warn_on_invalid);
1256 }
1257 }
1258
1259 /* Returns the format to use for weights. */
1260 const struct fmt_spec *
dict_get_weight_format(const struct dictionary * d)1261 dict_get_weight_format (const struct dictionary *d)
1262 {
1263 return d->weight ? var_get_print_format (d->weight) : &F_8_0;
1264 }
1265
1266 /* Sets the weighting variable of D to V, or turning off
1267 weighting if V is a null pointer. */
1268 void
dict_set_weight(struct dictionary * d,struct variable * v)1269 dict_set_weight (struct dictionary *d, struct variable *v)
1270 {
1271 assert (v == NULL || dict_contains_var (d, v));
1272 assert (v == NULL || var_is_numeric (v));
1273
1274 d->weight = v;
1275
1276 if (d->changed) d->changed (d, d->changed_data);
1277 if (d->callbacks && d->callbacks->weight_changed)
1278 d->callbacks->weight_changed (d,
1279 v ? var_get_dict_index (v) : -1,
1280 d->cb_data);
1281 }
1282
1283 /* Returns the filter variable in dictionary D (see cmd_filter())
1284 or a null pointer if the dictionary is unfiltered. */
1285 struct variable *
dict_get_filter(const struct dictionary * d)1286 dict_get_filter (const struct dictionary *d)
1287 {
1288 assert (d->filter == NULL || dict_contains_var (d, d->filter));
1289
1290 return d->filter;
1291 }
1292
1293 /* Sets V as the filter variable for dictionary D. Passing a
1294 null pointer for V turn off filtering. */
1295 void
dict_set_filter(struct dictionary * d,struct variable * v)1296 dict_set_filter (struct dictionary *d, struct variable *v)
1297 {
1298 assert (v == NULL || dict_contains_var (d, v));
1299 assert (v == NULL || var_is_numeric (v));
1300
1301 d->filter = v;
1302
1303 if (d->changed) d->changed (d, d->changed_data);
1304 if (d->callbacks && d->callbacks->filter_changed)
1305 d->callbacks->filter_changed (d,
1306 v ? var_get_dict_index (v) : -1,
1307 d->cb_data);
1308 }
1309
1310 /* Returns the case limit for dictionary D, or zero if the number
1311 of cases is unlimited. */
1312 casenumber
dict_get_case_limit(const struct dictionary * d)1313 dict_get_case_limit (const struct dictionary *d)
1314 {
1315 return d->case_limit;
1316 }
1317
1318 /* Sets CASE_LIMIT as the case limit for dictionary D. Use
1319 0 for CASE_LIMIT to indicate no limit. */
1320 void
dict_set_case_limit(struct dictionary * d,casenumber case_limit)1321 dict_set_case_limit (struct dictionary *d, casenumber case_limit)
1322 {
1323 d->case_limit = case_limit;
1324 }
1325
1326 /* Returns the prototype used for cases created by dictionary D. */
1327 const struct caseproto *
dict_get_proto(const struct dictionary * d_)1328 dict_get_proto (const struct dictionary *d_)
1329 {
1330 struct dictionary *d = CONST_CAST (struct dictionary *, d_);
1331 if (d->proto == NULL)
1332 {
1333 size_t i;
1334
1335 d->proto = caseproto_create ();
1336 d->proto = caseproto_reserve (d->proto, d->var_cnt);
1337 for (i = 0; i < d->var_cnt; i++)
1338 d->proto = caseproto_set_width (d->proto,
1339 var_get_case_index (d->var[i].var),
1340 var_get_width (d->var[i].var));
1341 }
1342 return d->proto;
1343 }
1344
1345 /* Returns the case index of the next value to be added to D.
1346 This value is the number of `union value's that need to be
1347 allocated to store a case for dictionary D. */
1348 int
dict_get_next_value_idx(const struct dictionary * d)1349 dict_get_next_value_idx (const struct dictionary *d)
1350 {
1351 return d->next_value_idx;
1352 }
1353
1354 /* Returns the number of bytes needed to store a case for
1355 dictionary D. */
1356 size_t
dict_get_case_size(const struct dictionary * d)1357 dict_get_case_size (const struct dictionary *d)
1358 {
1359 return sizeof (union value) * dict_get_next_value_idx (d);
1360 }
1361
1362 /* Reassigns values in dictionary D so that fragmentation is
1363 eliminated. */
1364 void
dict_compact_values(struct dictionary * d)1365 dict_compact_values (struct dictionary *d)
1366 {
1367 size_t i;
1368
1369 d->next_value_idx = 0;
1370 for (i = 0; i < d->var_cnt; i++)
1371 {
1372 struct variable *v = d->var[i].var;
1373 set_var_case_index (v, d->next_value_idx++);
1374 }
1375 invalidate_proto (d);
1376 }
1377
1378 /* Returns the number of values occupied by the variables in
1379 dictionary D. All variables are considered if EXCLUDE_CLASSES
1380 is 0, or it may contain one or more of (1u << DC_ORDINARY),
1381 (1u << DC_SYSTEM), or (1u << DC_SCRATCH) to exclude the
1382 corresponding type of variable.
1383
1384 The return value may be less than the number of values in one
1385 of dictionary D's cases (as returned by
1386 dict_get_next_value_idx) even if E is 0, because there may be
1387 gaps in D's cases due to deleted variables. */
1388 size_t
dict_count_values(const struct dictionary * d,unsigned int exclude_classes)1389 dict_count_values (const struct dictionary *d, unsigned int exclude_classes)
1390 {
1391 size_t i;
1392 size_t cnt;
1393
1394 assert ((exclude_classes & ~((1u << DC_ORDINARY)
1395 | (1u << DC_SYSTEM)
1396 | (1u << DC_SCRATCH))) == 0);
1397
1398 cnt = 0;
1399 for (i = 0; i < d->var_cnt; i++)
1400 {
1401 enum dict_class class = var_get_dict_class (d->var[i].var);
1402 if (!(exclude_classes & (1u << class)))
1403 cnt++;
1404 }
1405 return cnt;
1406 }
1407
1408 /* Returns the case prototype that would result after deleting
1409 all variables from D that are not in one of the
1410 EXCLUDE_CLASSES and compacting the dictionary with
1411 dict_compact().
1412
1413 The caller must unref the returned caseproto when it is no
1414 longer needed. */
1415 struct caseproto *
dict_get_compacted_proto(const struct dictionary * d,unsigned int exclude_classes)1416 dict_get_compacted_proto (const struct dictionary *d,
1417 unsigned int exclude_classes)
1418 {
1419 struct caseproto *proto;
1420 size_t i;
1421
1422 assert ((exclude_classes & ~((1u << DC_ORDINARY)
1423 | (1u << DC_SYSTEM)
1424 | (1u << DC_SCRATCH))) == 0);
1425
1426 proto = caseproto_create ();
1427 for (i = 0; i < d->var_cnt; i++)
1428 {
1429 struct variable *v = d->var[i].var;
1430 if (!(exclude_classes & (1u << var_get_dict_class (v))))
1431 proto = caseproto_add_width (proto, var_get_width (v));
1432 }
1433 return proto;
1434 }
1435 /* Returns the file label for D, or a null pointer if D is
1436 unlabeled (see cmd_file_label()). */
1437 const char *
dict_get_label(const struct dictionary * d)1438 dict_get_label (const struct dictionary *d)
1439 {
1440 return d->label;
1441 }
1442
1443 /* Sets D's file label to LABEL, truncating it to at most 60 bytes in D's
1444 encoding.
1445
1446 Removes D's label if LABEL is null or the empty string. */
1447 void
dict_set_label(struct dictionary * d,const char * label)1448 dict_set_label (struct dictionary *d, const char *label)
1449 {
1450 free (d->label);
1451 if (label == NULL || label[0] == '\0')
1452 d->label = NULL;
1453 else
1454 d->label = utf8_encoding_trunc (label, d->encoding, 60);
1455 }
1456
1457 /* Returns the documents for D, as an UTF-8 encoded string_array. The
1458 return value is always nonnull; if there are no documents then the
1459 string_arary is empty.*/
1460 const struct string_array *
dict_get_documents(const struct dictionary * d)1461 dict_get_documents (const struct dictionary *d)
1462 {
1463 return &d->documents;
1464 }
1465
1466 /* Replaces the documents for D by NEW_DOCS, a UTF-8 encoded string_array. */
1467 void
dict_set_documents(struct dictionary * d,const struct string_array * new_docs)1468 dict_set_documents (struct dictionary *d, const struct string_array *new_docs)
1469 {
1470 size_t i;
1471
1472 dict_clear_documents (d);
1473
1474 for (i = 0; i < new_docs->n; i++)
1475 dict_add_document_line (d, new_docs->strings[i], false);
1476 }
1477
/* Replaces the documents for D by the UTF-8 encoded string
   NEW_DOCS, dividing it into individual lines at new-line
   characters.  A new-line at the very end of NEW_DOCS does not
   start an additional empty line.  Each line is truncated to at
   most DOC_LINE_LENGTH bytes in D's encoding, without a warning. */
void
dict_set_documents_string (struct dictionary *d, const char *new_docs)
{
  dict_clear_documents (d);

  const char *s = new_docs;
  while (*s != '\0')
    {
      /* Copy out the text up to the next new-line or the end. */
      size_t len = strcspn (s, "\n");
      char *line = xmemdup0 (s, len);

      dict_add_document_line (d, line, false);
      free (line);

      /* Step past the line and its terminating new-line, if any. */
      s += len;
      if (*s == '\n')
        s++;
    }
}
1499
1500 /* Drops the documents from dictionary D. */
1501 void
dict_clear_documents(struct dictionary * d)1502 dict_clear_documents (struct dictionary *d)
1503 {
1504 string_array_clear (&d->documents);
1505 }
1506
1507 /* Appends the UTF-8 encoded LINE to the documents in D. LINE will be
1508 truncated so that it is no more than 80 bytes in the dictionary's
1509 encoding. If this causes some text to be lost, and ISSUE_WARNING is true,
1510 then a warning will be issued. */
1511 bool
dict_add_document_line(struct dictionary * d,const char * line,bool issue_warning)1512 dict_add_document_line (struct dictionary *d, const char *line,
1513 bool issue_warning)
1514 {
1515 size_t trunc_len;
1516 bool truncated;
1517
1518 trunc_len = utf8_encoding_trunc_len (line, d->encoding, DOC_LINE_LENGTH);
1519 truncated = line[trunc_len] != '\0';
1520 if (truncated && issue_warning)
1521 {
1522 /* Note to translators: "bytes" is correct, not characters */
1523 msg (SW, _("Truncating document line to %d bytes."), DOC_LINE_LENGTH);
1524 }
1525
1526 string_array_append_nocopy (&d->documents, xmemdup0 (line, trunc_len));
1527
1528 return !truncated;
1529 }
1530
1531 /* Returns the number of document lines in dictionary D. */
1532 size_t
dict_get_document_line_cnt(const struct dictionary * d)1533 dict_get_document_line_cnt (const struct dictionary *d)
1534 {
1535 return d->documents.n;
1536 }
1537
1538 /* Returns document line number IDX in dictionary D. The caller must not
1539 modify or free the returned string. */
1540 const char *
dict_get_document_line(const struct dictionary * d,size_t idx)1541 dict_get_document_line (const struct dictionary *d, size_t idx)
1542 {
1543 assert (idx < d->documents.n);
1544 return d->documents.strings[idx];
1545 }
1546
1547 /* Creates in D a vector named NAME that contains the CNT
1548 variables in VAR. Returns true if successful, or false if a
1549 vector named NAME already exists in D. */
1550 bool
dict_create_vector(struct dictionary * d,const char * name,struct variable ** var,size_t cnt)1551 dict_create_vector (struct dictionary *d,
1552 const char *name,
1553 struct variable **var, size_t cnt)
1554 {
1555 size_t i;
1556
1557 assert (cnt > 0);
1558 for (i = 0; i < cnt; i++)
1559 assert (dict_contains_var (d, var[i]));
1560
1561 if (dict_lookup_vector (d, name) == NULL)
1562 {
1563 d->vector = xnrealloc (d->vector, d->vector_cnt + 1, sizeof *d->vector);
1564 d->vector[d->vector_cnt++] = vector_create (name, var, cnt);
1565 return true;
1566 }
1567 else
1568 return false;
1569 }
1570
/* Creates in D a vector named NAME that contains the CNT variables
   in VAR.  D must not already contain a vector named NAME; this is
   checked with an assertion before the vector is created. */
void
dict_create_vector_assert (struct dictionary *d,
                           const char *name,
                           struct variable **var, size_t cnt)
{
  assert (dict_lookup_vector (d, name) == NULL);

  dict_create_vector (d, name, var, cnt);
}
1582
1583 /* Returns the vector in D with index IDX, which must be less
1584 than dict_get_vector_cnt (D). */
1585 const struct vector *
dict_get_vector(const struct dictionary * d,size_t idx)1586 dict_get_vector (const struct dictionary *d, size_t idx)
1587 {
1588 assert (idx < d->vector_cnt);
1589
1590 return d->vector[idx];
1591 }
1592
1593 /* Returns the number of vectors in D. */
1594 size_t
dict_get_vector_cnt(const struct dictionary * d)1595 dict_get_vector_cnt (const struct dictionary *d)
1596 {
1597 return d->vector_cnt;
1598 }
1599
1600 /* Looks up and returns the vector within D with the given
1601 NAME. */
1602 const struct vector *
dict_lookup_vector(const struct dictionary * d,const char * name)1603 dict_lookup_vector (const struct dictionary *d, const char *name)
1604 {
1605 size_t i;
1606 for (i = 0; i < d->vector_cnt; i++)
1607 if (!utf8_strcasecmp (vector_get_name (d->vector[i]), name))
1608 return d->vector[i];
1609 return NULL;
1610 }
1611
1612 /* Deletes all vectors from D. */
1613 void
dict_clear_vectors(struct dictionary * d)1614 dict_clear_vectors (struct dictionary *d)
1615 {
1616 size_t i;
1617
1618 for (i = 0; i < d->vector_cnt; i++)
1619 vector_destroy (d->vector[i]);
1620 free (d->vector);
1621
1622 d->vector = NULL;
1623 d->vector_cnt = 0;
1624 }
1625
1626 /* Multiple response sets. */
1627
1628 /* Returns the multiple response set in DICT with index IDX, which must be
1629 between 0 and the count returned by dict_get_n_mrsets(), exclusive. */
1630 const struct mrset *
dict_get_mrset(const struct dictionary * dict,size_t idx)1631 dict_get_mrset (const struct dictionary *dict, size_t idx)
1632 {
1633 assert (idx < dict->n_mrsets);
1634 return dict->mrsets[idx];
1635 }
1636
1637 /* Returns the number of multiple response sets in DICT. */
1638 size_t
dict_get_n_mrsets(const struct dictionary * dict)1639 dict_get_n_mrsets (const struct dictionary *dict)
1640 {
1641 return dict->n_mrsets;
1642 }
1643
1644 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1645 returns its index; otherwise, returns SIZE_MAX. */
1646 static size_t
dict_lookup_mrset_idx(const struct dictionary * dict,const char * name)1647 dict_lookup_mrset_idx (const struct dictionary *dict, const char *name)
1648 {
1649 size_t i;
1650
1651 for (i = 0; i < dict->n_mrsets; i++)
1652 if (!utf8_strcasecmp (name, dict->mrsets[i]->name))
1653 return i;
1654
1655 return SIZE_MAX;
1656 }
1657
1658 /* Looks for a multiple response set named NAME in DICT. If it finds one,
1659 returns it; otherwise, returns NULL. */
1660 const struct mrset *
dict_lookup_mrset(const struct dictionary * dict,const char * name)1661 dict_lookup_mrset (const struct dictionary *dict, const char *name)
1662 {
1663 size_t idx = dict_lookup_mrset_idx (dict, name);
1664 return idx != SIZE_MAX ? dict->mrsets[idx] : NULL;
1665 }
1666
1667 /* Adds MRSET to DICT, replacing any existing set with the same name. Returns
1668 true if a set was replaced, false if none existed with the specified name.
1669
1670 Ownership of MRSET is transferred to DICT. */
1671 bool
dict_add_mrset(struct dictionary * dict,struct mrset * mrset)1672 dict_add_mrset (struct dictionary *dict, struct mrset *mrset)
1673 {
1674 size_t idx;
1675
1676 assert (mrset_ok (mrset, dict));
1677
1678 idx = dict_lookup_mrset_idx (dict, mrset->name);
1679 if (idx == SIZE_MAX)
1680 {
1681 dict->mrsets = xrealloc (dict->mrsets,
1682 (dict->n_mrsets + 1) * sizeof *dict->mrsets);
1683 dict->mrsets[dict->n_mrsets++] = mrset;
1684 return true;
1685 }
1686 else
1687 {
1688 mrset_destroy (dict->mrsets[idx]);
1689 dict->mrsets[idx] = mrset;
1690 return false;
1691 }
1692 }
1693
1694 /* Looks for a multiple response set in DICT named NAME. If found, removes it
1695 from DICT and returns true. If none is found, returns false without
1696 modifying DICT.
1697
1698 Deleting one multiple response set causes the indexes of other sets within
1699 DICT to change. */
1700 bool
dict_delete_mrset(struct dictionary * dict,const char * name)1701 dict_delete_mrset (struct dictionary *dict, const char *name)
1702 {
1703 size_t idx = dict_lookup_mrset_idx (dict, name);
1704 if (idx != SIZE_MAX)
1705 {
1706 mrset_destroy (dict->mrsets[idx]);
1707 dict->mrsets[idx] = dict->mrsets[--dict->n_mrsets];
1708 return true;
1709 }
1710 else
1711 return false;
1712 }
1713
1714 /* Deletes all multiple response sets from DICT. */
1715 void
dict_clear_mrsets(struct dictionary * dict)1716 dict_clear_mrsets (struct dictionary *dict)
1717 {
1718 size_t i;
1719
1720 for (i = 0; i < dict->n_mrsets; i++)
1721 mrset_destroy (dict->mrsets[i]);
1722 free (dict->mrsets);
1723 dict->mrsets = NULL;
1724 dict->n_mrsets = 0;
1725 }
1726
1727 /* Removes VAR, which must be in DICT, from DICT's multiple response sets. */
1728 static void
dict_unset_mrset_var(struct dictionary * dict,struct variable * var)1729 dict_unset_mrset_var (struct dictionary *dict, struct variable *var)
1730 {
1731 size_t i;
1732
1733 assert (dict_contains_var (dict, var));
1734
1735 for (i = 0; i < dict->n_mrsets;)
1736 {
1737 struct mrset *mrset = dict->mrsets[i];
1738 size_t j;
1739
1740 for (j = 0; j < mrset->n_vars;)
1741 if (mrset->vars[j] == var)
1742 remove_element (mrset->vars, mrset->n_vars--,
1743 sizeof *mrset->vars, j);
1744 else
1745 j++;
1746
1747 if (mrset->n_vars < 2)
1748 {
1749 mrset_destroy (mrset);
1750 dict->mrsets[i] = dict->mrsets[--dict->n_mrsets];
1751 }
1752 else
1753 i++;
1754 }
1755 }
1756
1757 /* Returns D's attribute set. The caller may examine or modify
1758 the attribute set, but must not destroy it. Destroying D or
1759 calling dict_set_attributes for D will also destroy D's
1760 attribute set. */
1761 struct attrset *
dict_get_attributes(const struct dictionary * d)1762 dict_get_attributes (const struct dictionary *d)
1763 {
1764 return CONST_CAST (struct attrset *, &d->attributes);
1765 }
1766
1767 /* Replaces D's attributes set by a copy of ATTRS. */
1768 void
dict_set_attributes(struct dictionary * d,const struct attrset * attrs)1769 dict_set_attributes (struct dictionary *d, const struct attrset *attrs)
1770 {
1771 attrset_destroy (&d->attributes);
1772 attrset_clone (&d->attributes, attrs);
1773 }
1774
1775 /* Returns true if D has at least one attribute in its attribute
1776 set, false if D's attribute set is empty. */
1777 bool
dict_has_attributes(const struct dictionary * d)1778 dict_has_attributes (const struct dictionary *d)
1779 {
1780 return attrset_count (&d->attributes) > 0;
1781 }
1782
1783 /* Called from variable.c to notify the dictionary that some property (indicated
1784 by WHAT) of the variable has changed. OLDVAR is a copy of V as it existed
1785 prior to the change. OLDVAR is destroyed by this function.
1786 */
1787 void
dict_var_changed(const struct variable * v,unsigned int what,struct variable * oldvar)1788 dict_var_changed (const struct variable *v, unsigned int what, struct variable *oldvar)
1789 {
1790 if (var_has_vardict (v))
1791 {
1792 const struct vardict_info *vardict = var_get_vardict (v);
1793 struct dictionary *d = vardict->dict;
1794
1795 if (NULL == d)
1796 return;
1797
1798 if (what & (VAR_TRAIT_WIDTH | VAR_TRAIT_POSITION))
1799 invalidate_proto (d);
1800
1801 if (d->changed) d->changed (d, d->changed_data);
1802 if (d->callbacks && d->callbacks->var_changed)
1803 d->callbacks->var_changed (d, var_get_dict_index (v), what, oldvar, d->cb_data);
1804 }
1805 var_unref (oldvar);
1806 }
1807
1808
1809
/* Dictionary used to contain "internal variables".  Created on
   demand by dict_create_internal_var(). */
static struct dictionary *internal_dict;

/* Creates and returns a variable of the specified WIDTH to be used
   for internal calculations only.  The variable is assigned case
   index CASE_IDX.

   The variable lives in a private dictionary under a generated name
   of the form "$internal" plus a counter.  Candidate names are
   tried until one is accepted by dict_create_var(). */
struct variable *
dict_create_internal_var (int case_idx, int width)
{
  /* Persists across calls.  Cycles through (INT_MAX / 2, INT_MAX). */
  static int counter = INT_MAX / 2;

  if (internal_dict == NULL)
    internal_dict = dict_create ("UTF-8");

  for (;;)
    {
      char name[64];
      struct variable *var;

      if (++counter == INT_MAX)
        counter = INT_MAX / 2;

      sprintf (name, "$internal%d", counter);
      var = dict_create_var (internal_dict, name, width);
      if (var == NULL)
        continue;

      set_var_case_index (var, case_idx);
      return var;
    }
}
1839
1840 /* Destroys VAR, which must have been created with
1841 dict_create_internal_var(). */
1842 void
dict_destroy_internal_var(struct variable * var)1843 dict_destroy_internal_var (struct variable *var)
1844 {
1845 if (var != NULL)
1846 {
1847 dict_delete_var (internal_dict, var);
1848
1849 /* Destroy internal_dict if it has no variables left, just so that
1850 valgrind --leak-check --show-reachable won't show internal_dict. */
1851 if (dict_get_var_cnt (internal_dict) == 0)
1852 {
1853 dict_unref (internal_dict);
1854 internal_dict = NULL;
1855 }
1856 }
1857 }
1858
1859 int
vardict_get_dict_index(const struct vardict_info * vardict)1860 vardict_get_dict_index (const struct vardict_info *vardict)
1861 {
1862 return vardict - vardict->dict->var;
1863 }
1864