1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005, 2009, 2011, 2013 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "data/missing-values.h"
20
21 #include <assert.h>
22 #include <stdlib.h>
23
24 #include "data/variable.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "libpspp/i18n.h"
28 #include "libpspp/str.h"
29
30 #include "gl/minmax.h"
31
/* Kinds of user-missing value sets.
   Private to this file--use the access functions defined below
   instead of inspecting these directly. */
enum mv_type
  {
    MVT_NONE = 0,                   /* Nothing is user-missing. */
    MVT_1 = 1,                      /* One discrete value. */
    MVT_2 = 2,                      /* Two discrete values. */
    MVT_3 = 3,                      /* Three discrete values. */
    MVT_RANGE = 4,                  /* A range of values only. */
    MVT_RANGE_1 = MVT_RANGE | MVT_1 /* A range plus one discrete value. */
  };
43
44 /* Initializes MV as a set of missing values for a variable of
45 the given WIDTH. MV should be destroyed with mv_destroy when
46 it is no longer needed. */
47 void
mv_init(struct missing_values * mv,int width)48 mv_init (struct missing_values *mv, int width)
49 {
50 int i;
51
52 assert (width >= 0 && width <= MAX_STRING);
53 mv->type = MVT_NONE;
54 mv->width = width;
55 for (i = 0; i < 3; i++)
56 value_init (&mv->values[i], width);
57 }
58
59 /* Initializes MV as a set of missing values for a variable of
60 the given WIDTH. MV will be automatically destroyed along
61 with POOL; it must not be passed to mv_destroy for explicit
62 destruction. */
63 void
mv_init_pool(struct pool * pool,struct missing_values * mv,int width)64 mv_init_pool (struct pool *pool, struct missing_values *mv, int width)
65 {
66 int i;
67
68 assert (width >= 0 && width <= MAX_STRING);
69 mv->type = MVT_NONE;
70 mv->width = width;
71 for (i = 0; i < 3; i++)
72 value_init_pool (pool, &mv->values[i], width);
73 }
74
75 /* Frees any storage allocated by mv_init for MV. */
76 void
mv_destroy(struct missing_values * mv)77 mv_destroy (struct missing_values *mv)
78 {
79 if (mv != NULL)
80 {
81 int i;
82
83 for (i = 0; i < 3; i++)
84 value_destroy (&mv->values[i], mv->width);
85 }
86 }
87
88 /* Removes any missing values from MV. */
89 void
mv_clear(struct missing_values * mv)90 mv_clear (struct missing_values *mv)
91 {
92 mv->type = MVT_NONE;
93 }
94
95 /* Initializes MV as a copy of SRC. */
96 void
mv_copy(struct missing_values * mv,const struct missing_values * src)97 mv_copy (struct missing_values *mv, const struct missing_values *src)
98 {
99 int i;
100
101 mv_init (mv, src->width);
102 mv->type = src->type;
103 for (i = 0; i < 3; i++)
104 value_copy (&mv->values[i], &src->values[i], mv->width);
105 }
106
107 /* Returns true if VALUE, of the given WIDTH, may be added to a
108 missing value set also of the given WIDTH. This is normally
109 the case, but string missing values over MV_MAX_STRING bytes
110 long must consist solely of spaces after the first
111 MV_MAX_STRING bytes. */
112 bool
mv_is_acceptable(const union value * value,int width)113 mv_is_acceptable (const union value *value, int width)
114 {
115 int i;
116
117 for (i = MV_MAX_STRING; i < width; i++)
118 if (value->s[i] != ' ')
119 return false;
120 return true;
121 }
122
123 /* Returns true if MV is an empty set of missing values. */
124 bool
mv_is_empty(const struct missing_values * mv)125 mv_is_empty (const struct missing_values *mv)
126 {
127 return mv->type == MVT_NONE;
128 }
129
130 /* Returns the width of the missing values that MV may
131 contain. */
132 int
mv_get_width(const struct missing_values * mv)133 mv_get_width (const struct missing_values *mv)
134 {
135 return mv->width;
136 }
137
138 /* Attempts to add individual value V to the set of missing
139 values MV. Returns true if successful, false if MV has no
140 more room for missing values or if V is not an acceptable
141 missing value. */
142 bool
mv_add_value(struct missing_values * mv,const union value * v)143 mv_add_value (struct missing_values *mv, const union value *v)
144 {
145 if (!mv_is_acceptable (v, mv->width))
146 return false;
147
148 switch (mv->type)
149 {
150 case MVT_NONE:
151 case MVT_1:
152 case MVT_2:
153 case MVT_RANGE:
154 value_copy (&mv->values[mv->type & 3], v, mv->width);
155 mv->type++;
156 return true;
157
158 case MVT_3:
159 case MVT_RANGE_1:
160 return false;
161 }
162 NOT_REACHED ();
163 }
164
165 /* Attempts to add S, which is LEN bytes long, to the set of string missing
166 values MV. Returns true if successful, false if MV has no more room for
167 missing values or if S is not an acceptable missing value. */
168 bool
mv_add_str(struct missing_values * mv,const uint8_t s[],size_t len)169 mv_add_str (struct missing_values *mv, const uint8_t s[], size_t len)
170 {
171 union value v;
172 bool ok;
173
174 assert (mv->width > 0);
175 while (len > mv->width)
176 if (s[--len] != ' ')
177 return false;
178
179 value_init (&v, mv->width);
180 buf_copy_rpad (CHAR_CAST (char *, v.s), mv->width,
181 CHAR_CAST (char *, s), len, ' ');
182 ok = mv_add_value (mv, &v);
183 value_destroy (&v, mv->width);
184
185 return ok;
186 }
187
188 /* Attempts to add D to the set of numeric missing values MV.
189 Returns true if successful, false if MV has no more room for
190 missing values. */
191 bool
mv_add_num(struct missing_values * mv,double d)192 mv_add_num (struct missing_values *mv, double d)
193 {
194 union value v;
195 bool ok;
196
197 assert (mv->width == 0);
198 value_init (&v, 0);
199 v.f = d;
200 ok = mv_add_value (mv, &v);
201 value_destroy (&v, 0);
202
203 return ok;
204 }
205
206 /* Attempts to add range [LOW, HIGH] to the set of numeric
207 missing values MV. Returns true if successful, false if MV
208 has no room for a range, or if LOW > HIGH. */
209 bool
mv_add_range(struct missing_values * mv,double low,double high)210 mv_add_range (struct missing_values *mv, double low, double high)
211 {
212 assert (mv->width == 0);
213 if (low <= high && (mv->type == MVT_NONE || mv->type == MVT_1))
214 {
215 mv->values[1].f = low;
216 mv->values[2].f = high;
217 mv->type |= 4;
218 return true;
219 }
220 else
221 return false;
222 }
223
/* Reports whether MV holds at least one discrete value.  Returns
   false if MV is empty or holds nothing but a range. */
bool
mv_has_value (const struct missing_values *mv)
{
  const int n_discrete = mv_n_values (mv);
  return n_discrete > 0;
}
231
232 /* Removes one individual value from MV and stores it in V, which
233 must have been initialized as a value with the same width as MV.
234 MV must contain an individual value (as determined by
235 mv_has_value()).
236
237 We remove the first value from MV, not the last, because the
238 common use for this function is in iterating through a set of
239 missing values. If we remove the last value then we'll output
240 the missing values in order opposite of that in which they
241 were added, so that a GET followed by a SAVE would reverse the
242 order of missing values in the system file, a weird effect. */
243 void
mv_pop_value(struct missing_values * mv,union value * v)244 mv_pop_value (struct missing_values *mv, union value *v)
245 {
246 union value tmp;
247
248 assert (mv_has_value (mv));
249
250 value_copy (v, &mv->values[0], mv->width);
251 tmp = mv->values[0];
252 mv->values[0] = mv->values[1];
253 mv->values[1] = mv->values[2];
254 mv->values[2] = tmp;
255 mv->type--;
256 }
257
258 /* Returns MV's discrete value with index IDX. The caller must
259 not modify or free this value, or access it after MV is
260 modified or freed.
261 IDX must be less than the number of discrete values in MV, as
262 reported by mv_n_values. */
263 const union value *
mv_get_value(const struct missing_values * mv,int idx)264 mv_get_value (const struct missing_values *mv, int idx)
265 {
266 assert (idx >= 0 && idx < mv_n_values (mv));
267 return &mv->values[idx];
268 }
269
270 /* Replaces MV's discrete value with index IDX by a copy of V,
271 which must have the same width as MV.
272 IDX must be less than the number of discrete values in MV, as
273 reported by mv_n_values. */
274 bool
mv_replace_value(struct missing_values * mv,const union value * v,int idx)275 mv_replace_value (struct missing_values *mv, const union value *v, int idx)
276 {
277 assert (idx >= 0) ;
278 assert (idx < mv_n_values(mv));
279
280 if (!mv_is_acceptable (v, mv->width))
281 return false;
282
283 value_copy (&mv->values[idx], v, mv->width);
284 return true;
285 }
286
287 /* Returns the number of individual (not part of a range) missing
288 values in MV. */
289 int
mv_n_values(const struct missing_values * mv)290 mv_n_values (const struct missing_values *mv)
291 {
292 return mv->type & 3;
293 }
294
295
296 /* Returns true if MV contains a numeric range,
297 false if MV is empty (or contains only individual values). */
298 bool
mv_has_range(const struct missing_values * mv)299 mv_has_range (const struct missing_values *mv)
300 {
301 return mv->type == MVT_RANGE || mv->type == MVT_RANGE_1;
302 }
303
304 /* Removes the numeric range from MV and stores it in *LOW and
305 *HIGH. MV must contain a individual range (as determined by
306 mv_has_range()). */
307 void
mv_pop_range(struct missing_values * mv,double * low,double * high)308 mv_pop_range (struct missing_values *mv, double *low, double *high)
309 {
310 assert (mv_has_range (mv));
311 *low = mv->values[1].f;
312 *high = mv->values[2].f;
313 mv->type &= 3;
314 }
315
316 /* Returns the numeric range from MV into *LOW and
317 *HIGH. MV must contain a individual range (as determined by
318 mv_has_range()). */
319 void
mv_get_range(const struct missing_values * mv,double * low,double * high)320 mv_get_range (const struct missing_values *mv, double *low, double *high)
321 {
322 assert (mv_has_range (mv));
323 *low = mv->values[1].f;
324 *high = mv->values[2].f;
325 }
326
327 /* Returns true if values[IDX] is in use when the `type' member
328 is set to TYPE (in struct missing_values),
329 false otherwise. */
330 static bool
using_element(unsigned type,int idx)331 using_element (unsigned type, int idx)
332 {
333 assert (idx >= 0 && idx < 3);
334
335 switch (type)
336 {
337 case MVT_NONE:
338 return false;
339 case MVT_1:
340 return idx < 1;
341 case MVT_2:
342 return idx < 2;
343 case MVT_3:
344 return true;
345 case MVT_RANGE:
346 return idx > 0;
347 case MVT_RANGE_1:
348 return true;
349 }
350 NOT_REACHED ();
351 }
352
353 /* Returns true if MV can be resized to the given WIDTH with
354 mv_resize(), false otherwise. Resizing is possible only when
355 each value in MV (if any) is resizable from MV's current width
356 to WIDTH, as determined by value_is_resizable. */
357 bool
mv_is_resizable(const struct missing_values * mv,int width)358 mv_is_resizable (const struct missing_values *mv, int width)
359 {
360 int i;
361
362 for (i = 0; i < 3; i++)
363 if (using_element (mv->type, i)
364 && !value_is_resizable (&mv->values[i], mv->width, width))
365 return false;
366
367 return true;
368 }
369
370 /* Resizes MV to the given WIDTH. WIDTH must fit the constraints
371 explained for mv_is_resizable. */
372 void
mv_resize(struct missing_values * mv,int width)373 mv_resize (struct missing_values *mv, int width)
374 {
375 int i;
376
377 assert (mv_is_resizable (mv, width));
378 for (i = 0; i < 3; i++)
379 if (using_element (mv->type, i))
380 value_resize (&mv->values[i], mv->width, width);
381 else
382 {
383 value_destroy (&mv->values[i], mv->width);
384 value_init (&mv->values[i], width);
385 }
386 mv->width = width;
387 }
388
389 /* Returns true if D is a missing value in MV, false otherwise.
390 MV must be a set of numeric missing values. */
391 static bool
is_num_user_missing(const struct missing_values * mv,double d)392 is_num_user_missing (const struct missing_values *mv, double d)
393 {
394 const union value *v = mv->values;
395 assert (mv->width == 0);
396 switch (mv->type)
397 {
398 case MVT_NONE:
399 return false;
400 case MVT_1:
401 return v[0].f == d;
402 case MVT_2:
403 return v[0].f == d || v[1].f == d;
404 case MVT_3:
405 return v[0].f == d || v[1].f == d || v[2].f == d;
406 case MVT_RANGE:
407 return v[1].f <= d && d <= v[2].f;
408 case MVT_RANGE_1:
409 return v[0].f == d || (v[1].f <= d && d <= v[2].f);
410 }
411 NOT_REACHED ();
412 }
413
414 /* Returns true if S[] is a missing value in MV, false otherwise.
415 MV must be a set of string missing values.
416 S[] must contain exactly as many characters as MV's width. */
417 static bool
is_str_user_missing(const struct missing_values * mv,const uint8_t s[])418 is_str_user_missing (const struct missing_values *mv, const uint8_t s[])
419 {
420 const union value *v = mv->values;
421 assert (mv->width > 0);
422 switch (mv->type)
423 {
424 case MVT_NONE:
425 return false;
426 case MVT_1:
427 return !memcmp (v[0].s, s, mv->width);
428 case MVT_2:
429 return (!memcmp (v[0].s, s, mv->width)
430 || !memcmp (v[1].s, s, mv->width));
431 case MVT_3:
432 return (!memcmp (v[0].s, s, mv->width)
433 || !memcmp (v[1].s, s, mv->width)
434 || !memcmp (v[2].s, s, mv->width));
435 case MVT_RANGE:
436 case MVT_RANGE_1:
437 NOT_REACHED ();
438 }
439 NOT_REACHED ();
440 }
441
442 /* Returns true if V is a missing value in the given CLASS in MV,
443 false otherwise. */
444 bool
mv_is_value_missing(const struct missing_values * mv,const union value * v,enum mv_class class)445 mv_is_value_missing (const struct missing_values *mv, const union value *v,
446 enum mv_class class)
447 {
448 return (mv->width == 0
449 ? mv_is_num_missing (mv, v->f, class)
450 : mv_is_str_missing (mv, v->s, class));
451 }
452
453 /* Returns true if D is a missing value in the given CLASS in MV,
454 false otherwise.
455 MV must be a set of numeric missing values. */
456 bool
mv_is_num_missing(const struct missing_values * mv,double d,enum mv_class class)457 mv_is_num_missing (const struct missing_values *mv, double d,
458 enum mv_class class)
459 {
460 assert (mv->width == 0);
461 return ((class & MV_SYSTEM && d == SYSMIS)
462 || (class & MV_USER && is_num_user_missing (mv, d)));
463 }
464
465 /* Returns true if S[] is a missing value in the given CLASS in
466 MV, false otherwise.
467 MV must be a set of string missing values.
468 S[] must contain exactly as many characters as MV's width. */
469 bool
mv_is_str_missing(const struct missing_values * mv,const uint8_t s[],enum mv_class class)470 mv_is_str_missing (const struct missing_values *mv, const uint8_t s[],
471 enum mv_class class)
472 {
473 assert (mv->width > 0);
474 return class & MV_USER && is_str_user_missing (mv, s);
475 }
476
477 /* Like mv_is_value_missing(), this tests whether V is a missing value
478 in the given CLASS in MV. It supports the uncommon case where V
479 and MV might have different widths: the caller must specify VW, the
480 width of V. MV and VW must be both numeric or both string.
481
482 Comparison of strings of different width is done by conceptually
483 extending both strings to infinite width by appending spaces. */
484 bool
mv_is_value_missing_varwidth(const struct missing_values * mv,const union value * v,int vw,enum mv_class class)485 mv_is_value_missing_varwidth (const struct missing_values *mv,
486 const union value *v, int vw,
487 enum mv_class class)
488 {
489 int mvw = mv->width;
490 if (mvw == vw)
491 return mv_is_value_missing (mv, v, class);
492
493 /* Make sure they're both strings. */
494 assert (mvw && vw);
495 if (!(class & MV_USER) || mv->type == MVT_NONE)
496 return false;
497
498 for (int i = 0; i < mv->type; i++)
499 if (!buf_compare_rpad (CHAR_CAST_BUG (const char *, mv->values[i].s), mvw,
500 CHAR_CAST_BUG (const char *, v->s), vw))
501 return true;
502 return false;
503 }
504
505 char *
mv_to_string(const struct missing_values * mv,const char * encoding)506 mv_to_string (const struct missing_values *mv, const char *encoding)
507 {
508 struct string s = DS_EMPTY_INITIALIZER;
509 if (mv_has_range (mv))
510 {
511 double x, y;
512 mv_get_range (mv, &x, &y);
513 if (x == LOWEST)
514 ds_put_format (&s, "LOWEST THRU %.*g", DBL_DIG + 1, y);
515 else if (y == HIGHEST)
516 ds_put_format (&s, "%.*g THRU HIGHEST", DBL_DIG + 1, x);
517 else
518 ds_put_format (&s, "%.*g THRU %.*g",
519 DBL_DIG + 1, x,
520 DBL_DIG + 1, y);
521 }
522 for (size_t j = 0; j < mv_n_values (mv); j++)
523 {
524 const union value *value = mv_get_value (mv, j);
525 if (!ds_is_empty (&s))
526 ds_put_cstr (&s, "; ");
527 if (!mv->width)
528 ds_put_format (&s, "%.*g", DBL_DIG + 1, value->f);
529 else
530 {
531 char *mvs = recode_string (
532 "UTF-8", encoding, CHAR_CAST (char *, value->s),
533 MIN (mv->width, MV_MAX_STRING));
534 ds_put_format (&s, "\"%s\"", mvs);
535 free (mvs);
536 }
537 }
538 return ds_is_empty (&s) ? NULL : ds_steal_cstr (&s);
539 }
540