/* PSPP - a program for statistical analysis. Copyright (C) 2005, 2009, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include "data/missing-values.h" #include #include #include "data/variable.h" #include "libpspp/assertion.h" #include "libpspp/cast.h" #include "libpspp/i18n.h" #include "libpspp/str.h" #include "gl/minmax.h" /* Types of user-missing values. Invisible--use access functions defined below instead. */ enum mv_type { MVT_NONE = 0, /* No user-missing values. */ MVT_1 = 1, /* One user-missing value. */ MVT_2 = 2, /* Two user-missing values. */ MVT_3 = 3, /* Three user-missing values. */ MVT_RANGE = 4, /* A range of user-missing values. */ MVT_RANGE_1 = 5 /* A range plus an individual value. */ }; /* Initializes MV as a set of missing values for a variable of the given WIDTH. MV should be destroyed with mv_destroy when it is no longer needed. */ void mv_init (struct missing_values *mv, int width) { int i; assert (width >= 0 && width <= MAX_STRING); mv->type = MVT_NONE; mv->width = width; for (i = 0; i < 3; i++) value_init (&mv->values[i], width); } /* Initializes MV as a set of missing values for a variable of the given WIDTH. MV will be automatically destroyed along with POOL; it must not be passed to mv_destroy for explicit destruction. */ void mv_init_pool (struct pool *pool, struct missing_values *mv, int width) { int i; assert (width >= 0 && width <= MAX_STRING); mv->type = MVT_NONE; mv->width = width; for (i = 0; i < 3; i++) value_init_pool (pool, &mv->values[i], width); } /* Frees any storage allocated by mv_init for MV. */ void mv_destroy (struct missing_values *mv) { if (mv != NULL) { int i; for (i = 0; i < 3; i++) value_destroy (&mv->values[i], mv->width); } } /* Removes any missing values from MV. */ void mv_clear (struct missing_values *mv) { mv->type = MVT_NONE; } /* Initializes MV as a copy of SRC. */ void mv_copy (struct missing_values *mv, const struct missing_values *src) { int i; mv_init (mv, src->width); mv->type = src->type; for (i = 0; i < 3; i++) value_copy (&mv->values[i], &src->values[i], mv->width); } /* Returns true if VALUE, of the given WIDTH, may be added to a missing value set also of the given WIDTH. This is normally the case, but string missing values over MV_MAX_STRING bytes long must consist solely of spaces after the first MV_MAX_STRING bytes. */ bool mv_is_acceptable (const union value *value, int width) { int i; for (i = MV_MAX_STRING; i < width; i++) if (value->s[i] != ' ') return false; return true; } /* Returns true if MV is an empty set of missing values. */ bool mv_is_empty (const struct missing_values *mv) { return mv->type == MVT_NONE; } /* Returns the width of the missing values that MV may contain. */ int mv_get_width (const struct missing_values *mv) { return mv->width; } /* Attempts to add individual value V to the set of missing values MV. Returns true if successful, false if MV has no more room for missing values or if V is not an acceptable missing value. */ bool mv_add_value (struct missing_values *mv, const union value *v) { if (!mv_is_acceptable (v, mv->width)) return false; switch (mv->type) { case MVT_NONE: case MVT_1: case MVT_2: case MVT_RANGE: value_copy (&mv->values[mv->type & 3], v, mv->width); mv->type++; return true; case MVT_3: case MVT_RANGE_1: return false; } NOT_REACHED (); } /* Attempts to add S, which is LEN bytes long, to the set of string missing values MV. Returns true if successful, false if MV has no more room for missing values or if S is not an acceptable missing value. */ bool mv_add_str (struct missing_values *mv, const uint8_t s[], size_t len) { union value v; bool ok; assert (mv->width > 0); while (len > mv->width) if (s[--len] != ' ') return false; value_init (&v, mv->width); buf_copy_rpad (CHAR_CAST (char *, v.s), mv->width, CHAR_CAST (char *, s), len, ' '); ok = mv_add_value (mv, &v); value_destroy (&v, mv->width); return ok; } /* Attempts to add D to the set of numeric missing values MV. Returns true if successful, false if MV has no more room for missing values. */ bool mv_add_num (struct missing_values *mv, double d) { union value v; bool ok; assert (mv->width == 0); value_init (&v, 0); v.f = d; ok = mv_add_value (mv, &v); value_destroy (&v, 0); return ok; } /* Attempts to add range [LOW, HIGH] to the set of numeric missing values MV. Returns true if successful, false if MV has no room for a range, or if LOW > HIGH. */ bool mv_add_range (struct missing_values *mv, double low, double high) { assert (mv->width == 0); if (low <= high && (mv->type == MVT_NONE || mv->type == MVT_1)) { mv->values[1].f = low; mv->values[2].f = high; mv->type |= 4; return true; } else return false; } /* Returns true if MV contains an individual value, false if MV is empty (or contains only a range). */ bool mv_has_value (const struct missing_values *mv) { return mv_n_values (mv) > 0; } /* Removes one individual value from MV and stores it in V, which must have been initialized as a value with the same width as MV. MV must contain an individual value (as determined by mv_has_value()). We remove the first value from MV, not the last, because the common use for this function is in iterating through a set of missing values. If we remove the last value then we'll output the missing values in order opposite of that in which they were added, so that a GET followed by a SAVE would reverse the order of missing values in the system file, a weird effect. */ void mv_pop_value (struct missing_values *mv, union value *v) { union value tmp; assert (mv_has_value (mv)); value_copy (v, &mv->values[0], mv->width); tmp = mv->values[0]; mv->values[0] = mv->values[1]; mv->values[1] = mv->values[2]; mv->values[2] = tmp; mv->type--; } /* Returns MV's discrete value with index IDX. The caller must not modify or free this value, or access it after MV is modified or freed. IDX must be less than the number of discrete values in MV, as reported by mv_n_values. */ const union value * mv_get_value (const struct missing_values *mv, int idx) { assert (idx >= 0 && idx < mv_n_values (mv)); return &mv->values[idx]; } /* Replaces MV's discrete value with index IDX by a copy of V, which must have the same width as MV. IDX must be less than the number of discrete values in MV, as reported by mv_n_values. */ bool mv_replace_value (struct missing_values *mv, const union value *v, int idx) { assert (idx >= 0) ; assert (idx < mv_n_values(mv)); if (!mv_is_acceptable (v, mv->width)) return false; value_copy (&mv->values[idx], v, mv->width); return true; } /* Returns the number of individual (not part of a range) missing values in MV. */ int mv_n_values (const struct missing_values *mv) { return mv->type & 3; } /* Returns true if MV contains a numeric range, false if MV is empty (or contains only individual values). */ bool mv_has_range (const struct missing_values *mv) { return mv->type == MVT_RANGE || mv->type == MVT_RANGE_1; } /* Removes the numeric range from MV and stores it in *LOW and *HIGH. MV must contain a individual range (as determined by mv_has_range()). */ void mv_pop_range (struct missing_values *mv, double *low, double *high) { assert (mv_has_range (mv)); *low = mv->values[1].f; *high = mv->values[2].f; mv->type &= 3; } /* Returns the numeric range from MV into *LOW and *HIGH. MV must contain a individual range (as determined by mv_has_range()). */ void mv_get_range (const struct missing_values *mv, double *low, double *high) { assert (mv_has_range (mv)); *low = mv->values[1].f; *high = mv->values[2].f; } /* Returns true if values[IDX] is in use when the `type' member is set to TYPE (in struct missing_values), false otherwise. */ static bool using_element (unsigned type, int idx) { assert (idx >= 0 && idx < 3); switch (type) { case MVT_NONE: return false; case MVT_1: return idx < 1; case MVT_2: return idx < 2; case MVT_3: return true; case MVT_RANGE: return idx > 0; case MVT_RANGE_1: return true; } NOT_REACHED (); } /* Returns true if MV can be resized to the given WIDTH with mv_resize(), false otherwise. Resizing is possible only when each value in MV (if any) is resizable from MV's current width to WIDTH, as determined by value_is_resizable. */ bool mv_is_resizable (const struct missing_values *mv, int width) { int i; for (i = 0; i < 3; i++) if (using_element (mv->type, i) && !value_is_resizable (&mv->values[i], mv->width, width)) return false; return true; } /* Resizes MV to the given WIDTH. WIDTH must fit the constraints explained for mv_is_resizable. */ void mv_resize (struct missing_values *mv, int width) { int i; assert (mv_is_resizable (mv, width)); for (i = 0; i < 3; i++) if (using_element (mv->type, i)) value_resize (&mv->values[i], mv->width, width); else { value_destroy (&mv->values[i], mv->width); value_init (&mv->values[i], width); } mv->width = width; } /* Returns true if D is a missing value in MV, false otherwise. MV must be a set of numeric missing values. */ static bool is_num_user_missing (const struct missing_values *mv, double d) { const union value *v = mv->values; assert (mv->width == 0); switch (mv->type) { case MVT_NONE: return false; case MVT_1: return v[0].f == d; case MVT_2: return v[0].f == d || v[1].f == d; case MVT_3: return v[0].f == d || v[1].f == d || v[2].f == d; case MVT_RANGE: return v[1].f <= d && d <= v[2].f; case MVT_RANGE_1: return v[0].f == d || (v[1].f <= d && d <= v[2].f); } NOT_REACHED (); } /* Returns true if S[] is a missing value in MV, false otherwise. MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. */ static bool is_str_user_missing (const struct missing_values *mv, const uint8_t s[]) { const union value *v = mv->values; assert (mv->width > 0); switch (mv->type) { case MVT_NONE: return false; case MVT_1: return !memcmp (v[0].s, s, mv->width); case MVT_2: return (!memcmp (v[0].s, s, mv->width) || !memcmp (v[1].s, s, mv->width)); case MVT_3: return (!memcmp (v[0].s, s, mv->width) || !memcmp (v[1].s, s, mv->width) || !memcmp (v[2].s, s, mv->width)); case MVT_RANGE: case MVT_RANGE_1: NOT_REACHED (); } NOT_REACHED (); } /* Returns true if V is a missing value in the given CLASS in MV, false otherwise. */ bool mv_is_value_missing (const struct missing_values *mv, const union value *v, enum mv_class class) { return (mv->width == 0 ? mv_is_num_missing (mv, v->f, class) : mv_is_str_missing (mv, v->s, class)); } /* Returns true if D is a missing value in the given CLASS in MV, false otherwise. MV must be a set of numeric missing values. */ bool mv_is_num_missing (const struct missing_values *mv, double d, enum mv_class class) { assert (mv->width == 0); return ((class & MV_SYSTEM && d == SYSMIS) || (class & MV_USER && is_num_user_missing (mv, d))); } /* Returns true if S[] is a missing value in the given CLASS in MV, false otherwise. MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. */ bool mv_is_str_missing (const struct missing_values *mv, const uint8_t s[], enum mv_class class) { assert (mv->width > 0); return class & MV_USER && is_str_user_missing (mv, s); } /* Like mv_is_value_missing(), this tests whether V is a missing value in the given CLASS in MV. It supports the uncommon case where V and MV might have different widths: the caller must specify VW, the width of V. MV and VW must be both numeric or both string. Comparison of strings of different width is done by conceptually extending both strings to infinite width by appending spaces. */ bool mv_is_value_missing_varwidth (const struct missing_values *mv, const union value *v, int vw, enum mv_class class) { int mvw = mv->width; if (mvw == vw) return mv_is_value_missing (mv, v, class); /* Make sure they're both strings. */ assert (mvw && vw); if (!(class & MV_USER) || mv->type == MVT_NONE) return false; for (int i = 0; i < mv->type; i++) if (!buf_compare_rpad (CHAR_CAST_BUG (const char *, mv->values[i].s), mvw, CHAR_CAST_BUG (const char *, v->s), vw)) return true; return false; } char * mv_to_string (const struct missing_values *mv, const char *encoding) { struct string s = DS_EMPTY_INITIALIZER; if (mv_has_range (mv)) { double x, y; mv_get_range (mv, &x, &y); if (x == LOWEST) ds_put_format (&s, "LOWEST THRU %.*g", DBL_DIG + 1, y); else if (y == HIGHEST) ds_put_format (&s, "%.*g THRU HIGHEST", DBL_DIG + 1, x); else ds_put_format (&s, "%.*g THRU %.*g", DBL_DIG + 1, x, DBL_DIG + 1, y); } for (size_t j = 0; j < mv_n_values (mv); j++) { const union value *value = mv_get_value (mv, j); if (!ds_is_empty (&s)) ds_put_cstr (&s, "; "); if (!mv->width) ds_put_format (&s, "%.*g", DBL_DIG + 1, value->f); else { char *mvs = recode_string ( "UTF-8", encoding, CHAR_CAST (char *, value->s), MIN (mv->width, MV_MAX_STRING)); ds_put_format (&s, "\"%s\"", mvs); free (mvs); } } return ds_is_empty (&s) ? NULL : ds_steal_cstr (&s); }