1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 2004, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include <config.h>
18 
19 #include "data/case.h"
20 
21 #include <limits.h>
22 #include <stddef.h>
23 #include <stdlib.h>
24 
25 #include "data/value.h"
26 #include "data/variable.h"
27 #include "libpspp/assertion.h"
28 #include "libpspp/str.h"
29 
30 #include "gl/minmax.h"
31 #include "gl/xalloc.h"
32 
33 /* Set this flag to 1 to copy cases instead of ref counting them.
34    This is sometimes helpful in debugging situations. */
35 #define DEBUG_CASEREFS 0
36 
37 #if DEBUG_CASEREFS
38 #warning "Caseref debug enabled.  CASES ARE NOT BEING SHARED!!"
39 #endif
40 
41 static size_t case_size (const struct caseproto *);
42 static void assert_variable_matches_case (const struct ccase *,
43                                    const struct variable *);
44 static void copy_forward (struct ccase *dst, size_t dst_idx,
45                           const struct ccase *src, size_t src_idx,
46                           size_t n_values);
47 static void copy_backward (struct ccase *dst, size_t dst_idx,
48                            const struct ccase *src, size_t src_idx,
49                            size_t n_values);
50 
51 /* Creates and returns a new case that stores data of the form
52    specified by PROTO.  The data in the case have indeterminate
53    contents until explicitly written.
54 
55    The caller retains ownership of PROTO. */
56 struct ccase *
case_create(const struct caseproto * proto)57 case_create (const struct caseproto *proto)
58 {
59   struct ccase *c = case_try_create (proto);
60   if (c == NULL)
61     xalloc_die ();
62   return c;
63 }
64 
65 /* Like case_create, but returns a null pointer if not enough
66    memory is available. */
67 struct ccase *
case_try_create(const struct caseproto * proto)68 case_try_create (const struct caseproto *proto)
69 {
70   struct ccase *c = malloc (case_size (proto));
71   if (c != NULL)
72     {
73       if (caseproto_try_init_values (proto, c->values))
74         {
75           c->proto = caseproto_ref (proto);
76           c->ref_cnt = 1;
77           return c;
78         }
79       free (c);
80     }
81   return NULL;
82 }
83 
84 /* Creates and returns an unshared copy of case C. */
85 struct ccase *
case_clone(const struct ccase * c)86 case_clone (const struct ccase *c)
87 {
88   return case_unshare (case_ref (c));
89 }
90 
91 /* Increments case C's reference count and returns C.  Afterward,
92    case C is shared among its reference count holders. */
93 struct ccase *
case_ref(const struct ccase * c_)94 case_ref (const struct ccase *c_)
95 {
96   struct ccase *c = CONST_CAST (struct ccase *, c_);
97   c->ref_cnt++;
98 #if DEBUG_CASEREFS
99   c = case_unshare__ (c);
100 #endif
101   return c;
102 }
103 
104 /* Returns an estimate of the number of bytes of memory that
105    would be consumed in creating a case based on PROTO.  The
106    estimate includes typical overhead from malloc() in addition
107    to the actual size of data. */
108 size_t
case_get_cost(const struct caseproto * proto)109 case_get_cost (const struct caseproto *proto)
110 {
111   /* FIXME: improve approximation? */
112   return (1 + caseproto_get_n_widths (proto)
113           + 3 * caseproto_get_n_strings (proto)) * sizeof (union value);
114 }
115 
116 /* Changes the prototype for case C, which must not be shared.
117    The new PROTO must be conformable with C's current prototype
118    (as defined by caseproto_is_conformable).
119 
120    Any new values created by this function have indeterminate
121    content that the caller is responsible for initializing.
122 
123    The caller retains ownership of PROTO.
124 
125    Returns a new case that replaces C, which is freed. */
126 struct ccase *
case_resize(struct ccase * c,const struct caseproto * new_proto)127 case_resize (struct ccase *c, const struct caseproto *new_proto)
128 {
129   struct caseproto *old_proto = c->proto;
130   size_t old_n_widths = caseproto_get_n_widths (old_proto);
131   size_t new_n_widths = caseproto_get_n_widths (new_proto);
132 
133   assert (!case_is_shared (c));
134   expensive_assert (caseproto_is_conformable (old_proto, new_proto));
135 
136   if (old_n_widths != new_n_widths)
137     {
138       if (new_n_widths < old_n_widths)
139         caseproto_reinit_values (old_proto, new_proto, c->values);
140       c = xrealloc (c, case_size (new_proto));
141       if (new_n_widths > old_n_widths)
142         caseproto_reinit_values (old_proto, new_proto, c->values);
143 
144       caseproto_unref (old_proto);
145       c->proto = caseproto_ref (new_proto);
146     }
147 
148   return c;
149 }
150 
151 /* case_unshare_and_resize(C, PROTO) is equivalent to
152    case_resize(case_unshare(C), PROTO), but it is faster if case
153    C is shared.
154 
155    Any new values created by this function have indeterminate
156    content that the caller is responsible for initializing.
157 
158    The caller retains ownership of PROTO.
159 
160    Returns the new case that replaces C, which is freed. */
161 struct ccase *
case_unshare_and_resize(struct ccase * c,const struct caseproto * proto)162 case_unshare_and_resize (struct ccase *c, const struct caseproto *proto)
163 {
164   if (!case_is_shared (c))
165     return case_resize (c, proto);
166   else
167     {
168       struct ccase *new = case_create (proto);
169       size_t old_n_values = caseproto_get_n_widths (c->proto);
170       size_t new_n_values = caseproto_get_n_widths (proto);
171       case_copy (new, 0, c, 0, MIN (old_n_values, new_n_values));
172       c->ref_cnt--;
173       return new;
174     }
175 }
176 
177 /* Sets all of the numeric values in case C to the system-missing
178    value, and all of the string values to spaces. */
179 void
case_set_missing(struct ccase * c)180 case_set_missing (struct ccase *c)
181 {
182   size_t i;
183 
184   assert (!case_is_shared (c));
185   for (i = 0; i < caseproto_get_n_widths (c->proto); i++)
186     value_set_missing (&c->values[i], caseproto_get_width (c->proto, i));
187 }
188 
189 /* Copies N_VALUES values from SRC (starting at SRC_IDX) to DST
190    (starting at DST_IDX).  Each value that is copied into must
191    have the same width as the value that it is copied from.
192 
193    Properly handles overlapping ranges when DST == SRC.
194 
195    DST must not be shared. */
196 void
case_copy(struct ccase * dst,size_t dst_idx,const struct ccase * src,size_t src_idx,size_t n_values)197 case_copy (struct ccase *dst, size_t dst_idx,
198            const struct ccase *src, size_t src_idx,
199            size_t n_values)
200 {
201   assert (!case_is_shared (dst));
202   assert (caseproto_range_is_valid (dst->proto, dst_idx, n_values));
203   assert (caseproto_range_is_valid (src->proto, src_idx, n_values));
204   assert (caseproto_equal (dst->proto, dst_idx, src->proto, src_idx,
205                            n_values));
206 
207   if (dst != src)
208     {
209       if (!dst->proto->n_strings || !src->proto->n_strings)
210         memcpy (&dst->values[dst_idx], &src->values[src_idx],
211                 sizeof dst->values[0] * n_values);
212       else
213         copy_forward (dst, dst_idx, src, src_idx, n_values);
214     }
215   else if (dst_idx != src_idx)
216     {
217       if (!dst->proto->n_strings)
218         memmove (&dst->values[dst_idx], &src->values[src_idx],
219                  sizeof dst->values[0] * n_values);
220       else if (dst_idx < src_idx)
221         copy_forward (dst, dst_idx, src, src_idx, n_values);
222       else /* dst_idx > src_idx */
223         copy_backward (dst, dst_idx, src, src_idx, n_values);
224     }
225 }
226 
227 /* Copies N_VALUES values out of case C to VALUES, starting at
228    the given START_IDX. */
229 void
case_copy_out(const struct ccase * c,size_t start_idx,union value * values,size_t n_values)230 case_copy_out (const struct ccase *c,
231                size_t start_idx, union value *values, size_t n_values)
232 {
233   size_t i;
234 
235   assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
236 
237   for (i = 0; i < n_values; i++)
238     value_copy (&values[i], &c->values[start_idx + i],
239                 caseproto_get_width (c->proto, start_idx + i));
240 }
241 
242 /* Copies N_VALUES values from VALUES into case C, starting at
243    the given START_IDX.
244 
245    C must not be shared. */
246 void
case_copy_in(struct ccase * c,size_t start_idx,const union value * values,size_t n_values)247 case_copy_in (struct ccase *c,
248               size_t start_idx, const union value *values, size_t n_values)
249 {
250   size_t i;
251 
252   assert (!case_is_shared (c));
253   assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
254 
255   for (i = 0; i < n_values; i++)
256     value_copy (&c->values[start_idx + i], &values[i],
257                 caseproto_get_width (c->proto, start_idx + i));
258 }
259 
260 /* Returns a pointer to the `union value' used for the
261    element of C for variable V.
262    Case C must be drawn from V's dictionary.
263    The caller must not modify the returned data. */
264 const union value *
case_data(const struct ccase * c,const struct variable * v)265 case_data (const struct ccase *c, const struct variable *v)
266 {
267   assert_variable_matches_case (c, v);
268   return &c->values[var_get_case_index (v)];
269 }
270 
271 /* Returns a pointer to the `union value' used for the element of
272    C numbered IDX.  The caller must not modify the returned
273    data. */
274 const union value *
case_data_idx(const struct ccase * c,size_t idx)275 case_data_idx (const struct ccase *c, size_t idx)
276 {
277   assert (idx < c->proto->n_widths);
278   return &c->values[idx];
279 }
280 
281 /* Returns a pointer to the `union value' used for the element of
282    C for variable V.  Case C must be drawn from V's dictionary.
283    The caller is allowed to modify the returned data.
284 
285    Case C must not be shared. */
286 union value *
case_data_rw(struct ccase * c,const struct variable * v)287 case_data_rw (struct ccase *c, const struct variable *v)
288 {
289   assert_variable_matches_case (c, v);
290   assert (!case_is_shared (c));
291   return &c->values[var_get_case_index (v)];
292 }
293 
294 /* Returns a pointer to the `union value' used for the
295    element of C numbered IDX.
296    The caller is allowed to modify the returned data.
297 
298    Case C must not be shared. */
299 union value *
case_data_rw_idx(struct ccase * c,size_t idx)300 case_data_rw_idx (struct ccase *c, size_t idx)
301 {
302   assert (idx < c->proto->n_widths);
303   assert (!case_is_shared (c));
304   return &c->values[idx];
305 }
306 
307 /* Returns the numeric value of the `union value' in C for
308    variable V.
309    Case C must be drawn from V's dictionary. */
310 double
case_num(const struct ccase * c,const struct variable * v)311 case_num (const struct ccase *c, const struct variable *v)
312 {
313   assert_variable_matches_case (c, v);
314   return c->values[var_get_case_index (v)].f;
315 }
316 
317 /* Returns the numeric value of the `union value' in C numbered
318    IDX. */
319 double
case_num_idx(const struct ccase * c,size_t idx)320 case_num_idx (const struct ccase *c, size_t idx)
321 {
322   assert (idx < c->proto->n_widths);
323   return c->values[idx].f;
324 }
325 
326 /* Returns the string value of the `union value' in C for
327    variable V.  Case C must be drawn from V's dictionary.  The
328    caller must not modify the return value.
329 
330    Like the strings embedded in all "union value"s, the return
331    value is not null-terminated. */
332 const uint8_t *
case_str(const struct ccase * c,const struct variable * v)333 case_str (const struct ccase *c, const struct variable *v)
334 {
335   assert_variable_matches_case (c, v);
336   return c->values[var_get_case_index (v)].s;
337 }
338 
339 /* Returns the string value of the `union value' in C numbered
340    IDX.  The caller must not modify the return value.
341 
342    Like the strings embedded in all "union value"s, the return
343    value is not null-terminated. */
344 const uint8_t *
case_str_idx(const struct ccase * c,size_t idx)345 case_str_idx (const struct ccase *c, size_t idx)
346 {
347   assert (idx < c->proto->n_widths);
348   return c->values[idx].s;
349 }
350 
351 /* Returns the string value of the `union value' in C for
352    variable V.  Case C must be drawn from V's dictionary.  The
353    caller may modify the return value.
354 
355    Case C must not be shared.
356 
357    Like the strings embedded in all "union value"s, the return
358    value is not null-terminated. */
359 uint8_t *
case_str_rw(struct ccase * c,const struct variable * v)360 case_str_rw (struct ccase *c, const struct variable *v)
361 {
362   assert_variable_matches_case (c, v);
363   size_t idx = var_get_case_index (v);
364   assert (!case_is_shared (c));
365   return c->values[idx].s;
366 }
367 
368 /* Returns the string value of the `union value' in C numbered
369    IDX.  The caller may modify the return value.
370 
371    Case C must not be shared.
372 
373    Like the strings embedded in all "union value"s, the return
374    value is not null-terminated. */
375 uint8_t *
case_str_rw_idx(struct ccase * c,size_t idx)376 case_str_rw_idx (struct ccase *c, size_t idx)
377 {
378   assert (idx < c->proto->n_widths);
379   assert (!case_is_shared (c));
380   return c->values[idx].s;
381 }
382 
383 /* Compares the values of the N_VARS variables in VP
384    in cases A and B and returns a strcmp()-type result. */
385 int
case_compare(const struct ccase * a,const struct ccase * b,const struct variable * const * vp,size_t n_vars)386 case_compare (const struct ccase *a, const struct ccase *b,
387               const struct variable *const *vp, size_t n_vars)
388 {
389   return case_compare_2dict (a, b, vp, vp, n_vars);
390 }
391 
392 /* Compares the values of the N_VARS variables in VAP in case CA
393    to the values of the N_VARS variables in VBP in CB
394    and returns a strcmp()-type result. */
395 int
case_compare_2dict(const struct ccase * ca,const struct ccase * cb,const struct variable * const * vap,const struct variable * const * vbp,size_t n_vars)396 case_compare_2dict (const struct ccase *ca, const struct ccase *cb,
397                     const struct variable *const *vap,
398                     const struct variable *const *vbp,
399                     size_t n_vars)
400 {
401   int cmp = 0;
402   for (; !cmp && n_vars-- > 0; vap++, vbp++)
403     {
404       const union value *va = case_data (ca, *vap);
405       const union value *vb = case_data (cb, *vbp);
406       assert (var_get_width (*vap) == var_get_width (*vbp));
407       cmp = value_compare_3way (va, vb, var_get_width (*vap));
408     }
409   return cmp;
410 }
411 
412 /* Returns a pointer to the array of `union value's used for C.
413    The caller must *not* modify the returned data.
414 
415    This function breaks the case abstraction.  It should *not* be
416    commonly used.  Prefer the other case functions. */
417 const union value *
case_data_all(const struct ccase * c)418 case_data_all (const struct ccase *c)
419 {
420   return c->values;
421 }
422 
423 /* Returns a pointer to the array of `union value's used for C.
424    The caller is allowed to modify the returned data.
425 
426    Case C must not be shared.
427 
428    This function breaks the case abstraction.  It should *not* be
429    commonly used.  Prefer the other case functions. */
430 union value *
case_data_all_rw(struct ccase * c)431 case_data_all_rw (struct ccase *c)
432 {
433   assert (!case_is_shared (c));
434   return c->values;
435 }
436 
437 /* Internal helper function for case_unshare. */
438 struct ccase *
case_unshare__(struct ccase * old)439 case_unshare__ (struct ccase *old)
440 {
441   struct ccase *new = case_create (old->proto);
442   case_copy (new, 0, old, 0, caseproto_get_n_widths (new->proto));
443   --old->ref_cnt;
444   return new;
445 }
446 
447 /* Internal helper function for case_unref. */
448 void
case_unref__(struct ccase * c)449 case_unref__ (struct ccase *c)
450 {
451   caseproto_destroy_values (c->proto, c->values);
452   caseproto_unref (c->proto);
453   free (c);
454 }
455 
456 /* Returns the number of bytes needed by a case for case
457    prototype PROTO. */
458 static size_t
case_size(const struct caseproto * proto)459 case_size (const struct caseproto *proto)
460 {
461   return (offsetof (struct ccase, values)
462           + caseproto_get_n_widths (proto) * sizeof (union value));
463 }
464 
465 /* Returns true if C contains a value at V's case index with the
466    same width as V; that is, if V may plausibly be used to read
467    or write data in C.
468 
469    Useful in assertions. */
470 static void
assert_variable_matches_case(const struct ccase * c,const struct variable * v)471 assert_variable_matches_case (const struct ccase *c, const struct variable *v)
472 {
473   size_t case_idx = var_get_case_index (v);
474   assert (case_idx < caseproto_get_n_widths (c->proto));
475   assert (caseproto_get_width (c->proto, case_idx) == var_get_width (v));
476 }
477 
478 /* Internal helper function for case_copy(). */
479 static void
copy_forward(struct ccase * dst,size_t dst_idx,const struct ccase * src,size_t src_idx,size_t n_values)480 copy_forward (struct ccase *dst, size_t dst_idx,
481               const struct ccase *src, size_t src_idx,
482               size_t n_values)
483 {
484   size_t i;
485 
486   for (i = 0; i < n_values; i++)
487     value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
488                 caseproto_get_width (dst->proto, dst_idx + i));
489 }
490 
491 /* Internal helper function for case_copy(). */
492 static void
copy_backward(struct ccase * dst,size_t dst_idx,const struct ccase * src,size_t src_idx,size_t n_values)493 copy_backward (struct ccase *dst, size_t dst_idx,
494                const struct ccase *src, size_t src_idx,
495                size_t n_values)
496 {
497   size_t i;
498 
499   for (i = n_values; i-- != 0;)
500     value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
501                 caseproto_get_width (dst->proto, dst_idx + i));
502 }
503 
504