1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "data/case.h"
20
21 #include <limits.h>
22 #include <stddef.h>
23 #include <stdlib.h>
24
25 #include "data/value.h"
26 #include "data/variable.h"
27 #include "libpspp/assertion.h"
28 #include "libpspp/str.h"
29
30 #include "gl/minmax.h"
31 #include "gl/xalloc.h"
32
33 /* Set this flag to 1 to copy cases instead of ref counting them.
34 This is sometimes helpful in debugging situations. */
35 #define DEBUG_CASEREFS 0
36
37 #if DEBUG_CASEREFS
38 #warning "Caseref debug enabled. CASES ARE NOT BEING SHARED!!"
39 #endif
40
41 static size_t case_size (const struct caseproto *);
42 static void assert_variable_matches_case (const struct ccase *,
43 const struct variable *);
44 static void copy_forward (struct ccase *dst, size_t dst_idx,
45 const struct ccase *src, size_t src_idx,
46 size_t n_values);
47 static void copy_backward (struct ccase *dst, size_t dst_idx,
48 const struct ccase *src, size_t src_idx,
49 size_t n_values);
50
51 /* Creates and returns a new case that stores data of the form
52 specified by PROTO. The data in the case have indeterminate
53 contents until explicitly written.
54
55 The caller retains ownership of PROTO. */
56 struct ccase *
case_create(const struct caseproto * proto)57 case_create (const struct caseproto *proto)
58 {
59 struct ccase *c = case_try_create (proto);
60 if (c == NULL)
61 xalloc_die ();
62 return c;
63 }
64
65 /* Like case_create, but returns a null pointer if not enough
66 memory is available. */
67 struct ccase *
case_try_create(const struct caseproto * proto)68 case_try_create (const struct caseproto *proto)
69 {
70 struct ccase *c = malloc (case_size (proto));
71 if (c != NULL)
72 {
73 if (caseproto_try_init_values (proto, c->values))
74 {
75 c->proto = caseproto_ref (proto);
76 c->ref_cnt = 1;
77 return c;
78 }
79 free (c);
80 }
81 return NULL;
82 }
83
84 /* Creates and returns an unshared copy of case C. */
85 struct ccase *
case_clone(const struct ccase * c)86 case_clone (const struct ccase *c)
87 {
88 return case_unshare (case_ref (c));
89 }
90
91 /* Increments case C's reference count and returns C. Afterward,
92 case C is shared among its reference count holders. */
93 struct ccase *
case_ref(const struct ccase * c_)94 case_ref (const struct ccase *c_)
95 {
96 struct ccase *c = CONST_CAST (struct ccase *, c_);
97 c->ref_cnt++;
98 #if DEBUG_CASEREFS
99 c = case_unshare__ (c);
100 #endif
101 return c;
102 }
103
104 /* Returns an estimate of the number of bytes of memory that
105 would be consumed in creating a case based on PROTO. The
106 estimate includes typical overhead from malloc() in addition
107 to the actual size of data. */
108 size_t
case_get_cost(const struct caseproto * proto)109 case_get_cost (const struct caseproto *proto)
110 {
111 /* FIXME: improve approximation? */
112 return (1 + caseproto_get_n_widths (proto)
113 + 3 * caseproto_get_n_strings (proto)) * sizeof (union value);
114 }
115
116 /* Changes the prototype for case C, which must not be shared.
117 The new PROTO must be conformable with C's current prototype
118 (as defined by caseproto_is_conformable).
119
120 Any new values created by this function have indeterminate
121 content that the caller is responsible for initializing.
122
123 The caller retains ownership of PROTO.
124
125 Returns a new case that replaces C, which is freed. */
126 struct ccase *
case_resize(struct ccase * c,const struct caseproto * new_proto)127 case_resize (struct ccase *c, const struct caseproto *new_proto)
128 {
129 struct caseproto *old_proto = c->proto;
130 size_t old_n_widths = caseproto_get_n_widths (old_proto);
131 size_t new_n_widths = caseproto_get_n_widths (new_proto);
132
133 assert (!case_is_shared (c));
134 expensive_assert (caseproto_is_conformable (old_proto, new_proto));
135
136 if (old_n_widths != new_n_widths)
137 {
138 if (new_n_widths < old_n_widths)
139 caseproto_reinit_values (old_proto, new_proto, c->values);
140 c = xrealloc (c, case_size (new_proto));
141 if (new_n_widths > old_n_widths)
142 caseproto_reinit_values (old_proto, new_proto, c->values);
143
144 caseproto_unref (old_proto);
145 c->proto = caseproto_ref (new_proto);
146 }
147
148 return c;
149 }
150
151 /* case_unshare_and_resize(C, PROTO) is equivalent to
152 case_resize(case_unshare(C), PROTO), but it is faster if case
153 C is shared.
154
155 Any new values created by this function have indeterminate
156 content that the caller is responsible for initializing.
157
158 The caller retains ownership of PROTO.
159
160 Returns the new case that replaces C, which is freed. */
161 struct ccase *
case_unshare_and_resize(struct ccase * c,const struct caseproto * proto)162 case_unshare_and_resize (struct ccase *c, const struct caseproto *proto)
163 {
164 if (!case_is_shared (c))
165 return case_resize (c, proto);
166 else
167 {
168 struct ccase *new = case_create (proto);
169 size_t old_n_values = caseproto_get_n_widths (c->proto);
170 size_t new_n_values = caseproto_get_n_widths (proto);
171 case_copy (new, 0, c, 0, MIN (old_n_values, new_n_values));
172 c->ref_cnt--;
173 return new;
174 }
175 }
176
177 /* Sets all of the numeric values in case C to the system-missing
178 value, and all of the string values to spaces. */
179 void
case_set_missing(struct ccase * c)180 case_set_missing (struct ccase *c)
181 {
182 size_t i;
183
184 assert (!case_is_shared (c));
185 for (i = 0; i < caseproto_get_n_widths (c->proto); i++)
186 value_set_missing (&c->values[i], caseproto_get_width (c->proto, i));
187 }
188
189 /* Copies N_VALUES values from SRC (starting at SRC_IDX) to DST
190 (starting at DST_IDX). Each value that is copied into must
191 have the same width as the value that it is copied from.
192
193 Properly handles overlapping ranges when DST == SRC.
194
195 DST must not be shared. */
196 void
case_copy(struct ccase * dst,size_t dst_idx,const struct ccase * src,size_t src_idx,size_t n_values)197 case_copy (struct ccase *dst, size_t dst_idx,
198 const struct ccase *src, size_t src_idx,
199 size_t n_values)
200 {
201 assert (!case_is_shared (dst));
202 assert (caseproto_range_is_valid (dst->proto, dst_idx, n_values));
203 assert (caseproto_range_is_valid (src->proto, src_idx, n_values));
204 assert (caseproto_equal (dst->proto, dst_idx, src->proto, src_idx,
205 n_values));
206
207 if (dst != src)
208 {
209 if (!dst->proto->n_strings || !src->proto->n_strings)
210 memcpy (&dst->values[dst_idx], &src->values[src_idx],
211 sizeof dst->values[0] * n_values);
212 else
213 copy_forward (dst, dst_idx, src, src_idx, n_values);
214 }
215 else if (dst_idx != src_idx)
216 {
217 if (!dst->proto->n_strings)
218 memmove (&dst->values[dst_idx], &src->values[src_idx],
219 sizeof dst->values[0] * n_values);
220 else if (dst_idx < src_idx)
221 copy_forward (dst, dst_idx, src, src_idx, n_values);
222 else /* dst_idx > src_idx */
223 copy_backward (dst, dst_idx, src, src_idx, n_values);
224 }
225 }
226
227 /* Copies N_VALUES values out of case C to VALUES, starting at
228 the given START_IDX. */
229 void
case_copy_out(const struct ccase * c,size_t start_idx,union value * values,size_t n_values)230 case_copy_out (const struct ccase *c,
231 size_t start_idx, union value *values, size_t n_values)
232 {
233 size_t i;
234
235 assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
236
237 for (i = 0; i < n_values; i++)
238 value_copy (&values[i], &c->values[start_idx + i],
239 caseproto_get_width (c->proto, start_idx + i));
240 }
241
242 /* Copies N_VALUES values from VALUES into case C, starting at
243 the given START_IDX.
244
245 C must not be shared. */
246 void
case_copy_in(struct ccase * c,size_t start_idx,const union value * values,size_t n_values)247 case_copy_in (struct ccase *c,
248 size_t start_idx, const union value *values, size_t n_values)
249 {
250 size_t i;
251
252 assert (!case_is_shared (c));
253 assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
254
255 for (i = 0; i < n_values; i++)
256 value_copy (&c->values[start_idx + i], &values[i],
257 caseproto_get_width (c->proto, start_idx + i));
258 }
259
260 /* Returns a pointer to the `union value' used for the
261 element of C for variable V.
262 Case C must be drawn from V's dictionary.
263 The caller must not modify the returned data. */
264 const union value *
case_data(const struct ccase * c,const struct variable * v)265 case_data (const struct ccase *c, const struct variable *v)
266 {
267 assert_variable_matches_case (c, v);
268 return &c->values[var_get_case_index (v)];
269 }
270
271 /* Returns a pointer to the `union value' used for the element of
272 C numbered IDX. The caller must not modify the returned
273 data. */
274 const union value *
case_data_idx(const struct ccase * c,size_t idx)275 case_data_idx (const struct ccase *c, size_t idx)
276 {
277 assert (idx < c->proto->n_widths);
278 return &c->values[idx];
279 }
280
281 /* Returns a pointer to the `union value' used for the element of
282 C for variable V. Case C must be drawn from V's dictionary.
283 The caller is allowed to modify the returned data.
284
285 Case C must not be shared. */
286 union value *
case_data_rw(struct ccase * c,const struct variable * v)287 case_data_rw (struct ccase *c, const struct variable *v)
288 {
289 assert_variable_matches_case (c, v);
290 assert (!case_is_shared (c));
291 return &c->values[var_get_case_index (v)];
292 }
293
294 /* Returns a pointer to the `union value' used for the
295 element of C numbered IDX.
296 The caller is allowed to modify the returned data.
297
298 Case C must not be shared. */
299 union value *
case_data_rw_idx(struct ccase * c,size_t idx)300 case_data_rw_idx (struct ccase *c, size_t idx)
301 {
302 assert (idx < c->proto->n_widths);
303 assert (!case_is_shared (c));
304 return &c->values[idx];
305 }
306
307 /* Returns the numeric value of the `union value' in C for
308 variable V.
309 Case C must be drawn from V's dictionary. */
310 double
case_num(const struct ccase * c,const struct variable * v)311 case_num (const struct ccase *c, const struct variable *v)
312 {
313 assert_variable_matches_case (c, v);
314 return c->values[var_get_case_index (v)].f;
315 }
316
317 /* Returns the numeric value of the `union value' in C numbered
318 IDX. */
319 double
case_num_idx(const struct ccase * c,size_t idx)320 case_num_idx (const struct ccase *c, size_t idx)
321 {
322 assert (idx < c->proto->n_widths);
323 return c->values[idx].f;
324 }
325
326 /* Returns the string value of the `union value' in C for
327 variable V. Case C must be drawn from V's dictionary. The
328 caller must not modify the return value.
329
330 Like the strings embedded in all "union value"s, the return
331 value is not null-terminated. */
332 const uint8_t *
case_str(const struct ccase * c,const struct variable * v)333 case_str (const struct ccase *c, const struct variable *v)
334 {
335 assert_variable_matches_case (c, v);
336 return c->values[var_get_case_index (v)].s;
337 }
338
339 /* Returns the string value of the `union value' in C numbered
340 IDX. The caller must not modify the return value.
341
342 Like the strings embedded in all "union value"s, the return
343 value is not null-terminated. */
344 const uint8_t *
case_str_idx(const struct ccase * c,size_t idx)345 case_str_idx (const struct ccase *c, size_t idx)
346 {
347 assert (idx < c->proto->n_widths);
348 return c->values[idx].s;
349 }
350
351 /* Returns the string value of the `union value' in C for
352 variable V. Case C must be drawn from V's dictionary. The
353 caller may modify the return value.
354
355 Case C must not be shared.
356
357 Like the strings embedded in all "union value"s, the return
358 value is not null-terminated. */
359 uint8_t *
case_str_rw(struct ccase * c,const struct variable * v)360 case_str_rw (struct ccase *c, const struct variable *v)
361 {
362 assert_variable_matches_case (c, v);
363 size_t idx = var_get_case_index (v);
364 assert (!case_is_shared (c));
365 return c->values[idx].s;
366 }
367
368 /* Returns the string value of the `union value' in C numbered
369 IDX. The caller may modify the return value.
370
371 Case C must not be shared.
372
373 Like the strings embedded in all "union value"s, the return
374 value is not null-terminated. */
375 uint8_t *
case_str_rw_idx(struct ccase * c,size_t idx)376 case_str_rw_idx (struct ccase *c, size_t idx)
377 {
378 assert (idx < c->proto->n_widths);
379 assert (!case_is_shared (c));
380 return c->values[idx].s;
381 }
382
383 /* Compares the values of the N_VARS variables in VP
384 in cases A and B and returns a strcmp()-type result. */
385 int
case_compare(const struct ccase * a,const struct ccase * b,const struct variable * const * vp,size_t n_vars)386 case_compare (const struct ccase *a, const struct ccase *b,
387 const struct variable *const *vp, size_t n_vars)
388 {
389 return case_compare_2dict (a, b, vp, vp, n_vars);
390 }
391
392 /* Compares the values of the N_VARS variables in VAP in case CA
393 to the values of the N_VARS variables in VBP in CB
394 and returns a strcmp()-type result. */
395 int
case_compare_2dict(const struct ccase * ca,const struct ccase * cb,const struct variable * const * vap,const struct variable * const * vbp,size_t n_vars)396 case_compare_2dict (const struct ccase *ca, const struct ccase *cb,
397 const struct variable *const *vap,
398 const struct variable *const *vbp,
399 size_t n_vars)
400 {
401 int cmp = 0;
402 for (; !cmp && n_vars-- > 0; vap++, vbp++)
403 {
404 const union value *va = case_data (ca, *vap);
405 const union value *vb = case_data (cb, *vbp);
406 assert (var_get_width (*vap) == var_get_width (*vbp));
407 cmp = value_compare_3way (va, vb, var_get_width (*vap));
408 }
409 return cmp;
410 }
411
412 /* Returns a pointer to the array of `union value's used for C.
413 The caller must *not* modify the returned data.
414
415 This function breaks the case abstraction. It should *not* be
416 commonly used. Prefer the other case functions. */
417 const union value *
case_data_all(const struct ccase * c)418 case_data_all (const struct ccase *c)
419 {
420 return c->values;
421 }
422
423 /* Returns a pointer to the array of `union value's used for C.
424 The caller is allowed to modify the returned data.
425
426 Case C must not be shared.
427
428 This function breaks the case abstraction. It should *not* be
429 commonly used. Prefer the other case functions. */
430 union value *
case_data_all_rw(struct ccase * c)431 case_data_all_rw (struct ccase *c)
432 {
433 assert (!case_is_shared (c));
434 return c->values;
435 }
436
437 /* Internal helper function for case_unshare. */
438 struct ccase *
case_unshare__(struct ccase * old)439 case_unshare__ (struct ccase *old)
440 {
441 struct ccase *new = case_create (old->proto);
442 case_copy (new, 0, old, 0, caseproto_get_n_widths (new->proto));
443 --old->ref_cnt;
444 return new;
445 }
446
447 /* Internal helper function for case_unref. */
448 void
case_unref__(struct ccase * c)449 case_unref__ (struct ccase *c)
450 {
451 caseproto_destroy_values (c->proto, c->values);
452 caseproto_unref (c->proto);
453 free (c);
454 }
455
456 /* Returns the number of bytes needed by a case for case
457 prototype PROTO. */
458 static size_t
case_size(const struct caseproto * proto)459 case_size (const struct caseproto *proto)
460 {
461 return (offsetof (struct ccase, values)
462 + caseproto_get_n_widths (proto) * sizeof (union value));
463 }
464
465 /* Returns true if C contains a value at V's case index with the
466 same width as V; that is, if V may plausibly be used to read
467 or write data in C.
468
469 Useful in assertions. */
470 static void
assert_variable_matches_case(const struct ccase * c,const struct variable * v)471 assert_variable_matches_case (const struct ccase *c, const struct variable *v)
472 {
473 size_t case_idx = var_get_case_index (v);
474 assert (case_idx < caseproto_get_n_widths (c->proto));
475 assert (caseproto_get_width (c->proto, case_idx) == var_get_width (v));
476 }
477
478 /* Internal helper function for case_copy(). */
479 static void
copy_forward(struct ccase * dst,size_t dst_idx,const struct ccase * src,size_t src_idx,size_t n_values)480 copy_forward (struct ccase *dst, size_t dst_idx,
481 const struct ccase *src, size_t src_idx,
482 size_t n_values)
483 {
484 size_t i;
485
486 for (i = 0; i < n_values; i++)
487 value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
488 caseproto_get_width (dst->proto, dst_idx + i));
489 }
490
491 /* Internal helper function for case_copy(). */
492 static void
copy_backward(struct ccase * dst,size_t dst_idx,const struct ccase * src,size_t src_idx,size_t n_values)493 copy_backward (struct ccase *dst, size_t dst_idx,
494 const struct ccase *src, size_t src_idx,
495 size_t n_values)
496 {
497 size_t i;
498
499 for (i = n_values; i-- != 0;)
500 value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
501 caseproto_get_width (dst->proto, dst_idx + i));
502 }
503
504