1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2015 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include <stdlib.h>
20
21 #include "data/case.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/transformations.h"
25 #include "data/variable.h"
26 #include "language/command.h"
27 #include "language/lexer/lexer.h"
28 #include "language/lexer/value-parser.h"
29 #include "language/lexer/variable-parser.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/i18n.h"
32 #include "libpspp/message.h"
33 #include "libpspp/pool.h"
34 #include "libpspp/str.h"
35
36 #include "gl/xalloc.h"
37
38 #include "gettext.h"
39 #define _(msgid) gettext (msgid)
40
/* Tells how the A and B members of a struct num_value are to be read. */
enum value_type
  {
    CNT_SINGLE,                 /* One exact value: x == a. */
    CNT_RANGE                   /* Closed interval: a <= x <= b. */
  };
47
48 /* Numeric count criteria. */
49 struct num_value
50 {
51 enum value_type type; /* How to interpret a, b. */
52 double a, b; /* Values to count. */
53 };
54
/* One "VARS (VALUES)" group of a COUNT specification.  The groups that
   feed a single destination variable are chained through NEXT. */
struct criteria
  {
    struct criteria *next;      /* Next group for the same destination. */

    /* The variables whose values are examined. */
    const struct variable **vars;
    size_t var_cnt;             /* Number of entries in VARS. */

    /* Missing-value handling; only parse_numeric_criteria() sets these. */
    bool count_system_missing;  /* Does the system-missing value match? */
    bool count_user_missing;    /* Do user-missing values match? */

    /* The values to look for.  NUM is used when the variables are
       numeric, STR when they are strings. */
    size_t value_cnt;           /* Number of entries in VALUES. */
    union
      {
        struct num_value *num;  /* Numeric values and ranges. */
        char **str;             /* Padded string values. */
      }
    values;
  };
76
/* One destination of a COUNT command together with everything that is
   counted into it.  Destinations form a singly linked list. */
struct dst_var
  {
    struct dst_var *next;       /* Next destination, or NULL at the end. */
    struct variable *var;       /* Variable that receives the count. */
    char *name;                 /* Its name; set only when the variable did
                                   not exist yet at parse time. */
    struct criteria *crit;      /* Head of this destination's criteria list. */
  };
84
/* State of one COUNT transformation. */
struct count_trns
  {
    struct dst_var *dst_vars;   /* Head of the list of destinations. */
    struct pool *pool;          /* Pool that owns this transformation's
                                   allocations; destroyed on free. */
  };
90
91 static trns_proc_func count_trns_proc;
92 static trns_free_func count_trns_free;
93
94 static bool parse_numeric_criteria (struct lexer *, struct pool *, struct criteria *);
95 static bool parse_string_criteria (struct lexer *, struct pool *,
96 struct criteria *,
97 const char *dict_encoding);
98
99 int
cmd_count(struct lexer * lexer,struct dataset * ds)100 cmd_count (struct lexer *lexer, struct dataset *ds)
101 {
102 struct dst_var *dv; /* Destination var being parsed. */
103 struct count_trns *trns; /* Transformation. */
104
105 /* Parses each slash-delimited specification. */
106 trns = pool_create_container (struct count_trns, pool);
107 trns->dst_vars = dv = pool_alloc (trns->pool, sizeof *dv);
108 for (;;)
109 {
110 struct criteria *crit;
111
112 /* Initialize this struct dst_var to ensure proper cleanup. */
113 dv->next = NULL;
114 dv->var = NULL;
115 dv->crit = NULL;
116
117 /* Get destination variable, or at least its name. */
118 if (!lex_force_id (lexer))
119 goto fail;
120 dv->var = dict_lookup_var (dataset_dict (ds), lex_tokcstr (lexer));
121 if (dv->var != NULL)
122 {
123 if (var_is_alpha (dv->var))
124 {
125 msg (SE, _("Destination cannot be a string variable."));
126 goto fail;
127 }
128 }
129 else
130 dv->name = pool_strdup (trns->pool, lex_tokcstr (lexer));
131
132 lex_get (lexer);
133 if (!lex_force_match (lexer, T_EQUALS))
134 goto fail;
135
136 crit = dv->crit = pool_alloc (trns->pool, sizeof *crit);
137 for (;;)
138 {
139 struct dictionary *dict = dataset_dict (ds);
140 bool ok;
141
142 crit->next = NULL;
143 crit->vars = NULL;
144 if (!parse_variables_const (lexer, dict, &crit->vars,
145 &crit->var_cnt,
146 PV_DUPLICATE | PV_SAME_TYPE))
147 goto fail;
148 pool_register (trns->pool, free, crit->vars);
149
150 if (!lex_force_match (lexer, T_LPAREN))
151 goto fail;
152
153 crit->value_cnt = 0;
154 if (var_is_numeric (crit->vars[0]))
155 ok = parse_numeric_criteria (lexer, trns->pool, crit);
156 else
157 ok = parse_string_criteria (lexer, trns->pool, crit,
158 dict_get_encoding (dict));
159 if (!ok)
160 goto fail;
161
162 if (lex_token (lexer) == T_SLASH || lex_token (lexer) == T_ENDCMD)
163 break;
164
165 crit = crit->next = pool_alloc (trns->pool, sizeof *crit);
166 }
167
168 if (lex_token (lexer) == T_ENDCMD)
169 break;
170
171 if (!lex_force_match (lexer, T_SLASH))
172 goto fail;
173 dv = dv->next = pool_alloc (trns->pool, sizeof *dv);
174 }
175
176 /* Create all the nonexistent destination variables. */
177 for (dv = trns->dst_vars; dv; dv = dv->next)
178 if (dv->var == NULL)
179 {
180 /* It's valid, though motivationally questionable, to count to
181 the same dest var more than once. */
182 dv->var = dict_lookup_var (dataset_dict (ds), dv->name);
183
184 if (dv->var == NULL)
185 dv->var = dict_create_var_assert (dataset_dict (ds), dv->name, 0);
186 }
187
188 add_transformation (ds, count_trns_proc, count_trns_free, trns);
189 return CMD_SUCCESS;
190
191 fail:
192 count_trns_free (trns);
193 return CMD_FAILURE;
194 }
195
196 /* Parses a set of numeric criterion values. Returns success. */
197 static bool
parse_numeric_criteria(struct lexer * lexer,struct pool * pool,struct criteria * crit)198 parse_numeric_criteria (struct lexer *lexer, struct pool *pool, struct criteria *crit)
199 {
200 size_t allocated = 0;
201
202 crit->values.num = NULL;
203 crit->count_system_missing = false;
204 crit->count_user_missing = false;
205 for (;;)
206 {
207 double low, high;
208
209 if (lex_match_id (lexer, "SYSMIS"))
210 crit->count_system_missing = true;
211 else if (lex_match_id (lexer, "MISSING"))
212 crit->count_system_missing = crit->count_user_missing = true;
213 else if (parse_num_range (lexer, &low, &high, NULL))
214 {
215 struct num_value *cur;
216
217 if (crit->value_cnt >= allocated)
218 crit->values.num = pool_2nrealloc (pool, crit->values.num,
219 &allocated,
220 sizeof *crit->values.num);
221 cur = &crit->values.num[crit->value_cnt++];
222 cur->type = low == high ? CNT_SINGLE : CNT_RANGE;
223 cur->a = low;
224 cur->b = high;
225 }
226 else
227 return false;
228
229 lex_match (lexer, T_COMMA);
230 if (lex_match (lexer, T_RPAREN))
231 break;
232 }
233 return true;
234 }
235
236 /* Parses a set of string criteria values. Returns success. */
237 static bool
parse_string_criteria(struct lexer * lexer,struct pool * pool,struct criteria * crit,const char * dict_encoding)238 parse_string_criteria (struct lexer *lexer, struct pool *pool,
239 struct criteria *crit, const char *dict_encoding)
240 {
241 int len = 0;
242 size_t allocated = 0;
243 size_t i;
244
245 for (i = 0; i < crit->var_cnt; i++)
246 if (var_get_width (crit->vars[i]) > len)
247 len = var_get_width (crit->vars[i]);
248
249 crit->values.str = NULL;
250 for (;;)
251 {
252 char **cur;
253 char *s;
254
255 if (crit->value_cnt >= allocated)
256 crit->values.str = pool_2nrealloc (pool, crit->values.str,
257 &allocated,
258 sizeof *crit->values.str);
259
260 if (!lex_force_string (lexer))
261 return false;
262
263 s = recode_string (dict_encoding, "UTF-8", lex_tokcstr (lexer),
264 ss_length (lex_tokss (lexer)));
265
266 cur = &crit->values.str[crit->value_cnt++];
267 *cur = pool_alloc (pool, len + 1);
268 str_copy_rpad (*cur, len + 1, s);
269 lex_get (lexer);
270
271 free (s);
272
273 lex_match (lexer, T_COMMA);
274 if (lex_match (lexer, T_RPAREN))
275 break;
276 }
277
278 return true;
279 }
280
281 /* Transformation. */
282
283 /* Counts the number of values in case C matching CRIT. */
284 static int
count_numeric(struct criteria * crit,const struct ccase * c)285 count_numeric (struct criteria *crit, const struct ccase *c)
286 {
287 int counter = 0;
288 size_t i;
289
290 for (i = 0; i < crit->var_cnt; i++)
291 {
292 double x = case_num (c, crit->vars[i]);
293 struct num_value *v;
294
295 for (v = crit->values.num; v < crit->values.num + crit->value_cnt;
296 v++)
297 if (v->type == CNT_SINGLE ? x == v->a : x >= v->a && x <= v->b)
298 {
299 counter++;
300 break;
301 }
302
303 if (var_is_num_missing (crit->vars[i], x, MV_ANY)
304 && (x == SYSMIS
305 ? crit->count_system_missing
306 : crit->count_user_missing))
307 {
308 counter++;
309 continue;
310 }
311
312 }
313
314 return counter;
315 }
316
317 /* Counts the number of values in case C matching CRIT. */
318 static int
count_string(struct criteria * crit,const struct ccase * c)319 count_string (struct criteria *crit, const struct ccase *c)
320 {
321 int counter = 0;
322 size_t i;
323
324 for (i = 0; i < crit->var_cnt; i++)
325 {
326 char **v;
327 for (v = crit->values.str; v < crit->values.str + crit->value_cnt; v++)
328 if (!memcmp (case_str (c, crit->vars[i]), *v,
329 var_get_width (crit->vars[i])))
330 {
331 counter++;
332 break;
333 }
334 }
335
336 return counter;
337 }
338
339 /* Performs the COUNT transformation T on case C. */
340 static int
count_trns_proc(void * trns_,struct ccase ** c,casenumber case_num UNUSED)341 count_trns_proc (void *trns_, struct ccase **c,
342 casenumber case_num UNUSED)
343 {
344 struct count_trns *trns = trns_;
345 struct dst_var *dv;
346
347 *c = case_unshare (*c);
348 for (dv = trns->dst_vars; dv; dv = dv->next)
349 {
350 struct criteria *crit;
351 int counter;
352
353 counter = 0;
354 for (crit = dv->crit; crit; crit = crit->next)
355 if (var_is_numeric (crit->vars[0]))
356 counter += count_numeric (crit, *c);
357 else
358 counter += count_string (crit, *c);
359 case_data_rw (*c, dv->var)->f = counter;
360 }
361 return TRNS_CONTINUE;
362 }
363
364 /* Destroys all dynamic data structures associated with TRNS. */
365 static bool
count_trns_free(void * trns_)366 count_trns_free (void *trns_)
367 {
368 struct count_trns *trns = trns_;
369 pool_destroy (trns->pool);
370 return true;
371 }
372