1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 1997-9, 2000, 2006, 2007, 2008, 2010, 2011,
3    2019, 2020 Free Software Foundation, Inc.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
17 
#include <config.h>

#include "language/data-io/trim.h"

#include <limits.h>
#include <stdlib.h>
#include <string.h>

#include "data/dictionary.h"
#include "data/variable.h"
#include "language/lexer/lexer.h"
#include "language/lexer/variable-parser.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"

#include "gl/xalloc.h"

#include "gettext.h"
34 #define _(msgid) gettext (msgid)
35 
/* Commands that read and write system files share a common
   syntax for rearranging, dropping, and renaming variables.
   This function recognizes the subcommand keyword at the current
   position in LEXER (MAP, DROP, KEEP, or RENAME) and hands the
   rest of the subcommand to the matching parser, which modifies
   DICT.  RELAX is passed through to the RENAME parser; if it is
   true, the modified dictionary need not conform to the usual
   variable name rules.

   Returns true on success, false on failure.  An unrecognized
   subcommand is reported through the lexer and counts as a
   failure. */
bool
parse_dict_trim (struct lexer *lexer, struct dictionary *dict, bool relax)
{
  /* MAP is accepted but nothing is done for it yet (FIXME). */
  if (lex_match_id (lexer, "MAP"))
    return true;

  if (lex_match_id (lexer, "DROP"))
    return parse_dict_drop (lexer, dict);

  if (lex_match_id (lexer, "KEEP"))
    return parse_dict_keep (lexer, dict);

  if (lex_match_id (lexer, "RENAME"))
    return parse_dict_rename (lexer, dict, relax);

  lex_error (lexer, _("expecting a valid subcommand"));
  return false;
}
62 
63 /* Check that OLD_NAME can be renamed to NEW_NAME in DICT.  */
64 static bool
check_rename(const struct dictionary * dict,const char * old_name,const char * new_name)65 check_rename (const struct dictionary *dict, const char *old_name, const char *new_name)
66 {
67   if (dict_lookup_var (dict, new_name) != NULL)
68     {
69       msg (SE, _("Cannot rename %s as %s because there already exists "
70                  "a variable named %s.  To rename variables with "
71                  "overlapping names, use a single RENAME subcommand "
72                  "such as `/RENAME (A=B)(B=C)(C=A)', or equivalently, "
73                  "`/RENAME (A B C=B C A)'."),
74            old_name, new_name, new_name);
75       return false;
76     }
77   return true;
78 }
79 
80 /* Parse a  "VarX TO VarY" sequence where X and Y are integers
81    such that X >= Y.
82    If successfull, returns a string to the prefix Var and sets FIRST
83    to X and LAST to Y.  Returns NULL on failure.
84    The caller must free the return value.  */
85 static char *
try_to_sequence(struct lexer * lexer,const struct dictionary * dict,int * first,int * last)86 try_to_sequence (struct lexer *lexer, const struct dictionary *dict,
87                  int *first, int *last)
88 {
89   /* Check that the next 3 tokens are of the correct type.  */
90   if (lex_token (lexer) != T_ID
91       || lex_next_token (lexer, 1) != T_TO
92       || lex_next_token (lexer, 2) != T_ID)
93     return NULL;
94 
95   /* Check that the first and last tokens are suitable as
96      variable names.  */
97   const char *s0 = lex_tokcstr (lexer);
98   if (!id_is_valid (s0, dict_get_encoding (dict), true))
99     return NULL;
100 
101   const char *s1 = lex_next_tokcstr (lexer, 2);
102   if (!id_is_valid (s1, dict_get_encoding (dict), true))
103     return NULL;
104 
105   int x0 = strcspn (s0, "0123456789");
106   int x1 = strcspn (s1, "0123456789");
107 
108   /* The non-digit parts of s0 and s1 must be the same length.  */
109   if (x0 != x1)
110     return NULL;
111 
112   /* Both s0 and s1 must have some digits.  */
113   if (strlen (s0) <= x0)
114     return NULL;
115 
116   if (strlen (s1) <= x1)
117     return NULL;
118 
119   /* The non-digit parts of s0 and s1 must be identical.  */
120   if (0 != strncmp (s0, s1, x0))
121     return NULL;
122 
123   /* Both names must end with digits.  */
124   int len_s0_pfx = strspn (s0 + x0, "0123456789");
125   if (len_s0_pfx + x0 != strlen (s0))
126     return NULL;
127 
128   int len_s1_pfx = strspn (s1 + x1, "0123456789");
129   if (len_s1_pfx + x1 != strlen (s1))
130     return NULL;
131 
132   const char *n_start = s0 + x0;
133   const char *n_stop = s1 + x1;
134 
135   /* The first may not be greater than the last.  */
136   if (atoi (n_start) > atoi (n_stop))
137     return NULL;
138 
139   char *prefix = xstrndup (s0, x1);
140 
141   *first = atoi (n_start);
142   *last = atoi (n_stop);
143 
144   return prefix;
145 }
146 
147 
148 /* Parses and performs the RENAME subcommand of GET, SAVE, and
149    related commands.  If RELAX is true, then the new variable
150    names need  not conform to the normal dictionary rules.
151 */
152 bool
parse_dict_rename(struct lexer * lexer,struct dictionary * dict,bool relax)153 parse_dict_rename (struct lexer *lexer, struct dictionary *dict,
154 		   bool relax)
155 {
156   struct variable **oldvars = NULL;
157   size_t n_newvars = 0;
158   int group = 0;
159   char **newnames = NULL;
160   lex_match (lexer, T_EQUALS);
161 
162   while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
163     {
164       size_t n_oldvars = 0;
165       oldvars = NULL;
166       n_newvars = 0;
167       n_oldvars = 0;
168       oldvars = NULL;
169 
170       bool paren = lex_match (lexer, T_LPAREN);
171       group++;
172       if (!parse_variables (lexer, dict, &oldvars, &n_oldvars, PV_NO_DUPLICATE))
173 	goto fail;
174 
175       if (!lex_force_match (lexer, T_EQUALS))
176 	goto fail;
177 
178       newnames = xmalloc (sizeof *newnames * n_oldvars);
179 
180       char *prefix = NULL;
181       int first, last;
182       /* First attempt to parse v1 TO v10 format.  */
183       if ((prefix = try_to_sequence (lexer, dict, &first, &last)))
184         {
185           /* These 3 tokens have already been checked in the
186              try_to_sequence function.  */
187           lex_get (lexer);
188           lex_get (lexer);
189           lex_get (lexer);
190 
191           /* Make sure the new names are suitable.  */
192           for (int i = first; i <= last; ++i)
193             {
194               int sz = strlen (prefix) + intlog10 (last) + 1;
195               char *vn = malloc (sz);
196               snprintf (vn, sz, "%s%d", prefix, i);
197 
198               if (!check_rename (dict, var_get_name (oldvars[n_newvars]), vn))
199                 {
200                   free (prefix);
201                   goto fail;
202                 }
203 
204               newnames[i - first] = vn;
205               n_newvars++;
206             }
207         }
208       else
209       while (lex_token (lexer) == T_ID || lex_token (lexer) == T_STRING)
210         {
211           if (n_newvars >= n_oldvars)
212             break;
213           const char *new_name = lex_tokcstr (lexer);
214           if (!relax && ! id_is_plausible (new_name, true))
215             goto fail;
216 
217           if (!check_rename (dict, var_get_name (oldvars[n_newvars]), new_name))
218             goto fail;
219           newnames[n_newvars] = strdup (new_name);
220           lex_get (lexer);
221           n_newvars++;
222         }
223       free (prefix);
224 
225       if (n_newvars != n_oldvars)
226 	{
227 	  msg (SE, _("Number of variables on left side of `=' (%zu) does not "
228                      "match number of variables on right side (%zu), in "
229                      "parenthesized group %d of RENAME subcommand."),
230 	       n_oldvars, n_newvars, group);
231 	  goto fail;
232 	}
233 
234       if (paren)
235 	if (!lex_force_match (lexer, T_RPAREN))
236 	  goto fail;
237 
238       char *errname = 0;
239       if (!dict_rename_vars (dict, oldvars, newnames, n_newvars, &errname))
240 	{
241 	  msg (SE,
242 	       _("Requested renaming duplicates variable name %s."),
243 	       errname);
244 	  goto fail;
245 	}
246       free (oldvars);
247       for (int i = 0; i < n_newvars; ++i)
248 	free (newnames[i]);
249       free (newnames);
250       newnames = NULL;
251     }
252 
253   return true;
254 
255  fail:
256   free (oldvars);
257   for (int i = 0; i < n_newvars; ++i)
258     free (newnames[i]);
259   free (newnames);
260   newnames = NULL;
261   return false;
262 }
263 
264 /* Parses and performs the DROP subcommand of GET, SAVE, and
265    related commands.
266    Returns true if successful, false on failure.*/
267 bool
parse_dict_drop(struct lexer * lexer,struct dictionary * dict)268 parse_dict_drop (struct lexer *lexer, struct dictionary *dict)
269 {
270   struct variable **v;
271   size_t nv;
272 
273   lex_match (lexer, T_EQUALS);
274   if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
275     return false;
276   dict_delete_vars (dict, v, nv);
277   free (v);
278 
279   if (dict_get_var_cnt (dict) == 0)
280     {
281       msg (SE, _("Cannot DROP all variables from dictionary."));
282       return false;
283     }
284   return true;
285 }
286 
287 /* Parses and performs the KEEP subcommand of GET, SAVE, and
288    related commands.
289    Returns true if successful, false on failure.*/
290 bool
parse_dict_keep(struct lexer * lexer,struct dictionary * dict)291 parse_dict_keep (struct lexer *lexer, struct dictionary *dict)
292 {
293   struct variable **v;
294   size_t nv;
295   size_t i;
296 
297   lex_match (lexer, T_EQUALS);
298   if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
299     return false;
300 
301   /* Move the specified variables to the beginning. */
302   dict_reorder_vars (dict, v, nv);
303 
304   /* Delete the remaining variables. */
305   if (dict_get_var_cnt (dict) == nv)
306     {
307       free (v);
308       return true;
309     }
310 
311   v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
312   for (i = nv; i < dict_get_var_cnt (dict); i++)
313     v[i - nv] = dict_get_var (dict, i);
314   dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
315   free (v);
316 
317   return true;
318 }
319