1 /* PSPPIRE - a graphical user interface for PSPP.
2 Copyright (C) 2008, 2010, 2011, 2014 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "ui/syntax-gen.h"
20
21 #include <ctype.h>
22 #include <mbchar.h>
23
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/format.h"
27 #include "data/value.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/cast.h"
30 #include "libpspp/i18n.h"
31 #include "libpspp/message.h"
32 #include "libpspp/str.h"
33 #include "libpspp/misc.h"
34
35 #include "gl/c-ctype.h"
36 #include "gl/ftoastr.h"
37
38 /* Appends to OUTPUT a pair of hex digits for each byte in IN. */
39 static void
syntax_gen_hex_digits(struct string * output,struct substring in)40 syntax_gen_hex_digits (struct string *output, struct substring in)
41 {
42 size_t i;
43 for (i = 0; i < in.length; i++)
44 {
45 unsigned char c = in.string[i];
46 ds_put_byte (output, "0123456789ABCDEF"[c >> 4]);
47 ds_put_byte (output, "0123456789ABCDEF"[c & 0xf]);
48 }
49 }
50
51 /* Returns true if IN contains any control characters, false
52 otherwise */
53 static bool
has_control_chars(struct substring in)54 has_control_chars (struct substring in)
55 {
56 size_t i;
57
58 for (i = 0; i < in.length; i++)
59 if (iscntrl ((unsigned char) in.string[i]))
60 return true;
61 return false;
62 }
63
64 static bool
has_single_quote(struct substring str)65 has_single_quote (struct substring str)
66 {
67 return (SIZE_MAX != ss_find_byte (str, '\''));
68 }
69
70 static bool
has_double_quote(struct substring str)71 has_double_quote (struct substring str)
72 {
73 return (SIZE_MAX != ss_find_byte (str, '"'));
74 }
75
76 /* Appends to OUTPUT valid PSPP syntax for a quoted string that
77 contains IN.
78
79 IN must be encoded in UTF-8, and the quoted result will also
80 be encoded in UTF-8.
81
82 The string will be output as a regular quoted string unless it
83 contains control characters, in which case it is output as a
84 hex string. */
85 void
syntax_gen_string(struct string * output,struct substring in)86 syntax_gen_string (struct string *output, struct substring in)
87 {
88 if (has_control_chars (in))
89 {
90 ds_put_cstr (output, "X'");
91 syntax_gen_hex_digits (output, in);
92 ds_put_byte (output, '\'');
93 }
94 else
95 {
96 int quote;
97 size_t i;
98
99 /* This seemingly simple implementation is possible, because UTF-8
100 guarantees that bytes corresponding to basic characters (such as
101 '\'') cannot appear in a multi-byte character sequence except to
102 represent that basic character.
103 */
104 assert (is_basic ('\''));
105
106 quote = has_double_quote (in) && !has_single_quote (in) ? '\'' : '"';
107 ds_put_byte (output, quote);
108 for (i = 0; i < in.length; i++)
109 {
110 char c = in.string[i];
111 if (c == quote)
112 ds_put_byte (output, quote);
113 ds_put_byte (output, c);
114 }
115 ds_put_byte (output, quote);
116 }
117 }
118
119 /* Appends to OUTPUT a representation of NUMBER in PSPP syntax.
120 The representation is precise, that is, when PSPP parses the
121 representation, its value will be exactly NUMBER. (This might
122 not be the case on a C implementation where double has a
123 different representation.)
124
125 If NUMBER is the system-missing value, it is output as the
126 identifier SYSMIS. This may not be appropriate, because
127 SYSMIS is not consistently parsed throughout PSPP syntax as
128 the system-missing value. But in such circumstances the
129 system-missing value would not be meaningful anyhow, so the
130 caller should refrain from supplying the system-missing value
131 in such cases.
132
133 A value of LOWEST or HIGHEST is not treated specially.
134
135 If FORMAT is null, then the representation will be in numeric
136 form, e.g. 123 or 1.23e10.
137
138 If FORMAT is non-null, then it must point to a numeric format.
139 If the format is one easier for a user to understand when
140 expressed as a string than as a number (for example, a date
141 format), and the string representation precisely represents
142 NUMBER, then the string representation is written to OUTPUT.
143 Otherwise, NUMBER is output as if FORMAT was a null
144 pointer. */
145 void
syntax_gen_number(struct string * output,double number,const struct fmt_spec * format)146 syntax_gen_number (struct string *output,
147 double number, const struct fmt_spec *format)
148 {
149 assert (format == NULL || fmt_is_numeric (format->type));
150 if (format != NULL
151 && (format->type
152 & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)))
153 {
154 union value v_in, v_out;
155 char *s, *error;
156 bool ok;
157
158 v_in.f = number;
159 s = data_out (&v_in, "FIXME", format);
160
161 /* FIXME: UTF8 encoded strings will fail here */
162 error = data_in (ss_cstr (s), C_ENCODING, format->type, &v_out, 0, NULL);
163 ok = error == NULL;
164 free (error);
165
166 if (ok && v_out.f == number)
167 {
168 syntax_gen_string (output, ss_cstr (s));
169 free (s);
170 return;
171 }
172 free (s);
173 }
174
175 if (number == SYSMIS)
176 ds_put_cstr (output, "SYSMIS");
177 else
178 {
179 char s[DBL_BUFSIZE_BOUND];
180
181 c_dtoastr (s, sizeof s, 0, 0, number);
182 ds_put_cstr (output, s);
183 }
184 }
185
186 /* Appends to OUTPUT a representation of VALUE, which has the
187 specified WIDTH. If FORMAT is non-null, it influences the
188 output format. The representation is precise, that is, when
189 PSPP parses the representation, its value will be exactly
190 VALUE. */
191 void
syntax_gen_value(struct string * output,const union value * value,int width,const struct fmt_spec * format)192 syntax_gen_value (struct string *output, const union value *value, int width,
193 const struct fmt_spec *format)
194 {
195 assert (format == NULL || fmt_var_width (format) == width);
196 if (width == 0)
197 syntax_gen_number (output, value->f, format);
198 else
199 syntax_gen_string (output,
200 ss_buffer (CHAR_CAST (const char *, value->s), width));
201 }
202
203 /* Appends <low> THRU <high> to OUTPUT. If LOW is LOWEST, then
204 it is formatted as the identifier LO; if HIGH is HIGHEST, then
205 it is formatted as the identifier HI. Otherwise, LOW and HIGH
206 are formatted as with a call to syntax_gen_num with the specified
207 FORMAT.
208
209 This is the opposite of the function parse_num_range. */
210 void
syntax_gen_num_range(struct string * output,double low,double high,const struct fmt_spec * format)211 syntax_gen_num_range (struct string *output, double low, double high,
212 const struct fmt_spec *format)
213 {
214 if (low == LOWEST)
215 ds_put_cstr (output, "LO");
216 else
217 syntax_gen_number (output, low, format);
218
219 ds_put_cstr (output, " THRU ");
220
221 if (high == HIGHEST)
222 ds_put_cstr (output, "HI");
223 else
224 syntax_gen_number (output, high, format);
225 }
226
/* Same as syntax_gen_pspp, below, but takes a va_list.

   FORMAT is copied to OUTPUT verbatim, except for the following
   directives, each of which (other than %%) consumes one argument from
   ARGS:

     %sq     char * argument, written as with syntax_gen_string.
     %ss     char * argument, appended literally.
     %d      int argument, as with printf's %d.
     %f %g   double argument, as with printf.  A precision such as
             %.3f is honored; it may be at most 15 digits long.
     %%      A literal %.

   Any other directive is a programming error. */
void
syntax_gen_pspp_valist (struct string *output, const char *format,
                        va_list args)
{
  for (;;)
    {
      char qualifier[16];       /* Precision digits, null terminated. */
      size_t qualifier_len = 0;
      bool has_precision = false;
      char directive;

      /* Copy literal text up to the next directive or the end of FORMAT. */
      size_t copy = strcspn (format, "%");
      ds_put_substring (output, ss_buffer (format, copy));
      format += copy;

      if (*format == '\0')
        return;
      assert (*format == '%');
      format++;
      directive = *format++;
      if (directive == '.')
        {
          /* Collect the precision digits.  The first non-digit that
             follows them is the real directive. */
          while (directive = *format++, c_isdigit (directive))
            {
              /* Keep the last byte free for the null terminator. */
              assert (qualifier_len < sizeof qualifier - 1);
              qualifier[qualifier_len++] = directive;
            }
          qualifier[qualifier_len] = '\0';
          has_precision = true;
        }
      switch (directive)
        {
        case 's':
          {
            const char *s = va_arg (args, char *);
            switch (*format++)
              {
              case 'q':
                syntax_gen_string (output, ss_cstr (s));
                break;
              case 's':
                ds_put_cstr (output, s);
                break;
              default:
                NOT_REACHED ();
              }
          }
          break;

        case 'd':
          {
            int i = va_arg (args, int);
            ds_put_format (output, "%d", i);
          }
          break;

        case 'f':
        case 'g':
          {
            /* Rebuild the conversion specification, "%[.DIGITS]f" or
               "%[.DIGITS]g".  It needs at most 1 + 1 + 15 + 1 + 1 = 19
               bytes, so it always fits in CONV. */
            char conv[32];
            double d = va_arg (args, double);
            size_t n = 0;

            conv[n++] = '%';
            if (has_precision)
              {
                conv[n++] = '.';
                memcpy (&conv[n], qualifier, qualifier_len);
                n += qualifier_len;
              }
            conv[n++] = directive;
            conv[n] = '\0';

            ds_put_c_format (output, conv, d);
          }
          break;

        case '%':
          ds_put_byte (output, '%');
          break;

        default:
          NOT_REACHED ();
        }
    }
}
313
/* Appends FORMAT to OUTPUT in the manner of printf, with a set of
   substitutions specialized for generating PSPP syntax:

     %sq: The char * argument is formatted as a PSPP string, as
          if with a call to syntax_gen_string.

     %ss: The char * argument is appended literally.

     %d: Same as printf's %d.

     %f %g: Same as printf.

     %%: Literal %.

   (These substitutions were chosen to allow GCC to check for
   correct argument types.)

   This function is somewhat experimental.  If it proves useful,
   the allowed substitutions will almost certainly be
   expanded. */
void
syntax_gen_pspp (struct string *output, const char *format, ...)
{
  va_list ap;

  /* All of the real work happens in the va_list variant. */
  va_start (ap, format);
  syntax_gen_pspp_valist (output, format, ap);
  va_end (ap);
}
343