1 /* PSPPIRE - a graphical user interface for PSPP.
2    Copyright (C) 2008, 2010, 2011, 2014 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include <config.h>
18 
19 #include "ui/syntax-gen.h"
20 
21 #include <ctype.h>
22 #include <mbchar.h>
23 
24 #include "data/data-in.h"
25 #include "data/data-out.h"
26 #include "data/format.h"
27 #include "data/value.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/cast.h"
30 #include "libpspp/i18n.h"
31 #include "libpspp/message.h"
32 #include "libpspp/str.h"
33 #include "libpspp/misc.h"
34 
35 #include "gl/c-ctype.h"
36 #include "gl/ftoastr.h"
37 
38 /* Appends to OUTPUT a pair of hex digits for each byte in IN. */
39 static void
syntax_gen_hex_digits(struct string * output,struct substring in)40 syntax_gen_hex_digits (struct string *output, struct substring in)
41 {
42   size_t i;
43   for (i = 0; i < in.length; i++)
44     {
45       unsigned char c = in.string[i];
46       ds_put_byte (output, "0123456789ABCDEF"[c >> 4]);
47       ds_put_byte (output, "0123456789ABCDEF"[c & 0xf]);
48     }
49 }
50 
51 /* Returns true if IN contains any control characters, false
52    otherwise */
53 static bool
has_control_chars(struct substring in)54 has_control_chars (struct substring in)
55 {
56   size_t i;
57 
58   for (i = 0; i < in.length; i++)
59     if (iscntrl ((unsigned char) in.string[i]))
60       return true;
61   return false;
62 }
63 
64 static bool
has_single_quote(struct substring str)65 has_single_quote (struct substring str)
66 {
67   return (SIZE_MAX != ss_find_byte (str, '\''));
68 }
69 
70 static bool
has_double_quote(struct substring str)71 has_double_quote (struct substring str)
72 {
73   return (SIZE_MAX != ss_find_byte (str, '"'));
74 }
75 
76 /* Appends to OUTPUT valid PSPP syntax for a quoted string that
77    contains IN.
78 
79    IN must be encoded in UTF-8, and the quoted result will also
80    be encoded in UTF-8.
81 
82    The string will be output as a regular quoted string unless it
83    contains control characters, in which case it is output as a
84    hex string. */
85 void
syntax_gen_string(struct string * output,struct substring in)86 syntax_gen_string (struct string *output, struct substring in)
87 {
88   if (has_control_chars (in))
89     {
90       ds_put_cstr (output, "X'");
91       syntax_gen_hex_digits (output, in);
92       ds_put_byte (output, '\'');
93     }
94   else
95     {
96       int quote;
97       size_t i;
98 
99       /* This seemingly simple implementation is possible, because UTF-8
100          guarantees that bytes corresponding to basic characters (such as
101          '\'') cannot appear in a multi-byte character sequence except to
102          represent that basic character.
103       */
104       assert (is_basic ('\''));
105 
106       quote = has_double_quote (in) && !has_single_quote (in) ? '\'' : '"';
107       ds_put_byte (output, quote);
108       for (i = 0; i < in.length; i++)
109         {
110           char c = in.string[i];
111           if (c == quote)
112             ds_put_byte (output, quote);
113           ds_put_byte (output, c);
114         }
115       ds_put_byte (output, quote);
116     }
117 }
118 
119 /* Appends to OUTPUT a representation of NUMBER in PSPP syntax.
120    The representation is precise, that is, when PSPP parses the
121    representation, its value will be exactly NUMBER.  (This might
122    not be the case on a C implementation where double has a
123    different representation.)
124 
125    If NUMBER is the system-missing value, it is output as the
126    identifier SYSMIS.  This may not be appropriate, because
127    SYSMIS is not consistently parsed throughout PSPP syntax as
128    the system-missing value.  But in such circumstances the
129    system-missing value would not be meaningful anyhow, so the
130    caller should refrain from supplying the system-missing value
131    in such cases.
132 
133    A value of LOWEST or HIGHEST is not treated specially.
134 
135    If FORMAT is null, then the representation will be in numeric
136    form, e.g. 123 or 1.23e10.
137 
138    If FORMAT is non-null, then it must point to a numeric format.
139    If the format is one easier for a user to understand when
140    expressed as a string than as a number (for example, a date
141    format), and the string representation precisely represents
142    NUMBER, then the string representation is written to OUTPUT.
143    Otherwise, NUMBER is output as if FORMAT was a null
144    pointer. */
145 void
syntax_gen_number(struct string * output,double number,const struct fmt_spec * format)146 syntax_gen_number (struct string *output,
147                    double number, const struct fmt_spec *format)
148 {
149   assert (format == NULL || fmt_is_numeric (format->type));
150   if (format != NULL
151       && (format->type
152           & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)))
153     {
154       union value v_in, v_out;
155       char *s, *error;
156       bool ok;
157 
158       v_in.f = number;
159       s = data_out (&v_in, "FIXME",  format);
160 
161       /* FIXME: UTF8 encoded strings will fail here */
162       error = data_in (ss_cstr (s), C_ENCODING, format->type, &v_out, 0, NULL);
163       ok = error == NULL;
164       free (error);
165 
166       if (ok && v_out.f == number)
167         {
168           syntax_gen_string (output, ss_cstr (s));
169 	  free (s);
170           return;
171         }
172       free (s);
173     }
174 
175   if (number == SYSMIS)
176     ds_put_cstr (output, "SYSMIS");
177   else
178     {
179       char s[DBL_BUFSIZE_BOUND];
180 
181       c_dtoastr (s, sizeof s, 0, 0, number);
182       ds_put_cstr (output, s);
183     }
184 }
185 
186 /* Appends to OUTPUT a representation of VALUE, which has the
187    specified WIDTH.  If FORMAT is non-null, it influences the
188    output format.  The representation is precise, that is, when
189    PSPP parses the representation, its value will be exactly
190    VALUE. */
191 void
syntax_gen_value(struct string * output,const union value * value,int width,const struct fmt_spec * format)192 syntax_gen_value (struct string *output, const union value *value, int width,
193                   const struct fmt_spec *format)
194 {
195   assert (format == NULL || fmt_var_width (format) == width);
196   if (width == 0)
197     syntax_gen_number (output, value->f, format);
198   else
199     syntax_gen_string (output,
200                        ss_buffer (CHAR_CAST (const char *, value->s), width));
201 }
202 
203 /* Appends <low> THRU <high> to OUTPUT.  If LOW is LOWEST, then
204    it is formatted as the identifier LO; if HIGH is HIGHEST, then
205    it is formatted as the identifier HI.  Otherwise, LOW and HIGH
206    are formatted as with a call to syntax_gen_num with the specified
207    FORMAT.
208 
209    This is the opposite of the function parse_num_range. */
210 void
syntax_gen_num_range(struct string * output,double low,double high,const struct fmt_spec * format)211 syntax_gen_num_range (struct string *output, double low, double high,
212                       const struct fmt_spec *format)
213 {
214   if (low == LOWEST)
215     ds_put_cstr (output, "LO");
216   else
217     syntax_gen_number (output, low, format);
218 
219   ds_put_cstr (output, " THRU ");
220 
221   if (high == HIGHEST)
222     ds_put_cstr (output, "HI");
223   else
224     syntax_gen_number (output, high, format);
225 }
226 
/* Same as syntax_gen_pspp, below, but takes a va_list.

   Text in FORMAT outside of directives is copied to OUTPUT
   verbatim.  The directives are:

     %sq: The char * argument is written as a PSPP string, via
          syntax_gen_string.

     %ss: The char * argument is appended literally.

     %d: The int argument is written in decimal.

     %f %g: The double argument is written with the corresponding
          printf conversion.

     %%: Literal %.

   A directive may be preceded by "." and a run of decimal digits
   (e.g. "%.3f").  The digits are used as the precision for %f and
   %g and are ignored by the other directives.  At most
   sizeof qualifier - 1 digits are allowed.

   Any other directive is a programming error. */
void
syntax_gen_pspp_valist (struct string *output, const char *format,
                        va_list args)
{
  for (;;)
    {
      char qualifier[16];
      int precision = -1;
      char directive;

      /* Copy literal text up to the next directive. */
      size_t copy = strcspn (format, "%");
      ds_put_substring (output, ss_buffer (format, copy));
      format += copy;

      if (*format == '\0')
        return;
      assert (*format == '%');
      format++;
      directive = *format++;
      if (directive == '.')
        {
          size_t n_digits = 0;

          /* Collect the precision digits.  The loop leaves DIRECTIVE
             holding the first non-digit, which is the conversion
             character. */
          while (directive = *format++, c_isdigit (directive))
            {
              /* Keep one byte in reserve for the null terminator
                 written below. */
              assert (n_digits < sizeof qualifier - 1);
              qualifier[n_digits++] = directive;
            }
          qualifier[n_digits] = '\0';
          precision = atoi (qualifier);
        }
      switch (directive)
        {
        case 's':
          {
            const char *s = va_arg (args, char *);
            switch (*format++)
              {
              case 'q':
                syntax_gen_string (output, ss_cstr (s));
                break;
              case 's':
                ds_put_cstr (output, s);
                break;
              default:
                NOT_REACHED ();
              }
          }
          break;

        case 'd':
          {
            int i = va_arg (args, int);
            ds_put_format (output, "%d", i);
          }
          break;

        case 'f':
        case 'g':
          {
            /* Rebuild a printf conversion such as "%.3f" or "%g".
               CONV has room for '%', '.', the longest possible
               QUALIFIER, the conversion character, and a null. */
            char conv[32];
            double d = va_arg (args, double);
            int x = 0;
            conv[x++] = '%';
            conv[x] = '\0';
            if (precision != -1)
              {
                strcat (conv, ".");
                strcat (conv, qualifier);
                x += strlen (qualifier) + 1;
              }
            conv[x++] = directive;
            conv[x++] = '\0';

            ds_put_c_format (output, conv, d);
            break;
          }

        case '%':
          ds_put_byte (output, '%');
          break;

        default:
          NOT_REACHED ();
        }
    }
}
313 
/* printf-like function specialized for outputting PSPP syntax.
   FORMAT is appended to OUTPUT, with the following substitutions
   applied to the variable arguments:

     %sq: The char * argument is formatted as a PSPP string, as
          if with a call to syntax_gen_string.

     %ss: The char * argument is appended literally.

     %d: Same as printf's %d.

     %f %g: Same as printf.  An optional precision, written as "."
          followed by decimal digits (e.g. "%.2f"), is honored.

     %%: Literal %.

   (These substitutions were chosen to allow GCC to check for
   correct argument types.)

   The work is done by syntax_gen_pspp_valist.

   This function is somewhat experimental.  If it proves useful,
   the allowed substitutions will almost certainly be
   expanded. */
void
syntax_gen_pspp (struct string *output, const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  syntax_gen_pspp_valist (output, format, ap);
  va_end (ap);
}
343