1 // -*- coding: utf-8 -*-
2 // Copyright (C) 2012-2017, 2019, 2021 Laboratoire de Recherche et
3 // Développement de l'Epita (LRDE).
4 //
5 // This file is part of Spot, a model checking library.
6 //
7 // Spot is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Spot is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 // License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 #include "common_finput.hh"
21 #include "common_setup.hh"
22 #include "error.h"
23
24 #include <fstream>
25 #include <cstring>
26 #include <unistd.h>
27
28 enum {
29 OPT_LBT = 1,
30 OPT_LENIENT,
31 };
32
33 jobs_t jobs;
34 bool lbt_input = false;
35 static bool lenient = false;
36
37 static const argp_option options[] =
38 {
39 { nullptr, 0, nullptr, 0, "Input options:", 1 },
40 { "formula", 'f', "STRING", 0, "process the formula STRING", 1 },
41 { "file", 'F', "FILENAME[/COL]", 0,
42 "process each line of FILENAME as a formula; if COL is a "
43 "positive integer, assume a CSV file and read column COL; use "
44 "a negative COL to drop the first line of the CSV file", 1 },
45 { "lbt-input", OPT_LBT, nullptr, 0,
46 "read all formulas using LBT's prefix syntax", 1 },
47 { "lenient", OPT_LENIENT, nullptr, 0,
48 "parenthesized blocks that cannot be parsed as subformulas "
49 "are considered as atomic properties", 1 },
50 { nullptr, 0, nullptr, 0, nullptr, 0 }
51 };
52
53 const struct argp finput_argp = { options, parse_opt_finput,
54 nullptr, nullptr, nullptr,
55 nullptr, nullptr };
56
57 const struct argp finput_argp_headless = { options + 1, parse_opt_finput,
58 nullptr, nullptr, nullptr,
59 nullptr, nullptr };
60
61
62 int
parse_opt_finput(int key,char * arg,struct argp_state *)63 parse_opt_finput(int key, char* arg, struct argp_state*)
64 {
65 // Called from C code, so should not raise any exception.
66 BEGIN_EXCEPTION_PROTECT;
67 // This switch is alphabetically-ordered.
68 switch (key)
69 {
70 case 'f':
71 jobs.emplace_back(arg, false);
72 break;
73 case 'F':
74 jobs.emplace_back(arg, true);
75 break;
76 case OPT_LBT:
77 lbt_input = true;
78 break;
79 case OPT_LENIENT:
80 lenient = true;
81 break;
82 default:
83 return ARGP_ERR_UNKNOWN;
84 }
85 END_EXCEPTION_PROTECT;
86 return 0;
87 }
88
89 spot::parsed_formula
parse_formula(const std::string & s)90 parse_formula(const std::string& s)
91 {
92 if (lbt_input)
93 return spot::parse_prefix_ltl(s);
94 else
95 return spot::parse_infix_psl
96 (s, spot::default_environment::instance(), false, lenient);
97 }
98
job_processor()99 job_processor::job_processor()
100 : abort_run(false), real_filename(nullptr),
101 col_to_read(0), prefix(nullptr), suffix(nullptr)
102 {
103 }
104
~job_processor()105 job_processor::~job_processor()
106 {
107 if (real_filename)
108 free(real_filename);
109 if (prefix)
110 free(prefix);
111 if (suffix)
112 free(suffix);
113 }
114
115
116 int
process_string(const std::string & input,const char * filename,int linenum)117 job_processor::process_string(const std::string& input,
118 const char* filename,
119 int linenum)
120 {
121 auto pf = parse_formula(input);
122
123 if (!pf.f || !pf.errors.empty())
124 {
125 if (filename)
126 error_at_line(0, 0, filename, linenum, "parse error:");
127 pf.format_errors(std::cerr);
128 return 2;
129 }
130 return process_formula(pf.f, filename, linenum);
131 }
132
133 int
process_stream(std::istream & is,const char * filename)134 job_processor::process_stream(std::istream& is,
135 const char* filename)
136 {
137 int error = 0;
138 int linenum = 1;
139 std::string line;
140
141 // Discard the first line of a CSV file if requested.
142 if (col_to_read < 0)
143 {
144 std::getline(is, line);
145 col_to_read = -col_to_read;
146 ++linenum;
147 }
148
149 // Each line of the file and send them to process_string,
150 // optionally extracting a column of a CSV file.
151 while (!abort_run && std::getline(is, line))
152 if (!line.empty())
153 {
154 if (col_to_read == 0)
155 {
156 error |= process_string(line, filename, linenum++);
157 }
158 else // We are reading column COL_TO_READ in a CSV file.
159 {
160 // Some people save CSV files with MSDOS encoding, and
161 // we don't want to include the \r in any %> output.
162 if (*line.rbegin() == '\r')
163 line.pop_back();
164
165 // If the line we have read contains an odd number
166 // of double-quotes, then it is an incomplete CSV line
167 // that should be completed by the next lines.
168 unsigned dquotes = 0;
169 std::string fullline;
170 unsigned csvlines = 0;
171 do
172 {
173 ++csvlines;
174 size_t s = line.size();
175 for (unsigned i = 0; i < s; ++i)
176 dquotes += line[i] == '"';
177 if (fullline.empty())
178 fullline = line;
179 else
180 (fullline += '\n') += line;
181 if (!(dquotes &= 1))
182 break;
183 }
184 while (std::getline(is, line));
185 if (dquotes)
186 error_at_line(2, errno, filename, linenum,
187 "mismatched double-quote, "
188 "reached EOF while parsing this line");
189
190 // Now that we have a full CSV line, extract the right
191 // column.
192
193 const char* str = fullline.c_str();
194 const char* col1_start = str;
195 // Delimiters for the extracted column.
196 const char* coln_start = str;
197 const char* coln_end = nullptr;
198 // The current column. (1-based)
199 int colnum = 1;
200 // Whether we are parsing a double-quoted string.
201 bool instring = false;
202 // Note that RFC 4180 has strict rules about
203 // double-quotes: ① if a field is double-quoted, the first
204 // and last characters of the field should be
205 // double-quotes; ② if a field contains a double-quote
206 // then it should be double quoted, and the occurrences
207 // of double-quotes should be doubled. Therefore a CSV file
208 // may no contain a line such as:
209 // foo,bar"ba""z",12
210 // Tools have different interpretation of such a line.
211 // For instance Python's pandas.read_csv() function will
212 // load the second field verbatim as the string 'bar"ba""z"',
213 // while R's read.csv() function will load it as the
214 // string 'barba"z'. We use this second interpretation, because
215 // it also makes it possible to parse CSVs fields formatted
216 // with leading spaces (often for cosmetic purpose). When
217 // extracting the second field of
218 // foo, "ba""z", 12
219 // we will return ' baz' and the leading space will be ignored
220 // by our LTL formula parser.
221 while (*str)
222 {
223 switch (*str)
224 {
225 case '"':
226 // Doubled double-quotes are used to escape
227 // double-quotes.
228 if (instring && str[1] == '"')
229 ++str;
230 else
231 instring = !instring;
232 break;
233 case ',':
234 if (!instring)
235 {
236 if (col_to_read == colnum)
237 coln_end = str;
238 ++colnum;
239 if (col_to_read == colnum)
240 coln_start = str + 1;
241 }
242 break;
243 }
244 // Once we have the end delimiter for our target
245 // column, we have all we need.
246 if (coln_end)
247 break;
248 ++str;
249 }
250 if (!*str)
251 {
252 if (colnum != col_to_read)
253 // Skip this line as it has no enough columns.
254 continue;
255 else
256 // The target columns ends at the end of the line.
257 coln_end = str;
258 }
259
260 // Skip the line if it has an empty field.
261 if (coln_start == coln_end)
262 continue;
263
264 // save the contents before and after that columns for the
265 // %< and %> escapes (ignoring the trailing and leading
266 // commas).
267 prefix = (col_to_read != 1) ?
268 strndup(col1_start, coln_start - col1_start - 1) : nullptr;
269 suffix = (*coln_end != 0) ? strdup(coln_end + 1) : nullptr;
270 std::string field(coln_start, coln_end);
271 // Remove double-quotes if any.
272 if (field.find('"') != std::string::npos)
273 {
274 unsigned dst = 0;
275 bool instring = false;
276 for (; coln_start != coln_end; ++coln_start)
277 if (*coln_start == '"')
278 // A doubled double-quote instead a double-quoted
279 // string is an escaped double-quote.
280 if (instring && coln_start[1] == '"')
281 field[dst++] = *++coln_start;
282 else
283 instring = !instring;
284 else
285 field[dst++] = *coln_start;
286 field.resize(dst);
287 }
288 error |= process_string(field, filename, linenum);
289 linenum += csvlines;
290 if (prefix)
291 {
292 free(prefix);
293 prefix = nullptr;
294 }
295 if (suffix)
296 {
297 free(suffix);
298 suffix = nullptr;
299 }
300 }
301 }
302 return error;
303 }
304
305 int
process_file(const char * filename)306 job_processor::process_file(const char* filename)
307 {
308 // Special case for stdin.
309 if (filename[0] == '-' && filename[1] == 0)
310 return process_stream(std::cin, filename);
311
312 errno = 0;
313 std::ifstream input(filename);
314 if (input)
315 return process_stream(input, filename);
316 int saved_errno = errno;
317
318 // If we have a filename like "foo/NN" such
319 // that:
320 // ① foo/NN is not a file (already the case),
321 // ② NN is a number,
322 // ③ foo is a file,
323 // then it means we want to open foo as
324 // a CSV file and process column NN.
325
326 if (const char* slash = strrchr(filename, '/'))
327 {
328 char* end;
329 errno = 0;
330 long int col = strtol(slash + 1, &end, 10);
331 if (errno == 0 && !*end && col != 0)
332 {
333 col_to_read = col;
334 if (real_filename)
335 free(real_filename);
336 real_filename = strndup(filename, slash - filename);
337
338 // Special case for stdin.
339 if (real_filename[0] == '-' && real_filename[1] == 0)
340 return process_stream(std::cin, real_filename);
341
342 std::ifstream input(real_filename);
343 if (input)
344 return process_stream(input, real_filename);
345
346 error(2, errno, "cannot open '%s' nor '%s'",
347 filename, real_filename);
348 }
349 }
350
351 error(2, saved_errno, "cannot open '%s'", filename);
352 return -1;
353 }
354
355 int
run()356 job_processor::run()
357 {
358 int error = 0;
359 for (auto& j: jobs)
360 {
361 if (!j.file_p)
362 error |= process_string(j.str);
363 else
364 error |= process_file(j.str);
365 if (abort_run)
366 break;
367 }
368 return error;
369 }
370
check_no_formula()371 void check_no_formula()
372 {
373 if (!jobs.empty())
374 return;
375 if (isatty(STDIN_FILENO))
376 error(2, 0, "No formula to translate? Run '%s --help' for help.\n"
377 "Use '%s -' to force reading formulas from the standard "
378 "input.", program_name, program_name);
379 jobs.emplace_back("-", true);
380 }
381
check_no_automaton()382 void check_no_automaton()
383 {
384 if (!jobs.empty())
385 return;
386 if (isatty(STDIN_FILENO))
387 error(2, 0, "No automaton to process? Run '%s --help' for help.\n"
388 "Use '%s -' to force reading automata from the standard "
389 "input.", program_name, program_name);
390 jobs.emplace_back("-", true);
391 }
392