1 // -*- coding: utf-8 -*-
2 // Copyright (C) 2012-2017, 2019, 2021 Laboratoire de Recherche et
3 // Développement de l'Epita (LRDE).
4 //
5 // This file is part of Spot, a model checking library.
6 //
7 // Spot is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Spot is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 // License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20 #include "common_finput.hh"
21 #include "common_setup.hh"
22 #include "error.h"
23 
24 #include <fstream>
25 #include <cstring>
26 #include <unistd.h>
27 
28 enum {
29   OPT_LBT = 1,
30   OPT_LENIENT,
31 };
32 
33 jobs_t jobs;
34 bool lbt_input = false;
35 static bool lenient = false;
36 
37 static const argp_option options[] =
38   {
39     { nullptr, 0, nullptr, 0, "Input options:", 1 },
40     { "formula", 'f', "STRING", 0, "process the formula STRING", 1 },
41     { "file", 'F', "FILENAME[/COL]", 0,
42       "process each line of FILENAME as a formula; if COL is a "
43       "positive integer, assume a CSV file and read column COL; use "
44       "a negative COL to drop the first line of the CSV file", 1 },
45     { "lbt-input", OPT_LBT, nullptr, 0,
46       "read all formulas using LBT's prefix syntax", 1 },
47     { "lenient", OPT_LENIENT, nullptr, 0,
48       "parenthesized blocks that cannot be parsed as subformulas "
49       "are considered as atomic properties", 1 },
50     { nullptr, 0, nullptr, 0, nullptr, 0 }
51   };
52 
53 const struct argp finput_argp = { options, parse_opt_finput,
54                                   nullptr, nullptr, nullptr,
55                                   nullptr, nullptr };
56 
57 const struct argp finput_argp_headless = { options + 1, parse_opt_finput,
58                                            nullptr, nullptr, nullptr,
59                                            nullptr, nullptr };
60 
61 
62 int
parse_opt_finput(int key,char * arg,struct argp_state *)63 parse_opt_finput(int key, char* arg, struct argp_state*)
64 {
65   // Called from C code, so should not raise any exception.
66   BEGIN_EXCEPTION_PROTECT;
67   // This switch is alphabetically-ordered.
68   switch (key)
69     {
70     case 'f':
71       jobs.emplace_back(arg, false);
72       break;
73     case 'F':
74       jobs.emplace_back(arg, true);
75       break;
76     case OPT_LBT:
77       lbt_input = true;
78       break;
79     case OPT_LENIENT:
80       lenient = true;
81       break;
82     default:
83       return ARGP_ERR_UNKNOWN;
84     }
85   END_EXCEPTION_PROTECT;
86   return 0;
87 }
88 
89 spot::parsed_formula
parse_formula(const std::string & s)90 parse_formula(const std::string& s)
91 {
92   if (lbt_input)
93     return spot::parse_prefix_ltl(s);
94   else
95     return spot::parse_infix_psl
96       (s, spot::default_environment::instance(), false, lenient);
97 }
98 
job_processor()99 job_processor::job_processor()
100   : abort_run(false), real_filename(nullptr),
101     col_to_read(0), prefix(nullptr), suffix(nullptr)
102 {
103 }
104 
~job_processor()105 job_processor::~job_processor()
106 {
107   if (real_filename)
108     free(real_filename);
109   if (prefix)
110     free(prefix);
111   if (suffix)
112     free(suffix);
113 }
114 
115 
116 int
process_string(const std::string & input,const char * filename,int linenum)117 job_processor::process_string(const std::string& input,
118                               const char* filename,
119                               int linenum)
120 {
121   auto pf = parse_formula(input);
122 
123   if (!pf.f || !pf.errors.empty())
124     {
125       if (filename)
126         error_at_line(0, 0, filename, linenum, "parse error:");
127       pf.format_errors(std::cerr);
128       return 2;
129     }
130   return process_formula(pf.f, filename, linenum);
131 }
132 
133 int
process_stream(std::istream & is,const char * filename)134 job_processor::process_stream(std::istream& is,
135                               const char* filename)
136 {
137   int error = 0;
138   int linenum = 1;
139   std::string line;
140 
141   // Discard the first line of a CSV file if requested.
142   if (col_to_read < 0)
143     {
144       std::getline(is, line);
145       col_to_read = -col_to_read;
146       ++linenum;
147     }
148 
149   // Each line of the file and send them to process_string,
150   // optionally extracting a column of a CSV file.
151   while (!abort_run && std::getline(is, line))
152     if (!line.empty())
153       {
154         if (col_to_read == 0)
155           {
156             error |= process_string(line, filename, linenum++);
157           }
158         else // We are reading column COL_TO_READ in a CSV file.
159           {
160             // Some people save CSV files with MSDOS encoding, and
161             // we don't want to include the \r in any %> output.
162             if (*line.rbegin() == '\r')
163               line.pop_back();
164 
165             // If the line we have read contains an odd number
166             // of double-quotes, then it is an incomplete CSV line
167             // that should be completed by the next lines.
168             unsigned dquotes = 0;
169             std::string fullline;
170             unsigned csvlines = 0;
171             do
172               {
173                 ++csvlines;
174                 size_t s = line.size();
175                 for (unsigned i = 0; i < s; ++i)
176                   dquotes += line[i] == '"';
177                 if (fullline.empty())
178                   fullline = line;
179                 else
180                   (fullline += '\n') += line;
181                 if (!(dquotes &= 1))
182                   break;
183               }
184             while (std::getline(is, line));
185             if (dquotes)
186               error_at_line(2, errno, filename, linenum,
187                             "mismatched double-quote, "
188                             "reached EOF while parsing this line");
189 
190             // Now that we have a full CSV line, extract the right
191             // column.
192 
193             const char* str = fullline.c_str();
194             const char* col1_start = str;
195             // Delimiters for the extracted column.
196             const char* coln_start = str;
197             const char* coln_end = nullptr;
198             // The current column.  (1-based)
199             int colnum = 1;
200             // Whether we are parsing a double-quoted string.
201             bool instring = false;
202             // Note that RFC 4180 has strict rules about
203             // double-quotes: ① if a field is double-quoted, the first
204             // and last characters of the field should be
205             // double-quotes; ② if a field contains a double-quote
206             // then it should be double quoted, and the occurrences
207             // of double-quotes should be doubled.  Therefore a CSV file
208             // may no contain a line such as:
209             //    foo,bar"ba""z",12
210             // Tools have different interpretation of such a line.
211             // For instance Python's pandas.read_csv() function will
212             // load the second field verbatim as the string 'bar"ba""z"',
213             // while R's read.csv() function will load it as the
214             // string 'barba"z'.  We use this second interpretation, because
215             // it also makes it possible to parse CSVs fields formatted
216             // with leading spaces (often for cosmetic purpose).  When
217             // extracting the second field of
218             //    foo, "ba""z", 12
219             // we will return ' baz' and the leading space will be ignored
220             // by our LTL formula parser.
221             while (*str)
222               {
223                 switch (*str)
224                   {
225                   case '"':
226                     // Doubled double-quotes are used to escape
227                     // double-quotes.
228                     if (instring && str[1] == '"')
229                       ++str;
230                     else
231                       instring = !instring;
232                     break;
233                   case ',':
234                     if (!instring)
235                       {
236                         if (col_to_read == colnum)
237                           coln_end = str;
238                         ++colnum;
239                         if (col_to_read == colnum)
240                           coln_start = str + 1;
241                       }
242                     break;
243                   }
244                 // Once we have the end delimiter for our target
245                 // column, we have all we need.
246                 if (coln_end)
247                   break;
248                 ++str;
249               }
250             if (!*str)
251               {
252                 if (colnum != col_to_read)
253                   // Skip this line as it has no enough columns.
254                   continue;
255                 else
256                   // The target columns ends at the end of the line.
257                   coln_end = str;
258               }
259 
260             // Skip the line if it has an empty field.
261             if (coln_start == coln_end)
262               continue;
263 
264             // save the contents before and after that columns for the
265             // %< and %> escapes (ignoring the trailing and leading
266             // commas).
267             prefix = (col_to_read != 1) ?
268               strndup(col1_start, coln_start - col1_start - 1) : nullptr;
269             suffix = (*coln_end != 0) ? strdup(coln_end + 1) : nullptr;
270             std::string field(coln_start, coln_end);
271             // Remove double-quotes if any.
272             if (field.find('"') != std::string::npos)
273               {
274                 unsigned dst = 0;
275                 bool instring = false;
276                 for (; coln_start != coln_end; ++coln_start)
277                   if (*coln_start == '"')
278                     // A doubled double-quote instead a double-quoted
279                     // string is an escaped double-quote.
280                     if (instring && coln_start[1] == '"')
281                       field[dst++] = *++coln_start;
282                     else
283                       instring = !instring;
284                   else
285                     field[dst++] = *coln_start;
286                 field.resize(dst);
287               }
288             error |= process_string(field, filename, linenum);
289             linenum += csvlines;
290             if (prefix)
291               {
292                 free(prefix);
293                 prefix = nullptr;
294               }
295             if (suffix)
296               {
297                 free(suffix);
298                 suffix = nullptr;
299               }
300           }
301       }
302   return error;
303 }
304 
305 int
process_file(const char * filename)306 job_processor::process_file(const char* filename)
307 {
308   // Special case for stdin.
309   if (filename[0] == '-' && filename[1] == 0)
310     return process_stream(std::cin, filename);
311 
312   errno = 0;
313   std::ifstream input(filename);
314   if (input)
315     return process_stream(input, filename);
316   int saved_errno = errno;
317 
318   // If we have a filename like "foo/NN" such
319   // that:
320   // ① foo/NN is not a file (already the case),
321   // ② NN is a number,
322   // ③ foo is a file,
323   // then it means we want to open foo as
324   // a CSV file and process column NN.
325 
326   if (const char* slash = strrchr(filename, '/'))
327     {
328       char* end;
329       errno = 0;
330       long int col = strtol(slash + 1, &end, 10);
331       if (errno == 0 && !*end && col != 0)
332         {
333           col_to_read = col;
334           if (real_filename)
335             free(real_filename);
336           real_filename = strndup(filename, slash - filename);
337 
338           // Special case for stdin.
339           if (real_filename[0] == '-' && real_filename[1] == 0)
340             return process_stream(std::cin, real_filename);
341 
342           std::ifstream input(real_filename);
343           if (input)
344             return process_stream(input, real_filename);
345 
346           error(2, errno, "cannot open '%s' nor '%s'",
347                 filename, real_filename);
348         }
349     }
350 
351   error(2, saved_errno, "cannot open '%s'", filename);
352   return -1;
353 }
354 
355 int
run()356 job_processor::run()
357 {
358   int error = 0;
359   for (auto& j: jobs)
360     {
361       if (!j.file_p)
362         error |= process_string(j.str);
363       else
364         error |= process_file(j.str);
365       if (abort_run)
366         break;
367     }
368   return error;
369 }
370 
check_no_formula()371 void check_no_formula()
372 {
373   if (!jobs.empty())
374     return;
375   if (isatty(STDIN_FILENO))
376     error(2, 0, "No formula to translate?  Run '%s --help' for help.\n"
377           "Use '%s -' to force reading formulas from the standard "
378           "input.", program_name, program_name);
379   jobs.emplace_back("-", true);
380 }
381 
check_no_automaton()382 void check_no_automaton()
383 {
384   if (!jobs.empty())
385     return;
386   if (isatty(STDIN_FILENO))
387     error(2, 0, "No automaton to process?  Run '%s --help' for help.\n"
388           "Use '%s -' to force reading automata from the standard "
389           "input.", program_name, program_name);
390   jobs.emplace_back("-", true);
391 }
392