1/* -*- coding: utf-8 -*-
2** Copyright (C) 2010-2015, 2017-2019, 2021, Laboratoire de Recherche
3** et Développement de l'Epita (LRDE).
4** Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6
5** (LIP6), département Systèmes Répartis Coopératifs (SRC), Université
6** Pierre et Marie Curie.
7**
8** This file is part of Spot, a model checking library.
9**
10** Spot is free software; you can redistribute it and/or modify it
11** under the terms of the GNU General Public License as published by
12** the Free Software Foundation; either version 3 of the License, or
13** (at your option) any later version.
14**
15** Spot is distributed in the hope that it will be useful, but WITHOUT
16** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17** or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
18** License for more details.
19**
20** You should have received a copy of the GNU General Public License
21** along with this program.  If not, see <http://www.gnu.org/licenses/>.
22*/
23%option noyywrap warn 8bit batch
24%option prefix="tlyy"
25%option outfile="lex.yy.c"
26%option stack
27%option never-interactive
28
29%top{
30#include "config.h"
31}
32%{
33#include <cstdlib>
34#include <string>
35
36#include <spot/parsetl/parsedecl.hh>
37#include "spot/priv/trim.hh"
38#include "spot/tl/formula.hh"
39
40#define YY_USER_ACTION \
41  yylloc->columns(yyleng);
42
43static int start_token = 0;
44static int parent_level = 0;
45static bool missing_parent = false;
46static bool lenient_mode = false;
47static int orig_cond = 0;
48static unsigned comment_level = 0;
49
50typedef tlyy::parser::token token;
51
52%}
53
54%s not_prop
55%x in_par
56%x in_bra
57%x sqbracket
58%x lbt
59%x in_COMMENT in_STRING
60
61BOX       "[]"|"□"|"⬜"|"◻"
62DIAMOND   "<>"|"◇"|"⋄"|"♢"
63ARROWL    "->"|"-->"|"→"|"⟶"
64DARROWL   "=>"|"⇒"|"⟹"
65ARROWLR   "<->"|"<-->"|"↔"
66DARROWLR  "<=>"|"⇔"
67CIRCLE    "()"|"○"|"◯"
68CIRCLEX   "Ⓧ"
69NOT       "!"|"~"|"¬"
70BOXARROW  {BOX}{ARROWL}|"|"{ARROWL}|"↦"
71BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇"
72eol         \n+|\r+
73eol2        (\n\r)+|(\r\n)+
74
75%%
76
77%{
78  if (start_token)
79    {
80      int t = start_token;
81      start_token = 0;
82      return t;
83    }
84  yylloc->step();
85  std::string s;
86%}
87
88<*>"/""*"+		{
89			  if (YY_START != in_COMMENT)
90			    {
91                              orig_cond = YY_START;
92                              BEGIN(in_COMMENT);
93			      comment_level = 0;
94			    }
95			  ++comment_level;
96			}
97<in_COMMENT>{
98  [^*/\n\r]*		continue;
99  "/"[^*\n\r]*		continue;
100  "*"+[^*/\n\r]*	continue;
101  {eol}			yylloc->lines(yyleng); yylloc->end.column = 1;
102  {eol2}		yylloc->lines(yyleng / 2); yylloc->end.column = 1;
103  "*"+"/"		if (--comment_level == 0) BEGIN(orig_cond);
104  <<EOF>>		{
105                           BEGIN(orig_cond);
106                           error_list.push_back(
107			     spot::one_parse_error(*yylloc,
108			       "unclosed comment"));
109			   return 0;
110                        }
111}
112
113"("				{
114				  if (!lenient_mode)
115                                    {
116				      BEGIN(0);
117				      return token::PAR_OPEN;
118                                    }
119                                  /* Parse any (...) block as a single block,
120				     taking care of nested parentheses.  The
121				     parser will then try to parse this block
122				     recursively.  */
123                                  BEGIN(in_par);
124                                  parent_level = 1;
125				  yylval->str = new std::string();
126                                }
127<in_par>{
128	 "("			{
129				  ++parent_level;
130				  yylval->str->append(yytext, yyleng);
131				}
132	 ")"			{
133				  if (--parent_level)
134				    {
135                                      yylval->str->append(yytext, yyleng);
136				    }
137				  else
138				    {
139                                      BEGIN(not_prop);
140				      spot::trim(*yylval->str);
141				      return token::PAR_BLOCK;
142				    }
143				}
144         [^()]+			yylval->str->append(yytext, yyleng);
145	 <<EOF>>		{
146				  unput(')');
147				  if (!missing_parent)
148                                    error_list.push_back(
149				      spot::one_parse_error(*yylloc,
150 					"missing closing parenthesis"));
151				  missing_parent = true;
152				}
153}
154
155"{"				{
156				  if (!lenient_mode)
157				  {
158				     BEGIN(0);
159			             return token::BRACE_OPEN;
160                                  }
161                                  /* Parse any {...} block as a single block,
162				     taking care of nested parentheses.  The
163				     parser will then try to parse this block
164				     recursively.  */
165                                  BEGIN(in_bra);
166                                  parent_level = 1;
167				  yylval->str = new std::string();
168                                }
169<in_bra>{
170	 "{"			{
171				  ++parent_level;
172				  yylval->str->append(yytext, yyleng);
173				}
174         "}"[ \t]*"!"           {
175				  if (--parent_level)
176				    {
177                                      yylval->str->append(yytext, yyleng);
178				    }
179				  else
180				    {
181                                      BEGIN(not_prop);
182				      spot::trim(*yylval->str);
183				      return token::BRA_BANG_BLOCK;
184				    }
185                                }
186	 "}"			{
187				  if (--parent_level)
188				    {
189                                      yylval->str->append(yytext, yyleng);
190				    }
191				  else
192				    {
193                                      BEGIN(not_prop);
194				      spot::trim(*yylval->str);
195				      return token::BRA_BLOCK;
196				    }
197				}
198         [^{}]+			yylval->str->append(yytext, yyleng);
199	 <<EOF>>		{
200				  unput('}');
201				  if (!missing_parent)
202                                    error_list.push_back(
203				      spot::one_parse_error(*yylloc,
204 					"missing closing brace"));
205				  missing_parent = true;
206				}
207}
208
209")"				BEGIN(not_prop); return token::PAR_CLOSE;
210"}"[ \t]*"!"			BEGIN(not_prop); return token::BRACE_BANG_CLOSE;
211"}"				BEGIN(not_prop); return token::BRACE_CLOSE;
212
213  /* Must go before the other operators, because the F of FALSE
214     should not be mistaken with a unary F. */
215"1"|[tT][rR][uU][eE]		BEGIN(0); return token::CONST_TRUE;
216"0"|[fF][aA][lL][sS][eE]	BEGIN(0); return token::CONST_FALSE;
217
218
219  /* ~ comes from Goal, ! from everybody else */
220{NOT}				BEGIN(0); return token::OP_NOT;
221
222"first_match"                   BEGIN(0); return token::OP_FIRST_MATCH;
223
224  /* SVA operators */
225"##"[0-9]{1,2}                  {
226                                  yylval->num = strtoul(yytext + 2, 0, 10);
227                                  return token::OP_DELAY_N;
228                                }
229"##"[0-9]{3,}                   {
230				  errno = 0;
231				  unsigned long n = strtoul(yytext + 2, 0, 10);
232                                  yylval->num = n;
233				  if (errno || yylval->num != n)
234				    {
235                                      error_list.push_back(
236				        spot::one_parse_error(*yylloc,
237					  "value too large ignored"));
238                                      yylval->num = 1;
239                                    }
240                                  if (yylval->num >= spot::fnode::unbounded())
241                                    {
242                                      auto max = spot::fnode::unbounded() - 1;
243                                      std::ostringstream s;
244                                      s << yylval->num
245                                        << (" exceeds maximum supported "
246                                            "repetition (")
247                                        << max << ")";
248                                      error_list.emplace_back(*yylloc, s.str());
249                                      yylval->num = max;
250                                    }
251                                  return token::OP_DELAY_N;
252                                }
253"##[+]"                         BEGIN(0); return token::OP_DELAY_PLUS;
254"##[*]"                         BEGIN(0); return token::OP_DELAY_STAR;
255"##["                           BEGIN(sqbracket); return token::OP_DELAY_OPEN;
256
257  /* PSL operators */
258{BOXARROW}			BEGIN(0); return token::OP_UCONCAT;
259{DIAMOND}{ARROWL}		BEGIN(0); return token::OP_ECONCAT;
260{BOXDARROW}			BEGIN(0); return token::OP_UCONCAT_NONO;
261{DIAMOND}{DARROWL}		BEGIN(0); return token::OP_ECONCAT_NONO;
262";"				BEGIN(0); return token::OP_CONCAT;
263":"				BEGIN(0); return token::OP_FUSION;
264"*"				BEGIN(0); return token::OP_STAR;
265"[*]"				BEGIN(0); return token::OP_BSTAR;
266"[+]"				BEGIN(0); return token::OP_PLUS;
267"[*"				BEGIN(sqbracket); return token::OP_STAR_OPEN;
268"[:*]"				BEGIN(0); return token::OP_BFSTAR;
269"[:+]"				BEGIN(0); return token::OP_FPLUS;
270"[:*"				BEGIN(sqbracket); return token::OP_FSTAR_OPEN;
271"[="				BEGIN(sqbracket); return token::OP_EQUAL_OPEN;
272"["{ARROWL}			BEGIN(sqbracket); return token::OP_GOTO_OPEN;
273<sqbracket>"]"			BEGIN(0); return token::OP_SQBKT_CLOSE;
274<sqbracket>"!]"                 BEGIN(0); return token::OP_SQBKT_STRONG_CLOSE;
275<sqbracket>[0-9]+		{
276				  errno = 0;
277				  unsigned long n = strtoul(yytext, 0, 10);
278                                  yylval->num = n;
279				  if (errno || yylval->num != n)
280				    {
281                                      error_list.push_back(
282				        spot::one_parse_error(*yylloc,
283					  "value too large ignored"));
284				      // Skip this number and read next token
285                                      yylloc->step();
286                                    }
287				  else
288                                    {
289                                      return token::OP_SQBKT_NUM;
290                                    }
291				}
292  /* .. is from PSL and EDL
293   : is from Verilog and PSL
294   to is from VHDL
295   , is from Perl */
296<sqbracket>","|".."|":"|"to"	return token::OP_SQBKT_SEP;
297
298  /* In SVA you use [=1:$] instead of [=1..].  We will also accept
299     [=1..$] and [=1:].  The PSL LRM shows examples like [=1:inf]
300     instead, so will accept this too.  */
301<sqbracket>"$"|"inf"		return token::OP_UNBOUNDED;
302
303  /* & and | come from Spin.  && and || from LTL2BA.
304     /\, \/, and xor are from LBTT.
305     --> and <--> come from Goal.
306     +,*,^ are from Wring. */
307"||"|"|"|"+"|"\\/"|"∨"|"∪"	BEGIN(0); return token::OP_OR;
308"&&"|"/\\"|"∧"|"∩"		BEGIN(0); return token::OP_AND;
309"&"				BEGIN(0); return token::OP_SHORT_AND;
310"^"|"xor"|"⊕"			BEGIN(0); return token::OP_XOR;
311{ARROWL}|{DARROWL}		BEGIN(0); return token::OP_IMPLIES;
312{ARROWLR}|{DARROWLR}		BEGIN(0); return token::OP_EQUIV;
313
314 /* TSLF-like syntactic sugar:
315    X[3]f = XXXf
316    F[2..4]f = XX(f | X(f | Xf))
317    G[2..4]f = XX(f & X(f & Xf))
318    We also have to deal with the Spin-like notation for G.
319    X[]f = XGf
320 */
321"X"[ \t\n\r]*"[]"               yyless(1); return token::OP_X;
322"F"[ \t\n\r]*"[]"               yyless(1); return token::OP_F;
323"G"[ \t\n\r]*"[]"               yyless(1); return token::OP_G;
324"X"[ \t\n\r]*"["                BEGIN(sqbracket); return token::OP_XREP;
325"F"[ \t\n\r]*"["                BEGIN(sqbracket); return token::OP_FREP;
326"G"[ \t\n\r]*"["                BEGIN(sqbracket); return token::OP_GREP;
327  /* <> (DIAMOND) and [] (BOX), are used in Spin.
328     () (CIRCLE) is not, but would make sense.  */
329"F"|{DIAMOND}			BEGIN(0); return token::OP_F;
330"G"|{BOX}			BEGIN(0); return token::OP_G;
331"U"				BEGIN(0); return token::OP_U;
332"R"|"V"				BEGIN(0); return token::OP_R;
333"X"|{CIRCLE}			BEGIN(0); return token::OP_X;
334{CIRCLEX}                       BEGIN(0); return token::OP_STRONG_X;
335"W"				BEGIN(0); return token::OP_W;
336"M"				BEGIN(0); return token::OP_M;
337
338  /* The combining overline or macron (overbar) should normally
339     occur only after a single letter, but we do not check that. */
340"=0"|"̅"|"̄"			return token::OP_POST_NEG;
341"=1"				return token::OP_POST_POS;
342
343<*>[ \t\n\r]+			/* discard whitespace */ yylloc->step ();
344
345  /* An Atomic proposition cannot start with the letter
346     used by a unary operator (F,G,X), unless this
347     letter is followed by a digit in which case we assume
348     it's an ATOMIC_PROP (even though F0 could be seen as Ffalse, we
349     don't, because Ffalse is never used in practice).
350  */
351<INITIAL>[a-zA-EH-WYZ_.][a-zA-Z0-9_.]* |
352<INITIAL>[FGX][0-9][a-zA-Z0-9_.]* |
353  /*
354     However if we have just parsed an atomic proposition, then we are
355     not expecting another atomic proposition, so we can be stricter
356     and disallow propositions that start with M, U, R, V, and W.  If
357     you wonder why we do this, consider the Wring formula `p=0Uq=1'.
358     When p is parsed, we enter the not_prop start condition, we
359     remain into this condition when `=0' is processed, and then
360     because we are in this condition we will not consider `Uq' as an
361     atomic proposition but as a `U' operator followed by a `q' atomic
362     proposition.
363
364     We also disable atomic proposition that may look like a combination
365     of a binary operator followed by several unary operators.
366     E.g. UFXp.   This way, `p=0UFXp=1' will be parsed as `(p=0)U(F(X(p=1)))'.
367  */
368<not_prop>[a-zA-EH-LN-QSTYZ_.][a-zA-EH-WYZ0-9_.]* |
369<not_prop>[a-zA-EH-LN-QSTYZ_.][a-zA-EH-WYZ0-9_.][a-zA-Z0-9_.]* {
370			  yylval->str = new std::string(yytext, yyleng);
371			  BEGIN(not_prop);
372			  return token::ATOMIC_PROP;
373			}
374
375  /* Atomic propositions can also be enclosed in double quotes.
376
377     orig_cond stores the parsing condition to use after the
378     string has been parsed. */
379"\""                    {
380                          orig_cond = not_prop;
381			  BEGIN(in_STRING);
382			}
383<lbt>"\""		{
384			  orig_cond = lbt;
385			  BEGIN(in_STRING);
386			}
387
388<in_STRING>{
389  \"	                {
390                           BEGIN(orig_cond);
391			   yylval->str = new std::string(s);
392			   return token::ATOMIC_PROP;
393 			}
394  {eol}			{
395  			  s.append(yytext, yyleng);
396                          yylloc->lines(yyleng); yylloc->end.column = 1;
397			}
398  {eol2}		{
399  			  s.append(yytext, yyleng);
400                          yylloc->lines(yyleng / 2); yylloc->end.column = 1;
401			}
402  \\.			s += yytext[1];
403  [^\\\"\n\r]+		s.append(yytext, yyleng);
404  <<EOF>>		{
405                           error_list.push_back(
406			     spot::one_parse_error(*yylloc,
407			       "unclosed string"));
408                           BEGIN(orig_cond);
409			   yylval->str = new std::string(s);
410			   return token::ATOMIC_PROP;
411                        }
412}
413
414  /* these are operators */
415<lbt>[eitfXFGUVRWM]	return *yytext;
416  /* in LBT's format, atomic proposition look like p0 or p3141592, but
417     for compatibility with ltl2dstar we also accept any alphanumeric
418     string that is not an operator. */
419<lbt>[a-zA-Z._][a-zA-Z0-9._]*            {
420			  yylval->str = new std::string(yytext, yyleng);
421			  return token::ATOMIC_PROP;
422			}
423
424
425<*>.			return *yytext;
426
427<<EOF>>			return token::END_OF_INPUT;
428
429%%
430
431void
432flex_set_buffer(const std::string& buf,
433		int start_tok, bool lenient)
434{
435  yypush_buffer_state(YY_CURRENT_BUFFER);
436  (void) yy_scan_bytes(buf.c_str(), buf.size());
437  start_token = start_tok;
438  if (start_tok == token::START_LBT)
439    yy_push_state(lbt);
440  else
441    yy_push_state(0);
442  lenient_mode = lenient;
443}
444
445void
446flex_unset_buffer()
447{
448  (void)&yy_top_state; // shut up a g++ warning.
449  yy_pop_state();
450  yypop_buffer_state();
451  missing_parent = false;
452}
453