1/* -*- coding: utf-8 -*- 2** Copyright (C) 2010-2015, 2017-2019, 2021, Laboratoire de Recherche 3** et Développement de l'Epita (LRDE). 4** Copyright (C) 2003, 2004 Laboratoire d'Informatique de Paris 6 5** (LIP6), département Systèmes Répartis Coopératifs (SRC), Université 6** Pierre et Marie Curie. 7** 8** This file is part of Spot, a model checking library. 9** 10** Spot is free software; you can redistribute it and/or modify it 11** under the terms of the GNU General Public License as published by 12** the Free Software Foundation; either version 3 of the License, or 13** (at your option) any later version. 14** 15** Spot is distributed in the hope that it will be useful, but WITHOUT 16** ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 17** or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 18** License for more details. 19** 20** You should have received a copy of the GNU General Public License 21** along with this program. If not, see <http://www.gnu.org/licenses/>. 22*/ 23%option noyywrap warn 8bit batch 24%option prefix="tlyy" 25%option outfile="lex.yy.c" 26%option stack 27%option never-interactive 28 29%top{ 30#include "config.h" 31} 32%{ 33#include <cstdlib> 34#include <string> 35 36#include <spot/parsetl/parsedecl.hh> 37#include "spot/priv/trim.hh" 38#include "spot/tl/formula.hh" 39 40#define YY_USER_ACTION \ 41 yylloc->columns(yyleng); 42 43static int start_token = 0; 44static int parent_level = 0; 45static bool missing_parent = false; 46static bool lenient_mode = false; 47static int orig_cond = 0; 48static unsigned comment_level = 0; 49 50typedef tlyy::parser::token token; 51 52%} 53 54%s not_prop 55%x in_par 56%x in_bra 57%x sqbracket 58%x lbt 59%x in_COMMENT in_STRING 60 61BOX "[]"|"□"|"⬜"|"◻" 62DIAMOND "<>"|"◇"|"⋄"|"♢" 63ARROWL "->"|"-->"|"→"|"⟶" 64DARROWL "=>"|"⇒"|"⟹" 65ARROWLR "<->"|"<-->"|"↔" 66DARROWLR "<=>"|"⇔" 67CIRCLE "()"|"○"|"◯" 68CIRCLEX "Ⓧ" 69NOT "!"|"~"|"¬" 70BOXARROW {BOX}{ARROWL}|"|"{ARROWL}|"↦" 71BOXDARROW {BOX}{DARROWL}|"|"{DARROWL}|"⤇" 72eol \n+|\r+ 73eol2 (\n\r)+|(\r\n)+ 74 75%% 76 77%{ 78 if (start_token) 79 { 80 int t = start_token; 81 start_token = 0; 82 return t; 83 } 84 yylloc->step(); 85 std::string s; 86%} 87 88<*>"/""*"+ { 89 if (YY_START != in_COMMENT) 90 { 91 orig_cond = YY_START; 92 BEGIN(in_COMMENT); 93 comment_level = 0; 94 } 95 ++comment_level; 96 } 97<in_COMMENT>{ 98 [^*/\n\r]* continue; 99 "/"[^*\n\r]* continue; 100 "*"+[^*/\n\r]* continue; 101 {eol} yylloc->lines(yyleng); yylloc->end.column = 1; 102 {eol2} yylloc->lines(yyleng / 2); yylloc->end.column = 1; 103 "*"+"/" if (--comment_level == 0) BEGIN(orig_cond); 104 <<EOF>> { 105 BEGIN(orig_cond); 106 error_list.push_back( 107 spot::one_parse_error(*yylloc, 108 "unclosed comment")); 109 return 0; 110 } 111} 112 113"(" { 114 if (!lenient_mode) 115 { 116 BEGIN(0); 117 return token::PAR_OPEN; 118 } 119 /* Parse any (...) block as a single block, 120 taking care of nested parentheses. The 121 parser will then try to parse this block 122 recursively. */ 123 BEGIN(in_par); 124 parent_level = 1; 125 yylval->str = new std::string(); 126 } 127<in_par>{ 128 "(" { 129 ++parent_level; 130 yylval->str->append(yytext, yyleng); 131 } 132 ")" { 133 if (--parent_level) 134 { 135 yylval->str->append(yytext, yyleng); 136 } 137 else 138 { 139 BEGIN(not_prop); 140 spot::trim(*yylval->str); 141 return token::PAR_BLOCK; 142 } 143 } 144 [^()]+ yylval->str->append(yytext, yyleng); 145 <<EOF>> { 146 unput(')'); 147 if (!missing_parent) 148 error_list.push_back( 149 spot::one_parse_error(*yylloc, 150 "missing closing parenthesis")); 151 missing_parent = true; 152 } 153} 154 155"{" { 156 if (!lenient_mode) 157 { 158 BEGIN(0); 159 return token::BRACE_OPEN; 160 } 161 /* Parse any {...} block as a single block, 162 taking care of nested parentheses. The 163 parser will then try to parse this block 164 recursively. */ 165 BEGIN(in_bra); 166 parent_level = 1; 167 yylval->str = new std::string(); 168 } 169<in_bra>{ 170 "{" { 171 ++parent_level; 172 yylval->str->append(yytext, yyleng); 173 } 174 "}"[ \t]*"!" { 175 if (--parent_level) 176 { 177 yylval->str->append(yytext, yyleng); 178 } 179 else 180 { 181 BEGIN(not_prop); 182 spot::trim(*yylval->str); 183 return token::BRA_BANG_BLOCK; 184 } 185 } 186 "}" { 187 if (--parent_level) 188 { 189 yylval->str->append(yytext, yyleng); 190 } 191 else 192 { 193 BEGIN(not_prop); 194 spot::trim(*yylval->str); 195 return token::BRA_BLOCK; 196 } 197 } 198 [^{}]+ yylval->str->append(yytext, yyleng); 199 <<EOF>> { 200 unput('}'); 201 if (!missing_parent) 202 error_list.push_back( 203 spot::one_parse_error(*yylloc, 204 "missing closing brace")); 205 missing_parent = true; 206 } 207} 208 209")" BEGIN(not_prop); return token::PAR_CLOSE; 210"}"[ \t]*"!" BEGIN(not_prop); return token::BRACE_BANG_CLOSE; 211"}" BEGIN(not_prop); return token::BRACE_CLOSE; 212 213 /* Must go before the other operators, because the F of FALSE 214 should not be mistaken with a unary F. */ 215"1"|[tT][rR][uU][eE] BEGIN(0); return token::CONST_TRUE; 216"0"|[fF][aA][lL][sS][eE] BEGIN(0); return token::CONST_FALSE; 217 218 219 /* ~ comes from Goal, ! from everybody else */ 220{NOT} BEGIN(0); return token::OP_NOT; 221 222"first_match" BEGIN(0); return token::OP_FIRST_MATCH; 223 224 /* SVA operators */ 225"##"[0-9]{1,2} { 226 yylval->num = strtoul(yytext + 2, 0, 10); 227 return token::OP_DELAY_N; 228 } 229"##"[0-9]{3,} { 230 errno = 0; 231 unsigned long n = strtoul(yytext + 2, 0, 10); 232 yylval->num = n; 233 if (errno || yylval->num != n) 234 { 235 error_list.push_back( 236 spot::one_parse_error(*yylloc, 237 "value too large ignored")); 238 yylval->num = 1; 239 } 240 if (yylval->num >= spot::fnode::unbounded()) 241 { 242 auto max = spot::fnode::unbounded() - 1; 243 std::ostringstream s; 244 s << yylval->num 245 << (" exceeds maximum supported " 246 "repetition (") 247 << max << ")"; 248 error_list.emplace_back(*yylloc, s.str()); 249 yylval->num = max; 250 } 251 return token::OP_DELAY_N; 252 } 253"##[+]" BEGIN(0); return token::OP_DELAY_PLUS; 254"##[*]" BEGIN(0); return token::OP_DELAY_STAR; 255"##[" BEGIN(sqbracket); return token::OP_DELAY_OPEN; 256 257 /* PSL operators */ 258{BOXARROW} BEGIN(0); return token::OP_UCONCAT; 259{DIAMOND}{ARROWL} BEGIN(0); return token::OP_ECONCAT; 260{BOXDARROW} BEGIN(0); return token::OP_UCONCAT_NONO; 261{DIAMOND}{DARROWL} BEGIN(0); return token::OP_ECONCAT_NONO; 262";" BEGIN(0); return token::OP_CONCAT; 263":" BEGIN(0); return token::OP_FUSION; 264"*" BEGIN(0); return token::OP_STAR; 265"[*]" BEGIN(0); return token::OP_BSTAR; 266"[+]" BEGIN(0); return token::OP_PLUS; 267"[*" BEGIN(sqbracket); return token::OP_STAR_OPEN; 268"[:*]" BEGIN(0); return token::OP_BFSTAR; 269"[:+]" BEGIN(0); return token::OP_FPLUS; 270"[:*" BEGIN(sqbracket); return token::OP_FSTAR_OPEN; 271"[=" BEGIN(sqbracket); return token::OP_EQUAL_OPEN; 272"["{ARROWL} BEGIN(sqbracket); return token::OP_GOTO_OPEN; 273<sqbracket>"]" BEGIN(0); return token::OP_SQBKT_CLOSE; 274<sqbracket>"!]" BEGIN(0); return token::OP_SQBKT_STRONG_CLOSE; 275<sqbracket>[0-9]+ { 276 errno = 0; 277 unsigned long n = strtoul(yytext, 0, 10); 278 yylval->num = n; 279 if (errno || yylval->num != n) 280 { 281 error_list.push_back( 282 spot::one_parse_error(*yylloc, 283 "value too large ignored")); 284 // Skip this number and read next token 285 yylloc->step(); 286 } 287 else 288 { 289 return token::OP_SQBKT_NUM; 290 } 291 } 292 /* .. is from PSL and EDL 293 : is from Verilog and PSL 294 to is from VHDL 295 , is from Perl */ 296<sqbracket>","|".."|":"|"to" return token::OP_SQBKT_SEP; 297 298 /* In SVA you use [=1:$] instead of [=1..]. We will also accept 299 [=1..$] and [=1:]. The PSL LRM shows examples like [=1:inf] 300 instead, so will accept this too. */ 301<sqbracket>"$"|"inf" return token::OP_UNBOUNDED; 302 303 /* & and | come from Spin. && and || from LTL2BA. 304 /\, \/, and xor are from LBTT. 305 --> and <--> come from Goal. 306 +,*,^ are from Wring. */ 307"||"|"|"|"+"|"\\/"|"∨"|"∪" BEGIN(0); return token::OP_OR; 308"&&"|"/\\"|"∧"|"∩" BEGIN(0); return token::OP_AND; 309"&" BEGIN(0); return token::OP_SHORT_AND; 310"^"|"xor"|"⊕" BEGIN(0); return token::OP_XOR; 311{ARROWL}|{DARROWL} BEGIN(0); return token::OP_IMPLIES; 312{ARROWLR}|{DARROWLR} BEGIN(0); return token::OP_EQUIV; 313 314 /* TSLF-like syntactic sugar: 315 X[3]f = XXXf 316 F[2..4]f = XX(f | X(f | Xf)) 317 G[2..4]f = XX(f & X(f & Xf)) 318 We also have to deal with the Spin-like notation for G. 319 X[]f = XGf 320 */ 321"X"[ \t\n\r]*"[]" yyless(1); return token::OP_X; 322"F"[ \t\n\r]*"[]" yyless(1); return token::OP_F; 323"G"[ \t\n\r]*"[]" yyless(1); return token::OP_G; 324"X"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_XREP; 325"F"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_FREP; 326"G"[ \t\n\r]*"[" BEGIN(sqbracket); return token::OP_GREP; 327 /* <> (DIAMOND) and [] (BOX), are used in Spin. 328 () (CIRCLE) is not, but would make sense. */ 329"F"|{DIAMOND} BEGIN(0); return token::OP_F; 330"G"|{BOX} BEGIN(0); return token::OP_G; 331"U" BEGIN(0); return token::OP_U; 332"R"|"V" BEGIN(0); return token::OP_R; 333"X"|{CIRCLE} BEGIN(0); return token::OP_X; 334{CIRCLEX} BEGIN(0); return token::OP_STRONG_X; 335"W" BEGIN(0); return token::OP_W; 336"M" BEGIN(0); return token::OP_M; 337 338 /* The combining overline or macron (overbar) should normally 339 occur only after a single letter, but we do not check that. */ 340"=0"|"̅"|"̄" return token::OP_POST_NEG; 341"=1" return token::OP_POST_POS; 342 343<*>[ \t\n\r]+ /* discard whitespace */ yylloc->step (); 344 345 /* An Atomic proposition cannot start with the letter 346 used by a unary operator (F,G,X), unless this 347 letter is followed by a digit in which case we assume 348 it's an ATOMIC_PROP (even though F0 could be seen as Ffalse, we 349 don't, because Ffalse is never used in practice). 350 */ 351<INITIAL>[a-zA-EH-WYZ_.][a-zA-Z0-9_.]* | 352<INITIAL>[FGX][0-9][a-zA-Z0-9_.]* | 353 /* 354 However if we have just parsed an atomic proposition, then we are 355 not expecting another atomic proposition, so we can be stricter 356 and disallow propositions that start with M, U, R, V, and W. If 357 you wonder why we do this, consider the Wring formula `p=0Uq=1'. 358 When p is parsed, we enter the not_prop start condition, we 359 remain into this condition when `=0' is processed, and then 360 because we are in this condition we will not consider `Uq' as an 361 atomic proposition but as a `U' operator followed by a `q' atomic 362 proposition. 363 364 We also disable atomic proposition that may look like a combination 365 of a binary operator followed by several unary operators. 366 E.g. UFXp. This way, `p=0UFXp=1' will be parsed as `(p=0)U(F(X(p=1)))'. 367 */ 368<not_prop>[a-zA-EH-LN-QSTYZ_.][a-zA-EH-WYZ0-9_.]* | 369<not_prop>[a-zA-EH-LN-QSTYZ_.][a-zA-EH-WYZ0-9_.][a-zA-Z0-9_.]* { 370 yylval->str = new std::string(yytext, yyleng); 371 BEGIN(not_prop); 372 return token::ATOMIC_PROP; 373 } 374 375 /* Atomic propositions can also be enclosed in double quotes. 376 377 orig_cond stores the parsing condition to use after the 378 string has been parsed. */ 379"\"" { 380 orig_cond = not_prop; 381 BEGIN(in_STRING); 382 } 383<lbt>"\"" { 384 orig_cond = lbt; 385 BEGIN(in_STRING); 386 } 387 388<in_STRING>{ 389 \" { 390 BEGIN(orig_cond); 391 yylval->str = new std::string(s); 392 return token::ATOMIC_PROP; 393 } 394 {eol} { 395 s.append(yytext, yyleng); 396 yylloc->lines(yyleng); yylloc->end.column = 1; 397 } 398 {eol2} { 399 s.append(yytext, yyleng); 400 yylloc->lines(yyleng / 2); yylloc->end.column = 1; 401 } 402 \\. s += yytext[1]; 403 [^\\\"\n\r]+ s.append(yytext, yyleng); 404 <<EOF>> { 405 error_list.push_back( 406 spot::one_parse_error(*yylloc, 407 "unclosed string")); 408 BEGIN(orig_cond); 409 yylval->str = new std::string(s); 410 return token::ATOMIC_PROP; 411 } 412} 413 414 /* these are operators */ 415<lbt>[eitfXFGUVRWM] return *yytext; 416 /* in LBT's format, atomic proposition look like p0 or p3141592, but 417 for compatibility with ltl2dstar we also accept any alphanumeric 418 string that is not an operator. */ 419<lbt>[a-zA-Z._][a-zA-Z0-9._]* { 420 yylval->str = new std::string(yytext, yyleng); 421 return token::ATOMIC_PROP; 422 } 423 424 425<*>. return *yytext; 426 427<<EOF>> return token::END_OF_INPUT; 428 429%% 430 431void 432flex_set_buffer(const std::string& buf, 433 int start_tok, bool lenient) 434{ 435 yypush_buffer_state(YY_CURRENT_BUFFER); 436 (void) yy_scan_bytes(buf.c_str(), buf.size()); 437 start_token = start_tok; 438 if (start_tok == token::START_LBT) 439 yy_push_state(lbt); 440 else 441 yy_push_state(0); 442 lenient_mode = lenient; 443} 444 445void 446flex_unset_buffer() 447{ 448 (void)&yy_top_state; // shut up a g++ warning. 449 yy_pop_state(); 450 yypop_buffer_state(); 451 missing_parent = false; 452} 453