1 // -*- coding: utf-8 -*- 2 // Copyright (C) 2012-2016, 2018-2020 Laboratoire de Recherche et 3 // Développement de l'Epita (LRDE). 4 // 5 // This file is part of Spot, a model checking library. 6 // 7 // Spot is free software; you can redistribute it and/or modify it 8 // under the terms of the GNU General Public License as published by 9 // the Free Software Foundation; either version 3 of the License, or 10 // (at your option) any later version. 11 // 12 // Spot is distributed in the hope that it will be useful, but WITHOUT 13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 // License for more details. 16 // 17 // You should have received a copy of the GNU General Public License 18 // along with this program. If not, see <http://www.gnu.org/licenses/>. 19 20 #include "config.h" 21 #include <spot/tl/relabel.hh> 22 #include <sstream> 23 #include <spot/misc/hash.hh> 24 #include <map> 25 #include <set> 26 #include <stack> 27 #include <iostream> 28 29 namespace spot 30 { 31 ////////////////////////////////////////////////////////////////////// 32 // Basic relabeler 33 ////////////////////////////////////////////////////////////////////// 34 35 namespace 36 { 37 struct ap_generator 38 { 39 virtual formula next() = 0; ~ap_generatorspot::__anon0f70f2ca0111::ap_generator40 virtual ~ap_generator() {} 41 }; 42 43 struct pnn_generator final: ap_generator 44 { 45 unsigned nn; pnn_generatorspot::__anon0f70f2ca0111::pnn_generator46 pnn_generator() 47 : nn(0) 48 { 49 } 50 nextspot::__anon0f70f2ca0111::pnn_generator51 virtual formula next() override 52 { 53 std::ostringstream s; 54 s << 'p' << nn++; 55 return formula::ap(s.str()); 56 } 57 }; 58 59 struct abc_generator final: ap_generator 60 { 61 public: abc_generatorspot::__anon0f70f2ca0111::abc_generator62 abc_generator() 63 : nn(0) 64 { 65 } 66 67 unsigned nn; 68 nextspot::__anon0f70f2ca0111::abc_generator69 virtual formula next() override 70 { 71 std::string s; 72 unsigned n = nn++; 73 do 74 { 75 s.push_back('a' + (n % 26)); 76 n /= 26; 77 } 78 while (n); 79 return formula::ap(s); 80 } 81 }; 82 83 84 class relabeler 85 { 86 public: 87 typedef std::unordered_map<formula, formula> map; 88 map newname; 89 ap_generator* gen; 90 relabeling_map* oldnames; 91 relabeler(ap_generator * gen,relabeling_map * m)92 relabeler(ap_generator* gen, relabeling_map* m) 93 : gen(gen), oldnames(m) 94 { 95 } 96 ~relabeler()97 ~relabeler() 98 { 99 delete gen; 100 } 101 rename(formula old)102 formula rename(formula old) 103 { 104 auto r = newname.emplace(old, nullptr); 105 if (!r.second) 106 { 107 return r.first->second; 108 } 109 else 110 { 111 formula res = gen->next(); 112 r.first->second = res; 113 if (oldnames) 114 (*oldnames)[res] = old; 115 return res; 116 } 117 } 118 119 formula visit(formula f)120 visit(formula f) 121 { 122 if (f.is(op::ap)) 123 return rename(f); 124 else 125 return f.map([this](formula f) 126 { 127 return this->visit(f); 128 }); 129 } 130 131 }; 132 133 } 134 135 136 formula relabel(formula f,relabeling_style style,relabeling_map * m)137 relabel(formula f, relabeling_style style, relabeling_map* m) 138 { 139 ap_generator* gen = nullptr; 140 switch (style) 141 { 142 case Pnn: 143 gen = new pnn_generator; 144 break; 145 case Abc: 146 gen = new abc_generator; 147 break; 148 } 149 150 relabeler r(gen, m); 151 return r.visit(f); 152 } 153 154 namespace 155 { 156 typedef std::map<formula, int> sub_formula_count_t; 157 158 static void sub_formula_collect(formula f,sub_formula_count_t * s)159 sub_formula_collect(formula f, sub_formula_count_t* s) 160 { 161 assert(s); 162 f.traverse([&](const formula& f) 163 { 164 auto p = s->emplace(f, 1); 165 if (p.second) 166 return false; 167 p.first->second += 1; 168 return true; 169 }); 170 } 171 172 static std::pair<formula, formula> split_used_once(formula f,const sub_formula_count_t & subcount)173 split_used_once(formula f, const sub_formula_count_t& subcount) 174 { 175 assert(f.is_boolean()); 176 unsigned sz = f.size(); 177 if (sz <= 2) 178 return {f, nullptr}; 179 // If we have a Boolean formula with more than two 180 // children, like (a & b & c & d) where some children 181 // (assume {a,b}) are used only once, but some other 182 // (assume {c,d}) are used multiple time in the formula, 183 // then split that into ((a & b) & (c & d)) to give 184 // (a & b) a chance to be relabeled as a whole. 185 bool has_once = false; 186 bool has_mult = false; 187 for (unsigned j = 0; j < sz; ++j) 188 { 189 auto p = subcount.find(f[j]); 190 assert(p != subcount.end()); 191 unsigned sc = p->second; 192 assert(sc > 0); 193 if (sc == 1) 194 has_once = true; 195 else 196 has_mult = true; 197 if (has_once && has_mult) 198 { 199 std::vector<formula> once; 200 std::vector<formula> mult; 201 for (unsigned i = 0; i < j; ++i) 202 mult.push_back(f[i]); 203 once.push_back(f[j]); 204 if (sc > 1) 205 std::swap(once, mult); 206 for (++j; j < sz; ++j) 207 { 208 auto p = subcount.find(f[j]); 209 assert(p != subcount.end()); 210 unsigned sc = p->second; 211 ((sc == 1) ? once : mult).push_back(f[j]); 212 } 213 formula f1 = formula::multop(f.kind(), std::move(once)); 214 formula f2 = formula::multop(f.kind(), std::move(mult)); 215 return { f1, f2 }; 216 } 217 } 218 return {f, nullptr}; 219 } 220 } 221 222 223 ////////////////////////////////////////////////////////////////////// 224 // Boolean-subexpression relabeler 225 ////////////////////////////////////////////////////////////////////// 226 227 // Here we want to rewrite a formula such as 228 // "a & b & X(c & d) & GF(c & d)" into "p0 & Xp1 & GFp1" 229 // where Boolean subexpressions are replaced by fresh propositions. 230 // 231 // Detecting Boolean subexpressions is not a problem. 232 // Furthermore, because we are already representing LTL formulas 233 // with sharing of identical sub-expressions we can easily rename 234 // a subexpression (such as c&d above) only once. However this 235 // scheme has two problems: 236 // 237 // A. It will not detect inter-dependent Boolean subexpressions. 238 // For instance it will mistakenly relabel "(a & b) U (a & !b)" 239 // as "p0 U p1", hiding the dependency between a&b and a&!b. 240 // 241 // B. Because of our n-ary operators, it will fail to 242 // notice that (a & b) is a sub-expression of (a & b & c). 243 // 244 // The way we compute the subexpressions that can be relabeled is 245 // by transforming the formula syntax tree into an undirected 246 // graph, and computing the cut points of this graph. The cut 247 // points (or articulation points) are the nodes whose removal 248 // would split the graph in two components. To ensure that a 249 // Boolean operator is only considered as a cut point if it would 250 // separate all of its children from the rest of the graph, we 251 // connect all the children of Boolean operators. 252 // 253 // For instance (a & b) U (c & d) has two (Boolean) cut points 254 // corresponding to the two AND operators: 255 // 256 // (a&b)U(c&d) 257 // ╱ ╲ 258 // a&b c&d 259 // ╱ ╲ ╱ ╲ 260 // a─────b c─────d 261 // 262 // (The root node is also a cut point, but we only consider Boolean 263 // cut points for relabeling.) 264 // 265 // On the other hand, (a & b) U (b & !c) has only one Boolean 266 // cut-point which corresponds to the NOT operator: 267 // 268 // (a&b)U(b&!c) 269 // ╱ ╲ 270 // a&b b&!c 271 // ╱ ╲ ╱ ╲ 272 // a─────b────!c 273 // │ 274 // c 275 // 276 // Note that if the children of a&b and b&c were not connected, 277 // a&b and b&c would be considered as cut points because they 278 // separate "a" or "!c" from the rest of the graph. 279 // 280 // The relabeling of a formula is therefore done in 3 passes: 281 // 1. convert the formula's syntax tree into an undirected graph, 282 // adding links between children of Boolean operators 283 // 2. compute the (Boolean) cut points of that graph, using the 284 // Hopcroft-Tarjan algorithm (see below for a reference) 285 // 3. recursively scan the formula's tree until we reach 286 // either a (Boolean) cut point or an atomic proposition, and 287 // replace that node by a fresh atomic proposition. 288 // 289 // In the example above (a&b)U(b&!c), the last recursion 290 // stops on a, b, and !c, producing (p0&p1)U(p1&p2). 291 // 292 // Problem #B above (handling of n-ary expression) need some 293 // additional tricks. Consider (a&b&c&d) U X(c&d), and assume 294 // {a,b,c,d} are Boolean subformulas. The construction, as we have 295 // presented it, would interconnect all of {a,b,c,d}, preventing c&d 296 // from being relabeled together. To help with that, we count the 297 // number of time of each subformula is used (or how many parents 298 // its has in the syntax DAG), and use that to split (a&b&c&d) into 299 // (a&b)&(c&d), separating subformulas that are used only once. The 300 // counting is done by sub_formula_collect(), and the split by 301 // split_used_once(). 302 namespace 303 { 304 typedef std::vector<formula> succ_vec; 305 typedef std::map<formula, succ_vec> fgraph; 306 307 // Convert the formula's syntax tree into an undirected graph 308 // labeled by subformulas. 309 class formula_to_fgraph 310 { 311 public: 312 fgraph& g; 313 std::stack<formula> s; 314 sub_formula_count_t& subcount; 315 formula_to_fgraph(fgraph & g,sub_formula_count_t & subcount)316 formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount): 317 g(g), subcount(subcount) 318 { 319 } 320 ~formula_to_fgraph()321 ~formula_to_fgraph() 322 { 323 } 324 325 void visit(formula f)326 visit(formula f) 327 { 328 { 329 // Connect to parent 330 auto in = g.emplace(f, succ_vec()); 331 if (!s.empty()) 332 { 333 formula top = s.top(); 334 in.first->second.emplace_back(top); 335 g[top].emplace_back(f); 336 if (!in.second) 337 return; 338 } 339 else 340 { 341 assert(in.second); 342 } 343 } 344 s.push(f); 345 346 unsigned sz = f.size(); 347 unsigned i = 0; 348 if (sz > 2 && f.is_boolean()) 349 { 350 // If we have a Boolean formula with more than two 351 // children, like (a & b & c & d) where some children 352 // (assume {a,b}) are used only once, but some other 353 // (assume {c,d}) are used multiple time in the formula, 354 // then split that into ((a & b) & (c & d)) to give 355 // (a & b) a chance to be relabeled as a whole. 356 auto pair = split_used_once(f, subcount); 357 if (pair.second) 358 { 359 visit(pair.first); 360 visit(pair.second); 361 g[pair.first].emplace_back(pair.second); 362 g[pair.second].emplace_back(pair.first); 363 goto done; 364 } 365 } 366 if (sz > 2 && !f.is_boolean()) 367 { 368 /// If we have a formula like (a & b & Xc), consider 369 /// it as ((a & b) & Xc) in the graph to isolate the 370 /// Boolean operands as a single node. 371 formula b = f.boolean_operands(&i); 372 if (b) 373 visit(b); 374 } 375 for (; i < sz; ++i) 376 visit(f[i]); 377 if (sz > 1 && f.is_boolean()) 378 { 379 // For Boolean nodes, connect all children in a 380 // loop. This way the node can only be a cut point 381 // if it separates all children from the reset of 382 // the graph (not only one). 383 formula pred = f[0]; 384 for (i = 1; i < sz; ++i) 385 { 386 formula next = f[i]; 387 // Note that we only add an edge in both directions, 388 // as the cut point algorithm really need undirected 389 // graphs. (We used to do only one direction, and 390 // that turned out to be a bug.) 391 g[pred].emplace_back(next); 392 g[next].emplace_back(pred); 393 pred = next; 394 } 395 g[pred].emplace_back(f[0]); 396 g[f[0]].emplace_back(pred); 397 } 398 done: 399 s.pop(); 400 } 401 }; 402 403 404 typedef std::set<formula> fset; 405 struct data_entry // for each node of the graph 406 { 407 unsigned num; // serial number, in pre-order 408 unsigned low; // lowest number accessible via unstacked descendants data_entryspot::__anon0f70f2ca0511::data_entry409 data_entry(unsigned num = 0, unsigned low = 0) 410 : num(num), low(low) 411 { 412 } 413 }; 414 typedef std::unordered_map<formula, data_entry> fmap_t; 415 struct stack_entry 416 { 417 formula grand_parent; 418 formula parent; // current node 419 succ_vec::const_iterator current_child; 420 succ_vec::const_iterator last_child; 421 }; 422 typedef std::stack<stack_entry> stack_t; 423 424 // Fill c with the Boolean cutpoints of g, starting from start. 425 // 426 // This is based no "Efficient Algorithms for Graph 427 // Manipulation", J. Hopcroft & R. Tarjan, in Communications of 428 // the ACM, 16 (6), June 1973. 429 // 430 // It differs from the original algorithm by returning only the 431 // Boolean cutpoints, and not dealing with the initial state 432 // properly (our initial state will always be considered as a 433 // cut-point, but since we only return Boolean cut-points it's 434 // OK: if the top-most formula is Boolean we want to replace it 435 // as a whole). cut_points(const fgraph & g,fset & c,formula start)436 void cut_points(const fgraph& g, fset& c, formula start) 437 { 438 stack_t s; 439 440 unsigned num = 0; 441 fmap_t data; 442 data_entry d = { num, num }; 443 data[start] = d; 444 ++num; 445 const succ_vec& children = g.find(start)->second; 446 stack_entry e = { start, start, children.begin(), children.end() }; 447 s.push(e); 448 449 while (!s.empty()) 450 { 451 stack_entry& e = s.top(); 452 if (e.current_child != e.last_child) 453 { 454 // Skip the edge if it is just the reverse of the one 455 // we took. 456 formula child = *e.current_child; 457 if (child == e.grand_parent) 458 { 459 ++e.current_child; 460 continue; 461 } 462 auto i = data.emplace(std::piecewise_construct, 463 std::forward_as_tuple(child), 464 std::forward_as_tuple(num, num)); 465 if (i.second) // New destination. 466 { 467 ++num; 468 const succ_vec& children = g.find(child)->second; 469 stack_entry newe = { e.parent, child, 470 children.begin(), children.end() }; 471 s.push(newe); 472 } 473 else // Destination exists. 474 { 475 data_entry& dparent = data[e.parent]; 476 data_entry& dchild = i.first->second; 477 // If this is a back-edge, update 478 // the low field of the parent. 479 if (dchild.num <= dparent.num) 480 if (dparent.low > dchild.num) 481 dparent.low = dchild.num; 482 } 483 ++e.current_child; 484 } 485 else 486 { 487 formula grand_parent = e.grand_parent; 488 formula parent = e.parent; 489 s.pop(); 490 if (!s.empty()) 491 { 492 data_entry& dparent = data[parent]; 493 data_entry& dgrand_parent = data[grand_parent]; 494 if (dparent.low >= dgrand_parent.num // cut-point 495 && grand_parent.is_boolean()) 496 c.insert(grand_parent); 497 if (dparent.low < dgrand_parent.low) 498 dgrand_parent.low = dparent.low; 499 } 500 } 501 } 502 } 503 504 505 class bse_relabeler final: public relabeler 506 { 507 public: 508 const fset& c; 509 const sub_formula_count_t& subcount; 510 bse_relabeler(ap_generator * gen,const fset & c,relabeling_map * m,const sub_formula_count_t & subcount)511 bse_relabeler(ap_generator* gen, const fset& c, 512 relabeling_map* m, const sub_formula_count_t& subcount) 513 : relabeler(gen, m), c(c), subcount(subcount) 514 { 515 } 516 517 using relabeler::visit; 518 519 formula visit(formula f)520 visit(formula f) 521 { 522 if (f.is(op::ap) || (c.find(f) != c.end())) 523 return rename(f); 524 525 unsigned sz = f.size(); 526 if (sz <= 2) 527 return f.map([this](formula f) 528 { 529 return visit(f); 530 }); 531 532 unsigned i = 0; 533 std::vector<formula> res; 534 if (f.is_boolean() && sz > 2) 535 { 536 // If we have a Boolean formula with more than two 537 // children, like (a & b & c & d) where some children 538 // (assume {a,b}) are used only once, but some other 539 // (assume {c,d}) are used multiple time in the formula, 540 // then split that into ((a & b) & (c & d)) to give 541 // (a & b) a chance to be relabeled as a whole. 542 auto pair = split_used_once(f, subcount); 543 if (pair.second) 544 return formula::multop(f.kind(), { visit(pair.first), 545 visit(pair.second) }); 546 } 547 /// If we have a formula like (a & b & Xc), consider 548 /// it as ((a & b) & Xc) in the graph to isolate the 549 /// Boolean operands as a single node. 550 formula b = f.boolean_operands(&i); 551 if (b && b != f) 552 { 553 res.reserve(sz - i + 1); 554 res.emplace_back(visit(b)); 555 } 556 else 557 { 558 i = 0; 559 res.reserve(sz); 560 } 561 for (; i < sz; ++i) 562 res.emplace_back(visit(f[i])); 563 return formula::multop(f.kind(), res); 564 } 565 }; 566 } 567 568 569 formula relabel_bse(formula f,relabeling_style style,relabeling_map * m)570 relabel_bse(formula f, relabeling_style style, relabeling_map* m) 571 { 572 fgraph g; 573 sub_formula_count_t subcount; 574 575 // Scan f for sub-formulas used once. 576 sub_formula_collect(f, &subcount); 577 578 // Build the graph g from the formula f. 579 { 580 formula_to_fgraph conv(g, subcount); 581 conv.visit(f); 582 } 583 584 // Compute its cut-points 585 fset c; 586 cut_points(g, c, f); 587 588 // Relabel the formula recursively, stopping 589 // at cut-points or atomic propositions. 590 ap_generator* gen = nullptr; 591 switch (style) 592 { 593 case Pnn: 594 gen = new pnn_generator; 595 break; 596 case Abc: 597 gen = new abc_generator; 598 break; 599 } 600 bse_relabeler rel(gen, c, m, subcount); 601 return rel.visit(f); 602 } 603 604 formula relabel_apply(formula f,relabeling_map * m)605 relabel_apply(formula f, relabeling_map* m) 606 { 607 if (f.is(op::ap)) 608 { 609 auto i = m->find(f); 610 if (i != m->end()) 611 return i->second; 612 } 613 return f.map(relabel_apply, m); 614 } 615 616 } 617