1 // -*- coding: utf-8 -*-
2 // Copyright (C) 2012-2016, 2018-2020 Laboratoire de Recherche et
3 // Développement de l'Epita (LRDE).
4 //
5 // This file is part of Spot, a model checking library.
6 //
7 // Spot is free software; you can redistribute it and/or modify it
8 // under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Spot is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 // or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15 // License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 
20 #include "config.h"
21 #include <spot/tl/relabel.hh>
22 #include <sstream>
23 #include <spot/misc/hash.hh>
24 #include <map>
25 #include <set>
26 #include <stack>
27 #include <iostream>
28 
29 namespace spot
30 {
31   //////////////////////////////////////////////////////////////////////
32   // Basic relabeler
33   //////////////////////////////////////////////////////////////////////
34 
35   namespace
36   {
37     struct ap_generator
38     {
39       virtual formula next() = 0;
~ap_generatorspot::__anon0f70f2ca0111::ap_generator40       virtual ~ap_generator() {}
41     };
42 
43     struct pnn_generator final: ap_generator
44     {
45       unsigned nn;
pnn_generatorspot::__anon0f70f2ca0111::pnn_generator46       pnn_generator()
47         : nn(0)
48         {
49         }
50 
nextspot::__anon0f70f2ca0111::pnn_generator51       virtual formula next() override
52       {
53         std::ostringstream s;
54         s << 'p' << nn++;
55         return formula::ap(s.str());
56       }
57     };
58 
59     struct abc_generator final: ap_generator
60     {
61     public:
abc_generatorspot::__anon0f70f2ca0111::abc_generator62       abc_generator()
63         : nn(0)
64         {
65         }
66 
67       unsigned nn;
68 
nextspot::__anon0f70f2ca0111::abc_generator69       virtual formula next() override
70       {
71         std::string s;
72         unsigned n = nn++;
73         do
74           {
75             s.push_back('a' + (n % 26));
76             n /= 26;
77           }
78         while (n);
79         return formula::ap(s);
80       }
81     };
82 
83 
84     class relabeler
85     {
86     public:
87       typedef std::unordered_map<formula, formula> map;
88       map newname;
89       ap_generator* gen;
90       relabeling_map* oldnames;
91 
relabeler(ap_generator * gen,relabeling_map * m)92       relabeler(ap_generator* gen, relabeling_map* m)
93         : gen(gen), oldnames(m)
94       {
95       }
96 
~relabeler()97       ~relabeler()
98       {
99         delete gen;
100       }
101 
rename(formula old)102       formula rename(formula old)
103       {
104         auto r = newname.emplace(old, nullptr);
105         if (!r.second)
106           {
107             return r.first->second;
108           }
109         else
110           {
111             formula res = gen->next();
112             r.first->second = res;
113             if (oldnames)
114               (*oldnames)[res] = old;
115             return res;
116           }
117       }
118 
119       formula
visit(formula f)120       visit(formula f)
121       {
122         if (f.is(op::ap))
123           return rename(f);
124         else
125           return f.map([this](formula f)
126                        {
127                          return this->visit(f);
128                        });
129       }
130 
131     };
132 
133   }
134 
135 
136   formula
relabel(formula f,relabeling_style style,relabeling_map * m)137   relabel(formula f, relabeling_style style, relabeling_map* m)
138   {
139     ap_generator* gen = nullptr;
140     switch (style)
141       {
142       case Pnn:
143         gen = new pnn_generator;
144         break;
145       case Abc:
146         gen = new abc_generator;
147         break;
148       }
149 
150     relabeler r(gen, m);
151     return r.visit(f);
152   }
153 
154   namespace
155   {
156     typedef std::map<formula, int> sub_formula_count_t;
157 
158     static void
sub_formula_collect(formula f,sub_formula_count_t * s)159     sub_formula_collect(formula f, sub_formula_count_t* s)
160     {
161       assert(s);
162       f.traverse([&](const formula& f)
163                  {
164                    auto p = s->emplace(f, 1);
165                    if (p.second)
166                      return false;
167                    p.first->second += 1;
168                    return true;
169                  });
170     }
171 
172     static std::pair<formula, formula>
split_used_once(formula f,const sub_formula_count_t & subcount)173     split_used_once(formula f, const sub_formula_count_t& subcount)
174     {
175       assert(f.is_boolean());
176       unsigned sz = f.size();
177       if (sz <= 2)
178         return {f, nullptr};
179       // If we have a Boolean formula with more than two
180       // children, like (a & b & c & d) where some children
181       // (assume {a,b}) are used only once, but some other
182       // (assume {c,d}) are used multiple time in the formula,
183       // then split that into ((a & b) & (c & d)) to give
184       // (a & b) a chance to be relabeled as a whole.
185       bool has_once = false;
186       bool has_mult = false;
187       for (unsigned j = 0; j < sz; ++j)
188         {
189           auto p = subcount.find(f[j]);
190           assert(p != subcount.end());
191           unsigned sc = p->second;
192           assert(sc > 0);
193           if (sc == 1)
194             has_once = true;
195           else
196             has_mult = true;
197           if (has_once && has_mult)
198             {
199               std::vector<formula> once;
200               std::vector<formula> mult;
201               for (unsigned i = 0; i < j; ++i)
202                 mult.push_back(f[i]);
203               once.push_back(f[j]);
204               if (sc > 1)
205                 std::swap(once, mult);
206               for (++j; j < sz; ++j)
207                 {
208                   auto p = subcount.find(f[j]);
209                   assert(p != subcount.end());
210                   unsigned sc = p->second;
211                   ((sc == 1) ? once : mult).push_back(f[j]);
212                 }
213               formula f1 = formula::multop(f.kind(), std::move(once));
214               formula f2 = formula::multop(f.kind(), std::move(mult));
215               return { f1, f2 };
216             }
217         }
218       return {f, nullptr};
219     }
220   }
221 
222 
223   //////////////////////////////////////////////////////////////////////
224   // Boolean-subexpression relabeler
225   //////////////////////////////////////////////////////////////////////
226 
227   // Here we want to rewrite a formula such as
228   //   "a & b & X(c & d) & GF(c & d)" into "p0 & Xp1 & GFp1"
229   // where Boolean subexpressions are replaced by fresh propositions.
230   //
231   // Detecting Boolean subexpressions is not a problem.
232   // Furthermore, because we are already representing LTL formulas
233   // with sharing of identical sub-expressions we can easily rename
234   // a subexpression (such as c&d above) only once.  However this
235   // scheme has two problems:
236   //
237   //   A. It will not detect inter-dependent Boolean subexpressions.
238   //      For instance it will mistakenly relabel "(a & b) U (a & !b)"
239   //      as "p0 U p1", hiding the dependency between a&b and a&!b.
240   //
241   //   B. Because of our n-ary operators, it will fail to
242   //      notice that (a & b) is a sub-expression of (a & b & c).
243   //
244   // The way we compute the subexpressions that can be relabeled is
245   // by transforming the formula syntax tree into an undirected
246   // graph, and computing the cut points of this graph.  The cut
247   // points (or articulation points) are the nodes whose removal
248   // would split the graph in two components.  To ensure that a
249   // Boolean operator is only considered as a cut point if it would
250   // separate all of its children from the rest of the graph, we
251   // connect all the children of Boolean operators.
252   //
253   // For instance (a & b) U (c & d) has two (Boolean) cut points
254   // corresponding to the two AND operators:
255   //
256   //             (a&b)U(c&d)
257   //             ╱         ╲
258   //           a&b         c&d
259   //          ╱   ╲       ╱   ╲
260   //         a─────b     c─────d
261   //
262   // (The root node is also a cut point, but we only consider Boolean
263   // cut points for relabeling.)
264   //
265   // On the other hand, (a & b) U (b & !c) has only one Boolean
266   // cut-point which corresponds to the NOT operator:
267   //
268   //             (a&b)U(b&!c)
269   //                ╱   ╲
270   //              a&b   b&!c
271   //             ╱   ╲ ╱   ╲
272   //            a─────b────!c
273   //                        │
274   //                        c
275   //
276   // Note that if the children of a&b and b&c were not connected,
277   // a&b and b&c would be considered as cut points because they
278   // separate "a" or "!c" from the rest of the graph.
279   //
280   // The relabeling of a formula is therefore done in 3 passes:
281   //  1. convert the formula's syntax tree into an undirected graph,
282   //     adding links between children of Boolean operators
283   //  2. compute the (Boolean) cut points of that graph, using the
284   //     Hopcroft-Tarjan algorithm (see below for a reference)
285   //  3. recursively scan the formula's tree until we reach
286   //     either a (Boolean) cut point or an atomic proposition, and
287   //     replace that node by a fresh atomic proposition.
288   //
289   // In the example above (a&b)U(b&!c), the last recursion
290   // stops on a, b, and !c, producing (p0&p1)U(p1&p2).
291   //
292   // Problem #B above (handling of n-ary expression) need some
293   // additional tricks.  Consider (a&b&c&d) U X(c&d), and assume
294   // {a,b,c,d} are Boolean subformulas.  The construction, as we have
295   // presented it, would interconnect all of {a,b,c,d}, preventing c&d
296   // from being relabeled together.  To help with that, we count the
297   // number of time of each subformula is used (or how many parents
298   // its has in the syntax DAG), and use that to split (a&b&c&d) into
299   // (a&b)&(c&d), separating subformulas that are used only once.  The
300   // counting is done by sub_formula_collect(), and the split by
301   // split_used_once().
302   namespace
303   {
304     typedef std::vector<formula> succ_vec;
305     typedef std::map<formula, succ_vec> fgraph;
306 
307     // Convert the formula's syntax tree into an undirected graph
308     // labeled by subformulas.
309     class formula_to_fgraph
310     {
311     public:
312       fgraph& g;
313       std::stack<formula> s;
314       sub_formula_count_t& subcount;
315 
formula_to_fgraph(fgraph & g,sub_formula_count_t & subcount)316       formula_to_fgraph(fgraph& g, sub_formula_count_t& subcount):
317         g(g), subcount(subcount)
318         {
319         }
320 
~formula_to_fgraph()321       ~formula_to_fgraph()
322         {
323         }
324 
325       void
visit(formula f)326         visit(formula f)
327       {
328         {
329           // Connect to parent
330           auto in = g.emplace(f, succ_vec());
331           if (!s.empty())
332             {
333               formula top = s.top();
334               in.first->second.emplace_back(top);
335               g[top].emplace_back(f);
336               if (!in.second)
337                 return;
338             }
339           else
340             {
341               assert(in.second);
342             }
343         }
344         s.push(f);
345 
346         unsigned sz = f.size();
347         unsigned i = 0;
348         if (sz > 2 && f.is_boolean())
349           {
350             // If we have a Boolean formula with more than two
351             // children, like (a & b & c & d) where some children
352             // (assume {a,b}) are used only once, but some other
353             // (assume {c,d}) are used multiple time in the formula,
354             // then split that into ((a & b) & (c & d)) to give
355             // (a & b) a chance to be relabeled as a whole.
356             auto pair = split_used_once(f, subcount);
357             if (pair.second)
358               {
359                 visit(pair.first);
360                 visit(pair.second);
361                 g[pair.first].emplace_back(pair.second);
362                 g[pair.second].emplace_back(pair.first);
363                 goto done;
364               }
365           }
366         if (sz > 2 && !f.is_boolean())
367           {
368             /// If we have a formula like (a & b & Xc), consider
369             /// it as ((a & b) & Xc) in the graph to isolate the
370             /// Boolean operands as a single node.
371             formula b = f.boolean_operands(&i);
372             if (b)
373               visit(b);
374           }
375         for (; i < sz; ++i)
376           visit(f[i]);
377         if (sz > 1 && f.is_boolean())
378           {
379             // For Boolean nodes, connect all children in a
380             // loop.  This way the node can only be a cut point
381             // if it separates all children from the reset of
382             // the graph (not only one).
383             formula pred = f[0];
384             for (i = 1; i < sz; ++i)
385               {
386                 formula next = f[i];
387                 // Note that we only add an edge in both directions,
388                 // as the cut point algorithm really need undirected
389                 // graphs.  (We used to do only one direction, and
390                 // that turned out to be a bug.)
391                 g[pred].emplace_back(next);
392                 g[next].emplace_back(pred);
393                 pred = next;
394               }
395             g[pred].emplace_back(f[0]);
396             g[f[0]].emplace_back(pred);
397           }
398       done:
399         s.pop();
400       }
401     };
402 
403 
404     typedef std::set<formula> fset;
405     struct data_entry // for each node of the graph
406     {
407       unsigned num; // serial number, in pre-order
408       unsigned low; // lowest number accessible via unstacked descendants
data_entryspot::__anon0f70f2ca0511::data_entry409       data_entry(unsigned num = 0, unsigned low = 0)
410         : num(num), low(low)
411       {
412       }
413     };
414     typedef std::unordered_map<formula, data_entry> fmap_t;
415     struct stack_entry
416     {
417       formula grand_parent;
418       formula parent;        // current node
419       succ_vec::const_iterator current_child;
420       succ_vec::const_iterator last_child;
421     };
422     typedef std::stack<stack_entry> stack_t;
423 
424     // Fill c with the Boolean cutpoints of g, starting from start.
425     //
426     // This is based no "Efficient Algorithms for Graph
427     // Manipulation", J. Hopcroft & R. Tarjan, in Communications of
428     // the ACM, 16 (6), June 1973.
429     //
430     // It differs from the original algorithm by returning only the
431     // Boolean cutpoints, and not dealing with the initial state
432     // properly (our initial state will always be considered as a
433     // cut-point, but since we only return Boolean cut-points it's
434     // OK: if the top-most formula is Boolean we want to replace it
435     // as a whole).
cut_points(const fgraph & g,fset & c,formula start)436     void cut_points(const fgraph& g, fset& c, formula start)
437     {
438       stack_t s;
439 
440       unsigned num = 0;
441       fmap_t data;
442       data_entry d = { num, num };
443       data[start] = d;
444       ++num;
445       const succ_vec& children = g.find(start)->second;
446       stack_entry e = { start, start, children.begin(), children.end() };
447       s.push(e);
448 
449       while (!s.empty())
450         {
451           stack_entry& e  = s.top();
452           if (e.current_child != e.last_child)
453             {
454               // Skip the edge if it is just the reverse of the one
455               // we took.
456               formula child = *e.current_child;
457               if (child == e.grand_parent)
458                 {
459                   ++e.current_child;
460                   continue;
461                 }
462               auto i = data.emplace(std::piecewise_construct,
463                                     std::forward_as_tuple(child),
464                                     std::forward_as_tuple(num, num));
465               if (i.second)        // New destination.
466                 {
467                   ++num;
468                   const succ_vec& children = g.find(child)->second;
469                   stack_entry newe = { e.parent, child,
470                                        children.begin(), children.end() };
471                   s.push(newe);
472                 }
473               else           // Destination exists.
474                 {
475                   data_entry& dparent = data[e.parent];
476                   data_entry& dchild = i.first->second;
477                   // If this is a back-edge, update
478                   // the low field of the parent.
479                   if (dchild.num <= dparent.num)
480                     if (dparent.low > dchild.num)
481                       dparent.low = dchild.num;
482                 }
483               ++e.current_child;
484             }
485           else
486             {
487               formula grand_parent = e.grand_parent;
488               formula parent = e.parent;
489               s.pop();
490               if (!s.empty())
491                 {
492                   data_entry& dparent = data[parent];
493                   data_entry& dgrand_parent = data[grand_parent];
494                   if (dparent.low >= dgrand_parent.num // cut-point
495                       && grand_parent.is_boolean())
496                     c.insert(grand_parent);
497                   if (dparent.low < dgrand_parent.low)
498                     dgrand_parent.low = dparent.low;
499                 }
500             }
501         }
502     }
503 
504 
505     class bse_relabeler final: public relabeler
506     {
507     public:
508       const fset& c;
509       const sub_formula_count_t& subcount;
510 
bse_relabeler(ap_generator * gen,const fset & c,relabeling_map * m,const sub_formula_count_t & subcount)511       bse_relabeler(ap_generator* gen, const fset& c,
512                     relabeling_map* m, const sub_formula_count_t& subcount)
513         : relabeler(gen, m), c(c), subcount(subcount)
514       {
515       }
516 
517       using relabeler::visit;
518 
519       formula
visit(formula f)520         visit(formula f)
521       {
522         if (f.is(op::ap) || (c.find(f) != c.end()))
523           return rename(f);
524 
525         unsigned sz = f.size();
526         if (sz <= 2)
527           return f.map([this](formula f)
528                        {
529                          return visit(f);
530                        });
531 
532         unsigned i = 0;
533         std::vector<formula> res;
534         if (f.is_boolean() && sz > 2)
535           {
536             // If we have a Boolean formula with more than two
537             // children, like (a & b & c & d) where some children
538             // (assume {a,b}) are used only once, but some other
539             // (assume {c,d}) are used multiple time in the formula,
540             // then split that into ((a & b) & (c & d)) to give
541             // (a & b) a chance to be relabeled as a whole.
542             auto pair = split_used_once(f, subcount);
543             if (pair.second)
544               return formula::multop(f.kind(), { visit(pair.first),
545                                                  visit(pair.second) });
546           }
547         /// If we have a formula like (a & b & Xc), consider
548         /// it as ((a & b) & Xc) in the graph to isolate the
549         /// Boolean operands as a single node.
550         formula b = f.boolean_operands(&i);
551         if (b && b != f)
552           {
553             res.reserve(sz - i + 1);
554             res.emplace_back(visit(b));
555           }
556         else
557           {
558             i = 0;
559             res.reserve(sz);
560           }
561         for (; i < sz; ++i)
562           res.emplace_back(visit(f[i]));
563         return formula::multop(f.kind(), res);
564       }
565     };
566   }
567 
568 
569   formula
relabel_bse(formula f,relabeling_style style,relabeling_map * m)570   relabel_bse(formula f, relabeling_style style, relabeling_map* m)
571   {
572     fgraph g;
573     sub_formula_count_t subcount;
574 
575     // Scan f for sub-formulas used once.
576     sub_formula_collect(f, &subcount);
577 
578     // Build the graph g from the formula f.
579     {
580       formula_to_fgraph conv(g, subcount);
581       conv.visit(f);
582     }
583 
584     // Compute its cut-points
585     fset c;
586     cut_points(g, c, f);
587 
588     // Relabel the formula recursively, stopping
589     // at cut-points or atomic propositions.
590     ap_generator* gen = nullptr;
591     switch (style)
592       {
593       case Pnn:
594         gen = new pnn_generator;
595         break;
596       case Abc:
597         gen = new abc_generator;
598         break;
599       }
600     bse_relabeler rel(gen, c, m, subcount);
601     return rel.visit(f);
602   }
603 
604   formula
relabel_apply(formula f,relabeling_map * m)605   relabel_apply(formula f, relabeling_map* m)
606   {
607     if (f.is(op::ap))
608       {
609         auto i = m->find(f);
610         if (i != m->end())
611           return i->second;
612       }
613     return f.map(relabel_apply, m);
614   }
615 
616 }
617