1 //
2 // libsemigroups - C++ library for semigroups and monoids
3 // Copyright (C) 2019 James D. Mitchell
4 //
5 // This program is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 //
18 
19 // This file contains the implementation of the class KnuthBendixImpl, which is
20 // the main implementation for the class KnuthBendix.
21 
22 #ifndef LIBSEMIGROUPS_SRC_KNUTH_BENDIX_IMPL_HPP_
23 #define LIBSEMIGROUPS_SRC_KNUTH_BENDIX_IMPL_HPP_
24 
25 #include <algorithm>    // for max, min
26 #include <atomic>       // for atomic
27 #include <cinttypes>    // for int64_t
28 #include <cstddef>      // for size_t
29 #include <limits>       // for numeric_limits
30 #include <list>         // for list, list<>::iterator
31 #include <ostream>      // for string
32 #include <set>          // for set
33 #include <stack>        // for stack
34 #include <string>       // for operator!=, basic_strin...
35 #include <type_traits>  // for swap
36 #include <utility>      // for pair
37 #include <vector>       // for vector
38 
39 #include "constants.hpp"             // for POSITIVE_INFINITY
40 #include "knuth-bendix.hpp"          // for KnuthBendix, KnuthBendi...
41 #include "libsemigroups-config.hpp"  // for LIBSEMIGROUPS_DEBUG
42 #include "libsemigroups-debug.hpp"   // for LIBSEMIGROUPS_ASSERT
43 #include "order.hpp"                 // for shortlex_compare
44 #include "report.hpp"                // for REPORT
45 #include "string.hpp"                // for detail::is_suffix, maximum_comm...
46 #include "timer.hpp"                 // for detail::Timer
47 #include "types.hpp"                 // for word_type
48 
49 namespace libsemigroups {
50   namespace detail {
51     class KBE;  // Forward declarations
52   }
53   namespace fpsemigroup {
54 
55     class KnuthBendix::KnuthBendixImpl {
56       ////////////////////////////////////////////////////////////////////////
57       // KnuthBendixImpl - typedefs/aliases - private
58       ////////////////////////////////////////////////////////////////////////
59 
60       using external_string_type = std::string;
61       using internal_string_type = std::string;
62       using external_char_type   = char;
63       using internal_char_type   = char;
64 
65       ////////////////////////////////////////////////////////////////////////
66       // KnuthBendixImpl - nested subclasses - private
67       ////////////////////////////////////////////////////////////////////////
68 
69       // Rule and RuleLookup classes
70       class Rule {
71        public:
72         // Construct from KnuthBendix with new but empty internal_string_type's
Rule(KnuthBendixImpl const * kbimpl,int64_t id)73         explicit Rule(KnuthBendixImpl const* kbimpl, int64_t id)
74             : _kbimpl(kbimpl),
75               _lhs(new internal_string_type()),
76               _rhs(new internal_string_type()),
77               _id(-1 * id) {
78           LIBSEMIGROUPS_ASSERT(_id < 0);
79         }
80 
81         // The Rule class does not support an assignment contructor to avoid
82         // accidental copying.
83         Rule& operator=(Rule const& copy) = delete;
84 
85         // The Rule class does not support a copy contructor to avoid
86         // accidental copying.
87         Rule(Rule const& copy) = delete;
88 
89         // Destructor, deletes pointers used to create the rule.
~Rule()90         ~Rule() {
91           delete _lhs;
92           delete _rhs;
93         }
94 
95         // Returns the left hand side of the rule, which is guaranteed to be
96         // greater than its right hand side according to the reduction ordering
97         // of the KnuthBendix used to construct this.
lhs() const98         internal_string_type* lhs() const {
99           return _lhs;
100         }
101 
102         // Returns the right hand side of the rule, which is guaranteed to be
103         // less than its left hand side according to the reduction ordering of
104         // the KnuthBendix used to construct this.
rhs() const105         internal_string_type* rhs() const {
106           return _rhs;
107         }
108 
rewrite()109         void rewrite() {
110           LIBSEMIGROUPS_ASSERT(_id != 0);
111           _kbimpl->internal_rewrite(_lhs);
112           _kbimpl->internal_rewrite(_rhs);
113           // reorder if necessary
114           if (shortlex_compare(_lhs, _rhs)) {
115             std::swap(_lhs, _rhs);
116           }
117         }
118 
clear()119         void clear() {
120           LIBSEMIGROUPS_ASSERT(_id != 0);
121           _lhs->clear();
122           _rhs->clear();
123         }
124 
active() const125         inline bool active() const {
126           LIBSEMIGROUPS_ASSERT(_id != 0);
127           return (_id > 0);
128         }
129 
deactivate()130         void deactivate() {
131           LIBSEMIGROUPS_ASSERT(_id != 0);
132           if (active()) {
133             _id *= -1;
134           }
135         }
136 
activate()137         void activate() {
138           LIBSEMIGROUPS_ASSERT(_id != 0);
139           if (!active()) {
140             _id *= -1;
141           }
142         }
143 
set_id(int64_t id)144         void set_id(int64_t id) {
145           LIBSEMIGROUPS_ASSERT(id > 0);
146           LIBSEMIGROUPS_ASSERT(!active());
147           _id = -1 * id;
148         }
149 
id() const150         int64_t id() const {
151           LIBSEMIGROUPS_ASSERT(_id != 0);
152           return _id;
153         }
154 
155         KnuthBendixImpl const* _kbimpl;
156         internal_string_type*  _lhs;
157         internal_string_type*  _rhs;
158         int64_t                _id;
159       };  // struct Rule
160 
161       // Simple class wrapping a two iterators to an internal_string_type and a
162       // Rule const*
163 
164       class RuleLookup {
165        public:
RuleLookup()166         RuleLookup() : _rule(nullptr) {}
167 
RuleLookup(Rule * rule)168         explicit RuleLookup(Rule* rule)
169             : _first(rule->lhs()->cbegin()),
170               _last(rule->lhs()->cend()),
171               _rule(rule) {}
172 
operator ()(internal_string_type::iterator const & first,internal_string_type::iterator const & last)173         RuleLookup& operator()(internal_string_type::iterator const& first,
174                                internal_string_type::iterator const& last) {
175           _first = first;
176           _last  = last;
177           return *this;
178         }
179 
rule() const180         Rule const* rule() const {
181           return _rule;
182         }
183 
184         // This implements reverse lex comparison of this and that, which
185         // satisfies the requirement of std::set that equivalent items be
186         // incomparable, so, for example bcbc and abcbc are considered
187         // equivalent, but abcba and bcbc are not.
operator <(RuleLookup const & that) const188         bool operator<(RuleLookup const& that) const {
189           auto it_this = _last - 1;
190           auto it_that = that._last - 1;
191           while (it_this > _first && it_that > that._first
192                  && *it_this == *it_that) {
193             --it_that;
194             --it_this;
195           }
196           return *it_this < *it_that;
197         }
198 
199        private:
200         internal_string_type::const_iterator _first;
201         internal_string_type::const_iterator _last;
202         Rule const*                          _rule;
203       };  // class RuleLookup
204 
205       // Overlap measures
206       struct OverlapMeasure {
207         virtual size_t operator()(Rule const*,
208                                   Rule const*,
209                                   internal_string_type::const_iterator const&)
210             = 0;
~OverlapMeasurelibsemigroups::fpsemigroup::KnuthBendix::KnuthBendixImpl::OverlapMeasure211         virtual ~OverlapMeasure() {}
212       };
213 
214       struct ABC : OverlapMeasure {
215         size_t
operator ()libsemigroups::fpsemigroup::KnuthBendix::KnuthBendixImpl::ABC216         operator()(Rule const*                                 AB,
217                    Rule const*                                 BC,
218                    internal_string_type::const_iterator const& it) override {
219           LIBSEMIGROUPS_ASSERT(AB->active() && BC->active());
220           LIBSEMIGROUPS_ASSERT(AB->lhs()->cbegin() <= it);
221           LIBSEMIGROUPS_ASSERT(it < AB->lhs()->cend());
222           // |A| + |BC|
223           return (it - AB->lhs()->cbegin()) + BC->lhs()->size();
224         }
225       };
226 
227       struct AB_BC : OverlapMeasure {
228         size_t
operator ()libsemigroups::fpsemigroup::KnuthBendix::KnuthBendixImpl::AB_BC229         operator()(Rule const*                                 AB,
230                    Rule const*                                 BC,
231                    internal_string_type::const_iterator const& it) override {
232           LIBSEMIGROUPS_ASSERT(AB->active() && BC->active());
233           LIBSEMIGROUPS_ASSERT(AB->lhs()->cbegin() <= it);
234           LIBSEMIGROUPS_ASSERT(it < AB->lhs()->cend());
235           (void) it;
236           // |AB| + |BC|
237           return AB->lhs()->size() + BC->lhs()->size();
238         }
239       };
240 
241       struct MAX_AB_BC : OverlapMeasure {
242         size_t
operator ()libsemigroups::fpsemigroup::KnuthBendix::KnuthBendixImpl::MAX_AB_BC243         operator()(Rule const*                                 AB,
244                    Rule const*                                 BC,
245                    internal_string_type::const_iterator const& it) override {
246           LIBSEMIGROUPS_ASSERT(AB->active() && BC->active());
247           LIBSEMIGROUPS_ASSERT(AB->lhs()->cbegin() <= it);
248           LIBSEMIGROUPS_ASSERT(it < AB->lhs()->cend());
249           (void) it;
250           // max(|AB|, |BC|)
251           return std::max(AB->lhs()->size(), BC->lhs()->size());
252         }
253       };
254 
255       //////////////////////////////////////////////////////////////////////////
256       // KnuthBendixImpl - friend declarations - private
257       //////////////////////////////////////////////////////////////////////////
258 
259       friend class Rule;                          // defined in this file
260       friend class ::libsemigroups::detail::KBE;  // defined in detail::kbe.hpp
261 
262      public:
263       //////////////////////////////////////////////////////////////////////////
264       // KnuthBendixImpl - constructors + destructor - public
265       //////////////////////////////////////////////////////////////////////////
266 
KnuthBendixImpl(KnuthBendix * kb)267       explicit KnuthBendixImpl(KnuthBendix* kb)
268           : _active_rules(),
269             _confluent(false),
270             _confluence_known(false),
271             _inactive_rules(),
272             _internal_is_same_as_external(false),
273             _kb(kb),
274             _min_length_lhs_rule(std::numeric_limits<size_t>::max()),
275             _overlap_measure(nullptr),
276             _stack(),
277             _tmp_word1(new internal_string_type()),
278             _tmp_word2(new internal_string_type()),
279             _total_rules(0) {
280         _next_rule_it1 = _active_rules.end();  // null
281         _next_rule_it2 = _active_rules.end();  // null
282         this->set_overlap_policy(policy::overlap::ABC);
283 #ifdef LIBSEMIGROUPS_VERBOSE
284         _max_stack_depth        = 0;
285         _max_word_length        = 0;
286         _max_active_word_length = 0;
287         _max_active_rules       = 0;
288 #endif
289       }
290 
~KnuthBendixImpl()291       ~KnuthBendixImpl() {
292         delete _overlap_measure;
293         delete _tmp_word1;
294         delete _tmp_word2;
295         for (Rule const* rule : _active_rules) {
296           delete const_cast<Rule*>(rule);
297         }
298         for (Rule* rule : _inactive_rules) {
299           delete rule;
300         }
301         while (!_stack.empty()) {
302           Rule* rule = _stack.top();
303           _stack.pop();
304           delete rule;
305         }
306       }
307 
308      private:
309       //////////////////////////////////////////////////////////////////////////
310       // KnuthBendixImpl - converting ints <-> string/char - private
311       //////////////////////////////////////////////////////////////////////////
312 
internal_char_to_uint(internal_char_type c)313       static size_t internal_char_to_uint(internal_char_type c) {
314 #ifdef LIBSEMIGROUPS_DEBUG
315         LIBSEMIGROUPS_ASSERT(c >= 97);
316         return static_cast<size_t>(c - 97);
317 #else
318         return static_cast<size_t>(c - 1);
319 #endif
320       }
321 
uint_to_internal_char(size_t a)322       static internal_char_type uint_to_internal_char(size_t a) {
323         LIBSEMIGROUPS_ASSERT(a
324                              <= std::numeric_limits<internal_char_type>::max());
325 #ifdef LIBSEMIGROUPS_DEBUG
326         LIBSEMIGROUPS_ASSERT(a <= std::numeric_limits<internal_char_type>::max()
327                                       - 97);
328         return static_cast<internal_char_type>(a + 97);
329 #else
330         return static_cast<internal_char_type>(a + 1);
331 #endif
332       }
333 
uint_to_internal_string(size_t const i)334       static internal_string_type uint_to_internal_string(size_t const i) {
335         LIBSEMIGROUPS_ASSERT(i
336                              <= std::numeric_limits<internal_char_type>::max());
337         return internal_string_type({uint_to_internal_char(i)});
338       }
339 
internal_string_to_word(internal_string_type const & s)340       static word_type internal_string_to_word(internal_string_type const& s) {
341         word_type w;
342         w.reserve(s.size());
343         for (internal_char_type const& c : s) {
344           w.push_back(internal_char_to_uint(c));
345         }
346         return w;
347       }
348 
349       static internal_string_type*
word_to_internal_string(word_type const & w,internal_string_type * ww)350       word_to_internal_string(word_type const& w, internal_string_type* ww) {
351         ww->clear();
352         for (size_t const& a : w) {
353           (*ww) += uint_to_internal_char(a);
354         }
355         return ww;
356       }
357 
word_to_internal_string(word_type const & u)358       static internal_string_type word_to_internal_string(word_type const& u) {
359         internal_string_type v;
360         v.reserve(u.size());
361         for (size_t const& l : u) {
362           v += uint_to_internal_char(l);
363         }
364         return v;
365       }
366 
external_to_internal_char(external_char_type c) const367       internal_char_type external_to_internal_char(external_char_type c) const {
368         LIBSEMIGROUPS_ASSERT(!_internal_is_same_as_external);
369         return uint_to_internal_char(_kb->char_to_uint(c));
370       }
371 
internal_to_external_char(internal_char_type a) const372       external_char_type internal_to_external_char(internal_char_type a) const {
373         LIBSEMIGROUPS_ASSERT(!_internal_is_same_as_external);
374         return _kb->uint_to_char(internal_char_to_uint(a));
375       }
376 
external_to_internal_string(external_string_type & w) const377       void external_to_internal_string(external_string_type& w) const {
378         if (_internal_is_same_as_external) {
379           return;
380         }
381         for (auto& a : w) {
382           a = external_to_internal_char(a);
383         }
384       }
385 
internal_to_external_string(internal_string_type & w) const386       void internal_to_external_string(internal_string_type& w) const {
387         if (_internal_is_same_as_external) {
388           return;
389         }
390         for (auto& a : w) {
391           a = internal_to_external_char(a);
392         }
393       }
394 
395      public:
396       //////////////////////////////////////////////////////////////////////////
397       // KnuthBendixImpl - methods for rules - public
398       //////////////////////////////////////////////////////////////////////////
399 
add_rule(std::string const & p,std::string const & q)400       void add_rule(std::string const& p, std::string const& q) {
401         LIBSEMIGROUPS_ASSERT(p != q);
402         auto pp = new external_string_type(p);
403         auto qq = new external_string_type(q);
404         external_to_internal_string(*pp);
405         external_to_internal_string(*qq);
406         add_rule(new_rule(pp, qq));
407       }
408 
add_rules(KnuthBendixImpl const * impl)409       void add_rules(KnuthBendixImpl const* impl) {
410         for (Rule const* rule : impl->_active_rules) {
411           add_rule(new_rule(rule));
412         }
413       }
414 
rules() const415       std::vector<std::pair<std::string, std::string>> rules() const {
416         std::vector<std::pair<external_string_type, external_string_type>>
417             rules;
418         rules.reserve(_active_rules.size());
419         for (Rule const* rule : _active_rules) {
420           internal_string_type lhs = internal_string_type(*rule->lhs());
421           internal_string_type rhs = internal_string_type(*rule->rhs());
422           internal_to_external_string(lhs);
423           internal_to_external_string(rhs);
424           rules.emplace_back(lhs, rhs);
425         }
426         std::sort(
427             rules.begin(),
428             rules.end(),
429             [](std::pair<external_string_type, external_string_type> rule1,
430                std::pair<external_string_type, external_string_type> rule2) {
431               return shortlex_compare(rule1.first, rule2.first)
432                      || (rule1.first == rule2.first
433                          && shortlex_compare(rule1.second, rule2.second));
434             });
435         return rules;
436       }
437 
nr_rules() const438       size_t nr_rules() const {
439         return _active_rules.size();
440       }
441 
442      private:
443       //////////////////////////////////////////////////////////////////////////
444       // KnuthBendixImpl - methods for rules - private
445       //////////////////////////////////////////////////////////////////////////
446 
new_rule() const447       Rule* new_rule() const {
448         ++_total_rules;
449         Rule* rule;
450         if (!_inactive_rules.empty()) {
451           rule = _inactive_rules.front();
452           rule->clear();
453           rule->set_id(_total_rules);
454           _inactive_rules.erase(_inactive_rules.begin());
455         } else {
456           rule = new Rule(this, _total_rules);
457         }
458         LIBSEMIGROUPS_ASSERT(!rule->active());
459         return rule;
460       }
461 
new_rule(internal_string_type * lhs,internal_string_type * rhs) const462       Rule* new_rule(internal_string_type* lhs,
463                      internal_string_type* rhs) const {
464         Rule* rule = new_rule();
465         delete rule->_lhs;
466         delete rule->_rhs;
467         if (shortlex_compare(rhs, lhs)) {
468           rule->_lhs = lhs;
469           rule->_rhs = rhs;
470         } else {
471           rule->_lhs = rhs;
472           rule->_rhs = lhs;
473         }
474         return rule;
475       }
476 
new_rule(Rule const * rule1) const477       Rule* new_rule(Rule const* rule1) const {
478         Rule* rule2 = new_rule();
479         rule2->_lhs->append(*rule1->lhs());  // copies lhs
480         rule2->_rhs->append(*rule1->rhs());  // copies rhs
481         return rule2;
482       }
483 
new_rule(internal_string_type::const_iterator begin_lhs,internal_string_type::const_iterator end_lhs,internal_string_type::const_iterator begin_rhs,internal_string_type::const_iterator end_rhs) const484       Rule* new_rule(internal_string_type::const_iterator begin_lhs,
485                      internal_string_type::const_iterator end_lhs,
486                      internal_string_type::const_iterator begin_rhs,
487                      internal_string_type::const_iterator end_rhs) const {
488         Rule* rule = new_rule();
489         rule->_lhs->append(begin_lhs, end_lhs);
490         rule->_rhs->append(begin_rhs, end_rhs);
491         return rule;
492       }
493 
add_rule(Rule * rule)494       void add_rule(Rule* rule) {
495         LIBSEMIGROUPS_ASSERT(*rule->lhs() != *rule->rhs());
496 #ifdef LIBSEMIGROUPS_VERBOSE
497         _max_word_length  = std::max(_max_word_length, rule->lhs()->size());
498         _max_active_rules = std::max(_max_active_rules, _active_rules.size());
499         _unique_lhs_rules.insert(*rule->lhs());
500 #endif
501         if (!_set_rules.emplace(RuleLookup(rule)).second) {
502           // The rules are not reduced, this should only happen if we are
503           // calling add_rule from outside the class (i.e. we are initialising
504           // the KnuthBendix).
505           push_stack(rule);
506           return;  // Do not activate or actually add the rule at this point
507         }
508         rule->activate();
509         _active_rules.push_back(rule);
510         if (_next_rule_it1 == _active_rules.end()) {
511           --_next_rule_it1;
512         }
513         if (_next_rule_it2 == _active_rules.end()) {
514           --_next_rule_it2;
515         }
516         _confluence_known = false;
517         if (rule->lhs()->size() < _min_length_lhs_rule) {
518           // TODO(later) this is not valid when using non-length reducing
519           // orderings (such as RECURSIVE)
520           _min_length_lhs_rule = rule->lhs()->size();
521         }
522         LIBSEMIGROUPS_ASSERT(_set_rules.size() == _active_rules.size());
523       }
524 
525       std::list<Rule const*>::iterator
remove_rule(std::list<Rule const * >::iterator it)526       remove_rule(std::list<Rule const*>::iterator it) {
527 #ifdef LIBSEMIGROUPS_VERBOSE
528         _unique_lhs_rules.erase(*((*it)->lhs()));
529 #endif
530         Rule* rule = const_cast<Rule*>(*it);
531         rule->deactivate();
532         if (it != _next_rule_it1 && it != _next_rule_it2) {
533           it = _active_rules.erase(it);
534         } else if (it == _next_rule_it1 && it != _next_rule_it2) {
535           _next_rule_it1 = _active_rules.erase(it);
536           it             = _next_rule_it1;
537         } else if (it != _next_rule_it1 && it == _next_rule_it2) {
538           _next_rule_it2 = _active_rules.erase(it);
539           it             = _next_rule_it2;
540         } else {
541           _next_rule_it1 = _active_rules.erase(it);
542           _next_rule_it2 = _next_rule_it1;
543           it             = _next_rule_it1;
544         }
545 #ifdef LIBSEMIGROUPS_DEBUG
546         LIBSEMIGROUPS_ASSERT(_set_rules.erase(RuleLookup(rule)));
547 #else
548         _set_rules.erase(RuleLookup(rule));
549 #endif
550         LIBSEMIGROUPS_ASSERT(_set_rules.size() == _active_rules.size());
551         return it;
552       }
553 
554      public:
555       //////////////////////////////////////////////////////////////////////////
556       // KnuthBendixImpl - other methods - public
557       //////////////////////////////////////////////////////////////////////////
558 
rewrite(external_string_type * w) const559       external_string_type* rewrite(external_string_type* w) const {
560         external_to_internal_string(*w);
561         internal_rewrite(w);
562         internal_to_external_string(*w);
563         return w;
564       }
565 
equal_to(external_string_type const & u,external_string_type const & v)566       bool equal_to(external_string_type const& u,
567                     external_string_type const& v) {
568         if (u == v) {
569           return true;
570         }
571         external_string_type uu = _kb->rewrite(u);
572         external_string_type vv = _kb->rewrite(v);
573         if (uu == vv) {
574           return true;
575         }
576         knuth_bendix();
577         external_to_internal_string(uu);
578         external_to_internal_string(vv);
579         internal_rewrite(&uu);
580         internal_rewrite(&vv);
581         return uu == vv;
582       }
583 
set_overlap_policy(policy::overlap p)584       void set_overlap_policy(policy::overlap p) {
585         if (p == _kb->_settings._overlap_policy
586             && _overlap_measure != nullptr) {
587           return;
588         }
589         delete _overlap_measure;
590         switch (p) {
591           case policy::overlap::ABC:
592             _overlap_measure = new ABC();
593             break;
594           case policy::overlap::AB_BC:
595             _overlap_measure = new AB_BC();
596             break;
597           case policy::overlap::MAX_AB_BC:
598             _overlap_measure = new MAX_AB_BC();
599             break;
600           default:
601             LIBSEMIGROUPS_ASSERT(false);
602         }
603       }
604 
set_internal_alphabet(std::string const & lphbt="")605       void set_internal_alphabet(std::string const& lphbt = "") {
606         _internal_is_same_as_external = true;
607         for (size_t i = 0; i < lphbt.size(); ++i) {
608           if (uint_to_internal_char(i) != lphbt[i]) {
609             _internal_is_same_as_external = false;
610             return;
611           }
612         }
613       }
614 
615      private:
616       //////////////////////////////////////////////////////////////////////////
617       // KnuthBendixImpl - other methods - private
618       //////////////////////////////////////////////////////////////////////////
619       // REWRITE_FROM_LEFT from Sims, p67
620       // Caution: this uses the assumption that rules are length reducing, if it
621       // is not, then u might not have sufficient space!
internal_rewrite(internal_string_type * u) const622       void internal_rewrite(internal_string_type* u) const {
623         if (u->size() < _min_length_lhs_rule) {
624           return;
625         }
626         internal_string_type::iterator const& v_begin = u->begin();
627         internal_string_type::iterator        v_end
628             = u->begin() + _min_length_lhs_rule - 1;
629         internal_string_type::iterator        w_begin = v_end;
630         internal_string_type::iterator const& w_end   = u->end();
631 
632         RuleLookup lookup;
633 
634         while (w_begin != w_end) {
635           *v_end = *w_begin;
636           ++v_end;
637           ++w_begin;
638 
639           auto it = _set_rules.find(lookup(v_begin, v_end));
640           if (it != _set_rules.end()) {
641             Rule const* rule = (*it).rule();
642             if (rule->lhs()->size() <= static_cast<size_t>(v_end - v_begin)) {
643               LIBSEMIGROUPS_ASSERT(detail::is_suffix(
644                   v_begin, v_end, rule->lhs()->cbegin(), rule->lhs()->cend()));
645               v_end -= rule->lhs()->size();
646               w_begin -= rule->rhs()->size();
647               detail::string_replace(
648                   w_begin, rule->rhs()->cbegin(), rule->rhs()->cend());
649             }
650           }
651           while (w_begin != w_end
652                  && _min_length_lhs_rule - 1
653                         > static_cast<size_t>((v_end - v_begin))) {
654             *v_end = *w_begin;
655             ++v_end;
656             ++w_begin;
657           }
658         }
659         u->erase(v_end - u->cbegin());
660       }
661 
662       // TEST_2 from Sims, p76
clear_stack()663       void clear_stack() {
664         while (!_stack.empty() && !_kb->stopped()) {
665 #ifdef LIBSEMIGROUPS_VERBOSE
666           _max_stack_depth = std::max(_max_stack_depth, _stack.size());
667 #endif
668 
669           Rule* rule1 = _stack.top();
670           _stack.pop();
671           LIBSEMIGROUPS_ASSERT(!rule1->active());
672           LIBSEMIGROUPS_ASSERT(*rule1->lhs() != *rule1->rhs());
673           // Rewrite both sides and reorder if necessary . . .
674           rule1->rewrite();
675 
676           if (*rule1->lhs() != *rule1->rhs()) {
677             internal_string_type const* lhs = rule1->lhs();
678             for (auto it = _active_rules.begin(); it != _active_rules.end();) {
679               Rule* rule2 = const_cast<Rule*>(*it);
680               if (rule2->lhs()->find(*lhs) != external_string_type::npos) {
681                 it = remove_rule(it);
682                 LIBSEMIGROUPS_ASSERT(*rule2->lhs() != *rule2->rhs());
683                 // rule2 is added to _inactive_rules by clear_stack
684                 _stack.emplace(rule2);
685               } else {
686                 if (rule2->rhs()->find(*lhs) != external_string_type::npos) {
687                   internal_rewrite(rule2->rhs());
688                 }
689                 ++it;
690               }
691             }
692             add_rule(rule1);
693             // rule1 is activated, we do this after removing rules that rule1
694             // makes redundant to avoid failing to insert rule1 in _set_rules
695           } else {
696             _inactive_rules.push_back(rule1);
697           }
698           if (_kb->report()) {
699             REPORT_DEFAULT(
700                 "active rules = %d, inactive rules = %d, rules defined = "
701                 "%d\n",
702                 _active_rules.size(),
703                 _inactive_rules.size(),
704                 _total_rules);
705             REPORT_VERBOSE_DEFAULT("max stack depth        = %d\n"
706                                    "max word length        = %d\n"
707                                    "max active word length = %d\n"
708                                    "max active rules       = %d\n"
709                                    "number of unique lhs   = %d\n",
710                                    _max_stack_depth,
711                                    _max_word_length,
712                                    max_active_word_length(),
713                                    _max_active_rules,
714                                    _unique_lhs_rules.size());
715           }
716         }
717       }
718       // FIXME(later) there is a possibly infinite loop here clear_stack ->
719       // push_stack -> clear_stack and so on
push_stack(Rule * rule)720       void push_stack(Rule* rule) {
721         LIBSEMIGROUPS_ASSERT(!rule->active());
722         if (*rule->lhs() != *rule->rhs()) {
723           _stack.emplace(rule);
724           clear_stack();
725         } else {
726           _inactive_rules.push_back(rule);
727         }
728       }
729 
730       // OVERLAP_2 from Sims, p77
overlap(Rule const * u,Rule const * v)731       void overlap(Rule const* u, Rule const* v) {
732         LIBSEMIGROUPS_ASSERT(u->active() && v->active());
733         auto limit
734             = u->lhs()->cend() - std::min(u->lhs()->size(), v->lhs()->size());
735         int64_t u_id = u->id();
736         int64_t v_id = v->id();
737         for (auto it = u->lhs()->cend() - 1;
738              it > limit && u_id == u->id() && v_id == v->id() && !_kb->stopped()
739              && (_kb->_settings._max_overlap == POSITIVE_INFINITY
740                  || (*_overlap_measure)(u, v, it)
741                         <= _kb->_settings._max_overlap);
742              --it) {
743           // Check if B = [it, u->lhs()->cend()) is a prefix of v->lhs()
744           if (detail::is_prefix(
745                   v->lhs()->cbegin(), v->lhs()->cend(), it, u->lhs()->cend())) {
746             // u = P_i = AB -> Q_i and v = P_j = BC -> Q_j
747             // This version of new_rule does not reorder
748             Rule* rule = new_rule(u->lhs()->cbegin(),
749                                   it,
750                                   u->rhs()->cbegin(),
751                                   u->rhs()->cend());  // rule = A -> Q_i
752             rule->_lhs->append(*v->rhs());            // rule = AQ_j -> Q_i
753             rule->_rhs->append(v->lhs()->cbegin() + (u->lhs()->cend() - it),
754                                v->lhs()->cend());  // rule = AQ_j -> Q_iC
755             // rule is reordered during rewriting in clear_stack
756             push_stack(rule);
757             // It can be that the iterator `it` is invalidated by the call to
758             // push_stack (i.e. if `u` is deactivated, then rewritten, actually
759             // changed, and reactivated) and that is the reason for the checks
760             // in the for-loop above. If this is the case, then we should stop
761             // considering the overlaps of u and v here, and note that they will
762             // be considered later, because when the rule `u` is reactivated it
763             // is added to the end of the active rules list.
764           }
765         }
766       }
767 
768      public:
769       //////////////////////////////////////////////////////////////////////////
770       // KnuthBendixImpl - main methods - public
771       //////////////////////////////////////////////////////////////////////////
772 
confluent() const773       bool confluent() const {
774         if (!_stack.empty()) {
775           return false;
776         }
777         if (!_confluence_known && (!_kb->running() || !_kb->stopped())) {
778           LIBSEMIGROUPS_ASSERT(_stack.empty());
779           _confluent        = true;
780           _confluence_known = true;
781           internal_string_type word1;
782           internal_string_type word2;
783           size_t               seen = 0;
784 
785           for (auto it1 = _active_rules.cbegin();
786                it1 != _active_rules.cend()
787                && (!_kb->running() || !_kb->stopped());
788                ++it1) {
789             Rule const* rule1 = *it1;
790             // Seems to be much faster to do this in reverse.
791             for (auto it2 = _active_rules.crbegin();
792                  it2 != _active_rules.crend()
793                  && (!_kb->running() || !_kb->stopped());
794                  ++it2) {
795               seen++;
796               Rule const* rule2 = *it2;
797               for (auto it = rule1->lhs()->cend() - 1;
798                    it >= rule1->lhs()->cbegin()
799                    && (!_kb->running() || !_kb->stopped());
800                    --it) {
801                 // Find longest common prefix of suffix B of rule1.lhs() defined
802                 // by it and R = rule2.lhs()
803                 auto prefix
804                     = detail::maximum_common_prefix(it,
805                                                     rule1->lhs()->cend(),
806                                                     rule2->lhs()->cbegin(),
807                                                     rule2->lhs()->cend());
808                 if (prefix.first == rule1->lhs()->cend()
809                     || prefix.second == rule2->lhs()->cend()) {
810                   word1.clear();
811                   word1.append(rule1->lhs()->cbegin(), it);          // A
812                   word1.append(*rule2->rhs());                       // S
813                   word1.append(prefix.first, rule1->lhs()->cend());  // D
814 
815                   word2.clear();
816                   word2.append(*rule1->rhs());                        // Q
817                   word2.append(prefix.second, rule2->lhs()->cend());  // E
818 
819                   if (word1 != word2) {
820                     internal_rewrite(&word1);
821                     internal_rewrite(&word2);
822                     if (word1 != word2) {
823                       _confluent = false;
824                       return _confluent;
825                     }
826                   }
827                 }
828               }
829             }
830             if (_kb->report()) {
831               REPORT_DEFAULT("checked %d pairs of overlaps out of %d\n",
832                              seen,
833                              _active_rules.size() * _active_rules.size());
834             }
835           }
836           if (_kb->running() && _kb->stopped()) {
837             _confluence_known = false;
838           }
839         }
840         return _confluent;
841       }
842 
843       // KBS_2 from Sims, p77-78
knuth_bendix()844       bool knuth_bendix() {
845         detail::Timer timer;
846         if (_stack.empty() && confluent() && !_kb->stopped()) {
847           // _stack can be non-empty if non-reduced rules were used to define
848           // the KnuthBendix.  If _stack is non-empty, then it means that the
849           // rules in _active_rules might not define the system.
850           REPORT_DEFAULT("the system is confluent already\n");
851           return true;
852         } else if (_active_rules.size() >= _kb->_settings._max_rules) {
853           REPORT_DEFAULT("too many rules\n");
854           return false;
855         }
856         // Reduce the rules
857         _next_rule_it1 = _active_rules.begin();
858         while (_next_rule_it1 != _active_rules.end() && !_kb->stopped()) {
859           // Copy *_next_rule_it1 and push_stack so that it is not modified by
860           // the call to clear_stack.
861           LIBSEMIGROUPS_ASSERT((*_next_rule_it1)->lhs()
862                                != (*_next_rule_it1)->rhs());
863           push_stack(new_rule(*_next_rule_it1));
864           ++_next_rule_it1;
865         }
866         _next_rule_it1 = _active_rules.begin();
867         size_t nr      = 0;
868         while (_next_rule_it1 != _active_rules.cend()
869                && _active_rules.size() < _kb->_settings._max_rules
870                && !_kb->stopped()) {
871           Rule const* rule1 = *_next_rule_it1;
872           _next_rule_it2    = _next_rule_it1;
873           ++_next_rule_it1;
874           overlap(rule1, rule1);
875           while (_next_rule_it2 != _active_rules.begin() && rule1->active()) {
876             --_next_rule_it2;
877             Rule const* rule2 = *_next_rule_it2;
878             overlap(rule1, rule2);
879             ++nr;
880             if (rule1->active() && rule2->active()) {
881               ++nr;
882               overlap(rule2, rule1);
883             }
884           }
885           if (nr > _kb->_settings._check_confluence_interval) {
886             if (confluent()) {
887               break;
888             }
889             nr = 0;
890           }
891           if (_next_rule_it1 == _active_rules.cend()) {
892             clear_stack();
893           }
894         }
895         // LIBSEMIGROUPS_ASSERT(_stack.empty());
896         // Seems that the stack can be non-empty here in KnuthBendix 12, 14, 16
897         // and maybe more
898         bool ret;
899         if (_kb->_settings._max_overlap == POSITIVE_INFINITY
900             && _kb->_settings._max_rules == POSITIVE_INFINITY
901             && !_kb->stopped()) {
902           _confluence_known = true;
903           _confluent        = true;
904           for (Rule* rule : _inactive_rules) {
905             delete rule;
906           }
907           _inactive_rules.clear();
908           ret = true;
909         } else {
910           ret = false;
911         }
912 
913         REPORT_DEFAULT("stopping with active rules = %d, inactive rules = %d, "
914                        "rules defined = %d\n",
915                        _active_rules.size(),
916                        _inactive_rules.size(),
917                        _total_rules);
918         REPORT_VERBOSE_DEFAULT("max stack depth = %d", _max_stack_depth);
919         REPORT_TIME(timer);
920         return ret;
921       }
922 
knuth_bendix_by_overlap_length()923       void knuth_bendix_by_overlap_length() {
924         detail::Timer timer;
925         size_t        max_overlap = _kb->_settings._max_overlap;
926         size_t        check_confluence_interval
927             = _kb->_settings._check_confluence_interval;
928         _kb->_settings._max_overlap               = 1;
929         _kb->_settings._check_confluence_interval = POSITIVE_INFINITY;
930         while (!_kb->stopped() && !confluent()) {
931           knuth_bendix();
932           _kb->_settings._max_overlap++;
933         }
934         _kb->_settings._max_overlap               = max_overlap;
935         _kb->_settings._check_confluence_interval = check_confluence_interval;
936         REPORT_TIME(timer);
937       }
938 
939      private:
940       ////////////////////////////////////////////////////////////////////////
941       // KnuthBendixImpl - iterators - private
942       ////////////////////////////////////////////////////////////////////////
943 
944       using external_rule_type
945           = std::pair<external_string_type, external_string_type>;
946 
947       struct IteratorMethods {
948         external_rule_type
indirectionlibsemigroups::fpsemigroup::KnuthBendix::KnuthBendixImpl::IteratorMethods949         indirection(KnuthBendixImpl*                       kbi,
950                     std::list<Rule const*>::const_iterator it) const {
951           auto lhs = std::string(*(*it)->lhs());
952           auto rhs = std::string(*(*it)->rhs());
953           kbi->internal_to_external_string(lhs);
954           kbi->internal_to_external_string(rhs);
955 
956           return std::make_pair(lhs, rhs);
957         }
958 
959         // Not defined!
960         external_rule_type const*
addressoflibsemigroups::fpsemigroup::KnuthBendix::KnuthBendixImpl::IteratorMethods961         addressof(KnuthBendixImpl*,
962                   std::list<Rule const*>::const_iterator) const {
963           return nullptr;
964         }
965       };
966 
967      public:
968       ////////////////////////////////////////////////////////////////////////
969       // KnuthBendixImpl - iterators - public
970       ////////////////////////////////////////////////////////////////////////
971 
972       /*using const_iterator = detail::ConstIteratorStateful<
973           KnuthBendixImpl const*,                  // state
974           IteratorMethods,                        // methods
975           std::list<Rule const*>::const_iterator,  // wrapped iterator
976           external_rule_type,                      // external value type
977           external_rule_type,                      // external const pointer
978           external_rule_type&&                     // external const reference
979           >;
980 
981       const_iterator cbegin_active_rules() const {
982         return const_iterator(this, _active_rules.cbegin());
983       }
984 
985       const_iterator cend_active_rules() const {
986         return const_iterator(this, _active_rules.cend());
987       }*/
988 
989      private:
990       ////////////////////////////////////////////////////////////////////////
991       // KnuthBendixImpl - data - private
992       ////////////////////////////////////////////////////////////////////////
993 
994       std::list<Rule const*>           _active_rules;
995       mutable std::atomic<bool>        _confluent;
996       mutable std::atomic<bool>        _confluence_known;
997       mutable std::list<Rule*>         _inactive_rules;
998       bool                             _internal_is_same_as_external;
999       KnuthBendix*                     _kb;
1000       size_t                           _min_length_lhs_rule;
1001       std::list<Rule const*>::iterator _next_rule_it1;
1002       std::list<Rule const*>::iterator _next_rule_it2;
1003       OverlapMeasure*                  _overlap_measure;
1004       std::set<RuleLookup>             _set_rules;
1005       std::stack<Rule*>                _stack;
1006       internal_string_type*            _tmp_word1;
1007       internal_string_type*            _tmp_word2;
1008       mutable size_t                   _total_rules;
1009 
1010 #ifdef LIBSEMIGROUPS_VERBOSE
1011       //////////////////////////////////////////////////////////////////////////
1012       // ./configure --enable-verbose functions
1013       //////////////////////////////////////////////////////////////////////////
1014 
max_active_word_length()1015       size_t max_active_word_length() {
1016         auto comp = [](Rule const* p, Rule const* q) -> bool {
1017           return p->lhs()->size() < q->lhs()->size();
1018         };
1019         auto max = std::max_element(
1020             _active_rules.cbegin(), _active_rules.cend(), comp);
1021         if (max != _active_rules.cend()) {
1022           _max_active_word_length
1023               = std::max(_max_active_word_length, (*max)->lhs()->size());
1024         }
1025         return _max_active_word_length;
1026       }
1027       size_t                                   _max_stack_depth;
1028       size_t                                   _max_word_length;
1029       size_t                                   _max_active_word_length;
1030       size_t                                   _max_active_rules;
1031       std::unordered_set<internal_string_type> _unique_lhs_rules;
1032 #endif
1033     };  // struct KnuthBendixImpl
1034   }     // namespace fpsemigroup
1035 }  // namespace libsemigroups
1036 #endif  // LIBSEMIGROUPS_SRC_KNUTH_BENDIX_IMPL_HPP_
1037