1import functools
2
3from guppy.etc.RE_Rect import chooserects
4from guppy.etc.IterPermute import iterpermute
5
6
7class InfiniteError(Exception):
8    pass
9
10
11class WordsMemo:
12    def __init__(self, re, ch):
13        self.re = re
14        self.ch = ch
15        self.xs = {}
16        self.N = 0
17
18    def get_words_of_length(self, N):
19        # Return a list of words of length up to N
20        if N not in self.xs:
21            self.xs[N] = self.re.get_words_of_length_memoized(N, self)
22        return self.xs[N]
23
24    def get_words_of_length_upto(self, N):
25        # Return all words of length up to N, in the form
26        # [(0, <list of words of length 0>),
27        #  (1, <list of words of length 0>),
28        #  ...]
29        xsu = []
30        for i in range(N+1):
31            xs = self.get_words_of_length(i)
32            if xs:
33                xsu.append((i, xs))
34        return xsu
35
36
37REBASE = tuple
38
39
40class RE(REBASE):
41    # Regular expression nodes
42    # The operators are choosen to be compatible with Pythonic standards:
43    #   o sets               : using | for union
44    #   o strings, sequences : using + for concatenation.
45    #
46    # This differs from mathematical presentations of regular
47    # expressions where + is the union, but it seemed more important
48    # to not confuse the Python usage.
49
50    # There are also operators for closure x*, x+ that can not be
51    # represented directly in Python expressions and these were choosen
52    # to use a function call syntax.
53    # The following table summarizes the operators.
54
55    #   RE node expr    re lib          mathematical    name
56
57    #   x + y           x y             x y             Concatenation
58    #   x | y           x | y           x + y           Union
59    #   x('*')          x*              x*              Kleene closure
60    #   x('+')          x+              x+              Positive closure
61    #   x('?')          x?
62
63    _re_special = r'.^$*+?{}\[]|()'
64
65    def __add__(a, b):
66        if isinstance(b, RE):
67            return concat(a, b)
68        else:
69            return Concatenation(a, Single(b))
70
71    def __call__(a, *args, **kwds):
72        if not kwds:
73            if args == ('*',):
74                return KleeneClosure(a)
75            elif args == ('+',):
76                return PositiveClosure(a)
77            elif args == ('?',):
78                return EpsilonOrOne(a)
79        raise ValueError(
80            "Argument to regular expression must be '*' or '+' or '?'")
81
82    def __hash__(self):
83        return hash((self._name, tuple(self)))
84
85    def __eq__(a, b):
86        return (a._name == b._name and
87                tuple(a) == tuple(b))
88
89    def __lt__(a, b):
90        if a._name == b._name:
91            return tuple(a) < tuple(b)
92        else:
93            return a._name < b._name
94
95    def __or__(a, b):
96        return Union(a, b)
97
98    def get_num_closures(self):
99        ns = 0
100        for ch in self:
101            ns += ch.get_num_closures()
102        return ns
103
104    def get_num_syms(self):
105        ns = 0
106        for ch in self:
107            ns += ch.get_num_syms()
108        return ns
109
110    def get_sum_sym_lengths(self):
111        ns = 0
112        for ch in self:
113            ns += ch.get_sum_sym_lengths()
114        return ns
115
116    def get_words_memo(self):
117        ch = [x.get_words_memo() for x in self]
118        return WordsMemo(self, ch)
119
120    def get_words_of_length(self, N):
121        xs = self.get_words_memo()
122        return xs.get_words_of_length(N)
123
124    def mapchildren(self, f):
125        return self.__class__(*[f(x) for x in self])
126
127    def regexpform(self):
128        return self.mappedrepr(regexpname)
129
130    def reversed(self):
131        return self.mapchildren(lambda x: x.reversed())
132
133    def rempretup(self):
134        def f(x):
135            if isinstance(x, Seq):
136                if x is not Epsilon and isinstance(x[0], tuple):
137                    ws = x[1:]
138                    return Seq(*ws)
139                else:
140                    return x
141            return x.mapchildren(f)
142
143        return f(self)
144
145    def seqatoms(self):
146        sa = []
147        self.apseqatoms(sa.append)
148        return sa
149
150    def sequni(self):
151        d = {}
152        us = []
153
154        def ap(x):
155            if x not in d:
156                d[x] = 1
157                us.append(x)
158        self.apseq(ap)
159        return Union(*us)
160
161    def shform(self, conc=' '):
162        r = self.mappedrepr(regexpname)
163        if conc != ' ':
164            r = conc.join(r.split(' '))
165        return r
166
167    def simplified(self, *a, **k):
168        return self
169
170    def simulform(self):
171        def f(x):
172            if x == '':
173                return '()'
174            return str(x)
175        return self.mappedrepr(f)
176
177
178def regexpname(s):
179    if s == '':
180        return '()'
181    special = RE._re_special
182    ren = []
183    for c in str(s):
184        if c in special+"', ":
185            #c = r'\%s'%c
186            c = ''
187        ren.append(c)
188    return ''.join(ren)
189
190
191class Seq(RE):
192    _priority = 0
193    _name = 'Seq'
194
195    def __new__(clas, *symbols):
196        if not symbols:
197            return Epsilon
198        return REBASE.__new__(clas, symbols)
199
200    def __repr__(self):
201        return '%s(%s)' % (self.__class__.__name__, ', '.join(['%r' % (x,) for x in self]))
202
203    def apseq(self, ap):
204        ap(self)
205
206    def apseqatoms(self, ap):
207        for x in self:
208            ap(Single(x))
209
210    def get_num_closures(self):
211        return 0
212
213    def get_num_syms(self):
214        return len(self)
215
216    def get_sum_sym_lengths(self):
217        s = 0
218        for x in self:
219            s += len(str(x))
220        return s
221
222    def get_words_memo(self):
223        return WordsMemo(self, ())
224
225    def get_words_of_length_memoized(self, N, memo):
226        if N == len(self):
227            return [self]
228        else:
229            return []
230
231    def limited(self, N):
232        return self
233
234    def mappedrepr(self, f):
235        if not self:
236            return f('')
237        return ' '.join(['%s' % (f(x),) for x in self])
238
239    def reversed(self):
240        r = list(self)
241        r.reverse()
242        return self.__class__(*r)
243
244    def unionsplitted(self):
245        return [self]
246
247
248def Single(symbol):
249    return REBASE.__new__(Seq, (symbol,))
250
251
252Epsilon = REBASE.__new__(Seq, ())
253
254
255def concat(*args):
256    args = [x for x in args if x is not Epsilon]
257    if len(args) < 2:
258        if not args:
259            return Epsilon
260        return args[0]
261    return REBASE.__new__(Concatenation, args)
262
263
264class Concatenation(RE):
265    _priority = 2
266    _name = 'Concat'
267
268    def __new__(clas, *args):
269        if len(args) < 2:
270            if not args:
271                return Epsilon
272            return args[0]
273        return REBASE.__new__(clas, args)
274
275    def __repr__(self):
276        rs = []
277        for ch in self:
278            r = '%r' % (ch,)
279            if ch._priority > self._priority:
280                r = '(%s)' % (r,)
281            rs.append(r)
282        return ' + '.join(rs)
283
284    def apseq(self, ap):
285        uns = [x.sequni() for x in self]
286        ixs = [0]*len(uns)
287        while 1:
288            xs = []
289            for (i, us) in enumerate(uns):
290                for x in us[ixs[i]]:
291                    if x is not Epsilon:
292                        xs.append(x)
293            ap(Seq(*xs))
294            j = 0
295            for j, ix in enumerate(ixs):
296                ix += 1
297                if ix >= len(uns[j]):
298                    ix = 0
299                ixs[j] = ix
300                if ix != 0:
301                    break
302            else:
303                break
304
305    def apseqatoms(self, ap):
306        for x in self:
307            x.apseqatoms(ap)
308
309    def get_words_of_length_memoized(self, N, memo):
310        chxs = []
311        for ch in memo.ch:
312            chxs.append(ch.get_words_of_length_upto(N))
313        xs = []
314        seen = {}
315
316        def ads(xx, i, n):
317            if i == len(chxs):
318                if n == N:
319                    for toconc in iterpermute(*xx):
320                        conc = simple_Concatenation(toconc)
321                        if conc not in seen:
322                            xs.append(conc)
323                            seen[conc] = 1
324            else:
325                for m, x in chxs[i]:
326                    if n + m <= N:
327                        ads(xx + [x], i + 1, n + m)
328
329        ads([], 0, 0)
330        return xs
331
332    def limited(self, N):
333        return Concatenation(*[x.limited(N) for x in self])
334
335    def mappedrepr(self, f):
336        rs = []
337        for ch in self:
338            r = ch.mappedrepr(f)
339            if ch._priority > self._priority:
340                r = '(%s)' % (r,)
341            rs.append(r)
342        return ' '.join(rs)
343
344    def reversed(self):
345        r = [x.reversed() for x in self]
346        r.reverse()
347        return self.__class__(*r)
348
349    def simplified(self, *a, **k):
350        conc = [x.simplified(*a, **k) for x in self]
351        sa = []
352        for c in conc:
353            for a in c.seqatoms():
354                sa.append(a)
355        return simple_Concatenation(sa)
356
357    def unionsplitted(self):
358        runs = []
359        uns = []
360        for (i, x) in enumerate(self):
361            us = x.unionsplitted()
362            if len(us) > 1:
363                uns.append((i, us))
364        if not uns:
365            return [self]
366        ixs = [0]*len(uns)
367        ch = list(self)
368        while 1:
369            xs = []
370            i0 = 0
371            for j, (i, us) in enumerate(uns):
372                xs.extend(ch[i0:i])
373                ix = ixs[j]
374                xs.append(us[ix])
375                i0 = i + 1
376            xs.extend(ch[i0:])
377            runs.append(concat(*xs))
378
379            j = 0
380            for j, ix in enumerate(ixs):
381                ix += 1
382                if ix >= len(uns[j][1]):
383                    ix = 0
384                ixs[j] = ix
385                if ix != 0:
386                    break
387            else:
388                return runs
389
390
391class SimplifiedConcatenation(Concatenation):
392    def simplified(self, *a, **k):
393        return self
394
395
396def conclosure(conc):
397    # Simplification noted Mar 5 2005
398    # Simplify ... b b* ... or ... b* b ... to ... b+ ...
399    # conc is a sequence of regular expressions
400
401    seen = {}
402    nconc = []
403    w0 = None
404    for w in conc:
405        if w0 is not None:
406            if (w._name == '*' and      # Not isinstance(KleeneClosure), would catch PositiveClosure
407                    w[0] == w0):
408                w = PositiveClosure(w0)
409            elif (w0._name == '*' and
410                  w0[0] == w):
411                w = PositiveClosure(w)
412            else:
413                if w0 is not None:
414                    nconc.append(w0)
415        w0 = w
416    if w0 is not None:
417        nconc.append(w0)
418    return nconc
419
420
421def simple_Concatenation(conc):
422    if len(conc) > 1:
423        conc0 = conc
424        conc = conclosure(conc)
425    nconc = []
426    i = 0
427    j = 0
428    while i < len(conc):
429        e = conc[i]
430        if not isinstance(e, Seq):
431            i += 1
432            nconc.append(e)
433            continue
434        j = i
435        while j < len(conc):
436            if not isinstance(conc[j], Seq):
437                break
438            j += 1
439        if j == i + 1:
440            nconc.append(e)
441        else:
442            syms = []
443            for k in range(i, j):
444                e = conc[k]
445                syms.extend(list(e))
446            nconc.append(Seq(*syms))
447        i = j
448    if len(nconc) > 1:
449        return Concatenation(*nconc)
450    elif nconc:
451        return nconc[0]
452    else:
453        return Epsilon
454
455
456gauges = [
457    lambda x:x.get_num_syms(),
458    lambda x:x.get_num_closures(),
459    lambda x:x.get_sum_sym_lengths()
460]
461
462
463def simpleunion(lines):
464    choosen = chooserects(lines, gauges)
465    have_epsilon = 0
466    while 1:
467        if len(choosen) == 1 and (choosen[0].width == 0 or len(choosen[0].lines) == 1):
468            us = []
469            for line in choosen[0].lines:
470                if line:
471                    us.append(line)
472                else:
473                    have_epsilon = 1
474            break
475        us = []
476        for r in choosen:
477            conc = r.get_common_part()
478            olines = r.get_uncommons()
479            u = simpleunion(olines)
480            if u is not Epsilon:
481                if r.dir == -1:
482                    conc = [u]+conc
483                else:
484                    conc = conc + [u]
485            if conc:
486                us.append(conc)
487            else:
488                have_epsilon = 1
489            assert not isinstance(us[-1], str)
490
491        choosen = chooserects(us, gauges)
492
493    if len(us) > 1:
494        nus = [simple_Concatenation(line) for line in us]
495        u = SimplifiedUnion(*nus)
496    elif us:
497        u = simple_Concatenation(us[0])
498    else:
499        u = None
500    if have_epsilon:
501        if u is not None:
502            u = simple_EpsilonOrOne(u)
503        else:
504            u = Epsilon
505
506    return u
507
508
509class Union(RE):
510    _priority = 3
511    _name = 'Union'
512
513    def __new__(clas, *args):
514        return REBASE.__new__(clas, args)
515
516    def __repr__(self):
517        rs = []
518        for ch in self:
519            r = '%r' % (ch,)
520            if ch._priority > self._priority:
521                r = '(%s)' % r
522            rs.append(r)
523        return ' | '.join(rs)
524
525    def apseq(self, ap):
526        for c in self:
527            c.apseq(ap)
528
529    def apseqatoms(self, ap):
530        for x in self:
531            x.apseqatoms(ap)
532
533    def get_words_of_length_memoized(self, N, memo):
534        xs = []
535        seen = {}
536        for ch in memo.ch:
537            for x in ch.get_words_of_length(N):
538                if x not in seen:
539                    seen[x] = 1
540                    xs.append(x)
541        return xs
542
543    def limited(self, N):
544        uni = [x.limited(N) for x in self]
545        for i, x in enumerate(uni):
546            if x is not self[i]:
547                return self.__class__(*uni)
548        return self
549
550    def mappedrepr(self, f):
551        rs = []
552        for ch in self:
553            r = '%s' % (ch.mappedrepr(f),)
554            if ch._priority > self._priority:
555                r = '(%s)' % r
556            rs.append(r)
557        return ' | '.join(rs)
558
559    def simplified(self, args=None, *a, **k):
560        if args is None:
561            args = [x.simplified() for x in self.unionsplitted()]
562            #args = [x for x in self.unionsplitted()]
563
564        # Create a simplfied union
565        # Assuming args are simplified, non-unions
566
567        ch = [a.seqatoms() for a in args]
568        return simpleunion(ch)
569
570    def unionsplitted(self):
571        us = []
572        for x in self:
573            us.extend(list(x.unionsplitted()))
574        return us
575
576
577class SimplifiedUnion(Union):
578    def simplified(self, *a, **k):
579        return self
580
581
582class Called(RE):
583    _priority = 1
584
585    def __new__(clas, arg):
586        return REBASE.__new__(clas, (arg,))
587
588    def __repr__(self):
589        ch = self[0]
590        r = '%r' % (ch,)
591        if ch._priority > self._priority:
592            r = '(%s)' % r
593        return "%s(%r)" % (r, self._name)
594
595    def apseqatoms(self, ap):
596        ap(self)
597
598    def get_num_closures(self):
599        return 1 + self[0].get_num_closures()
600
601    def mappedrepr(self, f):
602        ch = self[0]
603        r = ch.mappedrepr(f)
604        if (ch._priority > self._priority
605                or isinstance(ch, Seq) and len(ch) > 1):
606            r = '(%s)' % r
607        return "%s%s" % (r, self._name)
608
609    def simplified(self, *a, **k):
610        return self.__class__(self[0].simplified(*a, **k))
611
612
613class Closure(Called):
614    def get_words_of_length_memoized(self, N, memo):
615        if N == 0:
616            return [Epsilon]
617        if N == 1:
618            return memo.ch[0].get_words_of_length(1)
619        xs = []
620        seen = {}
621        for i in range(1, N):
622            a = memo.get_words_of_length(i)
623            b = memo.get_words_of_length(N-i)
624            for ai in a:
625                for bi in b:
626                    aibi = simple_Concatenation((ai, bi))
627                    if aibi not in seen:
628                        xs.append(aibi)
629                        seen[aibi] = 1
630        for x in memo.ch[0].get_words_of_length(N):
631            if x not in seen:
632                xs.append(x)
633                seen[x] = 1
634        return xs
635
636    def unionsplitted(self):
637        return [self]
638
639
640class KleeneClosure(Closure):
641    _name = '*'
642
643    def apseq(self, ap):
644        raise InfiniteError(
645            'apseq: Regular expression is infinite: contains a Kleene Closure')
646
647    def limited(self, N):
648        if N == 0:
649            return Epsilon
650        cl = self[0].limited(N)
651        uni = []
652        for i in range(N+1):
653            toconc = [cl]*i
654            uni.append(Concatenation(*toconc))
655        return Union(*uni)
656
657    def simplified(self, *a, **k):
658        return simple_KleeneClosure(self[0].simplified(*a, **k))
659
660
661def simple_KleeneClosure(x):
662    # (b+)* -> b*
663    if x._name == '+':
664        return simple_KleeneClosure(x[0])
665    return KleeneClosure(x)
666
667
668class PositiveClosure(Closure):
669    _name = '+'
670
671    def apseq(self, ap):
672        raise InfiniteError(
673            'apseq: Regular expression is infinite: contains a Positive Closure')
674
675    def apseqatoms(self, ap):
676        self[0].apseqatoms(ap)
677        simple_KleeneClosure(self[0]).apseqatoms(ap)
678
679    def get_words_of_length_memoized(self, N, memo):
680        if N <= 1:
681            return memo.ch[0].get_words_of_length(N)
682        return Closure.get_words_of_length_memoized(self, N, memo)
683
684    def limited(self, N):
685        a = self[0].limited(N)
686        b = KleeneClosure(self[0]).limited(N)
687        return Concatenation(a, b)
688
689
690class EpsilonOrOne(Called):
691    _name = '?'
692
693    def apseq(self, ap):
694        ap(Epsilon)
695        self[0].apseq(ap)
696
697    def get_words_of_length_memoized(self, N, memo):
698        if N == 0:
699            return [Epsilon]
700        return memo.ch[0].get_words_of_length(N)
701
702    def limited(self, N):
703        x = self[0].limited(N)
704        if x is not self[0]:
705            self = self.__class__(x)
706        return self
707
708    def simplified(self, *a, **k):
709        return simple_EpsilonOrOne(self[0].simplified(*a, **k))
710
711    def unionsplitted(self):
712        return [Epsilon] + list(self[0].unionsplitted())
713
714
715def simple_EpsilonOrOne(x):
716    # (a+)? -> a*
717
718    if x._name == '+':
719        return simple_KleeneClosure(x)
720
721    # (a*)? -> a*
722    if x._name == '*':
723        return x
724
725    return EpsilonOrOne(x)
726
727
728class RegularSystem:
729
730    def __init__(self, table, Start, final_states):
731        self.table = table
732        self.Start = Start
733        self.Final = '358f0eca5c34bacdfbf6a8ac0ccf84bc'
734        self.final_states = final_states
735
736    def pp(self):
737
738        def statename(state):
739            try:
740                name = self.names[state]
741            except KeyError:
742                name = str(state)
743            return name
744
745        def transname(trans):
746            name = trans.simulform()
747            if trans._priority > 1:
748                name = '(%s)' % (name,)
749            return name
750
751        self.setup_names()
752
753        X = self.X
754
755        xs = [self.Start]+self.order
756        xs.append(self.Final)
757        for Xk in xs:
758            if Xk not in X:
759                continue
760            print('%3s = ' % (statename(Xk),), end=' ')
761            Tk = X[Xk]
762            es = []
763            for Xj in xs:
764                if Xj in Tk:
765                    es.append('%s %s' % (transname(Tk[Xj]), statename(Xj)))
766            if es:
767                print(' | '.join(es))
768            else:
769                print()
770
771    def setup_equations(self):
772        table = self.table
773        final_states = self.final_states
774        Final = self.Final
775        self.X = X = {Final: {}}
776        for Xi, transitions in list(table.items()):
777            X[Xi] = Ti = {}
778            for (symbol, Xj) in list(transitions.items()):
779                Ti.setdefault(Xj, []).append(Single(symbol))
780            for Xj, Aij in list(Ti.items()):
781                if len(Aij) > 1:
782                    Aij.sort()
783                    Aij = Union(*Aij)
784                else:
785                    Aij = Aij[0]
786                Ti[Xj] = Aij
787            if Xi in final_states:
788                Ti[Final] = Epsilon
789
790    def setup_order(self):
791        def dists(X, start):
792            i = 0
793            S = {start: i}
794            news = [start]
795            while news:
796                oldnews = news
797                news = []
798                i += 1
799                for s in oldnews:
800                    if s not in X:
801                        continue
802                    for t in X[s]:
803                        if t not in S:
804                            news.append(t)
805                            S[t] = i
806            return S
807
808        def start_distance(x):
809            return start_dists[x]
810
811        def sumt(f):
812            memo = {}
813
814            def g(x):
815                if x in memo:
816                    return memo[x]
817                s = 0.0
818                for y in X[x]:
819                    s += f(y)
820                memo[x] = s
821                return s
822            return g
823
824        def cmp3(x, y):
825            # Comparison for the sorting of equation solving order
826            # First in list = solved last
827
828            if x is y:
829                return 0
830
831            # Equations with more terms are resolved later
832            c = len(X[y]) - len(X[x])
833            if c:
834                return c
835
836            # The equations with terms more distant from start node will be resolved earlier
837            i = 0
838            while i < 10:  # 4 was enough with tests so far at Feb 24 2005
839                try:
840                    f = sumdists[i]
841                except IndexError:
842                    f = sumt(sumdists[i-1])
843                    sumdists.append(f)
844                c = f(x) - f(y)
845                if c:
846                    return c
847                i += 1
848
849            return (x > y) - (x < y)
850
851        sumdists = [start_distance]
852        X = self.X
853        Start = self.Start
854        Final = self.Final
855        start_dists = dists(X, Start)
856        order = [x for x in start_dists if x is not Start and x is not Final]
857        order.sort(key=functools.cmp_to_key(cmp3))
858        self.order = order
859
860    def setup_names(self):
861        try:
862            self.order
863        except AttributeError:
864            self.setup_order()
865        self.names = {}
866        self.names[self.Start] = 'X0'
867
868        for i, s in enumerate(self.order):
869            self.names[s] = 'X%d' % (i+1)
870        self.names[self.Final] = 'Final'
871
872    def solve(self):
873        # Set up equation system
874
875        self.setup_equations()
876        self.setup_order()
877
878        X = self.X
879        Start = self.Start
880        Final = self.Final
881        todo = list(self.order)
882
883        # Solve equation system
884
885        while todo:
886            Xk = todo.pop()
887            Tk = X[Xk]
888
889            if Xk in Tk:
890                # Recursive equation
891                # Eliminate Akk Xk, using Adler's theorem
892                # Given:
893                # Xk = Ak0 X0 | ... Akk Xk |.. Akn Xkn
894                # we get:
895                # Xk = Akk* (Ak0 X0 | ... <no Xk> ... | Akn Xn)
896                # which we evaluate to:
897                # Xk = Bk0 X0 | ... Bkn Xn
898                # where coefficients get the new values
899                # Bki := Akk* Aki
900
901                Akk = Tk[Xk]
902                del Tk[Xk]
903
904                AkkStar = Akk('*')
905                for Xi, Aki in list(Tk.items()):
906                    Bki = AkkStar + Aki
907                    Tk[Xi] = Bki
908
909            # Substitute Xk in each other equation in X
910            # containing Xk, except eqv. Xk itself, which will not be used any more..
911
912            del X[Xk]
913
914            for Xj, Tj in list(X.items()):
915                Bjk = Tj.get(Xk)
916                if Bjk is None:
917                    continue
918                del Tj[Xk]
919                for Xji, Tk_Xji in list(Tk.items()):
920                    Cji = (Bjk + Tk_Xji)
921                    Bji = Tj.get(Xji)
922                    if Bji is not None:
923                        Cji = Bji | Cji
924                    Tj[Xji] = Cji
925
926        # The equation system is now solved
927        # The result is in Final term of Start equation
928
929        return X[Start][Final]
930
931
932Nothing = Union()
933
934
935def SolveFSA(fsa):
936    RS = RegularSystem(fsa.table, fsa.start_state, fsa.final_states)
937    return RS.solve()
938