1package suffixtree
2
3import "sort"
4
5type Match struct {
6	Ps  []Pos
7	Len Pos
8}
9
10type posList struct {
11	positions []Pos
12}
13
14func newPosList() *posList {
15	return &posList{make([]Pos, 0)}
16}
17
18func (p *posList) append(p2 *posList) {
19	p.positions = append(p.positions, p2.positions...)
20}
21
22func (p *posList) add(pos Pos) {
23	p.positions = append(p.positions, pos)
24}
25
26type contextList struct {
27	lists map[int]*posList
28}
29
30func newContextList() *contextList {
31	return &contextList{make(map[int]*posList)}
32}
33
34func (c *contextList) getAll() []Pos {
35	keys := make([]int, 0, len(c.lists))
36	for k := range c.lists {
37		keys = append(keys, k)
38	}
39	sort.Ints(keys)
40	var ps []Pos
41	for _, k := range keys {
42		ps = append(ps, c.lists[k].positions...)
43	}
44	return ps
45}
46
47func (c *contextList) append(c2 *contextList) {
48	for lc, pl := range c2.lists {
49		if _, ok := c.lists[lc]; ok {
50			c.lists[lc].append(pl)
51		} else {
52			c.lists[lc] = pl
53		}
54	}
55}
56
57// FindDuplOver find pairs of maximal duplicities over a threshold
58// length.
59func (t *STree) FindDuplOver(threshold int) <-chan Match {
60	auxTran := newTran(0, 0, t.root)
61	ch := make(chan Match)
62	go func() {
63		walkTrans(auxTran, 0, threshold, ch)
64		close(ch)
65	}()
66	return ch
67}
68
69func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList {
70	s := parent.state
71
72	cl := newContextList()
73
74	if len(s.trans) == 0 {
75		pl := newPosList()
76		start := parent.end + 1 - Pos(length)
77		pl.add(start)
78		ch := 0
79		if start > 0 {
80			ch = s.tree.data[start-1].Val()
81		}
82		cl.lists[ch] = pl
83		return cl
84	}
85
86	for _, t := range s.trans {
87		ln := length + t.len()
88		cl2 := walkTrans(t, ln, threshold, ch)
89		if ln >= threshold {
90			cl.append(cl2)
91		}
92	}
93	if length >= threshold && len(cl.lists) > 1 {
94		m := Match{cl.getAll(), Pos(length)}
95		ch <- m
96	}
97	return cl
98}
99