1 /*
2     Copyright © 2011-13 Qtrac Ltd. All rights reserved.
3     This program or module is free software: you can redistribute it
4     and/or modify it under the terms of the GNU General Public License
5     as published by the Free Software Foundation, either version 2 of
6     the License, or (at your option) any later version. This program is
7     distributed in the hope that it will be useful, but WITHOUT ANY
8     WARRANTY; without even the implied warranty of MERCHANTABILITY or
9     FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
10     for more details.
11 */
12 
13 #include "sequence_matcher.hpp"
14 #include <QSet>
15 
16 
computeRanges(SequenceMatcher * matcher)17 RangesPair computeRanges(SequenceMatcher *matcher)
18 {
19     Ranges ranges1;
20     Ranges ranges2;
21     QList<Match> matches = matcher->get_matching_blocks();
22     foreach (const Match &match, matches) {
23         if (match.size == 0)
24             continue;
25         ranges1 |= unorderedRange(match.i + match.size, match.i);
26         ranges2 |= unorderedRange(match.j + match.size, match.j);
27     }
28     return qMakePair(ranges1, ranges2);
29 }
30 
31 
invertRanges(const Ranges & ranges1,int length1,const Ranges & ranges2,int length2)32 RangesPair invertRanges(const Ranges &ranges1, int length1,
33                         const Ranges &ranges2, int length2)
34 {
35     const Ranges newRanges1 = unorderedRange(length1) - ranges1;
36     const Ranges newRanges2 = unorderedRange(length2) - ranges2;
37     return qMakePair(newRanges1, newRanges2);
38 }
39 
40 
matchLessThan(const Match & a,const Match & b)41 bool matchLessThan(const Match &a, const Match &b)
42 {
43     if (a.i != b.i)
44         return a.i < b.i;
45     if (a.j != b.j)
46         return a.j < b.j;
47     return a.size < b.size;
48 }
49 
50 
51 struct Offsets
52 {
OffsetsOffsets53     Offsets(int a_low_=0, int a_high_=0, int b_low_=0, int b_high_=0)
54         : a_low(a_low_), a_high(a_high_), b_low(b_low_), b_high(b_high_) {}
55 
56     int a_low;
57     int a_high;
58     int b_low;
59     int b_high;
60 };
61 
62 
SequenceMatcher(const Sequence & a_,const Sequence & b_)63 SequenceMatcher::SequenceMatcher(const Sequence &a_, const Sequence &b_)
64     : a(a_), b(b_)
65 {
66     set_sequences(a, b);
67 }
68 
69 
set_sequence1(const Sequence & sequence)70 void SequenceMatcher::set_sequence1(const Sequence &sequence)
71 {
72     a = sequence;
73     matching_blocks.clear();
74 }
75 
76 
set_sequence2(const Sequence & sequence)77 void SequenceMatcher::set_sequence2(const Sequence &sequence)
78 {
79     b = sequence;
80     matching_blocks.clear();
81     chain_b();
82 }
83 
84 
chain_b()85 void SequenceMatcher::chain_b()
86 {
87     const int N = b.count();
88     b2j.clear();
89     QSet<Element> popular;
90 
91     for (int i = 0; i < N; ++i) {
92         const Element &element = b.at(i);
93         if (b2j.contains(element)) {
94             QList<int> &indexes = b2j[element];
95             if (N >= 200 && indexes.count() * 100 > N) {
96                 popular.insert(element);
97                 indexes.clear();
98             }
99             else
100                 indexes.append(i);
101         }
102         else
103             b2j[element].append(i);
104     }
105 
106     foreach (const Element &element, popular)
107         b2j.remove(element);
108 }
109 
110 
get_matching_blocks()111 QList<Match> SequenceMatcher::get_matching_blocks()
112 {
113     if (!matching_blocks.isEmpty())
114         return matching_blocks;
115 
116     const int LengthA = a.count();
117     const int LengthB = b.count();
118     QList<Offsets> offsets;
119     offsets << Offsets(0, LengthA, 0, LengthB);
120     while (!offsets.isEmpty()) {
121         const Offsets &offset = offsets.takeLast();
122         const int a_low = offset.a_low;
123         const int a_high = offset.a_high;
124         const int b_low = offset.b_low;
125         const int b_high = offset.b_high;
126         const Match match = find_longest_match(a_low, a_high, b_low,
127                                                b_high);
128         const int i = match.i;
129         const int j = match.j;
130         const int k = match.size;
131         if (k) {
132             matching_blocks.append(match);
133             if (a_low < i && b_low < j)
134                 offsets.append(Offsets(a_low, i, b_low, j));
135             if (i + k < a_high && j + k < b_high)
136                 offsets.append(Offsets(i + k, a_high, j + k, b_high));
137         }
138     }
139     qSort(matching_blocks.begin(), matching_blocks.end(), matchLessThan);
140 
141     int i1 = 0;
142     int j1 = 0;
143     int k1 = 0;
144     QList<Match> non_adjacent;
145     foreach (const Match match, matching_blocks) {
146         const int i2 = match.i;
147         const int j2 = match.j;
148         const int k2 = match.size;
149         if (i1 + k1 == i2 && j1 + k1 == j2)
150             k1 += k2;
151         else {
152             if (k1)
153                 non_adjacent.append(Match(i1, j1, k1));
154             i1 = i2;
155             j1 = j2;
156             k1 = k2;
157         }
158     }
159     if (k1)
160         non_adjacent.append(Match(i1, j1, k1));
161     non_adjacent.append(Match(LengthA, LengthB, 0));
162     matching_blocks = non_adjacent;
163     return matching_blocks;
164 }
165 
166 
find_longest_match(int a_low,int a_high,int b_low,int b_high)167 Match SequenceMatcher::find_longest_match(int a_low, int a_high,
168                                           int b_low, int b_high)
169 {
170     int best_i = a_low;
171     int best_j = b_low;
172     int best_size = 0;
173     QHash<int, int> j2len;
174     for (int i = a_low; i < a_high; ++i) {
175         QHash<int, int> newj2len;
176         foreach (int j, b2j.value(a[i])) {
177             if (j < b_low)
178                 continue;
179             if (j >= b_high)
180                 break;
181             const int k = j2len.value(j - 1, 0) + 1;
182             newj2len[j] = k;
183             if (k > best_size) {
184                 best_i = i - k + 1;
185                 best_j = j - k + 1;
186                 best_size = k;
187             }
188         }
189         j2len = newj2len;
190     }
191 
192     while (best_i > a_low && best_j > b_low &&
193            a[best_i - 1] == b[best_j - 1]) {
194         --best_i;
195         --best_j;
196         ++best_size;
197     }
198     while (best_i + best_size < a_high && best_j + best_size < b_high &&
199            a[best_i + best_size] == b[best_j + best_size])
200         ++best_size;
201 
202     return Match(best_i, best_j, best_size);
203 }
204