1 /*
2 Copyright © 2011-13 Qtrac Ltd. All rights reserved.
3 This program or module is free software: you can redistribute it
4 and/or modify it under the terms of the GNU General Public License
5 as published by the Free Software Foundation, either version 2 of
6 the License, or (at your option) any later version. This program is
7 distributed in the hope that it will be useful, but WITHOUT ANY
8 WARRANTY; without even the implied warranty of MERCHANTABILITY or
9 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
10 for more details.
11 */
12
13 #include "sequence_matcher.hpp"
14 #include <QSet>
15
16
computeRanges(SequenceMatcher * matcher)17 RangesPair computeRanges(SequenceMatcher *matcher)
18 {
19 Ranges ranges1;
20 Ranges ranges2;
21 QList<Match> matches = matcher->get_matching_blocks();
22 foreach (const Match &match, matches) {
23 if (match.size == 0)
24 continue;
25 ranges1 |= unorderedRange(match.i + match.size, match.i);
26 ranges2 |= unorderedRange(match.j + match.size, match.j);
27 }
28 return qMakePair(ranges1, ranges2);
29 }
30
31
invertRanges(const Ranges & ranges1,int length1,const Ranges & ranges2,int length2)32 RangesPair invertRanges(const Ranges &ranges1, int length1,
33 const Ranges &ranges2, int length2)
34 {
35 const Ranges newRanges1 = unorderedRange(length1) - ranges1;
36 const Ranges newRanges2 = unorderedRange(length2) - ranges2;
37 return qMakePair(newRanges1, newRanges2);
38 }
39
40
matchLessThan(const Match & a,const Match & b)41 bool matchLessThan(const Match &a, const Match &b)
42 {
43 if (a.i != b.i)
44 return a.i < b.i;
45 if (a.j != b.j)
46 return a.j < b.j;
47 return a.size < b.size;
48 }
49
50
51 struct Offsets
52 {
OffsetsOffsets53 Offsets(int a_low_=0, int a_high_=0, int b_low_=0, int b_high_=0)
54 : a_low(a_low_), a_high(a_high_), b_low(b_low_), b_high(b_high_) {}
55
56 int a_low;
57 int a_high;
58 int b_low;
59 int b_high;
60 };
61
62
SequenceMatcher(const Sequence & a_,const Sequence & b_)63 SequenceMatcher::SequenceMatcher(const Sequence &a_, const Sequence &b_)
64 : a(a_), b(b_)
65 {
66 set_sequences(a, b);
67 }
68
69
set_sequence1(const Sequence & sequence)70 void SequenceMatcher::set_sequence1(const Sequence &sequence)
71 {
72 a = sequence;
73 matching_blocks.clear();
74 }
75
76
set_sequence2(const Sequence & sequence)77 void SequenceMatcher::set_sequence2(const Sequence &sequence)
78 {
79 b = sequence;
80 matching_blocks.clear();
81 chain_b();
82 }
83
84
chain_b()85 void SequenceMatcher::chain_b()
86 {
87 const int N = b.count();
88 b2j.clear();
89 QSet<Element> popular;
90
91 for (int i = 0; i < N; ++i) {
92 const Element &element = b.at(i);
93 if (b2j.contains(element)) {
94 QList<int> &indexes = b2j[element];
95 if (N >= 200 && indexes.count() * 100 > N) {
96 popular.insert(element);
97 indexes.clear();
98 }
99 else
100 indexes.append(i);
101 }
102 else
103 b2j[element].append(i);
104 }
105
106 foreach (const Element &element, popular)
107 b2j.remove(element);
108 }
109
110
get_matching_blocks()111 QList<Match> SequenceMatcher::get_matching_blocks()
112 {
113 if (!matching_blocks.isEmpty())
114 return matching_blocks;
115
116 const int LengthA = a.count();
117 const int LengthB = b.count();
118 QList<Offsets> offsets;
119 offsets << Offsets(0, LengthA, 0, LengthB);
120 while (!offsets.isEmpty()) {
121 const Offsets &offset = offsets.takeLast();
122 const int a_low = offset.a_low;
123 const int a_high = offset.a_high;
124 const int b_low = offset.b_low;
125 const int b_high = offset.b_high;
126 const Match match = find_longest_match(a_low, a_high, b_low,
127 b_high);
128 const int i = match.i;
129 const int j = match.j;
130 const int k = match.size;
131 if (k) {
132 matching_blocks.append(match);
133 if (a_low < i && b_low < j)
134 offsets.append(Offsets(a_low, i, b_low, j));
135 if (i + k < a_high && j + k < b_high)
136 offsets.append(Offsets(i + k, a_high, j + k, b_high));
137 }
138 }
139 qSort(matching_blocks.begin(), matching_blocks.end(), matchLessThan);
140
141 int i1 = 0;
142 int j1 = 0;
143 int k1 = 0;
144 QList<Match> non_adjacent;
145 foreach (const Match match, matching_blocks) {
146 const int i2 = match.i;
147 const int j2 = match.j;
148 const int k2 = match.size;
149 if (i1 + k1 == i2 && j1 + k1 == j2)
150 k1 += k2;
151 else {
152 if (k1)
153 non_adjacent.append(Match(i1, j1, k1));
154 i1 = i2;
155 j1 = j2;
156 k1 = k2;
157 }
158 }
159 if (k1)
160 non_adjacent.append(Match(i1, j1, k1));
161 non_adjacent.append(Match(LengthA, LengthB, 0));
162 matching_blocks = non_adjacent;
163 return matching_blocks;
164 }
165
166
find_longest_match(int a_low,int a_high,int b_low,int b_high)167 Match SequenceMatcher::find_longest_match(int a_low, int a_high,
168 int b_low, int b_high)
169 {
170 int best_i = a_low;
171 int best_j = b_low;
172 int best_size = 0;
173 QHash<int, int> j2len;
174 for (int i = a_low; i < a_high; ++i) {
175 QHash<int, int> newj2len;
176 foreach (int j, b2j.value(a[i])) {
177 if (j < b_low)
178 continue;
179 if (j >= b_high)
180 break;
181 const int k = j2len.value(j - 1, 0) + 1;
182 newj2len[j] = k;
183 if (k > best_size) {
184 best_i = i - k + 1;
185 best_j = j - k + 1;
186 best_size = k;
187 }
188 }
189 j2len = newj2len;
190 }
191
192 while (best_i > a_low && best_j > b_low &&
193 a[best_i - 1] == b[best_j - 1]) {
194 --best_i;
195 --best_j;
196 ++best_size;
197 }
198 while (best_i + best_size < a_high && best_j + best_size < b_high &&
199 a[best_i + best_size] == b[best_j + best_size])
200 ++best_size;
201
202 return Match(best_i, best_j, best_size);
203 }
204