1 /* Find a best match between two vectors.
2
3 Copyright (C) 2009-2012 Free Software Foundation, Inc.
4 Written by Andreas Gruenbacher <agruen@gnu.org>, 2009.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18
19
20 /* Before including this file, you need to define:
21 EQUAL_IDX(x, y) A two-argument macro that tests elements
22 at index x and y for equality.
23 OFFSET A signed integer type sufficient to hold the
24 difference between two indices. Usually
25 something like ssize_t. */
26
27 /*
28 * Shortest Edit Sequence
29 *
30 * Based on the Greedy LCS/SES Algorithm (Figure 2) in:
31 *
32 * Eugene W. Myers, "An O(ND) Difference Algorithm and Its Variations",
33 * Algorithmica, Vol. 1, No. 1, pp. 251-266, March 1986.
34 * Available: http://dx.doi.org/10.1007/BF01840446
35 * http://xmailserver.org/diff2.pdf
36 *
37 * Returns the number of changes (insertions and deletions) required to get
38 * from a[] to b[]. Returns MAX + 1 if a[] cannot be turned into b[] with
39 * MAX or fewer changes, in which case *PY is not modified.
40 *
41 * MIN specifies the minimum number of elements in which a[] and b[] must
42 * match. This allows to prevent trivial matches in which a sequence is
43 * completely discarded, or completely made up.
44 *
45 * If PY is not NULL, matches a[] against a prefix of b[], and returns the
46 * number of elements in b[] that were matched in *PY. Otherwise, matches
47 * all elements of b[].
48 *
49 * Note that the divide-and-conquer strategy discussed in section 4b of the
50 * paper is more efficient, but does not allow an open-ended prefix string
51 * search.
52 */
53
54 static OFFSET
bestmatch(OFFSET xoff,OFFSET xlim,OFFSET yoff,OFFSET ylim,OFFSET min,OFFSET max,OFFSET * py)55 bestmatch(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
56 OFFSET min, OFFSET max, OFFSET *py)
57 {
58 const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */
59 const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */
60 const OFFSET fmid = xoff - yoff; /* Center diagonal. */
61 OFFSET fmin = fmid;
62 OFFSET fmax = fmid;
63 OFFSET *V, *fd;
64 OFFSET fmid_plus_2_min, ymax = -1;
65 OFFSET c;
66
67 V = xmalloc ((2 * max + 3) * sizeof (OFFSET));
68 fd = V + max + 1 - fmid;
69
70 /*
71 The number of elements that were matched in x and in y can be
72 computed as either (x - x_skipped) or (y - y_skipped), with:
73
74 delta = (x - xoff) - (y - yoff)
75 x_skipped = (c + delta) / 2
76 y_skipped = (c - delta) / 2
77
78 For searching for a minimum number of matching elements, we end up
79 with this check:
80
81 (x - x_skipped) >= min
82 ...
83 x + y - c >= (xoff - yoff) + 2 * min
84 x + y - c >= fmid + 2 * min
85 */
86
87 if (min)
88 {
89 fmid_plus_2_min = fmid + 2 * min;
90 min += yoff;
91 if (min > ylim)
92 {
93 c = max + 1;
94 goto free_and_return;
95 }
96 }
97 else
98 fmid_plus_2_min = 0; /* disable this check */
99 if (!py)
100 min = ylim;
101
102 /* Handle the exact-match case. */
103 while (xoff < xlim && yoff < ylim && EQUAL_IDX (xoff, yoff))
104 {
105 xoff++;
106 yoff++;
107 }
108 if (xoff == xlim && yoff >= min
109 && xoff + yoff >= fmid_plus_2_min)
110 {
111 ymax = yoff;
112 c = 0;
113 }
114 else
115 {
116 fd[fmid] = xoff;
117 for (c = 1; c <= max; c++)
118 {
119 OFFSET d;
120
121 if (fmin > dmin)
122 fd[--fmin - 1] = -1;
123 else
124 ++fmin;
125 if (fmax < dmax)
126 fd[++fmax + 1] = -1;
127 else
128 --fmax;
129 for (d = fmax; d >= fmin; d -= 2)
130 {
131 OFFSET x, y;
132
133 if (fd[d - 1] < fd[d + 1])
134 x = fd[d + 1];
135 else
136 x = fd[d - 1] + 1;
137 for (y = x - d;
138 x < xlim && y < ylim && EQUAL_IDX (x, y);
139 x++, y++)
140 /* do nothing */ ;
141 fd[d] = x;
142 if (x == xlim && y >= min
143 && x + y - c >= fmid_plus_2_min)
144 {
145 if (ymax < y)
146 ymax = y;
147 if (y == ylim)
148 goto done;
149 }
150 }
151 if (ymax != -1)
152 goto done;
153 }
154 }
155
156 done:
157 if (py)
158 *py = ymax;
159
160 free_and_return:
161 free (V);
162 return c;
163 }
164
165 #undef OFFSET
166 #undef EQUAL_IDX
167