1 /* Find a best match between two vectors.
2 
3    Copyright (C) 2009-2012 Free Software Foundation, Inc.
4    Written by Andreas Gruenbacher <agruen@gnu.org>, 2009.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation, either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18 
19 
20 /* Before including this file, you need to define:
21      EQUAL_IDX(x, y)         A two-argument macro that tests elements
22 			     at index x and y for equality.
23      OFFSET		     A signed integer type sufficient to hold the
24 			     difference between two indices. Usually
25 			     something like ssize_t.  */
26 
27 /*
28  * Shortest Edit Sequence
29  *
30  * Based on the Greedy LCS/SES Algorithm (Figure 2) in:
31  *
32  *   Eugene W. Myers, "An O(ND) Difference Algorithm and Its Variations",
33  *   Algorithmica, Vol. 1, No. 1, pp. 251-266, March 1986.
34  *   Available: http://dx.doi.org/10.1007/BF01840446
35  *   http://xmailserver.org/diff2.pdf
36  *
37  * Returns the number of changes (insertions and deletions) required to get
38  * from a[] to b[].  Returns MAX + 1 if a[] cannot be turned into b[] with
39  * MAX or fewer changes, in which case *PY is not modified.
40  *
41  * MIN specifies the minimum number of elements in which a[] and b[] must
42  * match. This allows to prevent trivial matches in which a sequence is
43  * completely discarded, or completely made up.
44  *
45  * If PY is not NULL, matches a[] against a prefix of b[], and returns the
46  * number of elements in b[] that were matched in *PY.  Otherwise, matches
47  * all elements of b[].
48  *
49  * Note that the divide-and-conquer strategy discussed in section 4b of the
50  * paper is more efficient, but does not allow an open-ended prefix string
51  * search.
52  */
53 
54 static OFFSET
bestmatch(OFFSET xoff,OFFSET xlim,OFFSET yoff,OFFSET ylim,OFFSET min,OFFSET max,OFFSET * py)55 bestmatch(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
56 	  OFFSET min, OFFSET max, OFFSET *py)
57 {
58     const OFFSET dmin = xoff - ylim;      /* Minimum valid diagonal. */
59     const OFFSET dmax = xlim - yoff;      /* Maximum valid diagonal. */
60     const OFFSET fmid = xoff - yoff;      /* Center diagonal. */
61     OFFSET fmin = fmid;
62     OFFSET fmax = fmid;
63     OFFSET *V, *fd;
64     OFFSET fmid_plus_2_min, ymax = -1;
65     OFFSET c;
66 
67     V = xmalloc ((2 * max + 3) * sizeof (OFFSET));
68     fd = V + max + 1 - fmid;
69 
70     /*
71        The number of elements that were matched in x and in y can be
72        computed as either (x - x_skipped) or (y - y_skipped), with:
73 
74 	 delta = (x - xoff) - (y - yoff)
75 	 x_skipped = (c + delta) / 2
76 	 y_skipped = (c - delta) / 2
77 
78        For searching for a minimum number of matching elements, we end up
79        with this check:
80 
81          (x - x_skipped) >= min
82 	  ...
83 	 x + y - c >= (xoff - yoff) + 2 * min
84 	 x + y - c >= fmid + 2 * min
85     */
86 
87     if (min)
88       {
89 	fmid_plus_2_min = fmid + 2 * min;
90 	min += yoff;
91 	if (min > ylim)
92 	  {
93 	    c = max + 1;
94 	    goto free_and_return;
95 	  }
96       }
97     else
98       fmid_plus_2_min = 0;  /* disable this check */
99     if (!py)
100       min = ylim;
101 
102     /* Handle the exact-match case. */
103     while (xoff < xlim && yoff < ylim && EQUAL_IDX (xoff, yoff))
104       {
105 	xoff++;
106 	yoff++;
107       }
108     if (xoff == xlim && yoff >= min
109 	&& xoff + yoff >= fmid_plus_2_min)
110       {
111 	ymax = yoff;
112         c = 0;
113       }
114     else
115       {
116 	fd[fmid] = xoff;
117 	for (c = 1; c <= max; c++)
118 	  {
119 	    OFFSET d;
120 
121 	    if (fmin > dmin)
122 	      fd[--fmin - 1] = -1;
123 	    else
124 	      ++fmin;
125 	    if (fmax < dmax)
126 	      fd[++fmax + 1] = -1;
127 	    else
128 	      --fmax;
129 	    for (d = fmax; d >= fmin; d -= 2)
130 	      {
131 		OFFSET x, y;
132 
133 		if (fd[d - 1] < fd[d + 1])
134 		  x = fd[d + 1];
135 		else
136 		  x = fd[d - 1] + 1;
137 		for (y = x - d;
138 		     x < xlim && y < ylim && EQUAL_IDX (x, y);
139 		     x++, y++)
140 		  /* do nothing */ ;
141 		fd[d] = x;
142 		if (x == xlim && y >= min
143 		    && x + y - c >= fmid_plus_2_min)
144 		  {
145 		    if (ymax < y)
146 		      ymax = y;
147 		    if (y == ylim)
148 		      goto done;
149 		  }
150 	      }
151 	    if (ymax != -1)
152 	      goto done;
153 	  }
154       }
155 
156   done:
157     if (py)
158       *py = ymax;
159 
160   free_and_return:
161     free (V);
162     return c;
163 }
164 
165 #undef OFFSET
166 #undef EQUAL_IDX
167