1855caec6SPeter Avalos /* Analyze file differences for GNU DIFF.
2855caec6SPeter Avalos
344b87433SJohn Marino Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4*6ea1f93eSDaniel Fojt 2009-2013, 2015-2018 Free Software Foundation, Inc.
5855caec6SPeter Avalos
6855caec6SPeter Avalos This file is part of GNU DIFF.
7855caec6SPeter Avalos
844b87433SJohn Marino This program is free software: you can redistribute it and/or modify
9855caec6SPeter Avalos it under the terms of the GNU General Public License as published by
1044b87433SJohn Marino the Free Software Foundation, either version 3 of the License, or
1144b87433SJohn Marino (at your option) any later version.
12855caec6SPeter Avalos
1344b87433SJohn Marino This program is distributed in the hope that it will be useful,
14855caec6SPeter Avalos but WITHOUT ANY WARRANTY; without even the implied warranty of
15855caec6SPeter Avalos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16855caec6SPeter Avalos GNU General Public License for more details.
17855caec6SPeter Avalos
18855caec6SPeter Avalos You should have received a copy of the GNU General Public License
1944b87433SJohn Marino along with this program. If not, see <http://www.gnu.org/licenses/>. */
20855caec6SPeter Avalos
21855caec6SPeter Avalos #include "diff.h"
22855caec6SPeter Avalos #include <cmpbuf.h>
23855caec6SPeter Avalos #include <error.h>
24855caec6SPeter Avalos #include <file-type.h>
25855caec6SPeter Avalos #include <xalloc.h>
26855caec6SPeter Avalos
2744b87433SJohn Marino /* The core of the Diff algorithm. */
2844b87433SJohn Marino #define ELEMENT lin
2944b87433SJohn Marino #define EQUAL(x,y) ((x) == (y))
3044b87433SJohn Marino #define OFFSET lin
3144b87433SJohn Marino #define EXTRA_CONTEXT_FIELDS /* none */
3244b87433SJohn Marino #define NOTE_DELETE(c, xoff) (files[0].changed[files[0].realindexes[xoff]] = 1)
3344b87433SJohn Marino #define NOTE_INSERT(c, yoff) (files[1].changed[files[1].realindexes[yoff]] = 1)
3444b87433SJohn Marino #define USE_HEURISTIC 1
3544b87433SJohn Marino #include <diffseq.h>
36855caec6SPeter Avalos
37855caec6SPeter Avalos /* Discard lines from one file that have no matches in the other file.
38855caec6SPeter Avalos
39855caec6SPeter Avalos A line which is discarded will not be considered by the actual
40855caec6SPeter Avalos comparison algorithm; it will be as if that line were not in the file.
414536c563SJohn Marino The file's 'realindexes' table maps virtual line numbers
42855caec6SPeter Avalos (which don't count the discarded lines) into real line numbers;
43855caec6SPeter Avalos this is how the actual comparison algorithm produces results
44855caec6SPeter Avalos that are comprehensible when the discarded lines are counted.
45855caec6SPeter Avalos
46855caec6SPeter Avalos When we discard a line, we also mark it as a deletion or insertion
47855caec6SPeter Avalos so that it will be printed in the output. */
48855caec6SPeter Avalos
49855caec6SPeter Avalos static void
discard_confusing_lines(struct file_data filevec[])50855caec6SPeter Avalos discard_confusing_lines (struct file_data filevec[])
51855caec6SPeter Avalos {
52855caec6SPeter Avalos int f;
53855caec6SPeter Avalos lin i;
54855caec6SPeter Avalos char *discarded[2];
55855caec6SPeter Avalos lin *equiv_count[2];
56855caec6SPeter Avalos lin *p;
57855caec6SPeter Avalos
58855caec6SPeter Avalos /* Allocate our results. */
59855caec6SPeter Avalos p = xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
60855caec6SPeter Avalos * (2 * sizeof *p));
61855caec6SPeter Avalos for (f = 0; f < 2; f++)
62855caec6SPeter Avalos {
63855caec6SPeter Avalos filevec[f].undiscarded = p; p += filevec[f].buffered_lines;
64855caec6SPeter Avalos filevec[f].realindexes = p; p += filevec[f].buffered_lines;
65855caec6SPeter Avalos }
66855caec6SPeter Avalos
67855caec6SPeter Avalos /* Set up equiv_count[F][I] as the number of lines in file F
68855caec6SPeter Avalos that fall in equivalence class I. */
69855caec6SPeter Avalos
70855caec6SPeter Avalos p = zalloc (filevec[0].equiv_max * (2 * sizeof *p));
71855caec6SPeter Avalos equiv_count[0] = p;
72855caec6SPeter Avalos equiv_count[1] = p + filevec[0].equiv_max;
73855caec6SPeter Avalos
74855caec6SPeter Avalos for (i = 0; i < filevec[0].buffered_lines; ++i)
75855caec6SPeter Avalos ++equiv_count[0][filevec[0].equivs[i]];
76855caec6SPeter Avalos for (i = 0; i < filevec[1].buffered_lines; ++i)
77855caec6SPeter Avalos ++equiv_count[1][filevec[1].equivs[i]];
78855caec6SPeter Avalos
79855caec6SPeter Avalos /* Set up tables of which lines are going to be discarded. */
80855caec6SPeter Avalos
81855caec6SPeter Avalos discarded[0] = zalloc (filevec[0].buffered_lines
82855caec6SPeter Avalos + filevec[1].buffered_lines);
83855caec6SPeter Avalos discarded[1] = discarded[0] + filevec[0].buffered_lines;
84855caec6SPeter Avalos
85855caec6SPeter Avalos /* Mark to be discarded each line that matches no line of the other file.
86855caec6SPeter Avalos If a line matches many lines, mark it as provisionally discardable. */
87855caec6SPeter Avalos
88855caec6SPeter Avalos for (f = 0; f < 2; f++)
89855caec6SPeter Avalos {
90855caec6SPeter Avalos size_t end = filevec[f].buffered_lines;
91855caec6SPeter Avalos char *discards = discarded[f];
92855caec6SPeter Avalos lin *counts = equiv_count[1 - f];
93855caec6SPeter Avalos lin *equivs = filevec[f].equivs;
94855caec6SPeter Avalos size_t many = 5;
95855caec6SPeter Avalos size_t tem = end / 64;
96855caec6SPeter Avalos
97855caec6SPeter Avalos /* Multiply MANY by approximate square root of number of lines.
98855caec6SPeter Avalos That is the threshold for provisionally discardable lines. */
99855caec6SPeter Avalos while ((tem = tem >> 2) > 0)
100855caec6SPeter Avalos many *= 2;
101855caec6SPeter Avalos
102855caec6SPeter Avalos for (i = 0; i < end; i++)
103855caec6SPeter Avalos {
104855caec6SPeter Avalos lin nmatch;
105855caec6SPeter Avalos if (equivs[i] == 0)
106855caec6SPeter Avalos continue;
107855caec6SPeter Avalos nmatch = counts[equivs[i]];
108855caec6SPeter Avalos if (nmatch == 0)
109855caec6SPeter Avalos discards[i] = 1;
110855caec6SPeter Avalos else if (nmatch > many)
111855caec6SPeter Avalos discards[i] = 2;
112855caec6SPeter Avalos }
113855caec6SPeter Avalos }
114855caec6SPeter Avalos
115855caec6SPeter Avalos /* Don't really discard the provisional lines except when they occur
116855caec6SPeter Avalos in a run of discardables, with nonprovisionals at the beginning
117855caec6SPeter Avalos and end. */
118855caec6SPeter Avalos
119855caec6SPeter Avalos for (f = 0; f < 2; f++)
120855caec6SPeter Avalos {
121855caec6SPeter Avalos lin end = filevec[f].buffered_lines;
122855caec6SPeter Avalos register char *discards = discarded[f];
123855caec6SPeter Avalos
124855caec6SPeter Avalos for (i = 0; i < end; i++)
125855caec6SPeter Avalos {
126855caec6SPeter Avalos /* Cancel provisional discards not in middle of run of discards. */
127855caec6SPeter Avalos if (discards[i] == 2)
128855caec6SPeter Avalos discards[i] = 0;
129855caec6SPeter Avalos else if (discards[i] != 0)
130855caec6SPeter Avalos {
131855caec6SPeter Avalos /* We have found a nonprovisional discard. */
132855caec6SPeter Avalos register lin j;
133855caec6SPeter Avalos lin length;
134855caec6SPeter Avalos lin provisional = 0;
135855caec6SPeter Avalos
136855caec6SPeter Avalos /* Find end of this run of discardable lines.
137855caec6SPeter Avalos Count how many are provisionally discardable. */
138855caec6SPeter Avalos for (j = i; j < end; j++)
139855caec6SPeter Avalos {
140855caec6SPeter Avalos if (discards[j] == 0)
141855caec6SPeter Avalos break;
142855caec6SPeter Avalos if (discards[j] == 2)
143855caec6SPeter Avalos ++provisional;
144855caec6SPeter Avalos }
145855caec6SPeter Avalos
146855caec6SPeter Avalos /* Cancel provisional discards at end, and shrink the run. */
147855caec6SPeter Avalos while (j > i && discards[j - 1] == 2)
148855caec6SPeter Avalos discards[--j] = 0, --provisional;
149855caec6SPeter Avalos
150855caec6SPeter Avalos /* Now we have the length of a run of discardable lines
151855caec6SPeter Avalos whose first and last are not provisional. */
152855caec6SPeter Avalos length = j - i;
153855caec6SPeter Avalos
154855caec6SPeter Avalos /* If 1/4 of the lines in the run are provisional,
155855caec6SPeter Avalos cancel discarding of all provisional lines in the run. */
156855caec6SPeter Avalos if (provisional * 4 > length)
157855caec6SPeter Avalos {
158855caec6SPeter Avalos while (j > i)
159855caec6SPeter Avalos if (discards[--j] == 2)
160855caec6SPeter Avalos discards[j] = 0;
161855caec6SPeter Avalos }
162855caec6SPeter Avalos else
163855caec6SPeter Avalos {
164855caec6SPeter Avalos register lin consec;
165855caec6SPeter Avalos lin minimum = 1;
166855caec6SPeter Avalos lin tem = length >> 2;
167855caec6SPeter Avalos
168855caec6SPeter Avalos /* MINIMUM is approximate square root of LENGTH/4.
169855caec6SPeter Avalos A subrun of two or more provisionals can stand
170855caec6SPeter Avalos when LENGTH is at least 16.
171855caec6SPeter Avalos A subrun of 4 or more can stand when LENGTH >= 64. */
172855caec6SPeter Avalos while (0 < (tem >>= 2))
173855caec6SPeter Avalos minimum <<= 1;
174855caec6SPeter Avalos minimum++;
175855caec6SPeter Avalos
176855caec6SPeter Avalos /* Cancel any subrun of MINIMUM or more provisionals
177855caec6SPeter Avalos within the larger run. */
178855caec6SPeter Avalos for (j = 0, consec = 0; j < length; j++)
179855caec6SPeter Avalos if (discards[i + j] != 2)
180855caec6SPeter Avalos consec = 0;
181855caec6SPeter Avalos else if (minimum == ++consec)
182855caec6SPeter Avalos /* Back up to start of subrun, to cancel it all. */
183855caec6SPeter Avalos j -= consec;
184855caec6SPeter Avalos else if (minimum < consec)
185855caec6SPeter Avalos discards[i + j] = 0;
186855caec6SPeter Avalos
187855caec6SPeter Avalos /* Scan from beginning of run
188855caec6SPeter Avalos until we find 3 or more nonprovisionals in a row
189855caec6SPeter Avalos or until the first nonprovisional at least 8 lines in.
190855caec6SPeter Avalos Until that point, cancel any provisionals. */
191855caec6SPeter Avalos for (j = 0, consec = 0; j < length; j++)
192855caec6SPeter Avalos {
193855caec6SPeter Avalos if (j >= 8 && discards[i + j] == 1)
194855caec6SPeter Avalos break;
195855caec6SPeter Avalos if (discards[i + j] == 2)
196855caec6SPeter Avalos consec = 0, discards[i + j] = 0;
197855caec6SPeter Avalos else if (discards[i + j] == 0)
198855caec6SPeter Avalos consec = 0;
199855caec6SPeter Avalos else
200855caec6SPeter Avalos consec++;
201855caec6SPeter Avalos if (consec == 3)
202855caec6SPeter Avalos break;
203855caec6SPeter Avalos }
204855caec6SPeter Avalos
205855caec6SPeter Avalos /* I advances to the last line of the run. */
206855caec6SPeter Avalos i += length - 1;
207855caec6SPeter Avalos
208855caec6SPeter Avalos /* Same thing, from end. */
209855caec6SPeter Avalos for (j = 0, consec = 0; j < length; j++)
210855caec6SPeter Avalos {
211855caec6SPeter Avalos if (j >= 8 && discards[i - j] == 1)
212855caec6SPeter Avalos break;
213855caec6SPeter Avalos if (discards[i - j] == 2)
214855caec6SPeter Avalos consec = 0, discards[i - j] = 0;
215855caec6SPeter Avalos else if (discards[i - j] == 0)
216855caec6SPeter Avalos consec = 0;
217855caec6SPeter Avalos else
218855caec6SPeter Avalos consec++;
219855caec6SPeter Avalos if (consec == 3)
220855caec6SPeter Avalos break;
221855caec6SPeter Avalos }
222855caec6SPeter Avalos }
223855caec6SPeter Avalos }
224855caec6SPeter Avalos }
225855caec6SPeter Avalos }
226855caec6SPeter Avalos
227855caec6SPeter Avalos /* Actually discard the lines. */
228855caec6SPeter Avalos for (f = 0; f < 2; f++)
229855caec6SPeter Avalos {
230855caec6SPeter Avalos char *discards = discarded[f];
231855caec6SPeter Avalos lin end = filevec[f].buffered_lines;
232855caec6SPeter Avalos lin j = 0;
233855caec6SPeter Avalos for (i = 0; i < end; ++i)
234855caec6SPeter Avalos if (minimal || discards[i] == 0)
235855caec6SPeter Avalos {
236855caec6SPeter Avalos filevec[f].undiscarded[j] = filevec[f].equivs[i];
237855caec6SPeter Avalos filevec[f].realindexes[j++] = i;
238855caec6SPeter Avalos }
239855caec6SPeter Avalos else
240855caec6SPeter Avalos filevec[f].changed[i] = 1;
241855caec6SPeter Avalos filevec[f].nondiscarded_lines = j;
242855caec6SPeter Avalos }
243855caec6SPeter Avalos
244855caec6SPeter Avalos free (discarded[0]);
245855caec6SPeter Avalos free (equiv_count[0]);
246855caec6SPeter Avalos }
247855caec6SPeter Avalos
248855caec6SPeter Avalos /* Adjust inserts/deletes of identical lines to join changes
249855caec6SPeter Avalos as much as possible.
250855caec6SPeter Avalos
251855caec6SPeter Avalos We do something when a run of changed lines include a
252855caec6SPeter Avalos line at one end and have an excluded, identical line at the other.
253855caec6SPeter Avalos We are free to choose which identical line is included.
2544536c563SJohn Marino 'compareseq' usually chooses the one at the beginning,
255855caec6SPeter Avalos but usually it is cleaner to consider the following identical line
256855caec6SPeter Avalos to be the "change". */
257855caec6SPeter Avalos
258855caec6SPeter Avalos static void
shift_boundaries(struct file_data filevec[])259855caec6SPeter Avalos shift_boundaries (struct file_data filevec[])
260855caec6SPeter Avalos {
261855caec6SPeter Avalos int f;
262855caec6SPeter Avalos
263855caec6SPeter Avalos for (f = 0; f < 2; f++)
264855caec6SPeter Avalos {
265855caec6SPeter Avalos char *changed = filevec[f].changed;
266855caec6SPeter Avalos char *other_changed = filevec[1 - f].changed;
267855caec6SPeter Avalos lin const *equivs = filevec[f].equivs;
268855caec6SPeter Avalos lin i = 0;
269855caec6SPeter Avalos lin j = 0;
270855caec6SPeter Avalos lin i_end = filevec[f].buffered_lines;
271855caec6SPeter Avalos
272855caec6SPeter Avalos while (1)
273855caec6SPeter Avalos {
274855caec6SPeter Avalos lin runlength, start, corresponding;
275855caec6SPeter Avalos
276855caec6SPeter Avalos /* Scan forwards to find beginning of another run of changes.
277855caec6SPeter Avalos Also keep track of the corresponding point in the other file. */
278855caec6SPeter Avalos
279855caec6SPeter Avalos while (i < i_end && !changed[i])
280855caec6SPeter Avalos {
281855caec6SPeter Avalos while (other_changed[j++])
282855caec6SPeter Avalos continue;
283855caec6SPeter Avalos i++;
284855caec6SPeter Avalos }
285855caec6SPeter Avalos
286855caec6SPeter Avalos if (i == i_end)
287855caec6SPeter Avalos break;
288855caec6SPeter Avalos
289855caec6SPeter Avalos start = i;
290855caec6SPeter Avalos
291855caec6SPeter Avalos /* Find the end of this run of changes. */
292855caec6SPeter Avalos
293855caec6SPeter Avalos while (changed[++i])
294855caec6SPeter Avalos continue;
295855caec6SPeter Avalos while (other_changed[j])
296855caec6SPeter Avalos j++;
297855caec6SPeter Avalos
298855caec6SPeter Avalos do
299855caec6SPeter Avalos {
300855caec6SPeter Avalos /* Record the length of this run of changes, so that
301855caec6SPeter Avalos we can later determine whether the run has grown. */
302855caec6SPeter Avalos runlength = i - start;
303855caec6SPeter Avalos
304855caec6SPeter Avalos /* Move the changed region back, so long as the
305855caec6SPeter Avalos previous unchanged line matches the last changed one.
306855caec6SPeter Avalos This merges with previous changed regions. */
307855caec6SPeter Avalos
308855caec6SPeter Avalos while (start && equivs[start - 1] == equivs[i - 1])
309855caec6SPeter Avalos {
310855caec6SPeter Avalos changed[--start] = 1;
311855caec6SPeter Avalos changed[--i] = 0;
312855caec6SPeter Avalos while (changed[start - 1])
313855caec6SPeter Avalos start--;
314855caec6SPeter Avalos while (other_changed[--j])
315855caec6SPeter Avalos continue;
316855caec6SPeter Avalos }
317855caec6SPeter Avalos
318855caec6SPeter Avalos /* Set CORRESPONDING to the end of the changed run, at the last
319855caec6SPeter Avalos point where it corresponds to a changed run in the other file.
320855caec6SPeter Avalos CORRESPONDING == I_END means no such point has been found. */
321855caec6SPeter Avalos corresponding = other_changed[j - 1] ? i : i_end;
322855caec6SPeter Avalos
323855caec6SPeter Avalos /* Move the changed region forward, so long as the
324855caec6SPeter Avalos first changed line matches the following unchanged one.
325855caec6SPeter Avalos This merges with following changed regions.
326855caec6SPeter Avalos Do this second, so that if there are no merges,
327855caec6SPeter Avalos the changed region is moved forward as far as possible. */
328855caec6SPeter Avalos
329855caec6SPeter Avalos while (i != i_end && equivs[start] == equivs[i])
330855caec6SPeter Avalos {
331855caec6SPeter Avalos changed[start++] = 0;
332855caec6SPeter Avalos changed[i++] = 1;
333855caec6SPeter Avalos while (changed[i])
334855caec6SPeter Avalos i++;
335855caec6SPeter Avalos while (other_changed[++j])
336855caec6SPeter Avalos corresponding = i;
337855caec6SPeter Avalos }
338855caec6SPeter Avalos }
339855caec6SPeter Avalos while (runlength != i - start);
340855caec6SPeter Avalos
341855caec6SPeter Avalos /* If possible, move the fully-merged run of changes
342855caec6SPeter Avalos back to a corresponding run in the other file. */
343855caec6SPeter Avalos
344855caec6SPeter Avalos while (corresponding < i)
345855caec6SPeter Avalos {
346855caec6SPeter Avalos changed[--start] = 1;
347855caec6SPeter Avalos changed[--i] = 0;
348855caec6SPeter Avalos while (other_changed[--j])
349855caec6SPeter Avalos continue;
350855caec6SPeter Avalos }
351855caec6SPeter Avalos }
352855caec6SPeter Avalos }
353855caec6SPeter Avalos }
354855caec6SPeter Avalos
355855caec6SPeter Avalos /* Cons an additional entry onto the front of an edit script OLD.
356855caec6SPeter Avalos LINE0 and LINE1 are the first affected lines in the two files (origin 0).
357855caec6SPeter Avalos DELETED is the number of lines deleted here from file 0.
358855caec6SPeter Avalos INSERTED is the number of lines inserted here in file 1.
359855caec6SPeter Avalos
360855caec6SPeter Avalos If DELETED is 0 then LINE0 is the number of the line before
361855caec6SPeter Avalos which the insertion was done; vice versa for INSERTED and LINE1. */
362855caec6SPeter Avalos
363855caec6SPeter Avalos static struct change *
add_change(lin line0,lin line1,lin deleted,lin inserted,struct change * old)364855caec6SPeter Avalos add_change (lin line0, lin line1, lin deleted, lin inserted,
365855caec6SPeter Avalos struct change *old)
366855caec6SPeter Avalos {
367855caec6SPeter Avalos struct change *new = xmalloc (sizeof *new);
368855caec6SPeter Avalos
369855caec6SPeter Avalos new->line0 = line0;
370855caec6SPeter Avalos new->line1 = line1;
371855caec6SPeter Avalos new->inserted = inserted;
372855caec6SPeter Avalos new->deleted = deleted;
373855caec6SPeter Avalos new->link = old;
374855caec6SPeter Avalos return new;
375855caec6SPeter Avalos }
376855caec6SPeter Avalos
377855caec6SPeter Avalos /* Scan the tables of which lines are inserted and deleted,
378855caec6SPeter Avalos producing an edit script in reverse order. */
379855caec6SPeter Avalos
380855caec6SPeter Avalos static struct change *
build_reverse_script(struct file_data const filevec[])381855caec6SPeter Avalos build_reverse_script (struct file_data const filevec[])
382855caec6SPeter Avalos {
383855caec6SPeter Avalos struct change *script = 0;
384855caec6SPeter Avalos char *changed0 = filevec[0].changed;
385855caec6SPeter Avalos char *changed1 = filevec[1].changed;
386855caec6SPeter Avalos lin len0 = filevec[0].buffered_lines;
387855caec6SPeter Avalos lin len1 = filevec[1].buffered_lines;
388855caec6SPeter Avalos
38944b87433SJohn Marino /* Note that changedN[lenN] does exist, and is 0. */
390855caec6SPeter Avalos
391855caec6SPeter Avalos lin i0 = 0, i1 = 0;
392855caec6SPeter Avalos
393855caec6SPeter Avalos while (i0 < len0 || i1 < len1)
394855caec6SPeter Avalos {
395855caec6SPeter Avalos if (changed0[i0] | changed1[i1])
396855caec6SPeter Avalos {
397855caec6SPeter Avalos lin line0 = i0, line1 = i1;
398855caec6SPeter Avalos
399855caec6SPeter Avalos /* Find # lines changed here in each file. */
400855caec6SPeter Avalos while (changed0[i0]) ++i0;
401855caec6SPeter Avalos while (changed1[i1]) ++i1;
402855caec6SPeter Avalos
403855caec6SPeter Avalos /* Record this change. */
404855caec6SPeter Avalos script = add_change (line0, line1, i0 - line0, i1 - line1, script);
405855caec6SPeter Avalos }
406855caec6SPeter Avalos
407855caec6SPeter Avalos /* We have reached lines in the two files that match each other. */
408855caec6SPeter Avalos i0++, i1++;
409855caec6SPeter Avalos }
410855caec6SPeter Avalos
411855caec6SPeter Avalos return script;
412855caec6SPeter Avalos }
413855caec6SPeter Avalos
414855caec6SPeter Avalos /* Scan the tables of which lines are inserted and deleted,
415855caec6SPeter Avalos producing an edit script in forward order. */
416855caec6SPeter Avalos
417855caec6SPeter Avalos static struct change *
build_script(struct file_data const filevec[])418855caec6SPeter Avalos build_script (struct file_data const filevec[])
419855caec6SPeter Avalos {
420855caec6SPeter Avalos struct change *script = 0;
421855caec6SPeter Avalos char *changed0 = filevec[0].changed;
422855caec6SPeter Avalos char *changed1 = filevec[1].changed;
423855caec6SPeter Avalos lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
424855caec6SPeter Avalos
425855caec6SPeter Avalos /* Note that changedN[-1] does exist, and is 0. */
426855caec6SPeter Avalos
427855caec6SPeter Avalos while (i0 >= 0 || i1 >= 0)
428855caec6SPeter Avalos {
429855caec6SPeter Avalos if (changed0[i0 - 1] | changed1[i1 - 1])
430855caec6SPeter Avalos {
431855caec6SPeter Avalos lin line0 = i0, line1 = i1;
432855caec6SPeter Avalos
433855caec6SPeter Avalos /* Find # lines changed here in each file. */
434855caec6SPeter Avalos while (changed0[i0 - 1]) --i0;
435855caec6SPeter Avalos while (changed1[i1 - 1]) --i1;
436855caec6SPeter Avalos
437855caec6SPeter Avalos /* Record this change. */
438855caec6SPeter Avalos script = add_change (i0, i1, line0 - i0, line1 - i1, script);
439855caec6SPeter Avalos }
440855caec6SPeter Avalos
441855caec6SPeter Avalos /* We have reached lines in the two files that match each other. */
442855caec6SPeter Avalos i0--, i1--;
443855caec6SPeter Avalos }
444855caec6SPeter Avalos
445855caec6SPeter Avalos return script;
446855caec6SPeter Avalos }
447855caec6SPeter Avalos
448*6ea1f93eSDaniel Fojt /* If CHANGES, briefly report that two files differed. */
449*6ea1f93eSDaniel Fojt static void
briefly_report(int changes,struct file_data const filevec[])450855caec6SPeter Avalos briefly_report (int changes, struct file_data const filevec[])
451855caec6SPeter Avalos {
452855caec6SPeter Avalos if (changes)
453*6ea1f93eSDaniel Fojt message ((brief
454*6ea1f93eSDaniel Fojt ? _("Files %s and %s differ\n")
455*6ea1f93eSDaniel Fojt : _("Binary files %s and %s differ\n")),
456*6ea1f93eSDaniel Fojt file_label[0] ? file_label[0] : filevec[0].name,
457*6ea1f93eSDaniel Fojt file_label[1] ? file_label[1] : filevec[1].name);
458855caec6SPeter Avalos }
459855caec6SPeter Avalos
460855caec6SPeter Avalos /* Report the differences of two files. */
461855caec6SPeter Avalos int
diff_2_files(struct comparison * cmp)462855caec6SPeter Avalos diff_2_files (struct comparison *cmp)
463855caec6SPeter Avalos {
464855caec6SPeter Avalos int f;
465855caec6SPeter Avalos struct change *e, *p;
466855caec6SPeter Avalos struct change *script;
467855caec6SPeter Avalos int changes;
468855caec6SPeter Avalos
469855caec6SPeter Avalos
470855caec6SPeter Avalos /* If we have detected that either file is binary,
471855caec6SPeter Avalos compare the two files as binary. This can happen
472855caec6SPeter Avalos only when the first chunk is read.
473855caec6SPeter Avalos Also, --brief without any --ignore-* options means
474855caec6SPeter Avalos we can speed things up by treating the files as binary. */
475855caec6SPeter Avalos
476855caec6SPeter Avalos if (read_files (cmp->file, files_can_be_treated_as_binary))
477855caec6SPeter Avalos {
478855caec6SPeter Avalos /* Files with different lengths must be different. */
479855caec6SPeter Avalos if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
480*6ea1f93eSDaniel Fojt && 0 < cmp->file[0].stat.st_size
481*6ea1f93eSDaniel Fojt && 0 < cmp->file[1].stat.st_size
482855caec6SPeter Avalos && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
483855caec6SPeter Avalos && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
484855caec6SPeter Avalos changes = 1;
485855caec6SPeter Avalos
486855caec6SPeter Avalos /* Standard input equals itself. */
487855caec6SPeter Avalos else if (cmp->file[0].desc == cmp->file[1].desc)
488855caec6SPeter Avalos changes = 0;
489855caec6SPeter Avalos
490855caec6SPeter Avalos else
491855caec6SPeter Avalos /* Scan both files, a buffer at a time, looking for a difference. */
492855caec6SPeter Avalos {
493855caec6SPeter Avalos /* Allocate same-sized buffers for both files. */
494855caec6SPeter Avalos size_t lcm_max = PTRDIFF_MAX - 1;
495855caec6SPeter Avalos size_t buffer_size =
496855caec6SPeter Avalos buffer_lcm (sizeof (word),
497855caec6SPeter Avalos buffer_lcm (STAT_BLOCKSIZE (cmp->file[0].stat),
498855caec6SPeter Avalos STAT_BLOCKSIZE (cmp->file[1].stat),
499855caec6SPeter Avalos lcm_max),
500855caec6SPeter Avalos lcm_max);
501855caec6SPeter Avalos for (f = 0; f < 2; f++)
502855caec6SPeter Avalos cmp->file[f].buffer = xrealloc (cmp->file[f].buffer, buffer_size);
503855caec6SPeter Avalos
504855caec6SPeter Avalos for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0)
505855caec6SPeter Avalos {
506855caec6SPeter Avalos /* Read a buffer's worth from both files. */
507855caec6SPeter Avalos for (f = 0; f < 2; f++)
508855caec6SPeter Avalos if (0 <= cmp->file[f].desc)
509855caec6SPeter Avalos file_block_read (&cmp->file[f],
510855caec6SPeter Avalos buffer_size - cmp->file[f].buffered);
511855caec6SPeter Avalos
512855caec6SPeter Avalos /* If the buffers differ, the files differ. */
513855caec6SPeter Avalos if (cmp->file[0].buffered != cmp->file[1].buffered
514855caec6SPeter Avalos || memcmp (cmp->file[0].buffer,
515855caec6SPeter Avalos cmp->file[1].buffer,
516855caec6SPeter Avalos cmp->file[0].buffered))
517855caec6SPeter Avalos {
518855caec6SPeter Avalos changes = 1;
519855caec6SPeter Avalos break;
520855caec6SPeter Avalos }
521855caec6SPeter Avalos
522855caec6SPeter Avalos /* If we reach end of file, the files are the same. */
523855caec6SPeter Avalos if (cmp->file[0].buffered != buffer_size)
524855caec6SPeter Avalos {
525855caec6SPeter Avalos changes = 0;
526855caec6SPeter Avalos break;
527855caec6SPeter Avalos }
528855caec6SPeter Avalos }
529855caec6SPeter Avalos }
530855caec6SPeter Avalos
531*6ea1f93eSDaniel Fojt briefly_report (changes, cmp->file);
532855caec6SPeter Avalos }
533855caec6SPeter Avalos else
534855caec6SPeter Avalos {
53544b87433SJohn Marino struct context ctxt;
53644b87433SJohn Marino lin diags;
53744b87433SJohn Marino lin too_expensive;
53844b87433SJohn Marino
539855caec6SPeter Avalos /* Allocate vectors for the results of comparison:
540855caec6SPeter Avalos a flag for each line of each file, saying whether that line
541855caec6SPeter Avalos is an insertion or deletion.
542855caec6SPeter Avalos Allocate an extra element, always 0, at each end of each vector. */
543855caec6SPeter Avalos
544855caec6SPeter Avalos size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
545855caec6SPeter Avalos char *flag_space = zalloc (s);
546855caec6SPeter Avalos cmp->file[0].changed = flag_space + 1;
547855caec6SPeter Avalos cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
548855caec6SPeter Avalos
549855caec6SPeter Avalos /* Some lines are obviously insertions or deletions
550855caec6SPeter Avalos because they don't match anything. Detect them now, and
551855caec6SPeter Avalos avoid even thinking about them in the main comparison algorithm. */
552855caec6SPeter Avalos
553855caec6SPeter Avalos discard_confusing_lines (cmp->file);
554855caec6SPeter Avalos
555855caec6SPeter Avalos /* Now do the main comparison algorithm, considering just the
556855caec6SPeter Avalos undiscarded lines. */
557855caec6SPeter Avalos
55844b87433SJohn Marino ctxt.xvec = cmp->file[0].undiscarded;
55944b87433SJohn Marino ctxt.yvec = cmp->file[1].undiscarded;
560855caec6SPeter Avalos diags = (cmp->file[0].nondiscarded_lines
561855caec6SPeter Avalos + cmp->file[1].nondiscarded_lines + 3);
56244b87433SJohn Marino ctxt.fdiag = xmalloc (diags * (2 * sizeof *ctxt.fdiag));
56344b87433SJohn Marino ctxt.bdiag = ctxt.fdiag + diags;
56444b87433SJohn Marino ctxt.fdiag += cmp->file[1].nondiscarded_lines + 1;
56544b87433SJohn Marino ctxt.bdiag += cmp->file[1].nondiscarded_lines + 1;
56644b87433SJohn Marino
56744b87433SJohn Marino ctxt.heuristic = speed_large_files;
568855caec6SPeter Avalos
569*6ea1f93eSDaniel Fojt /* Set TOO_EXPENSIVE to be the approximate square root of the
570*6ea1f93eSDaniel Fojt input size, bounded below by 4096. 4096 seems to be good for
571*6ea1f93eSDaniel Fojt circa-2016 CPUs; see Bug#16848 and Bug#24715. */
572855caec6SPeter Avalos too_expensive = 1;
573855caec6SPeter Avalos for (; diags != 0; diags >>= 2)
574855caec6SPeter Avalos too_expensive <<= 1;
575*6ea1f93eSDaniel Fojt ctxt.too_expensive = MAX (4096, too_expensive);
576855caec6SPeter Avalos
577855caec6SPeter Avalos files[0] = cmp->file[0];
578855caec6SPeter Avalos files[1] = cmp->file[1];
579855caec6SPeter Avalos
580855caec6SPeter Avalos compareseq (0, cmp->file[0].nondiscarded_lines,
58144b87433SJohn Marino 0, cmp->file[1].nondiscarded_lines, minimal, &ctxt);
582855caec6SPeter Avalos
58344b87433SJohn Marino free (ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1));
584855caec6SPeter Avalos
585855caec6SPeter Avalos /* Modify the results slightly to make them prettier
586855caec6SPeter Avalos in cases where that can validly be done. */
587855caec6SPeter Avalos
588855caec6SPeter Avalos shift_boundaries (cmp->file);
589855caec6SPeter Avalos
590855caec6SPeter Avalos /* Get the results of comparison in the form of a chain
5914536c563SJohn Marino of 'struct change's -- an edit script. */
592855caec6SPeter Avalos
593855caec6SPeter Avalos if (output_style == OUTPUT_ED)
594855caec6SPeter Avalos script = build_reverse_script (cmp->file);
595855caec6SPeter Avalos else
596855caec6SPeter Avalos script = build_script (cmp->file);
597855caec6SPeter Avalos
598855caec6SPeter Avalos /* Set CHANGES if we had any diffs.
599855caec6SPeter Avalos If some changes are ignored, we must scan the script to decide. */
600855caec6SPeter Avalos if (ignore_blank_lines || ignore_regexp.fastmap)
601855caec6SPeter Avalos {
602855caec6SPeter Avalos struct change *next = script;
603855caec6SPeter Avalos changes = 0;
604855caec6SPeter Avalos
605855caec6SPeter Avalos while (next && changes == 0)
606855caec6SPeter Avalos {
607855caec6SPeter Avalos struct change *this, *end;
608855caec6SPeter Avalos lin first0, last0, first1, last1;
609855caec6SPeter Avalos
610855caec6SPeter Avalos /* Find a set of changes that belong together. */
611855caec6SPeter Avalos this = next;
612855caec6SPeter Avalos end = find_change (next);
613855caec6SPeter Avalos
614855caec6SPeter Avalos /* Disconnect them from the rest of the changes, making them
615855caec6SPeter Avalos a hunk, and remember the rest for next iteration. */
616855caec6SPeter Avalos next = end->link;
617855caec6SPeter Avalos end->link = 0;
618855caec6SPeter Avalos
619855caec6SPeter Avalos /* Determine whether this hunk is really a difference. */
620855caec6SPeter Avalos if (analyze_hunk (this, &first0, &last0, &first1, &last1))
621855caec6SPeter Avalos changes = 1;
622855caec6SPeter Avalos
623855caec6SPeter Avalos /* Reconnect the script so it will all be freed properly. */
624855caec6SPeter Avalos end->link = next;
625855caec6SPeter Avalos }
626855caec6SPeter Avalos }
627855caec6SPeter Avalos else
628855caec6SPeter Avalos changes = (script != 0);
629855caec6SPeter Avalos
630855caec6SPeter Avalos if (brief)
631*6ea1f93eSDaniel Fojt briefly_report (changes, cmp->file);
632855caec6SPeter Avalos else
633855caec6SPeter Avalos {
63444b87433SJohn Marino if (changes || !no_diff_means_no_output)
635855caec6SPeter Avalos {
636855caec6SPeter Avalos /* Record info for starting up output,
637855caec6SPeter Avalos to be used if and when we have some output to print. */
638855caec6SPeter Avalos setup_output (file_label[0] ? file_label[0] : cmp->file[0].name,
639855caec6SPeter Avalos file_label[1] ? file_label[1] : cmp->file[1].name,
640855caec6SPeter Avalos cmp->parent != 0);
641855caec6SPeter Avalos
642855caec6SPeter Avalos switch (output_style)
643855caec6SPeter Avalos {
644855caec6SPeter Avalos case OUTPUT_CONTEXT:
645855caec6SPeter Avalos print_context_script (script, false);
646855caec6SPeter Avalos break;
647855caec6SPeter Avalos
648855caec6SPeter Avalos case OUTPUT_UNIFIED:
649855caec6SPeter Avalos print_context_script (script, true);
650855caec6SPeter Avalos break;
651855caec6SPeter Avalos
652855caec6SPeter Avalos case OUTPUT_ED:
653855caec6SPeter Avalos print_ed_script (script);
654855caec6SPeter Avalos break;
655855caec6SPeter Avalos
656855caec6SPeter Avalos case OUTPUT_FORWARD_ED:
657855caec6SPeter Avalos pr_forward_ed_script (script);
658855caec6SPeter Avalos break;
659855caec6SPeter Avalos
660855caec6SPeter Avalos case OUTPUT_RCS:
661855caec6SPeter Avalos print_rcs_script (script);
662855caec6SPeter Avalos break;
663855caec6SPeter Avalos
664855caec6SPeter Avalos case OUTPUT_NORMAL:
665855caec6SPeter Avalos print_normal_script (script);
666855caec6SPeter Avalos break;
667855caec6SPeter Avalos
668855caec6SPeter Avalos case OUTPUT_IFDEF:
669855caec6SPeter Avalos print_ifdef_script (script);
670855caec6SPeter Avalos break;
671855caec6SPeter Avalos
672855caec6SPeter Avalos case OUTPUT_SDIFF:
673855caec6SPeter Avalos print_sdiff_script (script);
674855caec6SPeter Avalos break;
675855caec6SPeter Avalos
676855caec6SPeter Avalos default:
677855caec6SPeter Avalos abort ();
678855caec6SPeter Avalos }
679855caec6SPeter Avalos
680855caec6SPeter Avalos finish_output ();
681855caec6SPeter Avalos }
682855caec6SPeter Avalos }
683855caec6SPeter Avalos
684855caec6SPeter Avalos free (cmp->file[0].undiscarded);
685855caec6SPeter Avalos
686855caec6SPeter Avalos free (flag_space);
687855caec6SPeter Avalos
688855caec6SPeter Avalos for (f = 0; f < 2; f++)
689855caec6SPeter Avalos {
690855caec6SPeter Avalos free (cmp->file[f].equivs);
691855caec6SPeter Avalos free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
692855caec6SPeter Avalos }
693855caec6SPeter Avalos
694855caec6SPeter Avalos for (e = script; e; e = p)
695855caec6SPeter Avalos {
696855caec6SPeter Avalos p = e->link;
697855caec6SPeter Avalos free (e);
698855caec6SPeter Avalos }
699855caec6SPeter Avalos
700855caec6SPeter Avalos if (! ROBUST_OUTPUT_STYLE (output_style))
701855caec6SPeter Avalos for (f = 0; f < 2; ++f)
702855caec6SPeter Avalos if (cmp->file[f].missing_newline)
703855caec6SPeter Avalos {
704855caec6SPeter Avalos error (0, 0, "%s: %s\n",
705855caec6SPeter Avalos file_label[f] ? file_label[f] : cmp->file[f].name,
706855caec6SPeter Avalos _("No newline at end of file"));
707855caec6SPeter Avalos changes = 2;
708855caec6SPeter Avalos }
709855caec6SPeter Avalos }
710855caec6SPeter Avalos
711855caec6SPeter Avalos if (cmp->file[0].buffer != cmp->file[1].buffer)
712855caec6SPeter Avalos free (cmp->file[0].buffer);
713855caec6SPeter Avalos free (cmp->file[1].buffer);
714855caec6SPeter Avalos
715855caec6SPeter Avalos return changes;
716855caec6SPeter Avalos }
717