xref: /dragonfly/contrib/diffutils/src/analyze.c (revision 6ea1f93e)
1855caec6SPeter Avalos /* Analyze file differences for GNU DIFF.
2855caec6SPeter Avalos 
344b87433SJohn Marino    Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4*6ea1f93eSDaniel Fojt    2009-2013, 2015-2018 Free Software Foundation, Inc.
5855caec6SPeter Avalos 
6855caec6SPeter Avalos    This file is part of GNU DIFF.
7855caec6SPeter Avalos 
844b87433SJohn Marino    This program is free software: you can redistribute it and/or modify
9855caec6SPeter Avalos    it under the terms of the GNU General Public License as published by
1044b87433SJohn Marino    the Free Software Foundation, either version 3 of the License, or
1144b87433SJohn Marino    (at your option) any later version.
12855caec6SPeter Avalos 
1344b87433SJohn Marino    This program is distributed in the hope that it will be useful,
14855caec6SPeter Avalos    but WITHOUT ANY WARRANTY; without even the implied warranty of
15855caec6SPeter Avalos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16855caec6SPeter Avalos    GNU General Public License for more details.
17855caec6SPeter Avalos 
18855caec6SPeter Avalos    You should have received a copy of the GNU General Public License
1944b87433SJohn Marino    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20855caec6SPeter Avalos 
21855caec6SPeter Avalos #include "diff.h"
22855caec6SPeter Avalos #include <cmpbuf.h>
23855caec6SPeter Avalos #include <error.h>
24855caec6SPeter Avalos #include <file-type.h>
25855caec6SPeter Avalos #include <xalloc.h>
26855caec6SPeter Avalos 
2744b87433SJohn Marino /* The core of the Diff algorithm.  */
2844b87433SJohn Marino #define ELEMENT lin
2944b87433SJohn Marino #define EQUAL(x,y) ((x) == (y))
3044b87433SJohn Marino #define OFFSET lin
3144b87433SJohn Marino #define EXTRA_CONTEXT_FIELDS /* none */
3244b87433SJohn Marino #define NOTE_DELETE(c, xoff) (files[0].changed[files[0].realindexes[xoff]] = 1)
3344b87433SJohn Marino #define NOTE_INSERT(c, yoff) (files[1].changed[files[1].realindexes[yoff]] = 1)
3444b87433SJohn Marino #define USE_HEURISTIC 1
3544b87433SJohn Marino #include <diffseq.h>
36855caec6SPeter Avalos 
37855caec6SPeter Avalos /* Discard lines from one file that have no matches in the other file.
38855caec6SPeter Avalos 
39855caec6SPeter Avalos    A line which is discarded will not be considered by the actual
40855caec6SPeter Avalos    comparison algorithm; it will be as if that line were not in the file.
414536c563SJohn Marino    The file's 'realindexes' table maps virtual line numbers
42855caec6SPeter Avalos    (which don't count the discarded lines) into real line numbers;
43855caec6SPeter Avalos    this is how the actual comparison algorithm produces results
44855caec6SPeter Avalos    that are comprehensible when the discarded lines are counted.
45855caec6SPeter Avalos 
46855caec6SPeter Avalos    When we discard a line, we also mark it as a deletion or insertion
47855caec6SPeter Avalos    so that it will be printed in the output.  */
48855caec6SPeter Avalos 
49855caec6SPeter Avalos static void
discard_confusing_lines(struct file_data filevec[])50855caec6SPeter Avalos discard_confusing_lines (struct file_data filevec[])
51855caec6SPeter Avalos {
52855caec6SPeter Avalos   int f;
53855caec6SPeter Avalos   lin i;
54855caec6SPeter Avalos   char *discarded[2];
55855caec6SPeter Avalos   lin *equiv_count[2];
56855caec6SPeter Avalos   lin *p;
57855caec6SPeter Avalos 
58855caec6SPeter Avalos   /* Allocate our results.  */
59855caec6SPeter Avalos   p = xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
60855caec6SPeter Avalos 	       * (2 * sizeof *p));
61855caec6SPeter Avalos   for (f = 0; f < 2; f++)
62855caec6SPeter Avalos     {
63855caec6SPeter Avalos       filevec[f].undiscarded = p;  p += filevec[f].buffered_lines;
64855caec6SPeter Avalos       filevec[f].realindexes = p;  p += filevec[f].buffered_lines;
65855caec6SPeter Avalos     }
66855caec6SPeter Avalos 
67855caec6SPeter Avalos   /* Set up equiv_count[F][I] as the number of lines in file F
68855caec6SPeter Avalos      that fall in equivalence class I.  */
69855caec6SPeter Avalos 
70855caec6SPeter Avalos   p = zalloc (filevec[0].equiv_max * (2 * sizeof *p));
71855caec6SPeter Avalos   equiv_count[0] = p;
72855caec6SPeter Avalos   equiv_count[1] = p + filevec[0].equiv_max;
73855caec6SPeter Avalos 
74855caec6SPeter Avalos   for (i = 0; i < filevec[0].buffered_lines; ++i)
75855caec6SPeter Avalos     ++equiv_count[0][filevec[0].equivs[i]];
76855caec6SPeter Avalos   for (i = 0; i < filevec[1].buffered_lines; ++i)
77855caec6SPeter Avalos     ++equiv_count[1][filevec[1].equivs[i]];
78855caec6SPeter Avalos 
79855caec6SPeter Avalos   /* Set up tables of which lines are going to be discarded.  */
80855caec6SPeter Avalos 
81855caec6SPeter Avalos   discarded[0] = zalloc (filevec[0].buffered_lines
82855caec6SPeter Avalos 			 + filevec[1].buffered_lines);
83855caec6SPeter Avalos   discarded[1] = discarded[0] + filevec[0].buffered_lines;
84855caec6SPeter Avalos 
85855caec6SPeter Avalos   /* Mark to be discarded each line that matches no line of the other file.
86855caec6SPeter Avalos      If a line matches many lines, mark it as provisionally discardable.  */
87855caec6SPeter Avalos 
88855caec6SPeter Avalos   for (f = 0; f < 2; f++)
89855caec6SPeter Avalos     {
90855caec6SPeter Avalos       size_t end = filevec[f].buffered_lines;
91855caec6SPeter Avalos       char *discards = discarded[f];
92855caec6SPeter Avalos       lin *counts = equiv_count[1 - f];
93855caec6SPeter Avalos       lin *equivs = filevec[f].equivs;
94855caec6SPeter Avalos       size_t many = 5;
95855caec6SPeter Avalos       size_t tem = end / 64;
96855caec6SPeter Avalos 
97855caec6SPeter Avalos       /* Multiply MANY by approximate square root of number of lines.
98855caec6SPeter Avalos 	 That is the threshold for provisionally discardable lines.  */
99855caec6SPeter Avalos       while ((tem = tem >> 2) > 0)
100855caec6SPeter Avalos 	many *= 2;
101855caec6SPeter Avalos 
102855caec6SPeter Avalos       for (i = 0; i < end; i++)
103855caec6SPeter Avalos 	{
104855caec6SPeter Avalos 	  lin nmatch;
105855caec6SPeter Avalos 	  if (equivs[i] == 0)
106855caec6SPeter Avalos 	    continue;
107855caec6SPeter Avalos 	  nmatch = counts[equivs[i]];
108855caec6SPeter Avalos 	  if (nmatch == 0)
109855caec6SPeter Avalos 	    discards[i] = 1;
110855caec6SPeter Avalos 	  else if (nmatch > many)
111855caec6SPeter Avalos 	    discards[i] = 2;
112855caec6SPeter Avalos 	}
113855caec6SPeter Avalos     }
114855caec6SPeter Avalos 
115855caec6SPeter Avalos   /* Don't really discard the provisional lines except when they occur
116855caec6SPeter Avalos      in a run of discardables, with nonprovisionals at the beginning
117855caec6SPeter Avalos      and end.  */
118855caec6SPeter Avalos 
119855caec6SPeter Avalos   for (f = 0; f < 2; f++)
120855caec6SPeter Avalos     {
121855caec6SPeter Avalos       lin end = filevec[f].buffered_lines;
122855caec6SPeter Avalos       register char *discards = discarded[f];
123855caec6SPeter Avalos 
124855caec6SPeter Avalos       for (i = 0; i < end; i++)
125855caec6SPeter Avalos 	{
126855caec6SPeter Avalos 	  /* Cancel provisional discards not in middle of run of discards.  */
127855caec6SPeter Avalos 	  if (discards[i] == 2)
128855caec6SPeter Avalos 	    discards[i] = 0;
129855caec6SPeter Avalos 	  else if (discards[i] != 0)
130855caec6SPeter Avalos 	    {
131855caec6SPeter Avalos 	      /* We have found a nonprovisional discard.  */
132855caec6SPeter Avalos 	      register lin j;
133855caec6SPeter Avalos 	      lin length;
134855caec6SPeter Avalos 	      lin provisional = 0;
135855caec6SPeter Avalos 
136855caec6SPeter Avalos 	      /* Find end of this run of discardable lines.
137855caec6SPeter Avalos 		 Count how many are provisionally discardable.  */
138855caec6SPeter Avalos 	      for (j = i; j < end; j++)
139855caec6SPeter Avalos 		{
140855caec6SPeter Avalos 		  if (discards[j] == 0)
141855caec6SPeter Avalos 		    break;
142855caec6SPeter Avalos 		  if (discards[j] == 2)
143855caec6SPeter Avalos 		    ++provisional;
144855caec6SPeter Avalos 		}
145855caec6SPeter Avalos 
146855caec6SPeter Avalos 	      /* Cancel provisional discards at end, and shrink the run.  */
147855caec6SPeter Avalos 	      while (j > i && discards[j - 1] == 2)
148855caec6SPeter Avalos 		discards[--j] = 0, --provisional;
149855caec6SPeter Avalos 
150855caec6SPeter Avalos 	      /* Now we have the length of a run of discardable lines
151855caec6SPeter Avalos 		 whose first and last are not provisional.  */
152855caec6SPeter Avalos 	      length = j - i;
153855caec6SPeter Avalos 
154855caec6SPeter Avalos 	      /* If 1/4 of the lines in the run are provisional,
155855caec6SPeter Avalos 		 cancel discarding of all provisional lines in the run.  */
156855caec6SPeter Avalos 	      if (provisional * 4 > length)
157855caec6SPeter Avalos 		{
158855caec6SPeter Avalos 		  while (j > i)
159855caec6SPeter Avalos 		    if (discards[--j] == 2)
160855caec6SPeter Avalos 		      discards[j] = 0;
161855caec6SPeter Avalos 		}
162855caec6SPeter Avalos 	      else
163855caec6SPeter Avalos 		{
164855caec6SPeter Avalos 		  register lin consec;
165855caec6SPeter Avalos 		  lin minimum = 1;
166855caec6SPeter Avalos 		  lin tem = length >> 2;
167855caec6SPeter Avalos 
168855caec6SPeter Avalos 		  /* MINIMUM is approximate square root of LENGTH/4.
169855caec6SPeter Avalos 		     A subrun of two or more provisionals can stand
170855caec6SPeter Avalos 		     when LENGTH is at least 16.
171855caec6SPeter Avalos 		     A subrun of 4 or more can stand when LENGTH >= 64.  */
172855caec6SPeter Avalos 		  while (0 < (tem >>= 2))
173855caec6SPeter Avalos 		    minimum <<= 1;
174855caec6SPeter Avalos 		  minimum++;
175855caec6SPeter Avalos 
176855caec6SPeter Avalos 		  /* Cancel any subrun of MINIMUM or more provisionals
177855caec6SPeter Avalos 		     within the larger run.  */
178855caec6SPeter Avalos 		  for (j = 0, consec = 0; j < length; j++)
179855caec6SPeter Avalos 		    if (discards[i + j] != 2)
180855caec6SPeter Avalos 		      consec = 0;
181855caec6SPeter Avalos 		    else if (minimum == ++consec)
182855caec6SPeter Avalos 		      /* Back up to start of subrun, to cancel it all.  */
183855caec6SPeter Avalos 		      j -= consec;
184855caec6SPeter Avalos 		    else if (minimum < consec)
185855caec6SPeter Avalos 		      discards[i + j] = 0;
186855caec6SPeter Avalos 
187855caec6SPeter Avalos 		  /* Scan from beginning of run
188855caec6SPeter Avalos 		     until we find 3 or more nonprovisionals in a row
189855caec6SPeter Avalos 		     or until the first nonprovisional at least 8 lines in.
190855caec6SPeter Avalos 		     Until that point, cancel any provisionals.  */
191855caec6SPeter Avalos 		  for (j = 0, consec = 0; j < length; j++)
192855caec6SPeter Avalos 		    {
193855caec6SPeter Avalos 		      if (j >= 8 && discards[i + j] == 1)
194855caec6SPeter Avalos 			break;
195855caec6SPeter Avalos 		      if (discards[i + j] == 2)
196855caec6SPeter Avalos 			consec = 0, discards[i + j] = 0;
197855caec6SPeter Avalos 		      else if (discards[i + j] == 0)
198855caec6SPeter Avalos 			consec = 0;
199855caec6SPeter Avalos 		      else
200855caec6SPeter Avalos 			consec++;
201855caec6SPeter Avalos 		      if (consec == 3)
202855caec6SPeter Avalos 			break;
203855caec6SPeter Avalos 		    }
204855caec6SPeter Avalos 
205855caec6SPeter Avalos 		  /* I advances to the last line of the run.  */
206855caec6SPeter Avalos 		  i += length - 1;
207855caec6SPeter Avalos 
208855caec6SPeter Avalos 		  /* Same thing, from end.  */
209855caec6SPeter Avalos 		  for (j = 0, consec = 0; j < length; j++)
210855caec6SPeter Avalos 		    {
211855caec6SPeter Avalos 		      if (j >= 8 && discards[i - j] == 1)
212855caec6SPeter Avalos 			break;
213855caec6SPeter Avalos 		      if (discards[i - j] == 2)
214855caec6SPeter Avalos 			consec = 0, discards[i - j] = 0;
215855caec6SPeter Avalos 		      else if (discards[i - j] == 0)
216855caec6SPeter Avalos 			consec = 0;
217855caec6SPeter Avalos 		      else
218855caec6SPeter Avalos 			consec++;
219855caec6SPeter Avalos 		      if (consec == 3)
220855caec6SPeter Avalos 			break;
221855caec6SPeter Avalos 		    }
222855caec6SPeter Avalos 		}
223855caec6SPeter Avalos 	    }
224855caec6SPeter Avalos 	}
225855caec6SPeter Avalos     }
226855caec6SPeter Avalos 
227855caec6SPeter Avalos   /* Actually discard the lines. */
228855caec6SPeter Avalos   for (f = 0; f < 2; f++)
229855caec6SPeter Avalos     {
230855caec6SPeter Avalos       char *discards = discarded[f];
231855caec6SPeter Avalos       lin end = filevec[f].buffered_lines;
232855caec6SPeter Avalos       lin j = 0;
233855caec6SPeter Avalos       for (i = 0; i < end; ++i)
234855caec6SPeter Avalos 	if (minimal || discards[i] == 0)
235855caec6SPeter Avalos 	  {
236855caec6SPeter Avalos 	    filevec[f].undiscarded[j] = filevec[f].equivs[i];
237855caec6SPeter Avalos 	    filevec[f].realindexes[j++] = i;
238855caec6SPeter Avalos 	  }
239855caec6SPeter Avalos 	else
240855caec6SPeter Avalos 	  filevec[f].changed[i] = 1;
241855caec6SPeter Avalos       filevec[f].nondiscarded_lines = j;
242855caec6SPeter Avalos     }
243855caec6SPeter Avalos 
244855caec6SPeter Avalos   free (discarded[0]);
245855caec6SPeter Avalos   free (equiv_count[0]);
246855caec6SPeter Avalos }
247855caec6SPeter Avalos 
248855caec6SPeter Avalos /* Adjust inserts/deletes of identical lines to join changes
249855caec6SPeter Avalos    as much as possible.
250855caec6SPeter Avalos 
251855caec6SPeter Avalos    We do something when a run of changed lines include a
252855caec6SPeter Avalos    line at one end and have an excluded, identical line at the other.
253855caec6SPeter Avalos    We are free to choose which identical line is included.
2544536c563SJohn Marino    'compareseq' usually chooses the one at the beginning,
255855caec6SPeter Avalos    but usually it is cleaner to consider the following identical line
256855caec6SPeter Avalos    to be the "change".  */
257855caec6SPeter Avalos 
258855caec6SPeter Avalos static void
shift_boundaries(struct file_data filevec[])259855caec6SPeter Avalos shift_boundaries (struct file_data filevec[])
260855caec6SPeter Avalos {
261855caec6SPeter Avalos   int f;
262855caec6SPeter Avalos 
263855caec6SPeter Avalos   for (f = 0; f < 2; f++)
264855caec6SPeter Avalos     {
265855caec6SPeter Avalos       char *changed = filevec[f].changed;
266855caec6SPeter Avalos       char *other_changed = filevec[1 - f].changed;
267855caec6SPeter Avalos       lin const *equivs = filevec[f].equivs;
268855caec6SPeter Avalos       lin i = 0;
269855caec6SPeter Avalos       lin j = 0;
270855caec6SPeter Avalos       lin i_end = filevec[f].buffered_lines;
271855caec6SPeter Avalos 
272855caec6SPeter Avalos       while (1)
273855caec6SPeter Avalos 	{
274855caec6SPeter Avalos 	  lin runlength, start, corresponding;
275855caec6SPeter Avalos 
276855caec6SPeter Avalos 	  /* Scan forwards to find beginning of another run of changes.
277855caec6SPeter Avalos 	     Also keep track of the corresponding point in the other file.  */
278855caec6SPeter Avalos 
279855caec6SPeter Avalos 	  while (i < i_end && !changed[i])
280855caec6SPeter Avalos 	    {
281855caec6SPeter Avalos 	      while (other_changed[j++])
282855caec6SPeter Avalos 		continue;
283855caec6SPeter Avalos 	      i++;
284855caec6SPeter Avalos 	    }
285855caec6SPeter Avalos 
286855caec6SPeter Avalos 	  if (i == i_end)
287855caec6SPeter Avalos 	    break;
288855caec6SPeter Avalos 
289855caec6SPeter Avalos 	  start = i;
290855caec6SPeter Avalos 
291855caec6SPeter Avalos 	  /* Find the end of this run of changes.  */
292855caec6SPeter Avalos 
293855caec6SPeter Avalos 	  while (changed[++i])
294855caec6SPeter Avalos 	    continue;
295855caec6SPeter Avalos 	  while (other_changed[j])
296855caec6SPeter Avalos 	    j++;
297855caec6SPeter Avalos 
298855caec6SPeter Avalos 	  do
299855caec6SPeter Avalos 	    {
300855caec6SPeter Avalos 	      /* Record the length of this run of changes, so that
301855caec6SPeter Avalos 		 we can later determine whether the run has grown.  */
302855caec6SPeter Avalos 	      runlength = i - start;
303855caec6SPeter Avalos 
304855caec6SPeter Avalos 	      /* Move the changed region back, so long as the
305855caec6SPeter Avalos 		 previous unchanged line matches the last changed one.
306855caec6SPeter Avalos 		 This merges with previous changed regions.  */
307855caec6SPeter Avalos 
308855caec6SPeter Avalos 	      while (start && equivs[start - 1] == equivs[i - 1])
309855caec6SPeter Avalos 		{
310855caec6SPeter Avalos 		  changed[--start] = 1;
311855caec6SPeter Avalos 		  changed[--i] = 0;
312855caec6SPeter Avalos 		  while (changed[start - 1])
313855caec6SPeter Avalos 		    start--;
314855caec6SPeter Avalos 		  while (other_changed[--j])
315855caec6SPeter Avalos 		    continue;
316855caec6SPeter Avalos 		}
317855caec6SPeter Avalos 
318855caec6SPeter Avalos 	      /* Set CORRESPONDING to the end of the changed run, at the last
319855caec6SPeter Avalos 		 point where it corresponds to a changed run in the other file.
320855caec6SPeter Avalos 		 CORRESPONDING == I_END means no such point has been found.  */
321855caec6SPeter Avalos 	      corresponding = other_changed[j - 1] ? i : i_end;
322855caec6SPeter Avalos 
323855caec6SPeter Avalos 	      /* Move the changed region forward, so long as the
324855caec6SPeter Avalos 		 first changed line matches the following unchanged one.
325855caec6SPeter Avalos 		 This merges with following changed regions.
326855caec6SPeter Avalos 		 Do this second, so that if there are no merges,
327855caec6SPeter Avalos 		 the changed region is moved forward as far as possible.  */
328855caec6SPeter Avalos 
329855caec6SPeter Avalos 	      while (i != i_end && equivs[start] == equivs[i])
330855caec6SPeter Avalos 		{
331855caec6SPeter Avalos 		  changed[start++] = 0;
332855caec6SPeter Avalos 		  changed[i++] = 1;
333855caec6SPeter Avalos 		  while (changed[i])
334855caec6SPeter Avalos 		    i++;
335855caec6SPeter Avalos 		  while (other_changed[++j])
336855caec6SPeter Avalos 		    corresponding = i;
337855caec6SPeter Avalos 		}
338855caec6SPeter Avalos 	    }
339855caec6SPeter Avalos 	  while (runlength != i - start);
340855caec6SPeter Avalos 
341855caec6SPeter Avalos 	  /* If possible, move the fully-merged run of changes
342855caec6SPeter Avalos 	     back to a corresponding run in the other file.  */
343855caec6SPeter Avalos 
344855caec6SPeter Avalos 	  while (corresponding < i)
345855caec6SPeter Avalos 	    {
346855caec6SPeter Avalos 	      changed[--start] = 1;
347855caec6SPeter Avalos 	      changed[--i] = 0;
348855caec6SPeter Avalos 	      while (other_changed[--j])
349855caec6SPeter Avalos 		continue;
350855caec6SPeter Avalos 	    }
351855caec6SPeter Avalos 	}
352855caec6SPeter Avalos     }
353855caec6SPeter Avalos }
354855caec6SPeter Avalos 
355855caec6SPeter Avalos /* Cons an additional entry onto the front of an edit script OLD.
356855caec6SPeter Avalos    LINE0 and LINE1 are the first affected lines in the two files (origin 0).
357855caec6SPeter Avalos    DELETED is the number of lines deleted here from file 0.
358855caec6SPeter Avalos    INSERTED is the number of lines inserted here in file 1.
359855caec6SPeter Avalos 
360855caec6SPeter Avalos    If DELETED is 0 then LINE0 is the number of the line before
361855caec6SPeter Avalos    which the insertion was done; vice versa for INSERTED and LINE1.  */
362855caec6SPeter Avalos 
363855caec6SPeter Avalos static struct change *
add_change(lin line0,lin line1,lin deleted,lin inserted,struct change * old)364855caec6SPeter Avalos add_change (lin line0, lin line1, lin deleted, lin inserted,
365855caec6SPeter Avalos 	    struct change *old)
366855caec6SPeter Avalos {
367855caec6SPeter Avalos   struct change *new = xmalloc (sizeof *new);
368855caec6SPeter Avalos 
369855caec6SPeter Avalos   new->line0 = line0;
370855caec6SPeter Avalos   new->line1 = line1;
371855caec6SPeter Avalos   new->inserted = inserted;
372855caec6SPeter Avalos   new->deleted = deleted;
373855caec6SPeter Avalos   new->link = old;
374855caec6SPeter Avalos   return new;
375855caec6SPeter Avalos }
376855caec6SPeter Avalos 
377855caec6SPeter Avalos /* Scan the tables of which lines are inserted and deleted,
378855caec6SPeter Avalos    producing an edit script in reverse order.  */
379855caec6SPeter Avalos 
380855caec6SPeter Avalos static struct change *
build_reverse_script(struct file_data const filevec[])381855caec6SPeter Avalos build_reverse_script (struct file_data const filevec[])
382855caec6SPeter Avalos {
383855caec6SPeter Avalos   struct change *script = 0;
384855caec6SPeter Avalos   char *changed0 = filevec[0].changed;
385855caec6SPeter Avalos   char *changed1 = filevec[1].changed;
386855caec6SPeter Avalos   lin len0 = filevec[0].buffered_lines;
387855caec6SPeter Avalos   lin len1 = filevec[1].buffered_lines;
388855caec6SPeter Avalos 
38944b87433SJohn Marino   /* Note that changedN[lenN] does exist, and is 0.  */
390855caec6SPeter Avalos 
391855caec6SPeter Avalos   lin i0 = 0, i1 = 0;
392855caec6SPeter Avalos 
393855caec6SPeter Avalos   while (i0 < len0 || i1 < len1)
394855caec6SPeter Avalos     {
395855caec6SPeter Avalos       if (changed0[i0] | changed1[i1])
396855caec6SPeter Avalos 	{
397855caec6SPeter Avalos 	  lin line0 = i0, line1 = i1;
398855caec6SPeter Avalos 
399855caec6SPeter Avalos 	  /* Find # lines changed here in each file.  */
400855caec6SPeter Avalos 	  while (changed0[i0]) ++i0;
401855caec6SPeter Avalos 	  while (changed1[i1]) ++i1;
402855caec6SPeter Avalos 
403855caec6SPeter Avalos 	  /* Record this change.  */
404855caec6SPeter Avalos 	  script = add_change (line0, line1, i0 - line0, i1 - line1, script);
405855caec6SPeter Avalos 	}
406855caec6SPeter Avalos 
407855caec6SPeter Avalos       /* We have reached lines in the two files that match each other.  */
408855caec6SPeter Avalos       i0++, i1++;
409855caec6SPeter Avalos     }
410855caec6SPeter Avalos 
411855caec6SPeter Avalos   return script;
412855caec6SPeter Avalos }
413855caec6SPeter Avalos 
414855caec6SPeter Avalos /* Scan the tables of which lines are inserted and deleted,
415855caec6SPeter Avalos    producing an edit script in forward order.  */
416855caec6SPeter Avalos 
417855caec6SPeter Avalos static struct change *
build_script(struct file_data const filevec[])418855caec6SPeter Avalos build_script (struct file_data const filevec[])
419855caec6SPeter Avalos {
420855caec6SPeter Avalos   struct change *script = 0;
421855caec6SPeter Avalos   char *changed0 = filevec[0].changed;
422855caec6SPeter Avalos   char *changed1 = filevec[1].changed;
423855caec6SPeter Avalos   lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
424855caec6SPeter Avalos 
425855caec6SPeter Avalos   /* Note that changedN[-1] does exist, and is 0.  */
426855caec6SPeter Avalos 
427855caec6SPeter Avalos   while (i0 >= 0 || i1 >= 0)
428855caec6SPeter Avalos     {
429855caec6SPeter Avalos       if (changed0[i0 - 1] | changed1[i1 - 1])
430855caec6SPeter Avalos 	{
431855caec6SPeter Avalos 	  lin line0 = i0, line1 = i1;
432855caec6SPeter Avalos 
433855caec6SPeter Avalos 	  /* Find # lines changed here in each file.  */
434855caec6SPeter Avalos 	  while (changed0[i0 - 1]) --i0;
435855caec6SPeter Avalos 	  while (changed1[i1 - 1]) --i1;
436855caec6SPeter Avalos 
437855caec6SPeter Avalos 	  /* Record this change.  */
438855caec6SPeter Avalos 	  script = add_change (i0, i1, line0 - i0, line1 - i1, script);
439855caec6SPeter Avalos 	}
440855caec6SPeter Avalos 
441855caec6SPeter Avalos       /* We have reached lines in the two files that match each other.  */
442855caec6SPeter Avalos       i0--, i1--;
443855caec6SPeter Avalos     }
444855caec6SPeter Avalos 
445855caec6SPeter Avalos   return script;
446855caec6SPeter Avalos }
447855caec6SPeter Avalos 
448*6ea1f93eSDaniel Fojt /* If CHANGES, briefly report that two files differed.  */
449*6ea1f93eSDaniel Fojt static void
briefly_report(int changes,struct file_data const filevec[])450855caec6SPeter Avalos briefly_report (int changes, struct file_data const filevec[])
451855caec6SPeter Avalos {
452855caec6SPeter Avalos   if (changes)
453*6ea1f93eSDaniel Fojt     message ((brief
454*6ea1f93eSDaniel Fojt 	      ? _("Files %s and %s differ\n")
455*6ea1f93eSDaniel Fojt 	      : _("Binary files %s and %s differ\n")),
456*6ea1f93eSDaniel Fojt 	     file_label[0] ? file_label[0] : filevec[0].name,
457*6ea1f93eSDaniel Fojt 	     file_label[1] ? file_label[1] : filevec[1].name);
458855caec6SPeter Avalos }
459855caec6SPeter Avalos 
460855caec6SPeter Avalos /* Report the differences of two files.  */
461855caec6SPeter Avalos int
diff_2_files(struct comparison * cmp)462855caec6SPeter Avalos diff_2_files (struct comparison *cmp)
463855caec6SPeter Avalos {
464855caec6SPeter Avalos   int f;
465855caec6SPeter Avalos   struct change *e, *p;
466855caec6SPeter Avalos   struct change *script;
467855caec6SPeter Avalos   int changes;
468855caec6SPeter Avalos 
469855caec6SPeter Avalos 
470855caec6SPeter Avalos   /* If we have detected that either file is binary,
471855caec6SPeter Avalos      compare the two files as binary.  This can happen
472855caec6SPeter Avalos      only when the first chunk is read.
473855caec6SPeter Avalos      Also, --brief without any --ignore-* options means
474855caec6SPeter Avalos      we can speed things up by treating the files as binary.  */
475855caec6SPeter Avalos 
476855caec6SPeter Avalos   if (read_files (cmp->file, files_can_be_treated_as_binary))
477855caec6SPeter Avalos     {
478855caec6SPeter Avalos       /* Files with different lengths must be different.  */
479855caec6SPeter Avalos       if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
480*6ea1f93eSDaniel Fojt 	  && 0 < cmp->file[0].stat.st_size
481*6ea1f93eSDaniel Fojt 	  && 0 < cmp->file[1].stat.st_size
482855caec6SPeter Avalos 	  && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
483855caec6SPeter Avalos 	  && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
484855caec6SPeter Avalos 	changes = 1;
485855caec6SPeter Avalos 
486855caec6SPeter Avalos       /* Standard input equals itself.  */
487855caec6SPeter Avalos       else if (cmp->file[0].desc == cmp->file[1].desc)
488855caec6SPeter Avalos 	changes = 0;
489855caec6SPeter Avalos 
490855caec6SPeter Avalos       else
491855caec6SPeter Avalos 	/* Scan both files, a buffer at a time, looking for a difference.  */
492855caec6SPeter Avalos 	{
493855caec6SPeter Avalos 	  /* Allocate same-sized buffers for both files.  */
494855caec6SPeter Avalos 	  size_t lcm_max = PTRDIFF_MAX - 1;
495855caec6SPeter Avalos 	  size_t buffer_size =
496855caec6SPeter Avalos 	    buffer_lcm (sizeof (word),
497855caec6SPeter Avalos 			buffer_lcm (STAT_BLOCKSIZE (cmp->file[0].stat),
498855caec6SPeter Avalos 				    STAT_BLOCKSIZE (cmp->file[1].stat),
499855caec6SPeter Avalos 				    lcm_max),
500855caec6SPeter Avalos 			lcm_max);
501855caec6SPeter Avalos 	  for (f = 0; f < 2; f++)
502855caec6SPeter Avalos 	    cmp->file[f].buffer = xrealloc (cmp->file[f].buffer, buffer_size);
503855caec6SPeter Avalos 
504855caec6SPeter Avalos 	  for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0)
505855caec6SPeter Avalos 	    {
506855caec6SPeter Avalos 	      /* Read a buffer's worth from both files.  */
507855caec6SPeter Avalos 	      for (f = 0; f < 2; f++)
508855caec6SPeter Avalos 		if (0 <= cmp->file[f].desc)
509855caec6SPeter Avalos 		  file_block_read (&cmp->file[f],
510855caec6SPeter Avalos 				   buffer_size - cmp->file[f].buffered);
511855caec6SPeter Avalos 
512855caec6SPeter Avalos 	      /* If the buffers differ, the files differ.  */
513855caec6SPeter Avalos 	      if (cmp->file[0].buffered != cmp->file[1].buffered
514855caec6SPeter Avalos 		  || memcmp (cmp->file[0].buffer,
515855caec6SPeter Avalos 			     cmp->file[1].buffer,
516855caec6SPeter Avalos 			     cmp->file[0].buffered))
517855caec6SPeter Avalos 		{
518855caec6SPeter Avalos 		  changes = 1;
519855caec6SPeter Avalos 		  break;
520855caec6SPeter Avalos 		}
521855caec6SPeter Avalos 
522855caec6SPeter Avalos 	      /* If we reach end of file, the files are the same.  */
523855caec6SPeter Avalos 	      if (cmp->file[0].buffered != buffer_size)
524855caec6SPeter Avalos 		{
525855caec6SPeter Avalos 		  changes = 0;
526855caec6SPeter Avalos 		  break;
527855caec6SPeter Avalos 		}
528855caec6SPeter Avalos 	    }
529855caec6SPeter Avalos 	}
530855caec6SPeter Avalos 
531*6ea1f93eSDaniel Fojt       briefly_report (changes, cmp->file);
532855caec6SPeter Avalos     }
533855caec6SPeter Avalos   else
534855caec6SPeter Avalos     {
53544b87433SJohn Marino       struct context ctxt;
53644b87433SJohn Marino       lin diags;
53744b87433SJohn Marino       lin too_expensive;
53844b87433SJohn Marino 
539855caec6SPeter Avalos       /* Allocate vectors for the results of comparison:
540855caec6SPeter Avalos 	 a flag for each line of each file, saying whether that line
541855caec6SPeter Avalos 	 is an insertion or deletion.
542855caec6SPeter Avalos 	 Allocate an extra element, always 0, at each end of each vector.  */
543855caec6SPeter Avalos 
544855caec6SPeter Avalos       size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
545855caec6SPeter Avalos       char *flag_space = zalloc (s);
546855caec6SPeter Avalos       cmp->file[0].changed = flag_space + 1;
547855caec6SPeter Avalos       cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
548855caec6SPeter Avalos 
549855caec6SPeter Avalos       /* Some lines are obviously insertions or deletions
550855caec6SPeter Avalos 	 because they don't match anything.  Detect them now, and
551855caec6SPeter Avalos 	 avoid even thinking about them in the main comparison algorithm.  */
552855caec6SPeter Avalos 
553855caec6SPeter Avalos       discard_confusing_lines (cmp->file);
554855caec6SPeter Avalos 
555855caec6SPeter Avalos       /* Now do the main comparison algorithm, considering just the
556855caec6SPeter Avalos 	 undiscarded lines.  */
557855caec6SPeter Avalos 
55844b87433SJohn Marino       ctxt.xvec = cmp->file[0].undiscarded;
55944b87433SJohn Marino       ctxt.yvec = cmp->file[1].undiscarded;
560855caec6SPeter Avalos       diags = (cmp->file[0].nondiscarded_lines
561855caec6SPeter Avalos 	       + cmp->file[1].nondiscarded_lines + 3);
56244b87433SJohn Marino       ctxt.fdiag = xmalloc (diags * (2 * sizeof *ctxt.fdiag));
56344b87433SJohn Marino       ctxt.bdiag = ctxt.fdiag + diags;
56444b87433SJohn Marino       ctxt.fdiag += cmp->file[1].nondiscarded_lines + 1;
56544b87433SJohn Marino       ctxt.bdiag += cmp->file[1].nondiscarded_lines + 1;
56644b87433SJohn Marino 
56744b87433SJohn Marino       ctxt.heuristic = speed_large_files;
568855caec6SPeter Avalos 
569*6ea1f93eSDaniel Fojt       /* Set TOO_EXPENSIVE to be the approximate square root of the
570*6ea1f93eSDaniel Fojt 	 input size, bounded below by 4096.  4096 seems to be good for
571*6ea1f93eSDaniel Fojt 	 circa-2016 CPUs; see Bug#16848 and Bug#24715.  */
572855caec6SPeter Avalos       too_expensive = 1;
573855caec6SPeter Avalos       for (;  diags != 0;  diags >>= 2)
574855caec6SPeter Avalos 	too_expensive <<= 1;
575*6ea1f93eSDaniel Fojt       ctxt.too_expensive = MAX (4096, too_expensive);
576855caec6SPeter Avalos 
577855caec6SPeter Avalos       files[0] = cmp->file[0];
578855caec6SPeter Avalos       files[1] = cmp->file[1];
579855caec6SPeter Avalos 
580855caec6SPeter Avalos       compareseq (0, cmp->file[0].nondiscarded_lines,
58144b87433SJohn Marino 		  0, cmp->file[1].nondiscarded_lines, minimal, &ctxt);
582855caec6SPeter Avalos 
58344b87433SJohn Marino       free (ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1));
584855caec6SPeter Avalos 
585855caec6SPeter Avalos       /* Modify the results slightly to make them prettier
586855caec6SPeter Avalos 	 in cases where that can validly be done.  */
587855caec6SPeter Avalos 
588855caec6SPeter Avalos       shift_boundaries (cmp->file);
589855caec6SPeter Avalos 
590855caec6SPeter Avalos       /* Get the results of comparison in the form of a chain
5914536c563SJohn Marino 	 of 'struct change's -- an edit script.  */
592855caec6SPeter Avalos 
593855caec6SPeter Avalos       if (output_style == OUTPUT_ED)
594855caec6SPeter Avalos 	script = build_reverse_script (cmp->file);
595855caec6SPeter Avalos       else
596855caec6SPeter Avalos 	script = build_script (cmp->file);
597855caec6SPeter Avalos 
598855caec6SPeter Avalos       /* Set CHANGES if we had any diffs.
599855caec6SPeter Avalos 	 If some changes are ignored, we must scan the script to decide.  */
600855caec6SPeter Avalos       if (ignore_blank_lines || ignore_regexp.fastmap)
601855caec6SPeter Avalos 	{
602855caec6SPeter Avalos 	  struct change *next = script;
603855caec6SPeter Avalos 	  changes = 0;
604855caec6SPeter Avalos 
605855caec6SPeter Avalos 	  while (next && changes == 0)
606855caec6SPeter Avalos 	    {
607855caec6SPeter Avalos 	      struct change *this, *end;
608855caec6SPeter Avalos 	      lin first0, last0, first1, last1;
609855caec6SPeter Avalos 
610855caec6SPeter Avalos 	      /* Find a set of changes that belong together.  */
611855caec6SPeter Avalos 	      this = next;
612855caec6SPeter Avalos 	      end = find_change (next);
613855caec6SPeter Avalos 
614855caec6SPeter Avalos 	      /* Disconnect them from the rest of the changes, making them
615855caec6SPeter Avalos 		 a hunk, and remember the rest for next iteration.  */
616855caec6SPeter Avalos 	      next = end->link;
617855caec6SPeter Avalos 	      end->link = 0;
618855caec6SPeter Avalos 
619855caec6SPeter Avalos 	      /* Determine whether this hunk is really a difference.  */
620855caec6SPeter Avalos 	      if (analyze_hunk (this, &first0, &last0, &first1, &last1))
621855caec6SPeter Avalos 		changes = 1;
622855caec6SPeter Avalos 
623855caec6SPeter Avalos 	      /* Reconnect the script so it will all be freed properly.  */
624855caec6SPeter Avalos 	      end->link = next;
625855caec6SPeter Avalos 	    }
626855caec6SPeter Avalos 	}
627855caec6SPeter Avalos       else
628855caec6SPeter Avalos 	changes = (script != 0);
629855caec6SPeter Avalos 
630855caec6SPeter Avalos       if (brief)
631*6ea1f93eSDaniel Fojt 	briefly_report (changes, cmp->file);
632855caec6SPeter Avalos       else
633855caec6SPeter Avalos 	{
63444b87433SJohn Marino 	  if (changes || !no_diff_means_no_output)
635855caec6SPeter Avalos 	    {
636855caec6SPeter Avalos 	      /* Record info for starting up output,
637855caec6SPeter Avalos 		 to be used if and when we have some output to print.  */
638855caec6SPeter Avalos 	      setup_output (file_label[0] ? file_label[0] : cmp->file[0].name,
639855caec6SPeter Avalos 			    file_label[1] ? file_label[1] : cmp->file[1].name,
640855caec6SPeter Avalos 			    cmp->parent != 0);
641855caec6SPeter Avalos 
642855caec6SPeter Avalos 	      switch (output_style)
643855caec6SPeter Avalos 		{
644855caec6SPeter Avalos 		case OUTPUT_CONTEXT:
645855caec6SPeter Avalos 		  print_context_script (script, false);
646855caec6SPeter Avalos 		  break;
647855caec6SPeter Avalos 
648855caec6SPeter Avalos 		case OUTPUT_UNIFIED:
649855caec6SPeter Avalos 		  print_context_script (script, true);
650855caec6SPeter Avalos 		  break;
651855caec6SPeter Avalos 
652855caec6SPeter Avalos 		case OUTPUT_ED:
653855caec6SPeter Avalos 		  print_ed_script (script);
654855caec6SPeter Avalos 		  break;
655855caec6SPeter Avalos 
656855caec6SPeter Avalos 		case OUTPUT_FORWARD_ED:
657855caec6SPeter Avalos 		  pr_forward_ed_script (script);
658855caec6SPeter Avalos 		  break;
659855caec6SPeter Avalos 
660855caec6SPeter Avalos 		case OUTPUT_RCS:
661855caec6SPeter Avalos 		  print_rcs_script (script);
662855caec6SPeter Avalos 		  break;
663855caec6SPeter Avalos 
664855caec6SPeter Avalos 		case OUTPUT_NORMAL:
665855caec6SPeter Avalos 		  print_normal_script (script);
666855caec6SPeter Avalos 		  break;
667855caec6SPeter Avalos 
668855caec6SPeter Avalos 		case OUTPUT_IFDEF:
669855caec6SPeter Avalos 		  print_ifdef_script (script);
670855caec6SPeter Avalos 		  break;
671855caec6SPeter Avalos 
672855caec6SPeter Avalos 		case OUTPUT_SDIFF:
673855caec6SPeter Avalos 		  print_sdiff_script (script);
674855caec6SPeter Avalos 		  break;
675855caec6SPeter Avalos 
676855caec6SPeter Avalos 		default:
677855caec6SPeter Avalos 		  abort ();
678855caec6SPeter Avalos 		}
679855caec6SPeter Avalos 
680855caec6SPeter Avalos 	      finish_output ();
681855caec6SPeter Avalos 	    }
682855caec6SPeter Avalos 	}
683855caec6SPeter Avalos 
684855caec6SPeter Avalos       free (cmp->file[0].undiscarded);
685855caec6SPeter Avalos 
686855caec6SPeter Avalos       free (flag_space);
687855caec6SPeter Avalos 
688855caec6SPeter Avalos       for (f = 0; f < 2; f++)
689855caec6SPeter Avalos 	{
690855caec6SPeter Avalos 	  free (cmp->file[f].equivs);
691855caec6SPeter Avalos 	  free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
692855caec6SPeter Avalos 	}
693855caec6SPeter Avalos 
694855caec6SPeter Avalos       for (e = script; e; e = p)
695855caec6SPeter Avalos 	{
696855caec6SPeter Avalos 	  p = e->link;
697855caec6SPeter Avalos 	  free (e);
698855caec6SPeter Avalos 	}
699855caec6SPeter Avalos 
700855caec6SPeter Avalos       if (! ROBUST_OUTPUT_STYLE (output_style))
701855caec6SPeter Avalos 	for (f = 0; f < 2; ++f)
702855caec6SPeter Avalos 	  if (cmp->file[f].missing_newline)
703855caec6SPeter Avalos 	    {
704855caec6SPeter Avalos 	      error (0, 0, "%s: %s\n",
705855caec6SPeter Avalos 		     file_label[f] ? file_label[f] : cmp->file[f].name,
706855caec6SPeter Avalos 		     _("No newline at end of file"));
707855caec6SPeter Avalos 	      changes = 2;
708855caec6SPeter Avalos 	    }
709855caec6SPeter Avalos     }
710855caec6SPeter Avalos 
711855caec6SPeter Avalos   if (cmp->file[0].buffer != cmp->file[1].buffer)
712855caec6SPeter Avalos     free (cmp->file[0].buffer);
713855caec6SPeter Avalos   free (cmp->file[1].buffer);
714855caec6SPeter Avalos 
715855caec6SPeter Avalos   return changes;
716855caec6SPeter Avalos }
717