xref: /dragonfly/contrib/diffutils/src/dir.c (revision 6ea1f93e)
1855caec6SPeter Avalos /* Read, sort and compare two directories.  Used for GNU DIFF.
2855caec6SPeter Avalos 
344b87433SJohn Marino    Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4*6ea1f93eSDaniel Fojt    2009-2013, 2015-2018 Free Software Foundation, Inc.
5855caec6SPeter Avalos 
6855caec6SPeter Avalos    This file is part of GNU DIFF.
7855caec6SPeter Avalos 
844b87433SJohn Marino    This program is free software: you can redistribute it and/or modify
9855caec6SPeter Avalos    it under the terms of the GNU General Public License as published by
1044b87433SJohn Marino    the Free Software Foundation, either version 3 of the License, or
1144b87433SJohn Marino    (at your option) any later version.
12855caec6SPeter Avalos 
1344b87433SJohn Marino    This program is distributed in the hope that it will be useful,
14855caec6SPeter Avalos    but WITHOUT ANY WARRANTY; without even the implied warranty of
15855caec6SPeter Avalos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16855caec6SPeter Avalos    GNU General Public License for more details.
17855caec6SPeter Avalos 
18855caec6SPeter Avalos    You should have received a copy of the GNU General Public License
1944b87433SJohn Marino    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20855caec6SPeter Avalos 
21855caec6SPeter Avalos #include "diff.h"
22855caec6SPeter Avalos #include <error.h>
23855caec6SPeter Avalos #include <exclude.h>
24008e37b6SJohn Marino #include <filenamecat.h>
25855caec6SPeter Avalos #include <setjmp.h>
26855caec6SPeter Avalos #include <xalloc.h>
27855caec6SPeter Avalos 
28855caec6SPeter Avalos /* Read the directory named by DIR and store into DIRDATA a sorted vector
29855caec6SPeter Avalos    of filenames for its contents.  DIR->desc == -1 means this directory is
30855caec6SPeter Avalos    known to be nonexistent, so set DIRDATA to an empty vector.
31855caec6SPeter Avalos    Return -1 (setting errno) if error, 0 otherwise.  */
32855caec6SPeter Avalos 
33855caec6SPeter Avalos struct dirdata
34855caec6SPeter Avalos {
35855caec6SPeter Avalos   size_t nnames;	/* Number of names.  */
36855caec6SPeter Avalos   char const **names;	/* Sorted names of files in dir, followed by 0.  */
37855caec6SPeter Avalos   char *data;	/* Allocated storage for file names.  */
38855caec6SPeter Avalos };
39855caec6SPeter Avalos 
40855caec6SPeter Avalos /* Whether file names in directories should be compared with
41855caec6SPeter Avalos    locale-specific sorting.  */
42855caec6SPeter Avalos static bool locale_specific_sorting;
43855caec6SPeter Avalos 
44855caec6SPeter Avalos /* Where to go if locale-specific sorting fails.  */
45855caec6SPeter Avalos static jmp_buf failed_locale_specific_sorting;
46855caec6SPeter Avalos 
47855caec6SPeter Avalos static bool dir_loop (struct comparison const *, int);
48855caec6SPeter Avalos 
49855caec6SPeter Avalos 
50855caec6SPeter Avalos /* Read a directory and get its vector of names.  */
51855caec6SPeter Avalos 
52855caec6SPeter Avalos static bool
dir_read(struct file_data const * dir,struct dirdata * dirdata)53855caec6SPeter Avalos dir_read (struct file_data const *dir, struct dirdata *dirdata)
54855caec6SPeter Avalos {
55855caec6SPeter Avalos   register struct dirent *next;
56855caec6SPeter Avalos   register size_t i;
57855caec6SPeter Avalos 
58855caec6SPeter Avalos   /* Address of block containing the files that are described.  */
59855caec6SPeter Avalos   char const **names;
60855caec6SPeter Avalos 
61855caec6SPeter Avalos   /* Number of files in directory.  */
62855caec6SPeter Avalos   size_t nnames;
63855caec6SPeter Avalos 
64855caec6SPeter Avalos   /* Allocated and used storage for file name data.  */
65855caec6SPeter Avalos   char *data;
66855caec6SPeter Avalos   size_t data_alloc, data_used;
67855caec6SPeter Avalos 
68855caec6SPeter Avalos   dirdata->names = 0;
69855caec6SPeter Avalos   dirdata->data = 0;
70855caec6SPeter Avalos   nnames = 0;
71855caec6SPeter Avalos   data = 0;
72855caec6SPeter Avalos 
73855caec6SPeter Avalos   if (dir->desc != -1)
74855caec6SPeter Avalos     {
75855caec6SPeter Avalos       /* Open the directory and check for errors.  */
76855caec6SPeter Avalos       register DIR *reading = opendir (dir->name);
77855caec6SPeter Avalos       if (!reading)
78855caec6SPeter Avalos 	return false;
79855caec6SPeter Avalos 
80855caec6SPeter Avalos       /* Initialize the table of filenames.  */
81855caec6SPeter Avalos 
82855caec6SPeter Avalos       data_alloc = 512;
83855caec6SPeter Avalos       data_used = 0;
84855caec6SPeter Avalos       dirdata->data = data = xmalloc (data_alloc);
85855caec6SPeter Avalos 
86855caec6SPeter Avalos       /* Read the directory entries, and insert the subfiles
874536c563SJohn Marino 	 into the 'data' table.  */
88855caec6SPeter Avalos 
89855caec6SPeter Avalos       while ((errno = 0, (next = readdir (reading)) != 0))
90855caec6SPeter Avalos 	{
91855caec6SPeter Avalos 	  char *d_name = next->d_name;
9244b87433SJohn Marino 	  size_t d_size = _D_EXACT_NAMLEN (next) + 1;
93855caec6SPeter Avalos 
94855caec6SPeter Avalos 	  /* Ignore "." and "..".  */
95855caec6SPeter Avalos 	  if (d_name[0] == '.'
96855caec6SPeter Avalos 	      && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
97855caec6SPeter Avalos 	    continue;
98855caec6SPeter Avalos 
9944b87433SJohn Marino 	  if (excluded_file_name (excluded, d_name))
100855caec6SPeter Avalos 	    continue;
101855caec6SPeter Avalos 
102855caec6SPeter Avalos 	  while (data_alloc < data_used + d_size)
103855caec6SPeter Avalos 	    {
104855caec6SPeter Avalos 	      if (PTRDIFF_MAX / 2 <= data_alloc)
105855caec6SPeter Avalos 		xalloc_die ();
106855caec6SPeter Avalos 	      dirdata->data = data = xrealloc (data, data_alloc *= 2);
107855caec6SPeter Avalos 	    }
108855caec6SPeter Avalos 
109855caec6SPeter Avalos 	  memcpy (data + data_used, d_name, d_size);
110855caec6SPeter Avalos 	  data_used += d_size;
111855caec6SPeter Avalos 	  nnames++;
112855caec6SPeter Avalos 	}
113855caec6SPeter Avalos       if (errno)
114855caec6SPeter Avalos 	{
115855caec6SPeter Avalos 	  int e = errno;
116855caec6SPeter Avalos 	  closedir (reading);
117855caec6SPeter Avalos 	  errno = e;
118855caec6SPeter Avalos 	  return false;
119855caec6SPeter Avalos 	}
120855caec6SPeter Avalos #if CLOSEDIR_VOID
121855caec6SPeter Avalos       closedir (reading);
122855caec6SPeter Avalos #else
123855caec6SPeter Avalos       if (closedir (reading) != 0)
124855caec6SPeter Avalos 	return false;
125855caec6SPeter Avalos #endif
126855caec6SPeter Avalos     }
127855caec6SPeter Avalos 
1284536c563SJohn Marino   /* Create the 'names' table from the 'data' table.  */
129855caec6SPeter Avalos   if (PTRDIFF_MAX / sizeof *names - 1 <= nnames)
130855caec6SPeter Avalos     xalloc_die ();
131855caec6SPeter Avalos   dirdata->names = names = xmalloc ((nnames + 1) * sizeof *names);
132855caec6SPeter Avalos   dirdata->nnames = nnames;
133855caec6SPeter Avalos   for (i = 0;  i < nnames;  i++)
134855caec6SPeter Avalos     {
135855caec6SPeter Avalos       names[i] = data;
136855caec6SPeter Avalos       data += strlen (data) + 1;
137855caec6SPeter Avalos     }
138855caec6SPeter Avalos   names[nnames] = 0;
139855caec6SPeter Avalos   return true;
140855caec6SPeter Avalos }
141855caec6SPeter Avalos 
142*6ea1f93eSDaniel Fojt /* Compare strings in a locale-specific way, returning a value
143*6ea1f93eSDaniel Fojt    compatible with strcmp.  */
144855caec6SPeter Avalos 
145855caec6SPeter Avalos static int
compare_collated(char const * name1,char const * name2)146*6ea1f93eSDaniel Fojt compare_collated (char const *name1, char const *name2)
147855caec6SPeter Avalos {
148855caec6SPeter Avalos   int r;
149855caec6SPeter Avalos   errno = 0;
150855caec6SPeter Avalos   if (ignore_file_name_case)
151855caec6SPeter Avalos     r = strcasecoll (name1, name2);
152855caec6SPeter Avalos   else
153855caec6SPeter Avalos     r = strcoll (name1, name2);
154855caec6SPeter Avalos   if (errno)
155855caec6SPeter Avalos     {
1564536c563SJohn Marino       error (0, errno, _("cannot compare file names '%s' and '%s'"),
157855caec6SPeter Avalos 	     name1, name2);
158855caec6SPeter Avalos       longjmp (failed_locale_specific_sorting, 1);
159855caec6SPeter Avalos     }
160855caec6SPeter Avalos   return r;
161855caec6SPeter Avalos }
162855caec6SPeter Avalos 
163*6ea1f93eSDaniel Fojt /* Compare file names, returning a value compatible with strcmp.  */
164*6ea1f93eSDaniel Fojt 
165*6ea1f93eSDaniel Fojt static int
compare_names(char const * name1,char const * name2)166*6ea1f93eSDaniel Fojt compare_names (char const *name1, char const *name2)
167*6ea1f93eSDaniel Fojt {
168*6ea1f93eSDaniel Fojt   if (locale_specific_sorting)
169*6ea1f93eSDaniel Fojt     {
170*6ea1f93eSDaniel Fojt       int diff = compare_collated (name1, name2);
171*6ea1f93eSDaniel Fojt       if (diff || ignore_file_name_case)
172*6ea1f93eSDaniel Fojt 	return diff;
173*6ea1f93eSDaniel Fojt     }
174008e37b6SJohn Marino   return file_name_cmp (name1, name2);
175855caec6SPeter Avalos }
176855caec6SPeter Avalos 
177008e37b6SJohn Marino /* Compare names FILE1 and FILE2 when sorting a directory.
178008e37b6SJohn Marino    Prefer filtered comparison, breaking ties with file_name_cmp.  */
179855caec6SPeter Avalos 
180855caec6SPeter Avalos static int
compare_names_for_qsort(void const * file1,void const * file2)181855caec6SPeter Avalos compare_names_for_qsort (void const *file1, void const *file2)
182855caec6SPeter Avalos {
183855caec6SPeter Avalos   char const *const *f1 = file1;
184855caec6SPeter Avalos   char const *const *f2 = file2;
185*6ea1f93eSDaniel Fojt   char const *name1 = *f1;
186*6ea1f93eSDaniel Fojt   char const *name2 = *f2;
187*6ea1f93eSDaniel Fojt   if (locale_specific_sorting)
188*6ea1f93eSDaniel Fojt     {
189*6ea1f93eSDaniel Fojt       int diff = compare_collated (name1, name2);
190*6ea1f93eSDaniel Fojt       if (diff)
191*6ea1f93eSDaniel Fojt 	return diff;
192*6ea1f93eSDaniel Fojt     }
193*6ea1f93eSDaniel Fojt   return file_name_cmp (name1, name2);
194855caec6SPeter Avalos }
195855caec6SPeter Avalos 
196855caec6SPeter Avalos /* Compare the contents of two directories named in CMP.
197855caec6SPeter Avalos    This is a top-level routine; it does everything necessary for diff
198855caec6SPeter Avalos    on two directories.
199855caec6SPeter Avalos 
200855caec6SPeter Avalos    CMP->file[0].desc == -1 says directory CMP->file[0] doesn't exist,
201855caec6SPeter Avalos    but pretend it is empty.  Likewise for CMP->file[1].
202855caec6SPeter Avalos 
203855caec6SPeter Avalos    HANDLE_FILE is a caller-provided subroutine called to handle each file.
204855caec6SPeter Avalos    It gets three operands: CMP, name of file in dir 0, name of file in dir 1.
205855caec6SPeter Avalos    These names are relative to the original working directory.
206855caec6SPeter Avalos 
207855caec6SPeter Avalos    For a file that appears in only one of the dirs, one of the name-args
208855caec6SPeter Avalos    to HANDLE_FILE is zero.
209855caec6SPeter Avalos 
210855caec6SPeter Avalos    Returns the maximum of all the values returned by HANDLE_FILE,
211855caec6SPeter Avalos    or EXIT_TROUBLE if trouble is encountered in opening files.  */
212855caec6SPeter Avalos 
213855caec6SPeter Avalos int
diff_dirs(struct comparison const * cmp,int (* handle_file)(struct comparison const *,char const *,char const *))214855caec6SPeter Avalos diff_dirs (struct comparison const *cmp,
215855caec6SPeter Avalos 	   int (*handle_file) (struct comparison const *,
216855caec6SPeter Avalos 			       char const *, char const *))
217855caec6SPeter Avalos {
218855caec6SPeter Avalos   struct dirdata dirdata[2];
219855caec6SPeter Avalos   int volatile val = EXIT_SUCCESS;
220855caec6SPeter Avalos   int i;
221855caec6SPeter Avalos 
222855caec6SPeter Avalos   if ((cmp->file[0].desc == -1 || dir_loop (cmp, 0))
223855caec6SPeter Avalos       && (cmp->file[1].desc == -1 || dir_loop (cmp, 1)))
224855caec6SPeter Avalos     {
22544b87433SJohn Marino       error (0, 0, _("%s: recursive directory loop"),
226855caec6SPeter Avalos 	     cmp->file[cmp->file[0].desc == -1].name);
227855caec6SPeter Avalos       return EXIT_TROUBLE;
228855caec6SPeter Avalos     }
229855caec6SPeter Avalos 
230855caec6SPeter Avalos   /* Get contents of both dirs.  */
231855caec6SPeter Avalos   for (i = 0; i < 2; i++)
232855caec6SPeter Avalos     if (! dir_read (&cmp->file[i], &dirdata[i]))
233855caec6SPeter Avalos       {
234855caec6SPeter Avalos 	perror_with_name (cmp->file[i].name);
235855caec6SPeter Avalos 	val = EXIT_TROUBLE;
236855caec6SPeter Avalos       }
237855caec6SPeter Avalos 
238855caec6SPeter Avalos   if (val == EXIT_SUCCESS)
239855caec6SPeter Avalos     {
240855caec6SPeter Avalos       char const **volatile names[2];
241855caec6SPeter Avalos       names[0] = dirdata[0].names;
242855caec6SPeter Avalos       names[1] = dirdata[1].names;
243855caec6SPeter Avalos 
244855caec6SPeter Avalos       /* Use locale-specific sorting if possible, else native byte order.  */
245855caec6SPeter Avalos       locale_specific_sorting = true;
246855caec6SPeter Avalos       if (setjmp (failed_locale_specific_sorting))
247855caec6SPeter Avalos 	locale_specific_sorting = false;
248855caec6SPeter Avalos 
249855caec6SPeter Avalos       /* Sort the directories.  */
250855caec6SPeter Avalos       for (i = 0; i < 2; i++)
251855caec6SPeter Avalos 	qsort (names[i], dirdata[i].nnames, sizeof *dirdata[i].names,
252855caec6SPeter Avalos 	       compare_names_for_qsort);
253855caec6SPeter Avalos 
2544536c563SJohn Marino       /* If '-S name' was given, and this is the topmost level of comparison,
255855caec6SPeter Avalos 	 ignore all file names less than the specified starting name.  */
256855caec6SPeter Avalos 
257855caec6SPeter Avalos       if (starting_file && ! cmp->parent)
258855caec6SPeter Avalos 	{
259855caec6SPeter Avalos 	  while (*names[0] && compare_names (*names[0], starting_file) < 0)
260855caec6SPeter Avalos 	    names[0]++;
261855caec6SPeter Avalos 	  while (*names[1] && compare_names (*names[1], starting_file) < 0)
262855caec6SPeter Avalos 	    names[1]++;
263855caec6SPeter Avalos 	}
264855caec6SPeter Avalos 
265855caec6SPeter Avalos       /* Loop while files remain in one or both dirs.  */
266855caec6SPeter Avalos       while (*names[0] || *names[1])
267855caec6SPeter Avalos 	{
268855caec6SPeter Avalos 	  /* Compare next name in dir 0 with next name in dir 1.
269855caec6SPeter Avalos 	     At the end of a dir,
270855caec6SPeter Avalos 	     pretend the "next name" in that dir is very large.  */
271855caec6SPeter Avalos 	  int nameorder = (!*names[0] ? 1 : !*names[1] ? -1
272855caec6SPeter Avalos 			   : compare_names (*names[0], *names[1]));
273008e37b6SJohn Marino 
274008e37b6SJohn Marino 	  /* Prefer a file_name_cmp match if available.  This algorithm is
275008e37b6SJohn Marino 	     O(N**2), where N is the number of names in a directory
276008e37b6SJohn Marino 	     that compare_names says are all equal, but in practice N
277008e37b6SJohn Marino 	     is so small it's not worth tuning.  */
278*6ea1f93eSDaniel Fojt 	  if (nameorder == 0 && ignore_file_name_case)
279008e37b6SJohn Marino 	    {
280008e37b6SJohn Marino 	      int raw_order = file_name_cmp (*names[0], *names[1]);
281008e37b6SJohn Marino 	      if (raw_order != 0)
282008e37b6SJohn Marino 		{
283008e37b6SJohn Marino 		  int greater_side = raw_order < 0;
284008e37b6SJohn Marino 		  int lesser_side = 1 - greater_side;
285008e37b6SJohn Marino 		  char const **lesser = names[lesser_side];
286008e37b6SJohn Marino 		  char const *greater_name = *names[greater_side];
287008e37b6SJohn Marino 		  char const **p;
288008e37b6SJohn Marino 
289008e37b6SJohn Marino 		  for (p = lesser + 1;
290008e37b6SJohn Marino 		       *p && compare_names (*p, greater_name) == 0;
291008e37b6SJohn Marino 		       p++)
292008e37b6SJohn Marino 		    {
293008e37b6SJohn Marino 		      int c = file_name_cmp (*p, greater_name);
294008e37b6SJohn Marino 		      if (0 <= c)
295008e37b6SJohn Marino 			{
296008e37b6SJohn Marino 			  if (c == 0)
297008e37b6SJohn Marino 			    {
298008e37b6SJohn Marino 			      memmove (lesser + 1, lesser,
299008e37b6SJohn Marino 				       (char *) p - (char *) lesser);
300008e37b6SJohn Marino 			      *lesser = greater_name;
301008e37b6SJohn Marino 			    }
302008e37b6SJohn Marino 			  break;
303008e37b6SJohn Marino 			}
304008e37b6SJohn Marino 		    }
305008e37b6SJohn Marino 		}
306008e37b6SJohn Marino 	    }
307008e37b6SJohn Marino 
308855caec6SPeter Avalos 	  int v1 = (*handle_file) (cmp,
309855caec6SPeter Avalos 				   0 < nameorder ? 0 : *names[0]++,
310855caec6SPeter Avalos 				   nameorder < 0 ? 0 : *names[1]++);
311855caec6SPeter Avalos 	  if (val < v1)
312855caec6SPeter Avalos 	    val = v1;
313855caec6SPeter Avalos 	}
314855caec6SPeter Avalos     }
315855caec6SPeter Avalos 
316855caec6SPeter Avalos   for (i = 0; i < 2; i++)
317855caec6SPeter Avalos     {
318855caec6SPeter Avalos       free (dirdata[i].names);
319855caec6SPeter Avalos       free (dirdata[i].data);
320855caec6SPeter Avalos     }
321855caec6SPeter Avalos 
322855caec6SPeter Avalos   return val;
323855caec6SPeter Avalos }
324855caec6SPeter Avalos 
325855caec6SPeter Avalos /* Return nonzero if CMP is looping recursively in argument I.  */
326855caec6SPeter Avalos 
3274536c563SJohn Marino static bool _GL_ATTRIBUTE_PURE
dir_loop(struct comparison const * cmp,int i)328855caec6SPeter Avalos dir_loop (struct comparison const *cmp, int i)
329855caec6SPeter Avalos {
330855caec6SPeter Avalos   struct comparison const *p = cmp;
331855caec6SPeter Avalos   while ((p = p->parent))
332855caec6SPeter Avalos     if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat))
333855caec6SPeter Avalos       return true;
334855caec6SPeter Avalos   return false;
335855caec6SPeter Avalos }
336008e37b6SJohn Marino 
337008e37b6SJohn Marino /* Find a matching filename in a directory.  */
338008e37b6SJohn Marino 
339008e37b6SJohn Marino char *
find_dir_file_pathname(char const * dir,char const * file)340008e37b6SJohn Marino find_dir_file_pathname (char const *dir, char const *file)
341008e37b6SJohn Marino {
3424536c563SJohn Marino   /* The 'IF_LINT (volatile)' works around what appears to be a bug in
3434536c563SJohn Marino      gcc 4.8.0 20120825; see
3444536c563SJohn Marino      <http://lists.gnu.org/archive/html/bug-diffutils/2012-08/msg00007.html>.
3454536c563SJohn Marino      */
3464536c563SJohn Marino   char const * IF_LINT (volatile) match = file;
3474536c563SJohn Marino 
348008e37b6SJohn Marino   char *val;
349008e37b6SJohn Marino   struct dirdata dirdata;
350008e37b6SJohn Marino   dirdata.names = NULL;
351008e37b6SJohn Marino   dirdata.data = NULL;
352008e37b6SJohn Marino 
353008e37b6SJohn Marino   if (ignore_file_name_case)
354008e37b6SJohn Marino     {
355008e37b6SJohn Marino       struct file_data filedata;
356008e37b6SJohn Marino       filedata.name = dir;
357008e37b6SJohn Marino       filedata.desc = 0;
358008e37b6SJohn Marino 
359008e37b6SJohn Marino       if (dir_read (&filedata, &dirdata))
360008e37b6SJohn Marino 	{
361008e37b6SJohn Marino 	  locale_specific_sorting = true;
362008e37b6SJohn Marino 	  if (setjmp (failed_locale_specific_sorting))
363008e37b6SJohn Marino 	    match = file; /* longjmp may mess up MATCH.  */
364008e37b6SJohn Marino 	  else
365008e37b6SJohn Marino 	    {
366008e37b6SJohn Marino 	      for (char const **p = dirdata.names; *p; p++)
367008e37b6SJohn Marino 		if (compare_names (*p, file) == 0)
368008e37b6SJohn Marino 		  {
369008e37b6SJohn Marino 		    if (file_name_cmp (*p, file) == 0)
370008e37b6SJohn Marino 		      {
371008e37b6SJohn Marino 			match = *p;
372008e37b6SJohn Marino 			break;
373008e37b6SJohn Marino 		      }
374008e37b6SJohn Marino 		    if (match == file)
375008e37b6SJohn Marino 		      match = *p;
376008e37b6SJohn Marino 		  }
377008e37b6SJohn Marino 	    }
378008e37b6SJohn Marino 	}
379008e37b6SJohn Marino     }
380008e37b6SJohn Marino 
381008e37b6SJohn Marino   val = file_name_concat (dir, match, NULL);
382008e37b6SJohn Marino   free (dirdata.names);
383008e37b6SJohn Marino   free (dirdata.data);
384008e37b6SJohn Marino   return val;
385008e37b6SJohn Marino }
386