xref: /dragonfly/contrib/diffutils/src/dir.c (revision d50f9ae3)
1 /* Read, sort and compare two directories.  Used for GNU DIFF.
2 
3    Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4    2009-2013, 2015-2018 Free Software Foundation, Inc.
5 
6    This file is part of GNU DIFF.
7 
8    This program is free software: you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation, either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include "diff.h"
22 #include <error.h>
23 #include <exclude.h>
24 #include <filenamecat.h>
25 #include <setjmp.h>
26 #include <xalloc.h>
27 
/* The file names found in one directory, as collected by dir_read.
   For a directory known not to exist (its descriptor is -1), dir_read
   produces an empty vector: NNAMES is 0, NAMES holds only the
   terminating null pointer, and DATA is null.  */

struct dirdata
{
  /* Number of entries in NAMES, not counting the terminating null
     pointer.  */
  size_t nnames;

  /* Vector of NNAMES file names followed by a null pointer.  dir_read
     fills it in the order the directory was read; diff_dirs later
     sorts it in place.  Each entry points into DATA.  */
  char const **names;

  /* Single allocation holding the NUL-terminated text of every name,
     packed back to back.  */
  char *data;
};
39 
/* True while file names are being ordered with the locale's collation
   rules; false once the code has fallen back to file_name_cmp alone.  */
static bool locale_specific_sorting;

/* Jump target used by compare_collated to abandon locale-specific
   sorting when a collation call reports an error.  */
static jmp_buf failed_locale_specific_sorting;

/* Defined later in this file; declared here because diff_dirs calls it
   first.  */
static bool dir_loop (struct comparison const *cmp, int i);
48 
49 
50 /* Read a directory and get its vector of names.  */
51 
52 static bool
53 dir_read (struct file_data const *dir, struct dirdata *dirdata)
54 {
55   register struct dirent *next;
56   register size_t i;
57 
58   /* Address of block containing the files that are described.  */
59   char const **names;
60 
61   /* Number of files in directory.  */
62   size_t nnames;
63 
64   /* Allocated and used storage for file name data.  */
65   char *data;
66   size_t data_alloc, data_used;
67 
68   dirdata->names = 0;
69   dirdata->data = 0;
70   nnames = 0;
71   data = 0;
72 
73   if (dir->desc != -1)
74     {
75       /* Open the directory and check for errors.  */
76       register DIR *reading = opendir (dir->name);
77       if (!reading)
78 	return false;
79 
80       /* Initialize the table of filenames.  */
81 
82       data_alloc = 512;
83       data_used = 0;
84       dirdata->data = data = xmalloc (data_alloc);
85 
86       /* Read the directory entries, and insert the subfiles
87 	 into the 'data' table.  */
88 
89       while ((errno = 0, (next = readdir (reading)) != 0))
90 	{
91 	  char *d_name = next->d_name;
92 	  size_t d_size = _D_EXACT_NAMLEN (next) + 1;
93 
94 	  /* Ignore "." and "..".  */
95 	  if (d_name[0] == '.'
96 	      && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
97 	    continue;
98 
99 	  if (excluded_file_name (excluded, d_name))
100 	    continue;
101 
102 	  while (data_alloc < data_used + d_size)
103 	    {
104 	      if (PTRDIFF_MAX / 2 <= data_alloc)
105 		xalloc_die ();
106 	      dirdata->data = data = xrealloc (data, data_alloc *= 2);
107 	    }
108 
109 	  memcpy (data + data_used, d_name, d_size);
110 	  data_used += d_size;
111 	  nnames++;
112 	}
113       if (errno)
114 	{
115 	  int e = errno;
116 	  closedir (reading);
117 	  errno = e;
118 	  return false;
119 	}
120 #if CLOSEDIR_VOID
121       closedir (reading);
122 #else
123       if (closedir (reading) != 0)
124 	return false;
125 #endif
126     }
127 
128   /* Create the 'names' table from the 'data' table.  */
129   if (PTRDIFF_MAX / sizeof *names - 1 <= nnames)
130     xalloc_die ();
131   dirdata->names = names = xmalloc ((nnames + 1) * sizeof *names);
132   dirdata->nnames = nnames;
133   for (i = 0;  i < nnames;  i++)
134     {
135       names[i] = data;
136       data += strlen (data) + 1;
137     }
138   names[nnames] = 0;
139   return true;
140 }
141 
142 /* Compare strings in a locale-specific way, returning a value
143    compatible with strcmp.  */
144 
145 static int
146 compare_collated (char const *name1, char const *name2)
147 {
148   int r;
149   errno = 0;
150   if (ignore_file_name_case)
151     r = strcasecoll (name1, name2);
152   else
153     r = strcoll (name1, name2);
154   if (errno)
155     {
156       error (0, errno, _("cannot compare file names '%s' and '%s'"),
157 	     name1, name2);
158       longjmp (failed_locale_specific_sorting, 1);
159     }
160   return r;
161 }
162 
163 /* Compare file names, returning a value compatible with strcmp.  */
164 
165 static int
166 compare_names (char const *name1, char const *name2)
167 {
168   if (locale_specific_sorting)
169     {
170       int diff = compare_collated (name1, name2);
171       if (diff || ignore_file_name_case)
172 	return diff;
173     }
174   return file_name_cmp (name1, name2);
175 }
176 
177 /* Compare names FILE1 and FILE2 when sorting a directory.
178    Prefer filtered comparison, breaking ties with file_name_cmp.  */
179 
180 static int
181 compare_names_for_qsort (void const *file1, void const *file2)
182 {
183   char const *const *f1 = file1;
184   char const *const *f2 = file2;
185   char const *name1 = *f1;
186   char const *name2 = *f2;
187   if (locale_specific_sorting)
188     {
189       int diff = compare_collated (name1, name2);
190       if (diff)
191 	return diff;
192     }
193   return file_name_cmp (name1, name2);
194 }
195 
196 /* Compare the contents of two directories named in CMP.
197    This is a top-level routine; it does everything necessary for diff
198    on two directories.
199 
200    CMP->file[0].desc == -1 says directory CMP->file[0] doesn't exist,
201    but pretend it is empty.  Likewise for CMP->file[1].
202 
203    HANDLE_FILE is a caller-provided subroutine called to handle each file.
204    It gets three operands: CMP, name of file in dir 0, name of file in dir 1.
205    These names are relative to the original working directory.
206 
207    For a file that appears in only one of the dirs, one of the name-args
208    to HANDLE_FILE is zero.
209 
210    Returns the maximum of all the values returned by HANDLE_FILE,
211    or EXIT_TROUBLE if trouble is encountered in opening files.  */
212 
213 int
214 diff_dirs (struct comparison const *cmp,
215 	   int (*handle_file) (struct comparison const *,
216 			       char const *, char const *))
217 {
218   struct dirdata dirdata[2];
219   int volatile val = EXIT_SUCCESS;
220   int i;
221 
222   if ((cmp->file[0].desc == -1 || dir_loop (cmp, 0))
223       && (cmp->file[1].desc == -1 || dir_loop (cmp, 1)))
224     {
225       error (0, 0, _("%s: recursive directory loop"),
226 	     cmp->file[cmp->file[0].desc == -1].name);
227       return EXIT_TROUBLE;
228     }
229 
230   /* Get contents of both dirs.  */
231   for (i = 0; i < 2; i++)
232     if (! dir_read (&cmp->file[i], &dirdata[i]))
233       {
234 	perror_with_name (cmp->file[i].name);
235 	val = EXIT_TROUBLE;
236       }
237 
238   if (val == EXIT_SUCCESS)
239     {
240       char const **volatile names[2];
241       names[0] = dirdata[0].names;
242       names[1] = dirdata[1].names;
243 
244       /* Use locale-specific sorting if possible, else native byte order.  */
245       locale_specific_sorting = true;
246       if (setjmp (failed_locale_specific_sorting))
247 	locale_specific_sorting = false;
248 
249       /* Sort the directories.  */
250       for (i = 0; i < 2; i++)
251 	qsort (names[i], dirdata[i].nnames, sizeof *dirdata[i].names,
252 	       compare_names_for_qsort);
253 
254       /* If '-S name' was given, and this is the topmost level of comparison,
255 	 ignore all file names less than the specified starting name.  */
256 
257       if (starting_file && ! cmp->parent)
258 	{
259 	  while (*names[0] && compare_names (*names[0], starting_file) < 0)
260 	    names[0]++;
261 	  while (*names[1] && compare_names (*names[1], starting_file) < 0)
262 	    names[1]++;
263 	}
264 
265       /* Loop while files remain in one or both dirs.  */
266       while (*names[0] || *names[1])
267 	{
268 	  /* Compare next name in dir 0 with next name in dir 1.
269 	     At the end of a dir,
270 	     pretend the "next name" in that dir is very large.  */
271 	  int nameorder = (!*names[0] ? 1 : !*names[1] ? -1
272 			   : compare_names (*names[0], *names[1]));
273 
274 	  /* Prefer a file_name_cmp match if available.  This algorithm is
275 	     O(N**2), where N is the number of names in a directory
276 	     that compare_names says are all equal, but in practice N
277 	     is so small it's not worth tuning.  */
278 	  if (nameorder == 0 && ignore_file_name_case)
279 	    {
280 	      int raw_order = file_name_cmp (*names[0], *names[1]);
281 	      if (raw_order != 0)
282 		{
283 		  int greater_side = raw_order < 0;
284 		  int lesser_side = 1 - greater_side;
285 		  char const **lesser = names[lesser_side];
286 		  char const *greater_name = *names[greater_side];
287 		  char const **p;
288 
289 		  for (p = lesser + 1;
290 		       *p && compare_names (*p, greater_name) == 0;
291 		       p++)
292 		    {
293 		      int c = file_name_cmp (*p, greater_name);
294 		      if (0 <= c)
295 			{
296 			  if (c == 0)
297 			    {
298 			      memmove (lesser + 1, lesser,
299 				       (char *) p - (char *) lesser);
300 			      *lesser = greater_name;
301 			    }
302 			  break;
303 			}
304 		    }
305 		}
306 	    }
307 
308 	  int v1 = (*handle_file) (cmp,
309 				   0 < nameorder ? 0 : *names[0]++,
310 				   nameorder < 0 ? 0 : *names[1]++);
311 	  if (val < v1)
312 	    val = v1;
313 	}
314     }
315 
316   for (i = 0; i < 2; i++)
317     {
318       free (dirdata[i].names);
319       free (dirdata[i].data);
320     }
321 
322   return val;
323 }
324 
325 /* Return nonzero if CMP is looping recursively in argument I.  */
326 
327 static bool _GL_ATTRIBUTE_PURE
328 dir_loop (struct comparison const *cmp, int i)
329 {
330   struct comparison const *p = cmp;
331   while ((p = p->parent))
332     if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat))
333       return true;
334   return false;
335 }
336 
337 /* Find a matching filename in a directory.  */
338 
339 char *
340 find_dir_file_pathname (char const *dir, char const *file)
341 {
342   /* The 'IF_LINT (volatile)' works around what appears to be a bug in
343      gcc 4.8.0 20120825; see
344      <http://lists.gnu.org/archive/html/bug-diffutils/2012-08/msg00007.html>.
345      */
346   char const * IF_LINT (volatile) match = file;
347 
348   char *val;
349   struct dirdata dirdata;
350   dirdata.names = NULL;
351   dirdata.data = NULL;
352 
353   if (ignore_file_name_case)
354     {
355       struct file_data filedata;
356       filedata.name = dir;
357       filedata.desc = 0;
358 
359       if (dir_read (&filedata, &dirdata))
360 	{
361 	  locale_specific_sorting = true;
362 	  if (setjmp (failed_locale_specific_sorting))
363 	    match = file; /* longjmp may mess up MATCH.  */
364 	  else
365 	    {
366 	      for (char const **p = dirdata.names; *p; p++)
367 		if (compare_names (*p, file) == 0)
368 		  {
369 		    if (file_name_cmp (*p, file) == 0)
370 		      {
371 			match = *p;
372 			break;
373 		      }
374 		    if (match == file)
375 		      match = *p;
376 		  }
377 	    }
378 	}
379     }
380 
381   val = file_name_concat (dir, match, NULL);
382   free (dirdata.names);
383   free (dirdata.data);
384   return val;
385 }
386