1 /* Read, sort and compare two directories. Used for GNU DIFF.
2
3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4 2009-2013, 2015-2018 Free Software Foundation, Inc.
5
6 This file is part of GNU DIFF.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 #include "diff.h"
22 #include <error.h>
23 #include <exclude.h>
24 #include <filenamecat.h>
25 #include <setjmp.h>
26 #include <xalloc.h>
27
/* dir_read, defined below, reads the directory named by DIR and stores
   into DIRDATA a vector of file names for its contents, in the order
   readdir returned them; diff_dirs sorts the vector afterwards.
   DIR->desc == -1 means this directory is known to be nonexistent, so
   DIRDATA is set to an empty vector.
   dir_read returns false (with errno set) on error, true otherwise.  */
32
/* The file names found in one directory, as filled in by dir_read.  */
struct dirdata
{
  /* Count of entries in NAMES, not counting the terminating null.  */
  size_t nnames;

  /* Vector of pointers to the file names held in DATA, followed by a
     null pointer.  diff_dirs sorts this vector in place.  */
  char const **names;

  /* Allocated storage holding the file names themselves, one
     NUL-terminated string after another.  */
  char *data;
};
39
/* True while file names are to be ordered with the locale's collation
   rules (see compare_collated); false to rely on file_name_cmp alone.  */
static bool locale_specific_sorting;

/* Jump target armed with setjmp by the code that turns on
   locale_specific_sorting.  compare_collated longjmps here when a
   collation call fails.  */
static jmp_buf failed_locale_specific_sorting;

/* Forward declaration: diff_dirs calls dir_loop before its definition.  */
static bool dir_loop (struct comparison const *, int);
48
49
50 /* Read a directory and get its vector of names. */
51
52 static bool
dir_read(struct file_data const * dir,struct dirdata * dirdata)53 dir_read (struct file_data const *dir, struct dirdata *dirdata)
54 {
55 register struct dirent *next;
56 register size_t i;
57
58 /* Address of block containing the files that are described. */
59 char const **names;
60
61 /* Number of files in directory. */
62 size_t nnames;
63
64 /* Allocated and used storage for file name data. */
65 char *data;
66 size_t data_alloc, data_used;
67
68 dirdata->names = 0;
69 dirdata->data = 0;
70 nnames = 0;
71 data = 0;
72
73 if (dir->desc != -1)
74 {
75 /* Open the directory and check for errors. */
76 register DIR *reading = opendir (dir->name);
77 if (!reading)
78 return false;
79
80 /* Initialize the table of filenames. */
81
82 data_alloc = 512;
83 data_used = 0;
84 dirdata->data = data = xmalloc (data_alloc);
85
86 /* Read the directory entries, and insert the subfiles
87 into the 'data' table. */
88
89 while ((errno = 0, (next = readdir (reading)) != 0))
90 {
91 char *d_name = next->d_name;
92 size_t d_size = _D_EXACT_NAMLEN (next) + 1;
93
94 /* Ignore "." and "..". */
95 if (d_name[0] == '.'
96 && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
97 continue;
98
99 if (excluded_file_name (excluded, d_name))
100 continue;
101
102 while (data_alloc < data_used + d_size)
103 {
104 if (PTRDIFF_MAX / 2 <= data_alloc)
105 xalloc_die ();
106 dirdata->data = data = xrealloc (data, data_alloc *= 2);
107 }
108
109 memcpy (data + data_used, d_name, d_size);
110 data_used += d_size;
111 nnames++;
112 }
113 if (errno)
114 {
115 int e = errno;
116 closedir (reading);
117 errno = e;
118 return false;
119 }
120 #if CLOSEDIR_VOID
121 closedir (reading);
122 #else
123 if (closedir (reading) != 0)
124 return false;
125 #endif
126 }
127
128 /* Create the 'names' table from the 'data' table. */
129 if (PTRDIFF_MAX / sizeof *names - 1 <= nnames)
130 xalloc_die ();
131 dirdata->names = names = xmalloc ((nnames + 1) * sizeof *names);
132 dirdata->nnames = nnames;
133 for (i = 0; i < nnames; i++)
134 {
135 names[i] = data;
136 data += strlen (data) + 1;
137 }
138 names[nnames] = 0;
139 return true;
140 }
141
142 /* Compare strings in a locale-specific way, returning a value
143 compatible with strcmp. */
144
145 static int
compare_collated(char const * name1,char const * name2)146 compare_collated (char const *name1, char const *name2)
147 {
148 int r;
149 errno = 0;
150 if (ignore_file_name_case)
151 r = strcasecoll (name1, name2);
152 else
153 r = strcoll (name1, name2);
154 if (errno)
155 {
156 error (0, errno, _("cannot compare file names '%s' and '%s'"),
157 name1, name2);
158 longjmp (failed_locale_specific_sorting, 1);
159 }
160 return r;
161 }
162
163 /* Compare file names, returning a value compatible with strcmp. */
164
165 static int
compare_names(char const * name1,char const * name2)166 compare_names (char const *name1, char const *name2)
167 {
168 if (locale_specific_sorting)
169 {
170 int diff = compare_collated (name1, name2);
171 if (diff || ignore_file_name_case)
172 return diff;
173 }
174 return file_name_cmp (name1, name2);
175 }
176
177 /* Compare names FILE1 and FILE2 when sorting a directory.
178 Prefer filtered comparison, breaking ties with file_name_cmp. */
179
180 static int
compare_names_for_qsort(void const * file1,void const * file2)181 compare_names_for_qsort (void const *file1, void const *file2)
182 {
183 char const *const *f1 = file1;
184 char const *const *f2 = file2;
185 char const *name1 = *f1;
186 char const *name2 = *f2;
187 if (locale_specific_sorting)
188 {
189 int diff = compare_collated (name1, name2);
190 if (diff)
191 return diff;
192 }
193 return file_name_cmp (name1, name2);
194 }
195
196 /* Compare the contents of two directories named in CMP.
197 This is a top-level routine; it does everything necessary for diff
198 on two directories.
199
200 CMP->file[0].desc == -1 says directory CMP->file[0] doesn't exist,
201 but pretend it is empty. Likewise for CMP->file[1].
202
203 HANDLE_FILE is a caller-provided subroutine called to handle each file.
204 It gets three operands: CMP, name of file in dir 0, name of file in dir 1.
205 These names are relative to the original working directory.
206
207 For a file that appears in only one of the dirs, one of the name-args
208 to HANDLE_FILE is zero.
209
210 Returns the maximum of all the values returned by HANDLE_FILE,
211 or EXIT_TROUBLE if trouble is encountered in opening files. */
212
213 int
diff_dirs(struct comparison const * cmp,int (* handle_file)(struct comparison const *,char const *,char const *))214 diff_dirs (struct comparison const *cmp,
215 int (*handle_file) (struct comparison const *,
216 char const *, char const *))
217 {
218 struct dirdata dirdata[2];
219 int volatile val = EXIT_SUCCESS;
220 int i;
221
222 if ((cmp->file[0].desc == -1 || dir_loop (cmp, 0))
223 && (cmp->file[1].desc == -1 || dir_loop (cmp, 1)))
224 {
225 error (0, 0, _("%s: recursive directory loop"),
226 cmp->file[cmp->file[0].desc == -1].name);
227 return EXIT_TROUBLE;
228 }
229
230 /* Get contents of both dirs. */
231 for (i = 0; i < 2; i++)
232 if (! dir_read (&cmp->file[i], &dirdata[i]))
233 {
234 perror_with_name (cmp->file[i].name);
235 val = EXIT_TROUBLE;
236 }
237
238 if (val == EXIT_SUCCESS)
239 {
240 char const **volatile names[2];
241 names[0] = dirdata[0].names;
242 names[1] = dirdata[1].names;
243
244 /* Use locale-specific sorting if possible, else native byte order. */
245 locale_specific_sorting = true;
246 if (setjmp (failed_locale_specific_sorting))
247 locale_specific_sorting = false;
248
249 /* Sort the directories. */
250 for (i = 0; i < 2; i++)
251 qsort (names[i], dirdata[i].nnames, sizeof *dirdata[i].names,
252 compare_names_for_qsort);
253
254 /* If '-S name' was given, and this is the topmost level of comparison,
255 ignore all file names less than the specified starting name. */
256
257 if (starting_file && ! cmp->parent)
258 {
259 while (*names[0] && compare_names (*names[0], starting_file) < 0)
260 names[0]++;
261 while (*names[1] && compare_names (*names[1], starting_file) < 0)
262 names[1]++;
263 }
264
265 /* Loop while files remain in one or both dirs. */
266 while (*names[0] || *names[1])
267 {
268 /* Compare next name in dir 0 with next name in dir 1.
269 At the end of a dir,
270 pretend the "next name" in that dir is very large. */
271 int nameorder = (!*names[0] ? 1 : !*names[1] ? -1
272 : compare_names (*names[0], *names[1]));
273
274 /* Prefer a file_name_cmp match if available. This algorithm is
275 O(N**2), where N is the number of names in a directory
276 that compare_names says are all equal, but in practice N
277 is so small it's not worth tuning. */
278 if (nameorder == 0 && ignore_file_name_case)
279 {
280 int raw_order = file_name_cmp (*names[0], *names[1]);
281 if (raw_order != 0)
282 {
283 int greater_side = raw_order < 0;
284 int lesser_side = 1 - greater_side;
285 char const **lesser = names[lesser_side];
286 char const *greater_name = *names[greater_side];
287 char const **p;
288
289 for (p = lesser + 1;
290 *p && compare_names (*p, greater_name) == 0;
291 p++)
292 {
293 int c = file_name_cmp (*p, greater_name);
294 if (0 <= c)
295 {
296 if (c == 0)
297 {
298 memmove (lesser + 1, lesser,
299 (char *) p - (char *) lesser);
300 *lesser = greater_name;
301 }
302 break;
303 }
304 }
305 }
306 }
307
308 int v1 = (*handle_file) (cmp,
309 0 < nameorder ? 0 : *names[0]++,
310 nameorder < 0 ? 0 : *names[1]++);
311 if (val < v1)
312 val = v1;
313 }
314 }
315
316 for (i = 0; i < 2; i++)
317 {
318 free (dirdata[i].names);
319 free (dirdata[i].data);
320 }
321
322 return val;
323 }
324
325 /* Return nonzero if CMP is looping recursively in argument I. */
326
327 static bool _GL_ATTRIBUTE_PURE
dir_loop(struct comparison const * cmp,int i)328 dir_loop (struct comparison const *cmp, int i)
329 {
330 struct comparison const *p = cmp;
331 while ((p = p->parent))
332 if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat))
333 return true;
334 return false;
335 }
336
337 /* Find a matching filename in a directory. */
338
339 char *
find_dir_file_pathname(char const * dir,char const * file)340 find_dir_file_pathname (char const *dir, char const *file)
341 {
342 /* The 'IF_LINT (volatile)' works around what appears to be a bug in
343 gcc 4.8.0 20120825; see
344 <http://lists.gnu.org/archive/html/bug-diffutils/2012-08/msg00007.html>.
345 */
346 char const * IF_LINT (volatile) match = file;
347
348 char *val;
349 struct dirdata dirdata;
350 dirdata.names = NULL;
351 dirdata.data = NULL;
352
353 if (ignore_file_name_case)
354 {
355 struct file_data filedata;
356 filedata.name = dir;
357 filedata.desc = 0;
358
359 if (dir_read (&filedata, &dirdata))
360 {
361 locale_specific_sorting = true;
362 if (setjmp (failed_locale_specific_sorting))
363 match = file; /* longjmp may mess up MATCH. */
364 else
365 {
366 for (char const **p = dirdata.names; *p; p++)
367 if (compare_names (*p, file) == 0)
368 {
369 if (file_name_cmp (*p, file) == 0)
370 {
371 match = *p;
372 break;
373 }
374 if (match == file)
375 match = *p;
376 }
377 }
378 }
379 }
380
381 val = file_name_concat (dir, match, NULL);
382 free (dirdata.names);
383 free (dirdata.data);
384 return val;
385 }
386