1855caec6SPeter Avalos /* Read, sort and compare two directories. Used for GNU DIFF.
2855caec6SPeter Avalos
344b87433SJohn Marino Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007,
4*6ea1f93eSDaniel Fojt 2009-2013, 2015-2018 Free Software Foundation, Inc.
5855caec6SPeter Avalos
6855caec6SPeter Avalos This file is part of GNU DIFF.
7855caec6SPeter Avalos
844b87433SJohn Marino This program is free software: you can redistribute it and/or modify
9855caec6SPeter Avalos it under the terms of the GNU General Public License as published by
1044b87433SJohn Marino the Free Software Foundation, either version 3 of the License, or
1144b87433SJohn Marino (at your option) any later version.
12855caec6SPeter Avalos
1344b87433SJohn Marino This program is distributed in the hope that it will be useful,
14855caec6SPeter Avalos but WITHOUT ANY WARRANTY; without even the implied warranty of
15855caec6SPeter Avalos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16855caec6SPeter Avalos GNU General Public License for more details.
17855caec6SPeter Avalos
18855caec6SPeter Avalos You should have received a copy of the GNU General Public License
1944b87433SJohn Marino along with this program. If not, see <http://www.gnu.org/licenses/>. */
20855caec6SPeter Avalos
21855caec6SPeter Avalos #include "diff.h"
22855caec6SPeter Avalos #include <error.h>
23855caec6SPeter Avalos #include <exclude.h>
24008e37b6SJohn Marino #include <filenamecat.h>
25855caec6SPeter Avalos #include <setjmp.h>
26855caec6SPeter Avalos #include <xalloc.h>
27855caec6SPeter Avalos
28855caec6SPeter Avalos /* Read the directory named by DIR and store into DIRDATA a sorted vector
29855caec6SPeter Avalos of filenames for its contents. DIR->desc == -1 means this directory is
30855caec6SPeter Avalos known to be nonexistent, so set DIRDATA to an empty vector.
31855caec6SPeter Avalos Return -1 (setting errno) if error, 0 otherwise. */
32855caec6SPeter Avalos
33855caec6SPeter Avalos struct dirdata
34855caec6SPeter Avalos {
35855caec6SPeter Avalos size_t nnames; /* Number of names. */
36855caec6SPeter Avalos char const **names; /* Sorted names of files in dir, followed by 0. */
37855caec6SPeter Avalos char *data; /* Allocated storage for file names. */
38855caec6SPeter Avalos };
39855caec6SPeter Avalos
40855caec6SPeter Avalos /* Whether file names in directories should be compared with
41855caec6SPeter Avalos locale-specific sorting. */
42855caec6SPeter Avalos static bool locale_specific_sorting;
43855caec6SPeter Avalos
44855caec6SPeter Avalos /* Where to go if locale-specific sorting fails. */
45855caec6SPeter Avalos static jmp_buf failed_locale_specific_sorting;
46855caec6SPeter Avalos
47855caec6SPeter Avalos static bool dir_loop (struct comparison const *, int);
48855caec6SPeter Avalos
49855caec6SPeter Avalos
50855caec6SPeter Avalos /* Read a directory and get its vector of names. */
51855caec6SPeter Avalos
52855caec6SPeter Avalos static bool
dir_read(struct file_data const * dir,struct dirdata * dirdata)53855caec6SPeter Avalos dir_read (struct file_data const *dir, struct dirdata *dirdata)
54855caec6SPeter Avalos {
55855caec6SPeter Avalos register struct dirent *next;
56855caec6SPeter Avalos register size_t i;
57855caec6SPeter Avalos
58855caec6SPeter Avalos /* Address of block containing the files that are described. */
59855caec6SPeter Avalos char const **names;
60855caec6SPeter Avalos
61855caec6SPeter Avalos /* Number of files in directory. */
62855caec6SPeter Avalos size_t nnames;
63855caec6SPeter Avalos
64855caec6SPeter Avalos /* Allocated and used storage for file name data. */
65855caec6SPeter Avalos char *data;
66855caec6SPeter Avalos size_t data_alloc, data_used;
67855caec6SPeter Avalos
68855caec6SPeter Avalos dirdata->names = 0;
69855caec6SPeter Avalos dirdata->data = 0;
70855caec6SPeter Avalos nnames = 0;
71855caec6SPeter Avalos data = 0;
72855caec6SPeter Avalos
73855caec6SPeter Avalos if (dir->desc != -1)
74855caec6SPeter Avalos {
75855caec6SPeter Avalos /* Open the directory and check for errors. */
76855caec6SPeter Avalos register DIR *reading = opendir (dir->name);
77855caec6SPeter Avalos if (!reading)
78855caec6SPeter Avalos return false;
79855caec6SPeter Avalos
80855caec6SPeter Avalos /* Initialize the table of filenames. */
81855caec6SPeter Avalos
82855caec6SPeter Avalos data_alloc = 512;
83855caec6SPeter Avalos data_used = 0;
84855caec6SPeter Avalos dirdata->data = data = xmalloc (data_alloc);
85855caec6SPeter Avalos
86855caec6SPeter Avalos /* Read the directory entries, and insert the subfiles
874536c563SJohn Marino into the 'data' table. */
88855caec6SPeter Avalos
89855caec6SPeter Avalos while ((errno = 0, (next = readdir (reading)) != 0))
90855caec6SPeter Avalos {
91855caec6SPeter Avalos char *d_name = next->d_name;
9244b87433SJohn Marino size_t d_size = _D_EXACT_NAMLEN (next) + 1;
93855caec6SPeter Avalos
94855caec6SPeter Avalos /* Ignore "." and "..". */
95855caec6SPeter Avalos if (d_name[0] == '.'
96855caec6SPeter Avalos && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
97855caec6SPeter Avalos continue;
98855caec6SPeter Avalos
9944b87433SJohn Marino if (excluded_file_name (excluded, d_name))
100855caec6SPeter Avalos continue;
101855caec6SPeter Avalos
102855caec6SPeter Avalos while (data_alloc < data_used + d_size)
103855caec6SPeter Avalos {
104855caec6SPeter Avalos if (PTRDIFF_MAX / 2 <= data_alloc)
105855caec6SPeter Avalos xalloc_die ();
106855caec6SPeter Avalos dirdata->data = data = xrealloc (data, data_alloc *= 2);
107855caec6SPeter Avalos }
108855caec6SPeter Avalos
109855caec6SPeter Avalos memcpy (data + data_used, d_name, d_size);
110855caec6SPeter Avalos data_used += d_size;
111855caec6SPeter Avalos nnames++;
112855caec6SPeter Avalos }
113855caec6SPeter Avalos if (errno)
114855caec6SPeter Avalos {
115855caec6SPeter Avalos int e = errno;
116855caec6SPeter Avalos closedir (reading);
117855caec6SPeter Avalos errno = e;
118855caec6SPeter Avalos return false;
119855caec6SPeter Avalos }
120855caec6SPeter Avalos #if CLOSEDIR_VOID
121855caec6SPeter Avalos closedir (reading);
122855caec6SPeter Avalos #else
123855caec6SPeter Avalos if (closedir (reading) != 0)
124855caec6SPeter Avalos return false;
125855caec6SPeter Avalos #endif
126855caec6SPeter Avalos }
127855caec6SPeter Avalos
1284536c563SJohn Marino /* Create the 'names' table from the 'data' table. */
129855caec6SPeter Avalos if (PTRDIFF_MAX / sizeof *names - 1 <= nnames)
130855caec6SPeter Avalos xalloc_die ();
131855caec6SPeter Avalos dirdata->names = names = xmalloc ((nnames + 1) * sizeof *names);
132855caec6SPeter Avalos dirdata->nnames = nnames;
133855caec6SPeter Avalos for (i = 0; i < nnames; i++)
134855caec6SPeter Avalos {
135855caec6SPeter Avalos names[i] = data;
136855caec6SPeter Avalos data += strlen (data) + 1;
137855caec6SPeter Avalos }
138855caec6SPeter Avalos names[nnames] = 0;
139855caec6SPeter Avalos return true;
140855caec6SPeter Avalos }
141855caec6SPeter Avalos
142*6ea1f93eSDaniel Fojt /* Compare strings in a locale-specific way, returning a value
143*6ea1f93eSDaniel Fojt compatible with strcmp. */
144855caec6SPeter Avalos
145855caec6SPeter Avalos static int
compare_collated(char const * name1,char const * name2)146*6ea1f93eSDaniel Fojt compare_collated (char const *name1, char const *name2)
147855caec6SPeter Avalos {
148855caec6SPeter Avalos int r;
149855caec6SPeter Avalos errno = 0;
150855caec6SPeter Avalos if (ignore_file_name_case)
151855caec6SPeter Avalos r = strcasecoll (name1, name2);
152855caec6SPeter Avalos else
153855caec6SPeter Avalos r = strcoll (name1, name2);
154855caec6SPeter Avalos if (errno)
155855caec6SPeter Avalos {
1564536c563SJohn Marino error (0, errno, _("cannot compare file names '%s' and '%s'"),
157855caec6SPeter Avalos name1, name2);
158855caec6SPeter Avalos longjmp (failed_locale_specific_sorting, 1);
159855caec6SPeter Avalos }
160855caec6SPeter Avalos return r;
161855caec6SPeter Avalos }
162855caec6SPeter Avalos
163*6ea1f93eSDaniel Fojt /* Compare file names, returning a value compatible with strcmp. */
164*6ea1f93eSDaniel Fojt
165*6ea1f93eSDaniel Fojt static int
compare_names(char const * name1,char const * name2)166*6ea1f93eSDaniel Fojt compare_names (char const *name1, char const *name2)
167*6ea1f93eSDaniel Fojt {
168*6ea1f93eSDaniel Fojt if (locale_specific_sorting)
169*6ea1f93eSDaniel Fojt {
170*6ea1f93eSDaniel Fojt int diff = compare_collated (name1, name2);
171*6ea1f93eSDaniel Fojt if (diff || ignore_file_name_case)
172*6ea1f93eSDaniel Fojt return diff;
173*6ea1f93eSDaniel Fojt }
174008e37b6SJohn Marino return file_name_cmp (name1, name2);
175855caec6SPeter Avalos }
176855caec6SPeter Avalos
177008e37b6SJohn Marino /* Compare names FILE1 and FILE2 when sorting a directory.
178008e37b6SJohn Marino Prefer filtered comparison, breaking ties with file_name_cmp. */
179855caec6SPeter Avalos
180855caec6SPeter Avalos static int
compare_names_for_qsort(void const * file1,void const * file2)181855caec6SPeter Avalos compare_names_for_qsort (void const *file1, void const *file2)
182855caec6SPeter Avalos {
183855caec6SPeter Avalos char const *const *f1 = file1;
184855caec6SPeter Avalos char const *const *f2 = file2;
185*6ea1f93eSDaniel Fojt char const *name1 = *f1;
186*6ea1f93eSDaniel Fojt char const *name2 = *f2;
187*6ea1f93eSDaniel Fojt if (locale_specific_sorting)
188*6ea1f93eSDaniel Fojt {
189*6ea1f93eSDaniel Fojt int diff = compare_collated (name1, name2);
190*6ea1f93eSDaniel Fojt if (diff)
191*6ea1f93eSDaniel Fojt return diff;
192*6ea1f93eSDaniel Fojt }
193*6ea1f93eSDaniel Fojt return file_name_cmp (name1, name2);
194855caec6SPeter Avalos }
195855caec6SPeter Avalos
196855caec6SPeter Avalos /* Compare the contents of two directories named in CMP.
197855caec6SPeter Avalos This is a top-level routine; it does everything necessary for diff
198855caec6SPeter Avalos on two directories.
199855caec6SPeter Avalos
200855caec6SPeter Avalos CMP->file[0].desc == -1 says directory CMP->file[0] doesn't exist,
201855caec6SPeter Avalos but pretend it is empty. Likewise for CMP->file[1].
202855caec6SPeter Avalos
203855caec6SPeter Avalos HANDLE_FILE is a caller-provided subroutine called to handle each file.
204855caec6SPeter Avalos It gets three operands: CMP, name of file in dir 0, name of file in dir 1.
205855caec6SPeter Avalos These names are relative to the original working directory.
206855caec6SPeter Avalos
207855caec6SPeter Avalos For a file that appears in only one of the dirs, one of the name-args
208855caec6SPeter Avalos to HANDLE_FILE is zero.
209855caec6SPeter Avalos
210855caec6SPeter Avalos Returns the maximum of all the values returned by HANDLE_FILE,
211855caec6SPeter Avalos or EXIT_TROUBLE if trouble is encountered in opening files. */
212855caec6SPeter Avalos
213855caec6SPeter Avalos int
diff_dirs(struct comparison const * cmp,int (* handle_file)(struct comparison const *,char const *,char const *))214855caec6SPeter Avalos diff_dirs (struct comparison const *cmp,
215855caec6SPeter Avalos int (*handle_file) (struct comparison const *,
216855caec6SPeter Avalos char const *, char const *))
217855caec6SPeter Avalos {
218855caec6SPeter Avalos struct dirdata dirdata[2];
219855caec6SPeter Avalos int volatile val = EXIT_SUCCESS;
220855caec6SPeter Avalos int i;
221855caec6SPeter Avalos
222855caec6SPeter Avalos if ((cmp->file[0].desc == -1 || dir_loop (cmp, 0))
223855caec6SPeter Avalos && (cmp->file[1].desc == -1 || dir_loop (cmp, 1)))
224855caec6SPeter Avalos {
22544b87433SJohn Marino error (0, 0, _("%s: recursive directory loop"),
226855caec6SPeter Avalos cmp->file[cmp->file[0].desc == -1].name);
227855caec6SPeter Avalos return EXIT_TROUBLE;
228855caec6SPeter Avalos }
229855caec6SPeter Avalos
230855caec6SPeter Avalos /* Get contents of both dirs. */
231855caec6SPeter Avalos for (i = 0; i < 2; i++)
232855caec6SPeter Avalos if (! dir_read (&cmp->file[i], &dirdata[i]))
233855caec6SPeter Avalos {
234855caec6SPeter Avalos perror_with_name (cmp->file[i].name);
235855caec6SPeter Avalos val = EXIT_TROUBLE;
236855caec6SPeter Avalos }
237855caec6SPeter Avalos
238855caec6SPeter Avalos if (val == EXIT_SUCCESS)
239855caec6SPeter Avalos {
240855caec6SPeter Avalos char const **volatile names[2];
241855caec6SPeter Avalos names[0] = dirdata[0].names;
242855caec6SPeter Avalos names[1] = dirdata[1].names;
243855caec6SPeter Avalos
244855caec6SPeter Avalos /* Use locale-specific sorting if possible, else native byte order. */
245855caec6SPeter Avalos locale_specific_sorting = true;
246855caec6SPeter Avalos if (setjmp (failed_locale_specific_sorting))
247855caec6SPeter Avalos locale_specific_sorting = false;
248855caec6SPeter Avalos
249855caec6SPeter Avalos /* Sort the directories. */
250855caec6SPeter Avalos for (i = 0; i < 2; i++)
251855caec6SPeter Avalos qsort (names[i], dirdata[i].nnames, sizeof *dirdata[i].names,
252855caec6SPeter Avalos compare_names_for_qsort);
253855caec6SPeter Avalos
2544536c563SJohn Marino /* If '-S name' was given, and this is the topmost level of comparison,
255855caec6SPeter Avalos ignore all file names less than the specified starting name. */
256855caec6SPeter Avalos
257855caec6SPeter Avalos if (starting_file && ! cmp->parent)
258855caec6SPeter Avalos {
259855caec6SPeter Avalos while (*names[0] && compare_names (*names[0], starting_file) < 0)
260855caec6SPeter Avalos names[0]++;
261855caec6SPeter Avalos while (*names[1] && compare_names (*names[1], starting_file) < 0)
262855caec6SPeter Avalos names[1]++;
263855caec6SPeter Avalos }
264855caec6SPeter Avalos
265855caec6SPeter Avalos /* Loop while files remain in one or both dirs. */
266855caec6SPeter Avalos while (*names[0] || *names[1])
267855caec6SPeter Avalos {
268855caec6SPeter Avalos /* Compare next name in dir 0 with next name in dir 1.
269855caec6SPeter Avalos At the end of a dir,
270855caec6SPeter Avalos pretend the "next name" in that dir is very large. */
271855caec6SPeter Avalos int nameorder = (!*names[0] ? 1 : !*names[1] ? -1
272855caec6SPeter Avalos : compare_names (*names[0], *names[1]));
273008e37b6SJohn Marino
274008e37b6SJohn Marino /* Prefer a file_name_cmp match if available. This algorithm is
275008e37b6SJohn Marino O(N**2), where N is the number of names in a directory
276008e37b6SJohn Marino that compare_names says are all equal, but in practice N
277008e37b6SJohn Marino is so small it's not worth tuning. */
278*6ea1f93eSDaniel Fojt if (nameorder == 0 && ignore_file_name_case)
279008e37b6SJohn Marino {
280008e37b6SJohn Marino int raw_order = file_name_cmp (*names[0], *names[1]);
281008e37b6SJohn Marino if (raw_order != 0)
282008e37b6SJohn Marino {
283008e37b6SJohn Marino int greater_side = raw_order < 0;
284008e37b6SJohn Marino int lesser_side = 1 - greater_side;
285008e37b6SJohn Marino char const **lesser = names[lesser_side];
286008e37b6SJohn Marino char const *greater_name = *names[greater_side];
287008e37b6SJohn Marino char const **p;
288008e37b6SJohn Marino
289008e37b6SJohn Marino for (p = lesser + 1;
290008e37b6SJohn Marino *p && compare_names (*p, greater_name) == 0;
291008e37b6SJohn Marino p++)
292008e37b6SJohn Marino {
293008e37b6SJohn Marino int c = file_name_cmp (*p, greater_name);
294008e37b6SJohn Marino if (0 <= c)
295008e37b6SJohn Marino {
296008e37b6SJohn Marino if (c == 0)
297008e37b6SJohn Marino {
298008e37b6SJohn Marino memmove (lesser + 1, lesser,
299008e37b6SJohn Marino (char *) p - (char *) lesser);
300008e37b6SJohn Marino *lesser = greater_name;
301008e37b6SJohn Marino }
302008e37b6SJohn Marino break;
303008e37b6SJohn Marino }
304008e37b6SJohn Marino }
305008e37b6SJohn Marino }
306008e37b6SJohn Marino }
307008e37b6SJohn Marino
308855caec6SPeter Avalos int v1 = (*handle_file) (cmp,
309855caec6SPeter Avalos 0 < nameorder ? 0 : *names[0]++,
310855caec6SPeter Avalos nameorder < 0 ? 0 : *names[1]++);
311855caec6SPeter Avalos if (val < v1)
312855caec6SPeter Avalos val = v1;
313855caec6SPeter Avalos }
314855caec6SPeter Avalos }
315855caec6SPeter Avalos
316855caec6SPeter Avalos for (i = 0; i < 2; i++)
317855caec6SPeter Avalos {
318855caec6SPeter Avalos free (dirdata[i].names);
319855caec6SPeter Avalos free (dirdata[i].data);
320855caec6SPeter Avalos }
321855caec6SPeter Avalos
322855caec6SPeter Avalos return val;
323855caec6SPeter Avalos }
324855caec6SPeter Avalos
325855caec6SPeter Avalos /* Return nonzero if CMP is looping recursively in argument I. */
326855caec6SPeter Avalos
3274536c563SJohn Marino static bool _GL_ATTRIBUTE_PURE
dir_loop(struct comparison const * cmp,int i)328855caec6SPeter Avalos dir_loop (struct comparison const *cmp, int i)
329855caec6SPeter Avalos {
330855caec6SPeter Avalos struct comparison const *p = cmp;
331855caec6SPeter Avalos while ((p = p->parent))
332855caec6SPeter Avalos if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat))
333855caec6SPeter Avalos return true;
334855caec6SPeter Avalos return false;
335855caec6SPeter Avalos }
336008e37b6SJohn Marino
337008e37b6SJohn Marino /* Find a matching filename in a directory. */
338008e37b6SJohn Marino
339008e37b6SJohn Marino char *
find_dir_file_pathname(char const * dir,char const * file)340008e37b6SJohn Marino find_dir_file_pathname (char const *dir, char const *file)
341008e37b6SJohn Marino {
3424536c563SJohn Marino /* The 'IF_LINT (volatile)' works around what appears to be a bug in
3434536c563SJohn Marino gcc 4.8.0 20120825; see
3444536c563SJohn Marino <http://lists.gnu.org/archive/html/bug-diffutils/2012-08/msg00007.html>.
3454536c563SJohn Marino */
3464536c563SJohn Marino char const * IF_LINT (volatile) match = file;
3474536c563SJohn Marino
348008e37b6SJohn Marino char *val;
349008e37b6SJohn Marino struct dirdata dirdata;
350008e37b6SJohn Marino dirdata.names = NULL;
351008e37b6SJohn Marino dirdata.data = NULL;
352008e37b6SJohn Marino
353008e37b6SJohn Marino if (ignore_file_name_case)
354008e37b6SJohn Marino {
355008e37b6SJohn Marino struct file_data filedata;
356008e37b6SJohn Marino filedata.name = dir;
357008e37b6SJohn Marino filedata.desc = 0;
358008e37b6SJohn Marino
359008e37b6SJohn Marino if (dir_read (&filedata, &dirdata))
360008e37b6SJohn Marino {
361008e37b6SJohn Marino locale_specific_sorting = true;
362008e37b6SJohn Marino if (setjmp (failed_locale_specific_sorting))
363008e37b6SJohn Marino match = file; /* longjmp may mess up MATCH. */
364008e37b6SJohn Marino else
365008e37b6SJohn Marino {
366008e37b6SJohn Marino for (char const **p = dirdata.names; *p; p++)
367008e37b6SJohn Marino if (compare_names (*p, file) == 0)
368008e37b6SJohn Marino {
369008e37b6SJohn Marino if (file_name_cmp (*p, file) == 0)
370008e37b6SJohn Marino {
371008e37b6SJohn Marino match = *p;
372008e37b6SJohn Marino break;
373008e37b6SJohn Marino }
374008e37b6SJohn Marino if (match == file)
375008e37b6SJohn Marino match = *p;
376008e37b6SJohn Marino }
377008e37b6SJohn Marino }
378008e37b6SJohn Marino }
379008e37b6SJohn Marino }
380008e37b6SJohn Marino
381008e37b6SJohn Marino val = file_name_concat (dir, match, NULL);
382008e37b6SJohn Marino free (dirdata.names);
383008e37b6SJohn Marino free (dirdata.data);
384008e37b6SJohn Marino return val;
385008e37b6SJohn Marino }
386