1 /* Read, sort and compare two directories. Used for GNU DIFF. 2 3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006-2007, 4 2009-2013, 2015-2018 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #include "diff.h" 22 #include <error.h> 23 #include <exclude.h> 24 #include <filenamecat.h> 25 #include <setjmp.h> 26 #include <xalloc.h> 27 28 /* Read the directory named by DIR and store into DIRDATA a sorted vector 29 of filenames for its contents. DIR->desc == -1 means this directory is 30 known to be nonexistent, so set DIRDATA to an empty vector. 31 Return -1 (setting errno) if error, 0 otherwise. */ 32 33 struct dirdata 34 { 35 size_t nnames; /* Number of names. */ 36 char const **names; /* Sorted names of files in dir, followed by 0. */ 37 char *data; /* Allocated storage for file names. */ 38 }; 39 40 /* Whether file names in directories should be compared with 41 locale-specific sorting. */ 42 static bool locale_specific_sorting; 43 44 /* Where to go if locale-specific sorting fails. */ 45 static jmp_buf failed_locale_specific_sorting; 46 47 static bool dir_loop (struct comparison const *, int); 48 49 50 /* Read a directory and get its vector of names. */ 51 52 static bool 53 dir_read (struct file_data const *dir, struct dirdata *dirdata) 54 { 55 register struct dirent *next; 56 register size_t i; 57 58 /* Address of block containing the files that are described. */ 59 char const **names; 60 61 /* Number of files in directory. */ 62 size_t nnames; 63 64 /* Allocated and used storage for file name data. */ 65 char *data; 66 size_t data_alloc, data_used; 67 68 dirdata->names = 0; 69 dirdata->data = 0; 70 nnames = 0; 71 data = 0; 72 73 if (dir->desc != -1) 74 { 75 /* Open the directory and check for errors. */ 76 register DIR *reading = opendir (dir->name); 77 if (!reading) 78 return false; 79 80 /* Initialize the table of filenames. */ 81 82 data_alloc = 512; 83 data_used = 0; 84 dirdata->data = data = xmalloc (data_alloc); 85 86 /* Read the directory entries, and insert the subfiles 87 into the 'data' table. */ 88 89 while ((errno = 0, (next = readdir (reading)) != 0)) 90 { 91 char *d_name = next->d_name; 92 size_t d_size = _D_EXACT_NAMLEN (next) + 1; 93 94 /* Ignore "." and "..". */ 95 if (d_name[0] == '.' 96 && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0))) 97 continue; 98 99 if (excluded_file_name (excluded, d_name)) 100 continue; 101 102 while (data_alloc < data_used + d_size) 103 { 104 if (PTRDIFF_MAX / 2 <= data_alloc) 105 xalloc_die (); 106 dirdata->data = data = xrealloc (data, data_alloc *= 2); 107 } 108 109 memcpy (data + data_used, d_name, d_size); 110 data_used += d_size; 111 nnames++; 112 } 113 if (errno) 114 { 115 int e = errno; 116 closedir (reading); 117 errno = e; 118 return false; 119 } 120 #if CLOSEDIR_VOID 121 closedir (reading); 122 #else 123 if (closedir (reading) != 0) 124 return false; 125 #endif 126 } 127 128 /* Create the 'names' table from the 'data' table. */ 129 if (PTRDIFF_MAX / sizeof *names - 1 <= nnames) 130 xalloc_die (); 131 dirdata->names = names = xmalloc ((nnames + 1) * sizeof *names); 132 dirdata->nnames = nnames; 133 for (i = 0; i < nnames; i++) 134 { 135 names[i] = data; 136 data += strlen (data) + 1; 137 } 138 names[nnames] = 0; 139 return true; 140 } 141 142 /* Compare strings in a locale-specific way, returning a value 143 compatible with strcmp. */ 144 145 static int 146 compare_collated (char const *name1, char const *name2) 147 { 148 int r; 149 errno = 0; 150 if (ignore_file_name_case) 151 r = strcasecoll (name1, name2); 152 else 153 r = strcoll (name1, name2); 154 if (errno) 155 { 156 error (0, errno, _("cannot compare file names '%s' and '%s'"), 157 name1, name2); 158 longjmp (failed_locale_specific_sorting, 1); 159 } 160 return r; 161 } 162 163 /* Compare file names, returning a value compatible with strcmp. */ 164 165 static int 166 compare_names (char const *name1, char const *name2) 167 { 168 if (locale_specific_sorting) 169 { 170 int diff = compare_collated (name1, name2); 171 if (diff || ignore_file_name_case) 172 return diff; 173 } 174 return file_name_cmp (name1, name2); 175 } 176 177 /* Compare names FILE1 and FILE2 when sorting a directory. 178 Prefer filtered comparison, breaking ties with file_name_cmp. */ 179 180 static int 181 compare_names_for_qsort (void const *file1, void const *file2) 182 { 183 char const *const *f1 = file1; 184 char const *const *f2 = file2; 185 char const *name1 = *f1; 186 char const *name2 = *f2; 187 if (locale_specific_sorting) 188 { 189 int diff = compare_collated (name1, name2); 190 if (diff) 191 return diff; 192 } 193 return file_name_cmp (name1, name2); 194 } 195 196 /* Compare the contents of two directories named in CMP. 197 This is a top-level routine; it does everything necessary for diff 198 on two directories. 199 200 CMP->file[0].desc == -1 says directory CMP->file[0] doesn't exist, 201 but pretend it is empty. Likewise for CMP->file[1]. 202 203 HANDLE_FILE is a caller-provided subroutine called to handle each file. 204 It gets three operands: CMP, name of file in dir 0, name of file in dir 1. 205 These names are relative to the original working directory. 206 207 For a file that appears in only one of the dirs, one of the name-args 208 to HANDLE_FILE is zero. 209 210 Returns the maximum of all the values returned by HANDLE_FILE, 211 or EXIT_TROUBLE if trouble is encountered in opening files. */ 212 213 int 214 diff_dirs (struct comparison const *cmp, 215 int (*handle_file) (struct comparison const *, 216 char const *, char const *)) 217 { 218 struct dirdata dirdata[2]; 219 int volatile val = EXIT_SUCCESS; 220 int i; 221 222 if ((cmp->file[0].desc == -1 || dir_loop (cmp, 0)) 223 && (cmp->file[1].desc == -1 || dir_loop (cmp, 1))) 224 { 225 error (0, 0, _("%s: recursive directory loop"), 226 cmp->file[cmp->file[0].desc == -1].name); 227 return EXIT_TROUBLE; 228 } 229 230 /* Get contents of both dirs. */ 231 for (i = 0; i < 2; i++) 232 if (! dir_read (&cmp->file[i], &dirdata[i])) 233 { 234 perror_with_name (cmp->file[i].name); 235 val = EXIT_TROUBLE; 236 } 237 238 if (val == EXIT_SUCCESS) 239 { 240 char const **volatile names[2]; 241 names[0] = dirdata[0].names; 242 names[1] = dirdata[1].names; 243 244 /* Use locale-specific sorting if possible, else native byte order. */ 245 locale_specific_sorting = true; 246 if (setjmp (failed_locale_specific_sorting)) 247 locale_specific_sorting = false; 248 249 /* Sort the directories. */ 250 for (i = 0; i < 2; i++) 251 qsort (names[i], dirdata[i].nnames, sizeof *dirdata[i].names, 252 compare_names_for_qsort); 253 254 /* If '-S name' was given, and this is the topmost level of comparison, 255 ignore all file names less than the specified starting name. */ 256 257 if (starting_file && ! cmp->parent) 258 { 259 while (*names[0] && compare_names (*names[0], starting_file) < 0) 260 names[0]++; 261 while (*names[1] && compare_names (*names[1], starting_file) < 0) 262 names[1]++; 263 } 264 265 /* Loop while files remain in one or both dirs. */ 266 while (*names[0] || *names[1]) 267 { 268 /* Compare next name in dir 0 with next name in dir 1. 269 At the end of a dir, 270 pretend the "next name" in that dir is very large. */ 271 int nameorder = (!*names[0] ? 1 : !*names[1] ? -1 272 : compare_names (*names[0], *names[1])); 273 274 /* Prefer a file_name_cmp match if available. This algorithm is 275 O(N**2), where N is the number of names in a directory 276 that compare_names says are all equal, but in practice N 277 is so small it's not worth tuning. */ 278 if (nameorder == 0 && ignore_file_name_case) 279 { 280 int raw_order = file_name_cmp (*names[0], *names[1]); 281 if (raw_order != 0) 282 { 283 int greater_side = raw_order < 0; 284 int lesser_side = 1 - greater_side; 285 char const **lesser = names[lesser_side]; 286 char const *greater_name = *names[greater_side]; 287 char const **p; 288 289 for (p = lesser + 1; 290 *p && compare_names (*p, greater_name) == 0; 291 p++) 292 { 293 int c = file_name_cmp (*p, greater_name); 294 if (0 <= c) 295 { 296 if (c == 0) 297 { 298 memmove (lesser + 1, lesser, 299 (char *) p - (char *) lesser); 300 *lesser = greater_name; 301 } 302 break; 303 } 304 } 305 } 306 } 307 308 int v1 = (*handle_file) (cmp, 309 0 < nameorder ? 0 : *names[0]++, 310 nameorder < 0 ? 0 : *names[1]++); 311 if (val < v1) 312 val = v1; 313 } 314 } 315 316 for (i = 0; i < 2; i++) 317 { 318 free (dirdata[i].names); 319 free (dirdata[i].data); 320 } 321 322 return val; 323 } 324 325 /* Return nonzero if CMP is looping recursively in argument I. */ 326 327 static bool _GL_ATTRIBUTE_PURE 328 dir_loop (struct comparison const *cmp, int i) 329 { 330 struct comparison const *p = cmp; 331 while ((p = p->parent)) 332 if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat)) 333 return true; 334 return false; 335 } 336 337 /* Find a matching filename in a directory. */ 338 339 char * 340 find_dir_file_pathname (char const *dir, char const *file) 341 { 342 /* The 'IF_LINT (volatile)' works around what appears to be a bug in 343 gcc 4.8.0 20120825; see 344 <http://lists.gnu.org/archive/html/bug-diffutils/2012-08/msg00007.html>. 345 */ 346 char const * IF_LINT (volatile) match = file; 347 348 char *val; 349 struct dirdata dirdata; 350 dirdata.names = NULL; 351 dirdata.data = NULL; 352 353 if (ignore_file_name_case) 354 { 355 struct file_data filedata; 356 filedata.name = dir; 357 filedata.desc = 0; 358 359 if (dir_read (&filedata, &dirdata)) 360 { 361 locale_specific_sorting = true; 362 if (setjmp (failed_locale_specific_sorting)) 363 match = file; /* longjmp may mess up MATCH. */ 364 else 365 { 366 for (char const **p = dirdata.names; *p; p++) 367 if (compare_names (*p, file) == 0) 368 { 369 if (file_name_cmp (*p, file) == 0) 370 { 371 match = *p; 372 break; 373 } 374 if (match == file) 375 match = *p; 376 } 377 } 378 } 379 } 380 381 val = file_name_concat (dir, match, NULL); 382 free (dirdata.names); 383 free (dirdata.data); 384 return val; 385 } 386