1 /*
2  * Copyright (c) 2009-2013, Novell Inc.
3  *
4  * This program is licensed under the BSD license, read LICENSE.BSD
5  * for further information
6  */
7 
8 #include <stdio.h>
9 #include <sys/stat.h>
10 #include <unistd.h>
11 
12 #include "pool.h"
13 #include "repo.h"
14 #include "hash.h"
15 #include "repo_rpmdb.h"
16 #include "pool_fileconflicts.h"
17 
18 struct cbdata {
19   Pool *pool;
20   int create;
21   int aliases;
22 
23   Queue lookat;		/* conflict candidates */
24   Queue lookat_dir;	/* not yet conflicting directories */
25 
26   Hashtable cflmap;
27   Hashval cflmapn;
28   unsigned int cflmapused;
29 
30   Hashtable dirmap;
31   Hashval dirmapn;
32   unsigned int dirmapused;
33   int dirconflicts;
34 
35   Map idxmap;
36 
37   unsigned int lastdiridx;	/* last diridx we have seen */
38   unsigned int lastdirhash;	/* strhash of last dir we have seen */
39   int lastdiridxbad;
40 
41   Id idx;	/* index of package we're looking at */
42 
43   unsigned char *filesspace;
44   unsigned int filesspacen;
45 
46   Hashtable normap;
47   Hashval normapn;
48   unsigned int normapused;
49   Queue norq;
50 
51   Hashtable statmap;
52   Hashval statmapn;
53   unsigned int statmapused;
54 
55   int usestat;
56   int statsmade;
57 
58   const char *rootdir;
59   int rootdirl;
60 
61   char *canonspace;
62   int canonspacen;
63 
64   Hashtable fetchmap;
65   Hashval fetchmapn;
66   Map fetchdirmap;
67   int fetchdirmapn;
68   Queue newlookat;
69 };
70 
71 #define FILESSPACE_BLOCK 255
72 
73 static Hashtable
growhash(Hashtable map,Hashval * mapnp)74 growhash(Hashtable map, Hashval *mapnp)
75 {
76   Hashval mapn = *mapnp;
77   Hashval newn = (mapn + 1) * 2 - 1;
78   Hashval i, h, hh;
79   Hashtable m;
80   Id hx, qx;
81 
82   m = solv_calloc(newn + 1, 2 * sizeof(Id));
83   for (i = 0; i <= mapn; i++)
84     {
85       hx = map[2 * i];
86       if (!hx)
87 	continue;
88       h = hx & newn;
89       hh = HASHCHAIN_START;
90       for (;;)
91 	{
92 	  qx = m[2 * h];
93 	  if (!qx)
94 	    break;
95 	  h = HASHCHAIN_NEXT(h, hh, newn);
96 	}
97       m[2 * h] = hx;
98       m[2 * h + 1] = map[2 * i + 1];
99     }
100   solv_free(map);
101   *mapnp = newn;
102   return m;
103 }
104 
105 /* first pass for non-alias mode:
106  * create hash (dhx, idx) of directories that may have conflicts.
107  * also create map "ixdmap" of packages involved
108  */
109 static void
finddirs_cb(void * cbdatav,const char * fn,struct filelistinfo * info)110 finddirs_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
111 {
112   struct cbdata *cbdata = cbdatav;
113   Hashval h, hh;
114   Id dhx, qx;
115   Id oidx, idx = cbdata->idx;
116 
117   dhx = strhash(fn);
118   if (!dhx)
119     dhx = strlen(fn) + 1;	/* make sure dhx is not zero */
120   h = dhx & cbdata->dirmapn;
121   hh = HASHCHAIN_START;
122   for (;;)
123     {
124       qx = cbdata->dirmap[2 * h];
125       if (!qx)
126 	break;
127       if (qx == dhx)
128 	break;
129       h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
130     }
131   if (!qx)
132     {
133       /* a miss */
134       if (!cbdata->create)
135 	return;
136       cbdata->dirmap[2 * h] = dhx;
137       cbdata->dirmap[2 * h + 1] = idx;
138       if (++cbdata->dirmapused * 2 > cbdata->dirmapn)
139 	cbdata->dirmap = growhash(cbdata->dirmap, &cbdata->dirmapn);
140       return;
141     }
142   /* we saw this dir before */
143   oidx = cbdata->dirmap[2 * h + 1];
144   if (oidx == idx)
145     return;
146   /* found a conflict, this dir may be used in multiple packages */
147   if (oidx != -1)
148     {
149       MAPSET(&cbdata->idxmap, oidx);
150       cbdata->dirmap[2 * h + 1] = -1;	/* mark as "multiple packages" */
151       cbdata->dirconflicts++;
152     }
153   MAPSET(&cbdata->idxmap, idx);
154 }
155 
156 /* check if a dhx value is marked as "multiple" in the dirmap created by finddirs_cb */
157 static inline int
isindirmap(struct cbdata * cbdata,Id dhx)158 isindirmap(struct cbdata *cbdata, Id dhx)
159 {
160   Hashval h, hh;
161   Id qx;
162 
163   h = dhx & cbdata->dirmapn;
164   hh = HASHCHAIN_START;
165   for (;;)
166     {
167       qx = cbdata->dirmap[2 * h];
168       if (!qx)
169 	return 0;
170       if (qx == dhx)
171 	return cbdata->dirmap[2 * h + 1] == -1 ? 1 : 0;
172       h = HASHCHAIN_NEXT(h, hh, cbdata->dirmapn);
173     }
174 }
175 
176 /* collect all possible file conflicts candidates in cbdata->lookat */
177 /* algorithm: hash file name into hx. Use cflmap the check if we have seen
178  * this value before. If yes, we have a file conflict candidate. */
179 /* we also do extra work to ignore all-directory conflicts */
180 static void
findfileconflicts_cb(void * cbdatav,const char * fn,struct filelistinfo * info)181 findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
182 {
183   struct cbdata *cbdata = cbdatav;
184   int isdir = S_ISDIR(info->mode);
185   const char *dp;
186   Id idx, oidx;
187   Id hx, qx;
188   Hashval h, hh, dhx;
189 
190   idx = cbdata->idx;
191 
192   if (!info->dirlen)
193     return;
194   dp = fn + info->dirlen;
195   if (info->diridx != cbdata->lastdiridx)
196     {
197       cbdata->lastdiridx = info->diridx;
198       cbdata->lastdirhash = strnhash(fn, dp - fn);
199     }
200   dhx = cbdata->lastdirhash;
201 
202   /* check if the directory is marked as "multiple" in the dirmap */
203   /* this mirrors the "if (!dhx) dhx = strlen(fn) + 1" used in  finddirs_cb */
204   if (!isindirmap(cbdata, dhx ? dhx : dp - fn + 1))
205     return;
206 
207   hx = strhash_cont(dp, dhx);	/* extend hash to complete file name */
208   if (!hx)
209     hx = strlen(fn) + 1;	/* make sure hx is not zero */
210 
211   h = hx & cbdata->cflmapn;
212   hh = HASHCHAIN_START;
213   for (;;)
214     {
215       qx = cbdata->cflmap[2 * h];
216       if (!qx)
217 	break;
218       if (qx == hx)
219 	break;
220       h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
221     }
222   if (!qx)
223     {
224       /* a miss */
225       if (!cbdata->create)
226 	return;
227       cbdata->cflmap[2 * h] = hx;
228       cbdata->cflmap[2 * h + 1] = (isdir ? ~idx : idx);
229       if (++cbdata->cflmapused * 2 > cbdata->cflmapn)
230 	cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
231       return;
232     }
233   /* we have seen this hx before */
234   oidx = cbdata->cflmap[2 * h + 1];
235   if (oidx < 0)
236     {
237       int i;
238       if (isdir)
239 	{
240 	  /* both are directories. delay the conflict, keep oidx in slot */
241           queue_push2(&cbdata->lookat_dir, hx, idx);
242 	  return;
243 	}
244       oidx = ~oidx;
245       /* now have file, had directories before. */
246       cbdata->cflmap[2 * h + 1] = oidx;	/* make it a file */
247       /* dump all delayed directory hits for hx */
248       for (i = 0; i < cbdata->lookat_dir.count; i += 2)
249 	if (cbdata->lookat_dir.elements[i] == hx)
250 	  {
251 	    queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]);
252 	    queue_push2(&cbdata->lookat, 0, 0);
253 	  }
254     }
255   else if (oidx == idx)
256     return;	/* no conflicts with ourself, please */
257   queue_push2(&cbdata->lookat, hx, oidx);
258   queue_push2(&cbdata->lookat, 0, 0);
259   queue_push2(&cbdata->lookat, hx, idx);
260   queue_push2(&cbdata->lookat, 0, 0);
261 }
262 
263 /* same as findfileconflicts_cb, but
264  * - hashes with just the basename
265  * - sets idx in a map instead of pushing to lookat
266  * - sets the hash element to -1 ("multiple") if there may be a conflict
267  * we then use findfileconflicts_alias_cb as second step to generate the candidates.
268  * we do it this way because normailzing file names is expensive and thus we
269  * only want to do it for entries marked as "multiple"
270  */
271 static void
findfileconflicts_basename_cb(void * cbdatav,const char * fn,struct filelistinfo * info)272 findfileconflicts_basename_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
273 {
274   struct cbdata *cbdata = cbdatav;
275   int isdir = S_ISDIR(info->mode);
276   const char *dp;
277   Id idx, oidx;
278   Id hx, qx;
279   Hashval h, hh;
280 
281   idx = cbdata->idx;
282 
283   if (!info->dirlen)
284     return;
285   dp = fn + info->dirlen;
286   hx = strhash(dp);
287   if (!hx)
288     hx = strlen(fn) + 1;
289 
290   h = hx & cbdata->cflmapn;
291   hh = HASHCHAIN_START;
292   for (;;)
293     {
294       qx = cbdata->cflmap[2 * h];
295       if (!qx)
296 	break;
297       if (qx == hx)
298 	break;
299       h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
300     }
301   if (!qx)
302     {
303       /* a miss */
304       if (!cbdata->create)
305 	return;
306       cbdata->cflmap[2 * h] = hx;
307       cbdata->cflmap[2 * h + 1] = (isdir ? -idx - 2 : idx);
308       if (++cbdata->cflmapused * 2 > cbdata->cflmapn)
309 	cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
310       return;
311     }
312   oidx = cbdata->cflmap[2 * h + 1];
313   if (oidx < -1)
314     {
315       int i;
316       if (isdir)
317 	{
318 	  /* both are directories. delay the conflict, keep oidx in slot */
319           queue_push2(&cbdata->lookat_dir, hx, idx);
320 	  return;
321 	}
322       oidx = -idx - 2;
323       /* now have file, had directories before. */
324       cbdata->cflmap[2 * h + 1] = oidx;	/* make it a file */
325       /* dump all delayed directory hits for hx */
326       for (i = 0; i < cbdata->lookat_dir.count; i += 2)
327 	if (cbdata->lookat_dir.elements[i] == hx)
328 	  MAPSET(&cbdata->idxmap, cbdata->lookat_dir.elements[i + 1]);
329     }
330   else if (oidx == idx)
331     return;	/* no conflicts with ourself, please */
332   if (oidx >= 0)
333     MAPSET(&cbdata->idxmap, oidx);
334   MAPSET(&cbdata->idxmap, idx);
335   if (oidx != -1)
336     cbdata->cflmap[2 * h + 1] = -1;
337 }
338 
339 static inline Id
addfilesspace(struct cbdata * cbdata,int len)340 addfilesspace(struct cbdata *cbdata, int len)
341 {
342   unsigned int off = cbdata->filesspacen;
343   cbdata->filesspace = solv_extend(cbdata->filesspace, cbdata->filesspacen, len, 1, FILESSPACE_BLOCK);
344   cbdata->filesspacen += len;
345   return off;
346 }
347 
348 static Id
unifywithstat(struct cbdata * cbdata,Id diroff,int dirl)349 unifywithstat(struct cbdata *cbdata, Id diroff, int dirl)
350 {
351   struct stat stb;
352   int i;
353   Hashval h, hh;
354   Id hx, qx;
355   Id nspaceoff;
356   unsigned char statdata[16 + sizeof(stb.st_dev) + sizeof(stb.st_ino)];
357 
358   if (dirl > 1 && cbdata->filesspace[diroff + dirl - 1] == '/')
359     cbdata->filesspace[diroff + dirl - 1] = 0;
360   cbdata->statsmade++;
361   i = stat((char *)cbdata->filesspace + diroff, &stb);
362   if (dirl > 1 && cbdata->filesspace[diroff + dirl - 1] == 0)
363     cbdata->filesspace[diroff + dirl - 1] = '/';
364   if (i)
365     return diroff;
366   memset(statdata, 0, 16);
367   memcpy(statdata + 8, &stb.st_dev, sizeof(stb.st_dev));
368   memcpy(statdata, &stb.st_ino, sizeof(stb.st_ino));
369   hx = 0;
370   for (i = 15; i >= 0; i--)
371     hx = (unsigned int)hx * 13 + statdata[i];
372   h = hx & cbdata->statmapn;
373   hh = HASHCHAIN_START;
374   for (;;)
375     {
376       qx = cbdata->statmap[2 * h];
377       if (!qx)
378 	break;
379       if (qx == hx)
380 	{
381 	  Id off = cbdata->statmap[2 * h + 1];
382 	  char *dp = (char *)cbdata->filesspace + cbdata->norq.elements[off];
383 	  if (!memcmp(dp, statdata, 16))
384 	    return cbdata->norq.elements[off + 1];
385 	}
386       h = HASHCHAIN_NEXT(h, hh, cbdata->statmapn);
387     }
388   /* new stat result. work. */
389   nspaceoff = addfilesspace(cbdata, 16);
390   memcpy(cbdata->filesspace + nspaceoff, statdata, 16);
391   queue_push2(&cbdata->norq, nspaceoff, nspaceoff);
392   cbdata->statmap[2 * h] = hx;
393   cbdata->statmap[2 * h + 1] = cbdata->norq.count - 2;
394   if (++cbdata->statmapused * 2 > cbdata->statmapn)
395     cbdata->statmap = growhash(cbdata->statmap, &cbdata->statmapn);
396   return nspaceoff;
397 }
398 
399 /* forward declaration */
400 static Id normalizedir(struct cbdata *cbdata, const char *dir, int dirl, Id hx, int create);
401 
402 static Id
unifywithcanon(struct cbdata * cbdata,Id diroff,int dirl)403 unifywithcanon(struct cbdata *cbdata, Id diroff, int dirl)
404 {
405   Id dirnameid;
406   int i, l, ll, lo;
407   struct stat stb;
408 
409 #if 0
410   printf("UNIFY %.*s\n", dirl, (char *)cbdata->filesspace + diroff);
411 #endif
412   if (!dirl || cbdata->filesspace[diroff] != '/')
413     return diroff;
414   /* strip / at end*/
415   while (dirl && cbdata->filesspace[diroff + dirl - 1] == '/')
416     dirl--;
417   if (!dirl)
418     return diroff;
419 
420   /* find dirname */
421   for (i = dirl - 1; i > 0; i--)
422     if (cbdata->filesspace[diroff + i] == '/')
423       break;
424   i++;				/* include trailing / */
425 
426   /* normalize dirname */
427   dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, i, strnhash((char *)cbdata->filesspace + diroff, i), 1);
428   if (dirnameid == -1)
429     return diroff;		/* hit "in progress" marker, some cyclic link */
430 
431   /* sanity check result */
432   if (cbdata->filesspace[dirnameid] != '/')
433     return diroff;		/* hmm */
434   l = strlen((char *)cbdata->filesspace + dirnameid);
435   if (l && cbdata->filesspace[dirnameid + l - 1] != '/')
436     return diroff;		/* hmm */
437 
438   /* special handling for "." and ".." basename */
439   if (cbdata->filesspace[diroff + i] == '.')
440     {
441       if (dirl - i == 1)
442 	return dirnameid;
443       if (dirl - i == 2 && cbdata->filesspace[diroff + i + 1] == '.')
444 	{
445 	  if (l <= 2)
446 	    return dirnameid;	/* we hit our root */
447 	  for (i = l - 2; i > 0; i--)
448 	    if (cbdata->filesspace[dirnameid + i] == '/')
449 	      break;
450 	  i++;	/* include trailing / */
451 	  dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + dirnameid, i, strnhash((char *)cbdata->filesspace + dirnameid, i), 1);
452 	  return dirnameid == -1 ? diroff : dirnameid;
453 	}
454     }
455 
456   /* append basename to normalized dirname */
457   if (cbdata->rootdirl + l + dirl - i + 1 > cbdata->canonspacen)
458     {
459       cbdata->canonspacen = cbdata->rootdirl + l + dirl - i + 20;
460       cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
461       strcpy(cbdata->canonspace, cbdata->rootdir);
462     }
463   strcpy(cbdata->canonspace + cbdata->rootdirl, (char *)cbdata->filesspace + dirnameid);
464   strncpy(cbdata->canonspace + cbdata->rootdirl + l, (char *)cbdata->filesspace + diroff + i, dirl - i);
465   cbdata->canonspace[cbdata->rootdirl + l + dirl - i] = 0;
466 
467 #if 0
468   printf("stat()ing %s\n", cbdata->canonspace);
469 #endif
470   cbdata->statsmade++;
471   if (lstat(cbdata->canonspace, &stb) != 0 || !S_ISLNK(stb.st_mode))
472     {
473       /* not a symlink or stat failed, have new canon entry */
474       diroff = addfilesspace(cbdata, l + dirl - i + 2);
475       strcpy((char *)cbdata->filesspace + diroff, cbdata->canonspace + cbdata->rootdirl);
476       l += dirl - i;
477       /* add trailing / */
478       if (cbdata->filesspace[diroff + l - 1] != '/')
479 	{
480 	  cbdata->filesspace[diroff + l++] = '/';
481 	  cbdata->filesspace[diroff + l] = 0;
482 	}
483       /* call normalizedir on new entry for unification purposes */
484       dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, l, strnhash((char *)cbdata->filesspace + diroff, l), 1);
485       return dirnameid == -1 ? diroff : dirnameid;
486     }
487   /* oh no, a symlink! follow */
488   lo = cbdata->rootdirl + l + dirl - i + 1;
489   if (lo + stb.st_size + 2 > cbdata->canonspacen)
490     {
491       cbdata->canonspacen = lo + stb.st_size + 20;
492       cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
493     }
494   ll = readlink(cbdata->canonspace, cbdata->canonspace + lo, stb.st_size);
495   if (ll < 0 || ll > stb.st_size)
496     return diroff;		/* hmm */
497   if (ll == 0)
498     return dirnameid;		/* empty means current dir */
499   if (cbdata->canonspace[lo + ll - 1] != '/')
500     cbdata->canonspace[lo + ll++] = '/';	/* add trailing / */
501   cbdata->canonspace[lo + ll] = 0;		/* zero terminate */
502   if (cbdata->canonspace[lo] != '/')
503     {
504       /* relative link, concatenate to dirname */
505       memmove(cbdata->canonspace + cbdata->rootdirl + l, cbdata->canonspace + lo, ll + 1);
506       lo = cbdata->rootdirl;
507       ll += l;
508     }
509   dirnameid = normalizedir(cbdata, cbdata->canonspace + lo, ll, strnhash(cbdata->canonspace + lo, ll), 1);
510   return dirnameid == -1 ? diroff : dirnameid;
511 }
512 
513 /*
514  * map a directory (containing a trailing /) into a number.
515  * for unifywithstat this is the offset to the 16 byte stat result.
516  * for unifywithcanon this is the offset to the normailzed dir.
517  */
518 static Id
normalizedir(struct cbdata * cbdata,const char * dir,int dirl,Id hx,int create)519 normalizedir(struct cbdata *cbdata, const char *dir, int dirl, Id hx, int create)
520 {
521   Hashval h, hh;
522   Id qx;
523   Id nspaceoff;
524   int mycnt;
525 
526   if (!hx)
527     hx = dirl + 1;
528   h = hx & cbdata->normapn;
529   hh = HASHCHAIN_START;
530   for (;;)
531     {
532       qx = cbdata->normap[2 * h];
533       if (!qx)
534 	break;
535       if (qx == hx)
536 	{
537 	  Id off = cbdata->normap[2 * h + 1];
538 	  char *dp = (char *)cbdata->filesspace + cbdata->norq.elements[off];
539 	  if (!strncmp(dp, dir, dirl) && dp[dirl] == 0)
540 	    return cbdata->norq.elements[off + 1];
541 	}
542       h = HASHCHAIN_NEXT(h, hh, cbdata->normapn);
543     }
544   if (!create)
545     return 0;
546   /* new dir. work. */
547   if (dir >= (const char *)cbdata->filesspace && dir < (const char *)cbdata->filesspace + cbdata->filesspacen)
548     {
549       /* can happen when called from unifywithcanon */
550       Id off = dir - (const char *)cbdata->filesspace;
551       nspaceoff = addfilesspace(cbdata, dirl + 1);
552       dir = (const char *)cbdata->filesspace + off;
553     }
554   else
555     nspaceoff = addfilesspace(cbdata, dirl + 1);
556   if (dirl)
557     memcpy(cbdata->filesspace + nspaceoff, dir, dirl);
558   cbdata->filesspace[nspaceoff + dirl] = 0;
559   mycnt = cbdata->norq.count;
560   queue_push2(&cbdata->norq, nspaceoff, -1);	/* -1: in progress */
561   cbdata->normap[2 * h] = hx;
562   cbdata->normap[2 * h + 1] = mycnt;
563   if (++cbdata->normapused * 2 > cbdata->normapn)
564     cbdata->normap = growhash(cbdata->normap, &cbdata->normapn);
565   /* unify */
566   if (cbdata->usestat)
567     nspaceoff = unifywithstat(cbdata, nspaceoff, dirl);
568   else
569     nspaceoff = unifywithcanon(cbdata, nspaceoff, dirl);
570   cbdata->norq.elements[mycnt + 1] = nspaceoff;	/* patch in result */
571 #if 0
572   if (!cbdata->usestat)
573     printf("%s normalized to %d: %s\n", cbdata->filesspace + cbdata->norq.elements[mycnt], nspaceoff, cbdata->filesspace + nspaceoff);
574 #endif
575   return nspaceoff;
576 }
577 
578 /* collect all candidates for cflmap entries marked as "multiple" */
579 static void
findfileconflicts_alias_cb(void * cbdatav,const char * fn,struct filelistinfo * info)580 findfileconflicts_alias_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
581 {
582   int isdir = S_ISDIR(info->mode);
583   struct cbdata *cbdata = cbdatav;
584   const char *dp;
585   Id idx, dirid;
586   Id hx, qx;
587   Hashval h, hh;
588 
589   idx = cbdata->idx;
590 
591   if (!info->dirlen)
592     return;
593   if (info->diridx != cbdata->lastdiridx)
594     {
595       cbdata->lastdiridx = info->diridx;
596       cbdata->lastdirhash = 0;
597     }
598   dp = fn + info->dirlen;
599   hx = strhash(dp);
600   if (!hx)
601     hx = strlen(fn) + 1;
602 
603   h = hx & cbdata->cflmapn;
604   hh = HASHCHAIN_START;
605   for (;;)
606     {
607       qx = cbdata->cflmap[2 * h];
608       if (!qx)
609 	break;
610       if (qx == hx)
611 	break;
612       h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
613     }
614   if (!qx || cbdata->cflmap[2 * h + 1] != -1)
615     return;
616   /* found entry marked as "multiple", recored as conflict candidate */
617   if (!cbdata->lastdirhash)
618     cbdata->lastdirhash = strnhash(fn, dp - fn);
619   dirid = normalizedir(cbdata, fn, dp - fn, cbdata->lastdirhash, 1);
620   queue_push2(&cbdata->lookat, hx, idx);
621   queue_push2(&cbdata->lookat, cbdata->lastdirhash, isdir ? -dirid : dirid);
622 }
623 
624 /* turn (hx, idx, dhx, dirid) entries into (hx, idx, off, dirid) entries,
625  * where off is an offset into the filespace block */
626 static void
findfileconflicts_expand_cb(void * cbdatav,const char * fn,struct filelistinfo * info)627 findfileconflicts_expand_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
628 {
629   struct cbdata *cbdata = cbdatav;
630   Id hx, dhx;
631   Hashval h, hh;
632   const char *dp;
633   char md5padded[34];
634   Id off, dirid;
635   int i;
636 
637   if (!info->dirlen)
638     return;
639   dp = fn + info->dirlen;
640   if (info->diridx != cbdata->lastdiridx)
641     {
642       cbdata->lastdiridx = info->diridx;
643       cbdata->lastdirhash = strnhash(fn, dp - fn);
644       if (cbdata->aliases)
645         cbdata->lastdiridxbad = MAPTST(&cbdata->fetchdirmap, cbdata->lastdirhash & cbdata->fetchdirmapn) ? 0 : 1;
646     }
647   if (cbdata->lastdiridxbad)
648     return;
649   if (cbdata->aliases)
650     {
651       hx = strhash(dp);
652       dhx = cbdata->lastdirhash;
653       dirid =  normalizedir(cbdata, fn, dp - fn, dhx, 0);
654     }
655   else
656     {
657       hx = cbdata->lastdirhash;
658       hx = strhash_cont(dp, hx);
659       dhx = dirid = 0;
660     }
661   if (!hx)
662     hx = strlen(fn) + 1;
663 
664   h = (hx ^ (dirid * 37)) & cbdata->fetchmapn;
665   hh = HASHCHAIN_START;
666   for (;;)
667     {
668       i = cbdata->fetchmap[h];
669       if (!i)
670 	break;
671       if (cbdata->lookat.elements[i - 1] == hx && cbdata->lookat.elements[i + 2] == dirid && cbdata->lookat.elements[i + 1] == dhx)
672 	{
673           /* printf("%d, hx %x dhx %x dirid %d -> %s   %d %s %d\n", cbdata->idx, hx, dhx, dirid, fn, info->mode, info->digest, info->color); */
674 	  strncpy(md5padded, info->digest, 32);
675 	  md5padded[32] = 0;
676 	  md5padded[33] = info->color;
677 	  off = addfilesspace(cbdata, strlen(fn) + (34 + 1));
678 	  memcpy(cbdata->filesspace + off, (unsigned char *)md5padded, 34);
679           strcpy((char *)cbdata->filesspace + off + 34, fn);
680 	  queue_push2(&cbdata->lookat, hx, cbdata->idx);
681 	  queue_push2(&cbdata->lookat, off, dirid);
682 	}
683       h = HASHCHAIN_NEXT(h, hh, cbdata->fetchmapn);
684     }
685 }
686 
687 static int
lookat_idx_cmp(const void * ap,const void * bp,void * dp)688 lookat_idx_cmp(const void *ap, const void *bp, void *dp)
689 {
690   const Id *a = ap, *b = bp;
691   unsigned int ahx, bhx;
692   if (a[1] - b[1] != 0)		/* idx */
693     return a[1] - b[1];
694   if (a[3] - b[3] != 0)		/* dirid */
695     return a[3] - b[3];
696   ahx = (unsigned int)a[0];	/* can be < 0 */
697   bhx = (unsigned int)b[0];
698   if (ahx != bhx)
699     return ahx < bhx ? -1 : 1;
700   ahx = (unsigned int)a[2];	/* dhx */
701   bhx = (unsigned int)b[2];
702   if (ahx != bhx)
703     return ahx < bhx ? -1 : 1;
704   return 0;
705 }
706 
707 static int
lookat_hx_cmp(const void * ap,const void * bp,void * dp)708 lookat_hx_cmp(const void *ap, const void *bp, void *dp)
709 {
710   const Id *a = ap, *b = bp;
711   unsigned int ahx, bhx;
712   Id adirid, bdirid;
713   ahx = (unsigned int)a[0];	/* can be < 0 */
714   bhx = (unsigned int)b[0];
715   if (ahx != bhx)
716     return ahx < bhx ? -1 : 1;
717   adirid = a[3] < 0 ? -a[3] : a[3];
718   bdirid = b[3] < 0 ? -b[3] : b[3];
719   if (adirid - bdirid != 0)	/* dirid */
720     return adirid - bdirid;
721   if (a[3] != b[3])
722     return a[3] > 0 ? -1 : 1; 	/* bring positive dirids to front */
723   if (a[1] - b[1] != 0)		/* idx */
724     return a[1] - b[1];
725   ahx = (unsigned int)a[2];	/* dhx */
726   bhx = (unsigned int)b[2];
727   if (ahx != bhx)
728     return ahx < bhx ? -1 : 1;
729   return 0;
730 }
731 
732 static int
conflicts_cmp(const void * ap,const void * bp,void * dp)733 conflicts_cmp(const void *ap, const void *bp, void *dp)
734 {
735   Pool *pool = dp;
736   const Id *a = ap;
737   const Id *b = bp;
738   if (a[0] != b[0])	/* filename1 */
739     return strcmp(pool_id2str(pool, a[0]), pool_id2str(pool, b[0]));
740   if (a[3] != b[3])	/* filename2 */
741     return strcmp(pool_id2str(pool, a[3]), pool_id2str(pool, b[3]));
742   if (a[1] != b[1])	/* pkgid1 */
743     return a[1] - b[1];
744   if (a[4] != b[4])	/* pkgid2 */
745     return a[4] - b[4];
746   return 0;
747 }
748 
749 static void
iterate_solvable_dirs(Pool * pool,Id p,void (* cb)(void *,const char *,struct filelistinfo *),void * cbdata)750 iterate_solvable_dirs(Pool *pool, Id p, void (*cb)(void *, const char *, struct filelistinfo *), void *cbdata)
751 {
752   Repodata *lastdata = 0;
753   Id lastdirid = -1;
754   Dataiterator di;
755 
756   dataiterator_init(&di, pool, 0, p, SOLVABLE_FILELIST, 0, SEARCH_COMPLETE_FILELIST);
757   while (dataiterator_step(&di))
758     {
759       if (di.data == lastdata && di.kv.id == lastdirid)
760 	continue;
761       lastdata = di.data;
762       lastdirid = di.kv.id;
763       cb(cbdata, repodata_dir2str(di.data, di.kv.id, ""), 0);
764     }
765   dataiterator_free(&di);
766 }
767 
768 /* before calling the expensive findfileconflicts_cb we check if any of
769  * the files match. This only makes sense when cbdata->create is off.
770  */
771 static int
precheck_solvable_files(struct cbdata * cbdata,Pool * pool,Id p)772 precheck_solvable_files(struct cbdata *cbdata, Pool *pool, Id p)
773 {
774   Dataiterator di;
775   Id hx, qx;
776   Hashval h, hh;
777   int found = 0;
778   int aliases = cbdata->aliases;
779   unsigned int lastdirid = -1;
780   Hashval lastdirhash = 0;
781   int lastdirlen = 0;
782   int checkthisdir = 0;
783   Repodata *lastrepodata = 0;
784 
785   dataiterator_init(&di, pool, 0, p, SOLVABLE_FILELIST, 0, SEARCH_COMPLETE_FILELIST);
786   while (dataiterator_step(&di))
787     {
788       if (aliases)
789 	{
790 	  /* hash just the basename */
791 	  hx = strhash(di.kv.str);
792 	  if (!hx)
793 	    hx = strlen(di.kv.str) + 1;
794 	}
795       else
796 	{
797 	  /* hash the full path */
798 	  if (di.data != lastrepodata || di.kv.id != lastdirid)
799 	    {
800 	      const char *dir;
801 	      lastrepodata = di.data;
802 	      lastdirid = di.kv.id;
803 	      dir = repodata_dir2str(lastrepodata, lastdirid, "");
804 	      lastdirlen = strlen(dir);
805 	      lastdirhash = strhash(dir);
806 	      checkthisdir =  isindirmap(cbdata, lastdirhash ? lastdirhash : lastdirlen + 1);
807 	    }
808 	  if (!checkthisdir)
809 	    continue;
810 	  hx = strhash_cont(di.kv.str, lastdirhash);
811 	  if (!hx)
812 	    hx = lastdirlen + strlen(di.kv.str) + 1;
813 	}
814       h = hx & cbdata->cflmapn;
815       hh = HASHCHAIN_START;
816       for (;;)
817 	{
818 	  qx = cbdata->cflmap[2 * h];
819 	  if (!qx)
820 	    break;
821 	  if (qx == hx)
822 	    {
823 	      found = 1;
824 	      break;
825 	    }
826 	  h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
827 	}
828       if (found)
829 	break;
830     }
831   dataiterator_free(&di);
832   return found;
833 }
834 
835 
836 /* pool_findfileconflicts: find file conflicts in a set of packages
837  * input:
838  *   - pkgs: list of packages to check
839  *   - cutoff: packages after this are not checked against each other
840  *             this is useful to ignore file conflicts in already installed packages
841  *   - flags: see pool_fileconflicts.h
842  *   - handle_cb, handle_cbdata: callback for rpm header fetches
843  * output:
844  *   - conflicts: list of conflicts
845  *
846  * This is designed for needing only little memory while still being reasonable fast.
847  * We do this by hashing the file names and working with the 32bit hash values in the
848  * first steps of the algorithm. A hash conflict is not a problem as it will just
849  * lead to some unneeded extra work later on.
850  */
851 
852 int
pool_findfileconflicts(Pool * pool,Queue * pkgs,int cutoff,Queue * conflicts,int flags,void * (* handle_cb)(Pool *,Id,void *),void * handle_cbdata)853 pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, int flags, void *(*handle_cb)(Pool *, Id, void *) , void *handle_cbdata)
854 {
855   int i, j, idxmapset;
856   struct cbdata cbdata;
857   unsigned int now, start;
858   void *handle;
859   Repo *installed = pool->installed;
860   Id p;
861   int usefilecolors;
862   int hdrfetches;
863   int lookat_cnt;
864 
865   queue_empty(conflicts);
866   if (!pkgs->count)
867     return 0;
868 
869   now = start = solv_timems(0);
870   /* Hmm, should we have a different flag for this? */
871   usefilecolors = pool_get_flag(pool, POOL_FLAG_IMPLICITOBSOLETEUSESCOLORS);
872   POOL_DEBUG(SOLV_DEBUG_STATS, "searching for file conflicts\n");
873   POOL_DEBUG(SOLV_DEBUG_STATS, "packages: %d, cutoff %d, usefilecolors %d\n", pkgs->count, cutoff, usefilecolors);
874 
875   memset(&cbdata, 0, sizeof(cbdata));
876   cbdata.aliases = flags & FINDFILECONFLICTS_CHECK_DIRALIASING;
877   cbdata.pool = pool;
878   if (cbdata.aliases && (flags & FINDFILECONFLICTS_USE_ROOTDIR) != 0)
879     {
880       cbdata.rootdir = pool_get_rootdir(pool);
881       if (cbdata.rootdir && !strcmp(cbdata.rootdir, "/"))
882 	cbdata.rootdir = 0;
883       if (cbdata.rootdir)
884 	cbdata.rootdirl = strlen(cbdata.rootdir);
885       if (!cbdata.rootdir)
886 	cbdata.usestat = 1;
887     }
888   queue_init(&cbdata.lookat);
889   queue_init(&cbdata.lookat_dir);
890   map_init(&cbdata.idxmap, pkgs->count);
891 
892   if (cutoff <= 0)
893     cutoff = pkgs->count;
894 
895   /* avarage file list size: 200 files per package */
896   /* avarage dir count: 20 dirs per package */
897 
898   /* first pass: find dirs belonging to multiple packages */
899   if (!cbdata.aliases)
900     {
901       hdrfetches = 0;
902       cbdata.dirmapn = mkmask((cutoff + 3) * 16);
903       cbdata.dirmap = solv_calloc(cbdata.dirmapn + 1, 2 * sizeof(Id));
904       cbdata.create = 1;
905       idxmapset = 0;
906       for (i = 0; i < pkgs->count; i++)
907 	{
908 	  if (i == cutoff)
909 	    cbdata.create = 0;
910 	  cbdata.idx = i;
911 	  p = pkgs->elements[i];
912 	  if ((flags & FINDFILECONFLICTS_USE_SOLVABLEFILELIST) != 0 && installed)
913 	    {
914 	      if (p >= installed->start && p < installed->end && pool->solvables[p].repo == installed)
915 		{
916 		  iterate_solvable_dirs(pool, p, finddirs_cb, &cbdata);
917 		  if (MAPTST(&cbdata.idxmap, i))
918 		    idxmapset++;
919 		  continue;
920 		}
921 	    }
922 	  handle = (*handle_cb)(pool, p, handle_cbdata);
923 	  if (!handle)
924 	    continue;
925 	  hdrfetches++;
926 	  rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_ONLYDIRS, finddirs_cb, &cbdata);
927 	  if (MAPTST(&cbdata.idxmap, i))
928 	    idxmapset++;
929 	}
930       POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap size: %d, used %d\n", cbdata.dirmapn + 1, cbdata.dirmapused);
931       POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap memory usage: %d K\n", (cbdata.dirmapn + 1) * 2 * (int)sizeof(Id) / 1024);
932       POOL_DEBUG(SOLV_DEBUG_STATS, "header fetches: %d\n", hdrfetches);
933       POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap creation took %d ms\n", solv_timems(now));
934       POOL_DEBUG(SOLV_DEBUG_STATS, "dir conflicts found: %d, idxmap %d of %d\n", cbdata.dirconflicts, idxmapset, pkgs->count);
935     }
936 
937   /* second pass: scan files in the directories found above */
938   now = solv_timems(0);
939   cbdata.cflmapn = mkmask((cutoff + 3) * 32);
940   cbdata.cflmap = solv_calloc(cbdata.cflmapn + 1, 2 * sizeof(Id));
941   cbdata.create = 1;
942   hdrfetches = 0;
943   for (i = 0; i < pkgs->count; i++)
944     {
945       if (i == cutoff)
946 	cbdata.create = 0;
947       if (!cbdata.aliases && !MAPTST(&cbdata.idxmap, i))
948 	continue;
949       cbdata.idx = i;
950       p = pkgs->elements[i];
951       if (!cbdata.create && (flags & FINDFILECONFLICTS_USE_SOLVABLEFILELIST) != 0 && installed)
952 	{
953 	  if (p >= installed->start && p < installed->end && pool->solvables[p].repo == installed)
954 	    if (!precheck_solvable_files(&cbdata, pool, p))
955 	      continue;
956 	}
957       /* can't use FINDFILECONFLICTS_USE_SOLVABLEFILELIST because we have to know if
958        * the file is a directory or not */
959       handle = (*handle_cb)(pool, p, handle_cbdata);
960       if (!handle)
961 	continue;
962       hdrfetches++;
963       cbdata.lastdiridx = -1;
964       rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, cbdata.aliases ? findfileconflicts_basename_cb : findfileconflicts_cb, &cbdata);
965     }
966 
967   POOL_DEBUG(SOLV_DEBUG_STATS, "filemap size: %d, used %d\n", cbdata.cflmapn + 1, cbdata.cflmapused);
968   POOL_DEBUG(SOLV_DEBUG_STATS, "filemap memory usage: %d K\n", (cbdata.cflmapn + 1) * 2 * (int)sizeof(Id) / 1024);
969   POOL_DEBUG(SOLV_DEBUG_STATS, "header fetches: %d\n", hdrfetches);
970   POOL_DEBUG(SOLV_DEBUG_STATS, "filemap creation took %d ms\n", solv_timems(now));
971   POOL_DEBUG(SOLV_DEBUG_STATS, "lookat_dir size: %d\n", cbdata.lookat_dir.count);
972   queue_free(&cbdata.lookat_dir);
973 
974   /* we need another pass for aliases to generate the normalized directory ids */
975   queue_init(&cbdata.norq);
976   if (cbdata.aliases)
977     {
978       now = solv_timems(0);
979       addfilesspace(&cbdata, 1);	/* make sure the first offset is not zero */
980       cbdata.normapn = mkmask((cutoff + 3) * 4);
981       cbdata.normap = solv_calloc(cbdata.normapn + 1, 2 * sizeof(Id));
982       if (cbdata.usestat)
983 	{
984 	  cbdata.statmapn = cbdata.normapn;
985 	  cbdata.statmap = solv_calloc(cbdata.statmapn + 1, 2 * sizeof(Id));
986 	}
987       cbdata.create = 0;
988       hdrfetches = 0;
989       for (i = 0; i < pkgs->count; i++)
990 	{
991 	  if (!MAPTST(&cbdata.idxmap, i))
992 	    continue;
993 	  p = pkgs->elements[i];
994 	  cbdata.idx = i;
995 	  /* can't use FINDFILECONFLICTS_USE_SOLVABLEFILELIST because we have to know if
996 	   * the file is a directory or not */
997 	  handle = (*handle_cb)(pool, p, handle_cbdata);
998 	  if (!handle)
999 	    continue;
1000 	  hdrfetches++;
1001 	  cbdata.lastdiridx = -1;
1002 	  rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, findfileconflicts_alias_cb, &cbdata);
1003 	}
1004       POOL_DEBUG(SOLV_DEBUG_STATS, "normap size: %d, used %d\n", cbdata.normapn + 1, cbdata.normapused);
1005       POOL_DEBUG(SOLV_DEBUG_STATS, "normap memory usage: %d K\n", (cbdata.normapn + 1) * 2 * (int)sizeof(Id) / 1024);
1006       POOL_DEBUG(SOLV_DEBUG_STATS, "header fetches: %d\n", hdrfetches);
1007       POOL_DEBUG(SOLV_DEBUG_STATS, "stats made: %d\n", cbdata.statsmade);
1008       if (cbdata.usestat)
1009 	{
1010 	  POOL_DEBUG(SOLV_DEBUG_STATS, "statmap size: %d, used %d\n", cbdata.statmapn + 1, cbdata.statmapused);
1011 	  POOL_DEBUG(SOLV_DEBUG_STATS, "statmap memory usage: %d K\n", (cbdata.statmapn + 1) * 2 * (int)sizeof(Id) / 1024);
1012 	}
1013       cbdata.statmap = solv_free(cbdata.statmap);
1014       cbdata.statmapn = 0;
1015       cbdata.canonspace = solv_free(cbdata.canonspace);
1016       cbdata.canonspacen = 0;
1017       POOL_DEBUG(SOLV_DEBUG_STATS, "alias processing took %d ms\n", solv_timems(now));
1018     }
1019 
1020   /* free no longer used stuff */
1021   cbdata.dirmap = solv_free(cbdata.dirmap);
1022   cbdata.dirmapn = 0;
1023   cbdata.dirmapused = 0;
1024   cbdata.cflmap = solv_free(cbdata.cflmap);
1025   cbdata.cflmapn = 0;
1026   cbdata.cflmapused = 0;
1027   map_free(&cbdata.idxmap);
1028 
1029   /* sort and unify/prune */
1030   /* this also makes all dirids positive as side effect */
1031   now = solv_timems(0);
1032   POOL_DEBUG(SOLV_DEBUG_STATS, "raw candidates: %d\n", cbdata.lookat.count / 4);
1033   solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 4, sizeof(Id) * 4, &lookat_hx_cmp, pool);
1034   for (i = j = 0; i < cbdata.lookat.count; )
1035     {
1036       int first = 1, jstart = j;
1037       Id hx = cbdata.lookat.elements[i];
1038       Id idx = cbdata.lookat.elements[i + 1];
1039       Id dhx = cbdata.lookat.elements[i + 2];
1040       Id dirid = cbdata.lookat.elements[i + 3];
1041       i += 4;
1042       for (; i < cbdata.lookat.count && hx == cbdata.lookat.elements[i] && (dirid == cbdata.lookat.elements[i + 3] || dirid == -cbdata.lookat.elements[i + 3]); i += 4)
1043 	{
1044 	  if (idx == cbdata.lookat.elements[i + 1] && dhx == cbdata.lookat.elements[i + 2])
1045 	    {
1046 	      if (first && idx < cutoff && cbdata.aliases && dirid >= 0)
1047 		first = 0;	/* special self-conflict case with dhx hash collision, e.g. /foo/xx and /fpf/xx */
1048 	      else
1049 	        continue;	/* ignore duplicates */
1050 	    }
1051 	  if (first)
1052 	    {
1053 	      if (dirid < 0)
1054 		continue;	/* all have a neg dirid */
1055 	      cbdata.lookat.elements[j++] = hx;
1056 	      cbdata.lookat.elements[j++] = idx;
1057 	      cbdata.lookat.elements[j++] = dhx;
1058 	      cbdata.lookat.elements[j++] = dirid;
1059 	      first = 0;
1060 	      if (jstart >= 0 && idx < cutoff)
1061 		jstart = -1;
1062 	    }
1063 	  idx = cbdata.lookat.elements[i + 1];
1064 	  dhx = cbdata.lookat.elements[i + 2];
1065 	  cbdata.lookat.elements[j++] = hx;
1066 	  cbdata.lookat.elements[j++] = idx;
1067 	  cbdata.lookat.elements[j++] = dhx;
1068 	  cbdata.lookat.elements[j++] = dirid;
1069 	  if (jstart >= 0 && idx < cutoff)
1070 	    jstart = -1;
1071 	}
1072       if (jstart >= 0)	/* we need at least one new candidate */
1073 	j = jstart;
1074     }
1075   queue_truncate(&cbdata.lookat, j);
1076   POOL_DEBUG(SOLV_DEBUG_STATS, "pruned candidates: %d\n", cbdata.lookat.count / 4);
1077   POOL_DEBUG(SOLV_DEBUG_STATS, "pruning took %d ms\n", solv_timems(now));
1078 
1079   /* third pass: expand to real file names */
1080   now = solv_timems(0);
1081   /* sort by idx so we can do all files of a package in one go */
1082   solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 4, sizeof(Id) * 4, &lookat_idx_cmp, pool);
1083   hdrfetches = 0;
1084   queue_init(&cbdata.newlookat);
1085   if (cbdata.lookat.count)
1086     {
1087       /* setup fetch map space */
1088       cbdata.fetchmapn = mkmask(cbdata.lookat.count + 3);
1089       if (cbdata.fetchmapn < 4095)
1090         cbdata.fetchmapn = 4095;
1091       cbdata.fetchmap = solv_calloc(cbdata.fetchmapn + 1, sizeof(Id));
1092       if (cbdata.aliases)
1093 	{
1094 	  cbdata.fetchdirmapn = ((cbdata.fetchmapn + 1) / 16) - 1;
1095 	  map_init(&cbdata.fetchdirmap, cbdata.fetchdirmapn + 1);
1096 	}
1097     }
1098   lookat_cnt = cbdata.lookat.count;
1099   while (lookat_cnt)
1100     {
1101       Id idx = cbdata.lookat.elements[1];
1102       int iterflags = RPM_ITERATE_FILELIST_WITHMD5 | RPM_ITERATE_FILELIST_NOGHOSTS;
1103       if (usefilecolors)
1104 	iterflags |= RPM_ITERATE_FILELIST_WITHCOL;
1105       /* find end of idx block */
1106       for (j = 4; j < lookat_cnt; j += 4)
1107 	if (cbdata.lookat.elements[j + 1] != idx)
1108 	  break;
1109       p = pkgs->elements[idx];
1110       handle = (*handle_cb)(pool, p, handle_cbdata);
1111       if (!handle)
1112 	{
1113 	  queue_deleten(&cbdata.lookat, 0, j);
1114 	  lookat_cnt -= j;
1115 	  continue;
1116 	}
1117       hdrfetches++;
1118       /* create hash which maps (hx, dirid) to lookat elements */
1119       /* also create map from dhx values for fast reject */
1120       for (i = 0; i < j; i += 4)
1121 	{
1122 	  Hashval h, hh;
1123 	  h = (cbdata.lookat.elements[i] ^ (cbdata.lookat.elements[i + 3] * 37)) & cbdata.fetchmapn;
1124 	  hh = HASHCHAIN_START;
1125 	  while (cbdata.fetchmap[h])
1126 	    h = HASHCHAIN_NEXT(h, hh, cbdata.fetchmapn);
1127 	  cbdata.fetchmap[h] = i + 1;
1128 	  cbdata.lookat.elements[i + 1] = (Id)h;	/* hack: misuse idx for easy hash cleanup */
1129 	  if (cbdata.fetchdirmapn)
1130 	    MAPSET(&cbdata.fetchdirmap, cbdata.lookat.elements[i + 2] & cbdata.fetchdirmapn);
1131 	}
1132       cbdata.idx = idx;
1133       cbdata.lastdiridx = -1;
1134       cbdata.lastdiridxbad = 0;
1135       queue_prealloc(&cbdata.newlookat, j + 256);
1136       rpm_iterate_filelist(handle, iterflags, findfileconflicts_expand_cb, &cbdata);
1137       /* clear hash and map again */
1138       for (i = 0; i < j; i += 4)
1139 	{
1140 	  Hashval h = (Hashval)cbdata.lookat.elements[i + 1];
1141 	  cbdata.fetchmap[h] = 0;
1142 	  if (cbdata.fetchdirmapn)
1143 	    MAPCLR_AT(&cbdata.fetchdirmap, cbdata.lookat.elements[i + 2] & cbdata.fetchdirmapn);
1144 	}
1145       /* now delete old block and add new block to the end */
1146       queue_deleten(&cbdata.lookat, 0, j);
1147       queue_insertn(&cbdata.lookat, cbdata.lookat.count, cbdata.newlookat.count, cbdata.newlookat.elements);
1148       queue_empty(&cbdata.newlookat);
1149       lookat_cnt -= j;
1150     }
1151   queue_free(&cbdata.newlookat);
1152   POOL_DEBUG(SOLV_DEBUG_STATS, "header fetches: %d\n", hdrfetches);
1153   POOL_DEBUG(SOLV_DEBUG_STATS, "candidates now: %d\n", cbdata.lookat.count / 4);
1154   POOL_DEBUG(SOLV_DEBUG_STATS, "file expansion took %d ms\n", solv_timems(now));
1155   /* free memory */
1156   cbdata.fetchmap = solv_free(cbdata.fetchmap);
1157   cbdata.fetchmapn = 0;
1158   if (cbdata.fetchdirmapn)
1159     map_free(&cbdata.fetchdirmap);
1160   cbdata.fetchdirmapn = 0;
1161   cbdata.normap = solv_free(cbdata.normap);
1162   cbdata.normapn = 0;
1163   queue_free(&cbdata.norq);
1164 
1165   /* forth pass: for each (hx,dirid) we have, compare all matching files against all other matching files */
1166   now = solv_timems(0);
1167   solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 4, sizeof(Id) * 4, &lookat_hx_cmp, pool);
1168   for (i = 0; i < cbdata.lookat.count - 4; i += 4)
1169     {
1170       Id hx = cbdata.lookat.elements[i];
1171       Id dirid = cbdata.lookat.elements[i + 3];
1172       Id idxi = cbdata.lookat.elements[i + 1];
1173       Id offi = cbdata.lookat.elements[i + 2];
1174       if (idxi >= cutoff)
1175 	continue;	/* no conflicts between packages with idx >= cutoff */
1176       for (j = i + 4; j < cbdata.lookat.count && cbdata.lookat.elements[j] == hx && cbdata.lookat.elements[j + 3] == dirid; j += 4)
1177 	{
1178 	  Id idxj = cbdata.lookat.elements[j + 1];
1179 	  Id offj = cbdata.lookat.elements[j + 2];
1180 	  char *fsi = (char *)cbdata.filesspace + offi;
1181 	  char *fsj = (char *)cbdata.filesspace + offj;
1182 	  if (cbdata.aliases)
1183 	    {
1184 	      /* compare just the basenames, the dirs match because of the dirid */
1185 	      char *bsi = strrchr(fsi + 34, '/');
1186 	      char *bsj = strrchr(fsj + 34, '/');
1187 	      if (!bsi || !bsj)
1188 		continue;
1189 	      if (strcmp(bsi, bsj))
1190 		continue;	/* different base names */
1191 	    }
1192 	  else
1193 	    {
1194 	      if (strcmp(fsi + 34, fsj + 34))
1195 		continue;	/* different file names */
1196 	    }
1197 	  if (!strcmp(fsi, fsj))
1198 	    continue;	/* file digests match, no conflict */
1199 	  if (usefilecolors && fsi[33] && fsj[33] && (fsi[33] & fsj[33]) == 0)
1200 	    continue;	/* colors do not conflict */
1201 	  queue_push(conflicts, pool_str2id(pool, fsi + 34, 1));
1202 	  queue_push(conflicts, pkgs->elements[idxi]);
1203 	  queue_push(conflicts, pool_str2id(pool, fsi, 1));
1204 	  queue_push(conflicts, pool_str2id(pool, fsj + 34, 1));
1205 	  queue_push(conflicts, pkgs->elements[idxj]);
1206 	  queue_push(conflicts, pool_str2id(pool, fsj, 1));
1207 	}
1208     }
1209   POOL_DEBUG(SOLV_DEBUG_STATS, "filespace size: %d K\n", cbdata.filesspacen / 1024);
1210   POOL_DEBUG(SOLV_DEBUG_STATS, "candidate check took %d ms\n", solv_timems(now));
1211   cbdata.filesspace = solv_free(cbdata.filesspace);
1212   cbdata.filesspacen = 0;
1213   queue_free(&cbdata.lookat);
1214   if (conflicts->count > 6)
1215     solv_sort(conflicts->elements, conflicts->count / 6, 6 * sizeof(Id), conflicts_cmp, pool);
1216   POOL_DEBUG(SOLV_DEBUG_STATS, "found %d file conflicts\n", conflicts->count / 6);
1217   POOL_DEBUG(SOLV_DEBUG_STATS, "file conflict detection took %d ms\n", solv_timems(start));
1218 
1219   return conflicts->count / 6;
1220 }
1221 
1222