1 /*
2   diff.c -- binary diff generator
3 
4   Copyright 2004,2005 Michael Schroeder
5 
6   rewritten from bsdiff.c,
7       http://www.freebsd.org/cgi/cvsweb.cgi/src/usr.bin/bsdiff
8   added library interface and hash method, enhanced suffix method.
9 */
10 /*-
11  * Copyright 2003-2005 Colin Percival
12  * All rights reserved
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted providing that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
27  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <unistd.h>
39 #include <string.h>
40 #include <bzlib.h>
41 
42 #include "delta.h"
43 
44 struct bzblock {
45   unsigned char *data;
46   unsigned int len;
47   bz_stream strm;
48 };
49 
blockopen()50 static struct bzblock *blockopen()
51 {
52   struct bzblock *bzf;
53   int ret;
54   bzf = malloc(sizeof(*bzf));
55   if (!bzf)
56     return 0;
57   bzf->strm.bzalloc = 0;
58   bzf->strm.bzfree = 0;
59   bzf->strm.opaque = 0;
60   bzf->len = 0;
61   ret = BZ2_bzCompressInit(&bzf->strm, 9, 0, 30);
62   if (ret != BZ_OK)
63     {
64       free(bzf);
65       return 0;
66     }
67   bzf->data = 0;
68   bzf->strm.avail_in = 0;
69   bzf->strm.next_in  = 0;
70   bzf->strm.avail_out = 0;
71   bzf->strm.next_out  = 0;
72   return bzf;
73 }
74 
blockwrite(struct bzblock * bzf,void * buf,int len)75 static int blockwrite(struct bzblock *bzf, void *buf, int len)
76 {
77   int ret;
78 
79   if (len <= 0)
80     return len < 0 ? -1 : 0;
81   bzf->strm.avail_in = len;
82   bzf->strm.next_in = buf;
83   for (;;)
84     {
85       if (bzf->strm.avail_out < 4096)
86 	{
87 	  if (bzf->len + 8192 < bzf->len)
88 	    return -1;
89 	  if (bzf->data)
90 	    bzf->data = realloc(bzf->data, bzf->len + 8192);
91 	  else
92 	    bzf->data = malloc(bzf->len + 8192);
93 	  if (!bzf->data)
94 	    return -1;
95 	  bzf->strm.avail_out = 8192;
96 	}
97       bzf->strm.next_out = (char *)bzf->data + bzf->len;
98       ret = BZ2_bzCompress(&bzf->strm, BZ_RUN);
99       if (ret != BZ_RUN_OK)
100 	return -1;
101       bzf->len = (unsigned char *)bzf->strm.next_out - bzf->data;
102       if (bzf->strm.avail_in == 0)
103 	return len;
104     }
105 }
106 
blockclose(struct bzblock * bzf,unsigned char ** datap,unsigned int * lenp)107 static int blockclose(struct bzblock *bzf, unsigned char **datap, unsigned int *lenp)
108 {
109   int ret;
110   bzf->strm.avail_in = 0;
111   bzf->strm.next_in = 0;
112   for (;;)
113     {
114       if (bzf->strm.avail_out < 4096)
115 	{
116 	  if (bzf->len + 8192 < bzf->len)
117 	    return -1;
118 	  if (bzf->data)
119 	    bzf->data = realloc(bzf->data, bzf->len + 8192);
120 	  else
121 	    bzf->data = malloc(bzf->len + 8192);
122 	  if (!bzf->data)
123 	    return -1;
124 	  bzf->strm.avail_out = 8192;
125 	}
126       bzf->strm.next_out = (char *)bzf->data + bzf->len;
127       ret = BZ2_bzCompress(&bzf->strm, BZ_FINISH);
128       if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
129         return -1;
130       bzf->len = (unsigned char *)bzf->strm.next_out - bzf->data;
131       if (ret == BZ_STREAM_END)
132         break;
133     }
134   BZ2_bzCompressEnd (&bzf->strm);
135   *datap = bzf->data;
136   *lenp = bzf->len;
137   free(bzf);
138   return 0;
139 }
140 
matchlen(unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen)141 static inline bsuint matchlen(unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen)
142 {
143   unsigned char *oldsave;
144   bsuint max;
145   oldsave = old;
146   max = oldlen > newlen ? newlen : oldlen;
147   while (max-- > 0)
148     if (*old++ != *new++)
149       return old - oldsave - 1;
150   return old - oldsave;
151 }
152 
153 #ifndef BSDIFF_NO_HASH
154 
155 /******************************************************************/
156 /*                                                                */
157 /*         hash method                                            */
158 /*                                                                */
159 /******************************************************************/
160 
161 #define HSIZESHIFT      4
162 #define HSIZE           (1 << HSIZESHIFT)
163 
164 /* 256 random numbers generated by a quantum source */
165 static unsigned int noise[256] =
166 {
167   0x9be502a4U, 0xba7180eaU, 0x324e474fU, 0x0aab8451U, 0x0ced3810U,
168   0x2158a968U, 0x6bbd3771U, 0x75a02529U, 0x41f05c14U, 0xc2264b87U,
169   0x1f67b359U, 0xcd2d031dU, 0x49dc0c04U, 0xa04ae45cU, 0x6ade28a7U,
170   0x2d0254ffU, 0xdec60c7cU, 0xdef5c084U, 0x0f77ffc8U, 0x112021f6U,
171   0x5f6d581eU, 0xe35ea3dfU, 0x3216bfb4U, 0xd5a3083dU, 0x7e63e9cdU,
172   0xaa9208f6U, 0xda3f3978U, 0xfe0e2547U, 0x09dfb020U, 0xd97472c5U,
173   0xbbce2edeU, 0x121aebd2U, 0x0e9fdbebU, 0x7b6f5d9cU, 0x84938e43U,
174   0x30694f2dU, 0x86b7a7f8U, 0xefaf5876U, 0x263812e6U, 0xb6e48ddfU,
175   0xce8ed980U, 0x4df591e1U, 0x75257b35U, 0x2f88dcffU, 0xa461fe44U,
176   0xca613b4dU, 0xd9803f73U, 0xea056205U, 0xccca7a89U, 0x0f2dbb07U,
177   0xc53e359eU, 0xe80d0137U, 0x2b2d2a5dU, 0xcfc1391aU, 0x2bb3b6c5U,
178   0xb66aea3cU, 0x00ea419eU, 0xce5ada84U, 0xae1d6712U, 0x12f576baU,
179   0x117fcbc4U, 0xa9d4c775U, 0x25b3d616U, 0xefda65a8U, 0xaff3ef5bU,
180   0x00627e68U, 0x668d1e99U, 0x088d0eefU, 0xf8fac24dU, 0xe77457c7U,
181   0x68d3beb4U, 0x921d2acbU, 0x9410eac9U, 0xd7f24399U, 0xcbdec497U,
182   0x98c99ae1U, 0x65802b2cU, 0x81e1c3c4U, 0xa130bb09U, 0x17a87badU,
183   0xa70367d6U, 0x148658d4U, 0x02f33377U, 0x8620d8b6U, 0xbdac25bdU,
184   0xb0a6de51U, 0xd64c4571U, 0xa4185ba0U, 0xa342d70fU, 0x3f1dc4c1U,
185   0x042dc3ceU, 0x0de89f43U, 0xa69b1867U, 0x3c064e11U, 0xad1e2c3eU,
186   0x9660e8cdU, 0xd36b09caU, 0x4888f228U, 0x61a9ac3cU, 0xd9561118U,
187   0x3532797eU, 0x71a35c22U, 0xecc1376cU, 0xab31e656U, 0x88bd0d35U,
188   0x423b20ddU, 0x38e4651cU, 0x3c6397a4U, 0x4a7b12d9U, 0x08b1cf33U,
189   0xd0604137U, 0xb035fdb8U, 0x4916da23U, 0xa9349493U, 0xd83daa9bU,
190   0x145f7d95U, 0x868531d6U, 0xacb18f17U, 0x9cd33b6fU, 0x193e42b9U,
191   0x26dfdc42U, 0x5069d8faU, 0x5bee24eeU, 0x5475d4c6U, 0x315b2c0cU,
192   0xf764ef45U, 0x01b6f4ebU, 0x60ba3225U, 0x8a16777cU, 0x4c05cd28U,
193   0x53e8c1d2U, 0xc8a76ce5U, 0x8045c1e6U, 0x61328752U, 0x2ebad322U,
194   0x3444f3e2U, 0x91b8af11U, 0xb0cee675U, 0x55dbff5aU, 0xf7061ee0U,
195   0x27d7d639U, 0xa4aef8c9U, 0x42ff0e4fU, 0x62755468U, 0x1c6ca3f3U,
196   0xe4f522d1U, 0x2765fcb3U, 0xe20c8a95U, 0x3a69aea7U, 0x56ab2c4fU,
197   0x8551e688U, 0xe0bc14c2U, 0x278676bfU, 0x893b6102U, 0xb4f0ab3bU,
198   0xb55ddda9U, 0xa04c521fU, 0xc980088eU, 0x912aeac1U, 0x08519badU,
199   0x991302d3U, 0x5b91a25bU, 0x696d9854U, 0x9ad8b4bfU, 0x41cb7e21U,
200   0xa65d1e03U, 0x85791d29U, 0x89478aa7U, 0x4581e337U, 0x59bae0b1U,
201   0xe0fc9df3U, 0x45d9002cU, 0x7837464fU, 0xda22de3aU, 0x1dc544bdU,
202   0x601d8badU, 0x668b0abcU, 0x7a5ebfb1U, 0x3ac0b624U, 0x5ee16d7dU,
203   0x9bfac387U, 0xbe8ef20cU, 0x8d2ae384U, 0x819dc7d5U, 0x7c4951e7U,
204   0xe60da716U, 0x0c5b0073U, 0xb43b3d97U, 0xce9974edU, 0x0f691da9U,
205   0x4b616d60U, 0x8fa9e819U, 0x3f390333U, 0x6f62fad6U, 0x5a32b67cU,
206   0x3be6f1c3U, 0x05851103U, 0xff28828dU, 0xaa43a56aU, 0x075d7dd5U,
207   0x248c4b7eU, 0x52fde3ebU, 0xf72e2edaU, 0x5da6f75fU, 0x2f5148d9U,
208   0xcae2aeaeU, 0xfda6f3e5U, 0xff60d8ffU, 0x2adc02d2U, 0x1dbdbd4cU,
209   0xd410ad7cU, 0x8c284aaeU, 0x392ef8e0U, 0x37d48b3aU, 0x6792fe9dU,
210   0xad32ddfaU, 0x1545f24eU, 0x3a260f73U, 0xb724ca36U, 0xc510d751U,
211   0x4f8df992U, 0x000b8b37U, 0x292e9b3dU, 0xa32f250fU, 0x8263d144U,
212   0xfcae0516U, 0x1eae2183U, 0xd4af2027U, 0xc64afae3U, 0xe7b34fe4U,
213   0xdf864aeaU, 0x80cc71c5U, 0x0e814df3U, 0x66cc5f41U, 0x853a497aU,
214   0xa2886213U, 0x5e34a2eaU, 0x0f53ba47U, 0x718c484aU, 0xfa0f0b12U,
215   0x33cc59ffU, 0x72b48e07U, 0x8b6f57bcU, 0x29cf886dU, 0x1950955bU,
216   0xcd52910cU, 0x4cecef65U, 0x05c2cbfeU, 0x49df4f6aU, 0x1f4c3f34U,
217   0xfadc1a09U, 0xf2d65a24U, 0x117f5594U, 0xde3a84e6U, 0x48db3024U,
218   0xd10ca9b5U
219 };
220 
221 /* buzhash by Robert C. Uzgalis */
222 /* General hash functions. Technical Report TR-92-01, The University
223    of Hong Kong, 1993 */
224 
buzhash(unsigned char * buf)225 static unsigned int buzhash(unsigned char *buf)
226 {
227   unsigned int x = 0x83d31df4U;
228   int i;
229   for (i = HSIZE; i != 0; i--)
230     x = (x << 1) ^ (x & (1 << 31) ? 1 : 0) ^ noise[*buf++];
231   return x;
232 }
233 
234 static unsigned int primes[] =
235 {
236   65537, 98317, 147481, 221227, 331841, 497771, 746659, 1120001,
237   1680013, 2520031, 3780053, 5670089, 8505137, 12757739, 19136609,
238   28704913, 43057369, 64586087, 96879131, 145318741, 217978121,
239   326967209, 490450837, 735676303, 1103514463, 1655271719,
240   0xffffffff
241 };
242 
243 struct hash_data {
244   bsuint *hash;
245   unsigned int prime;
246 };
247 
hash_create(unsigned char * buf,bsuint len)248 static void *hash_create(unsigned char *buf, bsuint len)
249 {
250   struct hash_data *hd;
251   bsuint *hash;
252   unsigned char *bp = buf;
253   bsuint off;
254   unsigned int s;
255   unsigned int prime;
256   unsigned int num;
257 
258   hd = malloc(sizeof(*hd));
259   if (!hd)
260     return 0;
261 #ifdef BSDIFF_64BIT
262   /* this is a 16GB limit for HSIZESHIFT == 4 */
263   if (len >= (bsuint)(0xffffffff / 4) << HSIZESHIFT)
264     return 0;
265 #endif
266   num = (len + HSIZE - 1) >> HSIZESHIFT;
267   prime = num * 4;
268   for (s = 0; s < sizeof(primes)/sizeof(*primes) - 1; s++)
269     if (prime < primes[s])
270       break;
271   prime = primes[s];
272   hash = calloc(prime, sizeof(*hash));
273   if (!hash)
274     {
275       free(hd);
276       return 0;
277     }
278   for (off = 0; len >= HSIZE; off += HSIZE, buf += HSIZE, len -= HSIZE)
279     {
280       s = buzhash(buf) % prime;
281       if (hash[s])
282         {
283           if (hash[(s == prime - 1) ? 0 : s + 1])
284             continue;
285           if (!memcmp(buf, bp + hash[s], HSIZE))
286             continue;
287           s = (s == prime - 1) ? 0 : s + 1;
288         }
289       hash[s] = off + 1;
290     }
291   hd->hash = hash;
292   hd->prime = prime;
293   return hd;
294 }
295 
hash_free(void * data)296 static void hash_free(void *data)
297 {
298   struct hash_data *hd = data;
299   free(hd->hash);
300   free(hd);
301 }
302 
hash_findnext(void * data,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,bsuint lastoffset,bsuint scan,bsuint * posp,bsuint * lenp)303 static bsuint hash_findnext(void *data, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint lastoffset, bsuint scan, bsuint *posp, bsuint *lenp)
304 {
305   struct hash_data *hd = data;
306   bsuint scanstart, oldscore, oldscorenum, oldscorestart;
307   bsuint pos, len;
308   bsuint lscan, lpos, llen;
309   bsuint i, ss, scsc;
310   unsigned int ssx;
311   unsigned int prime;
312   bsuint *hash;
313 
314   hash = hd->hash;
315   prime = hd->prime;
316   scanstart = scan;
317   oldscore = oldscorenum = oldscorestart = 0;
318   ssx = scan <= newlen - HSIZE ? buzhash(new + scan) : 0;
319   pos = 0;
320   len = 0;
321   lpos = lscan = llen = 0;
322   for (;;)
323     {
324       if (scan >= newlen - HSIZE)
325 	{
326 	  if (llen >= 32)
327 	    goto gotit;
328 	  break;
329 	}
330       ss = ssx % prime;
331       pos = hash[ss];
332       if (!pos)
333 	{
334 	  unsigned int oldc;
335 scannext:
336 	  if (llen >= 32 && scan - lscan >= HSIZE)
337 	    goto gotit;
338 	  ssx = (ssx << 1) ^ (ssx & (1 << 31) ? 1 : 0) ^ noise[new[scan + HSIZE]];
339 	  oldc = noise[new[scan]] ^ (0x83d31df4U ^ 0x07a63be9U);
340 #if HSIZE % 32 != 0
341 	  ssx ^= (oldc << (HSIZE % 32)) ^ (oldc >> (32 - (HSIZE % 32)));
342 #else
343 	  ssx ^= oldc;
344 #endif
345 	  scan++;
346 	  continue;
347 	}
348       pos--;
349       if (memcmp(old + pos, new + scan, HSIZE))
350 	{
351 	  pos = hash[ss == prime - 1 ? 0 : ss + 1];
352 	  if (!pos)
353 	    goto scannext;
354 	  pos--;
355 	  if (memcmp(old + pos, new + scan, HSIZE))
356 	    goto scannext;
357 	}
358       len = matchlen(old + pos + HSIZE, oldlen - pos - HSIZE, new + scan + HSIZE, newlen - scan - HSIZE) + HSIZE;
359       if (scan + HSIZE * 4 <= newlen)
360 	{
361 	  unsigned int ssx2;
362 	  bsuint len2, pos2;
363 	  ssx2 = buzhash(new + scan + HSIZE * 3) % prime;
364 	  pos2 = hash[ssx2];
365 	  if (pos2)
366 	    {
367 	      if (memcmp(new + scan + HSIZE *3, old + pos2 - 1, HSIZE))
368 		{
369 		  ssx2 = (ssx2 == prime) ? 0 : ssx2 + 1;
370 		  pos2 = hash[ssx2];
371 		}
372 	    }
373 	  if (pos2 > 1 + HSIZE*3)
374 	    {
375 	      pos2 = pos2 - 1 - HSIZE*3;
376 	      if (pos2 != pos)
377 		{
378 		  len2 = matchlen(old + pos2, oldlen - pos2, new + scan, newlen - scan);
379 		  if (len2 > len)
380 		    {
381 		      pos = pos2;
382 		      len = len2;
383 		    }
384 		}
385 	    }
386 	}
387       if (len > llen)
388 	{
389 	  llen = len;
390 	  lpos = pos;
391 	  lscan = scan;
392 	}
393       goto scannext;
394 gotit:
395       scan = lscan;
396       len = llen;
397       pos = lpos;
398       if (scan + lastoffset == pos)
399 	{
400 	  scan += len;
401 	  scanstart = scan;
402 	  if (scan + HSIZE < newlen)
403 	    ssx = buzhash(new + scan);
404 	  llen = 0;
405 	  continue;
406 	}
407       for (i = scan - scanstart; i && pos && scan && old[pos - 1] == new[scan - 1]; i--)
408 	{
409 	  len++;
410 	  pos--;
411 	  scan--;
412 	}
413       if (oldscorestart + 1 != scan || oldscorenum == 0 || oldscorenum - 1 > len)
414 	{
415 	  oldscore = 0;
416 	  for (scsc = scan; scsc<scan+len; scsc++)
417 	    if ((scsc+lastoffset<oldlen) && (old[scsc+lastoffset] == new[scsc]))
418 	      oldscore++;
419 	  oldscorestart = scan;
420 	  oldscorenum = len;
421 	}
422       else
423 	{
424 	  if (oldscorestart + lastoffset < oldlen && old[oldscorestart + lastoffset] == new[oldscorestart])
425 	    oldscore--;
426 	  oldscorestart++;
427 	  oldscorenum--;
428 	  for (scsc = oldscorestart + oldscorenum; oldscorenum < len; scsc++)
429 	    {
430 	      if ((scsc+lastoffset<oldlen) && (old[scsc+lastoffset] == new[scsc]))
431 		oldscore++;
432 		oldscorenum++;
433 	    }
434 	}
435       if (len - oldscore >= 32)
436 	break;
437       if (len > HSIZE * 3 + 32)
438         scan += len - (HSIZE * 3 + 32);
439       if (scan <= lscan)
440 	scan = lscan + 1;
441       scanstart = scan;
442       if (scan + HSIZE < newlen)
443 	ssx = buzhash(new + scan);
444       llen = 0;
445     }
446   if (scan >= newlen - HSIZE)
447     {
448       scan = newlen;
449       pos = 0;
450       len = 0;
451     }
452   *posp = pos;
453   *lenp = len;
454   return scan;
455 }
456 
457 #endif /* BSDIFF_NO_HASH */
458 
459 #ifndef BSDIFF_NO_SUF
460 
461 /******************************************************************/
462 /*                                                                */
463 /*         suffix list method                                     */
464 /*                                                                */
465 /******************************************************************/
466 
467 struct suf_data {
468   bsint *I;
469   bsint F[257];
470 };
471 
suf_split(bsint * I,bsint * V,bsint start,bsint len,bsint h)472 static void suf_split(bsint *I, bsint *V, bsint start, bsint len, bsint h)
473 {
474   bsint i, j, k, x, tmp, jj, kk;
475 
476   if (len < 16)
477     {
478       for (k = start; k < start + len; k += j)
479 	{
480 	  j = 1;
481           x = V[I[k] + h];
482 	  for (i = 1; k + i < start + len; i++)
483 	    {
484 	      if (V[I[k + i] + h] < x)
485 		{
486 		  x = V[I[k + i] + h];
487 		  j = 0;
488 		}
489 	      if(V[I[k + i] + h] == x)
490 		{
491 		  tmp = I[k+j];
492                   I[k+j] = I[k+i];
493                   I[k+i] = tmp;
494 		  j++;
495 		}
496 	    }
497 	  for(i = 0; i < j; i++)
498 	     V[I[k+i]] = k + j - 1;
499 	  if(j == 1)
500 	     I[k] = -1;
501 	}
502       return;
503     }
504 
505   x = V[I[start + len / 2] + h];
506   jj = 0;
507   kk = 0;
508   for (i = start; i < start + len; i++)
509     {
510       if(V[I[i] + h] < x)
511 	 jj++;
512       if(V[I[i] + h] == x)
513 	 kk++;
514     };
515   jj += start;
516   kk += jj;
517 
518   i = start;
519   j = 0;
520   k = 0;
521   while (i < jj)
522     {
523       if(V[I[i]+h]<x)
524 	{
525 	  i++;
526 	}
527       else if(V[I[i]+h]==x)
528 	{
529 	  tmp = I[i];
530           I[i] = I[jj + j];
531           I[jj + j] = tmp;
532 	  j++;
533 	}
534       else
535 	{
536 	  tmp = I[i];
537           I[i] = I[kk + k];
538           I[kk + k] = tmp;
539 	  k++;
540 	}
541     }
542   while (jj + j < kk)
543     {
544       if(V[I[jj+j]+h]==x)
545 	{
546 	  j++;
547 	}
548       else
549 	{
550 	  tmp = I[jj + j];
551 	  I[jj + j] = I[kk + k];
552 	  I[kk + k] = tmp;
553 	  k++;
554 	}
555     }
556 
557   if(jj > start)
558     suf_split(I, V, start, jj-start, h);
559 
560   for (i=0; i < kk - jj; i++)
561     V[I[jj + i]] = kk - 1;
562   if (jj == kk - 1)
563     I[jj] = -1;
564 
565   if (start + len > kk)
566     suf_split(I, V, kk, start + len - kk, h);
567 }
568 
suf_bucketsort(bsint * V,bsint * I,bsint n,bsint s)569 static bsint suf_bucketsort(bsint *V, bsint *I, bsint n, bsint s)
570 {
571   bsint c, d, g, i, j;
572   bsint *B;
573 
574   B = calloc(s, sizeof(bsint));
575   if (!B)
576     return 0;
577   for (i = n - 1; i >= 0; i--)
578     {
579       c = V[i];
580       V[i] = B[c];
581       B[c] = i + 1;
582     }
583   for (j = s - 1, i = n; i; j--)
584     {
585       for (d = B[j], g = i; d; i--)
586 	{
587 	  c = d - 1;
588 	  d = V[c];
589 	  V[c] = g;
590 	  I[i] = !d && g == i ? -1 : c;
591 	}
592     }
593   V[n] = 0;
594   I[0] = -1;
595   free(B);
596   return 1;
597 }
598 
suf_create(unsigned char * buf,bsuint ulen)599 static void *suf_create(unsigned char *buf, bsuint ulen)
600 {
601   struct suf_data *sd;
602   bsint *V, *I;
603   bsint i, h, l, oldv, s, len;
604 
605   len = ulen;
606   if (len < 0)
607     return 0;
608   sd = malloc(sizeof(*sd));
609   if (!sd)
610     return 0;
611   V = malloc(sizeof(bsint) * (len + 3));
612   if (!V)
613     {
614       free(sd);
615       return 0;
616     }
617   I = malloc(sizeof(bsint) * (len + 3));
618   if (!I)
619     {
620       free(V);
621       free(sd);
622       return 0;
623     }
624   memset(sd->F, 0, sizeof(sd->F));
625   if (len >= 0x1000000)
626     {
627       s = 0x1000002;
628       sd->F[buf[0]]++;
629       sd->F[buf[1]]++;
630       oldv = buf[0] << 8 | buf[1];
631       for (i = 2; i < len; i++)
632 	{
633 	  sd->F[buf[i]]++;
634 	  oldv = (oldv & 0xffff) << 8 | buf[i];
635 	  V[i - 2] = oldv + 2;
636         }
637       oldv = (oldv & 0xffff) << 8;
638       V[len - 2] = oldv + 2;
639       oldv = (oldv & 0xffff) << 8;
640       V[len - 1] = oldv + 2;
641       len += 2;
642       V[len - 2] = 1;
643       V[len - 1] = 0;
644       h = 3;
645     }
646   else
647     {
648       s = 0x10001;
649       sd->F[buf[0]]++;
650       oldv = buf[0];
651       for (i = 1; i < len; i++)
652 	{
653 	  sd->F[buf[i]]++;
654 	  oldv = (oldv & 0xff) << 8 | buf[i];
655 	  V[i - 1] = oldv + 1;
656 	}
657       oldv = (oldv & 0xff) << 8;
658       V[len - 1] = oldv + 1;
659       len += 1;
660       V[len - 1] = 0;
661       h = 2;
662     }
663   oldv = len;
664   for (i = 256; i > 0; i--)
665     {
666       sd->F[i] = oldv;
667       oldv -= sd->F[i - 1];
668     }
669   sd->F[0] = oldv;
670   if (!suf_bucketsort(V, I, len, s))
671     {
672       free(I);
673       free(V);
674       free(sd);
675       return 0;
676     }
677   for(; I[0] != -(len + 1); h += h)
678     {
679       l=0;
680       for (i = 0; i < len + 1; )
681 	{
682           if (I[i] < 0)
683 	    {
684               l -= I[i];
685               i -= I[i];
686             }
687 	  else
688 	    {
689               if(l)
690 		 I[i - l] = -l;
691               l = V[I[i]] + 1 - i;
692               suf_split(I, V, i, l, h);
693               i += l;
694               l = 0;
695             }
696         }
697       if (l)
698 	 I[i - l] = -l;
699     }
700   for (i = 0; i < len + 1; i++)
701     I[V[i]] = i;
702   free(V);
703   sd->I = I;
704   return sd;
705 }
706 
suf_free(void * data)707 static void suf_free(void *data)
708 {
709   struct suf_data *sd = data;
710   free(sd->I);
711   free(sd);
712 }
713 
suf_bsearch(bsint * I,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,bsuint st,bsuint en,bsuint * posp)714 static bsuint suf_bsearch(bsint *I, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint st, bsuint en, bsuint *posp)
715 {
716   bsuint x, y;
717   if (st > en)
718     return 0;
719   while (en - st >= 2)
720     {
721       x = st + (en - st) / 2;
722       if (memcmp(old + I[x], new, oldlen - I[x] < newlen ? oldlen - I[x] : newlen) < 0)
723 	st = x;
724       else
725 	en = x;
726     }
727   x = matchlen(old + I[st], oldlen - I[st], new, newlen);
728   y = matchlen(old + I[en], oldlen - I[en], new, newlen);
729   *posp = x > y ? I[st] : I[en];
730   return x > y ? x : y;
731 }
732 
suf_findnext(void * data,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,bsuint lastoffset,bsuint scan,bsuint * posp,bsuint * lenp)733 static bsuint suf_findnext(void *data, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint lastoffset, bsuint scan, bsuint *posp, bsuint *lenp)
734 {
735   bsuint scsc, oldscore, len;
736 
737   struct suf_data *sd = data;
738   len = 0;
739   *posp = 0;
740   scsc = scan;
741   oldscore = 0;
742   while (scan < newlen)
743     {
744       len = suf_bsearch(sd->I, old, oldlen, new + scan, newlen - scan, sd->F[new[scan]] + 1, sd->F[new[scan] + 1], posp);
745       for (; scsc < scan + len; scsc++)
746 	if (scsc + lastoffset < oldlen && old[scsc + lastoffset] == new[scsc])
747 	  oldscore++;
748       if (len && len == oldscore)
749 	{
750 	  scan += len;
751 	  scsc = scan;
752 	  oldscore = 0;
753 	  continue;
754 	}
755       if (len > oldscore + 32)
756 	break;
757       if (scan + lastoffset < oldlen && old[scan + lastoffset] == new[scan])
758 	oldscore--;
759       scan++;
760     }
761   *lenp = len;
762   return scan;
763 }
764 
765 #endif /* BSDIFF_NO_SUF */
766 
767 /******************************************************************/
768 
769 struct deltamode {
770   int mode;
771   void *(*create)(unsigned char *buf, bsuint len);
772   bsuint (*findnext)(void *data, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint lastoffset, bsuint scan, bsuint *posp, bsuint *lenp);
773   void (*free)(void *data);
774 };
775 
776 struct deltamode deltamodes[] =
777 {
778 #ifndef BSDIFF_NO_SUF
779   {DELTAMODE_SUF, suf_create, suf_findnext, suf_free},
780 #endif
781 #ifndef BSDIFF_NO_HASH
782   {DELTAMODE_HASH, hash_create, hash_findnext, hash_free},
783 #endif
784 };
785 
addoff(struct bzblock * bzi,bsint off)786 static void addoff(struct bzblock *bzi, bsint off)
787 {
788   int i, sign = 0;
789   unsigned char b[8];
790 
791   if (off < 0)
792     {
793       sign = 0x80;
794       off = -off;
795     }
796   for (i = 0; i < 7; i++)
797     {
798       b[i] = off & 0xff;
799       off >>= 8;
800     }
801   b[7] = sign | (off & 0xff);
802   blockwrite(bzi, b, 8);
803 }
804 
mkdiff(int mode,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,struct instr ** instrp,int * instrlenp,unsigned char ** instrblkp,unsigned int * instrblklenp,unsigned char ** addblkp,unsigned int * addblklenp,unsigned char ** extrablkp,unsigned int * extrablklenp)805 void mkdiff(int mode,
806             unsigned char *old, bsuint oldlen,
807             unsigned char *new, bsuint newlen,
808             struct instr **instrp, int *instrlenp,
809             unsigned char **instrblkp, unsigned int *instrblklenp,
810             unsigned char **addblkp, unsigned int *addblklenp,
811             unsigned char **extrablkp, unsigned int *extrablklenp)
812 {
813   struct instr *instr = 0;
814   int instrlen = 0;
815   bsuint i, scan, pos, len;
816   bsuint lastscan, lastpos, lastoffset;
817   bsuint s, Sf, lenf, Sb, lenb;
818   bsuint overlap, Ss, lens;
819   struct bzblock *bza = 0;
820   struct bzblock *bze = 0;
821   struct bzblock *bzi = 0;
822   void *data;
823   struct deltamode *dm;
824   int noaddblk = 0;
825 
826   if ((mode & DELTAMODE_NOADDBLK) != 0)
827     {
828       mode ^= DELTAMODE_NOADDBLK;
829       noaddblk = 1;
830     }
831   dm = 0;
832   for (i = 0; i < sizeof(deltamodes)/sizeof(*deltamodes); i++)
833     {
834       dm = deltamodes + i;
835       if (deltamodes[i].mode == mode)
836 	break;
837     }
838   if (!dm)
839     {
840       fprintf(stderr, "mkdiff: no mode installed\n");
841       exit(1);
842     }
843   if (addblkp)
844     {
845       *addblkp = 0;
846       *addblklenp = 0;
847     }
848   if (extrablkp)
849     {
850       *extrablkp = 0;
851       *extrablklenp = 0;
852     }
853   if (instrblkp)
854     {
855       *instrblkp = 0;
856       *instrblklenp = 0;
857     }
858   if (!noaddblk && addblkp && (bza = blockopen()) == 0)
859     {
860       fprintf(stderr, "mkdiff: could not create compression stream\n");
861       exit(1);
862     }
863   if (extrablkp && (bze = blockopen()) == 0)
864     {
865       fprintf(stderr, "mkdiff: could not create compression stream\n");
866       exit(1);
867     }
868   if (instrblkp && (bzi = blockopen()) == 0)
869     {
870       fprintf(stderr, "mkdiff: could not create compression stream\n");
871       exit(1);
872     }
873   data = dm->create(old, oldlen);
874   if (!data)
875     {
876       fprintf(stderr, "mkdiff: could not create data\n");
877       exit(1);
878     }
879 
880   scan = 0; len = 0; lenf = 0;
881   lastscan = 0; lastpos = 0;
882 
883   while (lastscan < newlen)
884     {
885       /* search for data matching something in new[scan...]
886        * input:
887        *   old/oldlen, new/newlen: search data
888        *   lastoffset: lastpos - lastscan  (because we want to find a different match)
889        * returns:
890        *   scan: start of match in new[]
891        *   pos:  start of match in old[]
892        *   len:  length of match
893        * (if no match is found, scan = newlen, len = 0)
894        */
895       lastoffset = noaddblk ? oldlen : lastpos - lastscan;
896       scan = dm->findnext(data, old, oldlen, new, newlen, lastoffset, scan, &pos, &len);
897 
898       /* extand old match forward */
899       if (!noaddblk)
900 	{
901 	  s = Sf = lenf = 0;
902 	  for (i = 0; lastscan + i < scan && lastpos + i < oldlen; )
903 	    {
904 	      if (old[lastpos+i] == new[lastscan+i])
905 		{
906 		  s++;
907 		  i++;
908 		  if (s >= Sf + i - s)
909 		    {
910 		      Sf = 2 * s - i;
911 		      lenf = i;
912 		    }
913 		}
914 	      else
915 	        i++;
916 	    }
917 	}
918       else
919 	{
920 	  for (i = 0; lastscan + i < scan && lastpos + i < oldlen; i++)
921 	    if (old[lastpos+i] != new[lastscan+i])
922 	      break;
923 	  lenf = i;
924 	}
925 
926       lenb = 0;
927       /* extand new match backward, scan == newlen means we're going to finish */
928       if (!noaddblk && scan < newlen)
929 	{
930 	  s = Sb = 0;
931 	  for(i = 1; scan >= lastscan + i && pos >= i; i++)
932 	    {
933 	      if (old[pos-i] == new[scan-i])
934 		{
935 		  s++;
936 		  if(s >= Sb + i - s)
937 		    {
938 		      Sb = 2 * s - i;
939 		      lenb = i;
940 		    }
941 		}
942 	    }
943 	}
944 
945       /* if there is an overlap find good place to split */
946       if (lastscan + lenf > scan - lenb)
947 	{
948 	  overlap = (lastscan + lenf) - (scan - lenb);
949 	  s = Sb = Ss = lens = 0;
950 	  for (i = 0; i < overlap; i++)
951 	    {
952 	      if(new[lastscan + lenf - overlap + i] == old[lastpos + lenf - overlap + i])
953 		s++;
954 	      if(new[scan - lenb + i] == old[pos - lenb + i])
955 		Sb++;
956 	      if (s > Sb && s - Sb > Ss)
957 		{
958 		  Ss = s - Sb;
959 		  lens = i + 1;
960 		}
961 	    }
962 	  lenf -= overlap - lens;
963 	  lenb -= lens;
964 	}
965 
966       /*
967        *         lastscan                    scan
968        *            |--- lenf ---|    |- lenb -|-- len --|
969        *            |            |    |        |         |
970        * new: ------+=======-----+----+--------+=========+--
971        *           /                           \
972        *          /                             |
973        * old: ---+=======-----------------------+=========---
974        *         |                              |
975        *      lastpos                          pos
976        */
977 
978       if (instrp)
979 	{
980 	  if ((instrlen & 31) == 0)
981 	    {
982 	      if (instr)
983 	        instr = realloc(instr, sizeof(*instr) * (instrlen + 32));
984 	      else
985 	        instr = malloc(sizeof(*instr) * (instrlen + 32));
986 	      if (!instr)
987 		{
988 		  fprintf(stderr, "out of memory\n");
989 		  exit(1);
990 		}
991 	    }
992 	  instr[instrlen].copyout = lenf;
993 	  instr[instrlen].copyin = (scan - lenb) - (lastscan + lenf);
994 	  instr[instrlen].copyinoff = lastscan + lenf;
995 	  instr[instrlen].copyoutoff = lastpos;
996 	  instrlen++;
997 	}
998       if (bzi)
999 	{
1000 	  addoff(bzi, lenf);
1001 	  addoff(bzi, (scan - lenb) - (lastscan + lenf));
1002 	  addoff(bzi, (pos - lenb) - (lastpos + lenf));
1003 	}
1004       if (bze)
1005 	{
1006 	  i = lastscan + lenf;
1007 	  s = (scan - lenb) - i;
1008 	  while (s > 0)
1009 	    {
1010 	      int len2 = s > 0x40000000 ? 0x40000000 : s;
1011 	      blockwrite(bze, new + i, len2);
1012 	      i += len2;
1013 	      s -= len2;
1014 	    }
1015 	}
1016       if (bza)
1017 	{
1018 	  while (lenf > 0)
1019 	    {
1020 	      unsigned char addblk[4096];
1021 	      int len2;
1022 	      len2 = lenf > 4096 ? 4096 : lenf;
1023 	      for (i = 0; i < len2; i++)
1024 		addblk[i] = new[lastscan + i] - old[lastpos + i];
1025 	      if (blockwrite(bza, addblk, len2) != len2)
1026 		{
1027 		  fprintf(stderr, "could not append to data block\n");
1028 		  exit(1);
1029 		}
1030 	      lastscan += len2;
1031 	      lastpos += len2;
1032 	      lenf -= len2;
1033 	    }
1034 	}
1035 
1036       /* advance */
1037       lastscan = scan - lenb;
1038       lastpos = pos - lenb;
1039       scan += len;
1040     }
1041   if (bza && blockclose(bza, addblkp, addblklenp))
1042     {
1043       fprintf(stderr, "could not close data block\n");
1044       exit(1);
1045     }
1046   if (bze && blockclose(bze, extrablkp, extrablklenp))
1047     {
1048       fprintf(stderr, "could not close extra block\n");
1049       exit(1);
1050     }
1051   if (bzi && blockclose(bzi, instrblkp, instrblklenp))
1052     {
1053       fprintf(stderr, "could not close instr block\n");
1054       exit(1);
1055     }
1056   if (instrp)
1057     {
1058       *instrp = instr;
1059       *instrlenp = instrlen;
1060     }
1061   dm->free(data);
1062 }
1063 
1064 struct stepdata {
1065   struct deltamode *dm;
1066   void *data;
1067   int noaddblk;
1068 };
1069 
1070 void *
mkdiff_step_setup(int mode)1071 mkdiff_step_setup(int mode)
1072 {
1073   int noaddblk = 0;
1074   struct stepdata *sd;
1075   struct deltamode *dm;
1076   int i;
1077 
1078   if ((mode & DELTAMODE_NOADDBLK) != 0)
1079     {
1080       mode ^= DELTAMODE_NOADDBLK;
1081       noaddblk = 1;
1082     }
1083   dm = 0;
1084   for (i = 0; i < sizeof(deltamodes)/sizeof(*deltamodes); i++)
1085     {
1086       dm = deltamodes + i;
1087       if (deltamodes[i].mode == mode)
1088 	break;
1089     }
1090   if (!dm)
1091     {
1092       fprintf(stderr, "mkdiff: no mode installed\n");
1093       exit(1);
1094     }
1095   sd = calloc(1, sizeof(*sd));
1096   if (!sd)
1097     return 0;
1098   sd->dm = dm;
1099   sd->data = 0;
1100   sd->noaddblk = noaddblk;
1101   return sd;
1102 }
1103 
1104 void
mkdiff_step(void * sdata,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,struct instr * instr,bsuint * scanp,bsuint * lastposp,bsuint * lastscanp)1105 mkdiff_step(void *sdata,
1106             unsigned char *old, bsuint oldlen,
1107             unsigned char *new, bsuint newlen,
1108             struct instr *instr,
1109 	    bsuint *scanp, bsuint *lastposp, bsuint *lastscanp)
1110 
1111 {
1112   struct stepdata *sd = sdata;
1113   struct deltamode *dm = sd->dm;
1114   bsuint scan, lastpos, lastscan;
1115   bsuint pos, len, lastoffset;
1116   bsuint s, Sf, lenf, Sb, lenb;
1117   bsuint overlap, Ss, lens;
1118   bsuint i;
1119 
1120   if (!sd->data)
1121     {
1122       sd->data = dm->create(old, oldlen);
1123       if (!sd->data)
1124 	{
1125 	  fprintf(stderr, "mkdiff: could not create data\n");
1126 	  exit(1);
1127 	}
1128     }
1129   scan = *scanp;
1130   lastscan = *lastscanp;
1131   lastpos = *lastposp;
1132 
1133   lastoffset = sd->noaddblk ? oldlen : lastpos - lastscan;
1134   scan = dm->findnext(sd->data, old, oldlen, new, newlen, lastoffset, scan, &pos, &len);
1135   if (!sd->noaddblk)
1136     {
1137       s = Sf = lenf = 0;
1138       for (i = 0; lastscan + i < scan && lastpos + i < oldlen; )
1139 	{
1140 	  if (old[lastpos+i] == new[lastscan+i])
1141 	    {
1142 	      s++;
1143 	      i++;
1144 	      if (s >= Sf + i - s)
1145 		{
1146 		  Sf = 2 * s - i;
1147 		  lenf = i;
1148 		}
1149 	    }
1150 	  else
1151 	    i++;
1152 	}
1153     }
1154   else
1155     {
1156       for (i = 0; lastscan + i < scan && lastpos + i < oldlen; i++)
1157 	if (old[lastpos+i] != new[lastscan+i])
1158 	  break;
1159       lenf = i;
1160     }
1161 
1162   lenb = 0;
1163   /* extand new match backward, scan == newlen means we're going to finish */
1164   if (!sd->noaddblk && scan < newlen)
1165     {
1166       s = Sb = 0;
1167       for(i = 1; scan >= lastscan + i && pos >= i; i++)
1168 	{
1169 	  if (old[pos-i] == new[scan-i])
1170 	    {
1171 	      s++;
1172 	      if(s >= Sb + i - s)
1173 		{
1174 		  Sb = 2 * s - i;
1175 		  lenb = i;
1176 		}
1177 	    }
1178 	}
1179     }
1180 
1181   /* if there is an overlap find good place to split */
1182   if (lastscan + lenf > scan - lenb)
1183     {
1184       overlap = (lastscan + lenf) - (scan - lenb);
1185       s = Sb = Ss = lens = 0;
1186       for (i = 0; i < overlap; i++)
1187 	{
1188 	  if(new[lastscan + lenf - overlap + i] == old[lastpos + lenf - overlap + i])
1189 	    s++;
1190 	  if(new[scan - lenb + i] == old[pos - lenb + i])
1191 	    Sb++;
1192 	  if (s > Sb && s - Sb > Ss)
1193 	    {
1194 	      Ss = s - Sb;
1195 	      lens = i + 1;
1196 	    }
1197 	}
1198       lenf -= overlap - lens;
1199       lenb -= lens;
1200     }
1201 
1202   /* printf("MATCH len %d @ %d:%d-%d-%d:%d -- %d\n", len, lastscan, lenf, (scan - lenb) - (lastscan + lenf), lenb, scan, pos); */
1203 
1204   instr->copyout = lenf;
1205   instr->copyin = (scan - lenb) - (lastscan + lenf);
1206   instr->copyinoff = lastscan + lenf;
1207   instr->copyoutoff = lastpos;
1208   *scanp = scan + len;
1209   *lastscanp = scan - lenb;
1210   if (scan != newlen)
1211     *lastposp = pos - lenb;
1212   else
1213     *lastposp = lastpos + lenf;
1214 }
1215 
1216 void
mkdiff_step_freedata(void * sdata)1217 mkdiff_step_freedata(void *sdata)
1218 {
1219   struct stepdata *sd = sdata;
1220   struct deltamode *dm = sd->dm;
1221   if (sd->data)
1222     dm->free(sd->data);
1223   sd->data = 0;
1224 }
1225 
1226 void
mkdiff_step_free(void * sdata)1227 mkdiff_step_free(void *sdata)
1228 {
1229   struct stepdata *sd = sdata;
1230   struct deltamode *dm = sd->dm;
1231   if (sd->data)
1232     dm->free(sd->data);
1233   free(sd);
1234 }
1235