1 /*
2 diff.c -- binary diff generator
3
4 Copyright 2004,2005 Michael Schroeder
5
6 rewritten from bsdiff.c,
7 http://www.freebsd.org/cgi/cvsweb.cgi/src/usr.bin/bsdiff
8 added library interface and hash method, enhanced suffix method.
9 */
10 /*-
11 * Copyright 2003-2005 Colin Percival
12 * All rights reserved
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted providing that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
27 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <unistd.h>
39 #include <string.h>
40 #include <bzlib.h>
41
42 #include "delta.h"
43
44 struct bzblock {
45 unsigned char *data;
46 unsigned int len;
47 bz_stream strm;
48 };
49
blockopen()50 static struct bzblock *blockopen()
51 {
52 struct bzblock *bzf;
53 int ret;
54 bzf = malloc(sizeof(*bzf));
55 if (!bzf)
56 return 0;
57 bzf->strm.bzalloc = 0;
58 bzf->strm.bzfree = 0;
59 bzf->strm.opaque = 0;
60 bzf->len = 0;
61 ret = BZ2_bzCompressInit(&bzf->strm, 9, 0, 30);
62 if (ret != BZ_OK)
63 {
64 free(bzf);
65 return 0;
66 }
67 bzf->data = 0;
68 bzf->strm.avail_in = 0;
69 bzf->strm.next_in = 0;
70 bzf->strm.avail_out = 0;
71 bzf->strm.next_out = 0;
72 return bzf;
73 }
74
blockwrite(struct bzblock * bzf,void * buf,int len)75 static int blockwrite(struct bzblock *bzf, void *buf, int len)
76 {
77 int ret;
78
79 if (len <= 0)
80 return len < 0 ? -1 : 0;
81 bzf->strm.avail_in = len;
82 bzf->strm.next_in = buf;
83 for (;;)
84 {
85 if (bzf->strm.avail_out < 4096)
86 {
87 if (bzf->len + 8192 < bzf->len)
88 return -1;
89 if (bzf->data)
90 bzf->data = realloc(bzf->data, bzf->len + 8192);
91 else
92 bzf->data = malloc(bzf->len + 8192);
93 if (!bzf->data)
94 return -1;
95 bzf->strm.avail_out = 8192;
96 }
97 bzf->strm.next_out = (char *)bzf->data + bzf->len;
98 ret = BZ2_bzCompress(&bzf->strm, BZ_RUN);
99 if (ret != BZ_RUN_OK)
100 return -1;
101 bzf->len = (unsigned char *)bzf->strm.next_out - bzf->data;
102 if (bzf->strm.avail_in == 0)
103 return len;
104 }
105 }
106
blockclose(struct bzblock * bzf,unsigned char ** datap,unsigned int * lenp)107 static int blockclose(struct bzblock *bzf, unsigned char **datap, unsigned int *lenp)
108 {
109 int ret;
110 bzf->strm.avail_in = 0;
111 bzf->strm.next_in = 0;
112 for (;;)
113 {
114 if (bzf->strm.avail_out < 4096)
115 {
116 if (bzf->len + 8192 < bzf->len)
117 return -1;
118 if (bzf->data)
119 bzf->data = realloc(bzf->data, bzf->len + 8192);
120 else
121 bzf->data = malloc(bzf->len + 8192);
122 if (!bzf->data)
123 return -1;
124 bzf->strm.avail_out = 8192;
125 }
126 bzf->strm.next_out = (char *)bzf->data + bzf->len;
127 ret = BZ2_bzCompress(&bzf->strm, BZ_FINISH);
128 if (ret != BZ_FINISH_OK && ret != BZ_STREAM_END)
129 return -1;
130 bzf->len = (unsigned char *)bzf->strm.next_out - bzf->data;
131 if (ret == BZ_STREAM_END)
132 break;
133 }
134 BZ2_bzCompressEnd (&bzf->strm);
135 *datap = bzf->data;
136 *lenp = bzf->len;
137 free(bzf);
138 return 0;
139 }
140
matchlen(unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen)141 static inline bsuint matchlen(unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen)
142 {
143 unsigned char *oldsave;
144 bsuint max;
145 oldsave = old;
146 max = oldlen > newlen ? newlen : oldlen;
147 while (max-- > 0)
148 if (*old++ != *new++)
149 return old - oldsave - 1;
150 return old - oldsave;
151 }
152
153 #ifndef BSDIFF_NO_HASH
154
155 /******************************************************************/
156 /* */
157 /* hash method */
158 /* */
159 /******************************************************************/
160
161 #define HSIZESHIFT 4
162 #define HSIZE (1 << HSIZESHIFT)
163
164 /* 256 random numbers generated by a quantum source */
165 static unsigned int noise[256] =
166 {
167 0x9be502a4U, 0xba7180eaU, 0x324e474fU, 0x0aab8451U, 0x0ced3810U,
168 0x2158a968U, 0x6bbd3771U, 0x75a02529U, 0x41f05c14U, 0xc2264b87U,
169 0x1f67b359U, 0xcd2d031dU, 0x49dc0c04U, 0xa04ae45cU, 0x6ade28a7U,
170 0x2d0254ffU, 0xdec60c7cU, 0xdef5c084U, 0x0f77ffc8U, 0x112021f6U,
171 0x5f6d581eU, 0xe35ea3dfU, 0x3216bfb4U, 0xd5a3083dU, 0x7e63e9cdU,
172 0xaa9208f6U, 0xda3f3978U, 0xfe0e2547U, 0x09dfb020U, 0xd97472c5U,
173 0xbbce2edeU, 0x121aebd2U, 0x0e9fdbebU, 0x7b6f5d9cU, 0x84938e43U,
174 0x30694f2dU, 0x86b7a7f8U, 0xefaf5876U, 0x263812e6U, 0xb6e48ddfU,
175 0xce8ed980U, 0x4df591e1U, 0x75257b35U, 0x2f88dcffU, 0xa461fe44U,
176 0xca613b4dU, 0xd9803f73U, 0xea056205U, 0xccca7a89U, 0x0f2dbb07U,
177 0xc53e359eU, 0xe80d0137U, 0x2b2d2a5dU, 0xcfc1391aU, 0x2bb3b6c5U,
178 0xb66aea3cU, 0x00ea419eU, 0xce5ada84U, 0xae1d6712U, 0x12f576baU,
179 0x117fcbc4U, 0xa9d4c775U, 0x25b3d616U, 0xefda65a8U, 0xaff3ef5bU,
180 0x00627e68U, 0x668d1e99U, 0x088d0eefU, 0xf8fac24dU, 0xe77457c7U,
181 0x68d3beb4U, 0x921d2acbU, 0x9410eac9U, 0xd7f24399U, 0xcbdec497U,
182 0x98c99ae1U, 0x65802b2cU, 0x81e1c3c4U, 0xa130bb09U, 0x17a87badU,
183 0xa70367d6U, 0x148658d4U, 0x02f33377U, 0x8620d8b6U, 0xbdac25bdU,
184 0xb0a6de51U, 0xd64c4571U, 0xa4185ba0U, 0xa342d70fU, 0x3f1dc4c1U,
185 0x042dc3ceU, 0x0de89f43U, 0xa69b1867U, 0x3c064e11U, 0xad1e2c3eU,
186 0x9660e8cdU, 0xd36b09caU, 0x4888f228U, 0x61a9ac3cU, 0xd9561118U,
187 0x3532797eU, 0x71a35c22U, 0xecc1376cU, 0xab31e656U, 0x88bd0d35U,
188 0x423b20ddU, 0x38e4651cU, 0x3c6397a4U, 0x4a7b12d9U, 0x08b1cf33U,
189 0xd0604137U, 0xb035fdb8U, 0x4916da23U, 0xa9349493U, 0xd83daa9bU,
190 0x145f7d95U, 0x868531d6U, 0xacb18f17U, 0x9cd33b6fU, 0x193e42b9U,
191 0x26dfdc42U, 0x5069d8faU, 0x5bee24eeU, 0x5475d4c6U, 0x315b2c0cU,
192 0xf764ef45U, 0x01b6f4ebU, 0x60ba3225U, 0x8a16777cU, 0x4c05cd28U,
193 0x53e8c1d2U, 0xc8a76ce5U, 0x8045c1e6U, 0x61328752U, 0x2ebad322U,
194 0x3444f3e2U, 0x91b8af11U, 0xb0cee675U, 0x55dbff5aU, 0xf7061ee0U,
195 0x27d7d639U, 0xa4aef8c9U, 0x42ff0e4fU, 0x62755468U, 0x1c6ca3f3U,
196 0xe4f522d1U, 0x2765fcb3U, 0xe20c8a95U, 0x3a69aea7U, 0x56ab2c4fU,
197 0x8551e688U, 0xe0bc14c2U, 0x278676bfU, 0x893b6102U, 0xb4f0ab3bU,
198 0xb55ddda9U, 0xa04c521fU, 0xc980088eU, 0x912aeac1U, 0x08519badU,
199 0x991302d3U, 0x5b91a25bU, 0x696d9854U, 0x9ad8b4bfU, 0x41cb7e21U,
200 0xa65d1e03U, 0x85791d29U, 0x89478aa7U, 0x4581e337U, 0x59bae0b1U,
201 0xe0fc9df3U, 0x45d9002cU, 0x7837464fU, 0xda22de3aU, 0x1dc544bdU,
202 0x601d8badU, 0x668b0abcU, 0x7a5ebfb1U, 0x3ac0b624U, 0x5ee16d7dU,
203 0x9bfac387U, 0xbe8ef20cU, 0x8d2ae384U, 0x819dc7d5U, 0x7c4951e7U,
204 0xe60da716U, 0x0c5b0073U, 0xb43b3d97U, 0xce9974edU, 0x0f691da9U,
205 0x4b616d60U, 0x8fa9e819U, 0x3f390333U, 0x6f62fad6U, 0x5a32b67cU,
206 0x3be6f1c3U, 0x05851103U, 0xff28828dU, 0xaa43a56aU, 0x075d7dd5U,
207 0x248c4b7eU, 0x52fde3ebU, 0xf72e2edaU, 0x5da6f75fU, 0x2f5148d9U,
208 0xcae2aeaeU, 0xfda6f3e5U, 0xff60d8ffU, 0x2adc02d2U, 0x1dbdbd4cU,
209 0xd410ad7cU, 0x8c284aaeU, 0x392ef8e0U, 0x37d48b3aU, 0x6792fe9dU,
210 0xad32ddfaU, 0x1545f24eU, 0x3a260f73U, 0xb724ca36U, 0xc510d751U,
211 0x4f8df992U, 0x000b8b37U, 0x292e9b3dU, 0xa32f250fU, 0x8263d144U,
212 0xfcae0516U, 0x1eae2183U, 0xd4af2027U, 0xc64afae3U, 0xe7b34fe4U,
213 0xdf864aeaU, 0x80cc71c5U, 0x0e814df3U, 0x66cc5f41U, 0x853a497aU,
214 0xa2886213U, 0x5e34a2eaU, 0x0f53ba47U, 0x718c484aU, 0xfa0f0b12U,
215 0x33cc59ffU, 0x72b48e07U, 0x8b6f57bcU, 0x29cf886dU, 0x1950955bU,
216 0xcd52910cU, 0x4cecef65U, 0x05c2cbfeU, 0x49df4f6aU, 0x1f4c3f34U,
217 0xfadc1a09U, 0xf2d65a24U, 0x117f5594U, 0xde3a84e6U, 0x48db3024U,
218 0xd10ca9b5U
219 };
220
221 /* buzhash by Robert C. Uzgalis */
222 /* General hash functions. Technical Report TR-92-01, The University
223 of Hong Kong, 1993 */
224
buzhash(unsigned char * buf)225 static unsigned int buzhash(unsigned char *buf)
226 {
227 unsigned int x = 0x83d31df4U;
228 int i;
229 for (i = HSIZE; i != 0; i--)
230 x = (x << 1) ^ (x & (1 << 31) ? 1 : 0) ^ noise[*buf++];
231 return x;
232 }
233
234 static unsigned int primes[] =
235 {
236 65537, 98317, 147481, 221227, 331841, 497771, 746659, 1120001,
237 1680013, 2520031, 3780053, 5670089, 8505137, 12757739, 19136609,
238 28704913, 43057369, 64586087, 96879131, 145318741, 217978121,
239 326967209, 490450837, 735676303, 1103514463, 1655271719,
240 0xffffffff
241 };
242
243 struct hash_data {
244 bsuint *hash;
245 unsigned int prime;
246 };
247
hash_create(unsigned char * buf,bsuint len)248 static void *hash_create(unsigned char *buf, bsuint len)
249 {
250 struct hash_data *hd;
251 bsuint *hash;
252 unsigned char *bp = buf;
253 bsuint off;
254 unsigned int s;
255 unsigned int prime;
256 unsigned int num;
257
258 hd = malloc(sizeof(*hd));
259 if (!hd)
260 return 0;
261 #ifdef BSDIFF_64BIT
262 /* this is a 16GB limit for HSIZESHIFT == 4 */
263 if (len >= (bsuint)(0xffffffff / 4) << HSIZESHIFT)
264 return 0;
265 #endif
266 num = (len + HSIZE - 1) >> HSIZESHIFT;
267 prime = num * 4;
268 for (s = 0; s < sizeof(primes)/sizeof(*primes) - 1; s++)
269 if (prime < primes[s])
270 break;
271 prime = primes[s];
272 hash = calloc(prime, sizeof(*hash));
273 if (!hash)
274 {
275 free(hd);
276 return 0;
277 }
278 for (off = 0; len >= HSIZE; off += HSIZE, buf += HSIZE, len -= HSIZE)
279 {
280 s = buzhash(buf) % prime;
281 if (hash[s])
282 {
283 if (hash[(s == prime - 1) ? 0 : s + 1])
284 continue;
285 if (!memcmp(buf, bp + hash[s], HSIZE))
286 continue;
287 s = (s == prime - 1) ? 0 : s + 1;
288 }
289 hash[s] = off + 1;
290 }
291 hd->hash = hash;
292 hd->prime = prime;
293 return hd;
294 }
295
hash_free(void * data)296 static void hash_free(void *data)
297 {
298 struct hash_data *hd = data;
299 free(hd->hash);
300 free(hd);
301 }
302
hash_findnext(void * data,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,bsuint lastoffset,bsuint scan,bsuint * posp,bsuint * lenp)303 static bsuint hash_findnext(void *data, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint lastoffset, bsuint scan, bsuint *posp, bsuint *lenp)
304 {
305 struct hash_data *hd = data;
306 bsuint scanstart, oldscore, oldscorenum, oldscorestart;
307 bsuint pos, len;
308 bsuint lscan, lpos, llen;
309 bsuint i, ss, scsc;
310 unsigned int ssx;
311 unsigned int prime;
312 bsuint *hash;
313
314 hash = hd->hash;
315 prime = hd->prime;
316 scanstart = scan;
317 oldscore = oldscorenum = oldscorestart = 0;
318 ssx = scan <= newlen - HSIZE ? buzhash(new + scan) : 0;
319 pos = 0;
320 len = 0;
321 lpos = lscan = llen = 0;
322 for (;;)
323 {
324 if (scan >= newlen - HSIZE)
325 {
326 if (llen >= 32)
327 goto gotit;
328 break;
329 }
330 ss = ssx % prime;
331 pos = hash[ss];
332 if (!pos)
333 {
334 unsigned int oldc;
335 scannext:
336 if (llen >= 32 && scan - lscan >= HSIZE)
337 goto gotit;
338 ssx = (ssx << 1) ^ (ssx & (1 << 31) ? 1 : 0) ^ noise[new[scan + HSIZE]];
339 oldc = noise[new[scan]] ^ (0x83d31df4U ^ 0x07a63be9U);
340 #if HSIZE % 32 != 0
341 ssx ^= (oldc << (HSIZE % 32)) ^ (oldc >> (32 - (HSIZE % 32)));
342 #else
343 ssx ^= oldc;
344 #endif
345 scan++;
346 continue;
347 }
348 pos--;
349 if (memcmp(old + pos, new + scan, HSIZE))
350 {
351 pos = hash[ss == prime - 1 ? 0 : ss + 1];
352 if (!pos)
353 goto scannext;
354 pos--;
355 if (memcmp(old + pos, new + scan, HSIZE))
356 goto scannext;
357 }
358 len = matchlen(old + pos + HSIZE, oldlen - pos - HSIZE, new + scan + HSIZE, newlen - scan - HSIZE) + HSIZE;
359 if (scan + HSIZE * 4 <= newlen)
360 {
361 unsigned int ssx2;
362 bsuint len2, pos2;
363 ssx2 = buzhash(new + scan + HSIZE * 3) % prime;
364 pos2 = hash[ssx2];
365 if (pos2)
366 {
367 if (memcmp(new + scan + HSIZE *3, old + pos2 - 1, HSIZE))
368 {
369 ssx2 = (ssx2 == prime) ? 0 : ssx2 + 1;
370 pos2 = hash[ssx2];
371 }
372 }
373 if (pos2 > 1 + HSIZE*3)
374 {
375 pos2 = pos2 - 1 - HSIZE*3;
376 if (pos2 != pos)
377 {
378 len2 = matchlen(old + pos2, oldlen - pos2, new + scan, newlen - scan);
379 if (len2 > len)
380 {
381 pos = pos2;
382 len = len2;
383 }
384 }
385 }
386 }
387 if (len > llen)
388 {
389 llen = len;
390 lpos = pos;
391 lscan = scan;
392 }
393 goto scannext;
394 gotit:
395 scan = lscan;
396 len = llen;
397 pos = lpos;
398 if (scan + lastoffset == pos)
399 {
400 scan += len;
401 scanstart = scan;
402 if (scan + HSIZE < newlen)
403 ssx = buzhash(new + scan);
404 llen = 0;
405 continue;
406 }
407 for (i = scan - scanstart; i && pos && scan && old[pos - 1] == new[scan - 1]; i--)
408 {
409 len++;
410 pos--;
411 scan--;
412 }
413 if (oldscorestart + 1 != scan || oldscorenum == 0 || oldscorenum - 1 > len)
414 {
415 oldscore = 0;
416 for (scsc = scan; scsc<scan+len; scsc++)
417 if ((scsc+lastoffset<oldlen) && (old[scsc+lastoffset] == new[scsc]))
418 oldscore++;
419 oldscorestart = scan;
420 oldscorenum = len;
421 }
422 else
423 {
424 if (oldscorestart + lastoffset < oldlen && old[oldscorestart + lastoffset] == new[oldscorestart])
425 oldscore--;
426 oldscorestart++;
427 oldscorenum--;
428 for (scsc = oldscorestart + oldscorenum; oldscorenum < len; scsc++)
429 {
430 if ((scsc+lastoffset<oldlen) && (old[scsc+lastoffset] == new[scsc]))
431 oldscore++;
432 oldscorenum++;
433 }
434 }
435 if (len - oldscore >= 32)
436 break;
437 if (len > HSIZE * 3 + 32)
438 scan += len - (HSIZE * 3 + 32);
439 if (scan <= lscan)
440 scan = lscan + 1;
441 scanstart = scan;
442 if (scan + HSIZE < newlen)
443 ssx = buzhash(new + scan);
444 llen = 0;
445 }
446 if (scan >= newlen - HSIZE)
447 {
448 scan = newlen;
449 pos = 0;
450 len = 0;
451 }
452 *posp = pos;
453 *lenp = len;
454 return scan;
455 }
456
457 #endif /* BSDIFF_NO_HASH */
458
459 #ifndef BSDIFF_NO_SUF
460
461 /******************************************************************/
462 /* */
463 /* suffix list method */
464 /* */
465 /******************************************************************/
466
467 struct suf_data {
468 bsint *I;
469 bsint F[257];
470 };
471
suf_split(bsint * I,bsint * V,bsint start,bsint len,bsint h)472 static void suf_split(bsint *I, bsint *V, bsint start, bsint len, bsint h)
473 {
474 bsint i, j, k, x, tmp, jj, kk;
475
476 if (len < 16)
477 {
478 for (k = start; k < start + len; k += j)
479 {
480 j = 1;
481 x = V[I[k] + h];
482 for (i = 1; k + i < start + len; i++)
483 {
484 if (V[I[k + i] + h] < x)
485 {
486 x = V[I[k + i] + h];
487 j = 0;
488 }
489 if(V[I[k + i] + h] == x)
490 {
491 tmp = I[k+j];
492 I[k+j] = I[k+i];
493 I[k+i] = tmp;
494 j++;
495 }
496 }
497 for(i = 0; i < j; i++)
498 V[I[k+i]] = k + j - 1;
499 if(j == 1)
500 I[k] = -1;
501 }
502 return;
503 }
504
505 x = V[I[start + len / 2] + h];
506 jj = 0;
507 kk = 0;
508 for (i = start; i < start + len; i++)
509 {
510 if(V[I[i] + h] < x)
511 jj++;
512 if(V[I[i] + h] == x)
513 kk++;
514 };
515 jj += start;
516 kk += jj;
517
518 i = start;
519 j = 0;
520 k = 0;
521 while (i < jj)
522 {
523 if(V[I[i]+h]<x)
524 {
525 i++;
526 }
527 else if(V[I[i]+h]==x)
528 {
529 tmp = I[i];
530 I[i] = I[jj + j];
531 I[jj + j] = tmp;
532 j++;
533 }
534 else
535 {
536 tmp = I[i];
537 I[i] = I[kk + k];
538 I[kk + k] = tmp;
539 k++;
540 }
541 }
542 while (jj + j < kk)
543 {
544 if(V[I[jj+j]+h]==x)
545 {
546 j++;
547 }
548 else
549 {
550 tmp = I[jj + j];
551 I[jj + j] = I[kk + k];
552 I[kk + k] = tmp;
553 k++;
554 }
555 }
556
557 if(jj > start)
558 suf_split(I, V, start, jj-start, h);
559
560 for (i=0; i < kk - jj; i++)
561 V[I[jj + i]] = kk - 1;
562 if (jj == kk - 1)
563 I[jj] = -1;
564
565 if (start + len > kk)
566 suf_split(I, V, kk, start + len - kk, h);
567 }
568
suf_bucketsort(bsint * V,bsint * I,bsint n,bsint s)569 static bsint suf_bucketsort(bsint *V, bsint *I, bsint n, bsint s)
570 {
571 bsint c, d, g, i, j;
572 bsint *B;
573
574 B = calloc(s, sizeof(bsint));
575 if (!B)
576 return 0;
577 for (i = n - 1; i >= 0; i--)
578 {
579 c = V[i];
580 V[i] = B[c];
581 B[c] = i + 1;
582 }
583 for (j = s - 1, i = n; i; j--)
584 {
585 for (d = B[j], g = i; d; i--)
586 {
587 c = d - 1;
588 d = V[c];
589 V[c] = g;
590 I[i] = !d && g == i ? -1 : c;
591 }
592 }
593 V[n] = 0;
594 I[0] = -1;
595 free(B);
596 return 1;
597 }
598
suf_create(unsigned char * buf,bsuint ulen)599 static void *suf_create(unsigned char *buf, bsuint ulen)
600 {
601 struct suf_data *sd;
602 bsint *V, *I;
603 bsint i, h, l, oldv, s, len;
604
605 len = ulen;
606 if (len < 0)
607 return 0;
608 sd = malloc(sizeof(*sd));
609 if (!sd)
610 return 0;
611 V = malloc(sizeof(bsint) * (len + 3));
612 if (!V)
613 {
614 free(sd);
615 return 0;
616 }
617 I = malloc(sizeof(bsint) * (len + 3));
618 if (!I)
619 {
620 free(V);
621 free(sd);
622 return 0;
623 }
624 memset(sd->F, 0, sizeof(sd->F));
625 if (len >= 0x1000000)
626 {
627 s = 0x1000002;
628 sd->F[buf[0]]++;
629 sd->F[buf[1]]++;
630 oldv = buf[0] << 8 | buf[1];
631 for (i = 2; i < len; i++)
632 {
633 sd->F[buf[i]]++;
634 oldv = (oldv & 0xffff) << 8 | buf[i];
635 V[i - 2] = oldv + 2;
636 }
637 oldv = (oldv & 0xffff) << 8;
638 V[len - 2] = oldv + 2;
639 oldv = (oldv & 0xffff) << 8;
640 V[len - 1] = oldv + 2;
641 len += 2;
642 V[len - 2] = 1;
643 V[len - 1] = 0;
644 h = 3;
645 }
646 else
647 {
648 s = 0x10001;
649 sd->F[buf[0]]++;
650 oldv = buf[0];
651 for (i = 1; i < len; i++)
652 {
653 sd->F[buf[i]]++;
654 oldv = (oldv & 0xff) << 8 | buf[i];
655 V[i - 1] = oldv + 1;
656 }
657 oldv = (oldv & 0xff) << 8;
658 V[len - 1] = oldv + 1;
659 len += 1;
660 V[len - 1] = 0;
661 h = 2;
662 }
663 oldv = len;
664 for (i = 256; i > 0; i--)
665 {
666 sd->F[i] = oldv;
667 oldv -= sd->F[i - 1];
668 }
669 sd->F[0] = oldv;
670 if (!suf_bucketsort(V, I, len, s))
671 {
672 free(I);
673 free(V);
674 free(sd);
675 return 0;
676 }
677 for(; I[0] != -(len + 1); h += h)
678 {
679 l=0;
680 for (i = 0; i < len + 1; )
681 {
682 if (I[i] < 0)
683 {
684 l -= I[i];
685 i -= I[i];
686 }
687 else
688 {
689 if(l)
690 I[i - l] = -l;
691 l = V[I[i]] + 1 - i;
692 suf_split(I, V, i, l, h);
693 i += l;
694 l = 0;
695 }
696 }
697 if (l)
698 I[i - l] = -l;
699 }
700 for (i = 0; i < len + 1; i++)
701 I[V[i]] = i;
702 free(V);
703 sd->I = I;
704 return sd;
705 }
706
suf_free(void * data)707 static void suf_free(void *data)
708 {
709 struct suf_data *sd = data;
710 free(sd->I);
711 free(sd);
712 }
713
suf_bsearch(bsint * I,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,bsuint st,bsuint en,bsuint * posp)714 static bsuint suf_bsearch(bsint *I, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint st, bsuint en, bsuint *posp)
715 {
716 bsuint x, y;
717 if (st > en)
718 return 0;
719 while (en - st >= 2)
720 {
721 x = st + (en - st) / 2;
722 if (memcmp(old + I[x], new, oldlen - I[x] < newlen ? oldlen - I[x] : newlen) < 0)
723 st = x;
724 else
725 en = x;
726 }
727 x = matchlen(old + I[st], oldlen - I[st], new, newlen);
728 y = matchlen(old + I[en], oldlen - I[en], new, newlen);
729 *posp = x > y ? I[st] : I[en];
730 return x > y ? x : y;
731 }
732
suf_findnext(void * data,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,bsuint lastoffset,bsuint scan,bsuint * posp,bsuint * lenp)733 static bsuint suf_findnext(void *data, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint lastoffset, bsuint scan, bsuint *posp, bsuint *lenp)
734 {
735 bsuint scsc, oldscore, len;
736
737 struct suf_data *sd = data;
738 len = 0;
739 *posp = 0;
740 scsc = scan;
741 oldscore = 0;
742 while (scan < newlen)
743 {
744 len = suf_bsearch(sd->I, old, oldlen, new + scan, newlen - scan, sd->F[new[scan]] + 1, sd->F[new[scan] + 1], posp);
745 for (; scsc < scan + len; scsc++)
746 if (scsc + lastoffset < oldlen && old[scsc + lastoffset] == new[scsc])
747 oldscore++;
748 if (len && len == oldscore)
749 {
750 scan += len;
751 scsc = scan;
752 oldscore = 0;
753 continue;
754 }
755 if (len > oldscore + 32)
756 break;
757 if (scan + lastoffset < oldlen && old[scan + lastoffset] == new[scan])
758 oldscore--;
759 scan++;
760 }
761 *lenp = len;
762 return scan;
763 }
764
765 #endif /* BSDIFF_NO_SUF */
766
767 /******************************************************************/
768
769 struct deltamode {
770 int mode;
771 void *(*create)(unsigned char *buf, bsuint len);
772 bsuint (*findnext)(void *data, unsigned char *old, bsuint oldlen, unsigned char *new, bsuint newlen, bsuint lastoffset, bsuint scan, bsuint *posp, bsuint *lenp);
773 void (*free)(void *data);
774 };
775
776 struct deltamode deltamodes[] =
777 {
778 #ifndef BSDIFF_NO_SUF
779 {DELTAMODE_SUF, suf_create, suf_findnext, suf_free},
780 #endif
781 #ifndef BSDIFF_NO_HASH
782 {DELTAMODE_HASH, hash_create, hash_findnext, hash_free},
783 #endif
784 };
785
addoff(struct bzblock * bzi,bsint off)786 static void addoff(struct bzblock *bzi, bsint off)
787 {
788 int i, sign = 0;
789 unsigned char b[8];
790
791 if (off < 0)
792 {
793 sign = 0x80;
794 off = -off;
795 }
796 for (i = 0; i < 7; i++)
797 {
798 b[i] = off & 0xff;
799 off >>= 8;
800 }
801 b[7] = sign | (off & 0xff);
802 blockwrite(bzi, b, 8);
803 }
804
mkdiff(int mode,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,struct instr ** instrp,int * instrlenp,unsigned char ** instrblkp,unsigned int * instrblklenp,unsigned char ** addblkp,unsigned int * addblklenp,unsigned char ** extrablkp,unsigned int * extrablklenp)805 void mkdiff(int mode,
806 unsigned char *old, bsuint oldlen,
807 unsigned char *new, bsuint newlen,
808 struct instr **instrp, int *instrlenp,
809 unsigned char **instrblkp, unsigned int *instrblklenp,
810 unsigned char **addblkp, unsigned int *addblklenp,
811 unsigned char **extrablkp, unsigned int *extrablklenp)
812 {
813 struct instr *instr = 0;
814 int instrlen = 0;
815 bsuint i, scan, pos, len;
816 bsuint lastscan, lastpos, lastoffset;
817 bsuint s, Sf, lenf, Sb, lenb;
818 bsuint overlap, Ss, lens;
819 struct bzblock *bza = 0;
820 struct bzblock *bze = 0;
821 struct bzblock *bzi = 0;
822 void *data;
823 struct deltamode *dm;
824 int noaddblk = 0;
825
826 if ((mode & DELTAMODE_NOADDBLK) != 0)
827 {
828 mode ^= DELTAMODE_NOADDBLK;
829 noaddblk = 1;
830 }
831 dm = 0;
832 for (i = 0; i < sizeof(deltamodes)/sizeof(*deltamodes); i++)
833 {
834 dm = deltamodes + i;
835 if (deltamodes[i].mode == mode)
836 break;
837 }
838 if (!dm)
839 {
840 fprintf(stderr, "mkdiff: no mode installed\n");
841 exit(1);
842 }
843 if (addblkp)
844 {
845 *addblkp = 0;
846 *addblklenp = 0;
847 }
848 if (extrablkp)
849 {
850 *extrablkp = 0;
851 *extrablklenp = 0;
852 }
853 if (instrblkp)
854 {
855 *instrblkp = 0;
856 *instrblklenp = 0;
857 }
858 if (!noaddblk && addblkp && (bza = blockopen()) == 0)
859 {
860 fprintf(stderr, "mkdiff: could not create compression stream\n");
861 exit(1);
862 }
863 if (extrablkp && (bze = blockopen()) == 0)
864 {
865 fprintf(stderr, "mkdiff: could not create compression stream\n");
866 exit(1);
867 }
868 if (instrblkp && (bzi = blockopen()) == 0)
869 {
870 fprintf(stderr, "mkdiff: could not create compression stream\n");
871 exit(1);
872 }
873 data = dm->create(old, oldlen);
874 if (!data)
875 {
876 fprintf(stderr, "mkdiff: could not create data\n");
877 exit(1);
878 }
879
880 scan = 0; len = 0; lenf = 0;
881 lastscan = 0; lastpos = 0;
882
883 while (lastscan < newlen)
884 {
885 /* search for data matching something in new[scan...]
886 * input:
887 * old/oldlen, new/newlen: search data
888 * lastoffset: lastpos - lastscan (because we want to find a different match)
889 * returns:
890 * scan: start of match in new[]
891 * pos: start of match in old[]
892 * len: length of match
893 * (if no match is found, scan = newlen, len = 0)
894 */
895 lastoffset = noaddblk ? oldlen : lastpos - lastscan;
896 scan = dm->findnext(data, old, oldlen, new, newlen, lastoffset, scan, &pos, &len);
897
898 /* extand old match forward */
899 if (!noaddblk)
900 {
901 s = Sf = lenf = 0;
902 for (i = 0; lastscan + i < scan && lastpos + i < oldlen; )
903 {
904 if (old[lastpos+i] == new[lastscan+i])
905 {
906 s++;
907 i++;
908 if (s >= Sf + i - s)
909 {
910 Sf = 2 * s - i;
911 lenf = i;
912 }
913 }
914 else
915 i++;
916 }
917 }
918 else
919 {
920 for (i = 0; lastscan + i < scan && lastpos + i < oldlen; i++)
921 if (old[lastpos+i] != new[lastscan+i])
922 break;
923 lenf = i;
924 }
925
926 lenb = 0;
927 /* extand new match backward, scan == newlen means we're going to finish */
928 if (!noaddblk && scan < newlen)
929 {
930 s = Sb = 0;
931 for(i = 1; scan >= lastscan + i && pos >= i; i++)
932 {
933 if (old[pos-i] == new[scan-i])
934 {
935 s++;
936 if(s >= Sb + i - s)
937 {
938 Sb = 2 * s - i;
939 lenb = i;
940 }
941 }
942 }
943 }
944
945 /* if there is an overlap find good place to split */
946 if (lastscan + lenf > scan - lenb)
947 {
948 overlap = (lastscan + lenf) - (scan - lenb);
949 s = Sb = Ss = lens = 0;
950 for (i = 0; i < overlap; i++)
951 {
952 if(new[lastscan + lenf - overlap + i] == old[lastpos + lenf - overlap + i])
953 s++;
954 if(new[scan - lenb + i] == old[pos - lenb + i])
955 Sb++;
956 if (s > Sb && s - Sb > Ss)
957 {
958 Ss = s - Sb;
959 lens = i + 1;
960 }
961 }
962 lenf -= overlap - lens;
963 lenb -= lens;
964 }
965
966 /*
967 * lastscan scan
968 * |--- lenf ---| |- lenb -|-- len --|
969 * | | | | |
970 * new: ------+=======-----+----+--------+=========+--
971 * / \
972 * / |
973 * old: ---+=======-----------------------+=========---
974 * | |
975 * lastpos pos
976 */
977
978 if (instrp)
979 {
980 if ((instrlen & 31) == 0)
981 {
982 if (instr)
983 instr = realloc(instr, sizeof(*instr) * (instrlen + 32));
984 else
985 instr = malloc(sizeof(*instr) * (instrlen + 32));
986 if (!instr)
987 {
988 fprintf(stderr, "out of memory\n");
989 exit(1);
990 }
991 }
992 instr[instrlen].copyout = lenf;
993 instr[instrlen].copyin = (scan - lenb) - (lastscan + lenf);
994 instr[instrlen].copyinoff = lastscan + lenf;
995 instr[instrlen].copyoutoff = lastpos;
996 instrlen++;
997 }
998 if (bzi)
999 {
1000 addoff(bzi, lenf);
1001 addoff(bzi, (scan - lenb) - (lastscan + lenf));
1002 addoff(bzi, (pos - lenb) - (lastpos + lenf));
1003 }
1004 if (bze)
1005 {
1006 i = lastscan + lenf;
1007 s = (scan - lenb) - i;
1008 while (s > 0)
1009 {
1010 int len2 = s > 0x40000000 ? 0x40000000 : s;
1011 blockwrite(bze, new + i, len2);
1012 i += len2;
1013 s -= len2;
1014 }
1015 }
1016 if (bza)
1017 {
1018 while (lenf > 0)
1019 {
1020 unsigned char addblk[4096];
1021 int len2;
1022 len2 = lenf > 4096 ? 4096 : lenf;
1023 for (i = 0; i < len2; i++)
1024 addblk[i] = new[lastscan + i] - old[lastpos + i];
1025 if (blockwrite(bza, addblk, len2) != len2)
1026 {
1027 fprintf(stderr, "could not append to data block\n");
1028 exit(1);
1029 }
1030 lastscan += len2;
1031 lastpos += len2;
1032 lenf -= len2;
1033 }
1034 }
1035
1036 /* advance */
1037 lastscan = scan - lenb;
1038 lastpos = pos - lenb;
1039 scan += len;
1040 }
1041 if (bza && blockclose(bza, addblkp, addblklenp))
1042 {
1043 fprintf(stderr, "could not close data block\n");
1044 exit(1);
1045 }
1046 if (bze && blockclose(bze, extrablkp, extrablklenp))
1047 {
1048 fprintf(stderr, "could not close extra block\n");
1049 exit(1);
1050 }
1051 if (bzi && blockclose(bzi, instrblkp, instrblklenp))
1052 {
1053 fprintf(stderr, "could not close instr block\n");
1054 exit(1);
1055 }
1056 if (instrp)
1057 {
1058 *instrp = instr;
1059 *instrlenp = instrlen;
1060 }
1061 dm->free(data);
1062 }
1063
1064 struct stepdata {
1065 struct deltamode *dm;
1066 void *data;
1067 int noaddblk;
1068 };
1069
1070 void *
mkdiff_step_setup(int mode)1071 mkdiff_step_setup(int mode)
1072 {
1073 int noaddblk = 0;
1074 struct stepdata *sd;
1075 struct deltamode *dm;
1076 int i;
1077
1078 if ((mode & DELTAMODE_NOADDBLK) != 0)
1079 {
1080 mode ^= DELTAMODE_NOADDBLK;
1081 noaddblk = 1;
1082 }
1083 dm = 0;
1084 for (i = 0; i < sizeof(deltamodes)/sizeof(*deltamodes); i++)
1085 {
1086 dm = deltamodes + i;
1087 if (deltamodes[i].mode == mode)
1088 break;
1089 }
1090 if (!dm)
1091 {
1092 fprintf(stderr, "mkdiff: no mode installed\n");
1093 exit(1);
1094 }
1095 sd = calloc(1, sizeof(*sd));
1096 if (!sd)
1097 return 0;
1098 sd->dm = dm;
1099 sd->data = 0;
1100 sd->noaddblk = noaddblk;
1101 return sd;
1102 }
1103
1104 void
mkdiff_step(void * sdata,unsigned char * old,bsuint oldlen,unsigned char * new,bsuint newlen,struct instr * instr,bsuint * scanp,bsuint * lastposp,bsuint * lastscanp)1105 mkdiff_step(void *sdata,
1106 unsigned char *old, bsuint oldlen,
1107 unsigned char *new, bsuint newlen,
1108 struct instr *instr,
1109 bsuint *scanp, bsuint *lastposp, bsuint *lastscanp)
1110
1111 {
1112 struct stepdata *sd = sdata;
1113 struct deltamode *dm = sd->dm;
1114 bsuint scan, lastpos, lastscan;
1115 bsuint pos, len, lastoffset;
1116 bsuint s, Sf, lenf, Sb, lenb;
1117 bsuint overlap, Ss, lens;
1118 bsuint i;
1119
1120 if (!sd->data)
1121 {
1122 sd->data = dm->create(old, oldlen);
1123 if (!sd->data)
1124 {
1125 fprintf(stderr, "mkdiff: could not create data\n");
1126 exit(1);
1127 }
1128 }
1129 scan = *scanp;
1130 lastscan = *lastscanp;
1131 lastpos = *lastposp;
1132
1133 lastoffset = sd->noaddblk ? oldlen : lastpos - lastscan;
1134 scan = dm->findnext(sd->data, old, oldlen, new, newlen, lastoffset, scan, &pos, &len);
1135 if (!sd->noaddblk)
1136 {
1137 s = Sf = lenf = 0;
1138 for (i = 0; lastscan + i < scan && lastpos + i < oldlen; )
1139 {
1140 if (old[lastpos+i] == new[lastscan+i])
1141 {
1142 s++;
1143 i++;
1144 if (s >= Sf + i - s)
1145 {
1146 Sf = 2 * s - i;
1147 lenf = i;
1148 }
1149 }
1150 else
1151 i++;
1152 }
1153 }
1154 else
1155 {
1156 for (i = 0; lastscan + i < scan && lastpos + i < oldlen; i++)
1157 if (old[lastpos+i] != new[lastscan+i])
1158 break;
1159 lenf = i;
1160 }
1161
1162 lenb = 0;
1163 /* extand new match backward, scan == newlen means we're going to finish */
1164 if (!sd->noaddblk && scan < newlen)
1165 {
1166 s = Sb = 0;
1167 for(i = 1; scan >= lastscan + i && pos >= i; i++)
1168 {
1169 if (old[pos-i] == new[scan-i])
1170 {
1171 s++;
1172 if(s >= Sb + i - s)
1173 {
1174 Sb = 2 * s - i;
1175 lenb = i;
1176 }
1177 }
1178 }
1179 }
1180
1181 /* if there is an overlap find good place to split */
1182 if (lastscan + lenf > scan - lenb)
1183 {
1184 overlap = (lastscan + lenf) - (scan - lenb);
1185 s = Sb = Ss = lens = 0;
1186 for (i = 0; i < overlap; i++)
1187 {
1188 if(new[lastscan + lenf - overlap + i] == old[lastpos + lenf - overlap + i])
1189 s++;
1190 if(new[scan - lenb + i] == old[pos - lenb + i])
1191 Sb++;
1192 if (s > Sb && s - Sb > Ss)
1193 {
1194 Ss = s - Sb;
1195 lens = i + 1;
1196 }
1197 }
1198 lenf -= overlap - lens;
1199 lenb -= lens;
1200 }
1201
1202 /* printf("MATCH len %d @ %d:%d-%d-%d:%d -- %d\n", len, lastscan, lenf, (scan - lenb) - (lastscan + lenf), lenb, scan, pos); */
1203
1204 instr->copyout = lenf;
1205 instr->copyin = (scan - lenb) - (lastscan + lenf);
1206 instr->copyinoff = lastscan + lenf;
1207 instr->copyoutoff = lastpos;
1208 *scanp = scan + len;
1209 *lastscanp = scan - lenb;
1210 if (scan != newlen)
1211 *lastposp = pos - lenb;
1212 else
1213 *lastposp = lastpos + lenf;
1214 }
1215
1216 void
mkdiff_step_freedata(void * sdata)1217 mkdiff_step_freedata(void *sdata)
1218 {
1219 struct stepdata *sd = sdata;
1220 struct deltamode *dm = sd->dm;
1221 if (sd->data)
1222 dm->free(sd->data);
1223 sd->data = 0;
1224 }
1225
1226 void
mkdiff_step_free(void * sdata)1227 mkdiff_step_free(void *sdata)
1228 {
1229 struct stepdata *sd = sdata;
1230 struct deltamode *dm = sd->dm;
1231 if (sd->data)
1232 dm->free(sd->data);
1233 free(sd);
1234 }
1235