1 /* vim:set ts=8 sw=8 sts=8 noet: */
2 /*
3 bsdiff.c -- Binary patch generator.
4
5 Copyright 2003 Colin Percival
6
7 For the terms under which this work may be distributed, please see
8 the adjoining file "LICENSE".
9
10 ChangeLog:
11 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
12 values throughout.
13 --Benjamin Smedberg <benjamin@smedbergs.us>
14 2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
15 provided by libbz2.
16 --Darin Fisher <darin@meer.net>
17 */
18
19 #include "bspatch.h"
20
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <stdarg.h>
26 #ifdef XP_WIN
27 #include <io.h>
28 #include <winsock2.h>
29 #define open _open
30 #define close _close
31 #define read _read
32 #define lseek _lseek
33 #define write _write
34 #else
35 #include <unistd.h>
36 #include <arpa/inet.h>
37 #define _O_BINARY 0
38 #endif
39
40 #include "crctable.h"
41
42 #undef MIN
43 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
44
45 /*---------------------------------------------------------------------------*/
46
47 /* This variable lives in libbz2. It's declared in bzlib_private.h, so we just
48 * declare it here to avoid including that entire header file.
49 */
50 extern unsigned int BZ2_crc32Table[256];
51
52 static unsigned int
crc32(const unsigned char * buf,unsigned int len)53 crc32(const unsigned char *buf, unsigned int len)
54 {
55 unsigned int crc = 0xffffffffL;
56
57 const unsigned char *end = buf + len;
58 for (; buf != end; ++buf)
59 crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
60
61 crc = ~crc;
62 return crc;
63 }
64
65 /*---------------------------------------------------------------------------*/
66
67 static void
reporterr(int e,const char * fmt,...)68 reporterr(int e, const char *fmt, ...)
69 {
70 if (fmt) {
71 va_list args;
72 va_start(args, fmt);
73 vfprintf(stderr, fmt, args);
74 va_end(args);
75 }
76
77 exit(e);
78 }
79
80 static void
split(int32_t * I,int32_t * V,int32_t start,int32_t len,int32_t h)81 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
82 {
83 int32_t i,j,k,x,tmp,jj,kk;
84
85 if(len<16) {
86 for(k=start;k<start+len;k+=j) {
87 j=1;x=V[I[k]+h];
88 for(i=1;k+i<start+len;i++) {
89 if(V[I[k+i]+h]<x) {
90 x=V[I[k+i]+h];
91 j=0;
92 };
93 if(V[I[k+i]+h]==x) {
94 tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
95 j++;
96 };
97 };
98 for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
99 if(j==1) I[k]=-1;
100 };
101 return;
102 };
103
104 x=V[I[start+len/2]+h];
105 jj=0;kk=0;
106 for(i=start;i<start+len;i++) {
107 if(V[I[i]+h]<x) jj++;
108 if(V[I[i]+h]==x) kk++;
109 };
110 jj+=start;kk+=jj;
111
112 i=start;j=0;k=0;
113 while(i<jj) {
114 if(V[I[i]+h]<x) {
115 i++;
116 } else if(V[I[i]+h]==x) {
117 tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
118 j++;
119 } else {
120 tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
121 k++;
122 };
123 };
124
125 while(jj+j<kk) {
126 if(V[I[jj+j]+h]==x) {
127 j++;
128 } else {
129 tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
130 k++;
131 };
132 };
133
134 if(jj>start) split(I,V,start,jj-start,h);
135
136 for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
137 if(jj==kk-1) I[jj]=-1;
138
139 if(start+len>kk) split(I,V,kk,start+len-kk,h);
140 }
141
142 static void
qsufsort(int32_t * I,int32_t * V,unsigned char * old,int32_t oldsize)143 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
144 {
145 int32_t buckets[256];
146 int32_t i,h,len;
147
148 for(i=0;i<256;i++) buckets[i]=0;
149 for(i=0;i<oldsize;i++) buckets[old[i]]++;
150 for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
151 for(i=255;i>0;i--) buckets[i]=buckets[i-1];
152 buckets[0]=0;
153
154 for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
155 I[0]=oldsize;
156 for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
157 V[oldsize]=0;
158 for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
159 I[0]=-1;
160
161 for(h=1;I[0]!=-(oldsize+1);h+=h) {
162 len=0;
163 for(i=0;i<oldsize+1;) {
164 if(I[i]<0) {
165 len-=I[i];
166 i-=I[i];
167 } else {
168 if(len) I[i-len]=-len;
169 len=V[I[i]]+1-i;
170 split(I,V,i,len,h);
171 i+=len;
172 len=0;
173 };
174 };
175 if(len) I[i-len]=-len;
176 };
177
178 for(i=0;i<oldsize+1;i++) I[V[i]]=i;
179 }
180
181 static int32_t
matchlen(unsigned char * old,int32_t oldsize,unsigned char * newbuf,int32_t newsize)182 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
183 {
184 int32_t i;
185
186 for(i=0;(i<oldsize)&&(i<newsize);i++)
187 if(old[i]!=newbuf[i]) break;
188
189 return i;
190 }
191
192 static int32_t
search(int32_t * I,unsigned char * old,int32_t oldsize,unsigned char * newbuf,int32_t newsize,int32_t st,int32_t en,int32_t * pos)193 search(int32_t *I,unsigned char *old,int32_t oldsize,
194 unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
195 {
196 int32_t x,y;
197
198 if(en-st<2) {
199 x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
200 y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
201
202 if(x>y) {
203 *pos=I[st];
204 return x;
205 } else {
206 *pos=I[en];
207 return y;
208 }
209 };
210
211 x=st+(en-st)/2;
212 if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
213 return search(I,old,oldsize,newbuf,newsize,x,en,pos);
214 } else {
215 return search(I,old,oldsize,newbuf,newsize,st,x,pos);
216 };
217 }
218
main(int argc,char * argv[])219 int main(int argc,char *argv[])
220 {
221 int fd;
222 unsigned char *old,*newbuf;
223 int32_t oldsize,newsize;
224 int32_t *I,*V;
225
226 int32_t scan,pos,len;
227 int32_t lastscan,lastpos,lastoffset;
228 int32_t oldscore,scsc;
229
230 int32_t s,Sf,lenf,Sb,lenb;
231 int32_t overlap,Ss,lens;
232 int32_t i;
233
234 int32_t dblen,eblen;
235 unsigned char *db,*eb;
236
237 unsigned int scrc;
238
239 MBSPatchHeader header = {
240 {'M','B','D','I','F','F','1','0'},
241 0, 0, 0, 0, 0, 0
242 };
243
244 uint32_t numtriples;
245
246 if(argc!=4)
247 reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
248
249 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
250 that we never try to malloc(0) and get a NULL pointer */
251 if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
252 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
253 ((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
254 (lseek(fd,0,SEEK_SET)!=0) ||
255 (read(fd,old,oldsize)!=oldsize) ||
256 (close(fd)==-1))
257 reporterr(1,"%s\n",argv[1]);
258
259 scrc = crc32(old, oldsize);
260
261 if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
262 ((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
263 reporterr(1,NULL);
264
265 qsufsort(I,V,old,oldsize);
266
267 free(V);
268
269 /* Allocate newsize+1 bytes instead of newsize bytes to ensure
270 that we never try to malloc(0) and get a NULL pointer */
271 if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
272 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
273 ((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
274 (lseek(fd,0,SEEK_SET)!=0) ||
275 (read(fd,newbuf,newsize)!=newsize) ||
276 (close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
277
278 if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
279 ((eb=(unsigned char*) malloc(newsize+1))==NULL))
280 reporterr(1,NULL);
281
282 dblen=0;
283 eblen=0;
284
285 if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
286 reporterr(1,"%s\n",argv[3]);
287
288 /* start writing here */
289
290 /* We don't know the lengths yet, so we will write the header again
291 at the end */
292
293 if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
294 reporterr(1,"%s\n",argv[3]);
295
296 scan=0;len=0;
297 lastscan=0;lastpos=0;lastoffset=0;
298 numtriples = 0;
299 while(scan<newsize) {
300 oldscore=0;
301
302 for(scsc=scan+=len;scan<newsize;scan++) {
303 len=search(I,old,oldsize,newbuf+scan,newsize-scan,
304 0,oldsize,&pos);
305
306 for(;scsc<scan+len;scsc++)
307 if((scsc+lastoffset<oldsize) &&
308 (old[scsc+lastoffset] == newbuf[scsc]))
309 oldscore++;
310
311 if(((len==oldscore) && (len!=0)) ||
312 (len>oldscore+10)) break;
313
314 if((scan+lastoffset<oldsize) &&
315 (old[scan+lastoffset] == newbuf[scan]))
316 oldscore--;
317 };
318
319 if((len!=oldscore) || (scan==newsize)) {
320 MBSPatchTriple triple;
321
322 s=0;Sf=0;lenf=0;
323 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
324 if(old[lastpos+i]==newbuf[lastscan+i]) s++;
325 i++;
326 if(s*3-i*2>Sf*3-lenf*2) { Sf=s; lenf=i; };
327 };
328
329 lenb=0;
330 if(scan<newsize) {
331 s=0;Sb=0;
332 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
333 if(old[pos-i]==newbuf[scan-i]) s++;
334 if(s*3-i*2>Sb*3-lenb*2) { Sb=s; lenb=i; };
335 };
336 };
337
338 if(lastscan+lenf>scan-lenb) {
339 overlap=(lastscan+lenf)-(scan-lenb);
340 s=0;Ss=0;lens=0;
341 for(i=0;i<overlap;i++) {
342 if(newbuf[lastscan+lenf-overlap+i]==
343 old[lastpos+lenf-overlap+i]) s++;
344 if(newbuf[scan-lenb+i]==
345 old[pos-lenb+i]) s--;
346 if(s>Ss) { Ss=s; lens=i+1; };
347 };
348
349 lenf+=lens-overlap;
350 lenb-=lens;
351 };
352
353 for(i=0;i<lenf;i++)
354 db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
355 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
356 eb[eblen+i]=newbuf[lastscan+lenf+i];
357
358 dblen+=lenf;
359 eblen+=(scan-lenb)-(lastscan+lenf);
360
361 triple.x = htonl(lenf);
362 triple.y = htonl((scan-lenb)-(lastscan+lenf));
363 triple.z = htonl((pos-lenb)-(lastpos+lenf));
364 if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
365 reporterr(1,NULL);
366
367 #ifdef DEBUG_bsmedberg
368 printf("Writing a block:\n"
369 " X: %u\n"
370 " Y: %u\n"
371 " Z: %i\n",
372 (uint32_t) lenf,
373 (uint32_t) ((scan-lenb)-(lastscan+lenf)),
374 (uint32_t) ((pos-lenb)-(lastpos+lenf)));
375 #endif
376
377 ++numtriples;
378
379 lastscan=scan-lenb;
380 lastpos=pos-lenb;
381 lastoffset=pos-scan;
382 };
383 };
384
385 if(write(fd,db,dblen)!=dblen)
386 reporterr(1,NULL);
387
388 if(write(fd,eb,eblen)!=eblen)
389 reporterr(1,NULL);
390
391 header.slen = htonl(oldsize);
392 header.scrc32 = htonl(scrc);
393 header.dlen = htonl(newsize);
394 header.cblen = htonl(numtriples * sizeof(MBSPatchTriple));
395 header.difflen = htonl(dblen);
396 header.extralen = htonl(eblen);
397
398 if (lseek(fd,0,SEEK_SET) == -1 ||
399 write(fd,&header,sizeof(header)) != sizeof(header) ||
400 close(fd) == -1)
401 reporterr(1,NULL);
402
403 free(db);
404 free(eb);
405 free(I);
406 free(old);
407 free(newbuf);
408
409 return 0;
410 }
411
412