1 /* vim:set ts=8 sw=8 sts=8 noet: */
2 /*
3   bsdiff.c -- Binary patch generator.
4 
5   Copyright 2003 Colin Percival
6 
7   For the terms under which this work may be distributed, please see
8   the adjoining file "LICENSE".
9 
10   ChangeLog:
11   2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit
12                values throughout.
13                  --Benjamin Smedberg <benjamin@smedbergs.us>
14   2005-05-18 - Use the same CRC algorithm as bzip2, and leverage the CRC table
15                provided by libbz2.
16                  --Darin Fisher <darin@meer.net>
17 */
18 
19 #include "bspatch.h"
20 
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <fcntl.h>
25 #include <stdarg.h>
26 #ifdef XP_WIN
27 #include <io.h>
28 #include <winsock2.h>
29 #define open _open
30 #define close _close
31 #define read _read
32 #define lseek _lseek
33 #define write _write
34 #else
35 #include <unistd.h>
36 #include <arpa/inet.h>
37 #define _O_BINARY 0
38 #endif
39 
40 #include "crctable.h"
41 
42 #undef MIN
43 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
44 
45 /*---------------------------------------------------------------------------*/
46 
47 /* This variable lives in libbz2.  It's declared in bzlib_private.h, so we just
48  * declare it here to avoid including that entire header file.
49  */
50 extern unsigned int BZ2_crc32Table[256];
51 
52 static unsigned int
crc32(const unsigned char * buf,unsigned int len)53 crc32(const unsigned char *buf, unsigned int len)
54 {
55 	unsigned int crc = 0xffffffffL;
56 
57 	const unsigned char *end = buf + len;
58 	for (; buf != end; ++buf)
59 		crc = (crc << 8) ^ BZ2_crc32Table[(crc >> 24) ^ *buf];
60 
61 	crc = ~crc;
62 	return crc;
63 }
64 
65 /*---------------------------------------------------------------------------*/
66 
67 static void
reporterr(int e,const char * fmt,...)68 reporterr(int e, const char *fmt, ...)
69 {
70 	if (fmt) {
71 		va_list args;
72 		va_start(args, fmt);
73 		vfprintf(stderr, fmt, args);
74 		va_end(args);
75 	}
76 
77 	exit(e);
78 }
79 
80 static void
split(int32_t * I,int32_t * V,int32_t start,int32_t len,int32_t h)81 split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
82 {
83 	int32_t i,j,k,x,tmp,jj,kk;
84 
85 	if(len<16) {
86 		for(k=start;k<start+len;k+=j) {
87 			j=1;x=V[I[k]+h];
88 			for(i=1;k+i<start+len;i++) {
89 				if(V[I[k+i]+h]<x) {
90 					x=V[I[k+i]+h];
91 					j=0;
92 				};
93 				if(V[I[k+i]+h]==x) {
94 					tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
95 					j++;
96 				};
97 			};
98 			for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
99 			if(j==1) I[k]=-1;
100 		};
101 		return;
102 	};
103 
104 	x=V[I[start+len/2]+h];
105 	jj=0;kk=0;
106 	for(i=start;i<start+len;i++) {
107 		if(V[I[i]+h]<x) jj++;
108 		if(V[I[i]+h]==x) kk++;
109 	};
110 	jj+=start;kk+=jj;
111 
112 	i=start;j=0;k=0;
113 	while(i<jj) {
114 		if(V[I[i]+h]<x) {
115 			i++;
116 		} else if(V[I[i]+h]==x) {
117 			tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
118 			j++;
119 		} else {
120 			tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
121 			k++;
122 		};
123 	};
124 
125 	while(jj+j<kk) {
126 		if(V[I[jj+j]+h]==x) {
127 			j++;
128 		} else {
129 			tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
130 			k++;
131 		};
132 	};
133 
134 	if(jj>start) split(I,V,start,jj-start,h);
135 
136 	for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
137 	if(jj==kk-1) I[jj]=-1;
138 
139 	if(start+len>kk) split(I,V,kk,start+len-kk,h);
140 }
141 
142 static void
qsufsort(int32_t * I,int32_t * V,unsigned char * old,int32_t oldsize)143 qsufsort(int32_t *I,int32_t *V,unsigned char *old,int32_t oldsize)
144 {
145 	int32_t buckets[256];
146 	int32_t i,h,len;
147 
148 	for(i=0;i<256;i++) buckets[i]=0;
149 	for(i=0;i<oldsize;i++) buckets[old[i]]++;
150 	for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
151 	for(i=255;i>0;i--) buckets[i]=buckets[i-1];
152 	buckets[0]=0;
153 
154 	for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
155 	I[0]=oldsize;
156 	for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
157 	V[oldsize]=0;
158 	for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
159 	I[0]=-1;
160 
161 	for(h=1;I[0]!=-(oldsize+1);h+=h) {
162 		len=0;
163 		for(i=0;i<oldsize+1;) {
164 			if(I[i]<0) {
165 				len-=I[i];
166 				i-=I[i];
167 			} else {
168 				if(len) I[i-len]=-len;
169 				len=V[I[i]]+1-i;
170 				split(I,V,i,len,h);
171 				i+=len;
172 				len=0;
173 			};
174 		};
175 		if(len) I[i-len]=-len;
176 	};
177 
178 	for(i=0;i<oldsize+1;i++) I[V[i]]=i;
179 }
180 
181 static int32_t
matchlen(unsigned char * old,int32_t oldsize,unsigned char * newbuf,int32_t newsize)182 matchlen(unsigned char *old,int32_t oldsize,unsigned char *newbuf,int32_t newsize)
183 {
184 	int32_t i;
185 
186 	for(i=0;(i<oldsize)&&(i<newsize);i++)
187 		if(old[i]!=newbuf[i]) break;
188 
189 	return i;
190 }
191 
192 static int32_t
search(int32_t * I,unsigned char * old,int32_t oldsize,unsigned char * newbuf,int32_t newsize,int32_t st,int32_t en,int32_t * pos)193 search(int32_t *I,unsigned char *old,int32_t oldsize,
194        unsigned char *newbuf,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
195 {
196 	int32_t x,y;
197 
198 	if(en-st<2) {
199 		x=matchlen(old+I[st],oldsize-I[st],newbuf,newsize);
200 		y=matchlen(old+I[en],oldsize-I[en],newbuf,newsize);
201 
202 		if(x>y) {
203 			*pos=I[st];
204 			return x;
205 		} else {
206 			*pos=I[en];
207 			return y;
208 		}
209 	};
210 
211 	x=st+(en-st)/2;
212 	if(memcmp(old+I[x],newbuf,MIN(oldsize-I[x],newsize))<0) {
213 		return search(I,old,oldsize,newbuf,newsize,x,en,pos);
214 	} else {
215 		return search(I,old,oldsize,newbuf,newsize,st,x,pos);
216 	};
217 }
218 
main(int argc,char * argv[])219 int main(int argc,char *argv[])
220 {
221 	int fd;
222 	unsigned char *old,*newbuf;
223 	int32_t oldsize,newsize;
224 	int32_t *I,*V;
225 
226 	int32_t scan,pos,len;
227 	int32_t lastscan,lastpos,lastoffset;
228 	int32_t oldscore,scsc;
229 
230 	int32_t s,Sf,lenf,Sb,lenb;
231 	int32_t overlap,Ss,lens;
232 	int32_t i;
233 
234 	int32_t dblen,eblen;
235 	unsigned char *db,*eb;
236 
237 	unsigned int scrc;
238 
239 	MBSPatchHeader header = {
240 		{'M','B','D','I','F','F','1','0'},
241 		0, 0, 0, 0, 0, 0
242 	};
243 
244 	uint32_t numtriples;
245 
246 	if(argc!=4)
247 		reporterr(1,"usage: %s <oldfile> <newfile> <patchfile>\n",argv[0]);
248 
249 	/* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
250 		that we never try to malloc(0) and get a NULL pointer */
251 	if(((fd=open(argv[1],O_RDONLY|_O_BINARY,0))<0) ||
252 		((oldsize=lseek(fd,0,SEEK_END))==-1) ||
253 		((old=(unsigned char*) malloc(oldsize+1))==NULL) ||
254 		(lseek(fd,0,SEEK_SET)!=0) ||
255 		(read(fd,old,oldsize)!=oldsize) ||
256 		(close(fd)==-1))
257 		reporterr(1,"%s\n",argv[1]);
258 
259 	scrc = crc32(old, oldsize);
260 
261 	if(((I=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
262 		((V=(int32_t*) malloc((oldsize+1)*sizeof(int32_t)))==NULL))
263 		reporterr(1,NULL);
264 
265 	qsufsort(I,V,old,oldsize);
266 
267 	free(V);
268 
269 	/* Allocate newsize+1 bytes instead of newsize bytes to ensure
270 		that we never try to malloc(0) and get a NULL pointer */
271 	if(((fd=open(argv[2],O_RDONLY|_O_BINARY,0))<0) ||
272 		((newsize=lseek(fd,0,SEEK_END))==-1) ||
273 		((newbuf=(unsigned char*) malloc(newsize+1))==NULL) ||
274 		(lseek(fd,0,SEEK_SET)!=0) ||
275 		(read(fd,newbuf,newsize)!=newsize) ||
276 		(close(fd)==-1)) reporterr(1,"%s\n",argv[2]);
277 
278 	if(((db=(unsigned char*) malloc(newsize+1))==NULL) ||
279 		((eb=(unsigned char*) malloc(newsize+1))==NULL))
280 		reporterr(1,NULL);
281 
282 	dblen=0;
283 	eblen=0;
284 
285 	if((fd=open(argv[3],O_CREAT|O_TRUNC|O_WRONLY|_O_BINARY,0666))<0)
286 		reporterr(1,"%s\n",argv[3]);
287 
288 	/* start writing here */
289 
290 	/* We don't know the lengths yet, so we will write the header again
291 		 at the end */
292 
293 	if(write(fd,&header,sizeof(MBSPatchHeader))!=sizeof(MBSPatchHeader))
294 		reporterr(1,"%s\n",argv[3]);
295 
296 	scan=0;len=0;
297 	lastscan=0;lastpos=0;lastoffset=0;
298 	numtriples = 0;
299 	while(scan<newsize) {
300 		oldscore=0;
301 
302 		for(scsc=scan+=len;scan<newsize;scan++) {
303 			len=search(I,old,oldsize,newbuf+scan,newsize-scan,
304 					0,oldsize,&pos);
305 
306 			for(;scsc<scan+len;scsc++)
307 			if((scsc+lastoffset<oldsize) &&
308 				(old[scsc+lastoffset] == newbuf[scsc]))
309 				oldscore++;
310 
311 			if(((len==oldscore) && (len!=0)) ||
312 				(len>oldscore+10)) break;
313 
314 			if((scan+lastoffset<oldsize) &&
315 				(old[scan+lastoffset] == newbuf[scan]))
316 				oldscore--;
317 		};
318 
319 		if((len!=oldscore) || (scan==newsize)) {
320 			MBSPatchTriple triple;
321 
322 			s=0;Sf=0;lenf=0;
323 			for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
324 				if(old[lastpos+i]==newbuf[lastscan+i]) s++;
325 				i++;
326 				if(s*3-i*2>Sf*3-lenf*2) { Sf=s; lenf=i; };
327 			};
328 
329 			lenb=0;
330 			if(scan<newsize) {
331 				s=0;Sb=0;
332 				for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
333 					if(old[pos-i]==newbuf[scan-i]) s++;
334 					if(s*3-i*2>Sb*3-lenb*2) { Sb=s; lenb=i; };
335 				};
336 			};
337 
338 			if(lastscan+lenf>scan-lenb) {
339 				overlap=(lastscan+lenf)-(scan-lenb);
340 				s=0;Ss=0;lens=0;
341 				for(i=0;i<overlap;i++) {
342 					if(newbuf[lastscan+lenf-overlap+i]==
343 					   old[lastpos+lenf-overlap+i]) s++;
344 					if(newbuf[scan-lenb+i]==
345 					   old[pos-lenb+i]) s--;
346 					if(s>Ss) { Ss=s; lens=i+1; };
347 				};
348 
349 				lenf+=lens-overlap;
350 				lenb-=lens;
351 			};
352 
353 			for(i=0;i<lenf;i++)
354 				db[dblen+i]=newbuf[lastscan+i]-old[lastpos+i];
355 			for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
356 				eb[eblen+i]=newbuf[lastscan+lenf+i];
357 
358 			dblen+=lenf;
359 			eblen+=(scan-lenb)-(lastscan+lenf);
360 
361 			triple.x = htonl(lenf);
362 			triple.y = htonl((scan-lenb)-(lastscan+lenf));
363 			triple.z = htonl((pos-lenb)-(lastpos+lenf));
364 			if (write(fd,&triple,sizeof(triple)) != sizeof(triple))
365 				reporterr(1,NULL);
366 
367 #ifdef DEBUG_bsmedberg
368 			printf("Writing a block:\n"
369 			       "	X: %u\n"
370 			       "	Y: %u\n"
371 			       "	Z: %i\n",
372 			       (uint32_t) lenf,
373 			       (uint32_t) ((scan-lenb)-(lastscan+lenf)),
374 			       (uint32_t) ((pos-lenb)-(lastpos+lenf)));
375 #endif
376 
377 			++numtriples;
378 
379 			lastscan=scan-lenb;
380 			lastpos=pos-lenb;
381 			lastoffset=pos-scan;
382 		};
383 	};
384 
385 	if(write(fd,db,dblen)!=dblen)
386 		reporterr(1,NULL);
387 
388 	if(write(fd,eb,eblen)!=eblen)
389 		reporterr(1,NULL);
390 
391 	header.slen	= htonl(oldsize);
392 	header.scrc32	= htonl(scrc);
393 	header.dlen	= htonl(newsize);
394 	header.cblen	= htonl(numtriples * sizeof(MBSPatchTriple));
395 	header.difflen	= htonl(dblen);
396 	header.extralen = htonl(eblen);
397 
398 	if (lseek(fd,0,SEEK_SET) == -1 ||
399 	    write(fd,&header,sizeof(header)) != sizeof(header) ||
400 	    close(fd) == -1)
401 		reporterr(1,NULL);
402 
403 	free(db);
404 	free(eb);
405 	free(I);
406 	free(old);
407 	free(newbuf);
408 
409 	return 0;
410 }
411 
412