1 /* (PD) 2001 The Bitzi Corporation
2  * Please see file COPYING or http://bitzi.com/publicdomain
3  * for more info.
4  *
5  *  $Id: ftuuhash.c,v 1.2 2003/03/18 08:15:19 gojomo Exp $
6  *
7  *  Support for calculating FastTrack internal hashes
8  *
9  *  Based on the definition of the FastTrack hash used
10  *  by the giFT project; reworked to allow calculation
11  *  in one stream pass, rather than with file-seeks and
12  *  a filesize known in advance.
13  *
14  *  FT identifiers are roughly:
15  *  16 bytes: md5 of first 307,200 bytes of file
16  *   4 bytes: result of running a more simple "smallHash"
17  *            over a number of ranges of the file, including:
18  *
19  *            - the 307,200 bytes starting at each 1MiB, 2MiB, 4MiB,
20  *              8MiB, 16MiB, etc offset within the file
21  *
22  *            - the last 307,200 bytes of the file (or less
23  *              if the filesize < 614,400)
24  *
25  *            The last 307,200 has precedence over the 307,200
26  *            starting at any sampling point -- so for example,
27  *            in a file that's 1.5 MiB, the range from 1MiB to
28  *            1MiB+307200 is not sampled, because it would overlap
29  *            into the 307,200-long endseg.
30  *
31  *  Since this code does not know the stream's length in advance,
32  *  it uses a big rolling window of file contents, and can rollback
33  *  the smallhash if the end is discovered within 307200 of the
34  *  last sample range end. (Seeding the FTUU_CTX with the file's
35  *  length would obviate the need for that rolling window...
36  *  probably speeding up the process a lot.)
37  *
38  */
39 
40 
41 #include <stdio.h>
42 #include <string.h>
43 
44 #include "ftuuhash.h"
45 #include "md5.h"
46 
47 static const unsigned int FTSEG_SIZE = 307200;
48 
49 // table from giFT project
50 static const unsigned int smalltable[256] = {
51     0x00000000,0x77073096,0xEE0E612C,0x990951BA,
52     0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
53     0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,
54     0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
55     0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,
56     0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
57     0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,
58     0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
59     0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,
60     0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
61     0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,
62     0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
63     0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,
64     0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
65     0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,
66     0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
67     0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,
68     0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
69     0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,
70     0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
71     0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,
72     0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
73     0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,
74     0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
75     0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,
76     0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
77     0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,
78     0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
79     0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,
80     0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
81     0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,
82     0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
83     0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,
84     0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
85     0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,
86     0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
87     0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,
88     0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
89     0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,
90     0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
91     0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,
92     0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
93     0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,
94     0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
95     0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,
96     0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
97     0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,
98     0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
99     0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,
100     0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
101     0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,
102     0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
103     0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,
104     0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
105     0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,
106     0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
107     0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,
108     0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
109     0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,
110     0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
111     0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,
112     0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
113     0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,
114     0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D
115 };
116 
117 /**
118  * Update the 4-byte running small hash; also from giFT project
119  */
hashSmallHash(byte * data,size_t len,unsigned int hash)120 unsigned int hashSmallHash(byte *data, size_t len, unsigned int hash)
121 {
122     unsigned int i;
123 
124     for(i=0;i<len;++i) {
125 	hash = smalltable[data[i] ^ (hash & 0xff)] ^ (hash >> 8);
126     }
127 
128     return hash;
129 }
130 
131 // dirt simple base64 encoding. caller should ensure out has enough space.
132 // no '=' padding provided, but string is zero-terminated.
bitziEncodeBase64(byte * raw,int len,char * out)133 void bitziEncodeBase64(byte *raw, int len, char *out) {
134 	char  *base64digits = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
135 	int bitsNeeded = 6;
136 	int bitPosition = 7;
137 	int rawIndex = 0;
138 	int strIndex = 0;
139 	int bit = 0;
140 	int digit = 0;
141 
142 	while (rawIndex < len) {
143 		while (bitsNeeded > 0) {
144 			if (bitPosition >= 0) {
145 				bit = (raw[rawIndex]>>bitPosition) & 1;
146 				digit = digit << 1;
147 				digit += bit;
148 				bitsNeeded--;
149 				bitPosition--;
150 			} else {
151 				rawIndex++;
152 				bitPosition=7;
153 				if (rawIndex==len) {  // we're past the end; zero-pad
154 					digit = digit << bitsNeeded;
155 					bitsNeeded = 0;
156 				}
157 			}
158 		}
159 		out[strIndex]=base64digits[digit];
160 		digit=0;
161 		bitsNeeded=6;
162 		strIndex++;
163 	}
164 	out[strIndex]=0;
165 }
166 
167 
168 //
169 // stream-based FT hash calculation
170 //
171 
172 /* FTUUHash initialization.
173  */
FTUUInit(FTUU_CTX * context)174 void FTUUInit(FTUU_CTX *context)                                        /* context */
175 {
176 	context->nextPos = 0;
177 	context->smallHash = 0xffffffff;
178 	context->backupSmallHash = 0xffffffff;
179 	MD5Init(&(context->md5context));
180 	context->nextSampleStart = 0x100000;
181 }
182 
183 /* FTUUHash block update operation.
184  */
FTUUUpdate(FTUU_CTX * context,const unsigned char * input,unsigned int inputLen)185 void FTUUUpdate(FTUU_CTX *context,                                        /* context */
186 const unsigned char *input,                                /* input block */
187 unsigned int inputLen)                     /* length of input block */
188 {
189 	unsigned int firstPart = inputLen;
190 
191 	// first, handle the MD5'd portion of the file
192 	if(context->nextPos < FTSEG_SIZE) {
193 		if((context->nextPos+inputLen)>FTSEG_SIZE) {
194 			// don't overshoot the segsize
195 			firstPart = FTSEG_SIZE - context->nextPos;
196 		}
197 		MD5Update(&(context->md5context),input,firstPart);
198 		context->nextPos += firstPart;
199 		if(firstPart<inputLen) {
200 			// continue with the rest of the input
201 			FTUUUpdate(context,input+firstPart,inputLen-firstPart);
202 		}
203 		return;
204 	}
205 
206 	// OK, we're past the MD5 portion of the file
207 
208     // check for at sampling-range end
209 	if(context->nextPos == (context->nextSampleStart+FTSEG_SIZE) ) {
210 		// the rollingBuffer is loaded with exactly enough data
211 		// to add a sample to the smallHash
212         context->backupSmallHash = context->smallHash; // save current smallhash state
213 		// through to end...
214 		context->smallHash =
215 			hashSmallHash(context->rollingBuffer + (context->nextPos % FTSEG_SIZE),
216 			                         FTSEG_SIZE - (context->nextPos % FTSEG_SIZE),
217 								     // 0,
218 								     context->smallHash);
219 		// ... and wraparound
220 		context->smallHash =
221 			hashSmallHash(context->rollingBuffer,
222 			                         context->nextPos % FTSEG_SIZE,
223 								     // FTSEG_SIZE - (context->nextPos % FTSEG_SIZE),
224 								     context->smallHash);
225 		// set new sampling startpoint
226 		context->nextSampleStart = context->nextSampleStart << 1;
227 	}
228 
229 	// OK, just move bytes of data from input to the rollingBuffer
230 	// (use smallest of: inputLen, bytes-to-wraparound, bytes-to-sample-end
231     if((context->nextPos + inputLen) > (context->nextSampleStart + FTSEG_SIZE))
232 		firstPart = (context->nextSampleStart + FTSEG_SIZE) - context->nextPos;
233 	if(((context->nextPos % FTSEG_SIZE) + firstPart) > FTSEG_SIZE) {
234 		firstPart = FTSEG_SIZE - (context->nextPos % FTSEG_SIZE);
235 	}
236 
237     memcpy(context->rollingBuffer + (context->nextPos % FTSEG_SIZE),
238 		   input,
239 		   firstPart);
240     context->nextPos += firstPart;
241 
242 	if (firstPart<inputLen) {
243 		// continue with the rest of the input
244 		FTUUUpdate(context,input+firstPart,inputLen-firstPart);
245     }
246 }
247 
248 /* FTUUHash finalization.
249  *//* message digest *//* context */
FTUUFinal(unsigned char digest[20],FTUU_CTX * context)250 void FTUUFinal(unsigned char digest[20],FTUU_CTX *context)
251 {
252 	int continueIndex = 0;
253     // finalize MD5
254 	MD5Final(digest,&(context->md5context));
255 
256 	// decide whether or not to rollback smallhash
257 	if(context->nextPos < ((context->nextSampleStart >> 1) + 2*FTSEG_SIZE)) {
258 		// the last FTSEG_SIZE bytes overlap the last internal sample
259 		// pretend like the last smallHash processing never happened
260 		context->smallHash = context->backupSmallHash;
261 	}
262 
263 	// do the smallHash of the end segment
264 	if(context->nextPos >= 2*FTSEG_SIZE) {
265 		// end segment is a full FTSEG_SIZE
266 		context->smallHash =
267 			hashSmallHash(context->rollingBuffer + (context->nextPos % FTSEG_SIZE),
268 			                         FTSEG_SIZE - (context->nextPos % FTSEG_SIZE),
269 								     // 0,
270 								     context->smallHash);
271 		continueIndex = FTSEG_SIZE - (context->nextPos % FTSEG_SIZE);
272 	}
273 	if(context->nextPos > FTSEG_SIZE) {
274 		// wraparound or do an endseg < FTSEG_SIZE
275 		context->smallHash =
276 			hashSmallHash(context->rollingBuffer,
277 									 context->nextPos % FTSEG_SIZE,
278 									 // continueIndex,
279 									 context->smallHash);
280 	} // else filesize was <= FTSEG_SIZE, no smallHashing of endseg necessary
281     context->smallHash ^= context->nextPos;
282     digest[16] = context->smallHash & 0xff;
283     digest[17] = (context->smallHash >> 8) & 0xff;
284     digest[18] = (context->smallHash >> 16) & 0xff;
285     digest[19] = (context->smallHash >> 24) & 0xff;
286 
287 }
288 
289