1 /* (PD) 2001 The Bitzi Corporation
2 * Please see file COPYING or http://bitzi.com/publicdomain
3 * for more info.
4 *
5 * $Id: ftuuhash.c,v 1.2 2003/03/18 08:15:19 gojomo Exp $
6 *
7 * Support for calculating FastTrack internal hashes
8 *
9 * Based on the definition of the FastTrack hash used
10 * by the giFT project; reworked to allow calculation
11 * in one stream pass, rather than with file-seeks and
12 * a filesize known in advance.
13 *
14 * FT identifiers are roughly:
15 * 16 bytes: md5 of first 307,200 bytes of file
16 * 4 bytes: result of running a more simple "smallHash"
17 * over a number of ranges of the file, including:
18 *
19 * - the 307,200 bytes starting at each 1MiB, 2MiB, 4MiB,
20 * 8MiB, 16MiB, etc offset within the file
21 *
22 * - the last 307,200 bytes of the file (or less
23 * if the filesize < 614,400)
24 *
25 * The last 307,200 has precedence over the 307,200
26 * starting at any sampling point -- so for example,
27 * in a file that's 1.5 MiB, the range from 1MiB to
28 * 1MiB+307200 is not sampled, because it would overlap
29 * into the 307,200-long endseg.
30 *
31 * Since this code does not know the stream's length in advance,
32 * it uses a big rolling window of file contents, and can rollback
33 * the smallhash if the end is discovered within 307200 of the
34 * last sample range end. (Seeding the FTUU_CTX with the file's
35 * length would obviate the need for that rolling window...
36 * probably speeding up the process a lot.)
37 *
38 */
39
40
41 #include <stdio.h>
42 #include <string.h>
43
44 #include "ftuuhash.h"
45 #include "md5.h"
46
47 static const unsigned int FTSEG_SIZE = 307200;
48
49 // table from giFT project
50 static const unsigned int smalltable[256] = {
51 0x00000000,0x77073096,0xEE0E612C,0x990951BA,
52 0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
53 0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,
54 0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
55 0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,
56 0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
57 0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,
58 0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
59 0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,
60 0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
61 0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,
62 0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
63 0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,
64 0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
65 0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,
66 0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
67 0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,
68 0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
69 0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,
70 0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
71 0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,
72 0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
73 0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,
74 0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
75 0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,
76 0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
77 0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,
78 0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
79 0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,
80 0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
81 0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,
82 0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
83 0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,
84 0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
85 0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,
86 0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
87 0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,
88 0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
89 0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,
90 0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
91 0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,
92 0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
93 0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,
94 0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
95 0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,
96 0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
97 0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,
98 0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
99 0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,
100 0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
101 0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,
102 0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
103 0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,
104 0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
105 0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,
106 0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
107 0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,
108 0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
109 0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,
110 0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
111 0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,
112 0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
113 0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,
114 0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D
115 };
116
117 /**
118 * Update the 4-byte running small hash; also from giFT project
119 */
hashSmallHash(byte * data,size_t len,unsigned int hash)120 unsigned int hashSmallHash(byte *data, size_t len, unsigned int hash)
121 {
122 unsigned int i;
123
124 for(i=0;i<len;++i) {
125 hash = smalltable[data[i] ^ (hash & 0xff)] ^ (hash >> 8);
126 }
127
128 return hash;
129 }
130
131 // dirt simple base64 encoding. caller should ensure out has enough space.
132 // no '=' padding provided, but string is zero-terminated.
bitziEncodeBase64(byte * raw,int len,char * out)133 void bitziEncodeBase64(byte *raw, int len, char *out) {
134 char *base64digits = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
135 int bitsNeeded = 6;
136 int bitPosition = 7;
137 int rawIndex = 0;
138 int strIndex = 0;
139 int bit = 0;
140 int digit = 0;
141
142 while (rawIndex < len) {
143 while (bitsNeeded > 0) {
144 if (bitPosition >= 0) {
145 bit = (raw[rawIndex]>>bitPosition) & 1;
146 digit = digit << 1;
147 digit += bit;
148 bitsNeeded--;
149 bitPosition--;
150 } else {
151 rawIndex++;
152 bitPosition=7;
153 if (rawIndex==len) { // we're past the end; zero-pad
154 digit = digit << bitsNeeded;
155 bitsNeeded = 0;
156 }
157 }
158 }
159 out[strIndex]=base64digits[digit];
160 digit=0;
161 bitsNeeded=6;
162 strIndex++;
163 }
164 out[strIndex]=0;
165 }
166
167
168 //
169 // stream-based FT hash calculation
170 //
171
172 /* FTUUHash initialization.
173 */
FTUUInit(FTUU_CTX * context)174 void FTUUInit(FTUU_CTX *context) /* context */
175 {
176 context->nextPos = 0;
177 context->smallHash = 0xffffffff;
178 context->backupSmallHash = 0xffffffff;
179 MD5Init(&(context->md5context));
180 context->nextSampleStart = 0x100000;
181 }
182
183 /* FTUUHash block update operation.
184 */
FTUUUpdate(FTUU_CTX * context,const unsigned char * input,unsigned int inputLen)185 void FTUUUpdate(FTUU_CTX *context, /* context */
186 const unsigned char *input, /* input block */
187 unsigned int inputLen) /* length of input block */
188 {
189 unsigned int firstPart = inputLen;
190
191 // first, handle the MD5'd portion of the file
192 if(context->nextPos < FTSEG_SIZE) {
193 if((context->nextPos+inputLen)>FTSEG_SIZE) {
194 // don't overshoot the segsize
195 firstPart = FTSEG_SIZE - context->nextPos;
196 }
197 MD5Update(&(context->md5context),input,firstPart);
198 context->nextPos += firstPart;
199 if(firstPart<inputLen) {
200 // continue with the rest of the input
201 FTUUUpdate(context,input+firstPart,inputLen-firstPart);
202 }
203 return;
204 }
205
206 // OK, we're past the MD5 portion of the file
207
208 // check for at sampling-range end
209 if(context->nextPos == (context->nextSampleStart+FTSEG_SIZE) ) {
210 // the rollingBuffer is loaded with exactly enough data
211 // to add a sample to the smallHash
212 context->backupSmallHash = context->smallHash; // save current smallhash state
213 // through to end...
214 context->smallHash =
215 hashSmallHash(context->rollingBuffer + (context->nextPos % FTSEG_SIZE),
216 FTSEG_SIZE - (context->nextPos % FTSEG_SIZE),
217 // 0,
218 context->smallHash);
219 // ... and wraparound
220 context->smallHash =
221 hashSmallHash(context->rollingBuffer,
222 context->nextPos % FTSEG_SIZE,
223 // FTSEG_SIZE - (context->nextPos % FTSEG_SIZE),
224 context->smallHash);
225 // set new sampling startpoint
226 context->nextSampleStart = context->nextSampleStart << 1;
227 }
228
229 // OK, just move bytes of data from input to the rollingBuffer
230 // (use smallest of: inputLen, bytes-to-wraparound, bytes-to-sample-end
231 if((context->nextPos + inputLen) > (context->nextSampleStart + FTSEG_SIZE))
232 firstPart = (context->nextSampleStart + FTSEG_SIZE) - context->nextPos;
233 if(((context->nextPos % FTSEG_SIZE) + firstPart) > FTSEG_SIZE) {
234 firstPart = FTSEG_SIZE - (context->nextPos % FTSEG_SIZE);
235 }
236
237 memcpy(context->rollingBuffer + (context->nextPos % FTSEG_SIZE),
238 input,
239 firstPart);
240 context->nextPos += firstPart;
241
242 if (firstPart<inputLen) {
243 // continue with the rest of the input
244 FTUUUpdate(context,input+firstPart,inputLen-firstPart);
245 }
246 }
247
248 /* FTUUHash finalization.
249 *//* message digest *//* context */
FTUUFinal(unsigned char digest[20],FTUU_CTX * context)250 void FTUUFinal(unsigned char digest[20],FTUU_CTX *context)
251 {
252 int continueIndex = 0;
253 // finalize MD5
254 MD5Final(digest,&(context->md5context));
255
256 // decide whether or not to rollback smallhash
257 if(context->nextPos < ((context->nextSampleStart >> 1) + 2*FTSEG_SIZE)) {
258 // the last FTSEG_SIZE bytes overlap the last internal sample
259 // pretend like the last smallHash processing never happened
260 context->smallHash = context->backupSmallHash;
261 }
262
263 // do the smallHash of the end segment
264 if(context->nextPos >= 2*FTSEG_SIZE) {
265 // end segment is a full FTSEG_SIZE
266 context->smallHash =
267 hashSmallHash(context->rollingBuffer + (context->nextPos % FTSEG_SIZE),
268 FTSEG_SIZE - (context->nextPos % FTSEG_SIZE),
269 // 0,
270 context->smallHash);
271 continueIndex = FTSEG_SIZE - (context->nextPos % FTSEG_SIZE);
272 }
273 if(context->nextPos > FTSEG_SIZE) {
274 // wraparound or do an endseg < FTSEG_SIZE
275 context->smallHash =
276 hashSmallHash(context->rollingBuffer,
277 context->nextPos % FTSEG_SIZE,
278 // continueIndex,
279 context->smallHash);
280 } // else filesize was <= FTSEG_SIZE, no smallHashing of endseg necessary
281 context->smallHash ^= context->nextPos;
282 digest[16] = context->smallHash & 0xff;
283 digest[17] = (context->smallHash >> 8) & 0xff;
284 digest[18] = (context->smallHash >> 16) & 0xff;
285 digest[19] = (context->smallHash >> 24) & 0xff;
286
287 }
288
289