1 /*
2  * Trivial amendments by James Bonfield <jkb@sanger.ac.uk> to provide an
3  * HTSlib interface. 2015.
4  *
5  * Externally our API uses an opaque hts_md5_context structure.
6  *
7  * Internally either this gets defined and used with the routines here
8  * or it remains incomplete and is cast to the OpenSSL MD5_CTX structure
9  * and used by routines from OpenSSL.
10  */
11 
12 /*
13  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
14  * MD5 Message-Digest Algorithm (RFC 1321).
15  *
16  * Homepage:
17  * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
18  *
19  * Author:
20  * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
21  *
22  * This software was written by Alexander Peslyak in 2001.  No copyright is
23  * claimed, and the software is hereby placed in the public domain.
24  * In case this attempt to disclaim copyright and place the software in the
25  * public domain is deemed null and void, then the software is
26  * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
27  * general public under the following terms:
28  *
29  * Redistribution and use in source and binary forms, with or without
30  * modification, are permitted.
31  *
32  * There's ABSOLUTELY NO WARRANTY, express or implied.
33  *
34  * (This is a heavily cut-down "BSD license".)
35  *
36  * This differs from Colin Plumb's older public domain implementation in that
37  * no exactly 32-bit integer data type is required (any 32-bit or wider
38  * unsigned integer data type will do), there's no compile-time endianness
39  * configuration, and the function prototypes match OpenSSL's.  No code from
40  * Colin Plumb's implementation has been reused; this comment merely compares
41  * the properties of the two independent implementations.
42  *
43  * The primary goals of this implementation are portability and ease of use.
44  * It is meant to be fast, but not as fast as possible.  Some known
45  * optimizations are not included to reduce source code size and avoid
46  * compile-time configuration.
47  */
48 
49 #include <config.h>
50 
51 #include <stdlib.h>
52 #include "htslib/hts.h"
53 #include "htslib/hts_endian.h"
54 
55 #ifndef HAVE_OPENSSL
56 
57 #include <string.h>
58 
59 /* Any 32-bit or wider unsigned integer data type will do */
60 typedef unsigned int hts_md5_u32plus;
61 
62 struct hts_md5_context {
63 	hts_md5_u32plus lo, hi;
64 	hts_md5_u32plus a, b, c, d;
65 	unsigned char buffer[64];
66 	hts_md5_u32plus block[16];
67 };
68 
69 /*
70  * The basic MD5 functions.
71  *
72  * F and G are optimized compared to their RFC 1321 definitions for
73  * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
74  * implementation.
75  */
76 #define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
77 #define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
78 #define H(x, y, z)			(((x) ^ (y)) ^ (z))
79 #define H2(x, y, z)			((x) ^ ((y) ^ (z)))
80 #define I(x, y, z)			((y) ^ ((x) | ~(z)))
81 
82 /*
83  * The MD5 transformation for all four rounds.
84  */
85 #define STEP(f, a, b, c, d, x, t, s) \
86 	(a) += f((b), (c), (d)) + (x) + (t); \
87 	(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
88 	(a) += (b);
89 
90 /*
91  * SET reads 4 input bytes in little-endian byte order and stores them
92  * in a properly aligned word in host byte order.
93  *
94  * The check for little-endian architectures that tolerate unaligned
95  * memory accesses is just an optimization.  Nothing will break if it
96  * doesn't work.
97  */
98 #if defined(HTS_LITTLE_ENDIAN) && HTS_ALLOW_UNALIGNED != 0
99 #define SET(n) \
100 	(*(hts_md5_u32plus *)&ptr[(n) * 4])
101 #define GET(n) \
102 	SET(n)
103 #else
104 #define SET(n) \
105 	(ctx->block[(n)] = \
106 	(hts_md5_u32plus)ptr[(n) * 4] | \
107 	((hts_md5_u32plus)ptr[(n) * 4 + 1] << 8) | \
108 	((hts_md5_u32plus)ptr[(n) * 4 + 2] << 16) | \
109 	((hts_md5_u32plus)ptr[(n) * 4 + 3] << 24))
110 #define GET(n) \
111 	(ctx->block[(n)])
112 #endif
113 
114 /*
115  * This processes one or more 64-byte data blocks, but does NOT update
116  * the bit counters.  There are no alignment requirements.
117  */
body(hts_md5_context * ctx,const void * data,unsigned long size)118 static const void *body(hts_md5_context *ctx, const void *data, unsigned long size)
119 {
120 	const unsigned char *ptr;
121 	hts_md5_u32plus a, b, c, d;
122 	hts_md5_u32plus saved_a, saved_b, saved_c, saved_d;
123 
124 	ptr = (const unsigned char *)data;
125 
126 	a = ctx->a;
127 	b = ctx->b;
128 	c = ctx->c;
129 	d = ctx->d;
130 
131 	do {
132 		saved_a = a;
133 		saved_b = b;
134 		saved_c = c;
135 		saved_d = d;
136 
137 /* Round 1 */
138 		STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
139 		STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
140 		STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
141 		STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
142 		STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
143 		STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
144 		STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
145 		STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
146 		STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
147 		STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
148 		STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
149 		STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
150 		STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
151 		STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
152 		STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
153 		STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
154 
155 /* Round 2 */
156 		STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
157 		STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
158 		STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
159 		STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
160 		STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
161 		STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
162 		STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
163 		STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
164 		STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
165 		STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
166 		STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
167 		STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
168 		STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
169 		STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
170 		STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
171 		STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
172 
173 /* Round 3 */
174 		STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
175 		STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
176 		STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
177 		STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
178 		STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
179 		STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
180 		STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
181 		STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
182 		STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
183 		STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
184 		STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
185 		STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
186 		STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
187 		STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
188 		STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
189 		STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
190 
191 /* Round 4 */
192 		STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
193 		STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
194 		STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
195 		STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
196 		STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
197 		STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
198 		STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
199 		STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
200 		STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
201 		STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
202 		STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
203 		STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
204 		STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
205 		STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
206 		STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
207 		STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
208 
209 		a += saved_a;
210 		b += saved_b;
211 		c += saved_c;
212 		d += saved_d;
213 
214 		ptr += 64;
215 	} while (size -= 64);
216 
217 	ctx->a = a;
218 	ctx->b = b;
219 	ctx->c = c;
220 	ctx->d = d;
221 
222 	return ptr;
223 }
224 
hts_md5_reset(hts_md5_context * ctx)225 void hts_md5_reset(hts_md5_context *ctx)
226 {
227 	ctx->a = 0x67452301;
228 	ctx->b = 0xefcdab89;
229 	ctx->c = 0x98badcfe;
230 	ctx->d = 0x10325476;
231 
232 	ctx->lo = 0;
233 	ctx->hi = 0;
234 }
235 
hts_md5_update(hts_md5_context * ctx,const void * data,unsigned long size)236 void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size)
237 {
238 	hts_md5_u32plus saved_lo;
239 	unsigned long used, available;
240 
241 	saved_lo = ctx->lo;
242 	if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
243 		ctx->hi++;
244 	ctx->hi += size >> 29;
245 
246 	used = saved_lo & 0x3f;
247 
248 	if (used) {
249 		available = 64 - used;
250 
251 		if (size < available) {
252 			memcpy(&ctx->buffer[used], data, size);
253 			return;
254 		}
255 
256 		memcpy(&ctx->buffer[used], data, available);
257 		data = (const unsigned char *)data + available;
258 		size -= available;
259 		body(ctx, ctx->buffer, 64);
260 	}
261 
262 	if (size >= 64) {
263 		data = body(ctx, data, size & ~(unsigned long)0x3f);
264 		size &= 0x3f;
265 	}
266 
267 	memcpy(ctx->buffer, data, size);
268 }
269 
hts_md5_final(unsigned char * result,hts_md5_context * ctx)270 void hts_md5_final(unsigned char *result, hts_md5_context *ctx)
271 {
272 	unsigned long used, available;
273 
274 	used = ctx->lo & 0x3f;
275 
276 	ctx->buffer[used++] = 0x80;
277 
278 	available = 64 - used;
279 
280 	if (available < 8) {
281 		memset(&ctx->buffer[used], 0, available);
282 		body(ctx, ctx->buffer, 64);
283 		used = 0;
284 		available = 64;
285 	}
286 
287 	memset(&ctx->buffer[used], 0, available - 8);
288 
289 	ctx->lo <<= 3;
290 	ctx->buffer[56] = ctx->lo;
291 	ctx->buffer[57] = ctx->lo >> 8;
292 	ctx->buffer[58] = ctx->lo >> 16;
293 	ctx->buffer[59] = ctx->lo >> 24;
294 	ctx->buffer[60] = ctx->hi;
295 	ctx->buffer[61] = ctx->hi >> 8;
296 	ctx->buffer[62] = ctx->hi >> 16;
297 	ctx->buffer[63] = ctx->hi >> 24;
298 
299 	body(ctx, ctx->buffer, 64);
300 
301 	result[0] = ctx->a;
302 	result[1] = ctx->a >> 8;
303 	result[2] = ctx->a >> 16;
304 	result[3] = ctx->a >> 24;
305 	result[4] = ctx->b;
306 	result[5] = ctx->b >> 8;
307 	result[6] = ctx->b >> 16;
308 	result[7] = ctx->b >> 24;
309 	result[8] = ctx->c;
310 	result[9] = ctx->c >> 8;
311 	result[10] = ctx->c >> 16;
312 	result[11] = ctx->c >> 24;
313 	result[12] = ctx->d;
314 	result[13] = ctx->d >> 8;
315 	result[14] = ctx->d >> 16;
316 	result[15] = ctx->d >> 24;
317 
318 	memset(ctx, 0, sizeof(*ctx));
319 }
320 
321 
hts_md5_init(void)322 hts_md5_context *hts_md5_init(void)
323 {
324     hts_md5_context *ctx = malloc(sizeof(*ctx));
325     if (!ctx)
326         return NULL;
327 
328     hts_md5_reset(ctx);
329     return ctx;
330 }
331 
332 #else
333 
334 #include <openssl/md5.h>
335 #include <assert.h>
336 
337 /*
338  * Wrappers around the OpenSSL libcrypto.so MD5 implementation.
339  *
340  * These are here to ensure they end up in the symbol table of the
341  * library regardless of the static inline in the headers.
342  */
hts_md5_init(void)343 hts_md5_context *hts_md5_init(void)
344 {
345     MD5_CTX *ctx = malloc(sizeof(*ctx));
346     if (!ctx)
347         return NULL;
348 
349     MD5_Init(ctx);
350 
351     return (hts_md5_context *)ctx;
352 }
353 
hts_md5_reset(hts_md5_context * ctx)354 void hts_md5_reset(hts_md5_context *ctx)
355 {
356     MD5_Init((MD5_CTX *)ctx);
357 }
358 
hts_md5_update(hts_md5_context * ctx,const void * data,unsigned long size)359 void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size)
360 {
361     MD5_Update((MD5_CTX *)ctx, data, size);
362 }
363 
hts_md5_final(unsigned char * result,hts_md5_context * ctx)364 void hts_md5_final(unsigned char *result, hts_md5_context *ctx)
365 {
366     MD5_Final(result, (MD5_CTX *)ctx);
367 }
368 
369 #endif
370 
hts_md5_destroy(hts_md5_context * ctx)371 void hts_md5_destroy(hts_md5_context *ctx)
372 {
373     if (!ctx)
374         return;
375 
376     free(ctx);
377 }
378 
hts_md5_hex(char * hex,const unsigned char * digest)379 void hts_md5_hex(char *hex, const unsigned char *digest)
380 {
381     int i;
382     for (i = 0; i < 16; i++) {
383         hex[i*2+0] = "0123456789abcdef"[(digest[i]>>4)&0xf];
384         hex[i*2+1] = "0123456789abcdef"[digest[i]&0xf];
385     }
386     hex[32] = 0;
387 }
388