1 /**
2  * Computes MD5 hashes of arbitrary data. MD5 hashes are 16 byte quantities that are like a
3  * checksum or CRC, but are more robust.
4  *
5 $(SCRIPT inhibitQuickIndex = 1;)
6 
7 $(DIVC quickindex,
8 $(BOOKTABLE ,
9 $(TR $(TH Category) $(TH Functions)
10 )
11 $(TR $(TDNW Template API) $(TD $(MYREF MD5)
12 )
13 )
14 $(TR $(TDNW OOP API) $(TD $(MYREF MD5Digest))
15 )
16 $(TR $(TDNW Helpers) $(TD $(MYREF md5Of))
17 )
18 )
19 )
20 
21  * This module conforms to the APIs defined in $(D std.digest). To understand the
22  * differences between the template and the OOP API, see $(MREF std, digest).
23  *
24  * This module publicly imports $(MREF std, digest) and can be used as a stand-alone
25  * module.
26  *
27  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
28  *
29  * CTFE:
30  * Digests do not work in CTFE
31  *
32  * Authors:
33  * Piotr Szturmaj, Kai Nacke, Johannes Pfau $(BR)
34  * The routines and algorithms are derived from the $(I RSA Data Security, Inc. MD5 Message-Digest Algorithm).
35  *
36  * References:
37  *      $(LINK2 http://en.wikipedia.org/wiki/Md5, Wikipedia on MD5)
38  *
39  * Source: $(PHOBOSSRC std/digest/_md.d)
40  *
41  */
42 
43 /* md5.d - RSA Data Security, Inc., MD5 message-digest algorithm
44  * Derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm.
45  */
46 module std.digest.md;
47 
48 public import std.digest;
49 
50 ///
51 @safe unittest
52 {
53     //Template API
54     import std.digest.md;
55 
56     //Feeding data
57     ubyte[1024] data;
58     MD5 md5;
59     md5.start();
60     md5.put(data[]);
61     md5.start(); //Start again
62     md5.put(data[]);
63     auto hash = md5.finish();
64 }
65 
66 ///
67 @safe unittest
68 {
69     //OOP API
70     import std.digest.md;
71 
72     auto md5 = new MD5Digest();
73     ubyte[] hash = md5.digest("abc");
74     assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
75 
76     //Feeding data
77     ubyte[1024] data;
78     md5.put(data[]);
79     md5.reset(); //Start again
80     md5.put(data[]);
81     hash = md5.finish();
82 }
83 
84 //rotateLeft rotates x left n bits
rotateLeft(uint x,uint n)85 private uint rotateLeft(uint x, uint n) @safe pure nothrow @nogc
86 {
87     // With recently added optimization to DMD (commit 32ea0206 at 07/28/11), this is translated to rol.
88     // No assembler required.
89     return (x << n) | (x >> (32-n));
90 }
91 
92 /**
93  * Template API MD5 implementation.
94  * See $(D std.digest) for differences between template and OOP API.
95  */
96 struct MD5
97 {
98     private:
99         // magic initialization constants
100         uint[4] _state = [0x67452301,0xefcdab89,0x98badcfe,0x10325476]; // state (ABCD)
101         ulong _count; //number of bits, modulo 2^64
102         ubyte[64] _buffer; // input buffer
103 
104         static immutable ubyte[64] _padding =
105         [
106           0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
107           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
109         ];
110 
111         // F, G, H and I are basic MD5 functions
112         static @safe pure nothrow @nogc
113         {
FMD5114             uint F(uint x, uint y, uint z) { return (x & y) | (~x & z); }
GMD5115             uint G(uint x, uint y, uint z) { return (x & z) | (y & ~z); }
HMD5116             uint H(uint x, uint y, uint z) { return x ^ y ^ z; }
IMD5117             uint I(uint x, uint y, uint z) { return y ^ (x | ~z); }
118         }
119 
120 
121         /*
122          * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
123          * Rotation is separate from addition to prevent recomputation.
124          */
FFMD5125         static void FF(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
126             @safe pure nothrow @nogc
127         {
128             a += F (b, c, d) + x + ac;
129             a = rotateLeft(a, s);
130             a += b;
131         }
132 
GGMD5133         static void GG(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
134             @safe pure nothrow @nogc
135         {
136             a += G (b, c, d) + x + ac;
137             a = rotateLeft(a, s);
138             a += b;
139         }
140 
HHMD5141         static void HH(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
142             @safe pure nothrow @nogc
143         {
144             a += H (b, c, d) + x + ac;
145             a = rotateLeft(a, s);
146             a += b;
147         }
148 
IIMD5149         static void II(ref uint a, uint b, uint c, uint d, uint x, uint s, uint ac)
150             @safe pure nothrow @nogc
151         {
152             a += I (b, c, d) + x + ac;
153             a = rotateLeft(a, s);
154             a += b;
155         }
156 
157         /*
158          * MD5 basic transformation. Transforms state based on block.
159          */
160 
161         //Constants for MD5Transform routine.
162         enum
163         {
164             S11 = 7,
165             S12 = 12,
166             S13 = 17,
167             S14 = 22,
168             S21 = 5,
169             S22 = 9,
170             S23 = 14,
171             S24 = 20,
172             S31 = 4,
173             S32 = 11,
174             S33 = 16,
175             S34 = 23,
176             S41 = 6,
177             S42 = 10,
178             S43 = 15,
179             S44 = 21,
180         }
181 
transformMD5182         private void transform(const(ubyte[64])* block) pure nothrow @nogc
183         {
184             uint a = _state[0],
185                  b = _state[1],
186                  c = _state[2],
187                  d = _state[3];
188 
189             uint[16] x = void;
190 
191             version (BigEndian)
192             {
193                 import std.bitmanip : littleEndianToNative;
194 
195                 for (size_t i = 0; i < 16; i++)
196                 {
197                     x[i] = littleEndianToNative!uint(*cast(ubyte[4]*)&(*block)[i*4]);
198                 }
199             }
200             else
201             {
202                 (cast(ubyte*) x.ptr)[0 .. 64] = (cast(ubyte*) block)[0 .. 64];
203             }
204 
205             //Round 1
206             FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
207             FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
208             FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
209             FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
210             FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
211             FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
212             FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
213             FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
214             FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
215             FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
216             FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
217             FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
218             FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
219             FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
220             FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
221             FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
222 
223             //Round 2
224             GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
225             GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
226             GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
227             GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
228             GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
229             GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
230             GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
231             GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
232             GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
233             GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
234             GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
235             GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
236             GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
237             GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
238             GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
239             GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
240 
241             //Round 3
242             HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
243             HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
244             HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
245             HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
246             HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
247             HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
248             HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
249             HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
250             HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
251             HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
252             HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
253             HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
254             HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
255             HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
256             HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
257             HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
258 
259             //Round 4
260             II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
261             II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
262             II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
263             II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
264             II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
265             II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
266             II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
267             II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
268             II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
269             II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
270             II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
271             II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
272             II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
273             II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
274             II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
275             II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
276 
277             _state[0] += a;
278             _state[1] += b;
279             _state[2] += c;
280             _state[3] += d;
281 
282             //Zeroize sensitive information.
283             x[] = 0;
284         }
285 
286     public:
287         enum blockSize = 512;
288 
289         /**
290          * Use this to feed the digest with data.
291          * Also implements the $(REF isOutputRange, std,range,primitives)
292          * interface for $(D ubyte) and $(D const(ubyte)[]).
293          *
294          * Example:
295          * ----
296          * MD5 dig;
297          * dig.put(cast(ubyte) 0); //single ubyte
298          * dig.put(cast(ubyte) 0, cast(ubyte) 0); //variadic
299          * ubyte[10] buf;
300          * dig.put(buf); //buffer
301          * ----
302          */
putMD5303         void put(scope const(ubyte)[] data...) @trusted pure nothrow @nogc
304         {
305             uint i, index, partLen;
306             auto inputLen = data.length;
307 
308             //Compute number of bytes mod 64
309             index = (cast(uint)_count >> 3) & (64 - 1);
310 
311             //Update number of bits
312             _count += inputLen * 8;
313 
314             partLen = 64 - index;
315 
316             //Transform as many times as possible
317             if (inputLen >= partLen)
318             {
319                 (&_buffer[index])[0 .. partLen] = data.ptr[0 .. partLen];
320                 transform(&_buffer);
321 
322                 for (i = partLen; i + 63 < inputLen; i += 64)
323                 {
324                     transform(cast(const(ubyte[64])*)(data[i .. i + 64].ptr));
325                 }
326 
327                 index = 0;
328             }
329             else
330             {
331                 i = 0;
332             }
333 
334             /* Buffer remaining input */
335             if (inputLen - i)
336                 (&_buffer[index])[0 .. inputLen-i] = (&data[i])[0 .. inputLen-i];
337         }
338 
339         /**
340          * Used to (re)initialize the MD5 digest.
341          *
342          * Note:
343          * For this MD5 Digest implementation calling start after default construction
344          * is not necessary. Calling start is only necessary to reset the Digest.
345          *
346          * Generic code which deals with different Digest types should always call start though.
347          *
348          * Example:
349          * --------
350          * MD5 digest;
351          * //digest.start(); //Not necessary
352          * digest.put(0);
353          * --------
354          */
startMD5355         void start() @safe pure nothrow @nogc
356         {
357             this = MD5.init;
358         }
359 
360         /**
361          * Returns the finished MD5 hash. This also calls $(LREF start) to
362          * reset the internal state.
363           */
finishMD5364         ubyte[16] finish() @trusted pure nothrow @nogc
365         {
366             import std.bitmanip : nativeToLittleEndian;
367 
368             ubyte[16] data = void;
369             ubyte[8] bits = void;
370             uint index, padLen;
371 
372             //Save number of bits
373             bits[0 .. 8] = nativeToLittleEndian(_count)[];
374 
375             //Pad out to 56 mod 64
376             index = (cast(uint)_count >> 3) & (64 - 1);
377             padLen = (index < 56) ? (56 - index) : (120 - index);
378             put(_padding[0 .. padLen]);
379 
380             //Append length (before padding)
381             put(bits);
382 
383             //Store state in digest
384             data[0 .. 4]   = nativeToLittleEndian(_state[0])[];
385             data[4 .. 8]   = nativeToLittleEndian(_state[1])[];
386             data[8 .. 12]  = nativeToLittleEndian(_state[2])[];
387             data[12 .. 16] = nativeToLittleEndian(_state[3])[];
388 
389             /* Zeroize sensitive information. */
390             start();
391             return data;
392         }
393         ///
394         @safe unittest
395         {
396             //Simple example
397             MD5 hash;
398             hash.start();
399             hash.put(cast(ubyte) 0);
400             ubyte[16] result = hash.finish();
401         }
402 }
403 
404 ///
405 @safe unittest
406 {
407     //Simple example, hashing a string using md5Of helper function
408     ubyte[16] hash = md5Of("abc");
409     //Let's get a hash string
410     assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
411 }
412 
413 ///
414 @safe unittest
415 {
416     //Using the basic API
417     MD5 hash;
418     hash.start();
419     ubyte[1024] data;
420     //Initialize data here...
421     hash.put(data);
422     ubyte[16] result = hash.finish();
423 }
424 
425 ///
426 @safe unittest
427 {
428     //Let's use the template features:
429     void doSomething(T)(ref T hash)
430     if (isDigest!T)
431     {
432         hash.put(cast(ubyte) 0);
433     }
434     MD5 md5;
435     md5.start();
436     doSomething(md5);
437     assert(toHexString(md5.finish()) == "93B885ADFE0DA089CDF634904FD59F71");
438 }
439 
440 @safe unittest
441 {
442     assert(isDigest!MD5);
443 }
444 
445 @system unittest
446 {
447     import std.range;
448 
449     ubyte[16] digest;
450 
451     MD5 md5;
452     md5.put(cast(ubyte[])"abcdef");
453     md5.start();
454     md5.put(cast(ubyte[])"");
455     assert(md5.finish() == cast(ubyte[]) x"d41d8cd98f00b204e9800998ecf8427e");
456 
457     digest = md5Of("");
458     assert(digest == cast(ubyte[]) x"d41d8cd98f00b204e9800998ecf8427e");
459 
460     digest = md5Of("a");
461     assert(digest == cast(ubyte[]) x"0cc175b9c0f1b6a831c399e269772661");
462 
463     digest = md5Of("abc");
464     assert(digest == cast(ubyte[]) x"900150983cd24fb0d6963f7d28e17f72");
465 
466     digest = md5Of("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq");
467     assert(digest == cast(ubyte[]) x"8215ef0796a20bcaaae116d3876c664a");
468 
469     digest = md5Of("message digest");
470     assert(digest == cast(ubyte[]) x"f96b697d7cb7938d525a2f31aaf161d0");
471 
472     digest = md5Of("abcdefghijklmnopqrstuvwxyz");
473     assert(digest == cast(ubyte[]) x"c3fcd3d76192e4007dfb496cca67e13b");
474 
475     digest = md5Of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
476     assert(digest == cast(ubyte[]) x"d174ab98d277d9f5a5611c2c9f419d9f");
477 
478     digest = md5Of("1234567890123456789012345678901234567890"~
479                     "1234567890123456789012345678901234567890");
480     assert(digest == cast(ubyte[]) x"57edf4a22be3c955ac49da2e2107b67a");
481 
482     assert(toHexString(cast(ubyte[16]) x"c3fcd3d76192e4007dfb496cca67e13b")
483         == "C3FCD3D76192E4007DFB496CCA67E13B");
484 
485     ubyte[] onemilliona = new ubyte[1000000];
486     onemilliona[] = 'a';
487     digest = md5Of(onemilliona);
488     assert(digest == cast(ubyte[]) x"7707D6AE4E027C70EEA2A935C2296F21");
489 
490     auto oneMillionRange = repeat!ubyte(cast(ubyte)'a', 1000000);
491     digest = md5Of(oneMillionRange);
492     assert(digest == cast(ubyte[]) x"7707D6AE4E027C70EEA2A935C2296F21");
493 }
494 
495 /**
496  * This is a convenience alias for $(REF digest, std,digest) using the
497  * MD5 implementation.
498  */
499 //simple alias doesn't work here, hope this gets inlined...
md5Of(T...)500 auto md5Of(T...)(T data)
501 {
502     return digest!(MD5, T)(data);
503 }
504 
505 ///
506 @safe unittest
507 {
508     ubyte[16] hash = md5Of("abc");
509     assert(hash == digest!MD5("abc"));
510 }
511 
512 /**
513  * OOP API MD5 implementation.
514  * See $(D std.digest) for differences between template and OOP API.
515  *
516  * This is an alias for $(D $(REF WrapperDigest, std,digest)!MD5), see
517  * there for more information.
518  */
519 alias MD5Digest = WrapperDigest!MD5;
520 
521 ///
522 @safe unittest
523 {
524     //Simple example, hashing a string using Digest.digest helper function
525     auto md5 = new MD5Digest();
526     ubyte[] hash = md5.digest("abc");
527     //Let's get a hash string
528     assert(toHexString(hash) == "900150983CD24FB0D6963F7D28E17F72");
529 }
530 
531 ///
532 @system unittest
533 {
534      //Let's use the OOP features:
test(Digest dig)535     void test(Digest dig)
536     {
537       dig.put(cast(ubyte) 0);
538     }
539     auto md5 = new MD5Digest();
540     test(md5);
541 
542     //Let's use a custom buffer:
543     ubyte[16] buf;
544     ubyte[] result = md5.finish(buf[]);
545     assert(toHexString(result) == "93B885ADFE0DA089CDF634904FD59F71");
546 }
547 
548 @system unittest
549 {
550     auto md5 = new MD5Digest();
551 
552     md5.put(cast(ubyte[])"abcdef");
553     md5.reset();
554     md5.put(cast(ubyte[])"");
555     assert(md5.finish() == cast(ubyte[]) x"d41d8cd98f00b204e9800998ecf8427e");
556 
557     md5.put(cast(ubyte[])"abcdefghijklmnopqrstuvwxyz");
558     ubyte[20] result;
559     auto result2 = md5.finish(result[]);
560     assert(result[0 .. 16] == result2 && result2 == cast(ubyte[]) x"c3fcd3d76192e4007dfb496cca67e13b");
561 
562     debug
563     {
564         import std.exception;
565         assertThrown!Error(md5.finish(result[0 .. 15]));
566     }
567 
568     assert(md5.length == 16);
569 
570     assert(md5.digest("") == cast(ubyte[]) x"d41d8cd98f00b204e9800998ecf8427e");
571 
572     assert(md5.digest("a") == cast(ubyte[]) x"0cc175b9c0f1b6a831c399e269772661");
573 
574     assert(md5.digest("abc") == cast(ubyte[]) x"900150983cd24fb0d6963f7d28e17f72");
575 
576     assert(md5.digest("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq")
577            == cast(ubyte[]) x"8215ef0796a20bcaaae116d3876c664a");
578 
579     assert(md5.digest("message digest") == cast(ubyte[]) x"f96b697d7cb7938d525a2f31aaf161d0");
580 
581     assert(md5.digest("abcdefghijklmnopqrstuvwxyz")
582            == cast(ubyte[]) x"c3fcd3d76192e4007dfb496cca67e13b");
583 
584     assert(md5.digest("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789")
585            == cast(ubyte[]) x"d174ab98d277d9f5a5611c2c9f419d9f");
586 
587     assert(md5.digest("1234567890123456789012345678901234567890",
588                                    "1234567890123456789012345678901234567890")
589            == cast(ubyte[]) x"57edf4a22be3c955ac49da2e2107b67a");
590 }
591