1 /*
2  * This code is derived from (original license follows):
3  *
4  * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
5  * MD5 Message-Digest Algorithm (RFC 1321).
6  *
7  * Homepage:
8  * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
9  *
10  * Author:
11  * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
12  *
13  * This software was written by Alexander Peslyak in 2001.  No copyright is
14  * claimed, and the software is hereby placed in the public domain.
15  * In case this attempt to disclaim copyright and place the software in the
16  * public domain is deemed null and void, then the software is
17  * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
18  * general public under the following terms:
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted.
22  *
23  * There's ABSOLUTELY NO WARRANTY, express or implied.
24  *
25  * (This is a heavily cut-down "BSD license".)
26  *
27  * This differs from Colin Plumb's older public domain implementation in that
28  * no exactly 32-bit integer data type is required (any 32-bit or wider
29  * unsigned integer data type will do), there's no compile-time endianness
30  * configuration, and the function prototypes match OpenSSL's.  No code from
31  * Colin Plumb's implementation has been reused; this comment merely compares
32  * the properties of the two independent implementations.
33  *
34  * The primary goals of this implementation are portability and ease of use.
35  * It is meant to be fast, but not as fast as possible.  Some known
36  * optimizations are not included to reduce source code size and avoid
37  * compile-time configuration.
38  */
39 
40 #include "llvm/Support/MD5.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/StringRef.h"
43 #include "llvm/Support/Endian.h"
44 #include "llvm/Support/Format.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include <array>
47 #include <cstdint>
48 #include <cstring>
49 
50 // The basic MD5 functions.
51 
52 // F and G are optimized compared to their RFC 1321 definitions for
53 // architectures that lack an AND-NOT instruction, just like in Colin Plumb's
54 // implementation.
55 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
56 #define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
57 #define H(x, y, z) ((x) ^ (y) ^ (z))
58 #define I(x, y, z) ((y) ^ ((x) | ~(z)))
59 
60 // The MD5 transformation for all four rounds.
61 #define STEP(f, a, b, c, d, x, t, s)                                           \
62   (a) += f((b), (c), (d)) + (x) + (t);                                         \
63   (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s))));                   \
64   (a) += (b);
65 
66 // SET reads 4 input bytes in little-endian byte order and stores them
67 // in a properly aligned word in host byte order.
68 #define SET(n)                                                                 \
69   (block[(n)] =                                                                \
70        (MD5_u32plus) ptr[(n) * 4] | ((MD5_u32plus) ptr[(n) * 4 + 1] << 8) |    \
71        ((MD5_u32plus) ptr[(n) * 4 + 2] << 16) |                                \
72        ((MD5_u32plus) ptr[(n) * 4 + 3] << 24))
73 #define GET(n) (block[(n)])
74 
75 using namespace llvm;
76 
77 /// This processes one or more 64-byte data blocks, but does NOT update
78 ///the bit counters.  There are no alignment requirements.
79 const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
80   const uint8_t *ptr;
81   MD5_u32plus a, b, c, d;
82   MD5_u32plus saved_a, saved_b, saved_c, saved_d;
83   unsigned long Size = Data.size();
84 
85   ptr = Data.data();
86 
87   a = this->a;
88   b = this->b;
89   c = this->c;
90   d = this->d;
91 
92   do {
93     saved_a = a;
94     saved_b = b;
95     saved_c = c;
96     saved_d = d;
97 
98     // Round 1
99     STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
100     STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
101     STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
102     STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
103     STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
104     STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
105     STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
106     STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
107     STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
108     STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
109     STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
110     STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
111     STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
112     STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
113     STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
114     STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
115 
116     // Round 2
117     STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
118     STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
119     STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
120     STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
121     STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
122     STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
123     STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
124     STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
125     STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
126     STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
127     STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
128     STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
129     STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
130     STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
131     STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
132     STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
133 
134     // Round 3
135     STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
136     STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
137     STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
138     STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
139     STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
140     STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
141     STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
142     STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
143     STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
144     STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
145     STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
146     STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
147     STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
148     STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
149     STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
150     STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
151 
152     // Round 4
153     STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
154     STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
155     STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
156     STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
157     STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
158     STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
159     STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
160     STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
161     STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
162     STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
163     STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
164     STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
165     STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
166     STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
167     STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
168     STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
169 
170     a += saved_a;
171     b += saved_b;
172     c += saved_c;
173     d += saved_d;
174 
175     ptr += 64;
176   } while (Size -= 64);
177 
178   this->a = a;
179   this->b = b;
180   this->c = c;
181   this->d = d;
182 
183   return ptr;
184 }
185 
186 MD5::MD5() = default;
187 
188 /// Incrementally add the bytes in \p Data to the hash.
189 void MD5::update(ArrayRef<uint8_t> Data) {
190   MD5_u32plus saved_lo;
191   unsigned long used, free;
192   const uint8_t *Ptr = Data.data();
193   unsigned long Size = Data.size();
194 
195   saved_lo = lo;
196   if ((lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
197     hi++;
198   hi += Size >> 29;
199 
200   used = saved_lo & 0x3f;
201 
202   if (used) {
203     free = 64 - used;
204 
205     if (Size < free) {
206       memcpy(&buffer[used], Ptr, Size);
207       return;
208     }
209 
210     memcpy(&buffer[used], Ptr, free);
211     Ptr = Ptr + free;
212     Size -= free;
213     body(makeArrayRef(buffer, 64));
214   }
215 
216   if (Size >= 64) {
217     Ptr = body(makeArrayRef(Ptr, Size & ~(unsigned long) 0x3f));
218     Size &= 0x3f;
219   }
220 
221   memcpy(buffer, Ptr, Size);
222 }
223 
224 /// Add the bytes in the StringRef \p Str to the hash.
225 // Note that this isn't a string and so this won't include any trailing NULL
226 // bytes.
227 void MD5::update(StringRef Str) {
228   ArrayRef<uint8_t> SVal((const uint8_t *)Str.data(), Str.size());
229   update(SVal);
230 }
231 
232 /// Finish the hash and place the resulting hash into \p result.
233 /// \param Result is assumed to be a minimum of 16-bytes in size.
234 void MD5::final(MD5Result &Result) {
235   unsigned long used, free;
236 
237   used = lo & 0x3f;
238 
239   buffer[used++] = 0x80;
240 
241   free = 64 - used;
242 
243   if (free < 8) {
244     memset(&buffer[used], 0, free);
245     body(makeArrayRef(buffer, 64));
246     used = 0;
247     free = 64;
248   }
249 
250   memset(&buffer[used], 0, free - 8);
251 
252   lo <<= 3;
253   support::endian::write32le(&buffer[56], lo);
254   support::endian::write32le(&buffer[60], hi);
255 
256   body(makeArrayRef(buffer, 64));
257 
258   support::endian::write32le(&Result[0], a);
259   support::endian::write32le(&Result[4], b);
260   support::endian::write32le(&Result[8], c);
261   support::endian::write32le(&Result[12], d);
262 }
263 
264 SmallString<32> MD5::MD5Result::digest() const {
265   SmallString<32> Str;
266   raw_svector_ostream Res(Str);
267   for (int i = 0; i < 16; ++i)
268     Res << format("%.2x", Bytes[i]);
269   return Str;
270 }
271 
272 void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
273   Str = Result.digest();
274 }
275 
276 std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
277   MD5 Hash;
278   Hash.update(Data);
279   MD5::MD5Result Res;
280   Hash.final(Res);
281 
282   return Res;
283 }
284