1 /*  $Id: md5.cpp 564203 2018-05-23 12:13:23Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Aaron Ucko (C++ interface); original author unknown
27  *
28  * File Description:
29  *   CMD5 - class for computing Message Digest version 5 checksums.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <util/md5.hpp>
35 #include <util/util_exception.hpp>
36 
37 
38 BEGIN_NCBI_SCOPE
39 
40 
41 #ifdef WORDS_BIGENDIAN
// Reverse, in place, the byte order of each of the `longs` consecutive
// 4-byte groups starting at `buf`.
//
// This helper is only compiled for big-endian hosts (WORDS_BIGENDIAN), where
// reversing each group converts between the little-endian word layout used
// by MD5 and the native words that the transform step reads.
//
// The bytes are exchanged directly instead of assembling a word and storing
// it through a reinterpret_cast'ed Uint4*, so the routine does not depend on
// the alignment of `buf` or on type-punned stores.  A zero `longs` is a
// no-op rather than wrapping the counter around.
inline
static void s_ByteReverse(unsigned char* buf, size_t longs)
{
    for ( ;  longs != 0;  --longs, buf += 4) {
        // Swap the outer pair of bytes ...
        unsigned char saved = buf[0];
        buf[0] = buf[3];
        buf[3] = saved;

        // ... then the inner pair.
        saved  = buf[1];
        buf[1] = buf[2];
        buf[2] = saved;
    }
}
53 #endif
54 
55 
56 // Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
57 // initialization constants.
CMD5(void)58 CMD5::CMD5(void)
59     : m_Bits(0), m_Finalized(false)
60 {
61     m_Buf[0] = 0x67452301;
62     m_Buf[1] = 0xefcdab89;
63     m_Buf[2] = 0x98badcfe;
64     m_Buf[3] = 0x10325476;
65 }
66 
67 
68 // Update state to reflect the concatenation of another buffer full of bytes.
Update(const char * buf,size_t length)69 void CMD5::Update(const char* buf, size_t length)
70 {
71     if ( m_Finalized ) {
72         NCBI_THROW(CUtilException, eWrongCommand,
73                    "attempt to update a finalized MD5 instance");
74     }
75 
76     // Number of leftover bytes in m_In
77     unsigned int tmp = (unsigned int)((m_Bits >> 3) % sizeof(m_In));
78 
79     // Update bit count
80     m_Bits += length << 3;
81 
82     // Handle any leading odd-sized chunks
83     if ( tmp ) {
84         unsigned char* p = m_In + tmp;
85 
86         tmp = kBlockSize - tmp;
87         if (length < tmp) {
88             memcpy(p, buf, length);
89             return;
90         }
91         memcpy(p, buf, tmp);
92 #ifdef WORDS_BIGENDIAN
93         s_ByteReverse(m_In, 16);
94 #endif
95         Transform();
96         buf    += tmp;
97         length -= tmp;
98     }
99 
100     // Process remaining data in kBlockSize-byte chunks
101     while (length >= kBlockSize) {
102         memcpy(m_In, buf, kBlockSize);
103 #ifdef WORDS_BIGENDIAN
104         s_ByteReverse(m_In, 16);
105 #endif
106         Transform();
107         buf    += kBlockSize;
108         length -= kBlockSize;
109     }
110 
111     // Handle any remaining bytes of data
112     memcpy(m_In, buf, length);
113 }
114 
115 
116 // Final wrapup - pad to kBlockSize-byte boundary with the bit pattern
117 // 1 0* (64-bit count of bits processed, MSB-first).
Finalize(unsigned char digest[16])118 void CMD5::Finalize(unsigned char digest[16])
119 {
120     if ( m_Finalized ) {
121         memcpy(digest, m_Buf, 16);
122         return;
123     }
124 
125     // Compute number of bytes mod kBlockSize
126     int count = (int)((m_Bits >> 3) % kBlockSize);
127 
128     // Set the first char of padding to 0x80.  This is safe since there is
129     // always at least one byte free.
130     unsigned char *p = m_In + count;
131     *p++ = 0x80;
132 
133     // Bytes of padding needed to make kBlockSize bytes
134     count = kBlockSize - 1 - count;
135 
136     // Pad out to 56 mod kBlockSize
137     if (count < 8) {
138         // Two lots of padding:  Pad the first block to kBlockSize bytes
139         memset(p, 0, count);
140 #ifdef WORDS_BIGENDIAN
141         s_ByteReverse(m_In, 16);
142 #endif
143         Transform();
144 
145         // Now fill the next block with 56 bytes
146         memset(m_In, 0, kBlockSize - 8);
147     } else {
148         // Pad block to 56 bytes
149         memset(p, 0, count - 8);
150 #ifdef WORDS_BIGENDIAN
151         s_ByteReverse(m_In, 14);
152 #endif
153     }
154 
155     // Append length in bits and transform
156 
157     Uint4 bits = static_cast<Uint4>(m_Bits);
158     memcpy(m_In + 14*sizeof(bits), &bits, sizeof(bits));
159     bits = static_cast<Uint4>(m_Bits >> 32);
160     memcpy(m_In + 15*sizeof(bits), &bits, sizeof(bits));
161 
162     Transform();
163 #ifdef WORDS_BIGENDIAN
164     s_ByteReverse(reinterpret_cast<unsigned char*>(m_Buf), 4);
165 #endif
166     memcpy(digest, m_Buf, 16);
167     memset(m_In, 0, kBlockSize); // may be sensitive
168     m_Finalized = true;
169 }
170 
171 
GetHexSum(unsigned char digest[16])172 string CMD5::GetHexSum(unsigned char digest[16])
173 {
174     CNcbiOstrstream oss;
175     oss << hex << setfill('0');
176     for (size_t i = 0; i < 16; ++i) {
177         oss << setw(2) << (int)digest[i];
178     }
179     return CNcbiOstrstreamToString(oss);
180 }
181 
182 
// The four core functions - F1 is optimized somewhat.
// Every macro argument is parenthesized so that arbitrary expressions
// (not just plain identifiers) can be passed safely.

// #define F1(x, y, z) (x & y | ~x & z)
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) (((z) & (x)) | (~(z) & (y)))
#define F3(x, y, z) ((x) ^ ((y) ^ (z)))
#define F4(x, y, z) ((y) ^ ((x) | ~(z)))

// This is the central step in the MD5 algorithm:
//   w = rotate_left(w + f(x, y, z) + data, s) + x
// `w` must be a modifiable 32-bit unsigned lvalue (it is evaluated more
// than once), and `s` must be in the range 1..31.
#define MD5STEP(f, w, x, y, z, data, s) \
        ( (w) += f((x), (y), (z)) + (data), \
          (w) = (w) << (s) | (w) >> (32 - (s)), \
          (w) += (x) )
194 
195 // The core of the MD5 algorithm, this alters an existing MD5 hash to
196 // reflect the addition of 16 longwords of new data.  MD5Update blocks
197 // the data and converts bytes into longwords for this routine.
Transform(void)198 void CMD5::Transform(void)
199 {
200     Uint4  a, b, c, d;
201     Uint4* inw = reinterpret_cast<Uint4*>(m_In);
202 
203     a = m_Buf[0];
204     b = m_Buf[1];
205     c = m_Buf[2];
206     d = m_Buf[3];
207 
208     MD5STEP(F1, a, b, c, d, inw[0]  + 0xd76aa478,  7);
209     MD5STEP(F1, d, a, b, c, inw[1]  + 0xe8c7b756, 12);
210     MD5STEP(F1, c, d, a, b, inw[2]  + 0x242070db, 17);
211     MD5STEP(F1, b, c, d, a, inw[3]  + 0xc1bdceee, 22);
212     MD5STEP(F1, a, b, c, d, inw[4]  + 0xf57c0faf,  7);
213     MD5STEP(F1, d, a, b, c, inw[5]  + 0x4787c62a, 12);
214     MD5STEP(F1, c, d, a, b, inw[6]  + 0xa8304613, 17);
215     MD5STEP(F1, b, c, d, a, inw[7]  + 0xfd469501, 22);
216     MD5STEP(F1, a, b, c, d, inw[8]  + 0x698098d8,  7);
217     MD5STEP(F1, d, a, b, c, inw[9]  + 0x8b44f7af, 12);
218     MD5STEP(F1, c, d, a, b, inw[10] + 0xffff5bb1, 17);
219     MD5STEP(F1, b, c, d, a, inw[11] + 0x895cd7be, 22);
220     MD5STEP(F1, a, b, c, d, inw[12] + 0x6b901122,  7);
221     MD5STEP(F1, d, a, b, c, inw[13] + 0xfd987193, 12);
222     MD5STEP(F1, c, d, a, b, inw[14] + 0xa679438e, 17);
223     MD5STEP(F1, b, c, d, a, inw[15] + 0x49b40821, 22);
224 
225     MD5STEP(F2, a, b, c, d, inw[1]  + 0xf61e2562,  5);
226     MD5STEP(F2, d, a, b, c, inw[6]  + 0xc040b340,  9);
227     MD5STEP(F2, c, d, a, b, inw[11] + 0x265e5a51, 14);
228     MD5STEP(F2, b, c, d, a, inw[0]  + 0xe9b6c7aa, 20);
229     MD5STEP(F2, a, b, c, d, inw[5]  + 0xd62f105d,  5);
230     MD5STEP(F2, d, a, b, c, inw[10] + 0x02441453,  9);
231     MD5STEP(F2, c, d, a, b, inw[15] + 0xd8a1e681, 14);
232     MD5STEP(F2, b, c, d, a, inw[4]  + 0xe7d3fbc8, 20);
233     MD5STEP(F2, a, b, c, d, inw[9]  + 0x21e1cde6,  5);
234     MD5STEP(F2, d, a, b, c, inw[14] + 0xc33707d6,  9);
235     MD5STEP(F2, c, d, a, b, inw[3]  + 0xf4d50d87, 14);
236     MD5STEP(F2, b, c, d, a, inw[8]  + 0x455a14ed, 20);
237     MD5STEP(F2, a, b, c, d, inw[13] + 0xa9e3e905,  5);
238     MD5STEP(F2, d, a, b, c, inw[2]  + 0xfcefa3f8,  9);
239     MD5STEP(F2, c, d, a, b, inw[7]  + 0x676f02d9, 14);
240     MD5STEP(F2, b, c, d, a, inw[12] + 0x8d2a4c8a, 20);
241 
242     MD5STEP(F3, a, b, c, d, inw[5]  + 0xfffa3942,  4);
243     MD5STEP(F3, d, a, b, c, inw[8]  + 0x8771f681, 11);
244     MD5STEP(F3, c, d, a, b, inw[11] + 0x6d9d6122, 16);
245     MD5STEP(F3, b, c, d, a, inw[14] + 0xfde5380c, 23);
246     MD5STEP(F3, a, b, c, d, inw[1]  + 0xa4beea44,  4);
247     MD5STEP(F3, d, a, b, c, inw[4]  + 0x4bdecfa9, 11);
248     MD5STEP(F3, c, d, a, b, inw[7]  + 0xf6bb4b60, 16);
249     MD5STEP(F3, b, c, d, a, inw[10] + 0xbebfbc70, 23);
250     MD5STEP(F3, a, b, c, d, inw[13] + 0x289b7ec6,  4);
251     MD5STEP(F3, d, a, b, c, inw[0]  + 0xeaa127fa, 11);
252     MD5STEP(F3, c, d, a, b, inw[3]  + 0xd4ef3085, 16);
253     MD5STEP(F3, b, c, d, a, inw[6]  + 0x04881d05, 23);
254     MD5STEP(F3, a, b, c, d, inw[9]  + 0xd9d4d039,  4);
255     MD5STEP(F3, d, a, b, c, inw[12] + 0xe6db99e5, 11);
256     MD5STEP(F3, c, d, a, b, inw[15] + 0x1fa27cf8, 16);
257     MD5STEP(F3, b, c, d, a, inw[2]  + 0xc4ac5665, 23);
258 
259     MD5STEP(F4, a, b, c, d, inw[0]  + 0xf4292244,  6);
260     MD5STEP(F4, d, a, b, c, inw[7]  + 0x432aff97, 10);
261     MD5STEP(F4, c, d, a, b, inw[14] + 0xab9423a7, 15);
262     MD5STEP(F4, b, c, d, a, inw[5]  + 0xfc93a039, 21);
263     MD5STEP(F4, a, b, c, d, inw[12] + 0x655b59c3,  6);
264     MD5STEP(F4, d, a, b, c, inw[3]  + 0x8f0ccc92, 10);
265     MD5STEP(F4, c, d, a, b, inw[10] + 0xffeff47d, 15);
266     MD5STEP(F4, b, c, d, a, inw[1]  + 0x85845dd1, 21);
267     MD5STEP(F4, a, b, c, d, inw[8]  + 0x6fa87e4f,  6);
268     MD5STEP(F4, d, a, b, c, inw[15] + 0xfe2ce6e0, 10);
269     MD5STEP(F4, c, d, a, b, inw[6]  + 0xa3014314, 15);
270     MD5STEP(F4, b, c, d, a, inw[13] + 0x4e0811a1, 21);
271     MD5STEP(F4, a, b, c, d, inw[4]  + 0xf7537e82,  6);
272     MD5STEP(F4, d, a, b, c, inw[11] + 0xbd3af235, 10);
273     MD5STEP(F4, c, d, a, b, inw[2]  + 0x2ad7d2bb, 15);
274     MD5STEP(F4, b, c, d, a, inw[9]  + 0xeb86d391, 21);
275 
276     m_Buf[0] += a;
277     m_Buf[1] += b;
278     m_Buf[2] += c;
279     m_Buf[3] += d;
280 }
281 
282 
283 END_NCBI_SCOPE
284