1 /*******************************************************************************
2   Copyright (c) 2012-2020, Intel Corporation
3 
4   Redistribution and use in source and binary forms, with or without
5   modification, are permitted provided that the following conditions are met:
6 
7       * Redistributions of source code must retain the above copyright notice,
8         this list of conditions and the following disclaimer.
9       * Redistributions in binary form must reproduce the above copyright
10         notice, this list of conditions and the following disclaimer in the
11         documentation and/or other materials provided with the distribution.
12       * Neither the name of Intel Corporation nor the names of its contributors
13         may be used to endorse or promote products derived from this software
14         without specific prior written permission.
15 
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27 
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "intel-ipsec-mb.h"
#include "include/clear_regs_mem.h"
32 
/*
 * ROTATE(a, n): rotate the 32-bit unsigned value `a` left by `n` bits.
 *
 * The _rotl() intrinsic is only selected when the compiler identifies
 * itself as MSVC-compatible. Every other compiler gets the portable shift
 * form, so the file no longer requires LINUX to be defined in order to
 * avoid pulling in <intrin.h>.
 *
 * In the portable form `n` must be in the range 1..31 (a count of 0 would
 * shift right by 32, which is undefined for a 32-bit operand) and `a` is
 * expanded twice, so pass an expression without side effects.
 */
#ifdef _MSC_VER
#include <intrin.h>
#define ROTATE(a, n) _rotl(a, n)
#else
#define ROTATE(a, n) (((a) << (n)) ^ ((a) >> (32 - (n))))
#endif
39 
/* MD5 initial state, loaded into the working variables a, b, c and d */
#define H0 0x67452301   /* word A */
#define H1 0xEFCDAB89   /* word B */
#define H2 0x98BADCFE   /* word C */
#define H3 0x10325476   /* word D */
44 
/*
 * Round functions, one per group of 16 steps.
 *
 * F1 and F2 are bitwise selects written with one AND and two XORs:
 *   F1(x, y, z) == (x & y) | (~x & z)   bits of y where x is set, else z
 *   F2(x, y, z) == (x & z) | (y & ~z)   bits of x where z is set, else y
 * F3 is the three-way XOR and F4 is y ^ (x | ~z).
 */
#define F1(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define F2(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define F3(x, y, z) ((x) ^ (y) ^ (z))
#define F4(x, y, z) ((y) ^ ((x) | (~(z))))
49 
/*
 * MD5_STEP: one MD5 step, a = b + ROTATE(a + w + k + f(b, c, d), r).
 *
 *   f          - round function macro (F1, F2, F3 or F4)
 *   a, b, c, d - state words; `a` must be a modifiable 32-bit lvalue
 *   k          - additive constant for this step
 *   w          - message word for this step
 *   r          - left-rotate amount
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by ';' is exactly one statement and remains valid inside an unbraced
 * if/else. `a` and `b` are expanded more than once, so pass plain
 * variables rather than expressions with side effects.
 */
#define MD5_STEP(f, a, b, c, d, k, w, r) do {           \
                (a) += (w) + (k) + f(b, c, d);          \
                (a) = ROTATE((a), (r));                 \
                (a) += (b);                             \
        } while (0)

/* Step macros for rounds 1 to 4; they differ only in the round function */
#define STEP1(a, b, c, d, k, w, r) MD5_STEP(F1, a, b, c, d, k, w, r)
#define STEP2(a, b, c, d, k, w, r) MD5_STEP(F2, a, b, c, d, k, w, r)
#define STEP3(a, b, c, d, k, w, r) MD5_STEP(F3, a, b, c, d, k, w, r)
#define STEP4(a, b, c, d, k, w, r) MD5_STEP(F4, a, b, c, d, k, w, r)
70 
/*
 * Architecture tag handed to md5_one_block_common() by each public entry
 * point. When SAFE_DATA is defined it decides which SIMD scratch register
 * clearing routine is called; otherwise it is unused.
 */
enum arch_type {
        ARCH_SSE    = 0,
        ARCH_AVX    = 1,
        ARCH_AVX2   = 2,
        ARCH_AVX512 = 3
};
77 
78 __forceinline
79 void
md5_one_block_common(const uint8_t * data,uint32_t digest[4],const enum arch_type arch)80 md5_one_block_common(const uint8_t *data, uint32_t digest[4],
81                      const enum arch_type arch)
82 {
83 #ifdef SAFE_PARAM
84         if (data == NULL || digest == NULL)
85                 return;
86 #endif
87         uint32_t a, b, c, d;
88         uint32_t w[16];
89         const uint32_t *data32 = (const uint32_t *)data;
90 
91         a = H0;
92         b = H1;
93         c = H2;
94         d = H3;
95 
96         w[0] = data32[0];
97         w[1] = data32[1];
98 
99         STEP1(a, b, c, d, 0xd76aa478, w[0], 7);
100         w[2] = data32[2];
101         STEP1(d, a, b, c, 0xe8c7b756, w[1], 12);
102         w[3] = data32[3];
103         STEP1(c, d, a, b, 0x242070db, w[2], 17);
104         w[4] = data32[4];
105         STEP1(b, c, d, a, 0xc1bdceee, w[3], 22);
106         w[5] = data32[5];
107         STEP1(a, b, c, d, 0xf57c0faf, w[4], 7);
108         w[6] = data32[6];
109         STEP1(d, a, b, c, 0x4787c62a, w[5], 12);
110         w[7] = data32[7];
111         STEP1(c, d, a, b, 0xa8304613, w[6], 17);
112         w[8] = data32[8];
113         STEP1(b, c, d, a, 0xfd469501, w[7], 22);
114         w[9] = data32[9];
115         STEP1(a, b, c, d, 0x698098d8, w[8], 7);
116         w[10] = data32[10];
117         STEP1(d, a, b, c, 0x8b44f7af, w[9], 12);
118         w[11] = data32[11];
119         STEP1(c, d, a, b, 0xffff5bb1, w[10], 17);
120         w[12] = data32[12];
121         STEP1(b, c, d, a, 0x895cd7be, w[11], 22);
122         w[13] = data32[13];
123         STEP1(a, b, c, d, 0x6b901122, w[12], 7);
124         w[14] = data32[14];
125         STEP1(d, a, b, c, 0xfd987193, w[13], 12);
126         w[15] = data32[15];
127         STEP1(c, d, a, b, 0xa679438e, w[14], 17);
128         STEP1(b, c, d, a, 0x49b40821, w[15], 22);
129         STEP2(a, b, c, d, 0xf61e2562, w[1], 5);
130         STEP2(d, a, b, c, 0xc040b340, w[6], 9);
131         STEP2(c, d, a, b, 0x265e5a51, w[11], 14);
132         STEP2(b, c, d, a, 0xe9b6c7aa, w[0], 20);
133         STEP2(a, b, c, d, 0xd62f105d, w[5], 5);
134         STEP2(d, a, b, c, 0x02441453, w[10], 9);
135         STEP2(c, d, a, b, 0xd8a1e681, w[15], 14);
136         STEP2(b, c, d, a, 0xe7d3fbc8, w[4], 20);
137         STEP2(a, b, c, d, 0x21e1cde6, w[9], 5);
138         STEP2(d, a, b, c, 0xc33707d6, w[14], 9);
139         STEP2(c, d, a, b, 0xf4d50d87, w[3], 14);
140         STEP2(b, c, d, a, 0x455a14ed, w[8], 20);
141         STEP2(a, b, c, d, 0xa9e3e905, w[13], 5);
142         STEP2(d, a, b, c, 0xfcefa3f8, w[2], 9);
143         STEP2(c, d, a, b, 0x676f02d9, w[7], 14);
144         STEP2(b, c, d, a, 0x8d2a4c8a, w[12], 20);
145         STEP3(a, b, c, d, 0xfffa3942, w[5], 4);
146         STEP3(d, a, b, c, 0x8771f681, w[8], 11);
147         STEP3(c, d, a, b, 0x6d9d6122, w[11], 16);
148         STEP3(b, c, d, a, 0xfde5380c, w[14], 23);
149         STEP3(a, b, c, d, 0xa4beea44, w[1], 4);
150         STEP3(d, a, b, c, 0x4bdecfa9, w[4], 11);
151         STEP3(c, d, a, b, 0xf6bb4b60, w[7], 16);
152         STEP3(b, c, d, a, 0xbebfbc70, w[10], 23);
153         STEP3(a, b, c, d, 0x289b7ec6, w[13], 4);
154         STEP3(d, a, b, c, 0xeaa127fa, w[0], 11);
155         STEP3(c, d, a, b, 0xd4ef3085, w[3], 16);
156         STEP3(b, c, d, a, 0x04881d05, w[6], 23);
157         STEP3(a, b, c, d, 0xd9d4d039, w[9], 4);
158         STEP3(d, a, b, c, 0xe6db99e5, w[12], 11);
159         STEP3(c, d, a, b, 0x1fa27cf8, w[15], 16);
160         STEP3(b, c, d, a, 0xc4ac5665, w[2], 23);
161         STEP4(a, b, c, d, 0xf4292244, w[0], 6);
162         STEP4(d, a, b, c, 0x432aff97, w[7], 10);
163         STEP4(c, d, a, b, 0xab9423a7, w[14], 15);
164         STEP4(b, c, d, a, 0xfc93a039, w[5], 21);
165         STEP4(a, b, c, d, 0x655b59c3, w[12], 6);
166         STEP4(d, a, b, c, 0x8f0ccc92, w[3], 10);
167         STEP4(c, d, a, b, 0xffeff47d, w[10], 15);
168         STEP4(b, c, d, a, 0x85845dd1, w[1], 21);
169         STEP4(a, b, c, d, 0x6fa87e4f, w[8], 6);
170         STEP4(d, a, b, c, 0xfe2ce6e0, w[15], 10);
171         STEP4(c, d, a, b, 0xa3014314, w[6], 15);
172         STEP4(b, c, d, a, 0x4e0811a1, w[13], 21);
173         STEP4(a, b, c, d, 0xf7537e82, w[4], 6);
174         STEP4(d, a, b, c, 0xbd3af235, w[11], 10);
175         STEP4(c, d, a, b, 0x2ad7d2bb, w[2], 15);
176         STEP4(b, c, d, a, 0xeb86d391, w[9], 21);
177 
178         digest[0] = a + H0;
179         digest[1] = b + H1;
180         digest[2] = c + H2;
181         digest[3] = d + H3;
182 #ifdef SAFE_DATA
183         clear_var(&a, sizeof(a));
184         clear_var(&b, sizeof(b));
185         clear_var(&c, sizeof(c));
186         clear_var(&d, sizeof(d));
187         clear_mem(w, sizeof(w));
188         clear_scratch_gps();
189         switch(arch) {
190         case ARCH_SSE:
191                 clear_scratch_xmms_sse();
192                 break;
193         case ARCH_AVX:
194                 clear_scratch_xmms_avx();
195                 break;
196         case ARCH_AVX2:
197                 clear_scratch_ymms();
198                 break;
199         case ARCH_AVX512:
200                 clear_scratch_zmms();
201                 break;
202         default:
203                 break;
204         }
205 #else
206         (void) arch;  /* unused */
207 #endif
208 }
209 
210 void
md5_one_block_sse(const void * data,void * digest)211 md5_one_block_sse(const void *data, void *digest)
212 {
213         md5_one_block_common(data, digest, ARCH_SSE);
214 }
215 
216 void
md5_one_block_avx(const void * data,void * digest)217 md5_one_block_avx(const void *data, void *digest)
218 {
219         md5_one_block_common(data, digest, ARCH_AVX);
220 }
221 
222 void
md5_one_block_avx2(const void * data,void * digest)223 md5_one_block_avx2(const void *data, void *digest)
224 {
225         md5_one_block_common(data, digest, ARCH_AVX2);
226 }
227 
228 void
md5_one_block_avx512(const void * data,void * digest)229 md5_one_block_avx512(const void *data, void *digest)
230 {
231         md5_one_block_common(data, digest, ARCH_AVX512);
232 }
233