1/*	$OpenBSD: md5_amd64_generic.S,v 1.1 2025/01/24 13:35:04 jsing Exp $ */
2/*
3 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifdef __CET__
19#include <cet.h>
20#else
21#define _CET_ENDBR
22#endif
23
/*
 * Register allocation for md5_block_data_order.
 *
 * Function arguments, in the order they are passed.
 */
#define	ctx		%rdi
#define	in		%rsi
#define	num		%rdx

/* Address one past the final input block; the block loop stops here. */
#define	end		%rbp

/* Hash state carried from one block to the next. */
#define	AA		%r8d
#define	BB		%r9d
#define	CC		%r10d
#define	DD		%r11d

/*
 * Working copy of the state, rewritten by every round step.
 *
 * D is the low half of %rdx and therefore overlaps num: num has to be
 * consumed before D is first written.
 */
#define	A		%eax
#define	B		%ebx
#define	C		%ecx
#define	D		%edx

/* Scratch registers used inside the round macros. */
#define	tmp0		%r12d
#define	tmp1		%r13d
42
/*
 * MD5 round 1 step:
 *
 *   a = b + rol(a + F(b, c, d) + x + t, s)
 *
 * with
 *
 *   F(x, y, z) = (x & y) | (~x & z)
 *              = ((y ^ z) & x) ^ z
 *
 * The second form needs a single scratch register. F(b, c, d) is built
 * in tmp0 first; message word x is then added to a from the input block
 * and a single lea folds in both F and the constant t.
 *
 * Uses tmp0 as scratch and reads the message word at (x*4)(in).
 */
#define md5_round1(a, b, c, d, x, t, s) \
	movl	d, tmp0; \
	xorl	c, tmp0; \
	andl	b, tmp0; \
	xorl	d, tmp0; \
	addl	(x*4)(in), a; \
	leal	t(a, tmp0), a; \
	roll	$s, a; \
	addl	b, a;
59
/*
 * MD5 round 2 step:
 *
 *   a = b + rol(a + G(b, c, d) + x + t, s)
 *
 * with
 *
 *   G(x, y, z) = (x & z) | (y & ~z)
 *
 * The two halves of G can never have the same bit set (one half is
 * masked by z, the other by ~z), so OR-ing them gives the same value as
 * adding them. Each half is therefore added into a separately:
 * (c & ~d) is built in tmp0 and (b & d) in tmp1.
 *
 * Uses tmp0 and tmp1 as scratch and reads the message word at
 * (x*4)(in).
 */
#define md5_round2(a, b, c, d, x, t, s) \
	movl	d, tmp0; \
	notl	tmp0; \
	andl	c, tmp0; \
	movl	b, tmp1; \
	andl	d, tmp1; \
	addl	(x*4)(in), a; \
	addl	tmp0, a; \
	leal	t(a, tmp1), a; \
	roll	$s, a; \
	addl	b, a;
77
/*
 * MD5 round 3 step:
 *
 *   a = b + rol(a + H(b, c, d) + x + t, s)
 *
 * with
 *
 *   H(x, y, z) = x ^ y ^ z
 *
 * H(b, c, d) is built in tmp0; message word x is then added to a from
 * the input block and a single lea folds in both H and the constant t.
 *
 * Uses tmp0 as scratch and reads the message word at (x*4)(in).
 */
#define md5_round3(a, b, c, d, x, t, s) \
	movl	b, tmp0; \
	xorl	c, tmp0; \
	xorl	d, tmp0; \
	addl	(x*4)(in), a; \
	leal	t(a, tmp0), a; \
	roll	$s, a; \
	addl	b, a;
92
/*
 * MD5 round 4 step:
 *
 *   a = b + rol(a + I(b, c, d) + x + t, s)
 *
 * with
 *
 *   I(x, y, z) = y ^ (x | ~z)
 *
 * I(b, c, d) is built in tmp0 starting from ~d; message word x is then
 * added to a from the input block and a single lea folds in both I and
 * the constant t.
 *
 * Uses tmp0 as scratch and reads the message word at (x*4)(in).
 */
#define md5_round4(a, b, c, d, x, t, s) \
	movl	d, tmp0; \
	notl	tmp0; \
	orl	b, tmp0; \
	xorl	c, tmp0; \
	addl	(x*4)(in), a; \
	leal	t(a, tmp0), a; \
	roll	$s, a; \
	addl	b, a;
108
109.text
110
/*
 * void md5_block_data_order(MD5_CTX *ctx, const void *in, size_t num);
 *
 * Run the MD5 block transform over num consecutive 64 byte blocks
 * starting at in, updating the four 32 bit state words stored at the
 * start of ctx. Message words are read straight from the input with
 * 32 bit loads.
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Preserved: rbx, rbp, r12 and r13 are saved on entry and restored on
 *            exit.
 * Modified:  rax, rcx, rdx, rsi, r8-r11 and the flags.
 *
 * A num of zero returns immediately, without reading from in and
 * without writing to ctx.
 *
 * num shares %rdx with the working register D, so the end pointer is
 * derived from num before the block loop is entered.
 */
.align 16
.globl	md5_block_data_order
.type	md5_block_data_order,@function
md5_block_data_order:
	_CET_ENDBR

	/*
	 * The block loop only tests its exit condition after a block has
	 * been processed, so a zero block count must be rejected up front.
	 * Nothing has been pushed yet, which lets this path return directly.
	 */
	testq	num, num
	jz	.Lmd5_block_done

	/* Save callee save registers. */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13

	/* Compute end of message: end = in + num * 64. */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load current hash state from context. */
	movl	(0*4)(ctx), AA
	movl	(1*4)(ctx), BB
	movl	(2*4)(ctx), CC
	movl	(3*4)(ctx), DD

	/* Jump over the alignment padding in front of the loop. */
	jmp	.Lblock_loop

.align 16
.Lblock_loop:
	/* Start each block from a working copy of the hash state. */
	movl	AA, A
	movl	BB, B
	movl	CC, C
	movl	DD, D

	md5_round1(A, B, C, D, 0, 0xd76aa478L, 7);
	md5_round1(D, A, B, C, 1, 0xe8c7b756L, 12);
	md5_round1(C, D, A, B, 2, 0x242070dbL, 17);
	md5_round1(B, C, D, A, 3, 0xc1bdceeeL, 22);
	md5_round1(A, B, C, D, 4, 0xf57c0fafL, 7);
	md5_round1(D, A, B, C, 5, 0x4787c62aL, 12);
	md5_round1(C, D, A, B, 6, 0xa8304613L, 17);
	md5_round1(B, C, D, A, 7, 0xfd469501L, 22);
	md5_round1(A, B, C, D, 8, 0x698098d8L, 7);
	md5_round1(D, A, B, C, 9, 0x8b44f7afL, 12);
	md5_round1(C, D, A, B, 10, 0xffff5bb1L, 17);
	md5_round1(B, C, D, A, 11, 0x895cd7beL, 22);
	md5_round1(A, B, C, D, 12, 0x6b901122L, 7);
	md5_round1(D, A, B, C, 13, 0xfd987193L, 12);
	md5_round1(C, D, A, B, 14, 0xa679438eL, 17);
	md5_round1(B, C, D, A, 15, 0x49b40821L, 22);

	md5_round2(A, B, C, D, 1, 0xf61e2562L, 5);
	md5_round2(D, A, B, C, 6, 0xc040b340L, 9);
	md5_round2(C, D, A, B, 11, 0x265e5a51L, 14);
	md5_round2(B, C, D, A, 0, 0xe9b6c7aaL, 20);
	md5_round2(A, B, C, D, 5, 0xd62f105dL, 5);
	md5_round2(D, A, B, C, 10, 0x02441453L, 9);
	md5_round2(C, D, A, B, 15, 0xd8a1e681L, 14);
	md5_round2(B, C, D, A, 4, 0xe7d3fbc8L, 20);
	md5_round2(A, B, C, D, 9, 0x21e1cde6L, 5);
	md5_round2(D, A, B, C, 14, 0xc33707d6L, 9);
	md5_round2(C, D, A, B, 3, 0xf4d50d87L, 14);
	md5_round2(B, C, D, A, 8, 0x455a14edL, 20);
	md5_round2(A, B, C, D, 13, 0xa9e3e905L, 5);
	md5_round2(D, A, B, C, 2, 0xfcefa3f8L, 9);
	md5_round2(C, D, A, B, 7, 0x676f02d9L, 14);
	md5_round2(B, C, D, A, 12, 0x8d2a4c8aL, 20);

	md5_round3(A, B, C, D, 5, 0xfffa3942L, 4);
	md5_round3(D, A, B, C, 8, 0x8771f681L, 11);
	md5_round3(C, D, A, B, 11, 0x6d9d6122L, 16);
	md5_round3(B, C, D, A, 14, 0xfde5380cL, 23);
	md5_round3(A, B, C, D, 1, 0xa4beea44L, 4);
	md5_round3(D, A, B, C, 4, 0x4bdecfa9L, 11);
	md5_round3(C, D, A, B, 7, 0xf6bb4b60L, 16);
	md5_round3(B, C, D, A, 10, 0xbebfbc70L, 23);
	md5_round3(A, B, C, D, 13, 0x289b7ec6L, 4);
	md5_round3(D, A, B, C, 0, 0xeaa127faL, 11);
	md5_round3(C, D, A, B, 3, 0xd4ef3085L, 16);
	md5_round3(B, C, D, A, 6, 0x04881d05L, 23);
	md5_round3(A, B, C, D, 9, 0xd9d4d039L, 4);
	md5_round3(D, A, B, C, 12, 0xe6db99e5L, 11);
	md5_round3(C, D, A, B, 15, 0x1fa27cf8L, 16);
	md5_round3(B, C, D, A, 2, 0xc4ac5665L, 23);

	md5_round4(A, B, C, D, 0, 0xf4292244L, 6);
	md5_round4(D, A, B, C, 7, 0x432aff97L, 10);
	md5_round4(C, D, A, B, 14, 0xab9423a7L, 15);
	md5_round4(B, C, D, A, 5, 0xfc93a039L, 21);
	md5_round4(A, B, C, D, 12, 0x655b59c3L, 6);
	md5_round4(D, A, B, C, 3, 0x8f0ccc92L, 10);
	md5_round4(C, D, A, B, 10, 0xffeff47dL, 15);
	md5_round4(B, C, D, A, 1, 0x85845dd1L, 21);
	md5_round4(A, B, C, D, 8, 0x6fa87e4fL, 6);
	md5_round4(D, A, B, C, 15, 0xfe2ce6e0L, 10);
	md5_round4(C, D, A, B, 6, 0xa3014314L, 15);
	md5_round4(B, C, D, A, 13, 0x4e0811a1L, 21);
	md5_round4(A, B, C, D, 4, 0xf7537e82L, 6);
	md5_round4(D, A, B, C, 11, 0xbd3af235L, 10);
	md5_round4(C, D, A, B, 2, 0x2ad7d2bbL, 15);
	md5_round4(B, C, D, A, 9, 0xeb86d391L, 21);

	/* Add intermediate state to hash state. */
	addl	A, AA
	addl	B, BB
	addl	C, CC
	addl	D, DD

	/* Advance to the next block; unsigned compare since these are addresses. */
	addq	$64, in
	cmpq	end, in
	jb	.Lblock_loop

	/* Store new hash state to context. */
	movl	AA, (0*4)(ctx)
	movl	BB, (1*4)(ctx)
	movl	CC, (2*4)(ctx)
	movl	DD, (3*4)(ctx)

	/* Restore callee save registers, in reverse order of the pushes. */
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx

.Lmd5_block_done:
	ret
238