/* $OpenBSD: md5_amd64_generic.S,v 1.1 2025/01/24 13:35:04 jsing Exp $ */
/*
 * Copyright (c) 2025 Joel Sing <jsing@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Syntax: AT&T (GNU as), run through the C preprocessor.
 * ABI: System V AMD64.
 */

#ifdef __CET__
#include <cet.h>
#else
#define _CET_ENDBR
#endif

/*
 * Function arguments, in System V AMD64 argument registers.
 *
 * Note that num (%rdx) shares a register with D (%edx). This is safe only
 * because num is consumed while computing the end pointer, before D is
 * first written.
 */
#define	ctx		%rdi
#define	in		%rsi
#define	num		%rdx

/* Address one past the last input block (callee-saved register). */
#define	end		%rbp

/* Working variables for the block currently being processed. */
#define	A		%eax
#define	B		%ebx
#define	C		%ecx
#define	D		%edx

/* Running hash state, kept in registers across all blocks. */
#define	AA		%r8d
#define	BB		%r9d
#define	CC		%r10d
#define	DD		%r11d

/* Scratch registers clobbered by the round macros (callee-saved). */
#define	tmp0		%r12d
#define	tmp1		%r13d

/*
 * Each round macro below takes the four working variables (a, b, c, d),
 * the index x of the 32-bit message word to mix in (read from in + x*4),
 * the additive constant t and the left rotate count s. Only a is updated;
 * b, c and d are read. All of them clobber tmp0 and the flags.
 */

/*
 * Compute MD5 round 1 as:
 *
 *	a = b + rol(a + F(b, c, d) + x + t, s)
 *	F(x, y, z) = (x & y) | (~x & z)
 *		   = ((y ^ z) & x) ^ z
 */
#define md5_round1(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	c, tmp0; \
	xorl	d, tmp0; \
	andl	b, tmp0; \
	xorl	d, tmp0; \
	leal	t(tmp0, a), a; \
	roll	$s, a; \
	addl	b, a;

/*
 * Compute MD5 round 2 as:
 *
 *	a = b + rol(a + G(b, c, d) + x + t, s)
 *	G(x, y, z) = (x & z) | (y & ~z)
 *
 * The two terms of G can never have the same bit set (one is masked by z,
 * the other by ~z), so they are accumulated into a with two separate
 * additions instead of being OR'd together first. Additionally clobbers
 * tmp1.
 */
#define md5_round2(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	d, tmp0; \
	xorl	$-1, tmp0; \
	andl	c, tmp0; \
	addl	tmp0, a; \
	movl	d, tmp1; \
	andl	b, tmp1; \
	leal	t(tmp1, a), a; \
	roll	$s, a; \
	addl	b, a;

/*
 * Compute MD5 round 3 as:
 *
 *	a = b + rol(a + H(b, c, d) + x + t, s)
 *	H(x, y, z) = x ^ y ^ z;
 */
#define md5_round3(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	d, tmp0; \
	xorl	c, tmp0; \
	xorl	b, tmp0; \
	leal	t(tmp0, a), a; \
	roll	$s, a; \
	addl	b, a;

/*
 * Compute MD5 round 4 as:
 *
 *	a = b + rol(a + I(b, c, d) + x + t, s)
 *	I(x, y, z) = y ^ (x | ~z)
 */
#define md5_round4(a, b, c, d, x, t, s) \
	addl	(x*4)(in), a; \
	movl	d, tmp0; \
	xorl	$-1, tmp0; \
	orl	b, tmp0; \
	xorl	c, tmp0; \
	leal	t(tmp0, a), a; \
	roll	$s, a; \
	addl	b, a;

.text

/*
 * void md5_block_data_order(MD5_CTX *ctx, const void *in, size_t num);
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Processes num consecutive 64 byte blocks starting at in, updating the
 * four 32-bit state words stored at ctx + 0, 4, 8 and 12. Returns
 * immediately, leaving ctx untouched, if num is zero.
 *
 * Uses rbx, rbp, r12 and r13 (saved and restored here) along with the
 * caller-saved rax, rcx, rdx, rsi and r8-r11. Makes no calls and uses no
 * stack beyond the four saved registers.
 */
.align 16
.globl	md5_block_data_order
.type	md5_block_data_order,@function
md5_block_data_order:
	_CET_ENDBR

	/*
	 * Nothing to do for zero blocks. The loop below tests its exit
	 * condition at the bottom, so without this check it would read one
	 * block from in and fold it into the hash state. Nothing has been
	 * pushed yet, hence the exit goes straight to the ret.
	 */
	testq	num, num
	jz	.Ldone

	/* Save callee save registers. */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13

	/* Compute end of message (in + num * 64); num is dead afterwards. */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load current hash state from context. */
	movl	(0*4)(ctx), AA
	movl	(1*4)(ctx), BB
	movl	(2*4)(ctx), CC
	movl	(3*4)(ctx), DD

	jmp	.Lblock_loop

.align 16
.Lblock_loop:
	/* Start each block from the current hash state. */
	movl	AA, A
	movl	BB, B
	movl	CC, C
	movl	DD, D

	md5_round1(A, B, C, D, 0, 0xd76aa478L, 7);
	md5_round1(D, A, B, C, 1, 0xe8c7b756L, 12);
	md5_round1(C, D, A, B, 2, 0x242070dbL, 17);
	md5_round1(B, C, D, A, 3, 0xc1bdceeeL, 22);
	md5_round1(A, B, C, D, 4, 0xf57c0fafL, 7);
	md5_round1(D, A, B, C, 5, 0x4787c62aL, 12);
	md5_round1(C, D, A, B, 6, 0xa8304613L, 17);
	md5_round1(B, C, D, A, 7, 0xfd469501L, 22);
	md5_round1(A, B, C, D, 8, 0x698098d8L, 7);
	md5_round1(D, A, B, C, 9, 0x8b44f7afL, 12);
	md5_round1(C, D, A, B, 10, 0xffff5bb1L, 17);
	md5_round1(B, C, D, A, 11, 0x895cd7beL, 22);
	md5_round1(A, B, C, D, 12, 0x6b901122L, 7);
	md5_round1(D, A, B, C, 13, 0xfd987193L, 12);
	md5_round1(C, D, A, B, 14, 0xa679438eL, 17);
	md5_round1(B, C, D, A, 15, 0x49b40821L, 22);

	md5_round2(A, B, C, D, 1, 0xf61e2562L, 5);
	md5_round2(D, A, B, C, 6, 0xc040b340L, 9);
	md5_round2(C, D, A, B, 11, 0x265e5a51L, 14);
	md5_round2(B, C, D, A, 0, 0xe9b6c7aaL, 20);
	md5_round2(A, B, C, D, 5, 0xd62f105dL, 5);
	md5_round2(D, A, B, C, 10, 0x02441453L, 9);
	md5_round2(C, D, A, B, 15, 0xd8a1e681L, 14);
	md5_round2(B, C, D, A, 4, 0xe7d3fbc8L, 20);
	md5_round2(A, B, C, D, 9, 0x21e1cde6L, 5);
	md5_round2(D, A, B, C, 14, 0xc33707d6L, 9);
	md5_round2(C, D, A, B, 3, 0xf4d50d87L, 14);
	md5_round2(B, C, D, A, 8, 0x455a14edL, 20);
	md5_round2(A, B, C, D, 13, 0xa9e3e905L, 5);
	md5_round2(D, A, B, C, 2, 0xfcefa3f8L, 9);
	md5_round2(C, D, A, B, 7, 0x676f02d9L, 14);
	md5_round2(B, C, D, A, 12, 0x8d2a4c8aL, 20);

	md5_round3(A, B, C, D, 5, 0xfffa3942L, 4);
	md5_round3(D, A, B, C, 8, 0x8771f681L, 11);
	md5_round3(C, D, A, B, 11, 0x6d9d6122L, 16);
	md5_round3(B, C, D, A, 14, 0xfde5380cL, 23);
	md5_round3(A, B, C, D, 1, 0xa4beea44L, 4);
	md5_round3(D, A, B, C, 4, 0x4bdecfa9L, 11);
	md5_round3(C, D, A, B, 7, 0xf6bb4b60L, 16);
	md5_round3(B, C, D, A, 10, 0xbebfbc70L, 23);
	md5_round3(A, B, C, D, 13, 0x289b7ec6L, 4);
	md5_round3(D, A, B, C, 0, 0xeaa127faL, 11);
	md5_round3(C, D, A, B, 3, 0xd4ef3085L, 16);
	md5_round3(B, C, D, A, 6, 0x04881d05L, 23);
	md5_round3(A, B, C, D, 9, 0xd9d4d039L, 4);
	md5_round3(D, A, B, C, 12, 0xe6db99e5L, 11);
	md5_round3(C, D, A, B, 15, 0x1fa27cf8L, 16);
	md5_round3(B, C, D, A, 2, 0xc4ac5665L, 23);

	md5_round4(A, B, C, D, 0, 0xf4292244L, 6);
	md5_round4(D, A, B, C, 7, 0x432aff97L, 10);
	md5_round4(C, D, A, B, 14, 0xab9423a7L, 15);
	md5_round4(B, C, D, A, 5, 0xfc93a039L, 21);
	md5_round4(A, B, C, D, 12, 0x655b59c3L, 6);
	md5_round4(D, A, B, C, 3, 0x8f0ccc92L, 10);
	md5_round4(C, D, A, B, 10, 0xffeff47dL, 15);
	md5_round4(B, C, D, A, 1, 0x85845dd1L, 21);
	md5_round4(A, B, C, D, 8, 0x6fa87e4fL, 6);
	md5_round4(D, A, B, C, 15, 0xfe2ce6e0L, 10);
	md5_round4(C, D, A, B, 6, 0xa3014314L, 15);
	md5_round4(B, C, D, A, 13, 0x4e0811a1L, 21);
	md5_round4(A, B, C, D, 4, 0xf7537e82L, 6);
	md5_round4(D, A, B, C, 11, 0xbd3af235L, 10);
	md5_round4(C, D, A, B, 2, 0x2ad7d2bbL, 15);
	md5_round4(B, C, D, A, 9, 0xeb86d391L, 21);

	/* Add intermediate state to hash state. */
	addl	A, AA
	addl	B, BB
	addl	C, CC
	addl	D, DD

	/* Advance to the next block; pointers, so compare unsigned. */
	addq	$64, in
	cmpq	end, in
	jb	.Lblock_loop

	/* Store new hash state to context. */
	movl	AA, (0*4)(ctx)
	movl	BB, (1*4)(ctx)
	movl	CC, (2*4)(ctx)
	movl	DD, (3*4)(ctx)

	/* Restore callee save registers. */
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx

.Ldone:
	ret