C Scratch registers clobbered by the AES macros in this file
define(`TMP1', `%g1')
define(`TMP2', `%g2')
define(`TMP3', `%g3')

C Table base registers read by AES_ROUND. The macros in this file only
C read them, so a caller can set them up once outside its loop.
define(`T0', `%o0')
define(`T1', `%o1')
define(`T2', `%o2')
define(`T3', `%o3')

C AES_LOAD(i, src, key, res)
C Reads the four octets at src + 4*i one at a time and assembles them
C into a 32-bit word, with the octet at the lowest address in bits 0-7.
C The subkey word at key + 4*i is then xored in and the result is left
C in res. Clobbers TMP1 and TMP2, so res must not be either of them.
define(`AES_LOAD', `
	ldub	[$2 + 4*$1], $4		C octet 0 stays in bits 0-7
	ldub	[$2 + 4*$1 + 1], TMP1	C octet 1
	ldub	[$2 + 4*$1 + 2], TMP2	C octet 2
	sll	TMP1, 8, TMP1		C octet 1 moves to bits 8-15

	or	$4, TMP1, $4
	ldub	[$2 + 4*$1+3], TMP1	C octet 3 reuses TMP1
	sll	TMP2, 16, TMP2		C octet 2 moves to bits 16-23
	or	$4, TMP2, $4

	sll	TMP1, 24, TMP1		C octet 3 moves to bits 24-31
	C	Fetch the subkey word
	ld	[$3 + 4*$1], TMP2
	or	$4, TMP1, $4
	xor	$4, TMP2, $4')dnl

C AES_ROUND(i, a, b, c, d, key, res)
C Computes one output word of a table-driven AES round:
C   res = T0[a & 0xff] ^ T1[(b >> 8) & 0xff] ^ T2[(c >> 16) & 0xff]
C         ^ T3[d >> 24] ^ key[i]
C where T0..T3 and key are arrays of 32-bit words.
C Clobbers TMP1 and TMP2. res is written before c, d and key are read,
C so it must not be the same register as any of those.
C The leading number in each trailing comment names the lookup the
C instruction belongs to (0-3 for the tables, 4 for the subkey).
C FIXME: Could use registers pointing directly to the four tables
C FIXME: Needs better instruction scheduling, and perhaps more temporaries
C Alternatively, we can use a single table and some rotations
define(`AES_ROUND', `
	and	$2, 0xff, TMP1		C 0: low octet of a
	srl	$3, 6, TMP2		C 1: octet 1 of b, already scaled by 4
	sll	TMP1, 2, TMP1		C 0: scale to a word offset
	and	TMP2, 0x3fc, TMP2	C 1: keep the scaled octet only
	ld	[T0 + TMP1], $7		C 0: res = T0 entry
	srl	$4, 14, TMP1		C 2: octet 2 of c, already scaled by 4
	ld	[T1 + TMP2], TMP2	C 1: T1 entry
	and	TMP1, 0x3fc, TMP1	C 2: keep the scaled octet only
	xor	$7, TMP2, $7		C 1: res ^= T1 entry
	srl	$5, 22, TMP2		C 3: octet 3 of d, already scaled by 4
	ld	[T2 + TMP1], TMP1	C 2: T2 entry
	and	TMP2, 0x3fc, TMP2	C 3: keep the scaled octet only
	xor	$7, TMP1, $7		C 2: res ^= T2 entry
	ld	[$6 + 4*$1], TMP1	C 4: subkey word
	ld	[T3 + TMP2], TMP2	C 3: T3 entry
	xor	$7, TMP1, $7		C 4: res ^= subkey
	xor	$7, TMP2, $7		C 3: res ^= T3 entry
')dnl

C AES_FINAL_ROUND(i, T, a, b, c, d, key, dst)
C Compute one word in the final round function. Output is converted to
C octets and stored at dst. Relies on AES_SBOX being zero.
C The substitution table is addressed through the T argument, one octet
C per lookup: octet 0 of a, octet 1 of b, octet 2 of c and octet 3 of d.
C Each substituted octet is xored with the matching octet of the subkey
C word at key + 4*i and stored at dst + 4*i, lowest subkey octet first.
C Clobbers TMP1, TMP2 and TMP3.
define(`AES_FINAL_ROUND', `
	C	Load subkey
	ld	[$7 + 4*$1], TMP3

	C	The table base below is macro argument 2, not a global name
	and	$3, 0xff, TMP1		C 0: low octet of a
	srl	$4, 8, TMP2		C 1: bring octet 1 of b down
	ldub	[$2 + TMP1], TMP1	C 0: substitute
	and	TMP2, 0xff, TMP2	C 1: isolate the octet
	xor	TMP3, TMP1, TMP1	C 0: mix in subkey octet 0
	ldub	[$2 + TMP2], TMP2	C 1: substitute
	stb	TMP1, [$8 + 4*$1]	C 0: store output octet 0
	srl	$5, 16, TMP1		C 2: bring octet 2 of c down
	srl	TMP3, 8, TMP3		C 1: subkey octet 1 now lowest
	and	TMP1, 0xff, TMP1	C 2: isolate the octet
	xor	TMP3, TMP2, TMP2	C 1: mix in subkey octet 1
	ldub	[$2 + TMP1], TMP1	C 2: substitute
	stb	TMP2, [$8 + 4*$1 + 1]	C 1: store output octet 1
	srl	$6, 24, TMP2		C 3: top octet of d, no mask needed
	srl	TMP3, 8, TMP3		C 2: subkey octet 2 now lowest
	ldub	[$2 + TMP2], TMP2	C 3: substitute
	xor	TMP3, TMP1, TMP1	C 2: mix in subkey octet 2
	srl	TMP3, 8, TMP3		C 3: subkey octet 3 now lowest
	stb	TMP1, [$8 + 4*$1 + 2]	C 2: store output octet 2
	xor	TMP3, TMP2, TMP2	C 3: mix in subkey octet 3
	stb	TMP2, [$8 + 4*$1 + 3]	C 3: store output octet 3
')