/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
 * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

/*
 * ChaCha block functions for MIPS32, written for GNU as and run through the
 * C preprocessor.
 *
 * Exported symbols:
 *   chacha_crypt_arch  - XOR a buffer with the ChaCha keystream.
 *   hchacha_block_arch - run the ChaCha permutation on a state and emit
 *                        words 0-3 and 12-15 of the result.
 */

/* Mask that rounds a byte count (< 64) down to a whole number of words. */
#define MASK_U32		0x3c
#define CHACHA20_BLOCK_SIZE	64
/* Room for the eight saved registers $s0-$s7. */
#define STACK_SIZE		32

/* Working copy of the 16 state words. */
#define X0	$t0
#define X1	$t1
#define X2	$t2
#define X3	$t3
#define X4	$t4
#define X5	$t5
#define X6	$t6
#define X7	$t7
#define X8	$t8
#define X9	$t9
#define X10	$v1
#define X11	$s6
#define X12	$s5
#define X13	$s4
#define X14	$s3
#define X15	$s2
/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
#define T0	$s1
#define T1	$s0
#define T(n)	T ## n
#define X(n)	X ## n

/* Input arguments */
#define STATE		$a0
#define OUT		$a1
#define IN		$a2
#define BYTES		$a3

/* Output argument:
 * NONCE[0] (state word 12) is kept in a register and not in memory.
 * We don't want to touch the original value in memory while working.
 * It must be incremented on every loop iteration.
 */
#define NONCE_0		$v0

/* SAVED_X and SAVED_CA are set in the jump table.
 * Use regs which are overwritten on exit so that we don't leak clear data.
 * They are used for handling the trailing bytes when the length is not a
 * multiple of 4.
 */
#define SAVED_X		X15
#define SAVED_CA	$s7

/* Shares $s7 with SAVED_CA: SAVED_CA is only loaded on the no-full-block
 * paths, which run after the final test of IS_UNALIGNED.
 */
#define IS_UNALIGNED	$s7

/* Endianness helpers.
 * MSB/LSB are the byte offsets used with lwl/lwr and swl/swr.
 * CPU_TO_LE32 byte-swaps a word on big-endian and expands to nothing on
 * little-endian. ROTx/ROTR step through the bytes of SAVED_X in the
 * trailing-byte code.
 */
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define MSB 0
#define LSB 3
#define ROTx rotl
#define ROTR(n) rotr n, 24
#define	CPU_TO_LE32(n) \
	wsbh	n; \
	rotr	n, 16;
#else
#define MSB 3
#define LSB 0
#define ROTx rotr
#define CPU_TO_LE32(n)
#define ROTR(n)
#endif

/* Expand x(n) for every state word, in ascending order. */
#define FOR_EACH_WORD(x) \
	x( 0); \
	x( 1); \
	x( 2); \
	x( 3); \
	x( 4); \
	x( 5); \
	x( 6); \
	x( 7); \
	x( 8); \
	x( 9); \
	x(10); \
	x(11); \
	x(12); \
	x(13); \
	x(14); \
	x(15);

/* Expand x(n) for every state word, in descending order. */
#define FOR_EACH_WORD_REV(x) \
	x(15); \
	x(14); \
	x(13); \
	x(12); \
	x(11); \
	x(10); \
	x( 9); \
	x( 8); \
	x( 7); \
	x( 6); \
	x( 5); \
	x( 4); \
	x( 3); \
	x( 2); \
	x( 1); \
	x( 0);

/* Preprocessor-time n + 1, used to build the label names below. */
#define PLUS_ONE_0	1
#define PLUS_ONE_1	2
#define PLUS_ONE_2	3
#define PLUS_ONE_3	4
#define PLUS_ONE_4	5
#define PLUS_ONE_5	6
#define PLUS_ONE_6	7
#define PLUS_ONE_7	8
#define PLUS_ONE_8	9
#define PLUS_ONE_9	10
#define PLUS_ONE_10	11
#define PLUS_ONE_11	12
#define PLUS_ONE_12	13
#define PLUS_ONE_13	14
#define PLUS_ONE_14	15
#define PLUS_ONE_15	16
#define PLUS_ONE(x)	PLUS_ONE_ ## x
#define _CONCAT3(a,b,c)	a ## b ## c
#define CONCAT3(a,b,c)	_CONCAT3(a,b,c)

/* Finish and emit output word x through unaligned accesses:
 *   OUT[x] = IN[x] ^ cpu_to_le32(X[x] + STATE[x])
 * Word 12 adds NONCE_0 instead of the value in memory.
 * The label emitted in front is .Lchacha_mips_xor_unaligned_<x+1>_b, so a jump
 * to label <n> emits words n-1 down to 0 when the macro is expanded through
 * FOR_EACH_WORD_REV.
 * Clobbers T0 and T1.
 */
#define STORE_UNALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lwl	T1, (x*4)+MSB ## (IN); \
	lwr	T1, (x*4)+LSB ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	swl	X ## x, (x*4)+MSB ## (OUT); \
	swr	X ## x, (x*4)+LSB ## (OUT);

/* Same as STORE_UNALIGNED, but with plain lw/sw on IN and OUT.
 * The label emitted in front is .Lchacha_mips_xor_aligned_<x+1>_b.
 * Clobbers T0 and T1.
 */
#define STORE_ALIGNED(x) \
CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
	.if (x != 12); \
		lw	T0, (x*4)(STATE); \
	.endif; \
	lw	T1, (x*4) ## (IN); \
	.if (x == 12); \
		addu	X ## x, NONCE_0; \
	.else; \
		addu	X ## x, T0; \
	.endif; \
	CPU_TO_LE32(X ## x); \
	xor	X ## x, T1; \
	sw	X ## x, (x*4) ## (OUT);

/* Jump table macro.
 * Used for setup and handling the last bytes, which are not multiple of 4.
 * X15 is free to store Xn
 * Every jumptable entry must be equal in size.
 *
 * Each entry is a branch plus the instruction in its delay slot (hence
 * .set noreorder). The delay slot computes SAVED_X = X[x] + STATE[x], where
 * SAVED_CA holds STATE[x] (NONCE_0 for word 12).
 */
#define JMPTBL_ALIGNED(x) \
.Lchacha_mips_jmptbl_aligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_aligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

#define JMPTBL_UNALIGNED(x) \
.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
	.set	noreorder; \
	b	.Lchacha_mips_xor_unaligned_ ## x ## _b; \
	.if (x == 12); \
		addu	SAVED_X, X ## x, NONCE_0; \
	.else; \
		addu	SAVED_X, X ## x, SAVED_CA; \
	.endif; \
	.set	reorder

/* One add / xor / rotate-left step applied to four word triples at once:
 *   X[A] += X[K];  X[V] ^= X[A];  X[V] = rotl(X[V], S)
 * and likewise for (B, L, W), (C, M, Y) and (D, N, Z).
 */
#define AXR(A, B, C, D,  K, L, M, N,  V, W, Y, Z,  S) \
	addu	X(A), X(K); \
	addu	X(B), X(L); \
	addu	X(C), X(M); \
	addu	X(D), X(N); \
	xor	X(V), X(A); \
	xor	X(W), X(B); \
	xor	X(Y), X(C); \
	xor	X(Z), X(D); \
	rotl	X(V), S; \
	rotl	X(W), S; \
	rotl	X(Y), S; \
	rotl	X(Z), S;

/*
 * chacha_crypt_arch
 *
 * In:
 *   $a0 (STATE)  pointer to the 16-word ChaCha state
 *   $a1 (OUT)    destination buffer
 *   $a2 (IN)     source buffer
 *   $a3 (BYTES)  number of bytes to process; 0 returns immediately
 *   16($sp)      number of rounds, read from the caller's frame
 *                (presumably the fifth C argument - confirm against the
 *                C prototype)
 *
 * The round counter is decremented by 2 per pass and the round loop only
 * exits when it reaches exactly zero, so the number of rounds must be a
 * positive even value.
 *
 * Effects:
 *   OUT[i] = IN[i] ^ keystream[i] for BYTES bytes; IN and OUT may be
 *   unaligned (lwl/lwr and swl/swr are used if either is).
 *   STATE[12] is updated with the advanced block counter; no other state
 *   word is written.
 *
 * Saves and restores $s0-$s7 on a STACK_SIZE byte frame. Uses $at, hence
 * .set noat.
 */
.text
.set	reorder
.set	noat
.globl	chacha_crypt_arch
.ent	chacha_crypt_arch
chacha_crypt_arch:
	.frame	$sp, STACK_SIZE, $ra

	/* Load number of rounds */
	lw	$at, 16($sp)

	addiu	$sp, -STACK_SIZE

	/* Return bytes = 0. */
	beqz	BYTES, .Lchacha_mips_end

	lw	NONCE_0, 48(STATE)

	/* Save s0-s7 */
	sw	$s0,  0($sp)
	sw	$s1,  4($sp)
	sw	$s2,  8($sp)
	sw	$s3, 12($sp)
	sw	$s4, 16($sp)
	sw	$s5, 20($sp)
	sw	$s6, 24($sp)
	sw	$s7, 28($sp)

	/* Test IN or OUT is unaligned.
	 * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
	 */
	or	IS_UNALIGNED, IN, OUT
	andi	IS_UNALIGNED, 0x3

	b	.Lchacha_rounds_start

.align 4
.Loop_chacha_rounds:
	/* Advance to the next block. */
	addiu	IN,  CHACHA20_BLOCK_SIZE
	addiu	OUT, CHACHA20_BLOCK_SIZE
	addiu	NONCE_0, 1

.Lchacha_rounds_start:
	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)

	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)

	/* Word 12 comes from the register copy, not from memory. */
	move	X12, NONCE_0
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	lw	X15, 60(STATE)

.Loop_chacha_xor_rounds:
	/* Each pass is one double round: four column steps, then four
	 * diagonal steps.
	 */
	addiu	$at, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$at, .Loop_chacha_xor_rounds

	/* From here on BYTES is biased by one block: negative means that
	 * less than a full block is left.
	 */
	addiu	BYTES, -(CHACHA20_BLOCK_SIZE)

	/* Is data src/dst unaligned? Jump */
	bnez	IS_UNALIGNED, .Loop_chacha_unaligned

	/* Set number rounds here to fill delayslot. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0, it has no full block. */
	bltz	BYTES, .Lchacha_mips_no_full_block_aligned

	FOR_EACH_WORD_REV(STORE_ALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/* Place this here to fill delay slot */
	addiu	NONCE_0, 1

	/* BYTES < 0? Handle last bytes */
	bltz	BYTES, .Lchacha_mips_xor_bytes

.Lchacha_mips_xor_done:
	/* Restore used registers */
	lw	$s0,  0($sp)
	lw	$s1,  4($sp)
	lw	$s2,  8($sp)
	lw	$s3, 12($sp)
	lw	$s4, 16($sp)
	lw	$s5, 20($sp)
	lw	$s6, 24($sp)
	lw	$s7, 28($sp)

	/* Write NONCE_0 back to right location in state */
	sw	NONCE_0, 48(STATE)

.Lchacha_mips_end:
	addiu	$sp, STACK_SIZE
	jr	$ra

.Lchacha_mips_no_full_block_aligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_aligned_0)

	/* Calculate lower half jump table offset:
	 * $at is words * 4; inserted one bit up it becomes words * 8,
	 * i.e. one two-instruction table entry per word.
	 */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_aligned_0)

	/* Read value from STATE */
	lw	SAVED_CA, 0(T1)

	/* Store remaining bytecounter as negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_ALIGNED)


.Loop_chacha_unaligned:
	/* Set number rounds here to fill delayslot. */
	lw	$at, (STACK_SIZE+16)($sp)

	/* BYTES < 0, it has no full block. */
	bltz	BYTES, .Lchacha_mips_no_full_block_unaligned

	FOR_EACH_WORD_REV(STORE_UNALIGNED)

	/* BYTES > 0? Loop again. */
	bgtz	BYTES, .Loop_chacha_rounds

	/* Write NONCE_0 back to right location in state.
	 * The incremented value is stored again at .Lchacha_mips_xor_done,
	 * which every path from here reaches.
	 */
	sw	NONCE_0, 48(STATE)

	.set noreorder
	/* Fall through to byte handling */
	bgez	BYTES, .Lchacha_mips_xor_done
.Lchacha_mips_xor_unaligned_0_b:
.Lchacha_mips_xor_aligned_0_b:
	/* Place this here to fill delay slot.
	 * The two labels above are the jump table targets for "no full
	 * word to emit".
	 */
	addiu	NONCE_0, 1
	.set reorder

.Lchacha_mips_xor_bytes:
	/* Here $at is the number of bytes already emitted as full words,
	 * BYTES is minus the number of trailing bytes and SAVED_X holds
	 * the keystream word that covers them.
	 */
	addu	IN, $at
	addu	OUT, $at
	/* First byte */
	lbu	T1, 0(IN)
	addiu	$at, BYTES, 1
	CPU_TO_LE32(SAVED_X)
	ROTR(SAVED_X)
	xor	T1, SAVED_X
	sb	T1, 0(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Second byte */
	lbu	T1, 1(IN)
	addiu	$at, BYTES, 2
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 1(OUT)
	beqz	$at, .Lchacha_mips_xor_done
	/* Third byte */
	lbu	T1, 2(IN)
	ROTx	SAVED_X, 8
	xor	T1, SAVED_X
	sb	T1, 2(OUT)
	b	.Lchacha_mips_xor_done

.Lchacha_mips_no_full_block_unaligned:
	/* Restore the offset on BYTES */
	addiu	BYTES, CHACHA20_BLOCK_SIZE

	/* Get number of full WORDS */
	andi	$at, BYTES, MASK_U32

	/* Load upper half of jump table addr */
	lui	T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)

	/* Calculate lower half jump table offset:
	 * $at is words * 4; inserted one bit up it becomes words * 8,
	 * i.e. one two-instruction table entry per word.
	 */
	ins	T0, $at, 1, 6

	/* Add offset to STATE */
	addu	T1, STATE, $at

	/* Add lower half jump table addr */
	addiu	T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)

	/* Read value from STATE */
	lw	SAVED_CA, 0(T1)

	/* Store remaining bytecounter as negative value */
	subu	BYTES, $at, BYTES

	jr	T0

	/* Jump table */
	FOR_EACH_WORD(JMPTBL_UNALIGNED)
.end chacha_crypt_arch
.set at

/*
 * hchacha_block_arch
 *
 * Input arguments
 *   STATE	$a0	pointer to the 16-word input state
 *   OUT	$a1	pointer to the 8-word output
 *   NROUND	$a2	number of rounds; counted down by 2 until exactly
 *			zero, so it must be a positive even value
 *
 * Runs the permutation on a register copy of the state and stores words
 * 0-3 and 12-15 of the result to OUT. The input state is not added back
 * and is not written.
 */

/* Remap words 12-15 away from the $s registers, so that X11 ($s6) is the
 * only register this routine has to save and restore.
 */
#undef X12
#undef X13
#undef X14
#undef X15

#define X12	$a3
#define X13	$at
#define X14	$v0
#define X15	STATE

.set noat
.globl	hchacha_block_arch
.ent	hchacha_block_arch
hchacha_block_arch:
	.frame	$sp, STACK_SIZE, $ra

	addiu	$sp, -STACK_SIZE

	/* Save X11(s6) */
	sw	X11, 0($sp)

	lw	X0,  0(STATE)
	lw	X1,  4(STATE)
	lw	X2,  8(STATE)
	lw	X3,  12(STATE)
	lw	X4,  16(STATE)
	lw	X5,  20(STATE)
	lw	X6,  24(STATE)
	lw	X7,  28(STATE)
	lw	X8,  32(STATE)
	lw	X9,  36(STATE)
	lw	X10, 40(STATE)
	lw	X11, 44(STATE)
	lw	X12, 48(STATE)
	lw	X13, 52(STATE)
	lw	X14, 56(STATE)
	/* X15 is STATE itself: this load overwrites the pointer, so it
	 * must stay last.
	 */
	lw	X15, 60(STATE)

.Loop_hchacha_xor_rounds:
	/* Each pass is one double round: four column steps, then four
	 * diagonal steps.
	 */
	addiu	$a2, -2
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15, 16);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7, 12);
	AXR( 0, 1, 2, 3,  4, 5, 6, 7, 12,13,14,15,  8);
	AXR( 8, 9,10,11, 12,13,14,15,  4, 5, 6, 7,  7);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14, 16);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4, 12);
	AXR( 0, 1, 2, 3,  5, 6, 7, 4, 15,12,13,14,  8);
	AXR(10,11, 8, 9, 15,12,13,14,  5, 6, 7, 4,  7);
	bnez	$a2, .Loop_hchacha_xor_rounds

	/* Restore used register */
	lw	X11, 0($sp)

	sw	X0,  0(OUT)
	sw	X1,  4(OUT)
	sw	X2,  8(OUT)
	sw	X3,  12(OUT)
	sw	X12, 16(OUT)
	sw	X13, 20(OUT)
	sw	X14, 24(OUT)
	sw	X15, 28(OUT)

	addiu	$sp, STACK_SIZE
	jr	$ra
.end hchacha_block_arch
.set at