1/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
2//
3// This file is dual-licensed, meaning that you can use it under your
4// choice of either of the following two licenses:
5//
6// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
7//
8// Licensed under the Apache License 2.0 (the "License"). You can obtain
9// a copy in the file LICENSE in the source distribution or at
10// https://www.openssl.org/source/license.html
11//
12// or
13//
14// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
15// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
16// Copyright 2024 Google LLC
17// All rights reserved.
18//
19// Redistribution and use in source and binary forms, with or without
20// modification, are permitted provided that the following conditions
21// are met:
22// 1. Redistributions of source code must retain the above copyright
23//    notice, this list of conditions and the following disclaimer.
24// 2. Redistributions in binary form must reproduce the above copyright
25//    notice, this list of conditions and the following disclaimer in the
26//    documentation and/or other materials provided with the distribution.
27//
28// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40// The generated code of this file depends on the following RISC-V extensions:
41// - RV64I
42// - RISC-V Vector ('V') with VLEN >= 128
43// - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
44// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
45
46#include <linux/cfi_types.h>
47
48.text
49.option arch, +zvknha, +zvkb
50
// Scalar registers holding the three function arguments, in the order given
// by the C prototype: state pointer, message data pointer, block count.
#define STATEP		a0
#define DATA		a1
#define NUM_BLOCKS	a2

// Scratch scalar register: second base address (STATEP + 8) used for the
// indexed load/store of the {h,g,d,c} half of the state.
#define STATEP_C	a3

// v0: mask operand of the vmerge in the message schedule step.
#define MASK		v0
// v1: byte offsets used by the indexed load/store of the state.
#define INDICES		v1

// v2-v5: the four groups of 4 message schedule words currently in flight.
#define W0		v2
#define W1		v3
#define W2		v4
#define W3		v5

// v6: temporary used within each group of 4 rounds.
#define VTMP		v6

// v7-v8: the working state, arranged as {f,e,b,a} and {h,g,d,c}.
#define FEBA		v7
#define HGDC		v8

// v10-v25: the round constants, 4 words per register.
#define K0		v10
#define K1		v11
#define K2		v12
#define K3		v13
#define K4		v14
#define K5		v15
#define K6		v16
#define K7		v17
#define K8		v18
#define K9		v19
#define K10		v20
#define K11		v21
#define K12		v22
#define K13		v23
#define K14		v24
#define K15		v25

// v26-v27: copy of the state taken at the start of each block.
#define PREV_FEBA	v26
#define PREV_HGDC	v27
84
// sha256_4rounds last, k, w0, w1, w2, w3
//
// Run 4 rounds of SHA-256 on the working state held in FEBA and HGDC.
//
//   last      - 0 to also extend the message schedule (see below); nonzero
//               once every message schedule word has already been computed.
//   k         - register holding the 4 round constants for these rounds.
//   w0        - register holding the 4 message schedule words for these
//               rounds.
//   w1 - w3   - registers holding the next 3 groups of 4 message schedule
//               words.
//
// When \last is 0, the group of 4 message schedule words that follows w3 is
// computed and written over w0.  The roles of the registers therefore rotate:
// what is (w1, w2, w3, w0) now is the (w0, w1, w2, w3) of the next invocation,
// and the caller has to pass the registers in that rotated order.
//
// VTMP is clobbered.  MASK and the vector configuration are expected to have
// been set up by the caller.
.macro	sha256_4rounds	last, k, w0, w1, w2, w3
	// VTMP = round constants + message schedule words.
	vadd.vv		VTMP, \k, \w0

	// Compression rounds: first the "low" instruction updating HGDC, then
	// the "high" instruction updating FEBA from the new HGDC.
	vsha2cl.vv	HGDC, FEBA, VTMP
	vsha2ch.vv	FEBA, HGDC, VTMP

.ifeq \last
	// Message schedule: build VTMP from \w2, except for the elements
	// selected by MASK which come from \w1, then produce the next 4
	// message schedule words in \w0.
	vmerge.vvm	VTMP, \w2, \w1, MASK
	vsha2ms.vv	\w0, VTMP, \w3
.endif
.endm
101
// sha256_16rounds last, k0, k1, k2, k3
//
// Run 16 rounds of SHA-256 as four invocations of sha256_4rounds, using the
// round constants in k0-k3 in that order.  \last is handed through unchanged
// to each invocation.
//
// The W registers are rotated by one position per invocation, as required by
// sha256_4rounds; after the fourth invocation the rotation is back at
// (W0, W1, W2, W3), so this macro can be invoked repeatedly as-is.
.macro	sha256_16rounds	last, k0, k1, k2, k3
	// Rounds 0-3 of this group: current words in W0.
	sha256_4rounds	\last, \k0, W0, W1, W2, W3
	// Rounds 4-7 of this group: current words in W1.
	sha256_4rounds	\last, \k1, W1, W2, W3, W0
	// Rounds 8-11 of this group: current words in W2.
	sha256_4rounds	\last, \k2, W2, W3, W0, W1
	// Rounds 12-15 of this group: current words in W3.
	sha256_4rounds	\last, \k3, W3, W0, W1, W2
.endm
108
// Helper for the transform function below: load the next 16 bytes of the
// message from DATA into \w, endian-swap each 32-bit word, and advance DATA
// past the bytes just consumed.
.macro	sha256_load_w	w
	vle32.v		\w, (DATA)
	vrev8.v		\w, \w
	addi		DATA, DATA, 16
.endm

// void sha256_transform_zvknha_or_zvknhb_zvkb(u32 state[8], const u8 *data,
//					       int num_blocks);
//
// Update the SHA-256 state with num_blocks consecutive 64-byte message blocks
// read from data.
//
// In:       a0 (STATEP) = state, a1 (DATA) = data, a2 (NUM_BLOCKS) = num_blocks
// Out:      the 8 state words at STATEP are overwritten with the new state
// Clobbers: t0, a1, a2, a3, v0-v8, v10-v27, and the vector configuration
//
// The block counter is decremented before it is tested, and nothing checks it
// on entry, so the loop only terminates as intended when num_blocks >= 1.
SYM_TYPED_FUNC_START(sha256_transform_zvknha_or_zvknhb_zvkb)

	// Fill K0-K15 with the round constants: 4 consecutive 32-bit words of
	// the K256 table per register, walking the table 16 bytes at a time.
	vsetivli	zero, 4, e32, m1, ta, ma
	la		t0, K256
	vle32.v		K0, (t0)
.irp	kreg, K1, K2, K3, K4, K5, K6, K7, K8, K9, K10, K11, K12, K13, K14, K15
	addi		t0, t0, 16
	vle32.v		\kreg, (t0)
.endr

	// Build the vmerge mask used in message scheduling: only the first
	// word (idx==0) is to be replaced.  With just 4 words to select from,
	// a single 8-bit mask element is enough.
	vsetivli	zero, 1, e8, m1, ta, ma
	vmv.v.i		MASK, 0x01

	// Fetch the state.  Memory holds it as {a,b,c,d,e,f,g,h}, whereas the
	// rounds operate on {f,e,b,a} and {h,g,d,c}.  An indexed load does the
	// rearranging: the destination vtype is e32m1, the index vtype is
	// e8mf4, and the i8 indices are {20, 16, 4, 0}, materialized through
	// the 32-bit little endian value 0x00041014.  The same indices applied
	// to STATEP + 8 select {h,g,d,c}.
	li		t0, 0x00041014
	vsetivli	zero, 1, e32, m1, ta, ma
	vmv.v.x		INDICES, t0
	addi		STATEP_C, STATEP, 8
	vsetivli	zero, 4, e32, m1, ta, ma
	vluxei8.v	FEBA, (STATEP), INDICES
	vluxei8.v	HGDC, (STATEP_C), INDICES

.Lnext_block:
	// One block fewer left to process after this iteration.
	addi		NUM_BLOCKS, NUM_BLOCKS, -1

	// Keep a copy of the incoming state; it is added back in after the
	// rounds.
	vmv.v.v		PREV_FEBA, FEBA
	vmv.v.v		PREV_HGDC, HGDC

	// Read the next 512-bit message block into W0-W3, endian-swapping each
	// 32-bit word.  DATA ends up pointing at the following block.
	sha256_load_w	W0
	sha256_load_w	W1
	sha256_load_w	W2
	sha256_load_w	W3

	// The 64 rounds of SHA-256, 16 at a time.  The final group of 16 does
	// not compute any further message schedule words.
	sha256_16rounds	0, K0, K1, K2, K3
	sha256_16rounds	0, K4, K5, K6, K7
	sha256_16rounds	0, K8, K9, K10, K11
	sha256_16rounds	1, K12, K13, K14, K15

	// New state = result of the rounds + state before this block.
	vadd.vv		FEBA, FEBA, PREV_FEBA
	vadd.vv		HGDC, HGDC, PREV_HGDC

	// Loop while blocks remain.
	bnez		NUM_BLOCKS, .Lnext_block

	// Write the state back using the same indices as the load, then
	// return.
	vsuxei8.v	FEBA, (STATEP), INDICES
	vsuxei8.v	HGDC, (STATEP_C), INDICES
	ret
SYM_FUNC_END(sha256_transform_zvknha_or_zvknhb_zvkb)
204
// The 64 SHA-256 round constants as 32-bit words.  Each row below is 16 bytes,
// i.e. the contents of one of the K0-K15 vector registers loaded by the
// transform function; the table is 4-byte aligned.
.section ".rodata"
.p2align 2
.type K256, @object
K256:
	// Rounds 0-15 (K0-K3)
	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.word		0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.word		0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.word		0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174

	// Rounds 16-31 (K4-K7)
	.word		0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.word		0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.word		0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.word		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967

	// Rounds 32-47 (K8-K11)
	.word		0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.word		0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.word		0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.word		0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070

	// Rounds 48-63 (K12-K15)
	.word		0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.word		0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.word		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.word		0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size K256, . - K256
226