1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
4 *
5 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
6 */
7
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10
/*
 * Define one assembler symbol per vector operand spelling, .Lv0.4s through
 * .Lv12.4s, whose value is the register number (0..12). The instruction
 * macros in this file prepend ".L" to each operand to look that number up,
 * so only v0.4s-v12.4s can be passed to them.
 */
11	.irp		b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
12	.set		.Lv\b\().4s, \b
13	.endr
14
/*
 * sm3partw1 rd, rn, rm - emit SM3PARTW1 as a raw 32-bit opcode word.
 *
 * Operands are written as vN.4s and translated to register numbers through
 * the .LvN.4s symbols defined at the top of the file (N = 0..12 only).
 * Fields as encoded here: rd at bit 0, rn at bit 5, rm at bit 16.
 * NOTE(review): presumably .inst is used so that the file builds with
 * assemblers that lack the SM3 mnemonics - confirm.
 */
15	.macro		sm3partw1, rd, rn, rm
16	.inst		0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
17	.endm
18
/*
 * sm3partw2 rd, rn, rm - emit SM3PARTW2 as a raw 32-bit opcode word.
 *
 * Same operand convention and field layout as sm3partw1 (rd at bit 0,
 * rn at bit 5, rm at bit 16); only the base opcode differs (bit 10 set).
 */
19	.macro		sm3partw2, rd, rn, rm
20	.inst		0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
21	.endm
22
/*
 * sm3ss1 rd, rn, rm, ra - emit SM3SS1 as a raw 32-bit opcode word.
 *
 * Four-register form: rd at bit 0, rn at bit 5, ra at bit 10, rm at bit 16.
 * Note that the macro argument order (rd, rn, rm, ra) differs from the
 * ascending bit order of the fields. Operands must be v0.4s-v12.4s, which
 * are the only spellings that have a matching .LvN.4s symbol.
 */
23	.macro		sm3ss1, rd, rn, rm, ra
24	.inst		0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
25	.endm
26
/*
 * sm3tt1a rd, rn, rm, imm2 - emit SM3TT1A as a raw 32-bit opcode word.
 *
 * rd at bit 0, rn at bit 5, rm at bit 16; imm2 is a plain integer (not a
 * register spelling) placed at bit 12. The macro does not mask imm2, so
 * callers must pass a value in 0..3 to stay inside the two-bit field.
 */
27	.macro		sm3tt1a, rd, rn, rm, imm2
28	.inst		0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
29	.endm
30
/*
 * sm3tt1b rd, rn, rm, imm2 - emit SM3TT1B as a raw 32-bit opcode word.
 *
 * Same layout as sm3tt1a (rd at bit 0, rn at bit 5, imm2 at bit 12, rm at
 * bit 16); the base opcode differs from sm3tt1a only in bit 10.
 * imm2 is not masked and must be in 0..3.
 */
31	.macro		sm3tt1b, rd, rn, rm, imm2
32	.inst		0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
33	.endm
34
/*
 * sm3tt2a rd, rn, rm, imm2 - emit SM3TT2A as a raw 32-bit opcode word.
 *
 * Same layout as sm3tt1a (rd at bit 0, rn at bit 5, imm2 at bit 12, rm at
 * bit 16); the base opcode differs from sm3tt1a only in bit 11.
 * imm2 is not masked and must be in 0..3.
 */
35	.macro		sm3tt2a, rd, rn, rm, imm2
36	.inst		0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
37	.endm
38
/*
 * sm3tt2b rd, rn, rm, imm2 - emit SM3TT2B as a raw 32-bit opcode word.
 *
 * Same layout as sm3tt1a (rd at bit 0, rn at bit 5, imm2 at bit 12, rm at
 * bit 16); the base opcode has both bit 10 and bit 11 set.
 * imm2 is not masked and must be in 0..3.
 */
39	.macro		sm3tt2b, rd, rn, rm, imm2
40	.inst		0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
41	.endm
42
/*
 * round ab, s0, t0, t1, i - one sm3ss1 / sm3tt1 / sm3tt2 step.
 *
 *   ab - "a" or "b"; pasted onto sm3tt1 and sm3tt2 to select the variant
 *   s0 - register name (without .4s) passed as rm to sm3tt2
 *   t0 - register name passed as rm to sm3ss1 for this step
 *   t1 - receives t0 with each 32-bit lane rotated left by one bit
 *   i  - integer 0..3 forwarded as imm2 to sm3tt1 and sm3tt2
 *
 * Fixed registers: v8 and v9 are read by sm3ss1 and then used as the
 * destinations of sm3tt1 and sm3tt2; v5 carries the sm3ss1 result; v10 is
 * the rm operand of sm3tt1 and must be set up by the caller.
 */
43	.macro		round, ab, s0, t0, t1, i
44	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	/* t1 = rotate-left-by-1 of t0: shl supplies bits 31..1, sri inserts bit 0 */
45	shl		\t1\().4s, \t0\().4s, #1
46	sri		\t1\().4s, \t0\().4s, #31
47	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
48	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
49	.endm
50
/*
 * qround ab, s0, s1, s2, s3 [, s4] - four consecutive round steps.
 *
 *   ab    - "a" or "b", forwarded to round
 *   s0-s3 - message registers; s0 and s1 are always read, s2 and s3 only
 *           when s4 is given
 *   s4    - optional. When present, s4 is overwritten with a new vector
 *           built from s0-s3 by sm3partw1 before the rounds and finished
 *           by sm3partw2 after them (the SM3 message expansion, per the
 *           instruction names). When blank, no expansion code is emitted.
 *
 * Clobbers v10 always, plus v6 and v7 when s4 is given, on top of the
 * registers touched by round. The four round invocations alternate
 * v11/v12 as t0/t1, so the rotated value starts in v11 and is back in v11
 * (rotated left by four bits in total) when the macro ends.
 */
51	.macro		qround, ab, s0, s1, s2, s3, s4
52	.ifnb		\s4
	/* ext d, a, b, #n: d = bytes n..15 of a followed by bytes 0..n-1 of b */
53	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
54	ext		v6.16b, \s0\().16b, \s1\().16b, #12
55	ext		v7.16b, \s2\().16b, \s3\().16b, #8
56	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
57	.endif
58
	/* v10 = s0 ^ s1, consumed as the rm operand of sm3tt1 in every round */
59	eor		v10.16b, \s0\().16b, \s1\().16b
60
	/* last argument selects imm2 = 0..3 for the four steps */
61	round		\ab, \s0, v11, v12, 0
62	round		\ab, \s0, v12, v11, 1
63	round		\ab, \s0, v11, v12, 2
64	round		\ab, \s0, v12, v11, 3
65
66	.ifnb		\s4
	/* v6 and v7 still hold the ext results computed before the rounds */
67	sm3partw2	\s4\().4s, v7.4s, v6.4s
68	.endif
69	.endm
70
71	/*
72	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
73	 *                       int blocks)
74	 */
75	.text
/*
 * Register usage, as established by the code below:
 *   x0       - sst: eight 32-bit state words, loaded on entry, stored on exit
 *   x1       - src: post-incremented by 64 bytes for every block
 *   w2       - blocks: decremented once per block, loop ends when it is 0
 *   x8       - address of .Lt
 *   v0-v4    - message words, cycled through by the qround invocations
 *   v5-v7    - scratch of the round and qround macros
 *   v8, v9   - working state
 *   v10      - scratch written by qround
 *   v11, v12 - current constant, rotated left by one bit per round
 *   v13, v14 - first and second word of .Lt, each in lane 0
 *   v15, v16 - copy of v8/v9 taken at the start of each block
 *
 * The loop body runs before the counter is tested, so blocks == 0 is not
 * handled: w2 would wrap around and the loop would keep reading past src.
 * NOTE(review): callers presumably guarantee blocks >= 1 - confirm.
 *
 * NOTE(review): v8, v9 and v13-v15 are overwritten without being saved or
 * restored here; presumably the caller owns the SIMD register file around
 * this call (e.g. kernel_neon_begin/kernel_neon_end) - confirm.
 */
76SYM_FUNC_START(sm3_ce_transform)
77	/* load state */
78	ld1		{v8.4s-v9.4s}, [x0]
	/*
	 * rev64 swaps the two words of each 64-bit half and ext #8 swaps the
	 * halves, so together they reverse the order of the four 32-bit words
	 * in each register.
	 */
79	rev64		v8.4s, v8.4s
80	rev64		v9.4s, v9.4s
81	ext		v8.16b, v8.16b, v8.16b, #8
82	ext		v9.16b, v9.16b, v9.16b, #8
83
	/* s13 = .Lt[0], s14 = .Lt[1]; a scalar load clears the upper lanes */
84	adr_l		x8, .Lt
85	ldp		s13, s14, [x8]
86
87	/* load input */
880:	ld1		{v0.16b-v3.16b}, [x1], #64
89	sub		w2, w2, #1
90
	/* keep the incoming state for the XOR at the end of the block */
91	mov		v15.16b, v8.16b
92	mov		v16.16b, v9.16b
93
	/*
	 * Byte-swap every 32-bit input word. CPU_LE is defined outside this
	 * file; presumably it emits its argument on little-endian builds only.
	 */
94CPU_LE(	rev32		v0.16b, v0.16b		)
95CPU_LE(	rev32		v1.16b, v1.16b		)
96CPU_LE(	rev32		v2.16b, v2.16b		)
97CPU_LE(	rev32		v3.16b, v3.16b		)
98
	/* rotate v13 by 4 bytes: the word from lane 0 ends up in lane 3 of v11 */
99	ext		v11.16b, v13.16b, v13.16b, #4
100
	/* 4 qrounds = 16 rounds with the "a" instruction variants */
101	qround		a, v0, v1, v2, v3, v4
102	qround		a, v1, v2, v3, v4, v0
103	qround		a, v2, v3, v4, v0, v1
104	qround		a, v3, v4, v0, v1, v2
105
	/* switch to the second constant, again placed in lane 3 of v11 */
106	ext		v11.16b, v14.16b, v14.16b, #4
107
	/* 12 qrounds = 48 rounds with the "b" instruction variants */
108	qround		b, v4, v0, v1, v2, v3
109	qround		b, v0, v1, v2, v3, v4
110	qround		b, v1, v2, v3, v4, v0
111	qround		b, v2, v3, v4, v0, v1
112	qround		b, v3, v4, v0, v1, v2
113	qround		b, v4, v0, v1, v2, v3
114	qround		b, v0, v1, v2, v3, v4
115	qround		b, v1, v2, v3, v4, v0
116	qround		b, v2, v3, v4, v0, v1
	/* the last three qrounds pass no s4, so no further words are expanded */
117	qround		b, v3, v4
118	qround		b, v4, v0
119	qround		b, v0, v1
120
	/* new state = result of the 64 rounds XOR state at block start */
121	eor		v8.16b, v8.16b, v15.16b
122	eor		v9.16b, v9.16b, v16.16b
123
124	/* handled all input blocks? */
125	cbnz		w2, 0b
126
127	/* save state */
	/* same word-order reversal as on load, restoring the memory layout */
128	rev64		v8.4s, v8.4s
129	rev64		v9.4s, v9.4s
130	ext		v8.16b, v8.16b, v8.16b, #8
131	ext		v9.16b, v9.16b, v9.16b, #8
132	st1		{v8.4s-v9.4s}, [x0]
133	ret
134SYM_FUNC_END(sm3_ce_transform)
135
/*
 * Two 32-bit constants, loaded as a pair into s13/s14 by sm3_ce_transform.
 * The first seeds the 16 rounds run with the "a" instruction variants, the
 * second the 48 rounds run with the "b" variants; the round macro rotates
 * the value left by one bit per round. Per the SM3 specification the
 * constants are 0x79cc4519 (rounds 0..15) and 0x7a879d8a (rounds 16..63);
 * the second word stored here, 0x9d8a7a87, is 0x7a879d8a rotated left by
 * 16 bits, i.e. its value at round 16.
 * .align 3 requests 8-byte alignment, matching the 8-byte ldp.
 */
136	.section	".rodata", "a"
137	.align		3
138.Lt:	.word		0x79cc4519, 0x9d8a7a87
139