xref: /openbsd/sys/arch/octeon/dev/octcrypto_asm.S (revision 7b6d8aac)
1/*	$OpenBSD: octcrypto_asm.S,v 1.2 2019/01/03 17:06:22 visa Exp $	*/
2
3/*
4 * Copyright (c) 2018 Visa Hankala
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <machine/asm.h>
20
/*
 * Assembler setup.
 *
 * Select the OCTEON instruction set, and tell the assembler not to
 * reorder instructions: the instruction written right after each branch
 * or jump in this file is its delay slot, marked with one extra leading
 * space.
 */
	.set	arch=octeon
	.set	noreorder
23
/*
 * COP2 registers and operation codes.
 *
 * The values are used as the selector operand of dmtc2 (MT_*) and
 * dmfc2 (MF_*).  Registers that are wider than 64 bits are accessed
 * one doubleword at a time as NAME+0, NAME+1, ...
 */

/* AES unit */
#define MF_AES_RESINP		0x0100
#define MT_AES_RESINP		0x0100
#define MT_AES_IV		0x0102
#define MT_AES_KEY		0x0104
#define MT_AES_ENC_CBC0		0x0108
#define MT_AES_ENC_CBC1		0x3109
#define MT_AES_ENC0		0x010a
#define MT_AES_ENC1		0x310b
#define MT_AES_DEC_CBC0		0x010c
#define MT_AES_DEC_CBC1		0x310d

/* Hash unit, narrow variant (used by the MD5, SHA-1 and SHA-256 routines) */
#define MT_HSH_DAT		0x0040
#define MF_HSH_IV		0x0048
#define MT_HSH_IV		0x0048
#define MT_HSH_STARTMD5		0x4047
#define MT_HSH_STARTSHA		0x4057
#define MT_HSH_STARTSHA256	0x404f

/* Hash unit, wide variant (used by the SHA-512 routine) */
#define MT_HSH_DATW		0x0240
#define MF_HSH_IVW		0x0250
#define MT_HSH_IVW		0x0250
#define MT_HSH_STARTSHA512	0x424f

/* Galois field multiplier unit */
#define MT_GFM_MUL		0x0258
#define MF_GFM_RESINP		0x025a
#define MT_GFM_RESINP		0x025a
#define MT_GFM_XOR0		0x025c
#define MT_GFM_XORMUL1		0x425d
#define MT_GFM_POLY		0x025e

/* Value written to MT_GFM_POLY by octcrypto_ghash_init(). */
#define GF128_POLY		0xe100
56
/*
 * void octcrypto_aes_set_key(const uint64_t *key, size_t len)
 *
 * Load an AES key into the coprocessor's key registers.
 * `len' is the key size in bytes.  A length below 24 is handled as
 * a 128-bit key, a length below 32 as a 192-bit key, and anything else
 * as a 256-bit key.  Key registers beyond the key size are not written.
 *
 * NOTE(review): only the MT_AES_KEY words are written here; confirm that
 * the coprocessor needs no separate key length setting.
 */
LEAF(octcrypto_aes_set_key, 0)
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	sltiu	t2, a1, 24		/* t2 = (len < 24) */
	dmtc2	a4, MT_AES_KEY
	bnez	t2, 9f
	 dmtc2	a5, MT_AES_KEY+1	/* Completes a 128-bit key. */
	ld	a4, 16(a0)
	sltiu	t2, a1, 32		/* t2 = (len < 32) */
	bnez	t2, 9f
	 dmtc2	a4, MT_AES_KEY+2	/* Completes a 192-bit key. */
	ld	a4, 24(a0)
	jr	ra
	 dmtc2	a4, MT_AES_KEY+3	/* Completes a 256-bit key. */
9:
	jr	ra
	 nop
END(octcrypto_aes_set_key)
78
/*
 * void octcrypto_aes_clear(void)
 *
 * Wipe the AES unit: overwrite all four key words and both words of
 * the result/input register with zero.
 */
LEAF(octcrypto_aes_clear, 0)
	/* Key material first, ... */
	.irp	word, 0, 1, 2, 3
	dmtc2	zero, MT_AES_KEY+\word
	.endr
	/* ... then the last processed block. */
	dmtc2	zero, MT_AES_RESINP
	dmtc2	zero, MT_AES_RESINP+1
	jr	ra
	 nop
END(octcrypto_aes_clear)
93
/*
 * void octcrypto_aes_enc(uint64_t *buf)
 *
 * AES encrypt the single 16-byte block at `buf' in place, using the key
 * currently loaded in the coprocessor.
 */
LEAF(octcrypto_aes_enc, 0)
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	/* Hand both halves of the block to the AES unit. */
	dmtc2	a4, MT_AES_ENC0
	dmtc2	a5, MT_AES_ENC1
	/* Fetch the result and write it over the input. */
	dmfc2	a4, MF_AES_RESINP
	dmfc2	a5, MF_AES_RESINP+1
	sd	a4, 0(a0)
	jr	ra
	 sd	a5, 8(a0)
END(octcrypto_aes_enc)
110
/*
 * void octcrypto_aes_cbc_enc(void *buf, size_t size, const void *iv)
 *
 * AES CBC encrypt the whole 16-byte blocks of `buf' in place, with `iv'
 * loaded as the initial chaining value.  Bytes after the last whole
 * block are left untouched, and nothing is done if `size' is below 16.
 * `buf' may have any alignment; `iv' is read with doubleword loads.
 *
 * Register use: a3 = end of the last whole block, a4/a5 = current block.
 */
LEAF(octcrypto_aes_cbc_enc, 0)
	sltiu	t3, a1, 16
	bnez	t3, 9f			/* No whole block. */
	 andi	t2, a1, 15		/* t2 = size % 16 */
	ld	a4, 0(a2)
	ld	a5, 8(a2)
	dsubu	a3, a1, t2
	daddu	a3, a0, a3
	andi	t2, a0, 7		/* t2 = misalignment of buf */
	dmtc2	a4, MT_AES_IV
	bnez	t2, 2f
	 dmtc2	a5, MT_AES_IV+1

	/* Aligned buffer: plain doubleword loads and stores. */
1:
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	daddiu	a0, a0, 16
	sltu	t3, a0, a3		/* t3 = more blocks to go */
	dmtc2	a4, MT_AES_ENC_CBC0
	dmtc2	a5, MT_AES_ENC_CBC1
	dmfc2	a4, MF_AES_RESINP
	dmfc2	a5, MF_AES_RESINP+1
	sd	a4, -16(a0)
	bnez	t3, 1b
	 sd	a5, -8(a0)
	jr	ra
	 nop

	/* Unaligned buffer: same loop with LDHI/LDLO and SDHI/SDLO pairs. */
2:
	LDHI	a4, 0(a0)
	LDLO	a4, 7(a0)
	LDHI	a5, 8(a0)
	LDLO	a5, 15(a0)
	daddiu	a0, a0, 16
	sltu	t3, a0, a3
	dmtc2	a4, MT_AES_ENC_CBC0
	dmtc2	a5, MT_AES_ENC_CBC1
	dmfc2	a4, MF_AES_RESINP
	dmfc2	a5, MF_AES_RESINP+1
	SDHI	a4, -16(a0)
	SDLO	a4, -9(a0)
	SDHI	a5, -8(a0)
	bnez	t3, 2b
	 SDLO	a5, -1(a0)

9:
	jr	ra
	 nop
END(octcrypto_aes_cbc_enc)
168
/*
 * void octcrypto_aes_cbc_dec(void *dst, size_t size, const void *iv)
 *
 * AES CBC decrypt the whole 16-byte blocks of `dst' in place, with `iv'
 * loaded as the initial chaining value.  Bytes after the last whole
 * block are left untouched, and nothing is done if `size' is below 16.
 * `dst' may have any alignment; `iv' is read with doubleword loads.
 *
 * Register use: a3 = end of the last whole block, a4/a5 = current block.
 */
LEAF(octcrypto_aes_cbc_dec, 0)
	sltiu	t3, a1, 16
	bnez	t3, 9f			/* No whole block. */
	 andi	t2, a1, 15		/* t2 = size % 16 */
	ld	a4, 0(a2)
	ld	a5, 8(a2)
	dsubu	a3, a1, t2
	daddu	a3, a0, a3
	andi	t2, a0, 7		/* t2 = misalignment of dst */
	dmtc2	a4, MT_AES_IV
	bnez	t2, 2f
	 dmtc2	a5, MT_AES_IV+1

	/* Aligned buffer: plain doubleword loads and stores. */
1:
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	daddiu	a0, a0, 16
	sltu	t3, a0, a3		/* t3 = more blocks to go */
	dmtc2	a4, MT_AES_DEC_CBC0
	dmtc2	a5, MT_AES_DEC_CBC1
	dmfc2	a4, MF_AES_RESINP
	dmfc2	a5, MF_AES_RESINP+1
	sd	a4, -16(a0)
	bnez	t3, 1b
	 sd	a5, -8(a0)
	jr	ra
	 nop

	/* Unaligned buffer: same loop with LDHI/LDLO and SDHI/SDLO pairs. */
2:
	LDHI	a4, 0(a0)
	LDLO	a4, 7(a0)
	LDHI	a5, 8(a0)
	LDLO	a5, 15(a0)
	daddiu	a0, a0, 16
	sltu	t3, a0, a3
	dmtc2	a4, MT_AES_DEC_CBC0
	dmtc2	a5, MT_AES_DEC_CBC1
	dmfc2	a4, MF_AES_RESINP
	dmfc2	a5, MF_AES_RESINP+1
	SDHI	a4, -16(a0)
	SDLO	a4, -9(a0)
	SDHI	a5, -8(a0)
	bnez	t3, 2b
	 SDLO	a5, -1(a0)

9:
	jr	ra
	 nop
END(octcrypto_aes_cbc_dec)
226
/*
 * void octcrypto_aes_ctr_enc(void *buf, size_t size, const void *icb)
 *
 * AES CTR encrypt the whole 16-byte blocks of `buf' in place.
 * The counter block is read from `icb' and incremented before every
 * keystream block, so the first block is processed with icb + 1.
 * The updated counter is not written back to `icb'.
 * Bytes after the last whole block are left untouched, and nothing is
 * done if `size' is below 16.
 *
 * The increment is a 64-bit add on the second doubleword of the counter
 * block; it assumes big endian byte order and no overflow.
 *
 * Register use: t2/t3 = counter block, t0/t1 = keystream,
 * a4/a5 = data, a3 = end of the last whole block.
 */
LEAF(octcrypto_aes_ctr_enc, 0)
	sltiu	t0, a1, 16
	bnez	t0, 9f			/* No whole block. */
	 andi	t1, a1, 15		/* t1 = size % 16 */
	ld	t2, 0(a2)
	ld	t3, 8(a2)
	dsubu	a3, a1, t1
	andi	t0, a0, 7		/* t0 = misalignment of buf */
	bnez	t0, 2f
	 daddu	a3, a0, a3

	/* Aligned buffer: plain doubleword loads and stores. */
1:
	daddiu	t3, t3, 1		/* Next counter value. */
	dmtc2	t2, MT_AES_ENC0
	dmtc2	t3, MT_AES_ENC1
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	daddiu	a0, a0, 16
	dmfc2	t0, MF_AES_RESINP	/* t0/t1 = keystream block */
	dmfc2	t1, MF_AES_RESINP+1
	xor	a4, a4, t0
	xor	a5, a5, t1
	sltu	t0, a0, a3		/* t0 = more blocks to go */
	sd	a4, -16(a0)
	bnez	t0, 1b
	 sd	a5, -8(a0)
	jr	ra
	 nop

	/* Unaligned buffer: same loop with LDHI/LDLO and SDHI/SDLO pairs. */
2:
	daddiu	t3, t3, 1		/* Next counter value. */
	dmtc2	t2, MT_AES_ENC0
	dmtc2	t3, MT_AES_ENC1
	LDHI	a4, 0(a0)
	LDLO	a4, 7(a0)
	LDHI	a5, 8(a0)
	LDLO	a5, 15(a0)
	daddiu	a0, a0, 16
	dmfc2	t0, MF_AES_RESINP	/* t0/t1 = keystream block */
	dmfc2	t1, MF_AES_RESINP+1
	xor	a4, a4, t0
	xor	a5, a5, t1
	sltu	t0, a0, a3		/* t0 = more blocks to go */
	SDHI	a4, -16(a0)
	SDLO	a4, -9(a0)
	SDHI	a5, -8(a0)
	bnez	t0, 2b
	 SDLO	a5, -1(a0)

9:
	jr	ra
	 nop
END(octcrypto_aes_ctr_enc)
300
/*
 * HASH_NARROW(name, type)
 *
 * Define octcrypto_hash_<name>(buf, size) for a hash with 64-byte
 * blocks.  For every whole block of `buf', the first seven doublewords
 * are written to MT_HSH_DAT+0..6 and the eighth to MT_HSH_START<type>.
 * Trailing size % 64 bytes are ignored, and nothing is done if `size'
 * is below 64.  `buf' may have any alignment.
 *
 * Register use: a0 = buf, a1 = size, a3 = end of the last whole block,
 * a4/a5/t0/t1 = data, t2/t3 = scratch.
 */
#define HASH_NARROW(name, type)						\
LEAF(octcrypto_hash_##name, 0)						\
	sltiu	t2, a1, 64;						\
	bnez	t2, 9f;			/* No whole block. */		\
	 andi	t3, a1, 63;		/* t3 = size % 64 */		\
	dsubu	a3, a1, t3;						\
	andi	t2, a0, 7;		/* t2 = misalignment */		\
	bnez	t2, 2f;							\
	 daddu	a3, a0, a3;						\
									\
	/* Aligned buffer: plain doubleword loads. */			\
1:									\
	ld	a4, 0(a0);						\
	ld	a5, 8(a0);						\
	ld	t0, 16(a0);						\
	ld	t1, 24(a0);						\
	dmtc2	a4, MT_HSH_DAT;						\
	dmtc2	a5, MT_HSH_DAT+1;					\
	dmtc2	t0, MT_HSH_DAT+2;					\
	dmtc2	t1, MT_HSH_DAT+3;					\
	ld	a4, 32(a0);						\
	ld	a5, 40(a0);						\
	ld	t0, 48(a0);						\
	ld	t1, 56(a0);						\
	daddiu	a0, a0, 64;						\
	sltu	t2, a0, a3;		/* t2 = more blocks to go */	\
	dmtc2	a4, MT_HSH_DAT+4;					\
	dmtc2	a5, MT_HSH_DAT+5;					\
	dmtc2	t0, MT_HSH_DAT+6;					\
	bnez	t2, 1b;							\
	 dmtc2	t1, MT_HSH_START##type;					\
	jr	ra;							\
	 nop;								\
									\
	/* Unaligned buffer: LDHI/LDLO pairs. */			\
2:									\
	LDHI	a4, 0(a0);						\
	LDLO	a4, 7(a0);						\
	LDHI	a5, 8(a0);						\
	LDLO	a5, 15(a0);						\
	LDHI	t0, 16(a0);						\
	LDLO	t0, 23(a0);						\
	LDHI	t1, 24(a0);						\
	LDLO	t1, 31(a0);						\
	dmtc2	a4, MT_HSH_DAT;						\
	dmtc2	a5, MT_HSH_DAT+1;					\
	dmtc2	t0, MT_HSH_DAT+2;					\
	dmtc2	t1, MT_HSH_DAT+3;					\
	LDHI	a4, 32(a0);						\
	LDLO	a4, 39(a0);						\
	LDHI	a5, 40(a0);						\
	LDLO	a5, 47(a0);						\
	LDHI	t0, 48(a0);						\
	LDLO	t0, 55(a0);						\
	LDHI	t1, 56(a0);						\
	LDLO	t1, 63(a0);						\
	daddiu	a0, a0, 64;						\
	sltu	t2, a0, a3;		/* t2 = more blocks to go */	\
	dmtc2	a4, MT_HSH_DAT+4;					\
	dmtc2	a5, MT_HSH_DAT+5;					\
	dmtc2	t0, MT_HSH_DAT+6;					\
	bnez	t2, 2b;							\
	 dmtc2	t1, MT_HSH_START##type;					\
9:									\
	jr	ra;							\
	 nop;								\
END(octcrypto_hash_##name)
369
/*
 * HASH_WIDE(name, type)
 *
 * Define octcrypto_hash_<name>(buf, size) for a hash with 128-byte
 * blocks.  For every whole block of `buf', the first fifteen doublewords
 * are written to MT_HSH_DATW+0..14 and the sixteenth to
 * MT_HSH_START<type>.  Trailing size % 128 bytes are ignored, and
 * nothing is done if `size' is below 128.  `buf' may have any alignment.
 *
 * Register use: a0 = buf, a1 = size, a3 = end of the last whole block,
 * a4/a5/t0/t1 = data, t2/t3 = scratch.
 */
#define HASH_WIDE(name, type)						\
LEAF(octcrypto_hash_##name, 0)						\
	sltiu	t2, a1, 128;						\
	bnez	t2, 9f;			/* No whole block. */		\
	 andi	t3, a1, 127;		/* t3 = size % 128 */		\
	dsubu	a3, a1, t3;						\
	andi	t2, a0, 7;		/* t2 = misalignment */		\
	bnez	t2, 2f;							\
	 daddu	a3, a0, a3;						\
									\
	/* Aligned buffer: plain doubleword loads. */			\
1:									\
	ld	a4, 0(a0);						\
	ld	a5, 8(a0);						\
	ld	t0, 16(a0);						\
	ld	t1, 24(a0);						\
	dmtc2	a4, MT_HSH_DATW;					\
	dmtc2	a5, MT_HSH_DATW+1;					\
	dmtc2	t0, MT_HSH_DATW+2;					\
	dmtc2	t1, MT_HSH_DATW+3;					\
	ld	a4, 32(a0);						\
	ld	a5, 40(a0);						\
	ld	t0, 48(a0);						\
	ld	t1, 56(a0);						\
	dmtc2	a4, MT_HSH_DATW+4;					\
	dmtc2	a5, MT_HSH_DATW+5;					\
	dmtc2	t0, MT_HSH_DATW+6;					\
	dmtc2	t1, MT_HSH_DATW+7;					\
	ld	a4, 64(a0);						\
	ld	a5, 72(a0);						\
	ld	t0, 80(a0);						\
	ld	t1, 88(a0);						\
	dmtc2	a4, MT_HSH_DATW+8;					\
	dmtc2	a5, MT_HSH_DATW+9;					\
	dmtc2	t0, MT_HSH_DATW+10;					\
	dmtc2	t1, MT_HSH_DATW+11;					\
	ld	a4, 96(a0);						\
	ld	a5, 104(a0);						\
	ld	t0, 112(a0);						\
	ld	t1, 120(a0);						\
	daddiu	a0, a0, 128;						\
	sltu	t2, a0, a3;		/* t2 = more blocks to go */	\
	dmtc2	a4, MT_HSH_DATW+12;					\
	dmtc2	a5, MT_HSH_DATW+13;					\
	dmtc2	t0, MT_HSH_DATW+14;					\
	bnez	t2, 1b;							\
	 dmtc2	t1, MT_HSH_START##type;					\
	jr	ra;							\
	 nop;								\
									\
	/* Unaligned buffer: LDHI/LDLO pairs. */			\
2:									\
	LDHI	a4, 0(a0);						\
	LDLO	a4, 7(a0);						\
	LDHI	a5, 8(a0);						\
	LDLO	a5, 15(a0);						\
	LDHI	t0, 16(a0);						\
	LDLO	t0, 23(a0);						\
	LDHI	t1, 24(a0);						\
	LDLO	t1, 31(a0);						\
	dmtc2	a4, MT_HSH_DATW;					\
	dmtc2	a5, MT_HSH_DATW+1;					\
	dmtc2	t0, MT_HSH_DATW+2;					\
	dmtc2	t1, MT_HSH_DATW+3;					\
	LDHI	a4, 32(a0);						\
	LDLO	a4, 39(a0);						\
	LDHI	a5, 40(a0);						\
	LDLO	a5, 47(a0);						\
	LDHI	t0, 48(a0);						\
	LDLO	t0, 55(a0);						\
	LDHI	t1, 56(a0);						\
	LDLO	t1, 63(a0);						\
	dmtc2	a4, MT_HSH_DATW+4;					\
	dmtc2	a5, MT_HSH_DATW+5;					\
	dmtc2	t0, MT_HSH_DATW+6;					\
	dmtc2	t1, MT_HSH_DATW+7;					\
	LDHI	a4, 64(a0);						\
	LDLO	a4, 71(a0);						\
	LDHI	a5, 72(a0);						\
	LDLO	a5, 79(a0);						\
	LDHI	t0, 80(a0);						\
	LDLO	t0, 87(a0);						\
	LDHI	t1, 88(a0);						\
	LDLO	t1, 95(a0);						\
	dmtc2	a4, MT_HSH_DATW+8;					\
	dmtc2	a5, MT_HSH_DATW+9;					\
	dmtc2	t0, MT_HSH_DATW+10;					\
	dmtc2	t1, MT_HSH_DATW+11;					\
	LDHI	a4, 96(a0);						\
	LDLO	a4, 103(a0);						\
	LDHI	a5, 104(a0);						\
	LDLO	a5, 111(a0);						\
	LDHI	t0, 112(a0);						\
	LDLO	t0, 119(a0);						\
	LDHI	t1, 120(a0);						\
	LDLO	t1, 127(a0);						\
	daddiu	a0, a0, 128;						\
	sltu	t2, a0, a3;		/* t2 = more blocks to go */	\
	dmtc2	a4, MT_HSH_DATW+12;					\
	dmtc2	a5, MT_HSH_DATW+13;					\
	dmtc2	t0, MT_HSH_DATW+14;					\
	bnez	t2, 2b;							\
	 dmtc2	t1, MT_HSH_START##type;					\
9:									\
	jr	ra;							\
	 nop;								\
END(octcrypto_hash_##name)
478
/*
 * void octcrypto_hash_md5(const void *buf, size_t size)
 *
 * Run the hash unit's MD5 operation over each whole 64-byte block of
 * `buf'.  Trailing size % 64 bytes are ignored.
 */
HASH_NARROW(md5, MD5)
483
/*
 * void octcrypto_hash_sha1(const void *buf, size_t size)
 *
 * Run the hash unit's SHA operation (MT_HSH_STARTSHA) over each whole
 * 64-byte block of `buf'.  Trailing size % 64 bytes are ignored.
 */
HASH_NARROW(sha1, SHA)
488
/*
 * void octcrypto_hash_sha256(const void *src, size_t size)
 *
 * Run the hash unit's SHA256 operation over each whole 64-byte block of
 * `src'.  Trailing size % 64 bytes are ignored.
 */
HASH_NARROW(sha256, SHA256)
493
/*
 * void octcrypto_hash_sha512(const void *src, size_t size)
 *
 * Run the hash unit's SHA512 operation over each whole 128-byte block
 * of `src'.  Trailing size % 128 bytes are ignored.
 */
HASH_WIDE(sha512, SHA512)
498
/*
 * void octcrypto_hash_set_ivn(const uint64_t *iv)
 *
 * Load the narrow hash state: copy four doublewords from `iv' to
 * MT_HSH_IV+0..3.
 */
LEAF(octcrypto_hash_set_ivn, 0)
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	dmtc2	a4, MT_HSH_IV
	dmtc2	a5, MT_HSH_IV+1
	ld	a4, 16(a0)
	ld	a5, 24(a0)
	dmtc2	a4, MT_HSH_IV+2
	jr	ra
	 dmtc2	a5, MT_HSH_IV+3
END(octcrypto_hash_set_ivn)
513
/*
 * void octcrypto_hash_set_ivw(const uint64_t *iv)
 *
 * Load the wide hash state: copy eight doublewords from `iv' to
 * MT_HSH_IVW+0..7.
 */
LEAF(octcrypto_hash_set_ivw, 0)
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	dmtc2	a4, MT_HSH_IVW
	dmtc2	a5, MT_HSH_IVW+1
	ld	a4, 16(a0)
	ld	a5, 24(a0)
	dmtc2	a4, MT_HSH_IVW+2
	dmtc2	a5, MT_HSH_IVW+3
	ld	a4, 32(a0)
	ld	a5, 40(a0)
	dmtc2	a4, MT_HSH_IVW+4
	dmtc2	a5, MT_HSH_IVW+5
	ld	a4, 48(a0)
	ld	a5, 56(a0)
	dmtc2	a4, MT_HSH_IVW+6
	jr	ra
	 dmtc2	a5, MT_HSH_IVW+7
END(octcrypto_hash_set_ivw)
536
/*
 * void octcrypto_hash_get_ivn(uint64_t *iv)
 *
 * Read back the narrow hash state: copy MF_HSH_IV+0..3 into the four
 * doublewords at `iv'.
 */
LEAF(octcrypto_hash_get_ivn, 0)
	dmfc2	a4, MF_HSH_IV
	dmfc2	a5, MF_HSH_IV+1
	sd	a4, 0(a0)
	sd	a5, 8(a0)
	dmfc2	a4, MF_HSH_IV+2
	dmfc2	a5, MF_HSH_IV+3
	sd	a4, 16(a0)
	jr	ra
	 sd	a5, 24(a0)
END(octcrypto_hash_get_ivn)
551
/*
 * void octcrypto_hash_get_ivw(uint64_t *iv)
 *
 * Read back the wide hash state: copy MF_HSH_IVW+0..7 into the eight
 * doublewords at `iv'.
 */
LEAF(octcrypto_hash_get_ivw, 0)
	dmfc2	a4, MF_HSH_IVW
	dmfc2	a5, MF_HSH_IVW+1
	sd	a4, 0(a0)
	sd	a5, 8(a0)
	dmfc2	a4, MF_HSH_IVW+2
	dmfc2	a5, MF_HSH_IVW+3
	sd	a4, 16(a0)
	sd	a5, 24(a0)
	dmfc2	a4, MF_HSH_IVW+4
	dmfc2	a5, MF_HSH_IVW+5
	sd	a4, 32(a0)
	sd	a5, 40(a0)
	dmfc2	a4, MF_HSH_IVW+6
	dmfc2	a5, MF_HSH_IVW+7
	sd	a4, 48(a0)
	jr	ra
	 sd	a5, 56(a0)
END(octcrypto_hash_get_ivw)
574
/*
 * void octcrypto_hash_clearn(void)
 *
 * Wipe the narrow hash unit: zero the state words MT_HSH_IV+0..3 and
 * the data words MT_HSH_DAT+0..6.
 */
LEAF(octcrypto_hash_clearn, 0)
	.irp	word, 0, 1, 2, 3
	dmtc2	zero, MT_HSH_IV+\word
	.endr
	.irp	word, 0, 1, 2, 3, 4, 5
	dmtc2	zero, MT_HSH_DAT+\word
	.endr
	jr	ra
	 dmtc2	zero, MT_HSH_DAT+6
END(octcrypto_hash_clearn)
592
/*
 * void octcrypto_hash_clearw(void)
 *
 * Wipe the wide hash unit: zero the state words MT_HSH_IVW+0..7 and
 * the data words MT_HSH_DATW+0..14.
 */
LEAF(octcrypto_hash_clearw, 0)
	.irp	word, 0, 1, 2, 3, 4, 5, 6, 7
	dmtc2	zero, MT_HSH_IVW+\word
	.endr
	.irp	word, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
	dmtc2	zero, MT_HSH_DATW+\word
	.endr
	jr	ra
	 dmtc2	zero, MT_HSH_DATW+14
END(octcrypto_hash_clearw)
622
/*
 * void octcrypto_ghash_init(const uint64_t *key, const uint64_t *state)
 *
 * Set up the Galois field multiplier unit for GHASH: load the GF(2^128)
 * polynomial and the 16-byte hash key `key' (the multiplier).
 * If `state' is non-NULL, its 16 bytes become the initial GHASH state;
 * otherwise the state starts from zero.
 */
LEAF(octcrypto_ghash_init, 0)
	/* Polynomial. */
	li	a4, GF128_POLY
	dmtc2	a4, MT_GFM_POLY
	/* Hash key / multiplier. */
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	dmtc2	a4, MT_GFM_MUL
	beqz	a1, 1f			/* No state given. */
	 dmtc2	a5, MT_GFM_MUL+1
	/* Caller-provided state. */
	ld	a4, 0(a1)
	ld	a5, 8(a1)
	dmtc2	a4, MT_GFM_RESINP
	jr	ra
	 dmtc2	a5, MT_GFM_RESINP+1
1:
	/* Zero state. */
	dmtc2	zero, MT_GFM_RESINP
	jr	ra
	 dmtc2	zero, MT_GFM_RESINP+1
END(octcrypto_ghash_init)
652
/*
 * void octcrypto_ghash_finish(uint64_t *x)
 *
 * Copy the current 16-byte GHASH state into `x', then zero the GFM
 * unit's state and multiplier registers.
 */
LEAF(octcrypto_ghash_finish, 0)
	/* Save the state. */
	dmfc2	a4, MF_GFM_RESINP
	sd	a4, 0(a0)
	dmfc2	a5, MF_GFM_RESINP+1
	sd	a5, 8(a0)
	/* Wipe the state and the hash key. */
	dmtc2	zero, MT_GFM_RESINP
	dmtc2	zero, MT_GFM_RESINP+1
	dmtc2	zero, MT_GFM_MUL
	jr	ra
	 dmtc2	zero, MT_GFM_MUL+1
END(octcrypto_ghash_finish)
670
/*
 * void octcrypto_ghash_update(const void *buf, size_t len)
 *
 * Fold the whole 16-byte blocks of `buf' into the GHASH state.
 *
 * For each X_i in X_0 || X_1 || ... || X_{n-1} = buf:
 *     Y := (Y ^ X_i) * H
 *
 * Trailing len % 16 bytes are ignored, and nothing is done if `len' is
 * below 16.  `buf' may have any alignment.
 *
 * Register use: a3 = end of the last whole block, a4/a5 = current block.
 */
LEAF(octcrypto_ghash_update, 0)
	sltiu	t3, a1, 16
	bnez	t3, 9f			/* No whole block. */
	 andi	t2, a1, 15		/* t2 = len % 16 */
	dsubu	a3, a1, t2
	andi	t3, a0, 7		/* t3 = misalignment of buf */
	bnez	t3, 2f
	 daddu	a3, a0, a3

	/* Aligned buffer: plain doubleword loads. */
1:
	ld	a4, 0(a0)
	ld	a5, 8(a0)
	daddiu	a0, a0, 16
	sltu	t3, a0, a3		/* t3 = more blocks to go */
	dmtc2	a4, MT_GFM_XOR0
	bnez	t3, 1b
	 dmtc2	a5, MT_GFM_XORMUL1
	jr	ra
	 nop

	/* Unaligned buffer: LDHI/LDLO pairs. */
2:
	LDHI	a4, 0(a0)
	LDLO	a4, 7(a0)
	LDHI	a5, 8(a0)
	LDLO	a5, 15(a0)
	daddiu	a0, a0, 16
	sltu	t3, a0, a3		/* t3 = more blocks to go */
	dmtc2	a4, MT_GFM_XOR0
	bnez	t3, 2b
	 dmtc2	a5, MT_GFM_XORMUL1

9:
	jr	ra
	 nop
END(octcrypto_ghash_update)
710