1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build gc && !purego
6// +build gc,!purego
7
8#include "textflag.h"
9
10// This was ported from the amd64 implementation.
11
// POLY1305_ADD adds one 16-byte message block into the accumulator.
//
// The accumulator is held in three registers read as 64-bit limbs,
// least significant first: h0, h1, h2.
//
//   msg         pointer register; the words at (msg) and 8(msg) are
//               loaded, and msg is advanced by 16 on exit.
//   h0, h1, h2  accumulator limbs, updated in place.
//   t0, t1, t2  scratch registers, clobbered.
//
// The two loaded words are added to h0 and h1 with carry propagation
// (ADDC then ADDE), and the constant 1 plus the carry out of h1 is added
// to h2. With 64-bit limbs that constant is the value 2^128, i.e. the bit
// just above the 16-byte block. The carry bit is clobbered.
//
// Keep comments out of the macro body: a // comment on a continuation
// line would hide the trailing backslash.
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
	MOVD (msg), t0;  \
	MOVD 8(msg), t1; \
	MOVD $1, t2;     \
	ADDC t0, h0, h0; \
	ADDE t1, h1, h1; \
	ADDE t2, h2;     \
	ADD  $16, msg
20
// POLY1305_MUL multiplies the accumulator (h2:h1:h0, 64-bit limbs, least
// significant first) by the two-limb value (r1:r0) and folds the result
// back into h0, h1, h2.
//
//   h0, h1, h2  accumulator limbs; inputs and outputs.
//   r0, r1      multiplier limbs; read only.
//   t0..t5      scratch registers, clobbered.
//
// Step 1 (through the first "ADDZE t3"): the partial products are summed
// into a four-limb value t3:t2:t1:t0. MULLD yields the low 64 bits of a
// product and MULHDU the high 64 bits; only the low halves of r0*h2 and
// r1*h2 are kept. The multiplies are interleaved with the carry-chain adds
// (ADDC/ADDE/ADDZE), so the instruction order must not be changed: each
// ADDE/ADDZE consumes the carry produced by the ADDC/ADDE immediately
// before it in the chain.
//
// Step 2: h0 = t0, h1 = t1, h2 = t2 & 3. The remaining upper part
// (t3:t2 with the low two bits of t2 cleared) is the portion of the
// product at or above bit 130. It is added to h2:h1:h0 twice: once as is
// (that is 4*c, where c = (t3:t2) >> 2) and once shifted right by two
// (that is c), for a total of 5*c. This replaces c * 2^130 with 5 * c,
// which is congruent modulo 2^130 - 5. The result is only partially
// reduced; it is not brought below the modulus here.
//
// Side effects: clobbers the carry bit and, through ANDCC, the condition
// register field that ANDCC records into.
//
// Keep comments out of the macro body: a // comment on a continuation
// line would hide the trailing backslash.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
	MULLD  r0, h0, t0;  \
	MULLD  r0, h1, t4;  \
	MULHDU r0, h0, t1;  \
	MULHDU r0, h1, t5;  \
	ADDC   t4, t1, t1;  \
	MULLD  r0, h2, t2;  \
	ADDZE  t5;          \
	MULHDU r1, h0, t4;  \
	MULLD  r1, h0, h0;  \
	ADD    t5, t2, t2;  \
	ADDC   h0, t1, t1;  \
	MULLD  h2, r1, t3;  \
	ADDZE  t4, h0;      \
	MULHDU r1, h1, t5;  \
	MULLD  r1, h1, t4;  \
	ADDC   t4, t2, t2;  \
	ADDE   t5, t3, t3;  \
	ADDC   h0, t2, t2;  \
	MOVD   $-4, t4;     \
	MOVD   t0, h0;      \
	MOVD   t1, h1;      \
	ADDZE  t3;          \
	ANDCC  $3, t2, h2;  \
	AND    t2, t4, t0;  \
	ADDC   t0, h0, h0;  \
	ADDE   t3, h1, h1;  \
	SLD    $62, t3, t4; \
	SRD    $2, t2;      \
	ADDZE  h2;          \
	OR     t4, t2, t2;  \
	SRD    $2, t3;      \
	ADDC   t2, h0, h0;  \
	ADDE   t3, h1, h1;  \
	ADDZE  h2
56
// poly1305Mask is a file-local (<>), read-only, 16-byte table made of two
// 64-bit words. Each word clears the top four bits of every 32-bit half;
// the first word additionally clears the low two bits of its upper half,
// and the second word clears the low two bits of both halves.
// NOTE(review): presumably the Poly1305 clamp mask for r; no instruction
// in the visible part of this file references it -- confirm whether it is
// still needed.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
60
// func update(state *[7]uint64, msg []byte)
//
// update absorbs msg into the accumulator stored in state.
//
// State words read:    state[0..2] = h0, h1, h2 (accumulator limbs),
//                      state[3..4] = r0, r1 (multiplier limbs).
// State words written: state[0..2] only. state[5..6] are not touched here.
//
// Frame: no locals, 32 bytes of arguments (state pointer plus the three
// slice words; the slice capacity word is not read).
//
// Register use:
//   R3            state pointer
//   R4            current message pointer
//   R5            bytes remaining
//   R8, R9, R10   h0, h1, h2
//   R11, R12      r0, r1
//   R16, R17      scratch inside POLY1305_MUL; in the tail path they hold
//                 the low and high 64-bit words of the final padded block
//   R14, R18, R20, R21, R22, R23   scratch
//
// Each full 16-byte block goes through POLY1305_ADD then POLY1305_MUL.
// A trailing partial block of 1..15 bytes is assembled in R17:R16 with a
// single 1 bit placed directly above its last byte, added to h without
// the extra 1 in h2, and then sent through the same multiply step.
// NOTE(review): the byte placement in the tail path assumes MOVD/MOVWZ/MOVHZ
// load in little-endian order -- confirm against the build target.
TEXT ·update(SB), $0-32
	MOVD state+0(FP), R3
	MOVD msg_base+8(FP), R4
	MOVD msg_len+16(FP), R5

	MOVD 0(R3), R8   // h0
	MOVD 8(R3), R9   // h1
	MOVD 16(R3), R10 // h2
	MOVD 24(R3), R11 // r0
	MOVD 32(R3), R12 // r1

	// Fewer than 16 bytes: skip the full-block loop entirely.
	CMP R5, $16
	BLT bytes_between_0_and_15

loop:
	// h += next 16 bytes of msg, plus 1 in h2; R4 advances by 16.
	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)

multiply:
	// h *= r, folded back into three limbs. Also the re-entry point for
	// the tail path, which arrives here with R5 set to 16 so that the
	// subtraction below leaves 0 remaining.
	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
	ADD $-16, R5
	CMP R5, $16
	BGE loop

bytes_between_0_and_15:
	CMP  R5, $0
	BEQ  done
	// R16/R17 were used as temporaries by POLY1305_MUL; clear them before
	// building the final block. R17 == 0 later means "high word not set".
	MOVD $0, R16 // low word of the final block
	MOVD $0, R17 // high word of the final block

flush_buffer:
	// 8 or fewer bytes left: only the low word needs to be built.
	CMP R5, $8
	BLE just1

	// R21 = R5 - 8 = number of message bytes that belong to the high word.
	MOVD $8, R21
	SUB  R21, R5, R21

	// Greater than 8 -- load the rightmost remaining bytes in msg
	// and put into R17 (h1). The load covers the last 8 bytes of msg
	// (offset R5-8), so it ends exactly at the end of the message.
	MOVD (R4)(R21), R17
	MOVD $16, R22

	// Find the offset to those bytes: R22 = (16 - R5) * 8 bits, the part
	// of the loaded word that overlaps the low word.
	SUB R5, R22, R22
	SLD $3, R22

	// Shift to get only the bytes in msg
	SRD R22, R17, R17

	// Put 1 at high end: set bit (R5 - 8) * 8 of the high word, directly
	// above the last message byte.
	MOVD $1, R23
	SLD  $3, R21
	SLD  R21, R23, R23
	OR   R23, R17, R17

	// Remainder is 8: the low word is a full 8 bytes, loaded below.
	MOVD $8, R5

just1:
	CMP R5, $8
	BLT less8

	// Exactly 8
	MOVD (R4), R16

	CMP R17, $0

	// Check if we've already set R17; if not
	// set 1 to indicate end of msg.
	BNE  carry
	MOVD $1, R17
	BR   carry

less8:
	// 1..7 bytes left: build the low word from 4-, 2- and 1-byte loads,
	// tracking in R22 the bit position where the next piece goes.
	MOVD  $0, R16   // low word being assembled
	MOVD  $0, R22   // shift count
	CMP   R5, $4
	BLT   less4
	MOVWZ (R4), R16
	ADD   $4, R4
	ADD   $-4, R5
	MOVD  $32, R22

less4:
	CMP   R5, $2
	BLT   less2
	MOVHZ (R4), R21
	SLD   R22, R21, R21
	OR    R16, R21, R16
	ADD   $16, R22
	ADD   $-2, R5
	ADD   $2, R4

less2:
	CMP   R5, $0
	BEQ   insert1
	MOVBZ (R4), R21
	SLD   R22, R21, R21
	OR    R16, R21, R16
	ADD   $8, R22

insert1:
	// Insert 1 at end of msg: bit R22 is directly above the last byte.
	// R17 is still 0 on this path.
	MOVD $1, R21
	SLD  R22, R21, R21
	OR   R16, R21, R16

carry:
	// Add new values to h0, h1, h2. Only the carry out of h1 is added to
	// h2; the padding bit is already inside R17:R16.
	ADDC  R16, R8
	ADDE  R17, R9
	ADDZE R10, R10
	// Pretend a full block was consumed so the code after the multiply
	// computes 0 bytes remaining and falls through to done.
	MOVD  $16, R5
	ADD   R5, R4
	BR    multiply

done:
	// Save h0, h1, h2 in state
	MOVD R8, 0(R3)
	MOVD R9, 8(R3)
	MOVD R10, 16(R3)
	RET
183