// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ppc64le,!gccgo,!appengine

#include "textflag.h"

// This was ported from the amd64 implementation.

// POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2)
//
// Absorbs one full 16-byte block into the accumulator h2:h1:h0.
//
//   t0 = 64-bit word at 0(msg)
//   t1 = 64-bit word at 8(msg)
//   t2 = 1
//   h0 += t0            (carry out)
//   h1 += t1 + carry    (carry out)
//   h2 += t2 + carry    (the constant 1 lands at bit 128 of the accumulator)
//   msg += 16
//
// msg is a register holding the current read address and is advanced by
// the macro. t0, t1 and t2 are scratch registers and are overwritten, as
// is the carry bit.
//
// Keep commentary up here rather than inside the definition: a // comment
// on a continuation line would hide the trailing backslash.
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
	MOVD (msg), t0;  \
	MOVD 8(msg), t1; \
	MOVD $1, t2;     \
	ADDC t0, h0, h0; \
	ADDE t1, h1, h1; \
	ADDE t2, h2;     \
	ADD  $16, msg

// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5)
//
// Replaces the accumulator h2:h1:h0 with (h2:h1:h0) * (r1:r0), folded back
// into three limbs. r0 and r1 are read only. t0..t5 are scratch.
//
// Step 1 - schoolbook multiply into four limbs t3:t2:t1:t0.
//   t0 = lo(r0*h0)
//   t1 = hi(r0*h0) + lo(r0*h1) + lo(r1*h0)
//   t2 = lo(r0*h2) + hi(r0*h1) + hi(r1*h0) + lo(r1*h1) + carries
//   t3 = lo(r1*h2) + hi(r1*h1) + carries
// Only the low halves of the h2 products are used, so h2 is expected to
// be small on entry. h0 is reused as a temporary partway through (it first
// holds lo(r1*h0), then hi(r1*h0) plus carry) before being reloaded from t0.
//
// Step 2 - fold everything at or above bit 130 back in, using
// 2^130 = 5 (mod 2^130 - 5). With c = (t3:t2) >> 2:
//   h2      = t2 & 3
//   h1:h0   = t1:t0
//   h      += t3:(t2 & ~3)      (this is 4*c)
//   h      += c                 (t3:t2 shifted right by two)
// giving h += 5*c. The result is only partially reduced.
//
// The instruction order matters: each ADDZE/ADDE consumes the carry left
// by the ADDC/ADDE before it, and the multiplies, moves, shifts and
// logical operations interleaved between them are ones that leave the
// carry bit alone. Do not reorder without re-checking every carry chain.
//
// Keep commentary up here rather than inside the definition: a // comment
// on a continuation line would hide the trailing backslash.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
	MULLD  r0, h0, t0;  \
	MULLD  r0, h1, t4;  \
	MULHDU r0, h0, t1;  \
	MULHDU r0, h1, t5;  \
	ADDC   t4, t1, t1;  \
	MULLD  r0, h2, t2;  \
	ADDZE  t5;          \
	MULHDU r1, h0, t4;  \
	MULLD  r1, h0, h0;  \
	ADD    t5, t2, t2;  \
	ADDC   h0, t1, t1;  \
	MULLD  h2, r1, t3;  \
	ADDZE  t4, h0;      \
	MULHDU r1, h1, t5;  \
	MULLD  r1, h1, t4;  \
	ADDC   t4, t2, t2;  \
	ADDE   t5, t3, t3;  \
	ADDC   h0, t2, t2;  \
	MOVD   $-4, t4;     \
	MOVD   t0, h0;      \
	MOVD   t1, h1;      \
	ADDZE  t3;          \
	ANDCC  $3, t2, h2;  \
	AND    t2, t4, t0;  \
	ADDC   t0, h0, h0;  \
	ADDE   t3, h1, h1;  \
	SLD    $62, t3, t4; \
	SRD    $2, t2;      \
	ADDZE  h2;          \
	OR     t4, t2, t2;  \
	SRD    $2, t3;      \
	ADDC   t2, h0, h0;  \
	ADDE   t3, h1, h1;  \
	ADDZE  h2

// poly1305Mask is a 16-byte read-only constant, local to this file.
// The two words equal the low and high halves of the standard Poly1305
// clamp applied to r (0x0ffffffc0ffffffc0ffffffc0fffffff).
// NOTE(review): nothing in the code visible in this file references the
// symbol; presumably r is clamped by the caller before update runs.
// Confirm before removing.
DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16

// func update(state *[7]uint64, msg []byte)
//
// Absorbs msg into the Poly1305 accumulator held in state.
//
// state words used here (byte offsets):
//    0  h0   accumulator, low word      (read and written)
//    8  h1   accumulator, middle word   (read and written)
//   16  h2   accumulator, top word      (read and written)
//   24  r0   multiplier, low word       (read only)
//   32  r1   multiplier, high word      (read only)
// The remaining two words of state are not accessed.
//
// Every full 16-byte block is added with an extra 1 at bit 128 and then
// multiplied by r. A trailing block of 1..15 bytes is assembled in
// R17:R16 with a single 1 bit placed directly after its last byte, added
// without the bit-128 term, and multiplied once more. An empty msg leaves
// state unchanged.
//
// Register use:
//   R3        state pointer
//   R4        current read position in msg
//   R5        bytes remaining
//   R8-R10    h0, h1, h2
//   R11, R12  r0, r1
//   R16, R17  multiply temporaries; also low/high word of the tail block
//   R14, R18  multiply temporaries
//   R20-R23   scratch
TEXT ·update(SB), $0-32
	MOVD state+0(FP), R3
	MOVD msg_base+8(FP), R4
	MOVD msg_len+16(FP), R5

	MOVD 0(R3), R8   // h0
	MOVD 8(R3), R9   // h1
	MOVD 16(R3), R10 // h2
	MOVD 24(R3), R11 // r0
	MOVD 32(R3), R12 // r1

	CMP R5, $16
	BLT bytes_between_0_and_15

loop:
	// Full block: h += block + 2^128, R4 += 16.
	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)

multiply:
	// h = h * r, partially reduced. Also entered from carry below once
	// the tail block has been added.
	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
	ADD $-16, R5
	CMP R5, $16
	BGE loop

bytes_between_0_and_15:
	CMP  R5, $0
	BEQ  done
	MOVD $0, R16 // h0
	MOVD $0, R17 // h1

flush_buffer:
	CMP R5, $8
	BLE just1

	// R21 = R5 - 8: number of bytes that belong to the high word.
	MOVD $8, R21
	SUB  R21, R5, R21

	// Greater than 8 -- load the rightmost remaining bytes in msg
	// and put into R17 (h1). The 8-byte load ends exactly at the end
	// of msg, so it never reads past the slice.
	MOVD (R4)(R21), R17
	MOVD $16, R22

	// Find the offset to those bytes: R22 = (16 - R5) * 8 bits
	SUB R5, R22, R22
	SLD $3, R22

	// Shift to get only the bytes in msg
	SRD R22, R17, R17

	// Put 1 at high end: bit (R5 - 8) * 8, just above the last byte
	MOVD $1, R23
	SLD  $3, R21
	SLD  R21, R23, R23
	OR   R23, R17, R17

	// Remainder is 8
	MOVD $8, R5

just1:
	CMP R5, $8
	BLT less8

	// Exactly 8
	MOVD (R4), R16

	CMP R17, $0

	// Check if we've already set R17; if not
	// set 1 to indicate end of msg.
	BNE  carry
	MOVD $1, R17
	BR   carry

less8:
	// Fewer than 8 bytes: build R16 from a 4-, 2- and 1-byte load as
	// needed. R22 tracks the bit position of the next piece.
	MOVD  $0, R16   // h0
	MOVD  $0, R22   // shift count
	CMP   R5, $4
	BLT   less4
	MOVWZ (R4), R16
	ADD   $4, R4
	ADD   $-4, R5
	MOVD  $32, R22

less4:
	CMP   R5, $2
	BLT   less2
	MOVHZ (R4), R21
	SLD   R22, R21, R21
	OR    R16, R21, R16
	ADD   $16, R22
	ADD   $-2, R5
	ADD   $2, R4

less2:
	CMP   R5, $0
	BEQ   insert1
	MOVBZ (R4), R21
	SLD   R22, R21, R21
	OR    R16, R21, R16
	ADD   $8, R22

insert1:
	// Insert 1 at end of msg
	MOVD $1, R21
	SLD  R22, R21, R21
	OR   R16, R21, R16

carry:
	// Add new values to h0, h1, h2. The end-of-message bit is already
	// part of R17:R16, so h2 only takes the carry out of h1.
	ADDC  R16, R8
	ADDE  R17, R9
	ADDZE R10

	// Pretend exactly one block is left so that the bookkeeping after
	// multiply brings R5 to zero and control reaches done.
	MOVD $16, R5
	ADD  R5, R4
	BR   multiply

done:
	// Save h0, h1, h2 in state
	MOVD R8, 0(R3)
	MOVD R9, 8(R3)
	MOVD R10, 16(R3)
	RET
