// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
//go:build gc && !purego
// +build gc,!purego
7
8#include "textflag.h"
9
// POLY1305_ADD(msg, h0, h1, h2) adds one 16-byte block to the accumulator.
//
// The accumulator is kept in three 64-bit limbs: h = h0 + h1<<64 + h2<<128.
// The quadwords at 0(msg) and 8(msg) are added into h0 and h1 with carry
// propagation; the final ADCQ adds that carry plus a constant 1 to h2, which
// in the limb layout above is 1<<128.
// msg is then advanced by 16 bytes.
//
// Writes h0, h1, h2 and msg. Clobbers the flags.
#define POLY1305_ADD(msg, h0, h1, h2) \
	ADDQ 0(msg), h0;  \
	ADCQ 8(msg), h1;  \
	ADCQ $1, h2;      \
	LEAQ 16(msg), msg
15
// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) multiplies the accumulator
// h = h0 + h1<<64 + h2<<128 by r = r0 + r1<<64 and folds the product back
// into h0, h1, h2.
//
// First group (r0 row) and second group (r1 row): build the product in the
// four limbs t0..t3. The h0 and h1 terms use the widening MULQ (result in
// DX:AX); the h2 terms use the two-operand IMULQ, which keeps only the low
// 64 bits of the product.
// NOTE(review): this presumably relies on h2 being small enough that h2*r0
// and h2*r1 fit in 64 bits - confirm against the invariants the caller keeps.
// After its last multiplication, h0 is reused as scratch to hold the high
// half of r1*h0 until it is added into t2.
//
// Third group: reduce modulo 2^130 - 5. With c = (t3:t2) >> 2, the part of the
// product at and above bit 130, the code computes
//     h = (t0 + t1<<64 + (t2 & 3)<<128) + 4*c + c
// where 4*c is (t3:t2) with the low two bits of t2 masked off, and c comes
// from the double-width SHRQ. No final conditional subtraction of the modulus
// is performed here, so the result is only partially reduced.
//
// r0 and r1 are only read. Clobbers AX, DX, t0, t1, t2, t3 and the flags.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ  r0, AX;                  \
	MULQ  h0;                      \
	MOVQ  AX, t0;                  \
	MOVQ  DX, t1;                  \
	MOVQ  r0, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  r0, t2;                  \
	IMULQ h2, t2;                  \
	ADDQ  DX, t2;                  \
	                               \
	MOVQ  r1, AX;                  \
	MULQ  h0;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  DX, h0;                  \
	MOVQ  r1, t3;                  \
	IMULQ h2, t3;                  \
	MOVQ  r1, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t2;                  \
	ADCQ  DX, t3;                  \
	ADDQ  h0, t2;                  \
	ADCQ  $0, t3;                  \
	                               \
	MOVQ  t0, h0;                  \
	MOVQ  t1, h1;                  \
	MOVQ  t2, h2;                  \
	ANDQ  $3, h2;                  \
	MOVQ  t2, t0;                  \
	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
	ADDQ  t0, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2;                  \
	SHRQ  $2, t3, t2;              \
	SHRQ  $2, t3;                  \
	ADDQ  t2, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2
57
// func update(state *[7]uint64, msg []byte)
//
// update absorbs msg into the Poly1305 accumulator stored in state.
//
// state[0], state[1], state[2] hold the accumulator limbs h0, h1, h2; they
// are loaded on entry and stored back on exit. state[3] and state[4] hold
// r0 and r1 and are only read. The remaining elements of state are not
// accessed by this routine.
//
// Each full 16-byte block is added to h together with 1<<128 and the sum is
// multiplied by r modulo 2^130 - 5. A trailing partial block of 1..15 bytes
// is extended with a single 0x01 byte directly after its last byte, added to
// h without the extra 1<<128, and multiplied the same way.
//
// Register use:
//   DI            state pointer
//   SI            message cursor
//   R15           bytes remaining
//   R8, R9, R10   h0, h1, h2
//   R11, R12      r0, r1
//   BX, CX, R13, R14 (and AX, DX inside POLY1305_MUL) scratch
TEXT ·update(SB), $0-32
	MOVQ state+0(FP), DI
	MOVQ msg_base+8(FP), SI
	MOVQ msg_len+16(FP), R15

	MOVQ 0(DI), R8   // h0
	MOVQ 8(DI), R9   // h1
	MOVQ 16(DI), R10 // h2
	MOVQ 24(DI), R11 // r0
	MOVQ 32(DI), R12 // r1

	CMPQ R15, $16
	JB   bytes_between_0_and_15 // fewer than 16 bytes: no full block to absorb

loop:
	// Absorb one full block: h += block + 1<<128, SI += 16.
	POLY1305_ADD(SI, R8, R9, R10)

multiply:
	// h = h * r, reduced modulo 2^130 - 5. The padded tail re-enters here.
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE  loop // unsigned compare: at least one more full block remains

bytes_between_0_and_15:
	TESTQ R15, R15
	JZ    done
	// Build the padded tail in CX:BX, starting from the 0x01 terminator.
	MOVQ  $1, BX
	XORQ  CX, CX
	XORQ  R13, R13 // cleared so only the byte loaded by MOVB below is set
	ADDQ  R15, SI  // SI = one past the last message byte

flush_buffer:
	// Walk the tail backwards: shift CX:BX left by one byte and insert the
	// next byte at the bottom, so the first tail byte ends up lowest and the
	// 0x01 terminator ends up just above the last tail byte.
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX // low byte of BX is zero after the shift, so XOR inserts the byte
	DECQ SI
	DECQ R15
	JNZ  flush_buffer // flags come from DECQ R15

	// h += padded tail. ADCQ $0 (not $1): no 1<<128 is added for the tail.
	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	MOVQ $16, R15 // SUBQ at multiply brings this to 0, so control reaches done
	JMP  multiply

done:
	// Write the updated accumulator back; r0 and r1 are left as they were.
	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	RET
110