1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build !gccgo,!purego
6
7#include "textflag.h"
8
// POLY1305_ADD(src, acc0, acc1, acc2)
//
// Adds one 16-byte block to the three-limb accumulator acc2:acc1:acc0.
// The quadwords at 0(src) and 8(src) are added into acc0 and acc1, and
// the constant 1 plus the incoming carry is added into acc2, so the value
// added is the 128 bits read from memory plus 2^128.
// src is then advanced by 16 with LEAQ.
//
// The ADDQ/ADCQ/ADCQ sequence is one carry chain: nothing that writes
// the flags may be placed between those three instructions.
// Clobbers: flags. Modifies: src, acc0, acc1, acc2.
#define POLY1305_ADD(src, acc0, acc1, acc2) \
	ADDQ 0(src), acc0;  \
	ADCQ 8(src), acc1;  \
	ADCQ $1, acc2;      \
	LEAQ 16(src), src
14
// POLY1305_MUL(acc0, acc1, acc2, key0, key1, p0, p1, p2, p3)
//
// Multiplies the three-limb accumulator acc2:acc1:acc0 by the two-limb
// multiplier key1:key0 and folds the result back into acc2:acc1:acc0.
//
// Step 1 (first paragraph): p0, p1, p2 collect the partial products with
// key0. acc0*key0 and acc1*key0 are full 64x64->128 MULQs; acc2*key0 is a
// single-width IMULQ.
// Step 2 (second paragraph): the partial products with key1 are added one
// limb higher, leaving the whole product in p3:p2:p1:p0. acc0 is reused
// as a scratch register for the high half of acc0*key1 once its original
// value is no longer needed.
// Step 3 (third paragraph): bits 0..129 of the product (p0, p1 and the
// low two bits of p2) become the new accumulator. The remaining high part
// is added back in twice: once with its low two bits cleared (i.e. times
// 4) and once shifted right by two (i.e. times 1), which is a
// multiplication of the high part by 5 -- the fold for the modulus
// 2^130 - 5.
//
// NOTE(review): the single-width IMULQs assume acc2 and the multiplier
// limbs are small enough that acc2*key0 and acc2*key1 fit in 64 bits;
// that depends on how the caller prepares the multiplier -- confirm.
//
// Clobbers: AX, DX, p0, p1, p2, p3, flags.
// p0..p3 must be distinct from AX, DX and from every other argument.
// Instruction order matters throughout: each ADCQ consumes the carry of
// the instruction directly before it.
#define POLY1305_MUL(acc0, acc1, acc2, key0, key1, p0, p1, p2, p3) \
	MOVQ  key0, AX;                \
	MULQ  acc0;                    \
	MOVQ  AX, p0;                  \
	MOVQ  DX, p1;                  \
	MOVQ  key0, AX;                \
	MULQ  acc1;                    \
	ADDQ  AX, p1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  key0, p2;                \
	IMULQ acc2, p2;                \
	ADDQ  DX, p2;                  \
	                               \
	MOVQ  key1, AX;                \
	MULQ  acc0;                    \
	ADDQ  AX, p1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  DX, acc0;                \
	MOVQ  key1, p3;                \
	IMULQ acc2, p3;                \
	MOVQ  key1, AX;                \
	MULQ  acc1;                    \
	ADDQ  AX, p2;                  \
	ADCQ  DX, p3;                  \
	ADDQ  acc0, p2;                \
	ADCQ  $0, p3;                  \
	                               \
	MOVQ  p0, acc0;                \
	MOVQ  p1, acc1;                \
	MOVQ  p2, acc2;                \
	ANDQ  $3, acc2;                \
	MOVQ  p2, p0;                  \
	ANDQ  $0xFFFFFFFFFFFFFFFC, p0; \
	ADDQ  p0, acc0;                \
	ADCQ  p3, acc1;                \
	ADCQ  $0, acc2;                \
	SHRQ  $2, p3, p2;              \
	SHRQ  $2, p3;                  \
	ADDQ  p2, acc0;                \
	ADCQ  p3, acc1;                \
	ADCQ  $0, acc2
56
// func update(state *[7]uint64, msg []byte)
//
// Absorbs msg into the accumulator stored in state.
//
// State layout as used here: state[0..2] are the accumulator limbs
// h0, h1, h2 (read on entry, written back on exit); state[3..4] are the
// multiplier limbs r0, r1 (read only). state[5..6] are not touched.
//
// Register roles:
//   DI       state pointer
//   SI       cursor into msg
//   R15      bytes of msg still to process
//   R8-R10   h0, h1, h2
//   R11-R12  r0, r1
//   BX, CX, R13, R14  scratch for POLY1305_MUL; BX, CX, R13 also build
//                     the partial block
//   AX, DX   clobbered by POLY1305_MUL
//
// Every full 16-byte block is added with an extra 2^128 and multiplied.
// A trailing block of 1..15 bytes is packed into CX:BX with a single 0x01
// byte placed directly after the last message byte, added without the
// extra 2^128, and multiplied once.
TEXT ·update(SB), $0-32
	MOVQ state+0(FP), DI
	MOVQ msg_base+8(FP), SI
	MOVQ msg_len+16(FP), R15

	MOVQ 24(DI), R11 // r0
	MOVQ 32(DI), R12 // r1
	MOVQ 0(DI), R8   // h0
	MOVQ 8(DI), R9   // h1
	MOVQ 16(DI), R10 // h2

	// Fewer than 16 bytes in total: go straight to the partial-block path.
	CMPQ R15, $16
	JB   partial_block

absorb_block:
	POLY1305_ADD(SI, R8, R9, R10)

mul_reduce:
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE  absorb_block // unsigned: at least one more full block remains

partial_block:
	TESTQ R15, R15
	JZ    write_back
	XORQ  CX, CX   // high half of the packed block
	XORQ  R13, R13 // upper bytes stay zero; MOVB below only writes the low byte
	MOVQ  $1, BX   // the marker byte that will end up just past the data
	ADDQ  R15, SI  // point one past the last byte; the loop walks backwards

pack_partial:
	// CX:BX = (CX:BX << 8) | next byte, taken from the end of msg toward
	// its start so the bytes end up in little-endian order.
	DECQ SI
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB 0(SI), R13
	XORQ R13, BX
	DECQ R15
	JNZ  pack_partial

	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	// Pretend exactly one block is left so that the SUBQ/CMPQ after the
	// multiply bring R15 to 0 and control reaches write_back.
	MOVQ $16, R15
	JMP  mul_reduce

write_back:
	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	RET
109