1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build amd64,!gccgo,!appengine
6
7#include "textflag.h"
8
// POLY1305_ADD(msg, h0, h1, h2) adds one 16-byte message block to the
// accumulator held in the registers (h0, h1, h2):
//
//   - the 8 bytes at 0(msg) are added to h0,
//   - the 8 bytes at 8(msg) are added to h1 together with the carry,
//   - 1 plus the remaining carry is added to h2, so the value added to the
//     accumulator is the block plus 2^128,
//   - msg is advanced by 16 bytes.
//
// The three additions form a single carry chain and must stay in this order.
// msg, h0, h1 and h2 must be registers; flags are clobbered.
//
// Comments are kept outside the macro body on purpose: a // comment on a
// continued line would swallow the trailing backslash.
#define POLY1305_ADD(msg, h0, h1, h2) \
	ADDQ 0(msg), h0;  \
	ADCQ 8(msg), h1;  \
	ADCQ $1, h2;      \
	LEAQ 16(msg), msg
14
// POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) multiplies the accumulator
// h = h0 + h1*2^64 + h2*2^128 by r = r0 + r1*2^64 and folds the product back
// into (h0, h1, h2).
//
// Steps, in the order they appear in the body:
//
//   1. (t0, t1, t2) = h * r0. The h0*r0 and h1*r0 products are full 128-bit
//      MULQ results; the h2*r0 term is an IMULQ, so only its low 64 bits
//      are kept.
//   2. h * r1 is accumulated into (t1, t2, t3). h0 is reused as a temporary
//      for the high half of h0*r1, and the h2*r1 term is again a low-64-bit
//      IMULQ. Afterwards (t0, t1, t2, t3) holds the product as four 64-bit
//      limbs.
//   3. The product is split at bit 130: (h0, h1, h2) = (t0, t1, t2 & 3).
//      The part above bit 130, c = (t3:t2) >> 2, is added back in two
//      pieces: first 4*c (t2 with its low two bits cleared, paired with
//      t3), then c itself (the pair shifted right by two), i.e. 5*c in
//      total. This matches the modulus 2^130 - 5 that finalize subtracts.
//
// No comparison against the modulus happens here; finalize performs the
// conditional subtraction.
//
// NOTE(review): the IMULQ terms presumably rely on h2 being small and on r
// having been masked with poly1305Mask so that they cannot overflow 64 bits;
// confirm against the callers before reusing this macro elsewhere.
//
// Register requirements: MULQ uses AX as an implicit operand and writes
// DX:AX, so none of the macro arguments may be AX or DX, and all nine
// arguments must be distinct registers. Clobbers AX, DX, t0, t1, t2, t3 and
// the flags. r0 and r1 are only read.
//
// Comments are kept outside the macro body on purpose: a // comment on a
// continued line would swallow the trailing backslash.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
	MOVQ  r0, AX;                  \
	MULQ  h0;                      \
	MOVQ  AX, t0;                  \
	MOVQ  DX, t1;                  \
	MOVQ  r0, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  r0, t2;                  \
	IMULQ h2, t2;                  \
	ADDQ  DX, t2;                  \
	                               \
	MOVQ  r1, AX;                  \
	MULQ  h0;                      \
	ADDQ  AX, t1;                  \
	ADCQ  $0, DX;                  \
	MOVQ  DX, h0;                  \
	MOVQ  r1, t3;                  \
	IMULQ h2, t3;                  \
	MOVQ  r1, AX;                  \
	MULQ  h1;                      \
	ADDQ  AX, t2;                  \
	ADCQ  DX, t3;                  \
	ADDQ  h0, t2;                  \
	ADCQ  $0, t3;                  \
	                               \
	MOVQ  t0, h0;                  \
	MOVQ  t1, h1;                  \
	MOVQ  t2, h2;                  \
	ANDQ  $3, h2;                  \
	MOVQ  t2, t0;                  \
	ANDQ  $0xFFFFFFFFFFFFFFFC, t0; \
	ADDQ  t0, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2;                  \
	SHRQ  $2, t3, t2;              \
	SHRQ  $2, t3;                  \
	ADDQ  t2, h0;                  \
	ADCQ  t3, h1;                  \
	ADCQ  $0, h2
56
// poly1305Mask is the 16-byte read-only constant that initialize ANDs with
// the first half of the key to obtain r. It is laid out here as four
// little-endian 32-bit words; read as one 128-bit little-endian integer the
// value is 0x0FFFFFFC0FFFFFFC0FFFFFFC0FFFFFFF.
DATA ·poly1305Mask<>+0x00(SB)/4, $0x0FFFFFFF
DATA ·poly1305Mask<>+0x04(SB)/4, $0x0FFFFFFC
DATA ·poly1305Mask<>+0x08(SB)/4, $0x0FFFFFFC
DATA ·poly1305Mask<>+0x0C(SB)/4, $0x0FFFFFFC
GLOBL ·poly1305Mask<>(SB), RODATA, $16
60
// func update(state *[7]uint64, msg []byte)
//
// update absorbs msg into the accumulator stored in state[0..2] (h0, h1, h2)
// using the multiplier stored in state[3..4] (r0, r1), and writes the new
// accumulator back to state[0..2]. state[3..6] are only read or left alone.
//
// msg is consumed in 16-byte blocks: each full block is added to h with an
// extra 2^128 (POLY1305_ADD) and h is then multiplied by r (POLY1305_MUL).
// A trailing chunk of 1..15 bytes is assembled into a 128-bit little-endian
// value with a single 0x01 byte placed directly after the last message byte,
// added to h without the extra 2^128, and multiplied once.
//
// NOTE(review): because a short tail is padded and multiplied immediately,
// callers presumably must pass a length that is not a multiple of 16 only
// for the final chunk of a message; confirm against the Go caller.
//
// Register use:
//   DI            state pointer
//   SI            message cursor
//   R15           bytes remaining
//   R8, R9, R10   h0, h1, h2
//   R11, R12      r0, r1
//   BX, CX        POLY1305_MUL scratch; also low/high half of the tail block
//   R13, R14      POLY1305_MUL scratch; R13 also holds the tail byte
//   AX, DX        clobbered by POLY1305_MUL
TEXT ·update(SB), $0-32
	MOVQ state+0(FP), DI
	MOVQ msg_base+8(FP), SI
	MOVQ msg_len+16(FP), R15

	MOVQ 0(DI), R8   // h0
	MOVQ 8(DI), R9   // h1
	MOVQ 16(DI), R10 // h2
	MOVQ 24(DI), R11 // r0
	MOVQ 32(DI), R12 // r1

	// Fewer than 16 bytes in total: skip the full-block loop.
	CMPQ R15, $16
	JB   bytes_between_0_and_15

loop:
	// h += block + 2^128; SI advances by 16.
	POLY1305_ADD(SI, R8, R9, R10)

multiply:
	// h = h * r, folded back into (R8, R9, R10). This label is also the
	// re-entry point for the padded tail block below.
	POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
	SUBQ $16, R15
	CMPQ R15, $16
	JAE  loop

bytes_between_0_and_15:
	// 0 bytes left: store h and return. Otherwise build the padded tail.
	TESTQ R15, R15
	JZ    done
	MOVQ  $1, BX    // CX:BX starts as the 0x01 padding byte
	XORQ  CX, CX
	XORQ  R13, R13  // upper bits stay zero; MOVB below only writes the low byte
	ADDQ  R15, SI   // SI = one past the last message byte

flush_buffer:
	// Walk the tail backwards: shift CX:BX left by one byte and insert the
	// next message byte at the bottom, so the first byte ends up lowest and
	// the 0x01 ends up just above the last byte.
	SHLQ $8, BX, CX
	SHLQ $8, BX
	MOVB -1(SI), R13
	XORQ R13, BX    // low byte of BX is zero after the shift, so this inserts R13
	DECQ SI
	DECQ R15
	JNZ  flush_buffer

	// h += padded tail (no extra 2^128 here, unlike POLY1305_ADD).
	ADDQ BX, R8
	ADCQ CX, R9
	ADCQ $0, R10
	// Pretend exactly one block remains so the SUBQ after POLY1305_MUL
	// leaves R15 == 0 and control falls through to done.
	MOVQ $16, R15
	JMP  multiply

done:
	// Write the accumulator back; r and the rest of state are untouched.
	MOVQ R8, 0(DI)
	MOVQ R9, 8(DI)
	MOVQ R10, 16(DI)
	RET
113
// func initialize(state *[7]uint64, key *[32]byte)
//
// initialize loads the 32-byte key into state:
//
//   - key[0:16] ANDed with poly1305Mask is stored to state[3..4], where
//     update reads r0 and r1;
//   - key[16:32] is stored unchanged to state[5..6], where finalize reads
//     the value it adds to the accumulator.
//
// state[0..2] (the accumulator) is NOT written here; the caller is expected
// to pass a state whose first three words are already zero.
TEXT ·initialize(SB), $0-16
	MOVQ key+8(FP), AX
	MOVQ state+0(FP), BX

	// Read both key halves before storing anything to state.
	MOVOU (AX), X5                  // key[0:16]
	MOVOU 16(AX), X6                // key[16:32]
	MOVOU ·poly1305Mask<>(SB), X4
	PAND  X4, X5                    // r = key[0:16] & mask

	MOVOU X5, 24(BX)                // state[3], state[4] = r0, r1
	MOVOU X6, 40(BX)                // state[5], state[6] = key[16:32]
	RET
127
// func finalize(tag *[TagSize]byte, state *[7]uint64)
//
// finalize turns the accumulator in state[0..2] into the 16-byte tag:
//
//   1. h is compared against 2^130 - 5 by subtracting it limb by limb;
//      if the subtraction does not borrow, the difference replaces h,
//      otherwise h is kept as is.
//   2. state[5..6] is added to the low 128 bits of the result, discarding
//      the carry out of bit 127.
//   3. The 128-bit sum is stored to tag as two 64-bit words.
//
// state itself is not modified.
TEXT ·finalize(SB), $0-16
	MOVQ state+8(FP), SI
	MOVQ tag+0(FP), DI

	MOVQ 0(SI), R8   // h0
	MOVQ 8(SI), R9   // h1
	MOVQ 16(SI), R10 // h2

	// (R11, R12, R10) = h - (2^130 - 5). Only the borrow out of the top
	// limb matters afterwards; MOVQ leaves the flags alone.
	MOVQ R8, R11
	MOVQ R9, R12
	SUBQ $0xFFFFFFFFFFFFFFFB, R11
	SBBQ $0xFFFFFFFFFFFFFFFF, R12
	SBBQ $3, R10

	// Borrow set means h was below 2^130 - 5: restore the original low limbs.
	CMOVQCS R8, R11
	CMOVQCS R9, R12

	// tag = (low 128 bits) + state[5..6], modulo 2^128.
	ADDQ 40(SI), R11
	ADCQ 48(SI), R12

	MOVQ R11, 0(DI)
	MOVQ R12, 8(DI)
	RET
149