/*
   poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication
   and 64 bit addition
*/
5
/*
 * POLY1305_NOINLINE: compiler-specific spelling of a "do not inline this
 * function" annotation, written between the storage class and the return
 * type of a function definition (poly1305_finish is declared with it).
 *
 *  - MSVC (_MSC_VER defined):         __declspec(noinline)
 *  - clang / GCC:                     __attribute__((noinline))
 *  - any other compiler:              expands to nothing, so the annotated
 *                                     function is left to the compiler's
 *                                     normal inlining decisions.
 */
#if defined(_MSC_VER)
# define POLY1305_NOINLINE __declspec(noinline)
#elif defined(__clang__) || defined(__GNUC__)
# define POLY1305_NOINLINE __attribute__((noinline))
#else
# define POLY1305_NOINLINE
#endif
13
14 #include "private/common.h"
15
/* Number of message bytes consumed per Poly1305 block. */
#define poly1305_block_size 16

/*
 * Internal Poly1305 state.
 *
 * Payload size (padding inserted by the compiler not counted):
 * 17 + sizeof(unsigned long long) + 14*sizeof(unsigned long)
 *
 * The member order and types define the in-memory layout; do not reorder
 * them.
 */
typedef struct poly1305_state_internal_t {
    /* Clamped first half of the key, split into five 26-bit limbs. */
    unsigned long r[5];
    /* Running accumulator, five limbs of (nominally) 26 bits each. */
    unsigned long h[5];
    /* Second half of the key as four 32-bit words; added to the hash last. */
    unsigned long pad[4];
    /*
     * Number of bytes currently waiting in `buffer`. poly1305_finish uses it
     * as the index of the first free byte, so it must stay below
     * poly1305_block_size (presumably maintained by the update routine, which
     * is not part of this section).
     */
    unsigned long long leftover;
    /* Holds a trailing partial block until it can be padded and processed. */
    unsigned char buffer[poly1305_block_size];
    /*
     * Non-zero while the padded last block is processed; it makes
     * poly1305_blocks leave out the implicit 2^128 bit.
     */
    unsigned char final;
} poly1305_state_internal_t;
27
28 static void
poly1305_init(poly1305_state_internal_t * st,const unsigned char key[32])29 poly1305_init(poly1305_state_internal_t *st, const unsigned char key[32])
30 {
31 /* r &= 0xffffffc0ffffffc0ffffffc0fffffff - wiped after finalization */
32 st->r[0] = (LOAD32_LE(&key[0])) & 0x3ffffff;
33 st->r[1] = (LOAD32_LE(&key[3]) >> 2) & 0x3ffff03;
34 st->r[2] = (LOAD32_LE(&key[6]) >> 4) & 0x3ffc0ff;
35 st->r[3] = (LOAD32_LE(&key[9]) >> 6) & 0x3f03fff;
36 st->r[4] = (LOAD32_LE(&key[12]) >> 8) & 0x00fffff;
37
38 /* h = 0 */
39 st->h[0] = 0;
40 st->h[1] = 0;
41 st->h[2] = 0;
42 st->h[3] = 0;
43 st->h[4] = 0;
44
45 /* save pad for later */
46 st->pad[0] = LOAD32_LE(&key[16]);
47 st->pad[1] = LOAD32_LE(&key[20]);
48 st->pad[2] = LOAD32_LE(&key[24]);
49 st->pad[3] = LOAD32_LE(&key[28]);
50
51 st->leftover = 0;
52 st->final = 0;
53 }
54
55 static void
poly1305_blocks(poly1305_state_internal_t * st,const unsigned char * m,unsigned long long bytes)56 poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m,
57 unsigned long long bytes)
58 {
59 const unsigned long hibit = (st->final) ? 0UL : (1UL << 24); /* 1 << 128 */
60 unsigned long r0, r1, r2, r3, r4;
61 unsigned long s1, s2, s3, s4;
62 unsigned long h0, h1, h2, h3, h4;
63 unsigned long long d0, d1, d2, d3, d4;
64 unsigned long c;
65
66 r0 = st->r[0];
67 r1 = st->r[1];
68 r2 = st->r[2];
69 r3 = st->r[3];
70 r4 = st->r[4];
71
72 s1 = r1 * 5;
73 s2 = r2 * 5;
74 s3 = r3 * 5;
75 s4 = r4 * 5;
76
77 h0 = st->h[0];
78 h1 = st->h[1];
79 h2 = st->h[2];
80 h3 = st->h[3];
81 h4 = st->h[4];
82
83 while (bytes >= poly1305_block_size) {
84 /* h += m[i] */
85 h0 += (LOAD32_LE(m + 0)) & 0x3ffffff;
86 h1 += (LOAD32_LE(m + 3) >> 2) & 0x3ffffff;
87 h2 += (LOAD32_LE(m + 6) >> 4) & 0x3ffffff;
88 h3 += (LOAD32_LE(m + 9) >> 6) & 0x3ffffff;
89 h4 += (LOAD32_LE(m + 12) >> 8) | hibit;
90
91 /* h *= r */
92 d0 = ((unsigned long long) h0 * r0) + ((unsigned long long) h1 * s4) +
93 ((unsigned long long) h2 * s3) + ((unsigned long long) h3 * s2) +
94 ((unsigned long long) h4 * s1);
95 d1 = ((unsigned long long) h0 * r1) + ((unsigned long long) h1 * r0) +
96 ((unsigned long long) h2 * s4) + ((unsigned long long) h3 * s3) +
97 ((unsigned long long) h4 * s2);
98 d2 = ((unsigned long long) h0 * r2) + ((unsigned long long) h1 * r1) +
99 ((unsigned long long) h2 * r0) + ((unsigned long long) h3 * s4) +
100 ((unsigned long long) h4 * s3);
101 d3 = ((unsigned long long) h0 * r3) + ((unsigned long long) h1 * r2) +
102 ((unsigned long long) h2 * r1) + ((unsigned long long) h3 * r0) +
103 ((unsigned long long) h4 * s4);
104 d4 = ((unsigned long long) h0 * r4) + ((unsigned long long) h1 * r3) +
105 ((unsigned long long) h2 * r2) + ((unsigned long long) h3 * r1) +
106 ((unsigned long long) h4 * r0);
107
108 /* (partial) h %= p */
109 c = (unsigned long) (d0 >> 26);
110 h0 = (unsigned long) d0 & 0x3ffffff;
111 d1 += c;
112 c = (unsigned long) (d1 >> 26);
113 h1 = (unsigned long) d1 & 0x3ffffff;
114 d2 += c;
115 c = (unsigned long) (d2 >> 26);
116 h2 = (unsigned long) d2 & 0x3ffffff;
117 d3 += c;
118 c = (unsigned long) (d3 >> 26);
119 h3 = (unsigned long) d3 & 0x3ffffff;
120 d4 += c;
121 c = (unsigned long) (d4 >> 26);
122 h4 = (unsigned long) d4 & 0x3ffffff;
123 h0 += c * 5;
124 c = (h0 >> 26);
125 h0 = h0 & 0x3ffffff;
126 h1 += c;
127
128 m += poly1305_block_size;
129 bytes -= poly1305_block_size;
130 }
131
132 st->h[0] = h0;
133 st->h[1] = h1;
134 st->h[2] = h2;
135 st->h[3] = h3;
136 st->h[4] = h4;
137 }
138
139 static POLY1305_NOINLINE void
poly1305_finish(poly1305_state_internal_t * st,unsigned char mac[16])140 poly1305_finish(poly1305_state_internal_t *st, unsigned char mac[16])
141 {
142 unsigned long h0, h1, h2, h3, h4, c;
143 unsigned long g0, g1, g2, g3, g4;
144 unsigned long long f;
145 unsigned long mask;
146
147 /* process the remaining block */
148 if (st->leftover) {
149 unsigned long long i = st->leftover;
150
151 st->buffer[i++] = 1;
152 for (; i < poly1305_block_size; i++) {
153 st->buffer[i] = 0;
154 }
155 st->final = 1;
156 poly1305_blocks(st, st->buffer, poly1305_block_size);
157 }
158
159 /* fully carry h */
160 h0 = st->h[0];
161 h1 = st->h[1];
162 h2 = st->h[2];
163 h3 = st->h[3];
164 h4 = st->h[4];
165
166 c = h1 >> 26;
167 h1 = h1 & 0x3ffffff;
168 h2 += c;
169 c = h2 >> 26;
170 h2 = h2 & 0x3ffffff;
171 h3 += c;
172 c = h3 >> 26;
173 h3 = h3 & 0x3ffffff;
174 h4 += c;
175 c = h4 >> 26;
176 h4 = h4 & 0x3ffffff;
177 h0 += c * 5;
178 c = h0 >> 26;
179 h0 = h0 & 0x3ffffff;
180 h1 += c;
181
182 /* compute h + -p */
183 g0 = h0 + 5;
184 c = g0 >> 26;
185 g0 &= 0x3ffffff;
186 g1 = h1 + c;
187 c = g1 >> 26;
188 g1 &= 0x3ffffff;
189 g2 = h2 + c;
190 c = g2 >> 26;
191 g2 &= 0x3ffffff;
192 g3 = h3 + c;
193 c = g3 >> 26;
194 g3 &= 0x3ffffff;
195 g4 = h4 + c - (1UL << 26);
196
197 /* select h if h < p, or h + -p if h >= p */
198 mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
199 g0 &= mask;
200 g1 &= mask;
201 g2 &= mask;
202 g3 &= mask;
203 g4 &= mask;
204 mask = ~mask;
205
206 h0 = (h0 & mask) | g0;
207 h1 = (h1 & mask) | g1;
208 h2 = (h2 & mask) | g2;
209 h3 = (h3 & mask) | g3;
210 h4 = (h4 & mask) | g4;
211
212 /* h = h % (2^128) */
213 h0 = ((h0) | (h1 << 26)) & 0xffffffff;
214 h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
215 h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
216 h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
217
218 /* mac = (h + pad) % (2^128) */
219 f = (unsigned long long) h0 + st->pad[0];
220 h0 = (unsigned long) f;
221 f = (unsigned long long) h1 + st->pad[1] + (f >> 32);
222 h1 = (unsigned long) f;
223 f = (unsigned long long) h2 + st->pad[2] + (f >> 32);
224 h2 = (unsigned long) f;
225 f = (unsigned long long) h3 + st->pad[3] + (f >> 32);
226 h3 = (unsigned long) f;
227
228 STORE32_LE(mac + 0, (uint32_t) h0);
229 STORE32_LE(mac + 4, (uint32_t) h1);
230 STORE32_LE(mac + 8, (uint32_t) h2);
231 STORE32_LE(mac + 12, (uint32_t) h3);
232
233 /* zero out the state */
234 sodium_memzero((void *) st, sizeof *st);
235 }
236