1
2 #include <sightglass.h>
3
4 #include <assert.h>
5 #include <stdint.h>
6 #include <stdlib.h>
7 #include <string.h>
8
/* Number of times the benchmark body runs the cipher over the buffer. */
#define ITERATIONS 1000

/* Size in bytes of the heap buffer allocated by the setup hook. */
#define BUF_SIZE   1000
11
/*
 * Rotate the 32-bit value x left by b bits (intended range: 0 <= b <= 31).
 *
 * x is converted to uint32_t before any shift so that narrower or signed
 * arguments are never shifted as a promoted int. The right-shift count is
 * reduced modulo 32 so that b == 0 yields x instead of shifting by the full
 * width of the type, which would be undefined behavior.
 *
 * Both arguments are evaluated more than once; pass expressions without
 * side effects.
 */
#define ROTL32(x, b) \
    ((uint32_t) (((uint32_t) (x) << (b)) | ((uint32_t) (x) >> ((32 - (b)) & 31))))
13
/* Read a 32-bit little-endian word from the four bytes starting at SRC. */
#define LOAD32_LE(SRC) load32_le(SRC)

/*
 * Assemble a 32-bit word from src[0..3], with src[0] as the least
 * significant byte and src[3] as the most significant one. Each byte is
 * widened to uint32_t before it is shifted into position.
 */
static inline uint32_t
load32_le(const uint8_t src[4])
{
    return ((uint32_t) src[0]) | ((uint32_t) src[1] << 8) | ((uint32_t) src[2] << 16) |
           ((uint32_t) src[3] << 24);
}
24
/* Write the 32-bit word W to the four bytes starting at DST, little-endian. */
#define STORE32_LE(DST, W) store32_le((DST), (W))

/*
 * Serialize w into dst[0..3], least significant byte first: dst[k] receives
 * bits 8k..8k+7 of w.
 */
static inline void
store32_le(uint8_t dst[4], uint32_t w)
{
    int k;

    for (k = 0; k < 4; k++) {
        dst[k] = (uint8_t) (w >> (8 * k));
    }
}
37
/*
 * Upper bound of the round loop in chacha20_rounds(), which performs two
 * rounds (one column pass, one diagonal pass) per iteration.
 * NOTE(review): the identifiers say "chacha20" but the round count is 12;
 * confirm this reduced count is intentional for the benchmark.
 */
#define CHACHA20_ROUNDS 12

/* Key and nonce sizes, in bytes, taken by chacha20_init()/chacha20_xor(). */
#define chacha20_KEYBYTES   32
#define chacha20_NONCEBYTES 12

/* Key and nonce sizes, in bytes, taken by xchacha20_xor(). */
#define xchacha20_KEYBYTES   32
#define xchacha20_NONCEBYTES 24

/* Block-level sizes in bytes (not referenced by the code visible here). */
#define chacha20_block_BYTES      64
#define chacha20_block_KEYBYTES   32
#define chacha20_block_NONCEBYTES 16

/* Output, key and nonce sizes, in bytes, of the hchacha20() subkey derivation. */
#define hchacha20_BYTES      32
#define hchacha20_KEYBYTES   32
#define hchacha20_NONCEBYTES 16
53
/*
 * Quarter round: mixes the four 32-bit lvalues a, b, c, d in place with four
 * add / xor / rotate-left steps (rotation amounts 16, 12, 8, 7).
 *
 * The statements are wrapped in do { } while (0) so the macro expands to a
 * single statement and stays correct inside an unbraced if/else or loop;
 * invoke it followed by a semicolon. Arguments are parenthesized, and each
 * one is evaluated several times, so they must be side-effect-free lvalues.
 */
#define CHACHA20_QUARTERROUND(a, b, c, d) \
    do {                                  \
        (a) += (b);                       \
        (d) = ROTL32((d) ^ (a), 16);      \
        (c) += (d);                       \
        (b) = ROTL32((b) ^ (c), 12);      \
        (a) += (b);                       \
        (d) = ROTL32((d) ^ (a), 8);       \
        (c) += (d);                       \
        (b) = ROTL32((b) ^ (c), 7);       \
    } while (0)
63
64 static void
chacha20_rounds(uint32_t st[16])65 chacha20_rounds(uint32_t st[16])
66 {
67 int i;
68
69 for (i = 0; i < CHACHA20_ROUNDS; i += 2) {
70 CHACHA20_QUARTERROUND(st[0], st[4], st[8], st[12]);
71 CHACHA20_QUARTERROUND(st[1], st[5], st[9], st[13]);
72 CHACHA20_QUARTERROUND(st[2], st[6], st[10], st[14]);
73 CHACHA20_QUARTERROUND(st[3], st[7], st[11], st[15]);
74 CHACHA20_QUARTERROUND(st[0], st[5], st[10], st[15]);
75 CHACHA20_QUARTERROUND(st[1], st[6], st[11], st[12]);
76 CHACHA20_QUARTERROUND(st[2], st[7], st[8], st[13]);
77 CHACHA20_QUARTERROUND(st[3], st[4], st[9], st[14]);
78 }
79 }
80
/*
 * Produce the next 16-word keystream block in ks and advance the block
 * counter held in st.
 *
 * ks: output; receives rounds(st) + st, word by word (mod 2^32).
 * st: cipher input state (constants, key, counter, nonce). Apart from the
 *     counter increment it is left untouched, so every block is derived from
 *     the same constants/key/nonce words.
 *
 * The rounds are run on the copy in ks, not on st: permuting st in place
 * would replace the input state with its permutation, so all blocks after
 * the first would be computed from a corrupted state and the counter would
 * be incremented on a permuted word.
 */
static void
chacha20_update(uint32_t ks[16], uint32_t st[16])
{
    int i;

    memcpy(ks, st, 16 * sizeof ks[0]);
    chacha20_rounds(ks);
    for (i = 0; i < 16; i++) {
        ks[i] += st[i];
    }
    /* Word 12 is the low counter word; carry into word 13 on wrap-around. */
    if (++st[12] == 0) {
        ++st[13];
    }
}
95
96 static void
chacha20_init(uint32_t st[16],const uint8_t nonce[chacha20_NONCEBYTES],const uint8_t key[chacha20_KEYBYTES])97 chacha20_init(uint32_t st[16], const uint8_t nonce[chacha20_NONCEBYTES],
98 const uint8_t key[chacha20_KEYBYTES])
99 {
100 int i;
101
102 st[0] = 0x61707865UL;
103 st[1] = 0x3120646eUL;
104 st[2] = 0x79622d36UL;
105 st[3] = 0x6b206574UL;
106 for (i = 0; i < 8; i++) {
107 st[4 + i] = LOAD32_LE(&key[4 * i]);
108 }
109 st[12] = 0;
110 st[13] = LOAD32_LE(&nonce[4 * 0]);
111 st[14] = LOAD32_LE(&nonce[4 * 1]);
112 st[15] = LOAD32_LE(&nonce[4 * 2]);
113 }
114
115 static int
chacha20_xor(uint8_t * c,const uint8_t * m,size_t len,const uint8_t nonce[chacha20_NONCEBYTES],const uint8_t key[chacha20_KEYBYTES])116 chacha20_xor(uint8_t *c, const uint8_t *m, size_t len, const uint8_t nonce[chacha20_NONCEBYTES],
117 const uint8_t key[chacha20_KEYBYTES])
118 {
119 uint8_t tmp[64];
120 uint32_t ks[16];
121 uint32_t st[16];
122 uint32_t x;
123 int i;
124
125 chacha20_init(st, nonce, key);
126 while (len >= 64) {
127 chacha20_update(ks, st);
128 for (i = 0; i < 16; i++) {
129 x = ks[i] ^ LOAD32_LE(m + 4 * i);
130 STORE32_LE(c + 4 * i, x);
131 }
132 c += 64;
133 m += 64;
134 len -= 64;
135 }
136 if (len > 0) {
137 chacha20_update(ks, st);
138 memset(tmp, 0, 64);
139 for (i = 0; i < (int) len; i++) {
140 tmp[i] = m[i];
141 }
142 for (i = 0; i < 16; i++) {
143 x = ks[i] ^ LOAD32_LE(tmp + 4 * i);
144 STORE32_LE(tmp + 4 * i, x);
145 }
146 for (i = 0; i < (int) len; i++) {
147 c[i] = tmp[i];
148 }
149 }
150 return 0;
151 }
152
153 static void
hchacha20(uint8_t subkey[hchacha20_BYTES],const uint8_t nonce[hchacha20_NONCEBYTES],const uint8_t key[hchacha20_KEYBYTES])154 hchacha20(uint8_t subkey[hchacha20_BYTES], const uint8_t nonce[hchacha20_NONCEBYTES],
155 const uint8_t key[hchacha20_KEYBYTES])
156 {
157 uint32_t st[16];
158 int i;
159
160 chacha20_init(st, &nonce[4], key);
161 st[12] = LOAD32_LE(&nonce[0]);
162 chacha20_rounds(st);
163 for (i = 0; i < 4; i++) {
164 STORE32_LE(subkey + 4 * i, st[i]);
165 }
166 for (; i < 8; i++) {
167 STORE32_LE(subkey + 4 * i, st[i + 12 - 4]);
168 }
169 }
170
171 static int
xchacha20_xor(uint8_t * c,const uint8_t * m,size_t len,const uint8_t nonce[xchacha20_NONCEBYTES],const uint8_t key[xchacha20_KEYBYTES])172 xchacha20_xor(uint8_t *c, const uint8_t *m, size_t len, const uint8_t nonce[xchacha20_NONCEBYTES],
173 const uint8_t key[xchacha20_KEYBYTES])
174 {
175 uint8_t subkey[chacha20_KEYBYTES];
176 uint8_t subnonce[chacha20_NONCEBYTES];
177
178 hchacha20(subkey, nonce, key);
179 memset(subnonce, 0, 4);
180 memcpy(subnonce + 4, nonce + hchacha20_NONCEBYTES, 8);
181
182 return chacha20_xor(c, m, len, subnonce, subkey);
183 }
184
185 static void
xchacha20_setup(void * global_ctx,void ** ctx_p)186 xchacha20_setup(void *global_ctx, void **ctx_p)
187 {
188 (void) global_ctx;
189
190 uint8_t *buf = calloc(BUF_SIZE, (size_t) 1U);
191 assert(buf != NULL);
192
193 *ctx_p = (void *) buf;
194 }
195
/*
 * Teardown hook: release the buffer passed in ctx with free(). A null ctx is
 * accepted, since free(NULL) does nothing.
 */
static void
xchacha20_teardown(void *ctx)
{
    free(ctx);
}
201
202 void
xchacha20_body(void * ctx_)203 xchacha20_body(void *ctx_)
204 {
205 (void) ctx_;
206
207 uint8_t *buf;
208 xchacha20_setup(NULL, (void **) &buf);
209
210 BLACK_BOX(buf);
211 assert(BUF_SIZE >= xchacha20_KEYBYTES && BUF_SIZE >= xchacha20_NONCEBYTES);
212 int i;
213 for (i = 0; i < ITERATIONS; i++) {
214 xchacha20_xor(buf, buf, BUF_SIZE, buf, buf);
215 }
216 BLACK_BOX(buf);
217 xchacha20_teardown(buf);
218 }
219