/*
 * Taken from OpenBSD CVS src/lib/libc/crypt/chacha_private.h on
 * May 12, 2014.
 */

/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/
11
/* Local fixed-width integer aliases (the reference code predates universal
 * <stdint.h> availability; u8/u32 must be exactly 8/32 bits). */
typedef unsigned char u8;
typedef unsigned int u32;

/*
 * ChaCha cipher state: 16 32-bit words.
 * Layout: words 0-3 constants ("expand ..-byte k"), 4-11 key,
 * 12-13 block counter (64-bit, little-endian word order), 14-15 IV.
 */
typedef struct
{
	u32 input[16]; /* could be compressed */
} chacha_ctx;

/* Integer-constant constructors (kept for parity with the reference code). */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to exactly 8 / 32 bits, independent of native int width. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* 32-bit rotate left; n must be in 1..31 (only 7, 8, 12, 16 are used here). */
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Load a 32-bit little-endian word from 4 bytes; no alignment requirement. */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])      ) | \
	 ((u32)((p)[1]) <<  8) | \
	 ((u32)((p)[2]) << 16) | \
	 ((u32)((p)[3]) << 24))

/* Store a 32-bit word as 4 little-endian bytes. */
#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)      ); \
		(p)[1] = U8V((v) >>  8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * ChaCha quarter round: mixes four state words in place.
 * Deliberately NOT wrapped in do/while so invocations can be chained as
 * bare statements, exactly as in the reference implementation.
 */
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* ChaCha constants: "expand 32-byte k" (256-bit key) and
 * "expand 16-byte k" (128-bit key), as 16 raw bytes each. */
static const char sigma[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '3',
				'2', '-', 'b', 'y', 't', 'e', ' ', 'k' };
static const char tau[16] = { 'e', 'x', 'p', 'a', 'n', 'd', ' ', '1',
			      '6', '-', 'b', 'y', 't', 'e', ' ', 'k' };
58
59 static void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits,u32 ivbits)60 chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits,u32 ivbits)
61 {
62 const char *constants;
63
64 UNUSED(ivbits);
65
66 x->input[4] = U8TO32_LITTLE(k + 0);
67 x->input[5] = U8TO32_LITTLE(k + 4);
68 x->input[6] = U8TO32_LITTLE(k + 8);
69 x->input[7] = U8TO32_LITTLE(k + 12);
70 if (kbits == 256) { /* recommended */
71 k += 16;
72 constants = sigma;
73 } else { /* kbits == 128 */
74 constants = tau;
75 }
76 x->input[8] = U8TO32_LITTLE(k + 0);
77 x->input[9] = U8TO32_LITTLE(k + 4);
78 x->input[10] = U8TO32_LITTLE(k + 8);
79 x->input[11] = U8TO32_LITTLE(k + 12);
80 x->input[0] = U8TO32_LITTLE(constants + 0);
81 x->input[1] = U8TO32_LITTLE(constants + 4);
82 x->input[2] = U8TO32_LITTLE(constants + 8);
83 x->input[3] = U8TO32_LITTLE(constants + 12);
84 }
85
86 static void
chacha_ivsetup(chacha_ctx * x,const u8 * iv)87 chacha_ivsetup(chacha_ctx *x,const u8 *iv)
88 {
89 x->input[12] = 0;
90 x->input[13] = 0;
91 x->input[14] = U8TO32_LITTLE(iv + 0);
92 x->input[15] = U8TO32_LITTLE(iv + 4);
93 }
94
95 static void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)96 chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
97 {
98 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
99 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
100 u8 *ctarget = NULL;
101 u8 tmp[64];
102 u_int i;
103
104 if (!bytes) return;
105
106 j0 = x->input[0];
107 j1 = x->input[1];
108 j2 = x->input[2];
109 j3 = x->input[3];
110 j4 = x->input[4];
111 j5 = x->input[5];
112 j6 = x->input[6];
113 j7 = x->input[7];
114 j8 = x->input[8];
115 j9 = x->input[9];
116 j10 = x->input[10];
117 j11 = x->input[11];
118 j12 = x->input[12];
119 j13 = x->input[13];
120 j14 = x->input[14];
121 j15 = x->input[15];
122
123 for (;;) {
124 if (bytes < 64) {
125 for (i = 0;i < bytes;++i) tmp[i] = m[i];
126 m = tmp;
127 ctarget = c;
128 c = tmp;
129 }
130 x0 = j0;
131 x1 = j1;
132 x2 = j2;
133 x3 = j3;
134 x4 = j4;
135 x5 = j5;
136 x6 = j6;
137 x7 = j7;
138 x8 = j8;
139 x9 = j9;
140 x10 = j10;
141 x11 = j11;
142 x12 = j12;
143 x13 = j13;
144 x14 = j14;
145 x15 = j15;
146 for (i = 20;i > 0;i -= 2) {
147 QUARTERROUND( x0, x4, x8,x12)
148 QUARTERROUND( x1, x5, x9,x13)
149 QUARTERROUND( x2, x6,x10,x14)
150 QUARTERROUND( x3, x7,x11,x15)
151 QUARTERROUND( x0, x5,x10,x15)
152 QUARTERROUND( x1, x6,x11,x12)
153 QUARTERROUND( x2, x7, x8,x13)
154 QUARTERROUND( x3, x4, x9,x14)
155 }
156 x0 = PLUS(x0,j0);
157 x1 = PLUS(x1,j1);
158 x2 = PLUS(x2,j2);
159 x3 = PLUS(x3,j3);
160 x4 = PLUS(x4,j4);
161 x5 = PLUS(x5,j5);
162 x6 = PLUS(x6,j6);
163 x7 = PLUS(x7,j7);
164 x8 = PLUS(x8,j8);
165 x9 = PLUS(x9,j9);
166 x10 = PLUS(x10,j10);
167 x11 = PLUS(x11,j11);
168 x12 = PLUS(x12,j12);
169 x13 = PLUS(x13,j13);
170 x14 = PLUS(x14,j14);
171 x15 = PLUS(x15,j15);
172
173 #ifndef KEYSTREAM_ONLY
174 x0 = XOR(x0,U8TO32_LITTLE(m + 0));
175 x1 = XOR(x1,U8TO32_LITTLE(m + 4));
176 x2 = XOR(x2,U8TO32_LITTLE(m + 8));
177 x3 = XOR(x3,U8TO32_LITTLE(m + 12));
178 x4 = XOR(x4,U8TO32_LITTLE(m + 16));
179 x5 = XOR(x5,U8TO32_LITTLE(m + 20));
180 x6 = XOR(x6,U8TO32_LITTLE(m + 24));
181 x7 = XOR(x7,U8TO32_LITTLE(m + 28));
182 x8 = XOR(x8,U8TO32_LITTLE(m + 32));
183 x9 = XOR(x9,U8TO32_LITTLE(m + 36));
184 x10 = XOR(x10,U8TO32_LITTLE(m + 40));
185 x11 = XOR(x11,U8TO32_LITTLE(m + 44));
186 x12 = XOR(x12,U8TO32_LITTLE(m + 48));
187 x13 = XOR(x13,U8TO32_LITTLE(m + 52));
188 x14 = XOR(x14,U8TO32_LITTLE(m + 56));
189 x15 = XOR(x15,U8TO32_LITTLE(m + 60));
190 #endif
191
192 j12 = PLUSONE(j12);
193 if (!j12) {
194 j13 = PLUSONE(j13);
195 /* stopping at 2^70 bytes per nonce is user's responsibility */
196 }
197
198 U32TO8_LITTLE(c + 0,x0);
199 U32TO8_LITTLE(c + 4,x1);
200 U32TO8_LITTLE(c + 8,x2);
201 U32TO8_LITTLE(c + 12,x3);
202 U32TO8_LITTLE(c + 16,x4);
203 U32TO8_LITTLE(c + 20,x5);
204 U32TO8_LITTLE(c + 24,x6);
205 U32TO8_LITTLE(c + 28,x7);
206 U32TO8_LITTLE(c + 32,x8);
207 U32TO8_LITTLE(c + 36,x9);
208 U32TO8_LITTLE(c + 40,x10);
209 U32TO8_LITTLE(c + 44,x11);
210 U32TO8_LITTLE(c + 48,x12);
211 U32TO8_LITTLE(c + 52,x13);
212 U32TO8_LITTLE(c + 56,x14);
213 U32TO8_LITTLE(c + 60,x15);
214
215 if (bytes <= 64) {
216 if (bytes < 64) {
217 for (i = 0;i < bytes;++i) ctarget[i] = c[i];
218 }
219 x->input[12] = j12;
220 x->input[13] = j13;
221 return;
222 }
223 bytes -= 64;
224 c += 64;
225 #ifndef KEYSTREAM_ONLY
226 m += 64;
227 #endif
228 }
229 }
230