/* $OpenBSD: chacha_private.h,v 1.4 2020/07/22 13:54:30 tobhe Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

/*
 * <sys/systm.h> is the only header this file has ever included; memcpy()
 * and NULL are expected to come from it.  When the compiler can tell us
 * that the header does not exist (for example when building a userland
 * test harness), fall back to the standard headers that declare the same
 * names.  Compilers without __has_include keep the original behaviour.
 */
#if defined(__has_include)
# if __has_include(<sys/systm.h>)
#  include <sys/systm.h>
# else
#  include <stddef.h>
#  include <string.h>
# endif
#else
# include <sys/systm.h>
#endif

typedef unsigned char u8;
typedef unsigned int u32;

/* Cipher state: the sixteen 32-bit words of the ChaCha input block. */
typedef struct
{
	u32 input[16]; /* could be compressed */
} chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate the 32-bit value v left by n bits (0 < n < 32). */
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Load a 32-bit word from four bytes at p, least significant byte first. */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])      ) | \
	 ((u32)((p)[1]) <<  8) | \
	 ((u32)((p)[2]) << 16) | \
	 ((u32)((p)[3]) << 24))

/* Store the 32-bit word v into four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)      ); \
		(p)[1] = U8V((v) >>  8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One ChaCha quarter round on four state words.  The expansion is a
 * sequence of statements that already ends in ';', so call sites do not
 * add one and must not use it as the unbraced body of an if/for.
 */
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Constant words for 256-bit (sigma) and 128-bit (tau) keys; no NUL. */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

/*
 * hchacha20: build a state from sigma, the 32-byte key and the 16-byte
 * nonce, run 20 rounds over it, and copy words 0-3 and 12-15 of the
 * result into derived_key.  Unlike chacha_encrypt_bytes() the initial
 * state is not added back in.  The eight output words are stored as
 * host-order u32 values.
 */
static inline void
hchacha20(u32 derived_key[8], const u8 nonce[16], const u8 key[32])
{
	int i;
	u32 x[16] = {
		U8TO32_LITTLE(sigma + 0),
		U8TO32_LITTLE(sigma + 4),
		U8TO32_LITTLE(sigma + 8),
		U8TO32_LITTLE(sigma + 12),
		U8TO32_LITTLE(key + 0),
		U8TO32_LITTLE(key + 4),
		U8TO32_LITTLE(key + 8),
		U8TO32_LITTLE(key + 12),
		U8TO32_LITTLE(key + 16),
		U8TO32_LITTLE(key + 20),
		U8TO32_LITTLE(key + 24),
		U8TO32_LITTLE(key + 28),
		U8TO32_LITTLE(nonce + 0),
		U8TO32_LITTLE(nonce + 4),
		U8TO32_LITTLE(nonce + 8),
		U8TO32_LITTLE(nonce + 12)
	};

	/* Each iteration is one column round followed by one diagonal round. */
	for (i = 20; i > 0; i -= 2) {
		QUARTERROUND( x[0], x[4], x[8],x[12])
		QUARTERROUND( x[1], x[5], x[9],x[13])
		QUARTERROUND( x[2], x[6],x[10],x[14])
		QUARTERROUND( x[3], x[7],x[11],x[15])
		QUARTERROUND( x[0], x[5],x[10],x[15])
		QUARTERROUND( x[1], x[6],x[11],x[12])
		QUARTERROUND( x[2], x[7], x[8],x[13])
		QUARTERROUND( x[3], x[4], x[9],x[14])
	}

	memcpy(derived_key + 0, x +  0, sizeof(u32) * 4);
	memcpy(derived_key + 4, x + 12, sizeof(u32) * 4);
}

/*
 * chacha_keysetup: load the key and the matching constants into x.
 *
 * With kbits == 256, k must hold 32 bytes: the first 16 fill input[4..7],
 * the second 16 fill input[8..11], and the sigma constants are used.
 * Any other kbits value is treated as a 128-bit key: the same 16 bytes
 * fill both input[4..7] and input[8..11], and the tau constants are used.
 */
static void
chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

/*
 * chacha_ivsetup: load the block counter and IV into x.
 *
 * counter, when not NULL, supplies 8 little-endian bytes for
 * input[12..13]; a NULL counter starts the block counter at zero.
 * iv must point at 8 bytes, which fill input[14..15].
 */
static void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

/*
 * chacha_encrypt_bytes: produce `bytes` bytes of output in c.
 *
 * Unless KEYSTREAM_ONLY is defined, each output byte is the keystream
 * XORed with the corresponding byte of m; with KEYSTREAM_ONLY the raw
 * keystream is written.  m and c may be the same buffer.  A call with
 * bytes == 0 does nothing.
 *
 * The 64-bit block counter in input[12..13] is advanced by one for every
 * 64-byte block generated, including a trailing partial block, and is
 * written back to x before returning.
 */
static void
chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;
	u8 tmp[64];
	u32 i;

	if (!bytes) return;

	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
		if (bytes < 64) {
			/*
			 * Short final block: work inside a scratch buffer so
			 * the full-block loads and stores below stay in
			 * bounds.  The unused tail is zeroed because the XOR
			 * stage reads all 64 bytes; without this it would
			 * read indeterminate stack contents.
			 */
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			for (; i < 64; ++i)
				tmp[i] = 0;
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		/* Each iteration is one column round and one diagonal round. */
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND( x0, x4, x8,x12)
			QUARTERROUND( x1, x5, x9,x13)
			QUARTERROUND( x2, x6,x10,x14)
			QUARTERROUND( x3, x7,x11,x15)
			QUARTERROUND( x0, x5,x10,x15)
			QUARTERROUND( x1, x6,x11,x12)
			QUARTERROUND( x2, x7, x8,x13)
			QUARTERROUND( x3, x4, x9,x14)
		}
		/* Add the input block back in to form the keystream block. */
		x0 = PLUS(x0,j0);
		x1 = PLUS(x1,j1);
		x2 = PLUS(x2,j2);
		x3 = PLUS(x3,j3);
		x4 = PLUS(x4,j4);
		x5 = PLUS(x5,j5);
		x6 = PLUS(x6,j6);
		x7 = PLUS(x7,j7);
		x8 = PLUS(x8,j8);
		x9 = PLUS(x9,j9);
		x10 = PLUS(x10,j10);
		x11 = PLUS(x11,j11);
		x12 = PLUS(x12,j12);
		x13 = PLUS(x13,j13);
		x14 = PLUS(x14,j14);
		x15 = PLUS(x15,j15);

#ifndef KEYSTREAM_ONLY
		x0 = XOR(x0,U8TO32_LITTLE(m + 0));
		x1 = XOR(x1,U8TO32_LITTLE(m + 4));
		x2 = XOR(x2,U8TO32_LITTLE(m + 8));
		x3 = XOR(x3,U8TO32_LITTLE(m + 12));
		x4 = XOR(x4,U8TO32_LITTLE(m + 16));
		x5 = XOR(x5,U8TO32_LITTLE(m + 20));
		x6 = XOR(x6,U8TO32_LITTLE(m + 24));
		x7 = XOR(x7,U8TO32_LITTLE(m + 28));
		x8 = XOR(x8,U8TO32_LITTLE(m + 32));
		x9 = XOR(x9,U8TO32_LITTLE(m + 36));
		x10 = XOR(x10,U8TO32_LITTLE(m + 40));
		x11 = XOR(x11,U8TO32_LITTLE(m + 44));
		x12 = XOR(x12,U8TO32_LITTLE(m + 48));
		x13 = XOR(x13,U8TO32_LITTLE(m + 52));
		x14 = XOR(x14,U8TO32_LITTLE(m + 56));
		x15 = XOR(x15,U8TO32_LITTLE(m + 60));
#endif

		/* 64-bit block counter: carry from the low word to the high. */
		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/* stopping at 2^70 bytes per nonce is user's responsibility */
		}

		U32TO8_LITTLE(c + 0,x0);
		U32TO8_LITTLE(c + 4,x1);
		U32TO8_LITTLE(c + 8,x2);
		U32TO8_LITTLE(c + 12,x3);
		U32TO8_LITTLE(c + 16,x4);
		U32TO8_LITTLE(c + 20,x5);
		U32TO8_LITTLE(c + 24,x6);
		U32TO8_LITTLE(c + 28,x7);
		U32TO8_LITTLE(c + 32,x8);
		U32TO8_LITTLE(c + 36,x9);
		U32TO8_LITTLE(c + 40,x10);
		U32TO8_LITTLE(c + 44,x11);
		U32TO8_LITTLE(c + 48,x12);
		U32TO8_LITTLE(c + 52,x13);
		U32TO8_LITTLE(c + 56,x14);
		U32TO8_LITTLE(c + 60,x15);

		if (bytes <= 64) {
			if (bytes < 64) {
				/* Hand back only the bytes the caller asked for. */
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			x->input[12] = j12;
			x->input[13] = j13;
			return;
		}
		bytes -= 64;
		c += 64;
#ifndef KEYSTREAM_ONLY
		m += 64;
#endif
	}
}