/* $OpenBSD: chacha-merged.c,v 1.11 2023/07/07 19:37:53 beck Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include <stddef.h>	/* NULL, used by chacha_ivsetup/chacha_encrypt_bytes */
#include <stdint.h>

#define CHACHA_MINKEYLEN	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

typedef uint8_t u8;
typedef uint32_t u32;

/*
 * Cipher context.
 *
 * input[0..3]   constants (sigma or tau), set by chacha_keysetup()
 * input[4..11]  key words, set by chacha_keysetup()
 * input[12..13] block counter (low word, high word), set by
 *               chacha_ivsetup() and advanced by chacha_encrypt_bytes()
 * input[14..15] nonce words, set by chacha_ivsetup()
 *
 * ks receives the raw keystream block whenever chacha_encrypt_bytes()
 * finishes on a block shorter than 64 bytes; unused is set to 64 minus the
 * length of the final block of the most recent non-empty call.
 */
struct chacha_ctx {
	u32 input[16];
	u8 ks[CHACHA_BLOCKLEN];
	u8 unused;
};

static inline void chacha_keysetup(struct chacha_ctx *x, const u8 *k, u32 kbits)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_MINKEYLEN)));
static inline void chacha_ivsetup(struct chacha_ctx *x, const u8 *iv,
    const u8 *ctr)
    __attribute__((__bounded__(__minbytes__, 2, CHACHA_NONCELEN)))
    __attribute__((__bounded__(__minbytes__, 3, CHACHA_CTRLEN)));
static inline void chacha_encrypt_bytes(struct chacha_ctx *x, const u8 *m,
    u8 *c, u32 bytes)
    __attribute__((__bounded__(__buffer__, 2, 4)))
    __attribute__((__bounded__(__buffer__, 3, 4)));

typedef struct chacha_ctx chacha_ctx;

#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 / 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/* Rotate the 32-bit value v left by n bits; n must satisfy 0 < n < 32. */
#define ROTL32(v, n) \
	(U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Load a 32-bit little-endian word from the four bytes at p. */
#define U8TO32_LITTLE(p) \
	(((u32)((p)[0])) | \
	 ((u32)((p)[1]) <<  8) | \
	 ((u32)((p)[2]) << 16) | \
	 ((u32)((p)[3]) << 24))

/* Store v as a 32-bit little-endian word into the four bytes at p. */
#define U32TO8_LITTLE(p, v) \
	do { \
		(p)[0] = U8V((v)); \
		(p)[1] = U8V((v) >>  8); \
		(p)[2] = U8V((v) >> 16); \
		(p)[3] = U8V((v) >> 24); \
	} while (0)

#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round on the four lvalues a, b, c, d.
 *
 * NOTE: this expands to several statements (with its own trailing
 * semicolon) and is not wrapped in do { } while (0); only invoke it inside
 * a braced block, and pass side-effect-free lvalues since each argument is
 * evaluated more than once.
 */
#define QUARTERROUND(a,b,c,d) \
	a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
	a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
	c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/* Initialise with "expand 32-byte k". */
static const char sigma[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x33,
	0x32, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/* Initialise with "expand 16-byte k". */
static const char tau[16] = {
	0x65, 0x78, 0x70, 0x61, 0x6e, 0x64, 0x20, 0x31,
	0x36, 0x2d, 0x62, 0x79, 0x74, 0x65, 0x20, 0x6b,
};

/*
 * Load the key and the matching constants into the context.
 *
 * x:     context to initialise (words 0..11 are written).
 * k:     key bytes; 32 bytes are read when kbits == 256, otherwise 16.
 * kbits: 256 selects the 32-byte key with the sigma constants. Any other
 *        value is treated as a 128-bit key: the same 16 key bytes fill
 *        both key halves and the tau constants are used.
 */
static inline void
chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits)
{
	const char *constants;

	x->input[4] = U8TO32_LITTLE(k + 0);
	x->input[5] = U8TO32_LITTLE(k + 4);
	x->input[6] = U8TO32_LITTLE(k + 8);
	x->input[7] = U8TO32_LITTLE(k + 12);
	if (kbits == 256) { /* recommended */
		k += 16;
		constants = sigma;
	} else { /* kbits == 128 */
		/* k is not advanced, so words 8..11 repeat words 4..7. */
		constants = tau;
	}
	x->input[8] = U8TO32_LITTLE(k + 0);
	x->input[9] = U8TO32_LITTLE(k + 4);
	x->input[10] = U8TO32_LITTLE(k + 8);
	x->input[11] = U8TO32_LITTLE(k + 12);
	x->input[0] = U8TO32_LITTLE(constants + 0);
	x->input[1] = U8TO32_LITTLE(constants + 4);
	x->input[2] = U8TO32_LITTLE(constants + 8);
	x->input[3] = U8TO32_LITTLE(constants + 12);
}

/*
 * Load the nonce and the initial block counter into the context.
 *
 * x:       context to update (words 12..15 are written).
 * iv:      8 nonce bytes, read as two little-endian words.
 * counter: 8 bytes holding the initial block counter (little-endian, low
 *          word first), or NULL to start counting from zero.
 */
static inline void
chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
{
	x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
	x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
	x->input[14] = U8TO32_LITTLE(iv + 0);
	x->input[15] = U8TO32_LITTLE(iv + 4);
}

/*
 * XOR `bytes` bytes of m with the keystream and write the result to c.
 *
 * The keystream is produced in 64-byte blocks; the 64-bit block counter
 * held in x->input[12] (low) and x->input[13] (high) is advanced once per
 * block and written back to the context on return.  A call with bytes == 0
 * returns immediately and leaves the context untouched.
 *
 * For every block, all of the input is read before any output is written,
 * so m and c may point at the same buffer.
 *
 * If the final block is shorter than 64 bytes the complete raw keystream
 * block is stored in x->ks.  On return x->unused holds 64 minus the length
 * of the final block (0 when the data ended on a block boundary).
 */
static inline void
chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
{
	u32 x0, x1, x2, x3, x4, x5, x6, x7;
	u32 x8, x9, x10, x11, x12, x13, x14, x15;
	u32 j0, j1, j2, j3, j4, j5, j6, j7;
	u32 j8, j9, j10, j11, j12, j13, j14, j15;
	u8 *ctarget = NULL;
	u8 tmp[64];
	u32 i;

	if (!bytes)
		return;

	/* Work on a local copy of the state; only the counter is stored back. */
	j0 = x->input[0];
	j1 = x->input[1];
	j2 = x->input[2];
	j3 = x->input[3];
	j4 = x->input[4];
	j5 = x->input[5];
	j6 = x->input[6];
	j7 = x->input[7];
	j8 = x->input[8];
	j9 = x->input[9];
	j10 = x->input[10];
	j11 = x->input[11];
	j12 = x->input[12];
	j13 = x->input[13];
	j14 = x->input[14];
	j15 = x->input[15];

	for (;;) {
		if (bytes < 64) {
			/*
			 * Short final block: bounce through tmp so the
			 * full-block loads and stores below never touch
			 * memory past the caller's buffers.  The bytes of
			 * tmp beyond `bytes` stay uninitialised; they take
			 * part in the XOR but are never copied out.
			 */
			for (i = 0; i < bytes; ++i)
				tmp[i] = m[i];
			m = tmp;
			ctarget = c;
			c = tmp;
		}
		x0 = j0;
		x1 = j1;
		x2 = j2;
		x3 = j3;
		x4 = j4;
		x5 = j5;
		x6 = j6;
		x7 = j7;
		x8 = j8;
		x9 = j9;
		x10 = j10;
		x11 = j11;
		x12 = j12;
		x13 = j13;
		x14 = j14;
		x15 = j15;
		/* 20 rounds: each pass does a column round and a diagonal round. */
		for (i = 20; i > 0; i -= 2) {
			QUARTERROUND(x0, x4, x8, x12)
			QUARTERROUND(x1, x5, x9, x13)
			QUARTERROUND(x2, x6, x10, x14)
			QUARTERROUND(x3, x7, x11, x15)
			QUARTERROUND(x0, x5, x10, x15)
			QUARTERROUND(x1, x6, x11, x12)
			QUARTERROUND(x2, x7, x8, x13)
			QUARTERROUND(x3, x4, x9, x14)
		}
		/* Feed-forward: add the input state to the permuted state. */
		x0 = PLUS(x0, j0);
		x1 = PLUS(x1, j1);
		x2 = PLUS(x2, j2);
		x3 = PLUS(x3, j3);
		x4 = PLUS(x4, j4);
		x5 = PLUS(x5, j5);
		x6 = PLUS(x6, j6);
		x7 = PLUS(x7, j7);
		x8 = PLUS(x8, j8);
		x9 = PLUS(x9, j9);
		x10 = PLUS(x10, j10);
		x11 = PLUS(x11, j11);
		x12 = PLUS(x12, j12);
		x13 = PLUS(x13, j13);
		x14 = PLUS(x14, j14);
		x15 = PLUS(x15, j15);

		if (bytes < 64) {
			/* Keep the raw keystream of a partial block in the context. */
			U32TO8_LITTLE(x->ks + 0, x0);
			U32TO8_LITTLE(x->ks + 4, x1);
			U32TO8_LITTLE(x->ks + 8, x2);
			U32TO8_LITTLE(x->ks + 12, x3);
			U32TO8_LITTLE(x->ks + 16, x4);
			U32TO8_LITTLE(x->ks + 20, x5);
			U32TO8_LITTLE(x->ks + 24, x6);
			U32TO8_LITTLE(x->ks + 28, x7);
			U32TO8_LITTLE(x->ks + 32, x8);
			U32TO8_LITTLE(x->ks + 36, x9);
			U32TO8_LITTLE(x->ks + 40, x10);
			U32TO8_LITTLE(x->ks + 44, x11);
			U32TO8_LITTLE(x->ks + 48, x12);
			U32TO8_LITTLE(x->ks + 52, x13);
			U32TO8_LITTLE(x->ks + 56, x14);
			U32TO8_LITTLE(x->ks + 60, x15);
		}

		/* Combine keystream and input; every input byte is read here. */
		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
		x15 = XOR(x15, U8TO32_LITTLE(m + 60));

		/* 64-bit block counter: carry into the high word on wrap. */
		j12 = PLUSONE(j12);
		if (!j12) {
			j13 = PLUSONE(j13);
			/*
			 * Stopping at 2^70 bytes per nonce is the user's
			 * responsibility.
			 */
		}

		U32TO8_LITTLE(c + 0, x0);
		U32TO8_LITTLE(c + 4, x1);
		U32TO8_LITTLE(c + 8, x2);
		U32TO8_LITTLE(c + 12, x3);
		U32TO8_LITTLE(c + 16, x4);
		U32TO8_LITTLE(c + 20, x5);
		U32TO8_LITTLE(c + 24, x6);
		U32TO8_LITTLE(c + 28, x7);
		U32TO8_LITTLE(c + 32, x8);
		U32TO8_LITTLE(c + 36, x9);
		U32TO8_LITTLE(c + 40, x10);
		U32TO8_LITTLE(c + 44, x11);
		U32TO8_LITTLE(c + 48, x12);
		U32TO8_LITTLE(c + 52, x13);
		U32TO8_LITTLE(c + 56, x14);
		U32TO8_LITTLE(c + 60, x15);

		if (bytes <= 64) {
			if (bytes < 64) {
				/* Copy only the requested bytes out of tmp. */
				for (i = 0; i < bytes; ++i)
					ctarget[i] = c[i];
			}
			x->input[12] = j12;
			x->input[13] = j13;
			x->unused = 64 - bytes;
			return;
		}
		bytes -= 64;
		c += 64;
		m += 64;
	}
}

/*
 * HChaCha20: derive a 32-byte subkey from a 32-byte key and a 16-byte nonce.
 *
 * The state is built from sigma, the key and the nonce (the nonce occupies
 * words 12..15), 20 rounds are applied, and words 0..3 and 12..15 of the
 * result are written to subkey in little-endian order.  Unlike
 * chacha_encrypt_bytes() there is no feed-forward addition of the input
 * state.
 */
void
CRYPTO_hchacha_20(unsigned char subkey[32], const unsigned char key[32],
    const unsigned char nonce[16])
{
	uint32_t x[16];
	int i;

	x[0] = U8TO32_LITTLE(sigma + 0);
	x[1] = U8TO32_LITTLE(sigma + 4);
	x[2] = U8TO32_LITTLE(sigma + 8);
	x[3] = U8TO32_LITTLE(sigma + 12);
	x[4] = U8TO32_LITTLE(key + 0);
	x[5] = U8TO32_LITTLE(key + 4);
	x[6] = U8TO32_LITTLE(key + 8);
	x[7] = U8TO32_LITTLE(key + 12);
	x[8] = U8TO32_LITTLE(key + 16);
	x[9] = U8TO32_LITTLE(key + 20);
	x[10] = U8TO32_LITTLE(key + 24);
	x[11] = U8TO32_LITTLE(key + 28);
	x[12] = U8TO32_LITTLE(nonce + 0);
	x[13] = U8TO32_LITTLE(nonce + 4);
	x[14] = U8TO32_LITTLE(nonce + 8);
	x[15] = U8TO32_LITTLE(nonce + 12);

	for (i = 20; i > 0; i -= 2) {
		QUARTERROUND(x[0], x[4], x[8], x[12])
		QUARTERROUND(x[1], x[5], x[9], x[13])
		QUARTERROUND(x[2], x[6], x[10], x[14])
		QUARTERROUND(x[3], x[7], x[11], x[15])
		QUARTERROUND(x[0], x[5], x[10], x[15])
		QUARTERROUND(x[1], x[6], x[11], x[12])
		QUARTERROUND(x[2], x[7], x[8], x[13])
		QUARTERROUND(x[3], x[4], x[9], x[14])
	}

	U32TO8_LITTLE(subkey + 0, x[0]);
	U32TO8_LITTLE(subkey + 4, x[1]);
	U32TO8_LITTLE(subkey + 8, x[2]);
	U32TO8_LITTLE(subkey + 12, x[3]);

	U32TO8_LITTLE(subkey + 16, x[12]);
	U32TO8_LITTLE(subkey + 20, x[13]);
	U32TO8_LITTLE(subkey + 24, x[14]);
	U32TO8_LITTLE(subkey + 28, x[15]);
}
LCRYPTO_ALIAS(CRYPTO_hchacha_20);