1 /* 2 chacha-merged.c version 20080118 3 D. J. Bernstein 4 Public domain. 5 */ 6 7 /* $OpenBSD: chacha.c,v 1.1 2013/11/21 00:45:44 djm Exp $ */ 8 9 #include <sys/param.h> 10 11 #include "chacha.h" 12 13 typedef uint8_t u8; 14 typedef uint32_t u32; 15 16 typedef struct chacha_ctx chacha_ctx; 17 18 #define U8C(v) (v##U) 19 #define U32C(v) (v##U) 20 21 #define U8V(v) ((u8)(v) & U8C(0xFF)) 22 #define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 23 24 #define ROTL32(v, n) \ 25 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 26 27 #define U8TO32_LITTLE(p) \ 28 (((u32)((p)[0]) ) | \ 29 ((u32)((p)[1]) << 8) | \ 30 ((u32)((p)[2]) << 16) | \ 31 ((u32)((p)[3]) << 24)) 32 33 #define U32TO8_LITTLE(p, v) \ 34 do { \ 35 (p)[0] = U8V((v) ); \ 36 (p)[1] = U8V((v) >> 8); \ 37 (p)[2] = U8V((v) >> 16); \ 38 (p)[3] = U8V((v) >> 24); \ 39 } while (0) 40 41 #define ROTATE(v,c) (ROTL32(v,c)) 42 #define XOR(v,w) ((v) ^ (w)) 43 #define PLUS(v,w) (U32V((v) + (w))) 44 #define PLUSONE(v) (PLUS((v),1)) 45 46 #define QUARTERROUND(a,b,c,d) \ 47 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 48 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 49 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 50 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 51 52 static const char sigma[16] = "expand 32-byte k"; 53 static const char tau[16] = "expand 16-byte k"; 54 55 LOCAL void 56 chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits) 57 { 58 const char *constants; 59 60 x->input[4] = U8TO32_LITTLE(k + 0); 61 x->input[5] = U8TO32_LITTLE(k + 4); 62 x->input[6] = U8TO32_LITTLE(k + 8); 63 x->input[7] = U8TO32_LITTLE(k + 12); 64 if (kbits == 256) { /* recommended */ 65 k += 16; 66 constants = sigma; 67 } else { /* kbits == 128 */ 68 constants = tau; 69 } 70 x->input[8] = U8TO32_LITTLE(k + 0); 71 x->input[9] = U8TO32_LITTLE(k + 4); 72 x->input[10] = U8TO32_LITTLE(k + 8); 73 x->input[11] = U8TO32_LITTLE(k + 12); 74 x->input[0] = U8TO32_LITTLE(constants + 0); 75 x->input[1] = U8TO32_LITTLE(constants + 4); 76 x->input[2] = U8TO32_LITTLE(constants + 8); 77 x->input[3] = U8TO32_LITTLE(constants + 12); 78 } 79 80 LOCAL void 81 chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) 82 { 83 #ifdef CHACHA_NONCE0_CTR128 84 /* 128-bit counter without IV */ 85 (void)iv; 86 x->input[12] = U8TO32_LITTLE(counter + 0); 87 x->input[13] = U8TO32_LITTLE(counter + 4); 88 x->input[14] = U8TO32_LITTLE(counter + 8); 89 x->input[15] = U8TO32_LITTLE(counter + 12); 90 #else 91 /* 64-bit IV and 64-bit counter */ 92 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); 93 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); 94 x->input[14] = U8TO32_LITTLE(iv + 0); 95 x->input[15] = U8TO32_LITTLE(iv + 4); 96 #endif 97 } 98 99 #ifdef CHACHA_NONCE0_CTR128 100 LOCAL void 101 chacha_ctrsave(const chacha_ctx *x, u8 *counter) 102 { 103 U32TO8_LITTLE(counter + 0, x->input[12]); 104 U32TO8_LITTLE(counter + 4, x->input[13]); 105 U32TO8_LITTLE(counter + 8, x->input[14]); 106 U32TO8_LITTLE(counter + 12, x->input[15]); 107 } 108 #endif 109 110 LOCAL void 111 chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes) 112 { 113 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 114 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 115 u8 *ctarget = NULL; 116 u8 tmp[64]; 117 u32 i; 118 119 if (!bytes) return; 120 121 j0 = x->input[0]; 122 j1 = x->input[1]; 123 j2 = x->input[2]; 124 j3 = x->input[3]; 125 j4 = x->input[4]; 126 j5 = x->input[5]; 127 j6 = x->input[6]; 128 j7 = x->input[7]; 129 j8 = x->input[8]; 130 j9 = x->input[9]; 131 j10 = x->input[10]; 132 j11 = x->input[11]; 133 j12 = x->input[12]; 134 j13 = x->input[13]; 135 j14 = x->input[14]; 136 j15 = x->input[15]; 137 138 for (;;) { 139 if (bytes < 64) { 140 #ifndef KEYSTREAM_ONLY 141 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 142 m = tmp; 143 #else 144 (void)m; /* suppress compiler warning */ 145 #endif 146 ctarget = c; 147 c = tmp; 148 } 149 x0 = j0; 150 x1 = j1; 151 x2 = j2; 152 x3 = j3; 153 x4 = j4; 154 x5 = j5; 155 x6 = j6; 156 x7 = j7; 157 x8 = j8; 158 x9 = j9; 159 x10 = j10; 160 x11 = j11; 161 x12 = j12; 162 x13 = j13; 163 x14 = j14; 164 x15 = j15; 165 for (i = 20;i > 0;i -= 2) { 166 QUARTERROUND( x0, x4, x8,x12) 167 QUARTERROUND( x1, x5, x9,x13) 168 QUARTERROUND( x2, x6,x10,x14) 169 QUARTERROUND( x3, x7,x11,x15) 170 QUARTERROUND( x0, x5,x10,x15) 171 QUARTERROUND( x1, x6,x11,x12) 172 QUARTERROUND( x2, x7, x8,x13) 173 QUARTERROUND( x3, x4, x9,x14) 174 } 175 x0 = PLUS(x0,j0); 176 x1 = PLUS(x1,j1); 177 x2 = PLUS(x2,j2); 178 x3 = PLUS(x3,j3); 179 x4 = PLUS(x4,j4); 180 x5 = PLUS(x5,j5); 181 x6 = PLUS(x6,j6); 182 x7 = PLUS(x7,j7); 183 x8 = PLUS(x8,j8); 184 x9 = PLUS(x9,j9); 185 x10 = PLUS(x10,j10); 186 x11 = PLUS(x11,j11); 187 x12 = PLUS(x12,j12); 188 x13 = PLUS(x13,j13); 189 x14 = PLUS(x14,j14); 190 x15 = PLUS(x15,j15); 191 192 #ifndef KEYSTREAM_ONLY 193 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 194 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 195 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 196 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 197 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 198 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 199 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 200 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 201 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 202 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 203 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 204 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 205 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 206 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 207 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 208 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 209 #endif 210 211 j12 = PLUSONE(j12); 212 if (!j12) { 213 j13 = PLUSONE(j13); 214 #ifdef CHACHA_NONCE0_CTR128 215 if (!j13) { 216 j14 = PLUSONE(j14); 217 if (!j14) { 218 j15 = PLUSONE(j15); 219 } 220 } 221 #else 222 /* stopping at 2^70 bytes per nonce is user's responsibility */ 223 #endif 224 } 225 226 U32TO8_LITTLE(c + 0,x0); 227 U32TO8_LITTLE(c + 4,x1); 228 U32TO8_LITTLE(c + 8,x2); 229 U32TO8_LITTLE(c + 12,x3); 230 U32TO8_LITTLE(c + 16,x4); 231 U32TO8_LITTLE(c + 20,x5); 232 U32TO8_LITTLE(c + 24,x6); 233 U32TO8_LITTLE(c + 28,x7); 234 U32TO8_LITTLE(c + 32,x8); 235 U32TO8_LITTLE(c + 36,x9); 236 U32TO8_LITTLE(c + 40,x10); 237 U32TO8_LITTLE(c + 44,x11); 238 U32TO8_LITTLE(c + 48,x12); 239 U32TO8_LITTLE(c + 52,x13); 240 U32TO8_LITTLE(c + 56,x14); 241 U32TO8_LITTLE(c + 60,x15); 242 243 if (bytes <= 64) { 244 if (bytes < 64) { 245 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 246 } 247 x->input[12] = j12; 248 x->input[13] = j13; 249 #ifdef CHACHA_NONCE0_CTR128 250 x->input[14] = j14; 251 x->input[15] = j15; 252 #endif 253 return; 254 } 255 bytes -= 64; 256 c += 64; 257 #ifndef KEYSTREAM_ONLY 258 m += 64; 259 #endif 260 } 261 } 262