/* $OpenBSD: chacha.c,v 1.2 2023/07/17 05:26:38 djm Exp $ */
/*
chacha-merged.c version 20080118
D. J. Bernstein
Public domain.
*/

#include "chacha.h"

/* Short names for the byte and word types used throughout this file. */
typedef unsigned char u8;
typedef unsigned int u32;

typedef struct chacha_ctx chacha_ctx;

/* Append an unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to 8 / 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  n is expected to be in the
 * range 1..31: the right-hand shift count is (32 - n).  v is evaluated
 * twice, so it must be free of side effects.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit word from four bytes at p, least significant first. */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/* Store the 32-bit word v into four bytes at p, least significant first. */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word operations; PLUS is addition reduced to 32 bits. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round on four state words, updated in place.
 * The expansion is a statement sequence that already ends in ';', so it
 * is invoked without a trailing semicolon and must not be used as the
 * unbraced body of an if/else/loop.  Arguments must be plain lvalues.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);

/*
 * Constants loaded into state words 0..3 for 256-bit (sigma) and
 * 128-bit (tau) keys.  Each array is exactly 16 bytes, so the string's
 * terminating NUL is not stored.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";

521edbfe23Sdjm void
chacha_keysetup(chacha_ctx * x,const u8 * k,u32 kbits)531edbfe23Sdjm chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
541edbfe23Sdjm {
551edbfe23Sdjm const char *constants;
561edbfe23Sdjm
571edbfe23Sdjm x->input[4] = U8TO32_LITTLE(k + 0);
581edbfe23Sdjm x->input[5] = U8TO32_LITTLE(k + 4);
591edbfe23Sdjm x->input[6] = U8TO32_LITTLE(k + 8);
601edbfe23Sdjm x->input[7] = U8TO32_LITTLE(k + 12);
611edbfe23Sdjm if (kbits == 256) { /* recommended */
621edbfe23Sdjm k += 16;
631edbfe23Sdjm constants = sigma;
641edbfe23Sdjm } else { /* kbits == 128 */
651edbfe23Sdjm constants = tau;
661edbfe23Sdjm }
671edbfe23Sdjm x->input[8] = U8TO32_LITTLE(k + 0);
681edbfe23Sdjm x->input[9] = U8TO32_LITTLE(k + 4);
691edbfe23Sdjm x->input[10] = U8TO32_LITTLE(k + 8);
701edbfe23Sdjm x->input[11] = U8TO32_LITTLE(k + 12);
711edbfe23Sdjm x->input[0] = U8TO32_LITTLE(constants + 0);
721edbfe23Sdjm x->input[1] = U8TO32_LITTLE(constants + 4);
731edbfe23Sdjm x->input[2] = U8TO32_LITTLE(constants + 8);
741edbfe23Sdjm x->input[3] = U8TO32_LITTLE(constants + 12);
751edbfe23Sdjm }
761edbfe23Sdjm
771edbfe23Sdjm void
chacha_ivsetup(chacha_ctx * x,const u8 * iv,const u8 * counter)781edbfe23Sdjm chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
791edbfe23Sdjm {
801edbfe23Sdjm x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
811edbfe23Sdjm x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
821edbfe23Sdjm x->input[14] = U8TO32_LITTLE(iv + 0);
831edbfe23Sdjm x->input[15] = U8TO32_LITTLE(iv + 4);
841edbfe23Sdjm }
851edbfe23Sdjm
861edbfe23Sdjm void
chacha_encrypt_bytes(chacha_ctx * x,const u8 * m,u8 * c,u32 bytes)871edbfe23Sdjm chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
881edbfe23Sdjm {
891edbfe23Sdjm u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
901edbfe23Sdjm u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
911edbfe23Sdjm u8 *ctarget = NULL;
921edbfe23Sdjm u8 tmp[64];
931edbfe23Sdjm u_int i;
941edbfe23Sdjm
951edbfe23Sdjm if (!bytes) return;
961edbfe23Sdjm
971edbfe23Sdjm j0 = x->input[0];
981edbfe23Sdjm j1 = x->input[1];
991edbfe23Sdjm j2 = x->input[2];
1001edbfe23Sdjm j3 = x->input[3];
1011edbfe23Sdjm j4 = x->input[4];
1021edbfe23Sdjm j5 = x->input[5];
1031edbfe23Sdjm j6 = x->input[6];
1041edbfe23Sdjm j7 = x->input[7];
1051edbfe23Sdjm j8 = x->input[8];
1061edbfe23Sdjm j9 = x->input[9];
1071edbfe23Sdjm j10 = x->input[10];
1081edbfe23Sdjm j11 = x->input[11];
1091edbfe23Sdjm j12 = x->input[12];
1101edbfe23Sdjm j13 = x->input[13];
1111edbfe23Sdjm j14 = x->input[14];
1121edbfe23Sdjm j15 = x->input[15];
1131edbfe23Sdjm
1141edbfe23Sdjm for (;;) {
1151edbfe23Sdjm if (bytes < 64) {
1161edbfe23Sdjm for (i = 0;i < bytes;++i) tmp[i] = m[i];
1171edbfe23Sdjm m = tmp;
1181edbfe23Sdjm ctarget = c;
1191edbfe23Sdjm c = tmp;
1201edbfe23Sdjm }
1211edbfe23Sdjm x0 = j0;
1221edbfe23Sdjm x1 = j1;
1231edbfe23Sdjm x2 = j2;
1241edbfe23Sdjm x3 = j3;
1251edbfe23Sdjm x4 = j4;
1261edbfe23Sdjm x5 = j5;
1271edbfe23Sdjm x6 = j6;
1281edbfe23Sdjm x7 = j7;
1291edbfe23Sdjm x8 = j8;
1301edbfe23Sdjm x9 = j9;
1311edbfe23Sdjm x10 = j10;
1321edbfe23Sdjm x11 = j11;
1331edbfe23Sdjm x12 = j12;
1341edbfe23Sdjm x13 = j13;
1351edbfe23Sdjm x14 = j14;
1361edbfe23Sdjm x15 = j15;
1371edbfe23Sdjm for (i = 20;i > 0;i -= 2) {
1381edbfe23Sdjm QUARTERROUND( x0, x4, x8,x12)
1391edbfe23Sdjm QUARTERROUND( x1, x5, x9,x13)
1401edbfe23Sdjm QUARTERROUND( x2, x6,x10,x14)
1411edbfe23Sdjm QUARTERROUND( x3, x7,x11,x15)
1421edbfe23Sdjm QUARTERROUND( x0, x5,x10,x15)
1431edbfe23Sdjm QUARTERROUND( x1, x6,x11,x12)
1441edbfe23Sdjm QUARTERROUND( x2, x7, x8,x13)
1451edbfe23Sdjm QUARTERROUND( x3, x4, x9,x14)
1461edbfe23Sdjm }
1471edbfe23Sdjm x0 = PLUS(x0,j0);
1481edbfe23Sdjm x1 = PLUS(x1,j1);
1491edbfe23Sdjm x2 = PLUS(x2,j2);
1501edbfe23Sdjm x3 = PLUS(x3,j3);
1511edbfe23Sdjm x4 = PLUS(x4,j4);
1521edbfe23Sdjm x5 = PLUS(x5,j5);
1531edbfe23Sdjm x6 = PLUS(x6,j6);
1541edbfe23Sdjm x7 = PLUS(x7,j7);
1551edbfe23Sdjm x8 = PLUS(x8,j8);
1561edbfe23Sdjm x9 = PLUS(x9,j9);
1571edbfe23Sdjm x10 = PLUS(x10,j10);
1581edbfe23Sdjm x11 = PLUS(x11,j11);
1591edbfe23Sdjm x12 = PLUS(x12,j12);
1601edbfe23Sdjm x13 = PLUS(x13,j13);
1611edbfe23Sdjm x14 = PLUS(x14,j14);
1621edbfe23Sdjm x15 = PLUS(x15,j15);
1631edbfe23Sdjm
1641edbfe23Sdjm x0 = XOR(x0,U8TO32_LITTLE(m + 0));
1651edbfe23Sdjm x1 = XOR(x1,U8TO32_LITTLE(m + 4));
1661edbfe23Sdjm x2 = XOR(x2,U8TO32_LITTLE(m + 8));
1671edbfe23Sdjm x3 = XOR(x3,U8TO32_LITTLE(m + 12));
1681edbfe23Sdjm x4 = XOR(x4,U8TO32_LITTLE(m + 16));
1691edbfe23Sdjm x5 = XOR(x5,U8TO32_LITTLE(m + 20));
1701edbfe23Sdjm x6 = XOR(x6,U8TO32_LITTLE(m + 24));
1711edbfe23Sdjm x7 = XOR(x7,U8TO32_LITTLE(m + 28));
1721edbfe23Sdjm x8 = XOR(x8,U8TO32_LITTLE(m + 32));
1731edbfe23Sdjm x9 = XOR(x9,U8TO32_LITTLE(m + 36));
1741edbfe23Sdjm x10 = XOR(x10,U8TO32_LITTLE(m + 40));
1751edbfe23Sdjm x11 = XOR(x11,U8TO32_LITTLE(m + 44));
1761edbfe23Sdjm x12 = XOR(x12,U8TO32_LITTLE(m + 48));
1771edbfe23Sdjm x13 = XOR(x13,U8TO32_LITTLE(m + 52));
1781edbfe23Sdjm x14 = XOR(x14,U8TO32_LITTLE(m + 56));
1791edbfe23Sdjm x15 = XOR(x15,U8TO32_LITTLE(m + 60));
1801edbfe23Sdjm
1811edbfe23Sdjm j12 = PLUSONE(j12);
1821edbfe23Sdjm if (!j12) {
1831edbfe23Sdjm j13 = PLUSONE(j13);
1841edbfe23Sdjm /* stopping at 2^70 bytes per nonce is user's responsibility */
1851edbfe23Sdjm }
1861edbfe23Sdjm
1871edbfe23Sdjm U32TO8_LITTLE(c + 0,x0);
1881edbfe23Sdjm U32TO8_LITTLE(c + 4,x1);
1891edbfe23Sdjm U32TO8_LITTLE(c + 8,x2);
1901edbfe23Sdjm U32TO8_LITTLE(c + 12,x3);
1911edbfe23Sdjm U32TO8_LITTLE(c + 16,x4);
1921edbfe23Sdjm U32TO8_LITTLE(c + 20,x5);
1931edbfe23Sdjm U32TO8_LITTLE(c + 24,x6);
1941edbfe23Sdjm U32TO8_LITTLE(c + 28,x7);
1951edbfe23Sdjm U32TO8_LITTLE(c + 32,x8);
1961edbfe23Sdjm U32TO8_LITTLE(c + 36,x9);
1971edbfe23Sdjm U32TO8_LITTLE(c + 40,x10);
1981edbfe23Sdjm U32TO8_LITTLE(c + 44,x11);
1991edbfe23Sdjm U32TO8_LITTLE(c + 48,x12);
2001edbfe23Sdjm U32TO8_LITTLE(c + 52,x13);
2011edbfe23Sdjm U32TO8_LITTLE(c + 56,x14);
2021edbfe23Sdjm U32TO8_LITTLE(c + 60,x15);
2031edbfe23Sdjm
2041edbfe23Sdjm if (bytes <= 64) {
2051edbfe23Sdjm if (bytes < 64) {
2061edbfe23Sdjm for (i = 0;i < bytes;++i) ctarget[i] = c[i];
2071edbfe23Sdjm }
2081edbfe23Sdjm x->input[12] = j12;
2091edbfe23Sdjm x->input[13] = j13;
2101edbfe23Sdjm return;
2111edbfe23Sdjm }
2121edbfe23Sdjm bytes -= 64;
2131edbfe23Sdjm c += 64;
2141edbfe23Sdjm m += 64;
2151edbfe23Sdjm }
2161edbfe23Sdjm }
217