///////////////////////////////////////////////////////////////////////////////
//
/// \file       sha256.c
/// \brief      SHA-256
///
/// \todo       Crypto++ has x86 ASM optimizations. They use SSE so if they
///             are imported to liblzma, SSE instructions need to be used
///             conditionally to keep the code working on older boxes.
//
//  This code is based on the code found in 7-Zip, which has a modified
//  version of the SHA-256 found in Crypto++ <http://www.cryptopp.com/>.
//  The code was modified a little to fit into liblzma.
//
//  Authors:    Kevin Springle
//              Wei Dai
//              Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
234d1abfb2Sjoerg
#include "check.h"
254d1abfb2Sjoerg
/// \brief      Rotate a 32-bit value right
///
/// Both shift counts are reduced modulo 32, so a rotation by 0 (or by any
/// multiple of 32) returns num unchanged. Without the masking, amount == 0
/// would evaluate num << 32, and shifting by the full width of the type is
/// undefined behavior in C.
///
/// \param      num     Value to rotate
/// \param      amount  Number of bit positions to rotate to the right
///
/// \return     num rotated right by (amount % 32) bits
static inline uint32_t
rotr_32(uint32_t num, unsigned amount)
{
	const unsigned right = amount & 31U;

	// (0 - amount) mod 32 equals (32 - amount) for amount in 1..31,
	// and 0 when amount is 0.
	const unsigned left = (0U - amount) & 31U;

	return (num >> right) | (num << left);
}
334d1abfb2Sjoerg
// Helper macros for transform(). They are not self-contained: they expand
// to code that uses the locals W[] (16-word message schedule), T[] (eight
// working variables) and data[] (input block) of the function they are
// used in, and R2() additionally uses a variable named j from the
// enclosing scope. Every macro argument is parenthesized so that passing
// an expression cannot change the meaning of the expansion.

// Load input word i into the schedule through conv32be().
#define blk0(i) (W[(i)] = conv32be(data[(i)]))

// Extend the schedule in place; W[] is used as a 16-entry circular buffer.
#define blk2(i) (W[(i) & 15] += s1(W[((i) - 2) & 15]) + W[((i) - 7) & 15] \
		+ s0(W[((i) - 15) & 15]))

// Bitwise choose: each result bit comes from y where x is 1, else from z.
#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))

// Bitwise majority. The two terms never have a set bit in common (the
// first needs y != z, the second y == z), so '+' acts like '|' here.
#define Maj(x, y, z) (((x) & ((y) ^ (z))) + ((y) & (z)))

// Working variables a..h. Instead of moving eight values every round,
// the index into T[] is rotated by the round number i.
#define a(i) T[(0 - (i)) & 7]
#define b(i) T[(1 - (i)) & 7]
#define c(i) T[(2 - (i)) & 7]
#define d(i) T[(3 - (i)) & 7]
#define e(i) T[(4 - (i)) & 7]
#define f(i) T[(5 - (i)) & 7]
#define g(i) T[(6 - (i)) & 7]
#define h(i) T[(7 - (i)) & 7]

// One round. Wrapped in do { } while (0) so that "R(...);" is always
// exactly one statement, e.g. as the body of an unbraced if or for.
#define R(i, j, blk) \
	do { \
		h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) \
				+ SHA256_K[(i) + (j)] + (blk); \
		d(i) += h(i); \
		h(i) += S0(a(i)) + Maj(a(i), b(i), c(i)); \
	} while (0)

// Rounds 0-15 take their schedule word straight from the input block.
#define R0(i) R(i, 0, blk0(i))

// Later rounds; j is the loop variable of the caller (16, 32 or 48).
#define R2(i) R(i, j, blk2(i))

// Nested rotations; expanded, these are:
//   S0(x) = rotr(x, 2)  ^ rotr(x, 13) ^ rotr(x, 22)
//   S1(x) = rotr(x, 6)  ^ rotr(x, 11) ^ rotr(x, 25)
//   s0(x) = rotr(x, 7)  ^ rotr(x, 18) ^ (x >> 3)
//   s1(x) = rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10)
#define S0(x) rotr_32((x) ^ rotr_32((x) ^ rotr_32((x), 9), 11), 2)
#define S1(x) rotr_32((x) ^ rotr_32((x) ^ rotr_32((x), 14), 5), 6)
#define s0(x) (rotr_32((x) ^ rotr_32((x), 11), 7) ^ ((x) >> 3))
#define s1(x) (rotr_32((x) ^ rotr_32((x), 2), 17) ^ ((x) >> 10))
614d1abfb2Sjoerg
624d1abfb2Sjoerg
/// SHA-256 round constants. The R() macro adds SHA256_K[n] in round n
/// (n = 0..63). The values are the ones listed in FIPS 180-4,
/// section 4.2.2.
static const uint32_t SHA256_K[64] = {
	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
};
814d1abfb2Sjoerg
824d1abfb2Sjoerg
/// \brief      Mix one 16-word input block into the hash state
///
/// Runs 64 rounds of the R() macro over a copy of the state and then adds
/// the result back into state[].
///
/// \param      state   Eight-word hash state; updated in place
/// \param      data    Sixteen 32-bit input words. Each word is passed
///                     through conv32be() before use (presumably a
///                     big endian to native conversion; conv32be() is
///                     defined outside this file).
static void
transform(uint32_t state[8], const uint32_t data[16])
{
	// The names W, T, data, and (in the loop below) j are fixed:
	// the round macros refer to them directly.
	uint32_t W[16];
	uint32_t T[8];

	// Copy state[] to working vars.
	memcpy(T, state, sizeof(T));

	// The first 16 operations unrolled; these also fill W[] from data[].
	R0( 0); R0( 1); R0( 2); R0( 3);
	R0( 4); R0( 5); R0( 6); R0( 7);
	R0( 8); R0( 9); R0(10); R0(11);
	R0(12); R0(13); R0(14); R0(15);

	// The remaining 48 operations partially unrolled: three passes of
	// 16 rounds each, with j selecting the slice of SHA256_K[].
	for (unsigned int j = 16; j < 64; j += 16) {
		R2( 0); R2( 1); R2( 2); R2( 3);
		R2( 4); R2( 5); R2( 6); R2( 7);
		R2( 8); R2( 9); R2(10); R2(11);
		R2(12); R2(13); R2(14); R2(15);
	}

	// Add the working vars back into state[]. With index 0 the a..h
	// macros map to T[0]..T[7].
	state[0] += a(0);
	state[1] += b(0);
	state[2] += c(0);
	state[3] += d(0);
	state[4] += e(0);
	state[5] += f(0);
	state[6] += g(0);
	state[7] += h(0);
}
1164d1abfb2Sjoerg
1174d1abfb2Sjoerg
1184d1abfb2Sjoerg static void
process(lzma_check_state * check)1194d1abfb2Sjoerg process(lzma_check_state *check)
1204d1abfb2Sjoerg {
1214d1abfb2Sjoerg transform(check->state.sha256.state, check->buffer.u32);
1224d1abfb2Sjoerg return;
1234d1abfb2Sjoerg }
1244d1abfb2Sjoerg
1254d1abfb2Sjoerg
1264d1abfb2Sjoerg extern void
lzma_sha256_init(lzma_check_state * check)1274d1abfb2Sjoerg lzma_sha256_init(lzma_check_state *check)
1284d1abfb2Sjoerg {
1294d1abfb2Sjoerg static const uint32_t s[8] = {
1304d1abfb2Sjoerg 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
1314d1abfb2Sjoerg 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
1324d1abfb2Sjoerg };
1334d1abfb2Sjoerg
1344d1abfb2Sjoerg memcpy(check->state.sha256.state, s, sizeof(s));
1354d1abfb2Sjoerg check->state.sha256.size = 0;
1364d1abfb2Sjoerg
1374d1abfb2Sjoerg return;
1384d1abfb2Sjoerg }
1394d1abfb2Sjoerg
1404d1abfb2Sjoerg
1414d1abfb2Sjoerg extern void
lzma_sha256_update(const uint8_t * buf,size_t size,lzma_check_state * check)1424d1abfb2Sjoerg lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check)
1434d1abfb2Sjoerg {
1444d1abfb2Sjoerg // Copy the input data into a properly aligned temporary buffer.
1454d1abfb2Sjoerg // This way we can be called with arbitrarily sized buffers
1464d1abfb2Sjoerg // (no need to be multiple of 64 bytes), and the code works also
1474d1abfb2Sjoerg // on architectures that don't allow unaligned memory access.
1484d1abfb2Sjoerg while (size > 0) {
1494d1abfb2Sjoerg const size_t copy_start = check->state.sha256.size & 0x3F;
1504d1abfb2Sjoerg size_t copy_size = 64 - copy_start;
1514d1abfb2Sjoerg if (copy_size > size)
1524d1abfb2Sjoerg copy_size = size;
1534d1abfb2Sjoerg
1544d1abfb2Sjoerg memcpy(check->buffer.u8 + copy_start, buf, copy_size);
1554d1abfb2Sjoerg
1564d1abfb2Sjoerg buf += copy_size;
1574d1abfb2Sjoerg size -= copy_size;
1584d1abfb2Sjoerg check->state.sha256.size += copy_size;
1594d1abfb2Sjoerg
1604d1abfb2Sjoerg if ((check->state.sha256.size & 0x3F) == 0)
1614d1abfb2Sjoerg process(check);
1624d1abfb2Sjoerg }
1634d1abfb2Sjoerg
1644d1abfb2Sjoerg return;
1654d1abfb2Sjoerg }
1664d1abfb2Sjoerg
1674d1abfb2Sjoerg
1684d1abfb2Sjoerg extern void
lzma_sha256_finish(lzma_check_state * check)1694d1abfb2Sjoerg lzma_sha256_finish(lzma_check_state *check)
1704d1abfb2Sjoerg {
1714d1abfb2Sjoerg // Add padding as described in RFC 3174 (it describes SHA-1 but
1724d1abfb2Sjoerg // the same padding style is used for SHA-256 too).
1734d1abfb2Sjoerg size_t pos = check->state.sha256.size & 0x3F;
1744d1abfb2Sjoerg check->buffer.u8[pos++] = 0x80;
1754d1abfb2Sjoerg
1764d1abfb2Sjoerg while (pos != 64 - 8) {
1774d1abfb2Sjoerg if (pos == 64) {
1784d1abfb2Sjoerg process(check);
1794d1abfb2Sjoerg pos = 0;
1804d1abfb2Sjoerg }
1814d1abfb2Sjoerg
1824d1abfb2Sjoerg check->buffer.u8[pos++] = 0x00;
1834d1abfb2Sjoerg }
1844d1abfb2Sjoerg
1854d1abfb2Sjoerg // Convert the message size from bytes to bits.
1864d1abfb2Sjoerg check->state.sha256.size *= 8;
1874d1abfb2Sjoerg
1884d1abfb2Sjoerg check->buffer.u64[(64 - 8) / 8] = conv64be(check->state.sha256.size);
1894d1abfb2Sjoerg
1904d1abfb2Sjoerg process(check);
1914d1abfb2Sjoerg
1924d1abfb2Sjoerg for (size_t i = 0; i < 8; ++i)
1934d1abfb2Sjoerg check->buffer.u32[i] = conv32be(check->state.sha256.state[i]);
1944d1abfb2Sjoerg
1954d1abfb2Sjoerg return;
1964d1abfb2Sjoerg }
197