///////////////////////////////////////////////////////////////////////////////
//
/// \file       sha256.c
/// \brief      SHA-256
///
/// \todo       Crypto++ has x86 ASM optimizations. They use SSE so if they
///             are imported to liblzma, SSE instructions need to be used
///             conditionally to keep the code working on older boxes.
//
//  This code is based on the code from 7-Zip, which has a modified
//  version of the SHA-256 found in Crypto++ <http://www.cryptopp.com/>.
//  The code was modified a little to fit into liblzma.
//
//  Authors:    Kevin Springle
//              Wei Dai
//              Igor Pavlov
//              Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "check.h"

// Rotate a uint32_t. GCC can optimize this to a rotate instruction
// at least on x86.
static inline uint32_t
rotr_32(uint32_t num, unsigned amount)
{
	return (num >> amount) | (num << (32 - amount));
}

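// blk0() loads word i of the input block, converting it from big endian
// to native byte order. blk2() expands the message schedule in place:
// W[t] = s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16], with W[]
// used as a 16-word circular buffer.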
#define blk0(i) (W[i] = conv32be(data[i]))
#define blk2(i) (W[i & 15] += s1(W[(i - 2) & 15]) + W[(i - 7) & 15] \
		+ s0(W[(i - 15) & 15]))

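// Ch() and Maj() are equivalent to the textbook forms
// (x & y) ^ (~x & z) and (x & y) ^ (x & z) ^ (y & z) but need fewer
// operations. The addition in Maj() is safe because the two terms can
// never have a bit set in the same position.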
#define Ch(x, y, z) (z ^ (x & (y ^ z)))
#define Maj(x, y, z) ((x & (y ^ z)) + (y & z))

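// The eight working variables a-h are accessed as a window that rotates
// over T[]: incrementing i renames the variables instead of moving the
// values, so the round macro R() doesn't need to shuffle them around.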
#define a(i) T[(0 - i) & 7]
#define b(i) T[(1 - i) & 7]
#define c(i) T[(2 - i) & 7]
#define d(i) T[(3 - i) & 7]
#define e(i) T[(4 - i) & 7]
#define f(i) T[(5 - i) & 7]
#define g(i) T[(6 - i) & 7]
#define h(i) T[(7 - i) & 7]

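// One SHA-256 round: R0() is used for the first 16 rounds, where the
// input words are loaded with blk0(); R2() is used for the remaining
// rounds, where the schedule is expanded with blk2().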
#define R(i, j, blk) \
	h(i) += S1(e(i)) + Ch(e(i), f(i), g(i)) + SHA256_K[i + j] + blk; \
	d(i) += h(i); \
	h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
#define R0(i) R(i, 0, blk0(i))
#define R2(i) R(i, j, blk2(i))

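// The big sigma (S0, S1) and small sigma (s0, s1) functions of SHA-256.
// Nesting the rotations lets each big sigma be computed with three
// rotates and each small sigma with two rotates and one shift.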
#define S0(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 9), 11), 2)
#define S1(x) rotr_32(x ^ rotr_32(x ^ rotr_32(x, 14), 5), 6)
#define s0(x) (rotr_32(x ^ rotr_32(x, 11), 7) ^ (x >> 3))
#define s1(x) (rotr_32(x ^ rotr_32(x, 2), 17) ^ (x >> 10))


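// The SHA-256 round constants: the first 32 bits of the fractional parts
// of the cube roots of the first 64 prime numbers.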
static const uint32_t SHA256_K[64] = {
	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2,
};


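// Process one 64-byte block. data[] must point to the block as sixteen
// aligned 32-bit words; byte order conversion is done by blk0().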
static void
transform(uint32_t state[8], const uint32_t data[16])
{
	uint32_t W[16];
	uint32_t T[8];

	// Copy state[] to working vars.
	memcpy(T, state, sizeof(T));

	// The first 16 operations unrolled
	R0( 0); R0( 1); R0( 2); R0( 3);
	R0( 4); R0( 5); R0( 6); R0( 7);
	R0( 8); R0( 9); R0(10); R0(11);
	R0(12); R0(13); R0(14); R0(15);

	// The remaining 48 operations partially unrolled
	for (unsigned int j = 16; j < 64; j += 16) {
		R2( 0); R2( 1); R2( 2); R2( 3);
		R2( 4); R2( 5); R2( 6); R2( 7);
		R2( 8); R2( 9); R2(10); R2(11);
		R2(12); R2(13); R2(14); R2(15);
	}

	// Add the working vars back into state[].
	state[0] += a(0);
	state[1] += b(0);
	state[2] += c(0);
	state[3] += d(0);
	state[4] += e(0);
	state[5] += f(0);
	state[6] += g(0);
	state[7] += h(0);
}


static void
process(lzma_check_state *check)
{
	transform(check->state.sha256.state, check->buffer.u32);
	return;
}


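// Initialize the SHA-256 state. The initial hash values are the first
// 32 bits of the fractional parts of the square roots of the first
// eight prime numbers.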
extern void
lzma_sha256_init(lzma_check_state *check)
{
	static const uint32_t s[8] = {
		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
	};

	memcpy(check->state.sha256.state, s, sizeof(s));
	check->state.sha256.size = 0;

	return;
}


extern void
lzma_sha256_update(const uint8_t *buf, size_t size, lzma_check_state *check)
{
	// Copy the input data into a properly aligned temporary buffer.
	// This way we can be called with arbitrarily sized buffers
	// (the size doesn't need to be a multiple of 64 bytes), and the
	// code also works on architectures that don't allow unaligned
	// memory access.
	while (size > 0) {
		const size_t copy_start = check->state.sha256.size & 0x3F;
		size_t copy_size = 64 - copy_start;
		if (copy_size > size)
			copy_size = size;

		memcpy(check->buffer.u8 + copy_start, buf, copy_size);

		buf += copy_size;
		size -= copy_size;
		check->state.sha256.size += copy_size;

		if ((check->state.sha256.size & 0x3F) == 0)
			process(check);
	}

	return;
}


extern void
lzma_sha256_finish(lzma_check_state *check)
{
	// Add padding as described in RFC 3174 (it describes SHA-1 but
	// the same padding style is used for SHA-256 too).
	size_t pos = check->state.sha256.size & 0x3F;
	check->buffer.u8[pos++] = 0x80;

	while (pos != 64 - 8) {
		if (pos == 64) {
			process(check);
			pos = 0;
		}

		check->buffer.u8[pos++] = 0x00;
	}

	// Convert the message size from bytes to bits.
	check->state.sha256.size *= 8;

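	// Store the length in bits at the end of the final block as
	// a 64-bit big endian integer.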
	check->buffer.u64[(64 - 8) / 8] = conv64be(check->state.sha256.size);

	process(check);

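	// Write the resulting big endian hash value to check->buffer.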
	for (size_t i = 0; i < 8; ++i)
		check->buffer.u32[i] = conv32be(check->state.sha256.state[i]);

	return;
}