X-Git-Url: http://www.wagner.pp.ru/gitweb/?p=openssl-gost%2Fengine.git;a=blobdiff_plain;f=gosthash2012.c;h=91c2c9462518681df33e7536dba0b06e24dc73de;hp=6a89237e1d9ba7ba35f0924780cd0c147385df22;hb=HEAD;hpb=cba16944bff9d8c5dcf37be641822cd3de6d2ec1 diff --git a/gosthash2012.c b/gosthash2012.c index 6a89237..91c2c94 100644 --- a/gosthash2012.c +++ b/gosthash2012.c @@ -9,6 +9,13 @@ */ #include "gosthash2012.h" +#if defined(__x86_64__) || defined(__e2k__) +# ifdef _MSC_VER +# include +# else +# include +# endif +#endif #if defined(_WIN32) || defined(_WINDOWS) # define INLINE __inline @@ -32,69 +39,79 @@ void init_gost2012_hash_ctx(gost2012_hash_ctx * CTX, const unsigned int digest_size) { - unsigned int i; - memset(CTX, 0, sizeof(gost2012_hash_ctx)); CTX->digest_size = digest_size; + /* + * IV for 512-bit hash should be 0^512 + * IV for 256-bit hash should be (00000001)^64 + * + * It's already zeroed when CTX is cleared above, so we only + * need to set it to 0x01-s for 256-bit hash. + */ if (digest_size == 256) memset(&CTX->h, 0x01, sizeof(uint512_u)); - else - memset(&CTX->h, 0x00, sizeof(uint512_u)); } static INLINE void pad(gost2012_hash_ctx * CTX) { - unsigned char buf[64]; - - if (CTX->bufsize > 63) - return; - - memset(&buf, 0x00, sizeof buf); - memcpy(&buf, CTX->buffer, CTX->bufsize); + memset(&(CTX->buffer.B[CTX->bufsize]), 0, sizeof(CTX->buffer) - CTX->bufsize); + CTX->buffer.B[CTX->bufsize] = 1; - buf[CTX->bufsize] = 0x01; - memcpy(CTX->buffer, &buf, sizeof buf); } -static INLINE void add512(const union uint512_u *x, - const union uint512_u *y, union uint512_u *r) +static INLINE void add512(union uint512_u * RESTRICT x, + const union uint512_u * RESTRICT y) { #ifndef __GOST3411_BIG_ENDIAN__ - unsigned int CF, OF; + unsigned int CF = 0; unsigned int i; - CF = 0; +# ifdef HAVE_ADDCARRY_U64 + for (i = 0; i < 8; i++) + CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i])); +# else for (i = 0; i < 8; i++) { - r->QWORD[i] = x->QWORD[i] + y->QWORD[i]; - if (r->QWORD[i] < y->QWORD[i] || r->QWORD[i] < x->QWORD[i]) - OF = 1; - else - OF = 0; - - r->QWORD[i] += CF; - CF = OF; + const unsigned long long left = x->QWORD[i]; + unsigned long long sum; + + sum = left + y->QWORD[i] + CF; + /* + * (sum == left): is noop, because it's possible only + * when `left' is added with `0 + 0' or with `ULLONG_MAX + 1', + * in that case `CF' (carry) retain previous value, which is correct, + * because when `left + 0 + 0' there was no overflow (thus no carry), + * and when `left + ULLONG_MAX + 1' value is wrapped back to + * itself with overflow, thus creating carry. + * + * (sum != left): + * if `sum' is not wrapped (sum > left) there should not be carry, + * if `sum' is wrapped (sum < left) there should be carry. + */ + if (sum != left) + CF = (sum < left); + x->QWORD[i] = sum; } -#else - const unsigned char *xp, *yp; - unsigned char *rp; +# endif /* !__x86_64__ */ +#else /* __GOST3411_BIG_ENDIAN__ */ + const unsigned char *yp; + unsigned char *xp; unsigned int i; int buf; - xp = (const unsigned char *)&x[0]; + xp = (unsigned char *)&x[0]; yp = (const unsigned char *)&y[0]; - rp = (unsigned char *)&r[0]; buf = 0; for (i = 0; i < 64; i++) { buf = xp[i] + yp[i] + (buf >> 8); - rp[i] = (unsigned char)buf & 0xFF; + xp[i] = (unsigned char)buf & 0xFF; } -#endif +#endif /* __GOST3411_BIG_ENDIAN__ */ } -static void g(union uint512_u *h, const union uint512_u *N, - const unsigned char *m) +static void g(union uint512_u *h, const union uint512_u * RESTRICT N, + const union uint512_u * RESTRICT m) { #ifdef __GOST3411_HAS_SSE2__ __m128i xmm0, xmm2, xmm4, xmm6; /* XMMR0-quadruple */ @@ -104,7 +121,7 @@ static void g(union uint512_u *h, const union uint512_u *N, LOAD(N, xmm0, xmm2, xmm4, xmm6); XLPS128M(h, xmm0, xmm2, xmm4, xmm6); - LOAD(m, xmm1, xmm3, xmm5, xmm7); + ULOAD(m, xmm1, xmm3, xmm5, xmm7); XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); for (i = 0; i < 11; i++) @@ -114,12 +131,15 @@ static void g(union uint512_u *h, const union uint512_u *N, X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); X128M(h, xmm0, xmm2, xmm4, xmm6); - X128M(m, xmm0, xmm2, xmm4, xmm6); - - UNLOAD(h, xmm0, xmm2, xmm4, xmm6); + ULOAD(m, xmm1, xmm3, xmm5, xmm7); + X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); + STORE(h, xmm0, xmm2, xmm4, xmm6); +# ifndef __i386__ /* Restore the Floating-point status on the CPU */ + /* This is only required on MMX, but EXTRACT32 is using MMX */ _mm_empty(); +# endif #else union uint512_u Ki, data; unsigned int i; @@ -138,45 +158,34 @@ static void g(union uint512_u *h, const union uint512_u *N, /* E() done */ X((&data), h, (&data)); - X((&data), ((const union uint512_u *)&m[0]), h); + X((&data), m, h); #endif } -static INLINE void stage2(gost2012_hash_ctx * CTX, const unsigned char *data) +static INLINE void stage2(gost2012_hash_ctx * CTX, const union uint512_u *data) { g(&(CTX->h), &(CTX->N), data); - add512(&(CTX->N), &buffer512, &(CTX->N)); - add512(&(CTX->Sigma), (const union uint512_u *)data, &(CTX->Sigma)); + add512(&(CTX->N), &buffer512); + add512(&(CTX->Sigma), data); } static INLINE void stage3(gost2012_hash_ctx * CTX) { - ALIGN(16) union uint512_u buf; - - memset(&buf, 0x00, sizeof buf); - memcpy(&buf, &(CTX->buffer), CTX->bufsize); - memcpy(&(CTX->buffer), &buf, sizeof(uint512_u)); + pad(CTX); + g(&(CTX->h), &(CTX->N), &(CTX->buffer)); + add512(&(CTX->Sigma), &CTX->buffer); - memset(&buf, 0x00, sizeof buf); + memset(&(CTX->buffer.B[0]), 0, sizeof(uint512_u)); #ifndef __GOST3411_BIG_ENDIAN__ - buf.QWORD[0] = CTX->bufsize << 3; + CTX->buffer.QWORD[0] = CTX->bufsize << 3; #else - buf.QWORD[0] = BSWAP64(CTX->bufsize << 3); + CTX->buffer.QWORD[0] = BSWAP64(CTX->bufsize << 3); #endif + add512(&(CTX->N), &(CTX->buffer)); - pad(CTX); - - g(&(CTX->h), &(CTX->N), (const unsigned char *)&(CTX->buffer)); - - add512(&(CTX->N), &buf, &(CTX->N)); - add512(&(CTX->Sigma), (const union uint512_u *)&CTX->buffer[0], - &(CTX->Sigma)); - - g(&(CTX->h), &buffer0, (const unsigned char *)&(CTX->N)); - - g(&(CTX->h), &buffer0, (const unsigned char *)&(CTX->Sigma)); - memcpy(&(CTX->hash), &(CTX->h), sizeof(uint512_u)); + g(&(CTX->h), &buffer0, &(CTX->N)); + g(&(CTX->h), &buffer0, &(CTX->Sigma)); } /* @@ -186,32 +195,34 @@ static INLINE void stage3(gost2012_hash_ctx * CTX) void gost2012_hash_block(gost2012_hash_ctx * CTX, const unsigned char *data, size_t len) { - size_t chunksize; - - while (len > 63 && CTX->bufsize == 0) { - stage2(CTX, data); - - data += 64; - len -= 64; + register size_t bufsize = CTX->bufsize; + + if (bufsize == 0) { + while (len >= 64) { + memcpy(&CTX->buffer.B[0], data, 64); + stage2(CTX, &(CTX->buffer)); + data += 64; + len -= 64; + } } while (len) { - chunksize = 64 - CTX->bufsize; + register size_t chunksize = 64 - bufsize; if (chunksize > len) chunksize = len; - memcpy(&CTX->buffer[CTX->bufsize], data, chunksize); + memcpy(&CTX->buffer.B[bufsize], data, chunksize); - CTX->bufsize += chunksize; + bufsize += chunksize; len -= chunksize; data += chunksize; - if (CTX->bufsize == 64) { - stage2(CTX, CTX->buffer); - - CTX->bufsize = 0; + if (bufsize == 64) { + stage2(CTX, &(CTX->buffer) ); + bufsize = 0; } } + CTX->bufsize = bufsize; } /* @@ -226,7 +237,7 @@ void gost2012_finish_hash(gost2012_hash_ctx * CTX, unsigned char *digest) CTX->bufsize = 0; if (CTX->digest_size == 256) - memcpy(digest, &(CTX->hash.QWORD[4]), 32); + memcpy(digest, &(CTX->h.QWORD[4]), 32); else - memcpy(digest, &(CTX->hash.QWORD[0]), 64); + memcpy(digest, &(CTX->h.QWORD[0]), 64); }