X-Git-Url: http://www.wagner.pp.ru/gitweb/?p=openssl-gost%2Fengine.git;a=blobdiff_plain;f=gosthash2012.c;h=91c2c9462518681df33e7536dba0b06e24dc73de;hp=ddd35fad45c7cf32faefc58d933f88378e098a13;hb=HEAD;hpb=31138d02d9c3698c039154b3078c16f205e25902 diff --git a/gosthash2012.c b/gosthash2012.c index ddd35fa..91c2c94 100644 --- a/gosthash2012.c +++ b/gosthash2012.c @@ -9,6 +9,13 @@ */ #include "gosthash2012.h" +#if defined(__x86_64__) || defined(__e2k__) +# ifdef _MSC_VER +# include <intrin.h> +# else +# include <x86intrin.h> +# endif +#endif #if defined(_WIN32) || defined(_WINDOWS) # define INLINE __inline @@ -57,32 +64,36 @@ static INLINE void add512(union uint512_u * RESTRICT x, const union uint512_u * RESTRICT y) { #ifndef __GOST3411_BIG_ENDIAN__ - unsigned int CF; + unsigned int CF = 0; unsigned int i; - CF = 0; +# ifdef HAVE_ADDCARRY_U64 + for (i = 0; i < 8; i++) + CF = _addcarry_u64(CF, x->QWORD[i] , y->QWORD[i], &(x->QWORD[i])); +# else for (i = 0; i < 8; i++) { - const unsigned long long left = x->QWORD[i]; - unsigned long long sum; - - sum = left + y->QWORD[i] + CF; - /* - * (sum == left): is noop, because it's possible only - * when `left' is added with `0 + 0' or with `ULLONG_MAX + 1', - * in that case `CF' (carry) retain previous value, which is correct, - * because when `left + 0 + 0' there was no overflow (thus no carry), - * and when `left + ULLONG_MAX + 1' value is wrapped back to - * itself with overflow, thus creating carry. - * - * (sum != left): - * if `sum' is not wrapped (sum > left) there should not be carry, - * if `sum' is wrapped (sum < left) there should be carry. 
- */ - if (sum != left) - CF = (sum < left); - x->QWORD[i] = sum; + const unsigned long long left = x->QWORD[i]; + unsigned long long sum; + + sum = left + y->QWORD[i] + CF; + /* + * (sum == left): is noop, because it's possible only + * when `left' is added with `0 + 0' or with `ULLONG_MAX + 1', + * in that case `CF' (carry) retain previous value, which is correct, + * because when `left + 0 + 0' there was no overflow (thus no carry), + * and when `left + ULLONG_MAX + 1' value is wrapped back to + * itself with overflow, thus creating carry. + * + * (sum != left): + * if `sum' is not wrapped (sum > left) there should not be carry, + * if `sum' is wrapped (sum < left) there should be carry. + */ + if (sum != left) + CF = (sum < left); + x->QWORD[i] = sum; } -#else +# endif /* !__x86_64__ */ +#else /* __GOST3411_BIG_ENDIAN__ */ const unsigned char *yp; unsigned char *xp; unsigned int i; @@ -96,7 +107,7 @@ static INLINE void add512(union uint512_u * RESTRICT x, buf = xp[i] + yp[i] + (buf >> 8); xp[i] = (unsigned char)buf & 0xFF; } -#endif +#endif /* __GOST3411_BIG_ENDIAN__ */ } static void g(union uint512_u *h, const union uint512_u * RESTRICT N, @@ -110,7 +121,7 @@ static void g(union uint512_u *h, const union uint512_u * RESTRICT N, LOAD(N, xmm0, xmm2, xmm4, xmm6); XLPS128M(h, xmm0, xmm2, xmm4, xmm6); - LOAD(m, xmm1, xmm3, xmm5, xmm7); + ULOAD(m, xmm1, xmm3, xmm5, xmm7); XLPS128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); for (i = 0; i < 11; i++) @@ -120,12 +131,15 @@ static void g(union uint512_u *h, const union uint512_u * RESTRICT N, X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); X128M(h, xmm0, xmm2, xmm4, xmm6); - X128M(m, xmm0, xmm2, xmm4, xmm6); - - UNLOAD(h, xmm0, xmm2, xmm4, xmm6); + ULOAD(m, xmm1, xmm3, xmm5, xmm7); + X128R(xmm0, xmm2, xmm4, xmm6, xmm1, xmm3, xmm5, xmm7); + STORE(h, xmm0, xmm2, xmm4, xmm6); +# ifndef __i386__ /* Restore the Floating-point status on the CPU */ + /* This is only required on MMX, but EXTRACT32 is using MMX */ 
_mm_empty(); +# endif #else union uint512_u Ki, data; unsigned int i; @@ -181,7 +195,6 @@ static INLINE void stage3(gost2012_hash_ctx * CTX) void gost2012_hash_block(gost2012_hash_ctx * CTX, const unsigned char *data, size_t len) { - register size_t chunksize; register size_t bufsize = CTX->bufsize; if (bufsize == 0) { @@ -194,7 +207,7 @@ void gost2012_hash_block(gost2012_hash_ctx * CTX, } while (len) { - chunksize = 64 - bufsize; + register size_t chunksize = 64 - bufsize; if (chunksize > len) chunksize = len;