From 7aff1e4cb053394db57c2814d5fe1e6493e0cc75 Mon Sep 17 00:00:00 2001
From: watcherhd
Date: Sat, 26 Nov 2011 14:19:43 +0000
Subject: Project folders rename part 2

git-svn-id: http://miranda-plugins.googlecode.com/svn/trunk@214 e753b5eb-9565-29b2-b5c5-2cc6f99dfbcb
---
 cryptopp/crypto/rijndael.cpp | 722 -------------------------------------------
 1 file changed, 722 deletions(-)
 delete mode 100644 cryptopp/crypto/rijndael.cpp

diff --git a/cryptopp/crypto/rijndael.cpp b/cryptopp/crypto/rijndael.cpp
deleted file mode 100644
index c1682d8..0000000
--- a/cryptopp/crypto/rijndael.cpp
+++ /dev/null
@@ -1,722 +0,0 @@
-// rijndael.cpp - modified by Chris Morgan
-// and Wei Dai from Paulo Barreto's Rijndael implementation
-// The original code and all modifications are in the public domain.
-
-// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
-
-/*
-Defense against timing attacks was added in July 2006 by Wei Dai.
-
-The code now uses smaller tables in the first and last rounds,
-and preloads them into L1 cache before usage (by loading at least
-one element in each cache line).
-
-We try to delay subsequent accesses to each table (used in the first
-and last rounds) until all of the table has been preloaded. Hopefully
-the compiler isn't smart enough to optimize that code away.
-
-After preloading the table, we also try not to access any memory location
-other than the table and the stack, in order to prevent table entries from
-being unloaded from L1 cache, until that round is finished.
-(Some popular CPUs have 2-way associative caches.)
-*/
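The scalar fallback later in this file implements that preload as a loop that touches one word per cache line and folds every load into a dummy accumulator. A stand-alone sketch of the same idea follows; the table and its size here are illustrative stand-ins, not the file's actual Te/Td definitions:

    #include <cstdint>
    #include <cstddef>

    static uint32_t table[256];                   // stand-in for one 1KB lookup table

    uint32_t PreloadIntoL1(size_t cacheLineSize)  // e.g. 64 bytes on most x86 CPUs
    {
        uint32_t u = 0;
        // Touch one word in every cache line of the table before any
        // secret-dependent index is used, mirroring the loop in this file:
        //     for (i=0; i<1024; i+=cacheLineSize)
        //         u &= *(const word32 *)(((const byte *)Te)+i);
        for (size_t i = 0; i < sizeof(table); i += cacheLineSize)
            u &= *(const uint32_t *)((const uint8_t *)table + i);
        u &= table[255];   // touch the last line as well
        // u is always 0, but it depends on every load, and the caller ORs it
        // into the cipher state (s0 |= u; ...), so the reads are hard to prove
        // dead -- the same "hopefully" the comment above relies on.
        return u;
    }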
-
-// This is the original introductory comment:
-
-/**
- * version 3.0 (December 2000)
- *
- * Optimised ANSI C code for the Rijndael cipher (now AES)
- *
- * author Vincent Rijmen
- * author Antoon Bosselaers
- * author Paulo Barreto
- *
- * This code is hereby placed in the public domain.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "pch.h"
-
-#ifndef CRYPTOPP_IMPORTS
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#include "rijndael.h"
-#include "misc.h"
-#include "cpu.h"
-
-NAMESPACE_BEGIN(CryptoPP)
-
-void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
-{
-    AssertValidKeyLength(keylen);
-
-    m_rounds = keylen/4 + 6;
-    m_key.New(4*(m_rounds+1));
-
-    word32 temp, *rk = m_key;
-    const word32 *rc = rcon;
-
-    GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);
-
-    while (true)
-    {
-        temp = rk[keylen/4-1];
-        rk[keylen/4] = rk[0] ^
-            (word32(Se[GETBYTE(temp, 2)]) << 24) ^
-            (word32(Se[GETBYTE(temp, 1)]) << 16) ^
-            (word32(Se[GETBYTE(temp, 0)]) << 8) ^
-            Se[GETBYTE(temp, 3)] ^
-            *(rc++);
-        rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
-        rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
-        rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
-
-        if (rk + keylen/4 + 4 == m_key.end())
-            break;
-
-        if (keylen == 24)
-        {
-            rk[10] = rk[ 4] ^ rk[ 9];
-            rk[11] = rk[ 5] ^ rk[10];
-        }
-        else if (keylen == 32)
-        {
-            temp = rk[11];
-            rk[12] = rk[ 4] ^
-                (word32(Se[GETBYTE(temp, 3)]) << 24) ^
-                (word32(Se[GETBYTE(temp, 2)]) << 16) ^
-                (word32(Se[GETBYTE(temp, 1)]) << 8) ^
-                Se[GETBYTE(temp, 0)];
-            rk[13] = rk[ 5] ^ rk[12];
-            rk[14] = rk[ 6] ^ rk[13];
-            rk[15] = rk[ 7] ^ rk[14];
-        }
-        rk += keylen/4;
-    }
-
-    if (!IsForwardTransformation())
-    {
-        unsigned int i, j;
-        rk = m_key;
-
-        /* invert the order of the round keys: */
-        for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
-            temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
-            temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
-            temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
-            temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
-        }
-        /* apply the inverse MixColumn transform to all round keys but the first and the last: */
-        for (i = 1; i < m_rounds; i++) {
-            rk += 4;
-            rk[0] =
-                Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
-                Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
-                Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
-                Td[3*256+Se[GETBYTE(rk[0], 0)]];
-            rk[1] =
-                Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
-                Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
-                Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
-                Td[3*256+Se[GETBYTE(rk[1], 0)]];
-            rk[2] =
-                Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
-                Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
-                Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
-                Td[3*256+Se[GETBYTE(rk[2], 0)]];
-            rk[3] =
-                Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
-                Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
-                Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
-                Td[3*256+Se[GETBYTE(rk[3], 0)]];
-        }
-    }
-
-    ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
-    ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
-}
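The arithmetic at the top of UncheckedSetKey fixes the round count from the key length: keylen/4 + 6 gives the standard AES round counts, and 4*(m_rounds+1) words of round-key storage. A small stand-alone check of those sizes, in plain C++ independent of the Crypto++ types above:

    #include <cstdio>
    #include <initializer_list>

    int main()
    {
        for (unsigned keylen : {16u, 24u, 32u})   // AES-128/192/256 key bytes
        {
            unsigned rounds  = keylen/4 + 6;      // 10, 12, 14
            unsigned rkWords = 4*(rounds + 1);    // 44, 52, 60 round-key words
            std::printf("%u-byte key: %u rounds, %u round-key words\n",
                        keylen, rounds, rkWords);
        }
        return 0;
    }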
-
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
-extern "C" {
-void Rijndael_Enc_ProcessAndXorBlock(const word32 *table, word32 cacheLineSize, const word32 *k, const word32 *kLoopEnd, const byte *inBlock, const byte *xorBlock, byte *outBlock);
-}
-#endif
-
-#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
-
-void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
-#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
-
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
-    Rijndael_Enc_ProcessAndXorBlock(Te, g_cacheLineSize, m_key, m_key + m_rounds*4, inBlock, xorBlock, outBlock);
-    return;
-#endif
-
-#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
-    #ifdef CRYPTOPP_GENERATE_X64_MASM
-        ALIGN 8
-    Rijndael_Enc_ProcessAndXorBlock PROC FRAME
-        rex_push_reg rbx
-        push_reg rsi
-        push_reg rdi
-        push_reg r12
-        push_reg r13
-        push_reg r14
-        push_reg r15
-        .endprolog
-        mov AS_REG_7, rcx
-        mov rdi, [rsp + 5*8 + 7*8]    ; inBlock
-    #else
-    if (HasMMX())
-    {
-        const word32 *k = m_key;
-        const word32 *kLoopEnd = k + m_rounds*4;
-    #endif
-
-    #if CRYPTOPP_BOOL_X64
-        #define K_REG           r8
-        #define K_END_REG       r9
-        #define SAVE_K
-        #define RESTORE_K
-        #define RESTORE_K_END
-        #define SAVE_0(x)       AS2(mov r13d, x)
-        #define SAVE_1(x)       AS2(mov r14d, x)
-        #define SAVE_2(x)       AS2(mov r15d, x)
-        #define RESTORE_0(x)    AS2(mov x, r13d)
-        #define RESTORE_1(x)    AS2(mov x, r14d)
-        #define RESTORE_2(x)    AS2(mov x, r15d)
-    #else
-        #define K_REG           esi
-        #define K_END_REG       edi
-        #define SAVE_K          AS2(movd mm4, esi)
-        #define RESTORE_K       AS2(movd esi, mm4)
-        #define RESTORE_K_END   AS2(movd edi, mm5)
-        #define SAVE_0(x)       AS2(movd mm0, x)
-        #define SAVE_1(x)       AS2(movd mm1, x)
-        #define SAVE_2(x)       AS2(movd mm2, x)
-        #define RESTORE_0(x)    AS2(movd x, mm0)
-        #define RESTORE_1(x)    AS2(movd x, mm1)
-        #define RESTORE_2(x)    AS2(movd x, mm2)
-    #endif
-#ifdef __GNUC__
-    word32 t0, t1, t2, t3;
-    __asm__ __volatile__
-    (
-    ".intel_syntax noprefix;"
-    #if CRYPTOPP_BOOL_X64
-    AS2(    mov     K_REG, rsi)
-    AS2(    mov     K_END_REG, rcx)
-    #else
-    AS1(    push    ebx)
-    AS1(    push    ebp)
-    AS2(    movd    mm5, ecx)
-    #endif
-    AS2(    mov     AS_REG_7, WORD_REG(ax))
-#elif CRYPTOPP_BOOL_X86
-    #if _MSC_VER < 1300
-        const word32 *t = Te;
-        AS2(    mov     eax, t)
-    #endif
-        AS2(    mov     edx, g_cacheLineSize)
-        AS2(    mov     WORD_REG(di), inBlock)
-        AS2(    mov     K_REG, k)
-        AS2(    movd    mm5, kLoopEnd)
-    #if _MSC_VER < 1300
-        AS1(    push    ebx)
-        AS1(    push    ebp)
-        AS2(    mov     AS_REG_7, eax)
-    #else
-        AS1(    push    ebp)
-        AS2(    lea     AS_REG_7, Te)
-    #endif
-#endif
-    AS2(    mov     eax, [K_REG+0*4])   // s0
-    AS2(    xor     eax, [WORD_REG(di)+0*4])
-    SAVE_0(eax)
-    AS2(    mov     ebx, [K_REG+1*4])
-    AS2(    xor     ebx, [WORD_REG(di)+1*4])
-    SAVE_1(ebx)
-    AS2(    and     ebx, eax)
-    AS2(    mov     eax, [K_REG+2*4])
-    AS2(    xor     eax, [WORD_REG(di)+2*4])
-    SAVE_2(eax)
-    AS2(    and     ebx, eax)
-    AS2(    mov     ecx, [K_REG+3*4])
-    AS2(    xor     ecx, [WORD_REG(di)+3*4])
-    AS2(    and     ebx, ecx)
-
-    // read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction
-    AS2(    and     ebx, 0)
-    AS2(    mov     edi, ebx)   // make index depend on previous loads to simulate lfence
-    ASL(2)
-    AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
-    AS2(    add     edi, edx)
-    AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
-    AS2(    add     edi, edx)
-    AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
-    AS2(    add     edi, edx)
-    AS2(    and     ebx, [AS_REG_7+WORD_REG(di)])
-    AS2(    add     edi, edx)
-    AS2(    cmp     edi, 1024)
-    ASJ(    jl,     2, b)
-    AS2(    and     ebx, [AS_REG_7+1020])
-#if CRYPTOPP_BOOL_X64
-    AS2(    xor     r13d, ebx)
-    AS2(    xor     r14d, ebx)
-    AS2(    xor     r15d, ebx)
-#else
-    AS2(    movd    mm6, ebx)
-    AS2(    pxor    mm2, mm6)
-    AS2(    pxor    mm1, mm6)
-    AS2(    pxor    mm0, mm6)
-#endif
-    AS2(    xor     ecx, ebx)
-
-    AS2(    mov     edi, [K_REG+4*4])   // t0
-    AS2(    mov     eax, [K_REG+5*4])
-    AS2(    mov     ebx, [K_REG+6*4])
-    AS2(    mov     edx, [K_REG+7*4])
-    AS2(    add     K_REG, 8*4)
-    SAVE_K
-
-#define QUARTER_ROUND(t, a, b, c, d) \
-    AS2(movzx esi, t##l)\
-    AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])\
-    AS2(movzx esi, t##h)\
-    AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
-    AS2(shr e##t##x, 16)\
-    AS2(movzx esi, t##l)\
-    AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
-    AS2(movzx esi, t##h)\
-    AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])
-
-#define s0 xor edi
-#define s1 xor eax
-#define s2 xor ebx
-#define s3 xor ecx
-#define t0 xor edi
-#define t1 xor eax
-#define t2 xor ebx
-#define t3 xor edx
-
-    QUARTER_ROUND(c, t0, t1, t2, t3)
-    RESTORE_2(ecx)
-    QUARTER_ROUND(c, t3, t0, t1, t2)
-    RESTORE_1(ecx)
-    QUARTER_ROUND(c, t2, t3, t0, t1)
-    RESTORE_0(ecx)
-    QUARTER_ROUND(c, t1, t2, t3, t0)
-    SAVE_2(ebx)
-    SAVE_1(eax)
-    SAVE_0(edi)
-#undef QUARTER_ROUND
-
-    RESTORE_K
-
-    ASL(0)
-    AS2(    mov     edi, [K_REG+0*4])
-    AS2(    mov     eax, [K_REG+1*4])
-    AS2(    mov     ebx, [K_REG+2*4])
-    AS2(    mov     ecx, [K_REG+3*4])
-
-#define QUARTER_ROUND(t, a, b, c, d) \
-    AS2(movzx esi, t##l)\
-    AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])\
-    AS2(movzx esi, t##h)\
-    AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
-    AS2(shr e##t##x, 16)\
-    AS2(movzx esi, t##l)\
-    AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
-    AS2(movzx esi, t##h)\
-    AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])
-
-    QUARTER_ROUND(d, s0, s1, s2, s3)
-    RESTORE_2(edx)
-    QUARTER_ROUND(d, s3, s0, s1, s2)
-    RESTORE_1(edx)
-    QUARTER_ROUND(d, s2, s3, s0, s1)
-    RESTORE_0(edx)
-    QUARTER_ROUND(d, s1, s2, s3, s0)
-    RESTORE_K
-    SAVE_2(ebx)
-    SAVE_1(eax)
-    SAVE_0(edi)
-
-    AS2(    mov     edi, [K_REG+4*4])
-    AS2(    mov     eax, [K_REG+5*4])
-    AS2(    mov     ebx, [K_REG+6*4])
-    AS2(    mov     edx, [K_REG+7*4])
-
-    QUARTER_ROUND(c, t0, t1, t2, t3)
-    RESTORE_2(ecx)
-    QUARTER_ROUND(c, t3, t0, t1, t2)
-    RESTORE_1(ecx)
-    QUARTER_ROUND(c, t2, t3, t0, t1)
-    RESTORE_0(ecx)
-    QUARTER_ROUND(c, t1, t2, t3, t0)
-    SAVE_2(ebx)
-    SAVE_1(eax)
-    SAVE_0(edi)
-
-    RESTORE_K
-    RESTORE_K_END
-    AS2(    add     K_REG, 8*4)
-    SAVE_K
-    AS2(    cmp     K_END_REG, K_REG)
-    ASJ(    jne,    0, b)
-
-#undef QUARTER_ROUND
-#undef s0
-#undef s1
-#undef s2
-#undef s3
-#undef t0
-#undef t1
-#undef t2
-#undef t3
-
-    AS2(    mov     eax, [K_END_REG+0*4])
-    AS2(    mov     ecx, [K_END_REG+1*4])
-    AS2(    mov     esi, [K_END_REG+2*4])
-    AS2(    mov     edi, [K_END_REG+3*4])
-
-#define QUARTER_ROUND(a, b, c, d) \
-    AS2(    movzx   ebx, dl)\
-    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
-    AS2(    shl     ebx, 3*8)\
-    AS2(    xor     a, ebx)\
-    AS2(    movzx   ebx, dh)\
-    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
-    AS2(    shl     ebx, 2*8)\
-    AS2(    xor     b, ebx)\
-    AS2(    shr     edx, 16)\
-    AS2(    movzx   ebx, dl)\
-    AS2(    shr     edx, 8)\
-    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
-    AS2(    shl     ebx, 1*8)\
-    AS2(    xor     c, ebx)\
-    AS2(    movzx   ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(dx)])\
-    AS2(    xor     d, ebx)
-
-    QUARTER_ROUND(eax, ecx, esi, edi)
-    RESTORE_2(edx)
-    QUARTER_ROUND(edi, eax, ecx, esi)
-    RESTORE_1(edx)
-    QUARTER_ROUND(esi, edi, eax, ecx)
-    RESTORE_0(edx)
-    QUARTER_ROUND(ecx, esi, edi, eax)
-
-#undef QUARTER_ROUND
-
-#if CRYPTOPP_BOOL_X86
-    AS1(emms)
-    AS1(pop ebp)
-    #if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
-    AS1(pop ebx)
-    #endif
-#endif
-
-#ifdef __GNUC__
-    ".att_syntax prefix;"
-        : "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
-        : "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
-        : "memory", "cc"
-    #if CRYPTOPP_BOOL_X64
-        , "%ebx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
-    #endif
-    );
-
-    if (xorBlock)
-    {
-        t0 ^= ((const word32 *)xorBlock)[0];
-        t1 ^= ((const word32 *)xorBlock)[1];
-        t2 ^= ((const word32 *)xorBlock)[2];
-        t3 ^= ((const word32 *)xorBlock)[3];
-    }
-    ((word32 *)outBlock)[0] = t0;
-    ((word32 *)outBlock)[1] = t1;
-    ((word32 *)outBlock)[2] = t2;
-    ((word32 *)outBlock)[3] = t3;
-#else
-    #if CRYPTOPP_BOOL_X64
-        mov     rbx, [rsp + 6*8 + 7*8]    ; xorBlock
-    #else
-    AS2(    mov     ebx, xorBlock)
-    #endif
-    AS2(    test    WORD_REG(bx), WORD_REG(bx))
-    ASJ(    jz,     1, f)
-    AS2(    xor     eax, [WORD_REG(bx)+0*4])
-    AS2(    xor     ecx, [WORD_REG(bx)+1*4])
-    AS2(    xor     esi, [WORD_REG(bx)+2*4])
-    AS2(    xor     edi, [WORD_REG(bx)+3*4])
-    ASL(1)
-    #if CRYPTOPP_BOOL_X64
-        mov     rbx, [rsp + 7*8 + 7*8]    ; outBlock
-    #else
-    AS2(    mov     ebx, outBlock)
-    #endif
-    AS2(    mov     [WORD_REG(bx)+0*4], eax)
-    AS2(    mov     [WORD_REG(bx)+1*4], ecx)
-    AS2(    mov     [WORD_REG(bx)+2*4], esi)
-    AS2(    mov     [WORD_REG(bx)+3*4], edi)
-#endif
-
-#if CRYPTOPP_GENERATE_X64_MASM
-    pop r15
-    pop r14
-    pop r13
-    pop r12
-    pop rdi
-    pop rsi
-    pop rbx
-    ret
-    Rijndael_Enc_ProcessAndXorBlock ENDP
-#else
-    }
-    else
-#endif
-#endif  // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-    {
-        word32 s0, s1, s2, s3, t0, t1, t2, t3;
-        const word32 *rk = m_key;
-
-        s0 = ((const word32 *)inBlock)[0] ^ rk[0];
-        s1 = ((const word32 *)inBlock)[1] ^ rk[1];
-        s2 = ((const word32 *)inBlock)[2] ^ rk[2];
-        s3 = ((const word32 *)inBlock)[3] ^ rk[3];
-        t0 = rk[4];
-        t1 = rk[5];
-        t2 = rk[6];
-        t3 = rk[7];
-        rk += 8;
-
-        // timing attack countermeasure. see comments at top for more details
-        const int cacheLineSize = GetCacheLineSize();
-        unsigned int i;
-        word32 u = 0;
-        for (i=0; i<1024; i+=cacheLineSize)
-            u &= *(const word32 *)(((const byte *)Te)+i);
-        u &= Te[255];
-        s0 |= u; s1 |= u; s2 |= u; s3 |= u;
-
-        // first round
-#ifdef IS_BIG_ENDIAN
-#define QUARTER_ROUND(t, a, b, c, d) \
-            a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\
-            b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
-            c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
-            d ^= Te[t];
-#else
-#define QUARTER_ROUND(t, a, b, c, d) \
-            d ^= Te[byte(t)]; t >>= 8;\
-            c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
-            b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
-            a ^= rotrFixed(Te[t], 24);
-#endif
-
-        QUARTER_ROUND(s3, t0, t1, t2, t3)
-        QUARTER_ROUND(s2, t3, t0, t1, t2)
-        QUARTER_ROUND(s1, t2, t3, t0, t1)
-        QUARTER_ROUND(s0, t1, t2, t3, t0)
-#undef QUARTER_ROUND
-
-        // Nr - 2 full rounds:
-        unsigned int r = m_rounds/2 - 1;
-        do
-        {
-#define QUARTER_ROUND(t, a, b, c, d) \
-            a ^= Te[3*256+byte(t)]; t >>= 8;\
-            b ^= Te[2*256+byte(t)]; t >>= 8;\
-            c ^= Te[1*256+byte(t)]; t >>= 8;\
-            d ^= Te[t];
-
-            s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
-
-            QUARTER_ROUND(t3, s0, s1, s2, s3)
-            QUARTER_ROUND(t2, s3, s0, s1, s2)
-            QUARTER_ROUND(t1, s2, s3, s0, s1)
-            QUARTER_ROUND(t0, s1, s2, s3, s0)
-
-            t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
-
-            QUARTER_ROUND(s3, t0, t1, t2, t3)
-            QUARTER_ROUND(s2, t3, t0, t1, t2)
-            QUARTER_ROUND(s1, t2, t3, t0, t1)
-            QUARTER_ROUND(s0, t1, t2, t3, t0)
-#undef QUARTER_ROUND
-
-            rk += 8;
-        } while (--r);
-
-        // timing attack countermeasure. see comments at top for more details
-        u = 0;
-        for (i=0; i<256; i+=cacheLineSize)
-            u &= *(const word32 *)(Se+i);
-        u &= *(const word32 *)(Se+252);
-        t0 |= u; t1 |= u; t2 |= u; t3 |= u;
-
-        word32 tbw[4];
-        byte *const tempBlock = (byte *)tbw;
-        word32 *const obw = (word32 *)outBlock;
-        const word32 *const xbw = (const word32 *)xorBlock;
-
-#define QUARTER_ROUND(t, a, b, c, d) \
-        tempBlock[a] = Se[byte(t)]; t >>= 8;\
-        tempBlock[b] = Se[byte(t)]; t >>= 8;\
-        tempBlock[c] = Se[byte(t)]; t >>= 8;\
-        tempBlock[d] = Se[t];
-
-        QUARTER_ROUND(t2, 15, 2, 5, 8)
-        QUARTER_ROUND(t1, 11, 14, 1, 4)
-        QUARTER_ROUND(t0, 7, 10, 13, 0)
-        QUARTER_ROUND(t3, 3, 6, 9, 12)
-#undef QUARTER_ROUND
-
-        if (xbw)
-        {
-            obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
-            obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
-            obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
-            obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
-        }
-        else
-        {
-            obw[0] = tbw[0] ^ rk[0];
-            obw[1] = tbw[1] ^ rk[1];
-            obw[2] = tbw[2] ^ rk[2];
-            obw[3] = tbw[3] ^ rk[3];
-        }
-    }
-}
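Each QUARTER_ROUND above consumes one 32-bit state word a byte at a time, using each byte to index a different 256-entry slice of Te and folding the result into a different output word. The same dataflow in stand-alone C++ (the little-endian #else branch of the full-round macro; Te is only declared here so the sketch compiles, it is not the file's real table):

    #include <cstdint>

    extern const uint32_t Te[4*256];   // stand-in declaration for the combined tables

    inline void QuarterRound(uint32_t t, uint32_t &a, uint32_t &b,
                             uint32_t &c, uint32_t &d)
    {
        a ^= Te[3*256 + uint8_t(t)]; t >>= 8;   // lowest byte -> last table slice
        b ^= Te[2*256 + uint8_t(t)]; t >>= 8;
        c ^= Te[1*256 + uint8_t(t)]; t >>= 8;
        d ^= Te[t];                             // highest byte -> first slice
    }

Four such calls per round, with the destination arguments rotated as in QUARTER_ROUND(t3, s0, s1, s2, s3), QUARTER_ROUND(t2, s3, s0, s1, s2), and so on, are what realize the ShiftRows offsets.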
-
-void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
-    word32 s0, s1, s2, s3, t0, t1, t2, t3;
-    const word32 *rk = m_key;
-
-    s0 = ((const word32 *)inBlock)[0] ^ rk[0];
-    s1 = ((const word32 *)inBlock)[1] ^ rk[1];
-    s2 = ((const word32 *)inBlock)[2] ^ rk[2];
-    s3 = ((const word32 *)inBlock)[3] ^ rk[3];
-    t0 = rk[4];
-    t1 = rk[5];
-    t2 = rk[6];
-    t3 = rk[7];
-    rk += 8;
-
-    // timing attack countermeasure. see comments at top for more details
-    const int cacheLineSize = GetCacheLineSize();
-    unsigned int i;
-    word32 u = 0;
-    for (i=0; i<1024; i+=cacheLineSize)
-        u &= *(const word32 *)(((const byte *)Td)+i);
-    u &= Td[255];
-    s0 |= u; s1 |= u; s2 |= u; s3 |= u;
-
-    // first round
-#ifdef IS_BIG_ENDIAN
-#define QUARTER_ROUND(t, a, b, c, d) \
-        a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\
-        b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
-        c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
-        d ^= Td[t];
-#else
-#define QUARTER_ROUND(t, a, b, c, d) \
-        d ^= Td[byte(t)]; t >>= 8;\
-        c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
-        b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
-        a ^= rotrFixed(Td[t], 24);
-#endif
-
-    QUARTER_ROUND(s3, t2, t1, t0, t3)
-    QUARTER_ROUND(s2, t1, t0, t3, t2)
-    QUARTER_ROUND(s1, t0, t3, t2, t1)
-    QUARTER_ROUND(s0, t3, t2, t1, t0)
-#undef QUARTER_ROUND
-
-    // Nr - 2 full rounds:
-    unsigned int r = m_rounds/2 - 1;
-    do
-    {
-#define QUARTER_ROUND(t, a, b, c, d) \
-        a ^= Td[3*256+byte(t)]; t >>= 8;\
-        b ^= Td[2*256+byte(t)]; t >>= 8;\
-        c ^= Td[1*256+byte(t)]; t >>= 8;\
-        d ^= Td[t];
-
-        s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
-
-        QUARTER_ROUND(t3, s2, s1, s0, s3)
-        QUARTER_ROUND(t2, s1, s0, s3, s2)
-        QUARTER_ROUND(t1, s0, s3, s2, s1)
-        QUARTER_ROUND(t0, s3, s2, s1, s0)
-
-        t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
-
-        QUARTER_ROUND(s3, t2, t1, t0, t3)
-        QUARTER_ROUND(s2, t1, t0, t3, t2)
-        QUARTER_ROUND(s1, t0, t3, t2, t1)
-        QUARTER_ROUND(s0, t3, t2, t1, t0)
-#undef QUARTER_ROUND
-
-        rk += 8;
-    } while (--r);
-
-    // timing attack countermeasure. see comments at top for more details
-    u = 0;
-    for (i=0; i<256; i+=cacheLineSize)
-        u &= *(const word32 *)(Sd+i);
-    u &= *(const word32 *)(Sd+252);
-    t0 |= u; t1 |= u; t2 |= u; t3 |= u;
-
-    word32 tbw[4];
-    byte *const tempBlock = (byte *)tbw;
-    word32 *const obw = (word32 *)outBlock;
-    const word32 *const xbw = (const word32 *)xorBlock;
-
-#define QUARTER_ROUND(t, a, b, c, d) \
-    tempBlock[a] = Sd[byte(t)]; t >>= 8;\
-    tempBlock[b] = Sd[byte(t)]; t >>= 8;\
-    tempBlock[c] = Sd[byte(t)]; t >>= 8;\
-    tempBlock[d] = Sd[t];
-
-    QUARTER_ROUND(t2, 7, 2, 13, 8)
-    QUARTER_ROUND(t1, 3, 14, 9, 4)
-    QUARTER_ROUND(t0, 15, 10, 5, 0)
-    QUARTER_ROUND(t3, 11, 6, 1, 12)
-#undef QUARTER_ROUND
-
-    if (xbw)
-    {
-        obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
-        obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
-        obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
-        obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
-    }
-    else
-    {
-        obw[0] = tbw[0] ^ rk[0];
-        obw[1] = tbw[1] ^ rk[1];
-        obw[2] = tbw[2] ^ rk[2];
-        obw[3] = tbw[3] ^ rk[3];
-    }
-}
-
-NAMESPACE_END
-
-#endif
-#endif
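For reference, these block-level routines are normally reached through Crypto++'s generic BlockCipher interface rather than called directly. A minimal sketch, assuming the stock Crypto++ headers this vendored copy was built from (not part of the deleted file):

    #include "rijndael.h"   // vendored header deleted alongside this file

    using namespace CryptoPP;

    void EncryptOneBlock(const byte key[16], const byte in[16], byte out[16])
    {
        Rijndael::Encryption enc;    // dispatches to Enc::ProcessAndXorBlock above
        enc.SetKey(key, 16);         // runs Base::UncheckedSetKey with keylen = 16
        enc.ProcessBlock(in, out);   // one 16-byte block; the xorBlock == NULL path
    }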