path: root/cryptopp/crypto/rijndael.cpp
Diffstat (limited to 'cryptopp/crypto/rijndael.cpp')
-rw-r--r--  cryptopp/crypto/rijndael.cpp  722
1 file changed, 0 insertions(+), 722 deletions(-)
diff --git a/cryptopp/crypto/rijndael.cpp b/cryptopp/crypto/rijndael.cpp
deleted file mode 100644
index c1682d8..0000000
--- a/cryptopp/crypto/rijndael.cpp
+++ /dev/null
@@ -1,722 +0,0 @@
-// rijndael.cpp - modified by Chris Morgan <cmorgan@wpi.edu>
-// and Wei Dai from Paulo Barreto's Rijndael implementation
-// The original code and all modifications are in the public domain.
-
-// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
-
-/*
-Defense against timing attacks was added in July 2006 by Wei Dai.
-
-The code now uses smaller tables in the first and last rounds,
-and preloads them into L1 cache before usage (by loading at least
-one element in each cache line).
-
-We try to delay subsequent accesses to each table (used in the first
-and last rounds) until all of the table has been preloaded. Hopefully
-the compiler isn't smart enough to optimize that code away.
-
-After preloading the table, we also try not to access any memory location
-other than the table and the stack, in order to prevent table entries from
-being unloaded from L1 cache, until that round is finished.
-(Some popular CPUs have 2-way associative caches.)
-*/
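
The portable C++ path further down in this file implements the preload by touching one word per cache line and folding the (always-zero) accumulator back into the state. A minimal standalone sketch of that idea, for orientation only (names and the table size are assumptions, not part of the deleted file):

    #include <cstddef>
    #include <cstdint>

    // Sketch of the L1 preload described above: touch one word in every cache
    // line of the table, accumulating into u so the loads cannot be optimized
    // away. u starts at 0 and stays 0 (u &= x), so OR-ing it into the state
    // words is a no-op that still makes the state depend on the completed loads.
    inline void PreloadTable(const std::uint32_t *table, std::size_t tableBytes,
                             std::size_t cacheLineSize, std::uint32_t state[4])
    {
        std::uint32_t u = 0;
        const unsigned char *p = reinterpret_cast<const unsigned char *>(table);
        for (std::size_t i = 0; i < tableBytes; i += cacheLineSize)
            u &= *reinterpret_cast<const std::uint32_t *>(p + i);
        for (int j = 0; j < 4; j++)
            state[j] |= u;
    }
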
-
-// This is the original introductory comment:
-
-/**
- * version 3.0 (December 2000)
- *
- * Optimised ANSI C code for the Rijndael cipher (now AES)
- *
- * author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
- * author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
- * author Paulo Barreto <paulo.barreto@terra.com.br>
- *
- * This code is hereby placed in the public domain.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
- * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "pch.h"
-
-#ifndef CRYPTOPP_IMPORTS
-#ifndef CRYPTOPP_GENERATE_X64_MASM
-
-#include "rijndael.h"
-#include "misc.h"
-#include "cpu.h"
-
-NAMESPACE_BEGIN(CryptoPP)
-
-void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
-{
- AssertValidKeyLength(keylen);
-
- m_rounds = keylen/4 + 6;
- m_key.New(4*(m_rounds+1));
-
- word32 temp, *rk = m_key;
- const word32 *rc = rcon;
-
- GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);
-
- while (true)
- {
- temp = rk[keylen/4-1];
- rk[keylen/4] = rk[0] ^
- (word32(Se[GETBYTE(temp, 2)]) << 24) ^
- (word32(Se[GETBYTE(temp, 1)]) << 16) ^
- (word32(Se[GETBYTE(temp, 0)]) << 8) ^
- Se[GETBYTE(temp, 3)] ^
- *(rc++);
- rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
- rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
- rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
-
- if (rk + keylen/4 + 4 == m_key.end())
- break;
-
- if (keylen == 24)
- {
- rk[10] = rk[ 4] ^ rk[ 9];
- rk[11] = rk[ 5] ^ rk[10];
- }
- else if (keylen == 32)
- {
- temp = rk[11];
- rk[12] = rk[ 4] ^
- (word32(Se[GETBYTE(temp, 3)]) << 24) ^
- (word32(Se[GETBYTE(temp, 2)]) << 16) ^
- (word32(Se[GETBYTE(temp, 1)]) << 8) ^
- Se[GETBYTE(temp, 0)];
- rk[13] = rk[ 5] ^ rk[12];
- rk[14] = rk[ 6] ^ rk[13];
- rk[15] = rk[ 7] ^ rk[14];
- }
- rk += keylen/4;
- }
-
- if (!IsForwardTransformation())
- {
- unsigned int i, j;
- rk = m_key;
-
- /* invert the order of the round keys: */
- for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
- temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
- temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
- temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
- temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
- }
- /* apply the inverse MixColumn transform to all round keys but the first and the last: */
- for (i = 1; i < m_rounds; i++) {
- rk += 4;
- rk[0] =
- Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
- Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
- Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
- Td[3*256+Se[GETBYTE(rk[0], 0)]];
- rk[1] =
- Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
- Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
- Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
- Td[3*256+Se[GETBYTE(rk[1], 0)]];
- rk[2] =
- Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
- Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
- Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
- Td[3*256+Se[GETBYTE(rk[2], 0)]];
- rk[3] =
- Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
- Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
- Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
- Td[3*256+Se[GETBYTE(rk[3], 0)]];
- }
- }
-
- ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
- ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
-}
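
The loop above is the standard FIPS-197 key expansion, with the 192/256-bit branches handling the longer key schedules. For comparison, a minimal AES-128-only rendering of the same recurrence (a sketch with assumed Sbox/Rcon tables, not the Crypto++ code):

    #include <cstdint>

    // RotWord followed by SubWord on a big-endian packed word; this matches
    // the GETBYTE/Se shifts in the loop above.
    inline std::uint32_t SubRotWord(std::uint32_t w, const unsigned char Sbox[256])
    {
        return (std::uint32_t(Sbox[(w >> 16) & 0xff]) << 24) |
               (std::uint32_t(Sbox[(w >>  8) & 0xff]) << 16) |
               (std::uint32_t(Sbox[ w        & 0xff]) <<  8) |
                std::uint32_t(Sbox[(w >> 24) & 0xff]);
    }

    // Minimal AES-128 key expansion sketch (FIPS-197): W holds 44 round-key
    // words; every fourth word mixes in SubRotWord and a round constant.
    void ExpandKey128(const std::uint32_t key[4], std::uint32_t W[44],
                      const unsigned char Sbox[256], const std::uint32_t Rcon[10])
    {
        for (int i = 0; i < 4; i++)
            W[i] = key[i];
        for (int i = 4; i < 44; i++)
            W[i] = (i % 4 == 0)
                ? W[i-4] ^ SubRotWord(W[i-1], Sbox) ^ Rcon[i/4 - 1]
                : W[i-4] ^ W[i-1];
    }
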
-
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
-extern "C" {
-void Rijndael_Enc_ProcessAndXorBlock(const word32 *table, word32 cacheLineSize, const word32 *k, const word32 *kLoopEnd, const byte *inBlock, const byte *xorBlock, byte *outBlock);
-}
-#endif
-
-#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
-
-void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
-#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM
-
-#ifdef CRYPTOPP_X64_MASM_AVAILABLE
- Rijndael_Enc_ProcessAndXorBlock(Te, g_cacheLineSize, m_key, m_key + m_rounds*4, inBlock, xorBlock, outBlock);
- return;
-#endif
-
-#if defined(CRYPTOPP_X86_ASM_AVAILABLE)
- #ifdef CRYPTOPP_GENERATE_X64_MASM
- ALIGN 8
- Rijndael_Enc_ProcessAndXorBlock PROC FRAME
- rex_push_reg rbx
- push_reg rsi
- push_reg rdi
- push_reg r12
- push_reg r13
- push_reg r14
- push_reg r15
- .endprolog
- mov AS_REG_7, rcx
- mov rdi, [rsp + 5*8 + 7*8] ; inBlock
- #else
- if (HasMMX())
- {
- const word32 *k = m_key;
- const word32 *kLoopEnd = k + m_rounds*4;
- #endif
-
- #if CRYPTOPP_BOOL_X64
- #define K_REG r8
- #define K_END_REG r9
- #define SAVE_K
- #define RESTORE_K
- #define RESTORE_K_END
- #define SAVE_0(x) AS2(mov r13d, x)
- #define SAVE_1(x) AS2(mov r14d, x)
- #define SAVE_2(x) AS2(mov r15d, x)
- #define RESTORE_0(x) AS2(mov x, r13d)
- #define RESTORE_1(x) AS2(mov x, r14d)
- #define RESTORE_2(x) AS2(mov x, r15d)
- #else
- #define K_REG esi
- #define K_END_REG edi
- #define SAVE_K AS2(movd mm4, esi)
- #define RESTORE_K AS2(movd esi, mm4)
- #define RESTORE_K_END AS2(movd edi, mm5)
- #define SAVE_0(x) AS2(movd mm0, x)
- #define SAVE_1(x) AS2(movd mm1, x)
- #define SAVE_2(x) AS2(movd mm2, x)
- #define RESTORE_0(x) AS2(movd x, mm0)
- #define RESTORE_1(x) AS2(movd x, mm1)
- #define RESTORE_2(x) AS2(movd x, mm2)
- #endif
-#ifdef __GNUC__
- word32 t0, t1, t2, t3;
- __asm__ __volatile__
- (
- ".intel_syntax noprefix;"
- #if CRYPTOPP_BOOL_X64
- AS2( mov K_REG, rsi)
- AS2( mov K_END_REG, rcx)
- #else
- AS1( push ebx)
- AS1( push ebp)
- AS2( movd mm5, ecx)
- #endif
- AS2( mov AS_REG_7, WORD_REG(ax))
-#elif CRYPTOPP_BOOL_X86
- #if _MSC_VER < 1300
- const word32 *t = Te;
- AS2( mov eax, t)
- #endif
- AS2( mov edx, g_cacheLineSize)
- AS2( mov WORD_REG(di), inBlock)
- AS2( mov K_REG, k)
- AS2( movd mm5, kLoopEnd)
- #if _MSC_VER < 1300
- AS1( push ebx)
- AS1( push ebp)
- AS2( mov AS_REG_7, eax)
- #else
- AS1( push ebp)
- AS2( lea AS_REG_7, Te)
- #endif
-#endif
- AS2( mov eax, [K_REG+0*4]) // s0
- AS2( xor eax, [WORD_REG(di)+0*4])
- SAVE_0(eax)
- AS2( mov ebx, [K_REG+1*4])
- AS2( xor ebx, [WORD_REG(di)+1*4])
- SAVE_1(ebx)
- AS2( and ebx, eax)
- AS2( mov eax, [K_REG+2*4])
- AS2( xor eax, [WORD_REG(di)+2*4])
- SAVE_2(eax)
- AS2( and ebx, eax)
- AS2( mov ecx, [K_REG+3*4])
- AS2( xor ecx, [WORD_REG(di)+3*4])
- AS2( and ebx, ecx)
-
-	// read Te0 into L1 cache. this code could be simplified by using lfence, but that is an SSE2 instruction
- AS2( and ebx, 0)
- AS2( mov edi, ebx) // make index depend on previous loads to simulate lfence
- ASL(2)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( and ebx, [AS_REG_7+WORD_REG(di)])
- AS2( add edi, edx)
- AS2( cmp edi, 1024)
- ASJ( jl, 2, b)
- AS2( and ebx, [AS_REG_7+1020])
-#if CRYPTOPP_BOOL_X64
- AS2( xor r13d, ebx)
- AS2( xor r14d, ebx)
- AS2( xor r15d, ebx)
-#else
- AS2( movd mm6, ebx)
- AS2( pxor mm2, mm6)
- AS2( pxor mm1, mm6)
- AS2( pxor mm0, mm6)
-#endif
- AS2( xor ecx, ebx)
-
- AS2( mov edi, [K_REG+4*4]) // t0
- AS2( mov eax, [K_REG+5*4])
- AS2( mov ebx, [K_REG+6*4])
- AS2( mov edx, [K_REG+7*4])
- AS2( add K_REG, 8*4)
- SAVE_K
-
-#define QUARTER_ROUND(t, a, b, c, d) \
- AS2(movzx esi, t##l)\
- AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
- AS2(shr e##t##x, 16)\
- AS2(movzx esi, t##l)\
- AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])
-
-#define s0 xor edi
-#define s1 xor eax
-#define s2 xor ebx
-#define s3 xor ecx
-#define t0 xor edi
-#define t1 xor eax
-#define t2 xor ebx
-#define t3 xor edx
-
- QUARTER_ROUND(c, t0, t1, t2, t3)
- RESTORE_2(ecx)
- QUARTER_ROUND(c, t3, t0, t1, t2)
- RESTORE_1(ecx)
- QUARTER_ROUND(c, t2, t3, t0, t1)
- RESTORE_0(ecx)
- QUARTER_ROUND(c, t1, t2, t3, t0)
- SAVE_2(ebx)
- SAVE_1(eax)
- SAVE_0(edi)
-#undef QUARTER_ROUND
-
- RESTORE_K
-
- ASL(0)
- AS2( mov edi, [K_REG+0*4])
- AS2( mov eax, [K_REG+1*4])
- AS2( mov ebx, [K_REG+2*4])
- AS2( mov ecx, [K_REG+3*4])
-
-#define QUARTER_ROUND(t, a, b, c, d) \
- AS2(movzx esi, t##l)\
- AS2(a, [AS_REG_7+3*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(b, [AS_REG_7+2*1024+4*WORD_REG(si)])\
- AS2(shr e##t##x, 16)\
- AS2(movzx esi, t##l)\
- AS2(c, [AS_REG_7+1*1024+4*WORD_REG(si)])\
- AS2(movzx esi, t##h)\
- AS2(d, [AS_REG_7+0*1024+4*WORD_REG(si)])
-
- QUARTER_ROUND(d, s0, s1, s2, s3)
- RESTORE_2(edx)
- QUARTER_ROUND(d, s3, s0, s1, s2)
- RESTORE_1(edx)
- QUARTER_ROUND(d, s2, s3, s0, s1)
- RESTORE_0(edx)
- QUARTER_ROUND(d, s1, s2, s3, s0)
- RESTORE_K
- SAVE_2(ebx)
- SAVE_1(eax)
- SAVE_0(edi)
-
- AS2( mov edi, [K_REG+4*4])
- AS2( mov eax, [K_REG+5*4])
- AS2( mov ebx, [K_REG+6*4])
- AS2( mov edx, [K_REG+7*4])
-
- QUARTER_ROUND(c, t0, t1, t2, t3)
- RESTORE_2(ecx)
- QUARTER_ROUND(c, t3, t0, t1, t2)
- RESTORE_1(ecx)
- QUARTER_ROUND(c, t2, t3, t0, t1)
- RESTORE_0(ecx)
- QUARTER_ROUND(c, t1, t2, t3, t0)
- SAVE_2(ebx)
- SAVE_1(eax)
- SAVE_0(edi)
-
- RESTORE_K
- RESTORE_K_END
- AS2( add K_REG, 8*4)
- SAVE_K
- AS2( cmp K_END_REG, K_REG)
- ASJ( jne, 0, b)
-
-#undef QUARTER_ROUND
-#undef s0
-#undef s1
-#undef s2
-#undef s3
-#undef t0
-#undef t1
-#undef t2
-#undef t3
-
- AS2( mov eax, [K_END_REG+0*4])
- AS2( mov ecx, [K_END_REG+1*4])
- AS2( mov esi, [K_END_REG+2*4])
- AS2( mov edi, [K_END_REG+3*4])
-
-#define QUARTER_ROUND(a, b, c, d) \
- AS2( movzx ebx, dl)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
- AS2( shl ebx, 3*8)\
- AS2( xor a, ebx)\
- AS2( movzx ebx, dh)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
- AS2( shl ebx, 2*8)\
- AS2( xor b, ebx)\
- AS2( shr edx, 16)\
- AS2( movzx ebx, dl)\
- AS2( shr edx, 8)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(bx)])\
- AS2( shl ebx, 1*8)\
- AS2( xor c, ebx)\
- AS2( movzx ebx, BYTE PTR [AS_REG_7+1+4*WORD_REG(dx)])\
- AS2( xor d, ebx)
-
- QUARTER_ROUND(eax, ecx, esi, edi)
- RESTORE_2(edx)
- QUARTER_ROUND(edi, eax, ecx, esi)
- RESTORE_1(edx)
- QUARTER_ROUND(esi, edi, eax, ecx)
- RESTORE_0(edx)
- QUARTER_ROUND(ecx, esi, edi, eax)
-
-#undef QUARTER_ROUND
-
-#if CRYPTOPP_BOOL_X86
- AS1(emms)
- AS1(pop ebp)
- #if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER < 1300)
- AS1(pop ebx)
- #endif
-#endif
-
-#ifdef __GNUC__
- ".att_syntax prefix;"
- : "=a" (t0), "=c" (t1), "=S" (t2), "=D" (t3)
- : "a" (Te), "D" (inBlock), "S" (k), "c" (kLoopEnd), "d" (g_cacheLineSize)
- : "memory", "cc"
- #if CRYPTOPP_BOOL_X64
- , "%ebx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
- #endif
- );
-
- if (xorBlock)
- {
- t0 ^= ((const word32 *)xorBlock)[0];
- t1 ^= ((const word32 *)xorBlock)[1];
- t2 ^= ((const word32 *)xorBlock)[2];
- t3 ^= ((const word32 *)xorBlock)[3];
- }
- ((word32 *)outBlock)[0] = t0;
- ((word32 *)outBlock)[1] = t1;
- ((word32 *)outBlock)[2] = t2;
- ((word32 *)outBlock)[3] = t3;
-#else
- #if CRYPTOPP_BOOL_X64
- mov rbx, [rsp + 6*8 + 7*8] ; xorBlock
- #else
- AS2( mov ebx, xorBlock)
- #endif
- AS2( test WORD_REG(bx), WORD_REG(bx))
- ASJ( jz, 1, f)
- AS2( xor eax, [WORD_REG(bx)+0*4])
- AS2( xor ecx, [WORD_REG(bx)+1*4])
- AS2( xor esi, [WORD_REG(bx)+2*4])
- AS2( xor edi, [WORD_REG(bx)+3*4])
- ASL(1)
- #if CRYPTOPP_BOOL_X64
- mov rbx, [rsp + 7*8 + 7*8] ; outBlock
- #else
- AS2( mov ebx, outBlock)
- #endif
- AS2( mov [WORD_REG(bx)+0*4], eax)
- AS2( mov [WORD_REG(bx)+1*4], ecx)
- AS2( mov [WORD_REG(bx)+2*4], esi)
- AS2( mov [WORD_REG(bx)+3*4], edi)
-#endif
-
-#if CRYPTOPP_GENERATE_X64_MASM
- pop r15
- pop r14
- pop r13
- pop r12
- pop rdi
- pop rsi
- pop rbx
- ret
- Rijndael_Enc_ProcessAndXorBlock ENDP
-#else
- }
- else
-#endif
-#endif // #ifdef CRYPTOPP_X86_ASM_AVAILABLE
-#ifndef CRYPTOPP_GENERATE_X64_MASM
- {
- word32 s0, s1, s2, s3, t0, t1, t2, t3;
- const word32 *rk = m_key;
-
- s0 = ((const word32 *)inBlock)[0] ^ rk[0];
- s1 = ((const word32 *)inBlock)[1] ^ rk[1];
- s2 = ((const word32 *)inBlock)[2] ^ rk[2];
- s3 = ((const word32 *)inBlock)[3] ^ rk[3];
- t0 = rk[4];
- t1 = rk[5];
- t2 = rk[6];
- t3 = rk[7];
- rk += 8;
-
- // timing attack countermeasure. see comments at top for more details
- const int cacheLineSize = GetCacheLineSize();
- unsigned int i;
- word32 u = 0;
- for (i=0; i<1024; i+=cacheLineSize)
- u &= *(const word32 *)(((const byte *)Te)+i);
- u &= Te[255];
- s0 |= u; s1 |= u; s2 |= u; s3 |= u;
-
- // first round
-#ifdef IS_BIG_ENDIAN
-#define QUARTER_ROUND(t, a, b, c, d) \
- a ^= rotrFixed(Te[byte(t)], 24); t >>= 8;\
- b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
- c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
- d ^= Te[t];
-#else
-#define QUARTER_ROUND(t, a, b, c, d) \
- d ^= Te[byte(t)]; t >>= 8;\
- c ^= rotrFixed(Te[byte(t)], 8); t >>= 8;\
- b ^= rotrFixed(Te[byte(t)], 16); t >>= 8;\
- a ^= rotrFixed(Te[t], 24);
-#endif
-
- QUARTER_ROUND(s3, t0, t1, t2, t3)
- QUARTER_ROUND(s2, t3, t0, t1, t2)
- QUARTER_ROUND(s1, t2, t3, t0, t1)
- QUARTER_ROUND(s0, t1, t2, t3, t0)
-#undef QUARTER_ROUND
-
- // Nr - 2 full rounds:
- unsigned int r = m_rounds/2 - 1;
- do
- {
-#define QUARTER_ROUND(t, a, b, c, d) \
- a ^= Te[3*256+byte(t)]; t >>= 8;\
- b ^= Te[2*256+byte(t)]; t >>= 8;\
- c ^= Te[1*256+byte(t)]; t >>= 8;\
- d ^= Te[t];
-
- s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
-
- QUARTER_ROUND(t3, s0, s1, s2, s3)
- QUARTER_ROUND(t2, s3, s0, s1, s2)
- QUARTER_ROUND(t1, s2, s3, s0, s1)
- QUARTER_ROUND(t0, s1, s2, s3, s0)
-
- t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
-
- QUARTER_ROUND(s3, t0, t1, t2, t3)
- QUARTER_ROUND(s2, t3, t0, t1, t2)
- QUARTER_ROUND(s1, t2, t3, t0, t1)
- QUARTER_ROUND(s0, t1, t2, t3, t0)
-#undef QUARTER_ROUND
-
- rk += 8;
- } while (--r);
-
- // timing attack countermeasure. see comments at top for more details
- u = 0;
- for (i=0; i<256; i+=cacheLineSize)
- u &= *(const word32 *)(Se+i);
- u &= *(const word32 *)(Se+252);
- t0 |= u; t1 |= u; t2 |= u; t3 |= u;
-
- word32 tbw[4];
- byte *const tempBlock = (byte *)tbw;
- word32 *const obw = (word32 *)outBlock;
- const word32 *const xbw = (const word32 *)xorBlock;
-
-#define QUARTER_ROUND(t, a, b, c, d) \
- tempBlock[a] = Se[byte(t)]; t >>= 8;\
- tempBlock[b] = Se[byte(t)]; t >>= 8;\
- tempBlock[c] = Se[byte(t)]; t >>= 8;\
- tempBlock[d] = Se[t];
-
- QUARTER_ROUND(t2, 15, 2, 5, 8)
- QUARTER_ROUND(t1, 11, 14, 1, 4)
- QUARTER_ROUND(t0, 7, 10, 13, 0)
- QUARTER_ROUND(t3, 3, 6, 9, 12)
-#undef QUARTER_ROUND
-
- if (xbw)
- {
- obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
- obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
- obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
- obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
- }
- else
- {
- obw[0] = tbw[0] ^ rk[0];
- obw[1] = tbw[1] ^ rk[1];
- obw[2] = tbw[2] ^ rk[2];
- obw[3] = tbw[3] ^ rk[3];
- }
- }
-}
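
The scatter-style QUARTER_ROUND macros above are a transposed rendering of the classic gather-style T-table round. One output word of the gather form looks like this (sketch only; Te0..Te3 are hypothetical per-column tables, whereas this file keeps the four rotated tables contiguous in a single Te):

    #include <cstdint>

    // One output word of a full T-table round: gather one byte from each of
    // the four state words, look each up in its own table, XOR with round key.
    inline std::uint32_t RoundWord(std::uint32_t s0, std::uint32_t s1,
                                   std::uint32_t s2, std::uint32_t s3,
                                   std::uint32_t rk,
                                   const std::uint32_t Te0[256],
                                   const std::uint32_t Te1[256],
                                   const std::uint32_t Te2[256],
                                   const std::uint32_t Te3[256])
    {
        return Te0[(s0 >> 24) & 0xff] ^ Te1[(s1 >> 16) & 0xff] ^
               Te2[(s2 >>  8) & 0xff] ^ Te3[ s3        & 0xff] ^ rk;
    }
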
-
-void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
-{
- word32 s0, s1, s2, s3, t0, t1, t2, t3;
- const word32 *rk = m_key;
-
- s0 = ((const word32 *)inBlock)[0] ^ rk[0];
- s1 = ((const word32 *)inBlock)[1] ^ rk[1];
- s2 = ((const word32 *)inBlock)[2] ^ rk[2];
- s3 = ((const word32 *)inBlock)[3] ^ rk[3];
- t0 = rk[4];
- t1 = rk[5];
- t2 = rk[6];
- t3 = rk[7];
- rk += 8;
-
- // timing attack countermeasure. see comments at top for more details
- const int cacheLineSize = GetCacheLineSize();
- unsigned int i;
- word32 u = 0;
- for (i=0; i<1024; i+=cacheLineSize)
- u &= *(const word32 *)(((const byte *)Td)+i);
- u &= Td[255];
- s0 |= u; s1 |= u; s2 |= u; s3 |= u;
-
- // first round
-#ifdef IS_BIG_ENDIAN
-#define QUARTER_ROUND(t, a, b, c, d) \
- a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\
- b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
- c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
- d ^= Td[t];
-#else
-#define QUARTER_ROUND(t, a, b, c, d) \
- d ^= Td[byte(t)]; t >>= 8;\
- c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
- b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
- a ^= rotrFixed(Td[t], 24);
-#endif
-
- QUARTER_ROUND(s3, t2, t1, t0, t3)
- QUARTER_ROUND(s2, t1, t0, t3, t2)
- QUARTER_ROUND(s1, t0, t3, t2, t1)
- QUARTER_ROUND(s0, t3, t2, t1, t0)
-#undef QUARTER_ROUND
-
- // Nr - 2 full rounds:
- unsigned int r = m_rounds/2 - 1;
- do
- {
-#define QUARTER_ROUND(t, a, b, c, d) \
- a ^= Td[3*256+byte(t)]; t >>= 8;\
- b ^= Td[2*256+byte(t)]; t >>= 8;\
- c ^= Td[1*256+byte(t)]; t >>= 8;\
- d ^= Td[t];
-
- s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
-
- QUARTER_ROUND(t3, s2, s1, s0, s3)
- QUARTER_ROUND(t2, s1, s0, s3, s2)
- QUARTER_ROUND(t1, s0, s3, s2, s1)
- QUARTER_ROUND(t0, s3, s2, s1, s0)
-
- t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
-
- QUARTER_ROUND(s3, t2, t1, t0, t3)
- QUARTER_ROUND(s2, t1, t0, t3, t2)
- QUARTER_ROUND(s1, t0, t3, t2, t1)
- QUARTER_ROUND(s0, t3, t2, t1, t0)
-#undef QUARTER_ROUND
-
- rk += 8;
- } while (--r);
-
- // timing attack countermeasure. see comments at top for more details
- u = 0;
- for (i=0; i<256; i+=cacheLineSize)
- u &= *(const word32 *)(Sd+i);
- u &= *(const word32 *)(Sd+252);
- t0 |= u; t1 |= u; t2 |= u; t3 |= u;
-
- word32 tbw[4];
- byte *const tempBlock = (byte *)tbw;
- word32 *const obw = (word32 *)outBlock;
- const word32 *const xbw = (const word32 *)xorBlock;
-
-#define QUARTER_ROUND(t, a, b, c, d) \
- tempBlock[a] = Sd[byte(t)]; t >>= 8;\
- tempBlock[b] = Sd[byte(t)]; t >>= 8;\
- tempBlock[c] = Sd[byte(t)]; t >>= 8;\
- tempBlock[d] = Sd[t];
-
- QUARTER_ROUND(t2, 7, 2, 13, 8)
- QUARTER_ROUND(t1, 3, 14, 9, 4)
- QUARTER_ROUND(t0, 15, 10, 5, 0)
- QUARTER_ROUND(t3, 11, 6, 1, 12)
-#undef QUARTER_ROUND
-
- if (xbw)
- {
- obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
- obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
- obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
- obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
- }
- else
- {
- obw[0] = tbw[0] ^ rk[0];
- obw[1] = tbw[1] ^ rk[1];
- obw[2] = tbw[2] ^ rk[2];
- obw[3] = tbw[3] ^ rk[3];
- }
-}
-
-NAMESPACE_END
-
-#endif
-#endif
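
For context, block-level usage of the class this file implemented looks like the following (a sketch against the Crypto++ API; ProcessBlock dispatches to the ProcessAndXorBlock overloads shown in the diff with a null xorBlock):

    #include "rijndael.h"

    // Sketch: one-block AES-128 encrypt/decrypt with the Rijndael class.
    // byte is the Crypto++ typedef for unsigned char.
    void OneBlockDemo(const byte key[16], const byte in[16], byte out[16])
    {
        CryptoPP::Rijndael::Encryption enc(key, 16); // key schedule via UncheckedSetKey
        enc.ProcessBlock(in, out);                   // ProcessAndXorBlock(in, NULL, out)

        CryptoPP::Rijndael::Decryption dec(key, 16);
        byte back[16];
        dec.ProcessBlock(out, back);                 // recovers the original plaintext
    }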