summaryrefslogtreecommitdiff
path: root/plugins/MirOTR/Libgcrypt/cipher/serpent.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/MirOTR/Libgcrypt/cipher/serpent.c')
-rw-r--r--plugins/MirOTR/Libgcrypt/cipher/serpent.c1342
1 files changed, 844 insertions, 498 deletions
diff --git a/plugins/MirOTR/Libgcrypt/cipher/serpent.c b/plugins/MirOTR/Libgcrypt/cipher/serpent.c
index 6b7e655a98..0be49da477 100644
--- a/plugins/MirOTR/Libgcrypt/cipher/serpent.c
+++ b/plugins/MirOTR/Libgcrypt/cipher/serpent.c
@@ -28,6 +28,33 @@
#include "g10lib.h"
#include "cipher.h"
#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+
+
+/* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
+#undef USE_SSE2
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# define USE_SSE2 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# if defined(ENABLE_AVX2_SUPPORT)
+# define USE_AVX2 1
+# endif
+#endif
+
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
/* Number of rounds per Serpent encrypt/decrypt operation. */
#define ROUNDS 32
@@ -49,415 +76,378 @@ typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
typedef struct serpent_context
{
serpent_subkeys_t keys; /* Generated subkeys. */
+
+#ifdef USE_AVX2
+ int use_avx2;
+#endif
+#ifdef USE_NEON
+ int use_neon;
+#endif
} serpent_context_t;
-/* A prototype. */
-static const char *serpent_test (void);
-
+#ifdef USE_SSE2
+/* Assembler implementations of Serpent using SSE2. Process 8 block in
+ parallel.
+ */
+extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr);
+
+extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+
+extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+#endif
-#define byte_swap_32(x) \
- (0 \
- | (((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) \
- | (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
+#ifdef USE_AVX2
+/* Assembler implementations of Serpent using SSE2. Process 16 block in
+ parallel.
+ */
+extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr);
+
+extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+
+extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+#endif
-/* These are the S-Boxes of Serpent. They are copied from Serpents
- reference implementation (the optimized one, contained in
- `floppy2') and are therefore:
+#ifdef USE_NEON
+/* Assembler implementations of Serpent using ARM NEON. Process 8 block in
+ parallel.
+ */
+extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *ctr);
+
+extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+
+extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
+ unsigned char *out,
+ const unsigned char *in,
+ unsigned char *iv);
+#endif
- Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
- To quote the Serpent homepage
- (http://www.cl.cam.ac.uk/~rja14/serpent.html):
+/* A prototype. */
+static const char *serpent_test (void);
- "Serpent is now completely in the public domain, and we impose no
- restrictions on its use. This was announced on the 21st August at
- the First AES Candidate Conference. The optimised implementations
- in the submission package are now under the GNU PUBLIC LICENSE
- (GPL), although some comments in the code still say otherwise. You
- are welcome to use Serpent for any application." */
-#define SBOX0(a, b, c, d, w, x, y, z) \
+/*
+ * These are the S-Boxes of Serpent from following research paper.
+ *
+ * D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ * (New York, New York, USA), p. 317–329, National Institute of Standards and
+ * Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+
+#define SBOX0(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t05, t06, t07, t08, t09; \
- u32 t11, t12, t13, t14, t15, t17, t01; \
- t01 = b ^ c ; \
- t02 = a | d ; \
- t03 = a ^ b ; \
- z = t02 ^ t01; \
- t05 = c | z ; \
- t06 = a ^ d ; \
- t07 = b | c ; \
- t08 = d & t05; \
- t09 = t03 & t07; \
- y = t09 ^ t08; \
- t11 = t09 & y ; \
- t12 = c ^ d ; \
- t13 = t07 ^ t11; \
- t14 = b & t06; \
- t15 = t06 ^ t13; \
- w = ~ t15; \
- t17 = w ^ t14; \
- x = t12 ^ t17; \
+ u32 r4; \
+ \
+ r3 ^= r0; r4 = r1; \
+ r1 &= r3; r4 ^= r2; \
+ r1 ^= r0; r0 |= r3; \
+ r0 ^= r4; r4 ^= r3; \
+ r3 ^= r2; r2 |= r1; \
+ r2 ^= r4; r4 = ~r4; \
+ r4 |= r1; r1 ^= r3; \
+ r1 ^= r4; r3 |= r0; \
+ r1 ^= r3; r4 ^= r3; \
+ \
+ w = r1; x = r4; y = r2; z = r0; \
}
-#define SBOX0_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t08, t09, t10; \
- u32 t12, t13, t14, t15, t17, t18, t01; \
- t01 = c ^ d ; \
- t02 = a | b ; \
- t03 = b | c ; \
- t04 = c & t01; \
- t05 = t02 ^ t01; \
- t06 = a | t04; \
- y = ~ t05; \
- t08 = b ^ d ; \
- t09 = t03 & t08; \
- t10 = d | y ; \
- x = t09 ^ t06; \
- t12 = a | t05; \
- t13 = x ^ t12; \
- t14 = t03 ^ t10; \
- t15 = a ^ c ; \
- z = t14 ^ t13; \
- t17 = t05 & t13; \
- t18 = t14 | t17; \
- w = t15 ^ t18; \
+ u32 r4; \
+ \
+ r2 = ~r2; r4 = r1; \
+ r1 |= r0; r4 = ~r4; \
+ r1 ^= r2; r2 |= r4; \
+ r1 ^= r3; r0 ^= r4; \
+ r2 ^= r0; r0 &= r3; \
+ r4 ^= r0; r0 |= r1; \
+ r0 ^= r2; r3 ^= r4; \
+ r2 ^= r1; r3 ^= r0; \
+ r3 ^= r1; \
+ r2 &= r3; \
+ r4 ^= r2; \
+ \
+ w = r0; x = r4; y = r1; z = r3; \
}
-#define SBOX1(a, b, c, d, w, x, y, z) \
+#define SBOX1(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t07, t08; \
- u32 t10, t11, t12, t13, t16, t17, t01; \
- t01 = a | d ; \
- t02 = c ^ d ; \
- t03 = ~ b ; \
- t04 = a ^ c ; \
- t05 = a | t03; \
- t06 = d & t04; \
- t07 = t01 & t02; \
- t08 = b | t06; \
- y = t02 ^ t05; \
- t10 = t07 ^ t08; \
- t11 = t01 ^ t10; \
- t12 = y ^ t11; \
- t13 = b & d ; \
- z = ~ t10; \
- x = t13 ^ t12; \
- t16 = t10 | x ; \
- t17 = t05 & t16; \
- w = c ^ t17; \
+ u32 r4; \
+ \
+ r0 = ~r0; r2 = ~r2; \
+ r4 = r0; r0 &= r1; \
+ r2 ^= r0; r0 |= r3; \
+ r3 ^= r2; r1 ^= r0; \
+ r0 ^= r4; r4 |= r1; \
+ r1 ^= r3; r2 |= r0; \
+ r2 &= r4; r0 ^= r1; \
+ r1 &= r2; \
+ r1 ^= r0; r0 &= r2; \
+ r0 ^= r4; \
+ \
+ w = r2; x = r0; y = r3; z = r1; \
}
-#define SBOX1_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t07, t08; \
- u32 t09, t10, t11, t14, t15, t17, t01; \
- t01 = a ^ b ; \
- t02 = b | d ; \
- t03 = a & c ; \
- t04 = c ^ t02; \
- t05 = a | t04; \
- t06 = t01 & t05; \
- t07 = d | t03; \
- t08 = b ^ t06; \
- t09 = t07 ^ t06; \
- t10 = t04 | t03; \
- t11 = d & t08; \
- y = ~ t09; \
- x = t10 ^ t11; \
- t14 = a | y ; \
- t15 = t06 ^ x ; \
- z = t01 ^ t04; \
- t17 = c ^ t15; \
- w = t14 ^ t17; \
+ u32 r4; \
+ \
+ r4 = r1; r1 ^= r3; \
+ r3 &= r1; r4 ^= r2; \
+ r3 ^= r0; r0 |= r1; \
+ r2 ^= r3; r0 ^= r4; \
+ r0 |= r2; r1 ^= r3; \
+ r0 ^= r1; r1 |= r3; \
+ r1 ^= r0; r4 = ~r4; \
+ r4 ^= r1; r1 |= r0; \
+ r1 ^= r0; \
+ r1 |= r4; \
+ r3 ^= r1; \
+ \
+ w = r4; x = r0; y = r3; z = r2; \
}
-#define SBOX2(a, b, c, d, w, x, y, z) \
+#define SBOX2(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t05, t06, t07, t08; \
- u32 t09, t10, t12, t13, t14, t01; \
- t01 = a | c ; \
- t02 = a ^ b ; \
- t03 = d ^ t01; \
- w = t02 ^ t03; \
- t05 = c ^ w ; \
- t06 = b ^ t05; \
- t07 = b | t05; \
- t08 = t01 & t06; \
- t09 = t03 ^ t07; \
- t10 = t02 | t09; \
- x = t10 ^ t08; \
- t12 = a | d ; \
- t13 = t09 ^ x ; \
- t14 = b ^ t13; \
- z = ~ t09; \
- y = t12 ^ t14; \
+ u32 r4; \
+ \
+ r4 = r0; r0 &= r2; \
+ r0 ^= r3; r2 ^= r1; \
+ r2 ^= r0; r3 |= r4; \
+ r3 ^= r1; r4 ^= r2; \
+ r1 = r3; r3 |= r4; \
+ r3 ^= r0; r0 &= r1; \
+ r4 ^= r0; r1 ^= r3; \
+ r1 ^= r4; r4 = ~r4; \
+ \
+ w = r2; x = r3; y = r1; z = r4; \
}
-#define SBOX2_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t06, t07, t08, t09; \
- u32 t10, t11, t12, t15, t16, t17, t01; \
- t01 = a ^ d ; \
- t02 = c ^ d ; \
- t03 = a & c ; \
- t04 = b | t02; \
- w = t01 ^ t04; \
- t06 = a | c ; \
- t07 = d | w ; \
- t08 = ~ d ; \
- t09 = b & t06; \
- t10 = t08 | t03; \
- t11 = b & t07; \
- t12 = t06 & t02; \
- z = t09 ^ t10; \
- x = t12 ^ t11; \
- t15 = c & z ; \
- t16 = w ^ x ; \
- t17 = t10 ^ t15; \
- y = t16 ^ t17; \
+ u32 r4; \
+ \
+ r2 ^= r3; r3 ^= r0; \
+ r4 = r3; r3 &= r2; \
+ r3 ^= r1; r1 |= r2; \
+ r1 ^= r4; r4 &= r3; \
+ r2 ^= r3; r4 &= r0; \
+ r4 ^= r2; r2 &= r1; \
+ r2 |= r0; r3 = ~r3; \
+ r2 ^= r3; r0 ^= r3; \
+ r0 &= r1; r3 ^= r4; \
+ r3 ^= r0; \
+ \
+ w = r1; x = r4; y = r2; z = r3; \
}
-#define SBOX3(a, b, c, d, w, x, y, z) \
+#define SBOX3(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t07, t08; \
- u32 t09, t10, t11, t13, t14, t15, t01; \
- t01 = a ^ c ; \
- t02 = a | d ; \
- t03 = a & d ; \
- t04 = t01 & t02; \
- t05 = b | t03; \
- t06 = a & b ; \
- t07 = d ^ t04; \
- t08 = c | t06; \
- t09 = b ^ t07; \
- t10 = d & t05; \
- t11 = t02 ^ t10; \
- z = t08 ^ t09; \
- t13 = d | z ; \
- t14 = a | t07; \
- t15 = b & t13; \
- y = t08 ^ t11; \
- w = t14 ^ t15; \
- x = t05 ^ t04; \
+ u32 r4; \
+ \
+ r4 = r0; r0 |= r3; \
+ r3 ^= r1; r1 &= r4; \
+ r4 ^= r2; r2 ^= r3; \
+ r3 &= r0; r4 |= r1; \
+ r3 ^= r4; r0 ^= r1; \
+ r4 &= r0; r1 ^= r3; \
+ r4 ^= r2; r1 |= r0; \
+ r1 ^= r2; r0 ^= r3; \
+ r2 = r1; r1 |= r3; \
+ r1 ^= r0; \
+ \
+ w = r1; x = r2; y = r3; z = r4; \
}
-#define SBOX3_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t07, t09; \
- u32 t11, t12, t13, t14, t16, t01; \
- t01 = c | d ; \
- t02 = a | d ; \
- t03 = c ^ t02; \
- t04 = b ^ t02; \
- t05 = a ^ d ; \
- t06 = t04 & t03; \
- t07 = b & t01; \
- y = t05 ^ t06; \
- t09 = a ^ t03; \
- w = t07 ^ t03; \
- t11 = w | t05; \
- t12 = t09 & t11; \
- t13 = a & y ; \
- t14 = t01 ^ t05; \
- x = b ^ t12; \
- t16 = b | t13; \
- z = t14 ^ t16; \
+ u32 r4; \
+ \
+ r4 = r2; r2 ^= r1; \
+ r0 ^= r2; r4 &= r2; \
+ r4 ^= r0; r0 &= r1; \
+ r1 ^= r3; r3 |= r4; \
+ r2 ^= r3; r0 ^= r3; \
+ r1 ^= r4; r3 &= r2; \
+ r3 ^= r1; r1 ^= r0; \
+ r1 |= r2; r0 ^= r3; \
+ r1 ^= r4; \
+ r0 ^= r1; \
+ \
+ w = r2; x = r1; y = r3; z = r0; \
}
-#define SBOX4(a, b, c, d, w, x, y, z) \
+#define SBOX4(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t08, t09; \
- u32 t10, t11, t12, t13, t14, t15, t16, t01; \
- t01 = a | b ; \
- t02 = b | c ; \
- t03 = a ^ t02; \
- t04 = b ^ d ; \
- t05 = d | t03; \
- t06 = d & t01; \
- z = t03 ^ t06; \
- t08 = z & t04; \
- t09 = t04 & t05; \
- t10 = c ^ t06; \
- t11 = b & c ; \
- t12 = t04 ^ t08; \
- t13 = t11 | t03; \
- t14 = t10 ^ t09; \
- t15 = a & t05; \
- t16 = t11 | t12; \
- y = t13 ^ t08; \
- x = t15 ^ t16; \
- w = ~ t14; \
+ u32 r4; \
+ \
+ r1 ^= r3; r3 = ~r3; \
+ r2 ^= r3; r3 ^= r0; \
+ r4 = r1; r1 &= r3; \
+ r1 ^= r2; r4 ^= r3; \
+ r0 ^= r4; r2 &= r4; \
+ r2 ^= r0; r0 &= r1; \
+ r3 ^= r0; r4 |= r1; \
+ r4 ^= r0; r0 |= r3; \
+ r0 ^= r2; r2 &= r3; \
+ r0 = ~r0; r4 ^= r2; \
+ \
+ w = r1; x = r4; y = r0; z = r3; \
}
-#define SBOX4_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t07, t09; \
- u32 t10, t11, t12, t13, t15, t01; \
- t01 = b | d ; \
- t02 = c | d ; \
- t03 = a & t01; \
- t04 = b ^ t02; \
- t05 = c ^ d ; \
- t06 = ~ t03; \
- t07 = a & t04; \
- x = t05 ^ t07; \
- t09 = x | t06; \
- t10 = a ^ t07; \
- t11 = t01 ^ t09; \
- t12 = d ^ t04; \
- t13 = c | t10; \
- z = t03 ^ t12; \
- t15 = a ^ t04; \
- y = t11 ^ t13; \
- w = t15 ^ t09; \
+ u32 r4; \
+ \
+ r4 = r2; r2 &= r3; \
+ r2 ^= r1; r1 |= r3; \
+ r1 &= r0; r4 ^= r2; \
+ r4 ^= r1; r1 &= r2; \
+ r0 = ~r0; r3 ^= r4; \
+ r1 ^= r3; r3 &= r0; \
+ r3 ^= r2; r0 ^= r1; \
+ r2 &= r0; r3 ^= r0; \
+ r2 ^= r4; \
+ r2 |= r3; r3 ^= r0; \
+ r2 ^= r1; \
+ \
+ w = r0; x = r3; y = r2; z = r4; \
}
-#define SBOX5(a, b, c, d, w, x, y, z) \
+#define SBOX5(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t07, t08, t09; \
- u32 t10, t11, t12, t13, t14, t01; \
- t01 = b ^ d ; \
- t02 = b | d ; \
- t03 = a & t01; \
- t04 = c ^ t02; \
- t05 = t03 ^ t04; \
- w = ~ t05; \
- t07 = a ^ t01; \
- t08 = d | w ; \
- t09 = b | t05; \
- t10 = d ^ t08; \
- t11 = b | t07; \
- t12 = t03 | w ; \
- t13 = t07 | t10; \
- t14 = t01 ^ t11; \
- y = t09 ^ t13; \
- x = t07 ^ t08; \
- z = t12 ^ t14; \
+ u32 r4; \
+ \
+ r0 ^= r1; r1 ^= r3; \
+ r3 = ~r3; r4 = r1; \
+ r1 &= r0; r2 ^= r3; \
+ r1 ^= r2; r2 |= r4; \
+ r4 ^= r3; r3 &= r1; \
+ r3 ^= r0; r4 ^= r1; \
+ r4 ^= r2; r2 ^= r0; \
+ r0 &= r3; r2 = ~r2; \
+ r0 ^= r4; r4 |= r3; \
+ r2 ^= r4; \
+ \
+ w = r1; x = r3; y = r0; z = r2; \
}
-#define SBOX5_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t07, t08, t09; \
- u32 t10, t12, t13, t15, t16, t01; \
- t01 = a & d ; \
- t02 = c ^ t01; \
- t03 = a ^ d ; \
- t04 = b & t02; \
- t05 = a & c ; \
- w = t03 ^ t04; \
- t07 = a & w ; \
- t08 = t01 ^ w ; \
- t09 = b | t05; \
- t10 = ~ b ; \
- x = t08 ^ t09; \
- t12 = t10 | t07; \
- t13 = w | x ; \
- z = t02 ^ t12; \
- t15 = t02 ^ t13; \
- t16 = b ^ d ; \
- y = t16 ^ t15; \
+ u32 r4; \
+ \
+ r1 = ~r1; r4 = r3; \
+ r2 ^= r1; r3 |= r0; \
+ r3 ^= r2; r2 |= r1; \
+ r2 &= r0; r4 ^= r3; \
+ r2 ^= r4; r4 |= r0; \
+ r4 ^= r1; r1 &= r2; \
+ r1 ^= r3; r4 ^= r2; \
+ r3 &= r4; r4 ^= r1; \
+ r3 ^= r4; r4 = ~r4; \
+ r3 ^= r0; \
+ \
+ w = r1; x = r4; y = r3; z = r2; \
}
-#define SBOX6(a, b, c, d, w, x, y, z) \
+#define SBOX6(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t07, t08, t09, t10; \
- u32 t11, t12, t13, t15, t17, t18, t01; \
- t01 = a & d ; \
- t02 = b ^ c ; \
- t03 = a ^ d ; \
- t04 = t01 ^ t02; \
- t05 = b | c ; \
- x = ~ t04; \
- t07 = t03 & t05; \
- t08 = b & x ; \
- t09 = a | c ; \
- t10 = t07 ^ t08; \
- t11 = b | d ; \
- t12 = c ^ t11; \
- t13 = t09 ^ t10; \
- y = ~ t13; \
- t15 = x & t03; \
- z = t12 ^ t07; \
- t17 = a ^ b ; \
- t18 = y ^ t15; \
- w = t17 ^ t18; \
+ u32 r4; \
+ \
+ r2 = ~r2; r4 = r3; \
+ r3 &= r0; r0 ^= r4; \
+ r3 ^= r2; r2 |= r4; \
+ r1 ^= r3; r2 ^= r0; \
+ r0 |= r1; r2 ^= r1; \
+ r4 ^= r0; r0 |= r3; \
+ r0 ^= r2; r4 ^= r3; \
+ r4 ^= r0; r3 = ~r3; \
+ r2 &= r4; \
+ r2 ^= r3; \
+ \
+ w = r0; x = r1; y = r4; z = r2; \
}
-#define SBOX6_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t07, t08, t09; \
- u32 t12, t13, t14, t15, t16, t17, t01; \
- t01 = a ^ c ; \
- t02 = ~ c ; \
- t03 = b & t01; \
- t04 = b | t02; \
- t05 = d | t03; \
- t06 = b ^ d ; \
- t07 = a & t04; \
- t08 = a | t02; \
- t09 = t07 ^ t05; \
- x = t06 ^ t08; \
- w = ~ t09; \
- t12 = b & w ; \
- t13 = t01 & t05; \
- t14 = t01 ^ t12; \
- t15 = t07 ^ t13; \
- t16 = d | t02; \
- t17 = a ^ x ; \
- z = t17 ^ t15; \
- y = t16 ^ t14; \
+ u32 r4; \
+ \
+ r0 ^= r2; r4 = r2; \
+ r2 &= r0; r4 ^= r3; \
+ r2 = ~r2; r3 ^= r1; \
+ r2 ^= r3; r4 |= r0; \
+ r0 ^= r2; r3 ^= r4; \
+ r4 ^= r1; r1 &= r3; \
+ r1 ^= r0; r0 ^= r3; \
+ r0 |= r2; r3 ^= r1; \
+ r4 ^= r0; \
+ \
+ w = r1; x = r2; y = r4; z = r3; \
}
-#define SBOX7(a, b, c, d, w, x, y, z) \
+#define SBOX7(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t05, t06, t08, t09, t10; \
- u32 t11, t13, t14, t15, t16, t17, t01; \
- t01 = a & c ; \
- t02 = ~ d ; \
- t03 = a & t02; \
- t04 = b | t01; \
- t05 = a & b ; \
- t06 = c ^ t04; \
- z = t03 ^ t06; \
- t08 = c | z ; \
- t09 = d | t05; \
- t10 = a ^ t08; \
- t11 = t04 & z ; \
- x = t09 ^ t10; \
- t13 = b ^ x ; \
- t14 = t01 ^ x ; \
- t15 = c ^ t05; \
- t16 = t11 | t13; \
- t17 = t02 | t14; \
- w = t15 ^ t17; \
- y = a ^ t16; \
+ u32 r4; \
+ \
+ r4 = r1; r1 |= r2; \
+ r1 ^= r3; r4 ^= r2; \
+ r2 ^= r1; r3 |= r4; \
+ r3 &= r0; r4 ^= r2; \
+ r3 ^= r1; r1 |= r4; \
+ r1 ^= r0; r0 |= r4; \
+ r0 ^= r2; r1 ^= r4; \
+ r2 ^= r1; r1 &= r0; \
+ r1 ^= r4; r2 = ~r2; \
+ r2 |= r0; \
+ r4 ^= r2; \
+ \
+ w = r4; x = r3; y = r1; z = r0; \
}
-#define SBOX7_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
{ \
- u32 t02, t03, t04, t06, t07, t08, t09; \
- u32 t10, t11, t13, t14, t15, t16, t01; \
- t01 = a & b ; \
- t02 = a | b ; \
- t03 = c | t01; \
- t04 = d & t02; \
- z = t03 ^ t04; \
- t06 = b ^ t04; \
- t07 = d ^ z ; \
- t08 = ~ t07; \
- t09 = t06 | t08; \
- t10 = b ^ d ; \
- t11 = a | d ; \
- x = a ^ t09; \
- t13 = c ^ t06; \
- t14 = c & t11; \
- t15 = d | x ; \
- t16 = t01 | t10; \
- w = t13 ^ t15; \
- y = t14 ^ t16; \
+ u32 r4; \
+ \
+ r4 = r2; r2 ^= r0; \
+ r0 &= r3; r4 |= r3; \
+ r2 = ~r2; r3 ^= r1; \
+ r1 |= r0; r0 ^= r2; \
+ r2 &= r4; r3 &= r4; \
+ r1 ^= r2; r2 ^= r0; \
+ r0 |= r2; r4 ^= r1; \
+ r0 ^= r3; r3 ^= r4; \
+ r4 |= r0; r3 ^= r2; \
+ r4 ^= r2; \
+ \
+ w = r3; x = r0; y = r1; z = r4; \
}
/* XOR BLOCK1 into BLOCK0. */
@@ -478,23 +468,17 @@ static const char *serpent_test (void);
block_dst[3] = block_src[3]; \
}
-/* Apply SBOX number WHICH to to the block found in ARRAY0 at index
- INDEX, writing the output to the block found in ARRAY1 at index
- INDEX. */
-#define SBOX(which, array0, array1, index) \
- SBOX##which (array0[index + 0], array0[index + 1], \
- array0[index + 2], array0[index + 3], \
- array1[index + 0], array1[index + 1], \
- array1[index + 2], array1[index + 3]);
-
-/* Apply inverse SBOX number WHICH to to the block found in ARRAY0 at
- index INDEX, writing the output to the block found in ARRAY1 at
- index INDEX. */
-#define SBOX_INVERSE(which, array0, array1, index) \
- SBOX##which##_INVERSE (array0[index + 0], array0[index + 1], \
- array0[index + 2], array0[index + 3], \
- array1[index + 0], array1[index + 1], \
- array1[index + 2], array1[index + 3]);
+/* Apply SBOX number WHICH to to the block found in ARRAY0, writing
+ the output to the block found in ARRAY1. */
+#define SBOX(which, array0, array1) \
+ SBOX##which (array0[0], array0[1], array0[2], array0[3], \
+ array1[0], array1[1], array1[2], array1[3]);
+
+/* Apply inverse SBOX number WHICH to to the block found in ARRAY0, writing
+ the output to the block found in ARRAY1. */
+#define SBOX_INVERSE(which, array0, array1) \
+ SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3], \
+ array1[0], array1[1], array1[2], array1[3]);
/* Apply the linear transformation to BLOCK. */
#define LINEAR_TRANSFORMATION(block) \
@@ -533,7 +517,7 @@ static const char *serpent_test (void);
{ \
BLOCK_XOR (block, subkeys[round]); \
round++; \
- SBOX (which, block, block_tmp, 0); \
+ SBOX (which, block, block_tmp); \
LINEAR_TRANSFORMATION (block_tmp); \
BLOCK_COPY (block, block_tmp); \
}
@@ -546,7 +530,7 @@ static const char *serpent_test (void);
{ \
BLOCK_XOR (block, subkeys[round]); \
round++; \
- SBOX (which, block, block_tmp, 0); \
+ SBOX (which, block, block_tmp); \
BLOCK_XOR (block_tmp, subkeys[round]); \
round++; \
}
@@ -557,7 +541,7 @@ static const char *serpent_test (void);
#define ROUND_INVERSE(which, subkey, block, block_tmp) \
{ \
LINEAR_TRANSFORMATION_INVERSE (block); \
- SBOX_INVERSE (which, block, block_tmp, 0); \
+ SBOX_INVERSE (which, block, block_tmp); \
BLOCK_XOR (block_tmp, subkey[round]); \
round--; \
BLOCK_COPY (block, block_tmp); \
@@ -571,7 +555,7 @@ static const char *serpent_test (void);
{ \
BLOCK_XOR (block, subkeys[round]); \
round--; \
- SBOX_INVERSE (which, block, block_tmp, 0); \
+ SBOX_INVERSE (which, block, block_tmp); \
BLOCK_XOR (block_tmp, subkeys[round]); \
round--; \
}
@@ -585,14 +569,9 @@ serpent_key_prepare (const byte *key, unsigned int key_length,
int i;
/* Copy key. */
- for (i = 0; i < key_length / 4; i++)
- {
-#ifdef WORDS_BIGENDIAN
- key_prepared[i] = byte_swap_32 (((u32 *) key)[i]);
-#else
- key_prepared[i] = ((u32 *) key)[i];
-#endif
- }
+ key_length /= 4;
+ for (i = 0; i < key_length; i++)
+ key_prepared[i] = buf_get_le32 (key + i * 4);
if (i < 8)
{
@@ -609,58 +588,57 @@ serpent_key_prepare (const byte *key, unsigned int key_length,
static void
serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
{
- u32 w_real[140]; /* The `prekey'. */
- u32 k[132];
- u32 *w = &w_real[8];
- int i, j;
+ u32 w[8]; /* The `prekey'. */
+ u32 ws[4];
+ u32 wt[4];
/* Initialize with key values. */
- for (i = 0; i < 8; i++)
- w[i - 8] = key[i];
+ w[0] = key[0];
+ w[1] = key[1];
+ w[2] = key[2];
+ w[3] = key[3];
+ w[4] = key[4];
+ w[5] = key[5];
+ w[6] = key[6];
+ w[7] = key[7];
/* Expand to intermediate key using the affine recurrence. */
- for (i = 0; i < 132; i++)
- w[i] = rol (w[i - 8] ^ w[i - 5] ^ w[i - 3] ^ w[i - 1] ^ PHI ^ i, 11);
+#define EXPAND_KEY4(wo, r) \
+ wo[0] = w[(r+0)%8] = \
+ rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); \
+ wo[1] = w[(r+1)%8] = \
+ rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); \
+ wo[2] = w[(r+2)%8] = \
+ rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); \
+ wo[3] = w[(r+3)%8] = \
+ rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);
+
+#define EXPAND_KEY(r) \
+ EXPAND_KEY4(ws, (r)); \
+ EXPAND_KEY4(wt, (r + 4));
/* Calculate subkeys via S-Boxes, in bitslice mode. */
- SBOX (3, w, k, 0);
- SBOX (2, w, k, 4);
- SBOX (1, w, k, 8);
- SBOX (0, w, k, 12);
- SBOX (7, w, k, 16);
- SBOX (6, w, k, 20);
- SBOX (5, w, k, 24);
- SBOX (4, w, k, 28);
- SBOX (3, w, k, 32);
- SBOX (2, w, k, 36);
- SBOX (1, w, k, 40);
- SBOX (0, w, k, 44);
- SBOX (7, w, k, 48);
- SBOX (6, w, k, 52);
- SBOX (5, w, k, 56);
- SBOX (4, w, k, 60);
- SBOX (3, w, k, 64);
- SBOX (2, w, k, 68);
- SBOX (1, w, k, 72);
- SBOX (0, w, k, 76);
- SBOX (7, w, k, 80);
- SBOX (6, w, k, 84);
- SBOX (5, w, k, 88);
- SBOX (4, w, k, 92);
- SBOX (3, w, k, 96);
- SBOX (2, w, k, 100);
- SBOX (1, w, k, 104);
- SBOX (0, w, k, 108);
- SBOX (7, w, k, 112);
- SBOX (6, w, k, 116);
- SBOX (5, w, k, 120);
- SBOX (4, w, k, 124);
- SBOX (3, w, k, 128);
-
- /* Renumber subkeys. */
- for (i = 0; i < ROUNDS + 1; i++)
- for (j = 0; j < 4; j++)
- subkeys[i][j] = k[4 * i + j];
+ EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
+ EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
+ EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
+ EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
+ EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
+ EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
+ EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
+ EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
+ EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
+ EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
+ EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
+ EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
+ EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
+ EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
+ EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
+ EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
+ EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
+
+ wipememory (ws, sizeof (ws));
+ wipememory (wt, sizeof (wt));
+ wipememory (w, sizeof (w));
}
/* Initialize CONTEXT with the key KEY of KEY_LENGTH bits. */
@@ -672,7 +650,24 @@ serpent_setkey_internal (serpent_context_t *context,
serpent_key_prepare (key, key_length, key_prepared);
serpent_subkeys_generate (key_prepared, context->keys);
- _gcry_burn_stack (272 * sizeof (u32));
+
+#ifdef USE_AVX2
+ context->use_avx2 = 0;
+ if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
+ {
+ context->use_avx2 = 1;
+ }
+#endif
+
+#ifdef USE_NEON
+ context->use_neon = 0;
+ if ((_gcry_get_hw_features () & HWF_ARM_NEON))
+ {
+ context->use_neon = 1;
+ }
+#endif
+
+ wipememory (key_prepared, sizeof(key_prepared));
}
/* Initialize CTX with the key KEY of KEY_LENGTH bytes. */
@@ -684,45 +679,35 @@ serpent_setkey (void *ctx,
static const char *serpent_test_ret;
static int serpent_init_done;
gcry_err_code_t ret = GPG_ERR_NO_ERROR;
-
+
if (! serpent_init_done)
{
/* Execute a self-test the first time, Serpent is used. */
+ serpent_init_done = 1;
serpent_test_ret = serpent_test ();
if (serpent_test_ret)
log_error ("Serpent test failure: %s\n", serpent_test_ret);
- serpent_init_done = 1;
}
if (serpent_test_ret)
ret = GPG_ERR_SELFTEST_FAILED;
else
- {
- serpent_setkey_internal (context, key, key_length);
- _gcry_burn_stack (sizeof (serpent_key_t));
- }
+ serpent_setkey_internal (context, key, key_length);
return ret;
}
static void
serpent_encrypt_internal (serpent_context_t *context,
- const serpent_block_t input, serpent_block_t output)
+ const byte *input, byte *output)
{
serpent_block_t b, b_next;
int round = 0;
-#ifdef WORDS_BIGENDIAN
- b[0] = byte_swap_32 (input[0]);
- b[1] = byte_swap_32 (input[1]);
- b[2] = byte_swap_32 (input[2]);
- b[3] = byte_swap_32 (input[3]);
-#else
- b[0] = input[0];
- b[1] = input[1];
- b[2] = input[2];
- b[3] = input[3];
-#endif
+ b[0] = buf_get_le32 (input + 0);
+ b[1] = buf_get_le32 (input + 4);
+ b[2] = buf_get_le32 (input + 8);
+ b[3] = buf_get_le32 (input + 12);
ROUND (0, context->keys, b, b_next);
ROUND (1, context->keys, b, b_next);
@@ -758,37 +743,23 @@ serpent_encrypt_internal (serpent_context_t *context,
ROUND_LAST (7, context->keys, b, b_next);
-#ifdef WORDS_BIGENDIAN
- output[0] = byte_swap_32 (b_next[0]);
- output[1] = byte_swap_32 (b_next[1]);
- output[2] = byte_swap_32 (b_next[2]);
- output[3] = byte_swap_32 (b_next[3]);
-#else
- output[0] = b_next[0];
- output[1] = b_next[1];
- output[2] = b_next[2];
- output[3] = b_next[3];
-#endif
+ buf_put_le32 (output + 0, b_next[0]);
+ buf_put_le32 (output + 4, b_next[1]);
+ buf_put_le32 (output + 8, b_next[2]);
+ buf_put_le32 (output + 12, b_next[3]);
}
static void
serpent_decrypt_internal (serpent_context_t *context,
- const serpent_block_t input, serpent_block_t output)
+ const byte *input, byte *output)
{
serpent_block_t b, b_next;
int round = ROUNDS;
-#ifdef WORDS_BIGENDIAN
- b_next[0] = byte_swap_32 (input[0]);
- b_next[1] = byte_swap_32 (input[1]);
- b_next[2] = byte_swap_32 (input[2]);
- b_next[3] = byte_swap_32 (input[3]);
-#else
- b_next[0] = input[0];
- b_next[1] = input[1];
- b_next[2] = input[2];
- b_next[3] = input[3];
-#endif
+ b_next[0] = buf_get_le32 (input + 0);
+ b_next[1] = buf_get_le32 (input + 4);
+ b_next[2] = buf_get_le32 (input + 8);
+ b_next[3] = buf_get_le32 (input + 12);
ROUND_FIRST_INVERSE (7, context->keys, b_next, b);
@@ -824,43 +795,409 @@ serpent_decrypt_internal (serpent_context_t *context,
ROUND_INVERSE (1, context->keys, b, b_next);
ROUND_INVERSE (0, context->keys, b, b_next);
-
-#ifdef WORDS_BIGENDIAN
- output[0] = byte_swap_32 (b_next[0]);
- output[1] = byte_swap_32 (b_next[1]);
- output[2] = byte_swap_32 (b_next[2]);
- output[3] = byte_swap_32 (b_next[3]);
-#else
- output[0] = b_next[0];
- output[1] = b_next[1];
- output[2] = b_next[2];
- output[3] = b_next[3];
-#endif
+ buf_put_le32 (output + 0, b_next[0]);
+ buf_put_le32 (output + 4, b_next[1]);
+ buf_put_le32 (output + 8, b_next[2]);
+ buf_put_le32 (output + 12, b_next[3]);
}
-static void
+static unsigned int
serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
{
serpent_context_t *context = ctx;
- serpent_encrypt_internal (context,
- (const u32 *) buffer_in, (u32 *) buffer_out);
- _gcry_burn_stack (2 * sizeof (serpent_block_t));
+ serpent_encrypt_internal (context, buffer_in, buffer_out);
+ return /*burn_stack*/ (2 * sizeof (serpent_block_t));
}
-static void
+static unsigned int
serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
{
serpent_context_t *context = ctx;
- serpent_decrypt_internal (context,
- (const u32 *) buffer_in,
- (u32 *) buffer_out);
- _gcry_burn_stack (2 * sizeof (serpent_block_t));
+ serpent_decrypt_internal (context, buffer_in, buffer_out);
+ return /*burn_stack*/ (2 * sizeof (serpent_block_t));
}
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size sizeof(serpent_block_t). */
+void
+_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ serpent_context_t *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[sizeof(serpent_block_t)];
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+ int i;
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic/sse2 code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ serpent_encrypt_internal(ctx, ctr, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ buf_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
+ outbuf += sizeof(serpent_block_t);
+ inbuf += sizeof(serpent_block_t);
+ /* Increment the counter. */
+ for (i = sizeof(serpent_block_t); i > 0; i--)
+ {
+ ctr[i-1]++;
+ if (ctr[i-1])
+ break;
+ }
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_serpent_cbc_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ serpent_context_t *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[sizeof(serpent_block_t)];
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic/sse2 code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ serpent_decrypt_internal (ctx, inbuf, savebuf);
+
+ buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, sizeof(serpent_block_t));
+ inbuf += sizeof(serpent_block_t);
+ outbuf += sizeof(serpent_block_t);
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_serpent_cfb_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ serpent_context_t *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+ if (ctx->use_avx2)
+ {
+ int did_use_avx2 = 0;
+
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
+ {
+ _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
+
+ if (did_use_avx2)
+ {
+ /* serpent-avx2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic/sse2 code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_SSE2
+ {
+ int did_use_sse2 = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
+
+ if (did_use_sse2)
+ {
+ /* serpent-sse2 assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+#ifdef USE_NEON
+ if (ctx->use_neon)
+ {
+ int did_use_neon = 0;
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
+ {
+ _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
+
+ if (did_use_neon)
+ {
+ /* serpent-neon assembly code does not use stack */
+ if (nblocks == 0)
+ burn_stack_depth = 0;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ serpent_encrypt_internal(ctx, iv, iv);
+ buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
+ outbuf += sizeof(serpent_block_t);
+ inbuf += sizeof(serpent_block_t);
+ }
+
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+
+/* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+ const int nblocks = 16+8+1;
+ const int blocksize = sizeof(serpent_block_t);
+ const int context_size = sizeof(serpent_context_t);
+
+ return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
+ &serpent_encrypt, &_gcry_serpent_ctr_enc, nblocks, blocksize,
+ context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+ const int nblocks = 16+8+2;
+ const int blocksize = sizeof(serpent_block_t);
+ const int context_size = sizeof(serpent_context_t);
+
+ return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
+ &serpent_encrypt, &_gcry_serpent_cbc_dec, nblocks, blocksize,
+ context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+ const int nblocks = 16+8+2;
+ const int blocksize = sizeof(serpent_block_t);
+ const int context_size = sizeof(serpent_context_t);
+
+ return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
+ &serpent_encrypt, &_gcry_serpent_cfb_dec, nblocks, blocksize,
+ context_size);
+}
+
+
/* Serpent test. */
static const char *
@@ -869,6 +1206,7 @@ serpent_test (void)
serpent_context_t context;
unsigned char scratch[16];
unsigned int i;
+ const char *r;
static struct test
{
@@ -914,9 +1252,7 @@ serpent_test (void)
{
serpent_setkey_internal (&context, test_data[i].key,
test_data[i].key_length);
- serpent_encrypt_internal (&context,
- (const u32 *) test_data[i].text_plain,
- (u32 *) scratch);
+ serpent_encrypt_internal (&context, test_data[i].text_plain, scratch);
if (memcmp (scratch, test_data[i].text_cipher, sizeof (serpent_block_t)))
switch (test_data[i].key_length)
@@ -929,9 +1265,7 @@ serpent_test (void)
return "Serpent-256 test encryption failed.";
}
- serpent_decrypt_internal (&context,
- (const u32 *) test_data[i].text_cipher,
- (u32 *) scratch);
+ serpent_decrypt_internal (&context, test_data[i].text_cipher, scratch);
if (memcmp (scratch, test_data[i].text_plain, sizeof (serpent_block_t)))
switch (test_data[i].key_length)
{
@@ -944,6 +1278,15 @@ serpent_test (void)
}
}
+ if ( (r = selftest_ctr_128 ()) )
+ return r;
+
+ if ( (r = selftest_cbc_128 ()) )
+ return r;
+
+ if ( (r = selftest_cfb_128 ()) )
+ return r;
+
return NULL;
}
@@ -958,6 +1301,7 @@ static const char *cipher_spec_serpent128_aliases[] =
gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
{
+ GCRY_CIPHER_SERPENT128, {0, 0},
"SERPENT128", cipher_spec_serpent128_aliases, NULL, 16, 128,
sizeof (serpent_context_t),
serpent_setkey, serpent_encrypt, serpent_decrypt
@@ -965,6 +1309,7 @@ gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
{
+ GCRY_CIPHER_SERPENT192, {0, 0},
"SERPENT192", NULL, NULL, 16, 192,
sizeof (serpent_context_t),
serpent_setkey, serpent_encrypt, serpent_decrypt
@@ -972,6 +1317,7 @@ gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
{
+ GCRY_CIPHER_SERPENT256, {0, 0},
"SERPENT256", NULL, NULL, 16, 256,
sizeof (serpent_context_t),
serpent_setkey, serpent_encrypt, serpent_decrypt