From 46ea86584a9787c8b9dc3983cf23d9b5b93b5841 Mon Sep 17 00:00:00 2001 From: George Hazan Date: Fri, 21 Jun 2024 14:29:17 +0300 Subject: fixes #4477 (libsodium: update to 1.0.20) --- .../src/crypto_aead/aegis128l/aead_aegis128l.c | 159 ++ .../src/crypto_aead/aegis128l/aegis128l_aesni.c | 70 + .../src/crypto_aead/aegis128l/aegis128l_aesni.h | 8 + .../crypto_aead/aegis128l/aegis128l_armcrypto.h | 8 + .../src/crypto_aead/aegis128l/aegis128l_common.h | 249 +++ .../src/crypto_aead/aegis128l/aegis128l_soft.c | 59 + .../src/crypto_aead/aegis128l/aegis128l_soft.h | 8 + .../src/crypto_aead/aegis128l/implementations.h | 17 + .../src/crypto_aead/aegis256/aead_aegis256.c | 158 ++ .../src/crypto_aead/aegis256/aegis256_aesni.c | 65 + .../src/crypto_aead/aegis256/aegis256_aesni.h | 8 + .../src/crypto_aead/aegis256/aegis256_armcrypto.h | 8 + .../src/crypto_aead/aegis256/aegis256_common.h | 232 +++ .../src/crypto_aead/aegis256/aegis256_soft.c | 54 + .../src/crypto_aead/aegis256/aegis256_soft.h | 8 + .../src/crypto_aead/aegis256/implementations.h | 17 + .../src/crypto_aead/aes256gcm/aead_aes256gcm.c | 157 ++ .../aes256gcm/aesni/aead_aes256gcm_aesni.c | 1784 ++++++++++---------- .../aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c | 1033 ++++++++++++ .../chacha20poly1305/aead_chacha20poly1305.c | 400 +++++ .../sodium/aead_chacha20poly1305.c | 400 ----- .../xchacha20poly1305/aead_xchacha20poly1305.c | 262 +++ .../sodium/aead_xchacha20poly1305.c | 262 --- 23 files changed, 3840 insertions(+), 1586 deletions(-) create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h create mode 100644 libs/libsodium/src/crypto_aead/aegis128l/implementations.h create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c create mode 100644 libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h create mode 100644 libs/libsodium/src/crypto_aead/aegis256/implementations.h create mode 100644 libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c create mode 100644 libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c create mode 100644 libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c delete mode 100644 libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c create mode 100644 libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c delete mode 100644 libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c (limited to 'libs/libsodium/src/crypto_aead') diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c b/libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c new file mode 100644 index 0000000000..ab2596e685 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c @@ -0,0 +1,159 @@ + +#include +#include + +#include "core.h" +#include "crypto_aead_aegis128l.h" +#include "private/common.h" +#include "private/implementations.h" +#include "randombytes.h" +#include "runtime.h" + +#include "aegis128l_soft.h" + +#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN) +#include "aegis128l_armcrypto.h" +#endif + +#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H) +#include "aegis128l_aesni.h" +#endif + +static const aegis128l_implementation *implementation = &aegis128l_soft_implementation; + +size_t +crypto_aead_aegis128l_keybytes(void) +{ + return crypto_aead_aegis128l_KEYBYTES; +} + +size_t +crypto_aead_aegis128l_nsecbytes(void) +{ + return crypto_aead_aegis128l_NSECBYTES; +} + +size_t +crypto_aead_aegis128l_npubbytes(void) +{ + return crypto_aead_aegis128l_NPUBBYTES; +} + +size_t +crypto_aead_aegis128l_abytes(void) +{ + return crypto_aead_aegis128l_ABYTES; +} + +size_t +crypto_aead_aegis128l_messagebytes_max(void) +{ + return crypto_aead_aegis128l_MESSAGEBYTES_MAX; +} + +void +crypto_aead_aegis128l_keygen(unsigned char k[crypto_aead_aegis128l_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_aegis128l_KEYBYTES); +} + +int +crypto_aead_aegis128l_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + unsigned long long clen = 0ULL; + int ret; + + ret = crypto_aead_aegis128l_encrypt_detached(c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, + k); + if (clen_p != NULL) { + if (ret == 0) { + clen = mlen + crypto_aead_aegis128l_ABYTES; + } + *clen_p = clen; + } + return ret; +} + +int +crypto_aead_aegis128l_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= crypto_aead_aegis128l_ABYTES) { + ret = crypto_aead_aegis128l_decrypt_detached( + m, nsec, c, clen - crypto_aead_aegis128l_ABYTES, + c + clen - crypto_aead_aegis128l_ABYTES, ad, adlen, npub, k); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - crypto_aead_aegis128l_ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +int +crypto_aead_aegis128l_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + const size_t maclen = crypto_aead_aegis128l_ABYTES; + + if (maclen_p != NULL) { + *maclen_p = maclen; + } + if (mlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX || + adlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX) { + sodium_misuse(); + } + return implementation->encrypt_detached(c, mac, maclen, m, (size_t) mlen, ad, (size_t) adlen, + npub, k); +} + +int +crypto_aead_aegis128l_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + const size_t maclen = crypto_aead_aegis128l_ABYTES; + + if (clen > crypto_aead_aegis128l_MESSAGEBYTES_MAX || + adlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX) { + return -1; + } + return implementation->decrypt_detached(m, c, (size_t) clen, mac, maclen, ad, (size_t) adlen, + npub, k); +} + +int +_crypto_aead_aegis128l_pick_best_implementation(void) +{ + implementation = &aegis128l_soft_implementation; + +#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN) + if (sodium_runtime_has_armcrypto()) { + implementation = &aegis128l_armcrypto_implementation; + return 0; + } +#endif + +#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H) + if (sodium_runtime_has_aesni() & sodium_runtime_has_avx()) { + implementation = &aegis128l_aesni_implementation; + return 0; + } +#endif + return 0; /* LCOV_EXCL_LINE */ +} diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c new file mode 100644 index 0000000000..93782ce288 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c @@ -0,0 +1,70 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aegis128l.h" +#include "crypto_verify_16.h" +#include "crypto_verify_32.h" +#include "export.h" +#include "utils.h" + +#include "private/common.h" + +#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H) + +#include "aegis128l_aesni.h" + +#ifdef __clang__ +#pragma clang attribute push(__attribute__((target("aes,avx"))), apply_to = function) +#elif defined(__GNUC__) +#pragma GCC target("aes,avx") +#endif + +#include "private/sse2_64_32.h" +#include +#include + +#define AES_BLOCK_LENGTH 16 + +typedef __m128i aes_block_t; +#define AES_BLOCK_XOR(A, B) _mm_xor_si128((A), (B)) +#define AES_BLOCK_AND(A, B) _mm_and_si128((A), (B)) +#define AES_BLOCK_LOAD(A) _mm_loadu_si128((const aes_block_t *) (const void *) (A)) +#define AES_BLOCK_LOAD_64x2(A, B) _mm_set_epi64x((long long) (A), (long long) (B)) +#define AES_BLOCK_STORE(A, B) _mm_storeu_si128((aes_block_t *) (void *) (A), (B)) +#define AES_ENC(A, B) _mm_aesenc_si128((A), (B)) + +static inline void +aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2) +{ + aes_block_t tmp; + + tmp = state[7]; + state[7] = AES_ENC(state[6], state[7]); + state[6] = AES_ENC(state[5], state[6]); + state[5] = AES_ENC(state[4], state[5]); + state[4] = AES_ENC(state[3], state[4]); + state[3] = AES_ENC(state[2], state[3]); + state[2] = AES_ENC(state[1], state[2]); + state[1] = AES_ENC(state[0], state[1]); + state[0] = AES_ENC(tmp, state[0]); + + state[0] = AES_BLOCK_XOR(state[0], d1); + state[4] = AES_BLOCK_XOR(state[4], d2); +} + +#include "aegis128l_common.h" + +struct aegis128l_implementation aegis128l_aesni_implementation = { SODIUM_C99(.encrypt_detached =) + encrypt_detached, + SODIUM_C99(.decrypt_detached =) + decrypt_detached }; + +#ifdef __clang__ +#pragma clang attribute pop +#endif + +#endif diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h new file mode 100644 index 0000000000..65e52dab1b --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h @@ -0,0 +1,8 @@ +#ifndef aegis128l_aesni_H +#define aegis128l_aesni_H + +#include "implementations.h" + +extern struct aegis128l_implementation aegis128l_aesni_implementation; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h new file mode 100644 index 0000000000..41ad43cba0 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h @@ -0,0 +1,8 @@ +#ifndef aegis128l_armcrypto_H +#define aegis128l_armcrypto_H + +#include "implementations.h" + +extern struct aegis128l_implementation aegis128l_armcrypto_implementation; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h new file mode 100644 index 0000000000..6e503dc35a --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h @@ -0,0 +1,249 @@ +#define RATE 32 + +static void +aegis128l_init(const uint8_t *key, const uint8_t *nonce, aes_block_t *const state) +{ + static CRYPTO_ALIGN(AES_BLOCK_LENGTH) + const uint8_t c0_[AES_BLOCK_LENGTH] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, + 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }; + static CRYPTO_ALIGN(AES_BLOCK_LENGTH) + const uint8_t c1_[AES_BLOCK_LENGTH] = { 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, + 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }; + + const aes_block_t c0 = AES_BLOCK_LOAD(c0_); + const aes_block_t c1 = AES_BLOCK_LOAD(c1_); + aes_block_t k; + aes_block_t n; + int i; + + k = AES_BLOCK_LOAD(key); + n = AES_BLOCK_LOAD(nonce); + + state[0] = AES_BLOCK_XOR(k, n); + state[1] = c1; + state[2] = c0; + state[3] = c1; + state[4] = AES_BLOCK_XOR(k, n); + state[5] = AES_BLOCK_XOR(k, c0); + state[6] = AES_BLOCK_XOR(k, c1); + state[7] = AES_BLOCK_XOR(k, c0); + for (i = 0; i < 10; i++) { + aegis128l_update(state, n, k); + } +} + +static int +aegis128l_mac(uint8_t *mac, size_t maclen, size_t adlen, size_t mlen, aes_block_t *const state) +{ + aes_block_t tmp; + int i; + + tmp = AES_BLOCK_LOAD_64x2(((uint64_t) mlen) << 3, ((uint64_t) adlen) << 3); + tmp = AES_BLOCK_XOR(tmp, state[2]); + + for (i = 0; i < 7; i++) { + aegis128l_update(state, tmp, tmp); + } + + if (maclen == 16) { + tmp = AES_BLOCK_XOR(state[6], AES_BLOCK_XOR(state[5], state[4])); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[3], state[2])); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0])); + AES_BLOCK_STORE(mac, tmp); + } else if (maclen == 32) { + tmp = AES_BLOCK_XOR(state[3], state[2]); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0])); + AES_BLOCK_STORE(mac, tmp); + tmp = AES_BLOCK_XOR(state[7], state[6]); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[5], state[4])); + AES_BLOCK_STORE(mac + 16, tmp); + } else { + memset(mac, 0, maclen); + return -1; + } + return 0; +} + +static inline void +aegis128l_absorb(const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg0, msg1; + + msg0 = AES_BLOCK_LOAD(src); + msg1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH); + aegis128l_update(state, msg0, msg1); +} + +static inline void +aegis128l_absorb2(const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg0, msg1, msg2, msg3; + + msg0 = AES_BLOCK_LOAD(src + 0 * AES_BLOCK_LENGTH); + msg1 = AES_BLOCK_LOAD(src + 1 * AES_BLOCK_LENGTH); + msg2 = AES_BLOCK_LOAD(src + 2 * AES_BLOCK_LENGTH); + msg3 = AES_BLOCK_LOAD(src + 3 * AES_BLOCK_LENGTH); + aegis128l_update(state, msg0, msg1); + aegis128l_update(state, msg2, msg3); +} + +static void +aegis128l_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg0, msg1; + aes_block_t tmp0, tmp1; + + msg0 = AES_BLOCK_LOAD(src); + msg1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH); + tmp0 = AES_BLOCK_XOR(msg0, state[6]); + tmp0 = AES_BLOCK_XOR(tmp0, state[1]); + tmp1 = AES_BLOCK_XOR(msg1, state[5]); + tmp1 = AES_BLOCK_XOR(tmp1, state[2]); + tmp0 = AES_BLOCK_XOR(tmp0, AES_BLOCK_AND(state[2], state[3])); + tmp1 = AES_BLOCK_XOR(tmp1, AES_BLOCK_AND(state[6], state[7])); + AES_BLOCK_STORE(dst, tmp0); + AES_BLOCK_STORE(dst + AES_BLOCK_LENGTH, tmp1); + + aegis128l_update(state, msg0, msg1); +} + +static void +aegis128l_dec(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg0, msg1; + + msg0 = AES_BLOCK_LOAD(src); + msg1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH); + msg0 = AES_BLOCK_XOR(msg0, state[6]); + msg0 = AES_BLOCK_XOR(msg0, state[1]); + msg1 = AES_BLOCK_XOR(msg1, state[5]); + msg1 = AES_BLOCK_XOR(msg1, state[2]); + msg0 = AES_BLOCK_XOR(msg0, AES_BLOCK_AND(state[2], state[3])); + msg1 = AES_BLOCK_XOR(msg1, AES_BLOCK_AND(state[6], state[7])); + AES_BLOCK_STORE(dst, msg0); + AES_BLOCK_STORE(dst + AES_BLOCK_LENGTH, msg1); + + aegis128l_update(state, msg0, msg1); +} + +static void +aegis128l_declast(uint8_t *const dst, const uint8_t *const src, size_t len, + aes_block_t *const state) +{ + uint8_t pad[RATE]; + aes_block_t msg0, msg1; + + memset(pad, 0, sizeof pad); + memcpy(pad, src, len); + + msg0 = AES_BLOCK_LOAD(pad); + msg1 = AES_BLOCK_LOAD(pad + AES_BLOCK_LENGTH); + msg0 = AES_BLOCK_XOR(msg0, state[6]); + msg0 = AES_BLOCK_XOR(msg0, state[1]); + msg1 = AES_BLOCK_XOR(msg1, state[5]); + msg1 = AES_BLOCK_XOR(msg1, state[2]); + msg0 = AES_BLOCK_XOR(msg0, AES_BLOCK_AND(state[2], state[3])); + msg1 = AES_BLOCK_XOR(msg1, AES_BLOCK_AND(state[6], state[7])); + AES_BLOCK_STORE(pad, msg0); + AES_BLOCK_STORE(pad + AES_BLOCK_LENGTH, msg1); + + memset(pad + len, 0, sizeof pad - len); + memcpy(dst, pad, len); + + msg0 = AES_BLOCK_LOAD(pad); + msg1 = AES_BLOCK_LOAD(pad + AES_BLOCK_LENGTH); + + aegis128l_update(state, msg0, msg1); +} + +static int +encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen, + const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k) +{ + aes_block_t state[8]; + CRYPTO_ALIGN(RATE) uint8_t src[RATE]; + CRYPTO_ALIGN(RATE) uint8_t dst[RATE]; + size_t i; + + aegis128l_init(k, npub, state); + + for (i = 0; i + RATE * 2 <= adlen; i += RATE * 2) { + aegis128l_absorb2(ad + i, state); + } + for (; i + RATE <= adlen; i += RATE) { + aegis128l_absorb(ad + i, state); + } + if (adlen % RATE) { + memset(src, 0, RATE); + memcpy(src, ad + i, adlen % RATE); + aegis128l_absorb(src, state); + } + for (i = 0; i + RATE <= mlen; i += RATE) { + aegis128l_enc(c + i, m + i, state); + } + if (mlen % RATE) { + memset(src, 0, RATE); + memcpy(src, m + i, mlen % RATE); + aegis128l_enc(dst, src, state); + memcpy(c + i, dst, mlen % RATE); + } + + return aegis128l_mac(mac, maclen, adlen, mlen, state); +} + +static int +decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, size_t maclen, + const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k) +{ + aes_block_t state[8]; + CRYPTO_ALIGN(RATE) uint8_t src[RATE]; + CRYPTO_ALIGN(RATE) uint8_t dst[RATE]; + CRYPTO_ALIGN(16) uint8_t computed_mac[32]; + const size_t mlen = clen; + size_t i; + int ret; + + aegis128l_init(k, npub, state); + + for (i = 0; i + RATE * 2 <= adlen; i += RATE * 2) { + aegis128l_absorb2(ad + i, state); + } + for (; i + RATE <= adlen; i += RATE) { + aegis128l_absorb(ad + i, state); + } + if (adlen % RATE) { + memset(src, 0, RATE); + memcpy(src, ad + i, adlen % RATE); + aegis128l_absorb(src, state); + } + if (m != NULL) { + for (i = 0; i + RATE <= mlen; i += RATE) { + aegis128l_dec(m + i, c + i, state); + } + } else { + for (i = 0; i + RATE <= mlen; i += RATE) { + aegis128l_dec(dst, c + i, state); + } + } + if (mlen % RATE) { + if (m != NULL) { + aegis128l_declast(m + i, c + i, mlen % RATE, state); + } else { + aegis128l_declast(dst, c + i, mlen % RATE, state); + } + } + + COMPILER_ASSERT(sizeof computed_mac >= 32); + ret = -1; + if (aegis128l_mac(computed_mac, maclen, adlen, mlen, state) == 0) { + if (maclen == 16) { + ret = crypto_verify_16(computed_mac, mac); + } else if (maclen == 32) { + ret = crypto_verify_32(computed_mac, mac); + } + } + if (ret != 0 && m != NULL) { + memset(m, 0, mlen); + } + return ret; +} diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c new file mode 100644 index 0000000000..e1d60ecb4f --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aegis128l.h" +#include "crypto_verify_16.h" +#include "crypto_verify_32.h" +#include "export.h" +#include "utils.h" + +#include "private/common.h" + +#include "crypto_aead_aegis128l.h" +#include "private/softaes.h" + +#if 1 + +#include "aegis128l_soft.h" + +#define AES_BLOCK_LENGTH 16 + +typedef SoftAesBlock aes_block_t; +#define AES_BLOCK_XOR(A, B) softaes_block_xor((A), (B)) +#define AES_BLOCK_AND(A, B) softaes_block_and((A), (B)) +#define AES_BLOCK_LOAD(A) softaes_block_load(A) +#define AES_BLOCK_LOAD_64x2(A, B) softaes_block_load64x2((A), (B)) +#define AES_BLOCK_STORE(A, B) softaes_block_store((A), (B)) +#define AES_ENC(A, B) softaes_block_encrypt((A), (B)) + +static inline void +aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2) +{ + aes_block_t tmp; + + tmp = state[7]; + state[7] = AES_ENC(state[6], state[7]); + state[6] = AES_ENC(state[5], state[6]); + state[5] = AES_ENC(state[4], state[5]); + state[4] = AES_ENC(state[3], state[4]); + state[3] = AES_ENC(state[2], state[3]); + state[2] = AES_ENC(state[1], state[2]); + state[1] = AES_ENC(state[0], state[1]); + state[0] = AES_ENC(tmp, state[0]); + + state[0] = AES_BLOCK_XOR(state[0], d1); + state[4] = AES_BLOCK_XOR(state[4], d2); +} + +#include "aegis128l_common.h" + +struct aegis128l_implementation aegis128l_soft_implementation = { SODIUM_C99(.encrypt_detached =) + encrypt_detached, + SODIUM_C99(.decrypt_detached =) + decrypt_detached }; + +#endif diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h new file mode 100644 index 0000000000..df8ddece08 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h @@ -0,0 +1,8 @@ +#ifndef aegis128l_soft_H +#define aegis128l_soft_H + +#include "implementations.h" + +extern struct aegis128l_implementation aegis128l_soft_implementation; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis128l/implementations.h b/libs/libsodium/src/crypto_aead/aegis128l/implementations.h new file mode 100644 index 0000000000..29e7b1cb88 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis128l/implementations.h @@ -0,0 +1,17 @@ +#ifndef aegis128l_implementations_H +#define aegis128l_implementations_H + +#include +#include + +#include "crypto_aead_aegis128l.h" + +typedef struct aegis128l_implementation { + int (*encrypt_detached)(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen, + const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k); + int (*decrypt_detached)(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, + size_t maclen, const uint8_t *ad, size_t adlen, const uint8_t *npub, + const uint8_t *k); +} aegis128l_implementation; + +#endif diff --git a/libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c b/libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c new file mode 100644 index 0000000000..0fd8f966cf --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c @@ -0,0 +1,158 @@ + +#include +#include + +#include "core.h" +#include "crypto_aead_aegis256.h" +#include "private/common.h" +#include "private/implementations.h" +#include "randombytes.h" +#include "runtime.h" + +#include "aegis256_soft.h" + +#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN) +#include "aegis256_armcrypto.h" +#endif + +#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H) +#include "aegis256_aesni.h" +#endif + +static const aegis256_implementation *implementation = &aegis256_soft_implementation; + +size_t +crypto_aead_aegis256_keybytes(void) +{ + return crypto_aead_aegis256_KEYBYTES; +} + +size_t +crypto_aead_aegis256_nsecbytes(void) +{ + return crypto_aead_aegis256_NSECBYTES; +} + +size_t +crypto_aead_aegis256_npubbytes(void) +{ + return crypto_aead_aegis256_NPUBBYTES; +} + +size_t +crypto_aead_aegis256_abytes(void) +{ + return crypto_aead_aegis256_ABYTES; +} + +size_t +crypto_aead_aegis256_messagebytes_max(void) +{ + return crypto_aead_aegis256_MESSAGEBYTES_MAX; +} + +void +crypto_aead_aegis256_keygen(unsigned char k[crypto_aead_aegis256_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_aegis256_KEYBYTES); +} + +int +crypto_aead_aegis256_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + unsigned long long clen = 0ULL; + int ret; + + ret = + crypto_aead_aegis256_encrypt_detached(c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + clen = mlen + crypto_aead_aegis256_ABYTES; + } + *clen_p = clen; + } + return ret; +} + +int +crypto_aead_aegis256_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= crypto_aead_aegis256_ABYTES) { + ret = crypto_aead_aegis256_decrypt_detached(m, nsec, c, clen - crypto_aead_aegis256_ABYTES, + c + clen - crypto_aead_aegis256_ABYTES, ad, + adlen, npub, k); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - crypto_aead_aegis256_ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +int +crypto_aead_aegis256_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + const size_t maclen = crypto_aead_aegis256_ABYTES; + + if (maclen_p != NULL) { + *maclen_p = maclen; + } + if (mlen > crypto_aead_aegis256_MESSAGEBYTES_MAX || + adlen > crypto_aead_aegis256_MESSAGEBYTES_MAX) { + sodium_misuse(); + } + return implementation->encrypt_detached(c, mac, maclen, m, (size_t) mlen, ad, (size_t) adlen, + npub, k); +} + +int +crypto_aead_aegis256_decrypt_detached(unsigned char *m, unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *mac, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + const size_t maclen = crypto_aead_aegis256_ABYTES; + + if (clen > crypto_aead_aegis256_MESSAGEBYTES_MAX || + adlen > crypto_aead_aegis256_MESSAGEBYTES_MAX) { + return -1; + } + return implementation->decrypt_detached(m, c, (size_t) clen, mac, maclen, ad, (size_t) adlen, + npub, k); +} + +int +_crypto_aead_aegis256_pick_best_implementation(void) +{ + implementation = &aegis256_soft_implementation; + +#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN) + if (sodium_runtime_has_armcrypto()) { + implementation = &aegis256_armcrypto_implementation; + return 0; + } +#endif + +#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H) + if (sodium_runtime_has_aesni() & sodium_runtime_has_avx()) { + implementation = &aegis256_aesni_implementation; + return 0; + } +#endif + return 0; /* LCOV_EXCL_LINE */ +} diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c new file mode 100644 index 0000000000..96aa0036ba --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aegis256.h" +#include "crypto_verify_16.h" +#include "crypto_verify_32.h" +#include "export.h" +#include "utils.h" + +#include "private/common.h" + +#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H) + +#include "aegis256_aesni.h" + +#ifdef __clang__ +#pragma clang attribute push(__attribute__((target("aes,avx"))), apply_to = function) +#elif defined(__GNUC__) +#pragma GCC target("aes,avx") +#endif + +#include "private/sse2_64_32.h" +#include +#include + +#define AES_BLOCK_LENGTH 16 + +typedef __m128i aes_block_t; +#define AES_BLOCK_XOR(A, B) _mm_xor_si128((A), (B)) +#define AES_BLOCK_AND(A, B) _mm_and_si128((A), (B)) +#define AES_BLOCK_LOAD(A) _mm_loadu_si128((const aes_block_t *) (const void *) (A)) +#define AES_BLOCK_LOAD_64x2(A, B) _mm_set_epi64x((long long) (A), (long long) (B)) +#define AES_BLOCK_STORE(A, B) _mm_storeu_si128((aes_block_t *) (void *) (A), (B)) +#define AES_ENC(A, B) _mm_aesenc_si128((A), (B)) + +static inline void +aegis256_update(aes_block_t *const state, const aes_block_t d) +{ + aes_block_t tmp; + + tmp = state[5]; + state[5] = AES_ENC(state[4], state[5]); + state[4] = AES_ENC(state[3], state[4]); + state[3] = AES_ENC(state[2], state[3]); + state[2] = AES_ENC(state[1], state[2]); + state[1] = AES_ENC(state[0], state[1]); + state[0] = AES_BLOCK_XOR(AES_ENC(tmp, state[0]), d); +} + +#include "aegis256_common.h" + +struct aegis256_implementation aegis256_aesni_implementation = { SODIUM_C99(.encrypt_detached =) + encrypt_detached, + SODIUM_C99(.decrypt_detached =) + decrypt_detached }; + +#ifdef __clang__ +#pragma clang attribute pop +#endif + +#endif diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h new file mode 100644 index 0000000000..21f4d819b9 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h @@ -0,0 +1,8 @@ +#ifndef aegis256_aesni_H +#define aegis256_aesni_H + +#include "implementations.h" + +extern struct aegis256_implementation aegis256_aesni_implementation; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h new file mode 100644 index 0000000000..a9bd4ad392 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h @@ -0,0 +1,8 @@ +#ifndef aegis256_armcrypto_H +#define aegis256_armcrypto_H + +#include "implementations.h" + +extern struct aegis256_implementation aegis256_armcrypto_implementation; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h new file mode 100644 index 0000000000..adf837a922 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h @@ -0,0 +1,232 @@ +#define RATE 16 + +static void +aegis256_init(const uint8_t *key, const uint8_t *nonce, aes_block_t *const state) +{ + static CRYPTO_ALIGN(AES_BLOCK_LENGTH) + const uint8_t c0_[AES_BLOCK_LENGTH] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d, + 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 }; + static CRYPTO_ALIGN(AES_BLOCK_LENGTH) + const uint8_t c1_[AES_BLOCK_LENGTH] = { 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1, + 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd }; + + const aes_block_t c0 = AES_BLOCK_LOAD(c0_); + const aes_block_t c1 = AES_BLOCK_LOAD(c1_); + const aes_block_t k0 = AES_BLOCK_LOAD(key); + const aes_block_t k1 = AES_BLOCK_LOAD(key + AES_BLOCK_LENGTH); + const aes_block_t n0 = AES_BLOCK_LOAD(nonce); + const aes_block_t n1 = AES_BLOCK_LOAD(nonce + AES_BLOCK_LENGTH); + const aes_block_t k0_n0 = AES_BLOCK_XOR(k0, n0); + const aes_block_t k1_n1 = AES_BLOCK_XOR(k1, n1); + int i; + + state[0] = k0_n0; + state[1] = k1_n1; + state[2] = c1; + state[3] = c0; + state[4] = AES_BLOCK_XOR(k0, c0); + state[5] = AES_BLOCK_XOR(k1, c1); + for (i = 0; i < 4; i++) { + aegis256_update(state, k0); + aegis256_update(state, k1); + aegis256_update(state, k0_n0); + aegis256_update(state, k1_n1); + } +} + +static int +aegis256_mac(uint8_t *mac, size_t maclen, size_t adlen, size_t mlen, aes_block_t *const state) +{ + aes_block_t tmp; + int i; + + tmp = AES_BLOCK_LOAD_64x2(((uint64_t) mlen) << 3, ((uint64_t) adlen) << 3); + tmp = AES_BLOCK_XOR(tmp, state[3]); + + for (i = 0; i < 7; i++) { + aegis256_update(state, tmp); + } + + if (maclen == 16) { + tmp = AES_BLOCK_XOR(state[5], state[4]); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[3], state[2])); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0])); + AES_BLOCK_STORE(mac, tmp); + } else if (maclen == 32) { + tmp = AES_BLOCK_XOR(AES_BLOCK_XOR(state[2], state[1]), state[0]); + AES_BLOCK_STORE(mac, tmp); + tmp = AES_BLOCK_XOR(AES_BLOCK_XOR(state[5], state[4]), state[3]); + AES_BLOCK_STORE(mac + 16, tmp); + } else { + memset(mac, 0, maclen); + return -1; + } + return 0; +} + +static inline void +aegis256_absorb(const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg; + + msg = AES_BLOCK_LOAD(src); + aegis256_update(state, msg); +} + +static inline void +aegis256_absorb2(const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg, msg2; + + msg = AES_BLOCK_LOAD(src + 0 * AES_BLOCK_LENGTH); + msg2 = AES_BLOCK_LOAD(src + 1 * AES_BLOCK_LENGTH); + aegis256_update(state, msg); + aegis256_update(state, msg2); +} + +static void +aegis256_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg; + aes_block_t tmp; + + msg = AES_BLOCK_LOAD(src); + tmp = AES_BLOCK_XOR(msg, state[5]); + tmp = AES_BLOCK_XOR(tmp, state[4]); + tmp = AES_BLOCK_XOR(tmp, state[1]); + tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_AND(state[2], state[3])); + AES_BLOCK_STORE(dst, tmp); + + aegis256_update(state, msg); +} + +static void +aegis256_dec(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state) +{ + aes_block_t msg; + + msg = AES_BLOCK_LOAD(src); + msg = AES_BLOCK_XOR(msg, state[5]); + msg = AES_BLOCK_XOR(msg, state[4]); + msg = AES_BLOCK_XOR(msg, state[1]); + msg = AES_BLOCK_XOR(msg, AES_BLOCK_AND(state[2], state[3])); + AES_BLOCK_STORE(dst, msg); + + aegis256_update(state, msg); +} + +static void +aegis256_declast(uint8_t *const dst, const uint8_t *const src, size_t len, aes_block_t *const state) +{ + uint8_t pad[RATE]; + aes_block_t msg; + + memset(pad, 0, sizeof pad); + memcpy(pad, src, len); + + msg = AES_BLOCK_LOAD(pad); + msg = AES_BLOCK_XOR(msg, state[5]); + msg = AES_BLOCK_XOR(msg, state[4]); + msg = AES_BLOCK_XOR(msg, state[1]); + msg = AES_BLOCK_XOR(msg, AES_BLOCK_AND(state[2], state[3])); + AES_BLOCK_STORE(pad, msg); + + memset(pad + len, 0, sizeof pad - len); + memcpy(dst, pad, len); + + msg = AES_BLOCK_LOAD(pad); + + aegis256_update(state, msg); +} + +static int +encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen, + const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k) +{ + aes_block_t state[6]; + CRYPTO_ALIGN(RATE) uint8_t src[RATE]; + CRYPTO_ALIGN(RATE) uint8_t dst[RATE]; + size_t i; + + aegis256_init(k, npub, state); + + for (i = 0; i + 2 * RATE <= adlen; i += 2 * RATE) { + aegis256_absorb2(ad + i, state); + } + for (; i + RATE <= adlen; i += RATE) { + aegis256_absorb(ad + i, state); + } + if (adlen % RATE) { + memset(src, 0, RATE); + memcpy(src, ad + i, adlen % RATE); + aegis256_absorb(src, state); + } + for (i = 0; i + RATE <= mlen; i += RATE) { + aegis256_enc(c + i, m + i, state); + } + if (mlen % RATE) { + memset(src, 0, RATE); + memcpy(src, m + i, mlen % RATE); + aegis256_enc(dst, src, state); + memcpy(c + i, dst, mlen % RATE); + } + + return aegis256_mac(mac, maclen, adlen, mlen, state); +} + +static int +decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, size_t maclen, + const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k) +{ + aes_block_t state[6]; + CRYPTO_ALIGN(RATE) uint8_t src[RATE]; + CRYPTO_ALIGN(RATE) uint8_t dst[RATE]; + CRYPTO_ALIGN(16) uint8_t computed_mac[32]; + const size_t mlen = clen; + size_t i; + int ret; + + aegis256_init(k, npub, state); + + for (i = 0; i + 2 * RATE <= adlen; i += 2 * RATE) { + aegis256_absorb2(ad + i, state); + } + for (; i + RATE <= adlen; i += RATE) { + aegis256_absorb(ad + i, state); + } + if (adlen % RATE) { + memset(src, 0, RATE); + memcpy(src, ad + i, adlen % RATE); + aegis256_absorb(src, state); + } + if (m != NULL) { + for (i = 0; i + RATE <= mlen; i += RATE) { + aegis256_dec(m + i, c + i, state); + } + } else { + for (i = 0; i + RATE <= mlen; i += RATE) { + aegis256_dec(dst, c + i, state); + } + } + if (mlen % RATE) { + if (m != NULL) { + aegis256_declast(m + i, c + i, mlen % RATE, state); + } else { + aegis256_declast(dst, c + i, mlen % RATE, state); + } + } + + COMPILER_ASSERT(sizeof computed_mac >= 32); + ret = -1; + if (aegis256_mac(computed_mac, maclen, adlen, mlen, state) == 0) { + if (maclen == 16) { + ret = crypto_verify_16(computed_mac, mac); + } else if (maclen == 32) { + ret = crypto_verify_32(computed_mac, mac); + } + } + if (ret != 0 && m != NULL) { + memset(m, 0, mlen); + } + return ret; +} diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c new file mode 100644 index 0000000000..38024d17ad --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aegis256.h" +#include "crypto_verify_16.h" +#include "crypto_verify_32.h" +#include "export.h" +#include "utils.h" + +#include "private/common.h" + +#include "crypto_aead_aegis256.h" +#include "private/softaes.h" + +#if 1 + +#include "aegis256_soft.h" + +#define AES_BLOCK_LENGTH 16 + +typedef SoftAesBlock aes_block_t; +#define AES_BLOCK_XOR(A, B) softaes_block_xor((A), (B)) +#define AES_BLOCK_AND(A, B) softaes_block_and((A), (B)) +#define AES_BLOCK_LOAD(A) softaes_block_load(A) +#define AES_BLOCK_LOAD_64x2(A, B) softaes_block_load64x2((A), (B)) +#define AES_BLOCK_STORE(A, B) softaes_block_store((A), (B)) +#define AES_ENC(A, B) softaes_block_encrypt((A), (B)) + +static inline void +aegis256_update(aes_block_t *const state, const aes_block_t d) +{ + aes_block_t tmp; + + tmp = state[5]; + state[5] = AES_ENC(state[4], state[5]); + state[4] = AES_ENC(state[3], state[4]); + state[3] = AES_ENC(state[2], state[3]); + state[2] = AES_ENC(state[1], state[2]); + state[1] = AES_ENC(state[0], state[1]); + state[0] = AES_BLOCK_XOR(AES_ENC(tmp, state[0]), d); +} + +#include "aegis256_common.h" + +struct aegis256_implementation aegis256_soft_implementation = { SODIUM_C99(.encrypt_detached =) + encrypt_detached, + SODIUM_C99(.decrypt_detached =) + decrypt_detached }; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h new file mode 100644 index 0000000000..c20198de3f --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h @@ -0,0 +1,8 @@ +#ifndef aegis256_soft_H +#define aegis256_soft_H + +#include "implementations.h" + +extern struct aegis256_implementation aegis256_soft_implementation; + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aegis256/implementations.h b/libs/libsodium/src/crypto_aead/aegis256/implementations.h new file mode 100644 index 0000000000..9efbf38763 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aegis256/implementations.h @@ -0,0 +1,17 @@ +#ifndef aegis256_implementations_H +#define aegis256_implementations_H + +#include +#include + +#include "crypto_aead_aegis256.h" + +typedef struct aegis256_implementation { + int (*encrypt_detached)(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen, + const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k); + int (*decrypt_detached)(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, + size_t maclen, const uint8_t *ad, size_t adlen, const uint8_t *npub, + const uint8_t *k); +} aegis256_implementation; + +#endif diff --git a/libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c b/libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c new file mode 100644 index 0000000000..2946ba873b --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c @@ -0,0 +1,157 @@ +#include +#include + +#include "crypto_aead_aes256gcm.h" +#include "private/common.h" +#include "randombytes.h" + +size_t +crypto_aead_aes256gcm_keybytes(void) +{ + return crypto_aead_aes256gcm_KEYBYTES; +} + +size_t +crypto_aead_aes256gcm_nsecbytes(void) +{ + return crypto_aead_aes256gcm_NSECBYTES; +} + +size_t +crypto_aead_aes256gcm_npubbytes(void) +{ + return crypto_aead_aes256gcm_NPUBBYTES; +} + +size_t +crypto_aead_aes256gcm_abytes(void) +{ + return crypto_aead_aes256gcm_ABYTES; +} + +size_t +crypto_aead_aes256gcm_statebytes(void) +{ + return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U; +} + +size_t +crypto_aead_aes256gcm_messagebytes_max(void) +{ + return crypto_aead_aes256gcm_MESSAGEBYTES_MAX; +} + +void +crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES); +} + +#if !((defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN)) || \ + (defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H))) + +#ifndef ENOSYS +#define ENOSYS ENXIO +#endif + +int +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long mlen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *nsec, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + errno = ENOSYS; + return -1; +} + +int +crypto_aead_aes256gcm_is_available(void) +{ + return 0; +} + +#endif \ No newline at end of file diff --git a/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c b/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c index c0d8674af6..7d9cfd12e9 100644 --- a/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c +++ b/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c @@ -1,17 +1,12 @@ - -/* - * AES256-GCM, based on the "Intel Carry-Less Multiplication Instruction and its Usage for Computing - * the GCM Mode" paper and reference code, using the aggregated reduction method. - * Originally adapted by Romain Dolbeau. - */ - #include +#include #include #include #include #include "core.h" #include "crypto_aead_aes256gcm.h" +#include "crypto_verify_16.h" #include "export.h" #include "private/common.h" #include "private/sse2_64_32.h" @@ -21,976 +16,803 @@ #if defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H) -# ifdef __GNUC__ -# pragma GCC target("ssse3") -# pragma GCC target("aes") -# pragma GCC target("pclmul") +# ifdef __clang__ +# pragma clang attribute push(__attribute__((target("aes,avx,pclmul"))), apply_to = function) +# elif defined(__GNUC__) +# pragma GCC target("aes,avx,pclmul") # endif +#if !defined(_MSC_VER) || _MSC_VER < 1800 +#define __vectorcall +#endif + #include #include -#ifndef ENOSYS -# define ENOSYS ENXIO -#endif - -#if defined(__INTEL_COMPILER) || defined(_bswap64) -#elif defined(_MSC_VER) -# define _bswap64(a) _byteswap_uint64(a) -#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) -# define _bswap64(a) __builtin_bswap64(a) -#else -static inline uint64_t -_bswap64(const uint64_t x) -{ - return - ((x << 56) & 0xFF00000000000000UL) | ((x << 40) & 0x00FF000000000000UL) | - ((x << 24) & 0x0000FF0000000000UL) | ((x << 8) & 0x000000FF00000000UL) | - ((x >> 8) & 0x00000000FF000000UL) | ((x >> 24) & 0x0000000000FF0000UL) | - ((x >> 40) & 0x000000000000FF00UL) | ((x >> 56) & 0x00000000000000FFUL); +#define ABYTES crypto_aead_aes256gcm_ABYTES +#define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES +#define KEYBYTES crypto_aead_aes256gcm_KEYBYTES + +#define PARALLEL_BLOCKS 7 +#undef USE_KARATSUBA_MULTIPLICATION + +typedef __m128i BlockVec; + +#define LOAD128(a) _mm_loadu_si128((const BlockVec *) (a)) +#define STORE128(a, b) _mm_storeu_si128((BlockVec *) (a), (b)) +#define AES_ENCRYPT(block_vec, rkey) _mm_aesenc_si128((block_vec), (rkey)) +#define AES_ENCRYPTLAST(block_vec, rkey) _mm_aesenclast_si128((block_vec), (rkey)) +#define AES_KEYGEN(block_vec, rc) _mm_aeskeygenassist_si128((block_vec), (rc)) +#define XOR128(a, b) _mm_xor_si128((a), (b)) +#define AND128(a, b) _mm_and_si128((a), (b)) +#define OR128(a, b) _mm_or_si128((a), (b)) +#define SET64x2(a, b) _mm_set_epi64x((uint64_t) (a), (uint64_t) (b)) +#define ZERO128 _mm_setzero_si128() +#define ONE128 SET64x2(0, 1) +#define ADD64x2(a, b) _mm_add_epi64((a), (b)) +#define SUB64x2(a, b) _mm_sub_epi64((a), (b)) +#define SHL64x2(a, b) _mm_slli_epi64((a), (b)) +#define SHR64x2(a, b) _mm_srli_epi64((a), (b)) +#define REV128(x) \ + _mm_shuffle_epi8((x), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) +#define SHUFFLE32x4(x, a, b, c, d) _mm_shuffle_epi32((x), _MM_SHUFFLE((d), (c), (b), (a))) +#define BYTESHL128(a, b) _mm_slli_si128(a, b) +#define BYTESHR128(a, b) _mm_srli_si128(a, b) +#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b))) +#define CLMULLO128(a, b) _mm_clmulepi64_si128((a), (b), 0x00) +#define CLMULHI128(a, b) _mm_clmulepi64_si128((a), (b), 0x11) +#define CLMULLOHI128(a, b) _mm_clmulepi64_si128((a), (b), 0x10) +#define CLMULHILO128(a, b) _mm_clmulepi64_si128((a), (b), 0x01) +#define PREFETCH_READ(x) _mm_prefetch((x), _MM_HINT_T1) +#define PREFETCH_WRITE(x) _mm_prefetch((x), _MM_HINT_T1) + +#define ROUNDS 14 + +#define PC_COUNT (2 * PARALLEL_BLOCKS) + +typedef struct I256 { + BlockVec hi; + BlockVec lo; + BlockVec mid; +} I256; + +typedef BlockVec Precomp; + +typedef struct GHash { + BlockVec acc; +} GHash; + +typedef struct State { + BlockVec rkeys[ROUNDS + 1]; + Precomp hx[PC_COUNT]; +} State; + +static void __vectorcall expand256(const unsigned char key[KEYBYTES], BlockVec rkeys[1 + ROUNDS]) +{ + BlockVec t1, t2, s; + size_t i = 0; + +#define EXPAND_KEY_1(RC) \ + rkeys[i++] = t2; \ + s = AES_KEYGEN(t2, RC); \ + t1 = XOR128(t1, BYTESHL128(t1, 4)); \ + t1 = XOR128(t1, BYTESHL128(t1, 8)); \ + t1 = XOR128(t1, SHUFFLE32x4(s, 3, 3, 3, 3)); + +#define EXPAND_KEY_2(RC) \ + rkeys[i++] = t1; \ + s = AES_KEYGEN(t1, RC); \ + t2 = XOR128(t2, BYTESHL128(t2, 4)); \ + t2 = XOR128(t2, BYTESHL128(t2, 8)); \ + t2 = XOR128(t2, SHUFFLE32x4(s, 2, 2, 2, 2)); + + t1 = LOAD128(&key[0]); + t2 = LOAD128(&key[16]); + + rkeys[i++] = t1; + EXPAND_KEY_1(0x01); + EXPAND_KEY_2(0x01); + EXPAND_KEY_1(0x02); + EXPAND_KEY_2(0x02); + EXPAND_KEY_1(0x04); + EXPAND_KEY_2(0x04); + EXPAND_KEY_1(0x08); + EXPAND_KEY_2(0x08); + EXPAND_KEY_1(0x10); + EXPAND_KEY_2(0x10); + EXPAND_KEY_1(0x20); + EXPAND_KEY_2(0x20); + EXPAND_KEY_1(0x40); + rkeys[i++] = t1; } -#endif -typedef struct aes256gcm_state { - __m128i rkeys[16]; - unsigned char H[16]; -} aes256gcm_state; +/* Encrypt a single AES block */ static inline void -aesni_key256_expand(const unsigned char *key, __m128i * const rkeys) +encrypt(const State *st, unsigned char dst[16], const unsigned char src[16]) { - __m128i X0, X1, X2, X3; - int i = 0; - - X0 = _mm_loadu_si128((const __m128i *) &key[0]); - rkeys[i++] = X0; - - X2 = _mm_loadu_si128((const __m128i *) &key[16]); - rkeys[i++] = X2; - -#define EXPAND_KEY_1(S) do { \ - X1 = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(X2, (S)), 0xff); \ - X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X0), 0x10)); \ - X0 = _mm_xor_si128(X0, X3); \ - X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X0), 0x8c)); \ - X0 = _mm_xor_si128(_mm_xor_si128(X0, X3), X1); \ - rkeys[i++] = X0; \ -} while (0) - -#define EXPAND_KEY_2(S) do { \ - X1 = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(X0, (S)), 0xaa); \ - X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X2), 0x10)); \ - X2 = _mm_xor_si128(X2, X3); \ - X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X2), 0x8c)); \ - X2 = _mm_xor_si128(_mm_xor_si128(X2, X3), X1); \ - rkeys[i++] = X2; \ -} while (0) - - X3 = _mm_setzero_si128(); - EXPAND_KEY_1(0x01); EXPAND_KEY_2(0x01); - EXPAND_KEY_1(0x02); EXPAND_KEY_2(0x02); - EXPAND_KEY_1(0x04); EXPAND_KEY_2(0x04); - EXPAND_KEY_1(0x08); EXPAND_KEY_2(0x08); - EXPAND_KEY_1(0x10); EXPAND_KEY_2(0x10); - EXPAND_KEY_1(0x20); EXPAND_KEY_2(0x20); - EXPAND_KEY_1(0x40); + BlockVec t; + + size_t i; + + t = XOR128(LOAD128(src), st->rkeys[0]); + for (i = 1; i < ROUNDS; i++) { + t = AES_ENCRYPT(t, st->rkeys[i]); + } + t = AES_ENCRYPTLAST(t, st->rkeys[ROUNDS]); + STORE128(dst, t); } -/** single, by-the-book AES encryption with AES-NI */ -static inline void -aesni_encrypt1(unsigned char *out, __m128i nv, const __m128i *rkeys) +/* Encrypt and add a single AES block */ + +static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char dst[16], + const unsigned char src[16], + const BlockVec counter) { - __m128i temp = _mm_xor_si128(nv, rkeys[0]); - - temp = _mm_aesenc_si128(temp, rkeys[1]); - temp = _mm_aesenc_si128(temp, rkeys[2]); - temp = _mm_aesenc_si128(temp, rkeys[3]); - temp = _mm_aesenc_si128(temp, rkeys[4]); - temp = _mm_aesenc_si128(temp, rkeys[5]); - temp = _mm_aesenc_si128(temp, rkeys[6]); - temp = _mm_aesenc_si128(temp, rkeys[7]); - temp = _mm_aesenc_si128(temp, rkeys[8]); - temp = _mm_aesenc_si128(temp, rkeys[9]); - temp = _mm_aesenc_si128(temp, rkeys[10]); - temp = _mm_aesenc_si128(temp, rkeys[11]); - temp = _mm_aesenc_si128(temp, rkeys[12]); - temp = _mm_aesenc_si128(temp, rkeys[13]); - - temp = _mm_aesenclast_si128(temp, rkeys[14]); - _mm_storeu_si128((__m128i *) out, temp); -} + BlockVec ts; + size_t i; -/** multiple-blocks-at-once AES encryption with AES-NI ; - on Haswell, aesenc has a latency of 7 and a throughput of 1 - so the sequence of aesenc should be bubble-free if you - have at least 8 blocks. Let's build an arbitratry-sized - function */ -/* Step 1 : loading the nonce */ -/* load & increment the n vector (non-vectorized, unused for now) */ -#define NVDECLx(a) \ - __m128i nv##a - -#define NVx(a) \ - nv##a = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) n), pt); \ - n[3]++ - -/* Step 2 : define value in round one (xor with subkey #0, aka key) */ -#define TEMPDECLx(a) \ - __m128i temp##a - -#define TEMPx(a) \ - temp##a = _mm_xor_si128(nv##a, rkeys[0]) - -/* Step 3: one round of AES */ -#define AESENCx(a) \ - temp##a = _mm_aesenc_si128(temp##a, rkeys[roundctr]) - -/* Step 4: last round of AES */ -#define AESENCLASTx(a) \ - temp##a = _mm_aesenclast_si128(temp##a, rkeys[14]) - -/* Step 5: store result */ -#define STOREx(a) \ - _mm_storeu_si128((__m128i *) (out + (a * 16)), temp##a) - -/* all the MAKE* macros are for automatic explicit unrolling */ -#define MAKE4(X) \ - X(0); \ - X(1); \ - X(2); \ - X(3) - -#define MAKE8(X) \ - X(0); \ - X(1); \ - X(2); \ - X(3); \ - X(4); \ - X(5); \ - X(6); \ - X(7) - -#define COUNTER_INC2(N) (N)[3] += 2 - -/* create a function of unrolling N ; the MAKEN is the unrolling - macro, defined above. The N in MAKEN must match N, obviously. */ -#define FUNC(N, MAKEN) \ - static inline void aesni_encrypt##N(unsigned char *out, uint32_t *n, const __m128i *rkeys) \ - { \ - const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int roundctr; \ - MAKEN(NVDECLx); \ - MAKEN(TEMPDECLx); \ - \ - MAKEN(NVx); \ - MAKEN(TEMPx); \ - for (roundctr = 1; roundctr < 14; roundctr++) { \ - MAKEN(AESENCx); \ - } \ - MAKEN(AESENCLASTx); \ - MAKEN(STOREx); \ + ts = XOR128(counter, st->rkeys[0]); + for (i = 1; i < ROUNDS; i++) { + ts = AES_ENCRYPT(ts, st->rkeys[i]); } + ts = AES_ENCRYPTLAST(ts, st->rkeys[i]); + ts = XOR128(ts, LOAD128(src)); + STORE128(dst, ts); +} -FUNC(8, MAKE8) - -/* all GF(2^128) fnctions are by the book, meaning this one: - -*/ +/* Encrypt and add PARALLEL_BLOCKS AES blocks */ -static inline void -addmul(unsigned char *c, const unsigned char *a, unsigned int xlen, const unsigned char *b) +static inline void __vectorcall encrypt_xor_wide(const State *st, + unsigned char dst[16 * PARALLEL_BLOCKS], + const unsigned char src[16 * PARALLEL_BLOCKS], + const BlockVec counters[PARALLEL_BLOCKS]) { - const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - __m128i A, B, C; - __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; - __m128i tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18; - __m128i tmp19, tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; - __m128i tmp28, tmp29, tmp30, tmp31, tmp32, tmp33, tmp34, tmp35, tmp36; - - if (xlen >= 16) { - A = _mm_loadu_si128((const __m128i *) a); - } else { - CRYPTO_ALIGN(16) unsigned char padded[16]; - unsigned int i; + BlockVec ts[PARALLEL_BLOCKS]; + size_t i, j; - memset(padded, 0, 16); - for (i = 0; i < xlen; i++) { - padded[i] = a[i]; + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = XOR128(counters[j], st->rkeys[0]); + } + for (i = 1; i < ROUNDS; i++) { + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_ENCRYPT(ts[j], st->rkeys[i]); } - A = _mm_load_si128((const __m128i *) padded); } - A = _mm_shuffle_epi8(A, rev); - B = _mm_loadu_si128((const __m128i *) b); - C = _mm_loadu_si128((const __m128i *) c); - A = _mm_xor_si128(A, C); - tmp3 = _mm_clmulepi64_si128(A, B, 0x00); - tmp4 = _mm_clmulepi64_si128(A, B, 0x10); - tmp5 = _mm_clmulepi64_si128(A, B, 0x01); - tmp6 = _mm_clmulepi64_si128(A, B, 0x11); - tmp10 = _mm_xor_si128(tmp4, tmp5); - tmp13 = _mm_slli_si128(tmp10, 8); - tmp11 = _mm_srli_si128(tmp10, 8); - tmp15 = _mm_xor_si128(tmp3, tmp13); - tmp17 = _mm_xor_si128(tmp6, tmp11); - tmp7 = _mm_srli_epi32(tmp15, 31); - tmp8 = _mm_srli_epi32(tmp17, 31); - tmp16 = _mm_slli_epi32(tmp15, 1); - tmp18 = _mm_slli_epi32(tmp17, 1); - tmp9 = _mm_srli_si128(tmp7, 12); - tmp22 = _mm_slli_si128(tmp8, 4); - tmp25 = _mm_slli_si128(tmp7, 4); - tmp29 = _mm_or_si128(tmp16, tmp25); - tmp19 = _mm_or_si128(tmp18, tmp22); - tmp20 = _mm_or_si128(tmp19, tmp9); - tmp26 = _mm_slli_epi32(tmp29, 31); - tmp23 = _mm_slli_epi32(tmp29, 30); - tmp32 = _mm_slli_epi32(tmp29, 25); - tmp27 = _mm_xor_si128(tmp26, tmp23); - tmp28 = _mm_xor_si128(tmp27, tmp32); - tmp24 = _mm_srli_si128(tmp28, 4); - tmp33 = _mm_slli_si128(tmp28, 12); - tmp30 = _mm_xor_si128(tmp29, tmp33); - tmp2 = _mm_srli_epi32(tmp30, 1); - tmp12 = _mm_srli_epi32(tmp30, 2); - tmp14 = _mm_srli_epi32(tmp30, 7); - tmp34 = _mm_xor_si128(tmp2, tmp12); - tmp35 = _mm_xor_si128(tmp34, tmp14); - tmp36 = _mm_xor_si128(tmp35, tmp24); - tmp31 = _mm_xor_si128(tmp30, tmp36); - tmp21 = _mm_xor_si128(tmp20, tmp31); - _mm_storeu_si128((__m128i *) c, tmp21); + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_ENCRYPTLAST(ts[j], st->rkeys[i]); + ts[j] = XOR128(ts[j], LOAD128(&src[16 * j])); + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + STORE128(&dst[16 * j], ts[j]); + } } -/* pure multiplication, for pre-computing powers of H */ -static inline __m128i -mulv(__m128i A, __m128i B) +/* Square a field element */ + +static inline I256 __vectorcall clsq128(const BlockVec x) { - __m128i tmp3 = _mm_clmulepi64_si128(A, B, 0x00); - __m128i tmp4 = _mm_clmulepi64_si128(A, B, 0x10); - __m128i tmp5 = _mm_clmulepi64_si128(A, B, 0x01); - __m128i tmp6 = _mm_clmulepi64_si128(A, B, 0x11); - __m128i tmp10 = _mm_xor_si128(tmp4, tmp5); - __m128i tmp13 = _mm_slli_si128(tmp10, 8); - __m128i tmp11 = _mm_srli_si128(tmp10, 8); - __m128i tmp15 = _mm_xor_si128(tmp3, tmp13); - __m128i tmp17 = _mm_xor_si128(tmp6, tmp11); - __m128i tmp7 = _mm_srli_epi32(tmp15, 31); - __m128i tmp8 = _mm_srli_epi32(tmp17, 31); - __m128i tmp16 = _mm_slli_epi32(tmp15, 1); - __m128i tmp18 = _mm_slli_epi32(tmp17, 1); - __m128i tmp9 = _mm_srli_si128(tmp7, 12); - __m128i tmp22 = _mm_slli_si128(tmp8, 4); - __m128i tmp25 = _mm_slli_si128(tmp7, 4); - __m128i tmp29 = _mm_or_si128(tmp16, tmp25); - __m128i tmp19 = _mm_or_si128(tmp18, tmp22); - __m128i tmp20 = _mm_or_si128(tmp19, tmp9); - __m128i tmp26 = _mm_slli_epi32(tmp29, 31); - __m128i tmp23 = _mm_slli_epi32(tmp29, 30); - __m128i tmp32 = _mm_slli_epi32(tmp29, 25); - __m128i tmp27 = _mm_xor_si128(tmp26, tmp23); - __m128i tmp28 = _mm_xor_si128(tmp27, tmp32); - __m128i tmp24 = _mm_srli_si128(tmp28, 4); - __m128i tmp33 = _mm_slli_si128(tmp28, 12); - __m128i tmp30 = _mm_xor_si128(tmp29, tmp33); - __m128i tmp2 = _mm_srli_epi32(tmp30, 1); - __m128i tmp12 = _mm_srli_epi32(tmp30, 2); - __m128i tmp14 = _mm_srli_epi32(tmp30, 7); - __m128i tmp34 = _mm_xor_si128(tmp2, tmp12); - __m128i tmp35 = _mm_xor_si128(tmp34, tmp14); - __m128i tmp36 = _mm_xor_si128(tmp35, tmp24); - __m128i tmp31 = _mm_xor_si128(tmp30, tmp36); - __m128i C = _mm_xor_si128(tmp20, tmp31); - - return C; + const BlockVec r_lo = CLMULLO128(x, x); + const BlockVec r_hi = CLMULHI128(x, x); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) ZERO128, + }; } -/* 4 multiply-accumulate at once; again - - for the Aggregated Reduction Method & sample code. - Algorithm by Krzysztof Jankowski, Pierre Laurent - Intel */ - -#define RED_DECL(a) __m128i H##a##_X##a##_lo, H##a##_X##a##_hi, tmp##a, tmp##a##B -#define RED_SHUFFLE(a) X##a = _mm_shuffle_epi8(X##a, rev) -#define RED_MUL_LOW(a) H##a##_X##a##_lo = _mm_clmulepi64_si128(H##a, X##a, 0x00) -#define RED_MUL_HIGH(a) H##a##_X##a##_hi = _mm_clmulepi64_si128(H##a, X##a, 0x11) -#define RED_MUL_MID(a) \ - tmp##a = _mm_shuffle_epi32(H##a, 0x4e); \ - tmp##a##B = _mm_shuffle_epi32(X##a, 0x4e); \ - tmp##a = _mm_xor_si128(tmp##a, H##a); \ - tmp##a##B = _mm_xor_si128(tmp##a##B, X##a); \ - tmp##a = _mm_clmulepi64_si128(tmp##a, tmp##a##B, 0x00) - -#define MULREDUCE4(rev, H0_, H1_, H2_, H3_, X0_, X1_, X2_, X3_, accv) \ -do { \ - MAKE4(RED_DECL); \ - __m128i lo, hi; \ - __m128i tmp8, tmp9; \ - __m128i H0 = H0_; \ - __m128i H1 = H1_; \ - __m128i H2 = H2_; \ - __m128i H3 = H3_; \ - __m128i X0 = X0_; \ - __m128i X1 = X1_; \ - __m128i X2 = X2_; \ - __m128i X3 = X3_; \ -\ -/* byte-revert the inputs & xor the first one into the accumulator */ \ -\ - MAKE4(RED_SHUFFLE); \ - X3 = _mm_xor_si128(X3, accv); \ -\ -/* 4 low H*X (x0*h0) */ \ -\ - MAKE4(RED_MUL_LOW); \ - lo = _mm_xor_si128(H0_X0_lo, H1_X1_lo); \ - lo = _mm_xor_si128(lo, H2_X2_lo); \ - lo = _mm_xor_si128(lo, H3_X3_lo); \ -\ -/* 4 high H*X (x1*h1) */ \ -\ - MAKE4(RED_MUL_HIGH); \ - hi = _mm_xor_si128(H0_X0_hi, H1_X1_hi); \ - hi = _mm_xor_si128(hi, H2_X2_hi); \ - hi = _mm_xor_si128(hi, H3_X3_hi); \ -\ -/* 4 middle H*X, using Karatsuba, i.e. \ - x1*h0+x0*h1 =(x1+x0)*(h1+h0)-x1*h1-x0*h0 \ - we already have all x1y1 & x0y0 (accumulated in hi & lo) \ - (0 is low half and 1 is high half) \ - */ \ -/* permute the high and low 64 bits in H1 & X1, \ - so create (h0,h1) from (h1,h0) and (x0,x1) from (x1,x0), \ - then compute (h0+h1,h1+h0) and (x0+x1,x1+x0), \ - and finally multiply \ - */ \ - MAKE4(RED_MUL_MID); \ -\ -/* substracts x1*h1 and x0*h0 */ \ - tmp0 = _mm_xor_si128(tmp0, lo); \ - tmp0 = _mm_xor_si128(tmp0, hi); \ - tmp0 = _mm_xor_si128(tmp1, tmp0); \ - tmp0 = _mm_xor_si128(tmp2, tmp0); \ - tmp0 = _mm_xor_si128(tmp3, tmp0);\ -\ - /* reduction */ \ - tmp0B = _mm_slli_si128(tmp0, 8); \ - tmp0 = _mm_srli_si128(tmp0, 8); \ - lo = _mm_xor_si128(tmp0B, lo); \ - hi = _mm_xor_si128(tmp0, hi); \ - tmp3 = lo; \ - tmp2B = hi; \ - tmp3B = _mm_srli_epi32(tmp3, 31); \ - tmp8 = _mm_srli_epi32(tmp2B, 31); \ - tmp3 = _mm_slli_epi32(tmp3, 1); \ - tmp2B = _mm_slli_epi32(tmp2B, 1); \ - tmp9 = _mm_srli_si128(tmp3B, 12); \ - tmp8 = _mm_slli_si128(tmp8, 4); \ - tmp3B = _mm_slli_si128(tmp3B, 4); \ - tmp3 = _mm_or_si128(tmp3, tmp3B); \ - tmp2B = _mm_or_si128(tmp2B, tmp8); \ - tmp2B = _mm_or_si128(tmp2B, tmp9); \ - tmp3B = _mm_slli_epi32(tmp3, 31); \ - tmp8 = _mm_slli_epi32(tmp3, 30); \ - tmp9 = _mm_slli_epi32(tmp3, 25); \ - tmp3B = _mm_xor_si128(tmp3B, tmp8); \ - tmp3B = _mm_xor_si128(tmp3B, tmp9); \ - tmp8 = _mm_srli_si128(tmp3B, 4); \ - tmp3B = _mm_slli_si128(tmp3B, 12); \ - tmp3 = _mm_xor_si128(tmp3, tmp3B); \ - tmp2 = _mm_srli_epi32(tmp3, 1); \ - tmp0B = _mm_srli_epi32(tmp3, 2); \ - tmp1B = _mm_srli_epi32(tmp3, 7); \ - tmp2 = _mm_xor_si128(tmp2, tmp0B); \ - tmp2 = _mm_xor_si128(tmp2, tmp1B); \ - tmp2 = _mm_xor_si128(tmp2, tmp8); \ - tmp3 = _mm_xor_si128(tmp3, tmp2); \ - tmp2B = _mm_xor_si128(tmp2B, tmp3); \ -\ - accv = tmp2B; \ -} while(0) - -#define XORx(a) \ - temp##a = _mm_xor_si128(temp##a, \ - _mm_loadu_si128((const __m128i *) (in + a * 16))) - -#define LOADx(a) \ - __m128i in##a = _mm_loadu_si128((const __m128i *) (in + a * 16)) - -/* full encrypt & checksum 8 blocks at once */ -#define aesni_encrypt8full(out_, n_, rkeys, in_, accum, hv_, h2v_, h3v_, h4v_, rev) \ -do { \ - unsigned char *out = out_; \ - uint32_t *n = n_; \ - const unsigned char *in = in_; \ - const __m128i hv = hv_; \ - const __m128i h2v = h2v_; \ - const __m128i h3v = h3v_; \ - const __m128i h4v = h4v_; \ - const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __m128i accv_; \ - int roundctr; \ - \ - MAKE8(NVDECLx); \ - MAKE8(TEMPDECLx); \ - MAKE8(NVx); \ - MAKE8(TEMPx); \ - for (roundctr = 1; roundctr < 14; roundctr++) { \ - MAKE8(AESENCx); \ - } \ - MAKE8(AESENCLASTx); \ - MAKE8(XORx); \ - MAKE8(STOREx); \ - accv_ = _mm_load_si128((const __m128i *) accum); \ - MULREDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv_); \ - MULREDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv_); \ - _mm_store_si128((__m128i *) accum, accv_); \ -} while(0) - -/* checksum 8 blocks at once */ -#define aesni_addmul8full(in_, accum, hv_, h2v_, h3v_, h4v_, rev) \ -do { \ - const unsigned char *in = in_; \ - const __m128i hv = hv_; \ - const __m128i h2v = h2v_; \ - const __m128i h3v = h3v_; \ - const __m128i h4v = h4v_; \ - __m128i accv_; \ - \ - MAKE8(LOADx); \ - accv_ = _mm_load_si128((const __m128i *) accum); \ - MULREDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv_); \ - MULREDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv_); \ - _mm_store_si128((__m128i *) accum, accv_); \ -} while(0) - -/* decrypt 8 blocks at once */ -#define aesni_decrypt8full(out_, n_, rkeys, in_) \ -do { \ - unsigned char *out = out_; \ - uint32_t *n = n_; \ - const unsigned char *in = in_; \ - const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int roundctr; \ -\ - MAKE8(NVDECLx); \ - MAKE8(TEMPDECLx); \ - MAKE8(NVx); \ - MAKE8(TEMPx); \ - for (roundctr = 1; roundctr < 14; roundctr++) { \ - MAKE8(AESENCx); \ - } \ - MAKE8(AESENCLASTx); \ - MAKE8(XORx); \ - MAKE8(STOREx); \ -} while(0) +/* Multiply two field elements -- Textbook multiplication is faster than Karatsuba on some recent + * CPUs */ -int -crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *ctx_, - const unsigned char *k) +static inline I256 __vectorcall clmul128(const BlockVec x, const BlockVec y) +{ +#ifdef USE_KARATSUBA_MULTIPLICATION + const BlockVec x_hi = BYTESHR128(x, 8); + const BlockVec y_hi = BYTESHR128(y, 8); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_mid = XOR128(CLMULLO128(XOR128(x, x_hi), XOR128(y, y_hi)), XOR128(r_lo, r_hi)); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) r_mid, + }; +#else + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_mid = XOR128(CLMULHILO128(x, y), CLMULLOHI128(x, y)); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) r_mid, + }; +#endif +} + +/* Merge the middle word and reduce a field element */ + +static inline BlockVec __vectorcall gcm_reduce(const I256 x) { - aes256gcm_state *ctx = (aes256gcm_state *) (void *) ctx_; - unsigned char *H = ctx->H; - __m128i *rkeys = ctx->rkeys; - __m128i zero = _mm_setzero_si128(); + const BlockVec hi = XOR128(x.hi, BYTESHR128(x.mid, 8)); + const BlockVec lo = XOR128(x.lo, BYTESHL128(x.mid, 8)); - COMPILER_ASSERT((sizeof *ctx_) >= (sizeof *ctx)); - aesni_key256_expand(k, rkeys); - aesni_encrypt1(H, zero, rkeys); + const BlockVec p64 = SET64x2(0, 0xc200000000000000); + const BlockVec a = CLMULLO128(lo, p64); + const BlockVec b = XOR128(SHUFFLE32x4(lo, 2, 3, 0, 1), a); + const BlockVec c = CLMULLO128(b, p64); + const BlockVec d = XOR128(SHUFFLE32x4(b, 2, 3, 0, 1), c); - return 0; + return XOR128(d, hi); } -int -crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, - unsigned char *mac, unsigned long long *maclen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) +/* Precompute powers of H from `from` to `to` */ + +static inline void __vectorcall precomp(Precomp hx[PC_COUNT], const size_t from, const size_t to) { - const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const aes256gcm_state *ctx = (const aes256gcm_state *) (const void *) ctx_; - const __m128i *rkeys = ctx->rkeys; - __m128i Hv, H2v, H3v, H4v, accv; - unsigned long long i, j; - unsigned long long adlen_rnd64 = adlen & ~63ULL; - unsigned long long mlen_rnd128 = mlen & ~127ULL; - CRYPTO_ALIGN(16) uint32_t n2[4]; - CRYPTO_ALIGN(16) unsigned char H[16]; - CRYPTO_ALIGN(16) unsigned char T[16]; - CRYPTO_ALIGN(16) unsigned char accum[16]; - CRYPTO_ALIGN(16) unsigned char fb[16]; + const Precomp h = hx[0]; + size_t i; - (void) nsec; - memcpy(H, ctx->H, sizeof H); - if (mlen > crypto_aead_aes256gcm_MESSAGEBYTES_MAX) { - sodium_misuse(); /* LCOV_EXCL_LINE */ + for (i = from & ~1U; i < to; i += 2) { + hx[i] = gcm_reduce(clmul128(hx[i - 1], h)); + hx[i + 1] = gcm_reduce(clsq128(hx[i / 2])); } - memcpy(&n2[0], npub, 3 * 4); - n2[3] = 0x01000000; - aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys); - { - uint64_t x; - x = _bswap64((uint64_t) (8 * adlen)); - memcpy(&fb[0], &x, sizeof x); - x = _bswap64((uint64_t) (8 * mlen)); - memcpy(&fb[8], &x, sizeof x); +} + +/* Precompute powers of H given a key and a block count */ + +static void __vectorcall precomp_for_block_count(Precomp hx[PC_COUNT], + const unsigned char gh_key[16], + const size_t block_count) +{ + const BlockVec h0 = REV128(LOAD128(gh_key)); + BlockVec carry = SET64x2(0xc200000000000000, 1); + BlockVec mask = SUB64x2(ZERO128, SHR64x2(h0, 63)); + BlockVec h0_shifted; + BlockVec h; + + mask = SHUFFLE32x4(mask, 3, 3, 3, 3); + carry = AND128(carry, mask); + h0_shifted = SHL128(h0, 1); + h = XOR128(h0_shifted, carry); + + hx[0] = h; + hx[1] = gcm_reduce(clsq128(hx[0])); + + if (block_count >= PC_COUNT) { + precomp(hx, 2, PC_COUNT); + } else { + precomp(hx, 2, block_count); } - /* we store H (and it's power) byte-reverted once and for all */ - Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev); - _mm_store_si128((__m128i *) H, Hv); - H2v = mulv(Hv, Hv); - H3v = mulv(H2v, Hv); - H4v = mulv(H3v, Hv); - - accv = _mm_setzero_si128(); - /* unrolled by 4 GCM (by 8 doesn't improve using MULREDUCE4) */ - for (i = 0; i < adlen_rnd64; i += 64) { - __m128i X4_ = _mm_loadu_si128((const __m128i *) (ad + i + 0)); - __m128i X3_ = _mm_loadu_si128((const __m128i *) (ad + i + 16)); - __m128i X2_ = _mm_loadu_si128((const __m128i *) (ad + i + 32)); - __m128i X1_ = _mm_loadu_si128((const __m128i *) (ad + i + 48)); - MULREDUCE4(rev, Hv, H2v, H3v, H4v, X1_, X2_, X3_, X4_, accv); +} + +/* Initialize a GHash */ + +static inline void +gh_init(GHash *sth) +{ + sth->acc = ZERO128; +} + +static inline I256 __vectorcall gh_update0(const GHash *const sth, const unsigned char *const p, + const Precomp hn) +{ + const BlockVec m = REV128(LOAD128(p)); + return clmul128(XOR128(sth->acc, m), hn); +} + +static inline void __vectorcall gh_update(I256 *const u, const unsigned char *p, const Precomp hn) +{ + const BlockVec m = REV128(LOAD128(p)); + const I256 t = clmul128(m, hn); + *u = (I256) { SODIUM_C99(.hi =) XOR128(u->hi, t.hi), SODIUM_C99(.lo =) XOR128(u->lo, t.lo), + SODIUM_C99(.mid =) XOR128(u->mid, t.mid) }; +} + +/* Absorb ad_len bytes of associated data. There has to be no partial block. */ + +static inline void +gh_ad_blocks(const State *st, GHash *sth, const unsigned char *ad, size_t ad_len) +{ + size_t i; + + i = (size_t) 0U; + for (; i + PC_COUNT * 16 <= ad_len; i += PC_COUNT * 16) { + I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT - 1 - j]); + } + sth->acc = gcm_reduce(u); } - _mm_store_si128((__m128i *) accum, accv); + for (; i + PC_COUNT * 16 / 2 <= ad_len; i += PC_COUNT * 16 / 2) { + I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT / 2 - 1 - 0]); + size_t j; - /* GCM remainder loop */ - for (i = adlen_rnd64; i < adlen; i += 16) { - unsigned int blocklen = 16; + for (j = 1; j < PC_COUNT / 2; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT / 2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + 4 * 16 <= ad_len; i += 4 * 16) { + size_t j; + I256 u = gh_update0(sth, ad + i, st->hx[4 - 1 - 0]); - if (i + (unsigned long long) blocklen > adlen) { - blocklen = (unsigned int) (adlen - i); + for (j = 1; j < 4; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[4 - 1 - j]); } - addmul(accum, ad + i, blocklen, H); + sth->acc = gcm_reduce(u); } + for (; i + 2 * 16 <= ad_len; i += 2 * 16) { + size_t j; + I256 u = gh_update0(sth, ad + i, st->hx[2 - 1 - 0]); -/* this only does 8 full blocks, so no fancy bounds checking is necessary*/ -#define LOOPRND128 \ - do { \ - const int iter = 8; \ - const int lb = iter * 16; \ - \ - for (i = 0; i < mlen_rnd128; i += lb) { \ - aesni_encrypt8full(c + i, n2, rkeys, m + i, accum, Hv, H2v, H3v, H4v, rev); \ - } \ - } while(0) - -/* remainder loop, with the slower GCM update to accommodate partial blocks */ -#define LOOPRMD128 \ - do { \ - const int iter = 8; \ - const int lb = iter * 16; \ - \ - for (i = mlen_rnd128; i < mlen; i += lb) { \ - CRYPTO_ALIGN(16) unsigned char outni[8 * 16]; \ - unsigned long long mj = lb; \ - \ - aesni_encrypt8(outni, n2, rkeys); \ - if ((i + mj) >= mlen) { \ - mj = mlen - i; \ - } \ - for (j = 0; j < mj; j++) { \ - c[i + j] = m[i + j] ^ outni[j]; \ - } \ - for (j = 0; j < mj; j += 16) { \ - unsigned int bl = 16; \ - \ - if (j + (unsigned long long) bl >= mj) { \ - bl = (unsigned int) (mj - j); \ - } \ - addmul(accum, c + i + j, bl, H); \ - } \ - } \ - } while(0) - - n2[3] &= 0x00ffffff; - COUNTER_INC2(n2); - LOOPRND128; - LOOPRMD128; - - addmul(accum, fb, 16, H); - - for (i = 0; i < 16; ++i) { - mac[i] = T[i] ^ accum[15 - i]; + for (j = 1; j < 2; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); } - if (maclen_p != NULL) { - *maclen_p = 16; + if (i < ad_len) { + I256 u = gh_update0(sth, ad + i, st->hx[0]); + sth->acc = gcm_reduce(u); } - return 0; } -int -crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) +/* Increment counters */ + +static inline BlockVec __vectorcall incr_counters(BlockVec rev_counters[], BlockVec counter, + const size_t n) { - int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, - c + mlen, NULL, - m, mlen, - ad, adlen, - nsec, npub, ctx_); - if (clen_p != NULL) { - *clen_p = mlen + crypto_aead_aes256gcm_ABYTES; + size_t i; + + const BlockVec one = ONE128; + for (i = 0; i < n; i++) { + rev_counters[i] = REV128(counter); + counter = ADD64x2(counter, one); } - return ret; + return counter; } -int -crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) +/* Compute the number of required blocks to encrypt and authenticate `ad_len` of associated data, + * and `m_len` of encrypted bytes. Return `0` if limits would be exceeded.*/ + +static inline size_t +required_blocks(const size_t ad_len, const size_t m_len) { - const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - const aes256gcm_state *ctx = (const aes256gcm_state *) (const void *) ctx_; - const __m128i *rkeys = ctx->rkeys; - __m128i Hv, H2v, H3v, H4v, accv; - unsigned long long i, j; - unsigned long long adlen_rnd64 = adlen & ~63ULL; - unsigned long long mlen; - unsigned long long mlen_rnd128; - CRYPTO_ALIGN(16) uint32_t n2[4]; - CRYPTO_ALIGN(16) unsigned char H[16]; - CRYPTO_ALIGN(16) unsigned char T[16]; - CRYPTO_ALIGN(16) unsigned char accum[16]; - CRYPTO_ALIGN(16) unsigned char fb[16]; + const size_t ad_blocks = (ad_len + 15) / 16; + const size_t m_blocks = (m_len + 15) / 16; - (void) nsec; - if (clen > crypto_aead_aes256gcm_MESSAGEBYTES_MAX) { - sodium_misuse(); /* LCOV_EXCL_LINE */ + if (ad_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || + m_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || ad_len < ad_blocks || m_len < m_blocks || + m_blocks >= (1ULL << 32) - 2) { + return 0; } - mlen = clen; - - memcpy(&n2[0], npub, 3 * 4); - n2[3] = 0x01000000; - aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys); - - { - uint64_t x; - x = _bswap64((uint64_t)(8 * adlen)); - memcpy(&fb[0], &x, sizeof x); - x = _bswap64((uint64_t)(8 * mlen)); - memcpy(&fb[8], &x, sizeof x); + return ad_blocks + m_blocks + 1; +} + +/* Generic AES-GCM encryption. "Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec counter; + size_t i; + size_t j; + size_t left; + size_t pi; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } } - memcpy(H, ctx->H, sizeof H); - Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev); - _mm_store_si128((__m128i *) H, Hv); - H2v = mulv(Hv, Hv); - H3v = mulv(H2v, Hv); - H4v = mulv(H3v, Hv); - - accv = _mm_setzero_si128(); - for (i = 0; i < adlen_rnd64; i += 64) { - __m128i X4_ = _mm_loadu_si128((const __m128i *) (ad + i + 0)); - __m128i X3_ = _mm_loadu_si128((const __m128i *) (ad + i + 16)); - __m128i X2_ = _mm_loadu_si128((const __m128i *) (ad + i + 32)); - __m128i X1_ = _mm_loadu_si128((const __m128i *) (ad + i + 48)); - MULREDUCE4(rev, Hv, H2v, H3v, H4v, X1_, X2_, X3_, X4_, accv); + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16); +#if PARALLEL_BLOCKS >= 64 / 16 + PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16 + 64); +#endif + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]); + } + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16, + rev_counters); + + PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16); +#if PARALLEL_BLOCKS >= 64 / 16 + PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16 + 64); +#endif + pi = i; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); } - _mm_store_si128((__m128i *) accum, accv); - for (i = adlen_rnd64; i < adlen; i += 16) { - unsigned int blocklen = 16; - if (i + (unsigned long long) blocklen > adlen) { - blocklen = (unsigned int) (adlen - i); + /* PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); } - addmul(accum, ad + i, blocklen, H); + sth->acc = gcm_reduce(u); } - mlen_rnd128 = mlen & ~127ULL; - -#define LOOPACCUMDRND128 \ - do { \ - const int iter = 8; \ - const int lb = iter * 16; \ - for (i = 0; i < mlen_rnd128; i += lb) { \ - aesni_addmul8full(c + i, accum, Hv, H2v, H3v, H4v, rev); \ - } \ - } while(0) - -#define LOOPDRND128 \ - do { \ - const int iter = 8; \ - const int lb = iter * 16; \ - \ - for (i = 0; i < mlen_rnd128; i += lb) { \ - aesni_decrypt8full(m + i, n2, rkeys, c + i); \ - } \ - } while(0) - -#define LOOPACCUMDRMD128 \ - do { \ - const int iter = 8; \ - const int lb = iter * 16; \ - \ - for (i = mlen_rnd128; i < mlen; i += lb) { \ - unsigned long long mj = lb; \ - \ - if ((i + mj) >= mlen) { \ - mj = mlen - i; \ - } \ - for (j = 0; j < mj; j += 16) { \ - unsigned int bl = 16; \ - \ - if (j + (unsigned long long) bl >= mj) { \ - bl = (unsigned int) (mj - j); \ - } \ - addmul(accum, c + i + j, bl, H); \ - } \ - } \ - } while(0) - -#define LOOPDRMD128 \ - do { \ - const int iter = 8; \ - const int lb = iter * 16; \ - \ - for (i = mlen_rnd128; i < mlen; i += lb) { \ - CRYPTO_ALIGN(16) unsigned char outni[8 * 16]; \ - unsigned long long mj = lb; \ - \ - if ((i + mj) >= mlen) { \ - mj = mlen - i; \ - } \ - aesni_encrypt8(outni, n2, rkeys); \ - for (j = 0; j < mj; j++) { \ - m[i + j] = c[i + j] ^ outni[j]; \ - } \ - } \ - } while(0) - - n2[3] &= 0x00ffffff; - - COUNTER_INC2(n2); - LOOPACCUMDRND128; - LOOPACCUMDRMD128; - addmul(accum, fb, 16, H); - { - unsigned char d = 0; - - for (i = 0; i < 16; i++) { - d |= (mac[i] ^ (T[i] ^ accum[15 - i])); + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); } - if (d != 0) { - if (m != NULL) { - memset(m, 0, mlen); - } - return -1; + + u = gh_update0(sth, dst + i, st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + gh_update(&u, dst + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); } - if (m == NULL) { - return 0; + + u = gh_update0(sth, dst + i, st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + gh_update(&u, dst + i + j * 16, st->hx[2 - 1 - j]); } + sth->acc = gcm_reduce(u); } - n2[3] = 0U; - COUNTER_INC2(n2); - LOOPDRND128; - LOOPDRMD128; - return 0; + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + u = gh_update0(sth, dst + i, st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + STORE128(last_blocks + 16, final_block); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (; j < 16; j++) { + last_blocks[j] = 0; + } + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + gh_ad_blocks(st, sth, last_blocks, 32); + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); } -int -crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, - unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) +/* Generic AES-GCM decryption. "Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_decrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) { - unsigned long long mlen = 0ULL; - int ret = -1; + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec counter; + size_t i; + size_t j; + size_t left; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); - if (clen >= crypto_aead_aes256gcm_ABYTES) { - ret = crypto_aead_aes256gcm_decrypt_detached_afternm - (m, nsec, c, clen - crypto_aead_aes256gcm_ABYTES, - c + clen - crypto_aead_aes256gcm_ABYTES, - ad, adlen, npub, ctx_); + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } } - if (mlen_p != NULL) { - if (ret == 0) { - mlen = clen - crypto_aead_aes256gcm_ABYTES; + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + while (i + 2 * PARALLEL_BLOCKS * 16 <= src_len) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + u = gh_update0(sth, src + i, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]); } - *mlen_p = mlen; + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + i += PARALLEL_BLOCKS * 16; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; } - return ret; -} -int -crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, - unsigned char *mac, - unsigned long long *maclen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx; + /* PARALLEL_BLOCKS aggregation */ - crypto_aead_aes256gcm_beforenm(&ctx, k); + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); - return crypto_aead_aes256gcm_encrypt_detached_afternm - (c, mac, maclen_p, m, mlen, ad, adlen, nsec, npub, - (const crypto_aead_aes256gcm_state *) &ctx); -} + u = gh_update0(sth, src + i, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); -int -crypto_aead_aes256gcm_encrypt(unsigned char *c, - unsigned long long *clen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx; - int ret; + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + } - crypto_aead_aes256gcm_beforenm(&ctx, k); + /* 4-blocks aggregation */ - ret = crypto_aead_aes256gcm_encrypt_afternm - (c, clen_p, m, mlen, ad, adlen, nsec, npub, - (const crypto_aead_aes256gcm_state *) &ctx); - sodium_memzero(&ctx, sizeof ctx); + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); - return ret; -} + u = gh_update0(sth, src + i, st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); -int -crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx; + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } - crypto_aead_aes256gcm_beforenm(&ctx, k); + /* 2-blocks aggregation */ - return crypto_aead_aes256gcm_decrypt_detached_afternm - (m, nsec, c, clen, mac, ad, adlen, npub, - (const crypto_aead_aes256gcm_state *) &ctx); -} + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); -int -crypto_aead_aes256gcm_decrypt(unsigned char *m, - unsigned long long *mlen_p, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx; - int ret; + u = gh_update0(sth, src + i, st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); - crypto_aead_aes256gcm_beforenm(&ctx, k); + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } - ret = crypto_aead_aes256gcm_decrypt_afternm - (m, mlen_p, nsec, c, clen, ad, adlen, npub, - (const crypto_aead_aes256gcm_state *) &ctx); - sodium_memzero(&ctx, sizeof ctx); + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ - return ret; + for (; i + 16 < src_len; i += 16) { + u = gh_update0(sth, src + i, st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + for (; j < 16; j++) { + last_blocks[j] = 0; + } + STORE128(last_blocks + 16, final_block); + gh_ad_blocks(st, sth, last_blocks, 32); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); } int -crypto_aead_aes256gcm_is_available(void) +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) { - return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni(); -} + State *st = (State *) (void *) st_; + CRYPTO_ALIGN(16) unsigned char h[16]; -#else + COMPILER_ASSERT(sizeof *st_ >= sizeof *st); -int -crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, - unsigned char *mac, - unsigned long long *maclen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} + expand256(k, st->rkeys); + memset(h, 0, sizeof h); + encrypt(st, h, h); -int -crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) -{ - errno = ENOSYS; - return -1; -} + precomp_for_block_count(st->hx, h, PC_COUNT); -int -crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - errno = ENOSYS; - return -1; + return 0; } int -crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) -{ - errno = ENOSYS; - return -1; +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len_, const unsigned char *ad, + unsigned long long ad_len_, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + size_t gh_required_blocks; + const size_t ad_len = (size_t) ad_len_; + const size_t m_len = (size_t) m_len_; + + (void) nsec; + if (maclen_p != NULL) { + *maclen_p = 0; + } + if (ad_len_ > SODIUM_SIZE_MAX || m_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + memset(mac, 0xd0, ABYTES); + memset(c, 0, m_len); + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_encrypt_generic(st, &sth, mac, c, m, m_len, ad, ad_len, j); + + if (maclen_p != NULL) { + *maclen_p = ABYTES; + } + return 0; } int -crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *ctx_, - const unsigned char *k) +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) { - errno = ENOSYS; - return -1; + const int ret = crypto_aead_aes256gcm_encrypt_detached(c, c + m_len, NULL, m, m_len, ad, ad_len, + nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + *clen_p = m_len + crypto_aead_aes256gcm_ABYTES; + } else { + *clen_p = 0; + } + } + return ret; } int -crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, - unsigned char *mac, unsigned long long *maclen_p, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) { - errno = ENOSYS; - return -1; + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(c); + PREFETCH_READ(m); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, mac, maclen_p, m, m_len, ad, ad_len, + nsec, npub, &st); + sodium_memzero(&st, sizeof st); + + return ret; } int @@ -998,82 +820,196 @@ crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen const unsigned char *m, unsigned long long mlen, const unsigned char *ad, unsigned long long adlen, const unsigned char *nsec, const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) + const crypto_aead_aes256gcm_state *st_) { - errno = ENOSYS; - return -1; + int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, c + mlen, NULL, m, mlen, ad, adlen, + nsec, npub, st_); + if (clen_p != NULL) { + *clen_p = mlen + crypto_aead_aes256gcm_ABYTES; + } + return ret; +} + +static int +crypto_aead_aes256gcm_verify_mac(unsigned char *nsec, const unsigned char *c, + unsigned long long c_len_, const unsigned char *mac, + const unsigned char *ad, unsigned long long ad_len_, + const unsigned char *npub, const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + BlockVec final_block; + CRYPTO_ALIGN(16) unsigned char j[16]; + CRYPTO_ALIGN(16) unsigned char computed_mac[16]; + CRYPTO_ALIGN(16) unsigned char last_block[16]; + size_t gh_required_blocks; + size_t left; + const size_t ad_len = (size_t) ad_len_; + const size_t c_len = (size_t) c_len_; + int ret; + + (void) nsec; + if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + gh_required_blocks = required_blocks(ad_len, c_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + gh_ad_blocks(st, &sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + + gh_ad_blocks(st, &sth, c, c_len & ~15); + left = c_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, c + c_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + final_block = REV128(SET64x2(ad_len * 8, c_len * 8)); + STORE32_BE(j + NPUBBYTES, 1); + encrypt(st, computed_mac, j); + STORE128(last_block, final_block); + gh_ad_blocks(st, &sth, last_block, 16); + STORE128(computed_mac, XOR128(LOAD128(computed_mac), REV128(sth.acc))); + + ret = crypto_verify_16(mac, computed_mac); + sodium_memzero(computed_mac, sizeof computed_mac); + + return ret; } int crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) -{ - errno = ENOSYS; - return -1; + const unsigned char *c, unsigned long long c_len_, + const unsigned char *mac, const unsigned char *ad, + unsigned long long ad_len_, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + unsigned char computed_mac[16]; + size_t gh_required_blocks; + const size_t ad_len = (size_t) ad_len_; + const size_t c_len = (size_t) c_len_; + const size_t m_len = c_len; + + (void) nsec; + if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + if (m == NULL) { + return crypto_aead_aes256gcm_verify_mac(nsec, c, c_len, mac, ad, ad_len, npub, st_); + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_decrypt_generic(st, &sth, computed_mac, m, c, m_len, ad, ad_len, j); + + if (crypto_verify_16(mac, computed_mac) != 0) { + sodium_memzero(computed_mac, sizeof computed_mac); + memset(m, 0xd0, m_len); + return -1; + } + return 0; } int crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, - unsigned char *nsec, - const unsigned char *c, unsigned long long clen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *npub, - const crypto_aead_aes256gcm_state *ctx_) + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) { - errno = ENOSYS; - return -1; + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= ABYTES) { + ret = crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen - ABYTES, c + clen - ABYTES, ad, adlen, npub, st_); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - ABYTES; + } + *mlen_p = mlen; + } + return ret; } int -crypto_aead_aes256gcm_is_available(void) +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) { - return 0; -} + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; -#endif + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); -size_t -crypto_aead_aes256gcm_keybytes(void) -{ - return crypto_aead_aes256gcm_KEYBYTES; -} + crypto_aead_aes256gcm_beforenm(&st, k); -size_t -crypto_aead_aes256gcm_nsecbytes(void) -{ - return crypto_aead_aes256gcm_NSECBYTES; + return crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen, mac, ad, adlen, npub, (const crypto_aead_aes256gcm_state *) &st); } -size_t -crypto_aead_aes256gcm_npubbytes(void) +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) { - return crypto_aead_aes256gcm_NPUBBYTES; -} + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; -size_t -crypto_aead_aes256gcm_abytes(void) -{ - return crypto_aead_aes256gcm_ABYTES; -} + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); -size_t -crypto_aead_aes256gcm_statebytes(void) -{ - return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U; -} + crypto_aead_aes256gcm_beforenm(&st, k); -size_t -crypto_aead_aes256gcm_messagebytes_max(void) -{ - return crypto_aead_aes256gcm_MESSAGEBYTES_MAX; + ret = crypto_aead_aes256gcm_decrypt_afternm(m, mlen_p, nsec, c, clen, ad, adlen, npub, + (const crypto_aead_aes256gcm_state *) &st); + sodium_memzero(&st, sizeof st); + + return ret; } -void -crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES]) +int +crypto_aead_aes256gcm_is_available(void) { - randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES); + return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni() & sodium_runtime_has_avx(); } + +#ifdef __clang__ +# pragma clang attribute pop +#endif + +#endif diff --git a/libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c b/libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c new file mode 100644 index 0000000000..8f9bba6d74 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c @@ -0,0 +1,1033 @@ +#include +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_aes256gcm.h" +#include "crypto_verify_16.h" +#include "export.h" +#include "private/common.h" +#include "randombytes.h" +#include "runtime.h" +#include "utils.h" + +#if defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN) + +#if !defined(MSC_VER) || _MSC_VER < 1800 +#define __vectorcall +#endif + +#ifndef __ARM_FEATURE_CRYPTO +#define __ARM_FEATURE_CRYPTO 1 +#endif +#ifndef __ARM_FEATURE_AES +#define __ARM_FEATURE_AES 1 +#endif + +#include + +#ifdef __clang__ +#pragma clang attribute push(__attribute__((target("neon,crypto,aes"))), apply_to = function) +#elif defined(__GNUC__) +#pragma GCC target("+simd+crypto") +#endif + +#define ABYTES crypto_aead_aes256gcm_ABYTES +#define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES +#define KEYBYTES crypto_aead_aes256gcm_KEYBYTES + +#define PARALLEL_BLOCKS 6 +#undef USE_KARATSUBA_MULTIPLICATION + +typedef uint64x2_t BlockVec; + +#define LOAD128(a) vld1q_u64((const uint64_t *) (const void *) (a)) +#define STORE128(a, b) vst1q_u64((uint64_t *) (void *) (a), (b)) +#define AES_XENCRYPT(block_vec, rkey) \ + vreinterpretq_u64_u8( \ + vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey))) +#define AES_XENCRYPTLAST(block_vec, rkey) \ + vreinterpretq_u64_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey)) +#define XOR128(a, b) veorq_u64((a), (b)) +#define AND128(a, b) vandq_u64((a), (b)) +#define OR128(a, b) vorrq_u64((a), (b)) +#define SET64x2(a, b) vsetq_lane_u64((uint64_t) (a), vmovq_n_u64((uint64_t) (b)), 1) +#define ZERO128 vmovq_n_u8(0) +#define ONE128 SET64x2(0, 1) +#define ADD64x2(a, b) vaddq_u64((a), (b)) +#define SUB64x2(a, b) vsubq_u64((a), (b)) +#define SHL64x2(a, b) vshlq_n_u64((a), (b)) +#define SHR64x2(a, b) vshrq_n_u64((a), (b)) +#define REV128(x) \ + vreinterpretq_u64_u8(__builtin_shufflevector(vreinterpretq_u8_u64(x), vreinterpretq_u8_u64(x), \ + 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, \ + 1, 0)) +#define SHUFFLE32x4(x, a, b, c, d) \ + vreinterpretq_u64_u32(__builtin_shufflevector(vreinterpretq_u32_u64(x), \ + vreinterpretq_u32_u64(x), (a), (b), (c), (d))) +#define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_s8(vdupq_n_s8(0), (int8x16_t) a, 16 - (b))) +#define BYTESHR128(a, b) vreinterpretq_u64_u8(vextq_s8((int8x16_t) a, vdupq_n_s8(0), (b))) + +#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b))) +#define CLMULLO128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_low_u64(b))) +#define CLMULHI128(a, b) \ + vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_s64(a), vreinterpretq_p64_s64(b))) +#define CLMULLOHI128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_high_u64(b))) +#define CLMULHILO128(a, b) \ + vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_high_u64(a), (poly64_t) vget_low_u64(b))) +#define PREFETCH_READ(x) __builtin_prefetch((x), 0, 2) +#define PREFETCH_WRITE(x) __builtin_prefetch((x), 1, 2); + +static inline BlockVec +AES_KEYGEN(BlockVec block_vec, const int rc) +{ + uint8x16_t a = vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0)); + const uint8x16_t b = + __builtin_shufflevector(a, a, 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12); + const uint64x2_t c = SET64x2((uint64_t) rc << 32, (uint64_t) rc << 32); + return XOR128(b, c); +} + +#define ROUNDS 14 + +#define PC_COUNT (2 * PARALLEL_BLOCKS) + +typedef struct I256 { + BlockVec hi; + BlockVec lo; + BlockVec mid; +} I256; + +typedef BlockVec Precomp; + +typedef struct GHash { + BlockVec acc; +} GHash; + +typedef struct State { + BlockVec rkeys[ROUNDS + 1]; + Precomp hx[PC_COUNT]; +} State; + +static void __vectorcall expand256(const unsigned char key[KEYBYTES], BlockVec rkeys[1 + ROUNDS]) +{ + BlockVec t1, t2, s; + size_t i = 0; + +#define EXPAND_KEY_1(RC) \ + rkeys[i++] = t2; \ + s = AES_KEYGEN(t2, RC); \ + t1 = XOR128(t1, BYTESHL128(t1, 4)); \ + t1 = XOR128(t1, BYTESHL128(t1, 8)); \ + t1 = XOR128(t1, SHUFFLE32x4(s, 3, 3, 3, 3)); + +#define EXPAND_KEY_2(RC) \ + rkeys[i++] = t1; \ + s = AES_KEYGEN(t1, RC); \ + t2 = XOR128(t2, BYTESHL128(t2, 4)); \ + t2 = XOR128(t2, BYTESHL128(t2, 8)); \ + t2 = XOR128(t2, SHUFFLE32x4(s, 2, 2, 2, 2)); + + t1 = LOAD128(&key[0]); + t2 = LOAD128(&key[16]); + + rkeys[i++] = t1; + EXPAND_KEY_1(0x01); + EXPAND_KEY_2(0x01); + EXPAND_KEY_1(0x02); + EXPAND_KEY_2(0x02); + EXPAND_KEY_1(0x04); + EXPAND_KEY_2(0x04); + EXPAND_KEY_1(0x08); + EXPAND_KEY_2(0x08); + EXPAND_KEY_1(0x10); + EXPAND_KEY_2(0x10); + EXPAND_KEY_1(0x20); + EXPAND_KEY_2(0x20); + EXPAND_KEY_1(0x40); + rkeys[i++] = t1; +} + +/* Encrypt a single AES block */ + +static inline void +encrypt(const State *st, unsigned char dst[16], const unsigned char src[16]) +{ + BlockVec t; + + size_t i; + + t = AES_XENCRYPT(LOAD128(src), st->rkeys[0]); + for (i = 1; i < ROUNDS - 1; i++) { + t = AES_XENCRYPT(t, st->rkeys[i]); + } + t = AES_XENCRYPTLAST(t, st->rkeys[i]); + t = XOR128(t, st->rkeys[ROUNDS]); + STORE128(dst, t); +} + +/* Encrypt and add a single AES block */ + +static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char dst[16], + const unsigned char src[16], + const BlockVec counter) +{ + BlockVec ts; + size_t i; + + ts = AES_XENCRYPT(counter, st->rkeys[0]); + for (i = 1; i < ROUNDS - 1; i++) { + ts = AES_XENCRYPT(ts, st->rkeys[i]); + } + ts = AES_XENCRYPTLAST(ts, st->rkeys[i]); + ts = XOR128(ts, XOR128(st->rkeys[ROUNDS], LOAD128(src))); + STORE128(dst, ts); +} + +/* Encrypt and add PARALLEL_BLOCKS AES blocks */ + +static inline void __vectorcall encrypt_xor_wide(const State *st, + unsigned char dst[16 * PARALLEL_BLOCKS], + const unsigned char src[16 * PARALLEL_BLOCKS], + const BlockVec counters[PARALLEL_BLOCKS]) +{ + BlockVec ts[PARALLEL_BLOCKS]; + size_t i, j; + + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_XENCRYPT(counters[j], st->rkeys[0]); + } + for (i = 1; i < ROUNDS - 1; i++) { + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_XENCRYPT(ts[j], st->rkeys[i]); + } + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + ts[j] = AES_XENCRYPTLAST(ts[j], st->rkeys[i]); + ts[j] = XOR128(ts[j], XOR128(st->rkeys[ROUNDS], LOAD128(&src[16 * j]))); + } + for (j = 0; j < PARALLEL_BLOCKS; j++) { + STORE128(&dst[16 * j], ts[j]); + } +} + +/* Square a field element */ + +static inline I256 __vectorcall clsq128(const BlockVec x) +{ + const BlockVec r_lo = CLMULLO128(x, x); + const BlockVec r_hi = CLMULHI128(x, x); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) ZERO128, + }; +} + +/* Multiply two field elements -- Textbook multiplication is faster than Karatsuba on some recent + * CPUs */ + +static inline I256 __vectorcall clmul128(const BlockVec x, const BlockVec y) +{ +#ifdef USE_KARATSUBA_MULTIPLICATION + const BlockVec x_hi = BYTESHR128(x, 8); + const BlockVec y_hi = BYTESHR128(y, 8); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_mid = XOR128(CLMULLO128(XOR128(x, x_hi), XOR128(y, y_hi)), XOR128(r_lo, r_hi)); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) r_mid, + }; +#else + const BlockVec r_hi = CLMULHI128(x, y); + const BlockVec r_lo = CLMULLO128(x, y); + const BlockVec r_mid = XOR128(CLMULHILO128(x, y), CLMULLOHI128(x, y)); + + return (I256) { + SODIUM_C99(.hi =) r_hi, + SODIUM_C99(.lo =) r_lo, + SODIUM_C99(.mid =) r_mid, + }; +#endif +} + +/* Merge the middle word and reduce a field element */ + +static inline BlockVec __vectorcall gcm_reduce(const I256 x) +{ + const BlockVec hi = XOR128(x.hi, BYTESHR128(x.mid, 8)); + const BlockVec lo = XOR128(x.lo, BYTESHL128(x.mid, 8)); + + const BlockVec p64 = SET64x2(0, 0xc200000000000000); + const BlockVec a = CLMULLO128(lo, p64); + const BlockVec b = XOR128(SHUFFLE32x4(lo, 2, 3, 0, 1), a); + const BlockVec c = CLMULLO128(b, p64); + const BlockVec d = XOR128(SHUFFLE32x4(b, 2, 3, 0, 1), c); + + return XOR128(d, hi); +} + +/* Precompute powers of H from `from` to `to` */ + +static inline void __vectorcall precomp(Precomp hx[PC_COUNT], const size_t from, const size_t to) +{ + const Precomp h = hx[0]; + size_t i; + + for (i = from & ~1U; i < to; i += 2) { + hx[i] = gcm_reduce(clmul128(hx[i - 1], h)); + hx[i + 1] = gcm_reduce(clsq128(hx[i / 2])); + } +} + +/* Precompute powers of H given a key and a block count */ + +static void __vectorcall precomp_for_block_count(Precomp hx[PC_COUNT], + const unsigned char gh_key[16], + const size_t block_count) +{ + const BlockVec h0 = REV128(LOAD128(gh_key)); + BlockVec carry = SET64x2(0xc200000000000000, 1); + BlockVec mask = SUB64x2(ZERO128, SHR64x2(h0, 63)); + BlockVec h0_shifted; + BlockVec h; + + mask = SHUFFLE32x4(mask, 3, 3, 3, 3); + carry = AND128(carry, mask); + h0_shifted = SHL128(h0, 1); + h = XOR128(h0_shifted, carry); + + hx[0] = h; + hx[1] = gcm_reduce(clsq128(hx[0])); + + if (block_count >= PC_COUNT) { + precomp(hx, 2, PC_COUNT); + } else { + precomp(hx, 2, block_count); + } +} + +/* Initialize a GHash */ + +static inline void +gh_init(GHash *sth) +{ + sth->acc = ZERO128; +} + +static inline I256 __vectorcall gh_update0(const GHash *const sth, const unsigned char *const p, + const Precomp hn) +{ + const BlockVec m = REV128(LOAD128(p)); + return clmul128(XOR128(sth->acc, m), hn); +} + +static inline void __vectorcall gh_update(I256 *const u, const unsigned char *p, const Precomp hn) +{ + const BlockVec m = REV128(LOAD128(p)); + const I256 t = clmul128(m, hn); + *u = (I256) { SODIUM_C99(.hi =) XOR128(u->hi, t.hi), SODIUM_C99(.lo =) XOR128(u->lo, t.lo), + SODIUM_C99(.mid =) XOR128(u->mid, t.mid) }; +} + +/* Absorb ad_len bytes of associated data. There has to be no partial block. */ + +static inline void +gh_ad_blocks(const State *st, GHash *sth, const unsigned char *ad, size_t ad_len) +{ + size_t i; + + i = (size_t) 0U; + for (; i + PC_COUNT * 16 <= ad_len; i += PC_COUNT * 16) { + I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + PC_COUNT * 16 / 2 <= ad_len; i += PC_COUNT * 16 / 2) { + I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT / 2 - 1 - 0]); + size_t j; + + for (j = 1; j < PC_COUNT / 2; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT / 2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + 4 * 16 <= ad_len; i += 4 * 16) { + size_t j; + I256 u = gh_update0(sth, ad + i, st->hx[4 - 1 - 0]); + + for (j = 1; j < 4; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + for (; i + 2 * 16 <= ad_len; i += 2 * 16) { + size_t j; + I256 u = gh_update0(sth, ad + i, st->hx[2 - 1 - 0]); + + for (j = 1; j < 2; j += 1) { + gh_update(&u, ad + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + if (i < ad_len) { + I256 u = gh_update0(sth, ad + i, st->hx[0]); + sth->acc = gcm_reduce(u); + } +} + +/* Increment counters */ + +static inline BlockVec __vectorcall incr_counters(BlockVec rev_counters[], BlockVec counter, + const size_t n) +{ + size_t i; + + const BlockVec one = ONE128; + for (i = 0; i < n; i++) { + rev_counters[i] = REV128(counter); + counter = ADD64x2(counter, one); + } + return counter; +} + +/* Compute the number of required blocks to encrypt and authenticate `ad_len` of associated data, + * and `m_len` of encrypted bytes. Return `0` if limits would be exceeded.*/ + +static inline size_t +required_blocks(const size_t ad_len, const size_t m_len) +{ + const size_t ad_blocks = (ad_len + 15) / 16; + const size_t m_blocks = (m_len + 15) / 16; + + if (ad_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || + m_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || ad_len < ad_blocks || m_len < m_blocks || + m_blocks >= (1ULL << 32) - 2) { + return 0; + } + return ad_blocks + m_blocks + 1; +} + +/* Generic AES-GCM encryption. "Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec counter; + size_t i; + size_t j; + size_t left; + size_t pi; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } + } + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]); + } + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16, + rev_counters); + + pi = i; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* PARALLEL_BLOCKS aggregation */ + + if (src_len - i >= PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + pi = i - PARALLEL_BLOCKS * 16; + u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + + u = gh_update0(sth, dst + i, st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + gh_update(&u, dst + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + + u = gh_update0(sth, dst + i, st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + gh_update(&u, dst + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + } + + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + u = gh_update0(sth, dst + i, st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + STORE128(last_blocks + 16, final_block); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (; j < 16; j++) { + last_blocks[j] = 0; + } + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + gh_ad_blocks(st, sth, last_blocks, 32); + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); +} + +/* Generic AES-GCM decryption. "Generic" as it can handle arbitrary input sizes, +unlike a length-limited version that would precompute all the required powers of H */ + +static void +aes_gcm_decrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst, + const unsigned char *src, size_t src_len, const unsigned char *ad, + size_t ad_len, unsigned char counter_[16]) +{ + CRYPTO_ALIGN(32) I256 u; + CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16]; + const BlockVec one = ONE128; + BlockVec final_block; + BlockVec rev_counters[PARALLEL_BLOCKS]; + BlockVec counter; + size_t i; + size_t j; + size_t left; + + COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0); + + /* Associated data */ + + if (ad != NULL && ad_len != 0) { + gh_ad_blocks(st, sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, sth, pad, sizeof pad); + } + } + + /* Encrypted data */ + + counter = REV128(LOAD128(counter_)); + i = 0; + + /* 2*PARALLEL_BLOCKS aggregation */ + + while (i + 2 * PARALLEL_BLOCKS * 16 <= src_len) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + u = gh_update0(sth, src + i, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]); + } + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + i += PARALLEL_BLOCKS * 16; + for (j = 0; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + i += PARALLEL_BLOCKS * 16; + } + + /* PARALLEL_BLOCKS aggregation */ + + for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) { + counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS); + + u = gh_update0(sth, src + i, st->hx[PARALLEL_BLOCKS - 1 - 0]); + for (j = 1; j < PARALLEL_BLOCKS; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]); + } + sth->acc = gcm_reduce(u); + + encrypt_xor_wide(st, dst + i, src + i, rev_counters); + } + + /* 4-blocks aggregation */ + + for (; i + 4 * 16 <= src_len; i += 4 * 16) { + counter = incr_counters(rev_counters, counter, 4); + + u = gh_update0(sth, src + i, st->hx[4 - 1 - 0]); + for (j = 1; j < 4; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[4 - 1 - j]); + } + sth->acc = gcm_reduce(u); + + for (j = 0; j < 4; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } + + /* 2-blocks aggregation */ + + for (; i + 2 * 16 <= src_len; i += 2 * 16) { + counter = incr_counters(rev_counters, counter, 2); + + u = gh_update0(sth, src + i, st->hx[2 - 1 - 0]); + for (j = 1; j < 2; j += 1) { + gh_update(&u, src + i + j * 16, st->hx[2 - 1 - j]); + } + sth->acc = gcm_reduce(u); + + for (j = 0; j < 2; j++) { + encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]); + } + } + + /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the + full block authenticated along with the final block, hence < and not <= */ + + for (; i + 16 < src_len; i += 16) { + u = gh_update0(sth, src + i, st->hx[1 - 1 - 0]); + sth->acc = gcm_reduce(u); + encrypt_xor_block(st, dst + i, src + i, REV128(counter)); + counter = ADD64x2(counter, one); + } + + /* Authenticate both the last block of the message and the final block */ + + final_block = REV128(SET64x2(ad_len * 8, src_len * 8)); + STORE32_BE(counter_ + NPUBBYTES, 1); + encrypt(st, mac, counter_); + left = src_len - i; + if (left != 0) { + for (j = 0; j < left; j++) { + last_blocks[j] = src[i + j]; + } + for (; j < 16; j++) { + last_blocks[j] = 0; + } + STORE128(last_blocks + 16, final_block); + gh_ad_blocks(st, sth, last_blocks, 32); + encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter)); + for (j = 0; j < left; j++) { + dst[i + j] = last_blocks[j]; + } + } else { + STORE128(last_blocks, final_block); + gh_ad_blocks(st, sth, last_blocks, 16); + } + STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc))); +} + +int +crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k) +{ + State *st = (State *) (void *) st_; + CRYPTO_ALIGN(16) unsigned char h[16]; + + COMPILER_ASSERT(sizeof *st_ >= sizeof *st); + + expand256(k, st->rkeys); + memset(h, 0, sizeof h); + encrypt(st, h, h); + + precomp_for_block_count(st->hx, h, PC_COUNT); + + return 0; +} + +int +crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len_, const unsigned char *ad, + unsigned long long ad_len_, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + size_t gh_required_blocks; + const size_t ad_len = (size_t) ad_len_; + const size_t m_len = (size_t) m_len_; + + (void) nsec; + if (maclen_p != NULL) { + *maclen_p = 0; + } + if (ad_len_ > SODIUM_SIZE_MAX || m_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + memset(mac, 0xd0, ABYTES); + memset(c, 0, m_len); + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_encrypt_generic(st, &sth, mac, c, m, m_len, ad, ad_len, j); + + if (maclen_p != NULL) { + *maclen_p = ABYTES; + } + return 0; +} + +int +crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + const int ret = crypto_aead_aes256gcm_encrypt_detached(c, c + m_len, NULL, m, m_len, ad, ad_len, + nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + *clen_p = m_len + crypto_aead_aes256gcm_ABYTES; + } else { + *clen_p = 0; + } + } + return ret; +} + +int +crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac, + unsigned long long *maclen_p, const unsigned char *m, + unsigned long long m_len, const unsigned char *ad, + unsigned long long ad_len, const unsigned char *nsec, + const unsigned char *npub, const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(c); + PREFETCH_READ(m); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, mac, maclen_p, m, m_len, ad, ad_len, + nsec, npub, &st); + sodium_memzero(&st, sizeof st); + + return ret; +} + +int +crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p, + const unsigned char *m, unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, c + mlen, NULL, m, mlen, ad, adlen, + nsec, npub, st_); + if (clen_p != NULL) { + *clen_p = mlen + crypto_aead_aes256gcm_ABYTES; + } + return ret; +} + +static int +crypto_aead_aes256gcm_verify_mac(unsigned char *nsec, const unsigned char *c, + unsigned long long c_len_, const unsigned char *mac, + const unsigned char *ad, unsigned long long ad_len_, + const unsigned char *npub, const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + BlockVec final_block; + CRYPTO_ALIGN(16) unsigned char j[16]; + CRYPTO_ALIGN(16) unsigned char computed_mac[16]; + CRYPTO_ALIGN(16) unsigned char last_block[16]; + size_t gh_required_blocks; + size_t left; + const size_t ad_len = (size_t) ad_len_; + const size_t c_len = (size_t) c_len_; + int ret; + + (void) nsec; + if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + gh_required_blocks = required_blocks(ad_len, c_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + gh_ad_blocks(st, &sth, ad, ad_len & ~15); + left = ad_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, ad + ad_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + + gh_ad_blocks(st, &sth, c, c_len & ~15); + left = c_len & 15; + if (left != 0) { + unsigned char pad[16]; + + memset(pad, 0, sizeof pad); + memcpy(pad, c + c_len - left, left); + gh_ad_blocks(st, &sth, pad, sizeof pad); + } + final_block = REV128(SET64x2(ad_len * 8, c_len * 8)); + STORE32_BE(j + NPUBBYTES, 1); + encrypt(st, computed_mac, j); + STORE128(last_block, final_block); + gh_ad_blocks(st, &sth, last_block, 16); + STORE128(computed_mac, XOR128(LOAD128(computed_mac), REV128(sth.acc))); + + ret = crypto_verify_16(mac, computed_mac); + sodium_memzero(computed_mac, sizeof computed_mac); + + return ret; +} + +int +crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long c_len_, + const unsigned char *mac, const unsigned char *ad, + unsigned long long ad_len_, + const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + const State *st = (const State *) (const void *) st_; + GHash sth; + CRYPTO_ALIGN(16) unsigned char j[16]; + unsigned char computed_mac[16]; + size_t gh_required_blocks; + const size_t ad_len = (size_t) ad_len_; + const size_t c_len = (size_t) c_len_; + const size_t m_len = c_len; + + (void) nsec; + if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) { + sodium_misuse(); + } + if (m == NULL) { + return crypto_aead_aes256gcm_verify_mac(nsec, c, c_len, mac, ad, ad_len, npub, st_); + } + gh_required_blocks = required_blocks(ad_len, m_len); + if (gh_required_blocks == 0) { + return -1; + } + + gh_init(&sth); + + memcpy(j, npub, NPUBBYTES); + STORE32_BE(j + NPUBBYTES, 2); + + aes_gcm_decrypt_generic(st, &sth, computed_mac, m, c, m_len, ad, ad_len, j); + + if (crypto_verify_16(mac, computed_mac) != 0) { + sodium_memzero(computed_mac, sizeof computed_mac); + memset(m, 0xd0, m_len); + return -1; + } + return 0; +} + +int +crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const crypto_aead_aes256gcm_state *st_) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= ABYTES) { + ret = crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen - ABYTES, c + clen - ABYTES, ad, adlen, npub, st_); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +int +crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *mac, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + + return crypto_aead_aes256gcm_decrypt_detached_afternm( + m, nsec, c, clen, mac, ad, adlen, npub, (const crypto_aead_aes256gcm_state *) &st); +} + +int +crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec, + const unsigned char *c, unsigned long long clen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *npub, const unsigned char *k) +{ + CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st; + int ret; + + PREFETCH_WRITE(m); + PREFETCH_READ(c); + PREFETCH_READ(ad); + + crypto_aead_aes256gcm_beforenm(&st, k); + + ret = crypto_aead_aes256gcm_decrypt_afternm(m, mlen_p, nsec, c, clen, ad, adlen, npub, + (const crypto_aead_aes256gcm_state *) &st); + sodium_memzero(&st, sizeof st); + + return ret; +} + +int +crypto_aead_aes256gcm_is_available(void) +{ + return sodium_runtime_has_armcrypto(); +} + +#ifdef __clang__ +#pragma clang attribute pop +#endif + +#endif diff --git a/libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c b/libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c new file mode 100644 index 0000000000..c354087975 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c @@ -0,0 +1,400 @@ + +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_chacha20poly1305.h" +#include "crypto_onetimeauth_poly1305.h" +#include "crypto_stream_chacha20.h" +#include "crypto_verify_16.h" +#include "randombytes.h" +#include "utils.h" + +#include "private/chacha20_ietf_ext.h" +#include "private/common.h" + +static const unsigned char _pad0[16] = { 0 }; + +int +crypto_aead_chacha20poly1305_encrypt_detached(unsigned char *c, + unsigned char *mac, + unsigned long long *maclen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + crypto_onetimeauth_poly1305_state state; + unsigned char block0[64U]; + unsigned char slen[8U]; + + (void) nsec; + crypto_stream_chacha20(block0, sizeof block0, npub, k); + crypto_onetimeauth_poly1305_init(&state, block0); + sodium_memzero(block0, sizeof block0); + + crypto_onetimeauth_poly1305_update(&state, ad, adlen); + STORE64_LE(slen, (uint64_t) adlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_stream_chacha20_xor_ic(c, m, mlen, npub, 1U, k); + + crypto_onetimeauth_poly1305_update(&state, c, mlen); + STORE64_LE(slen, (uint64_t) mlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_onetimeauth_poly1305_final(&state, mac); + sodium_memzero(&state, sizeof state); + + if (maclen_p != NULL) { + *maclen_p = crypto_aead_chacha20poly1305_ABYTES; + } + return 0; +} + +int +crypto_aead_chacha20poly1305_encrypt(unsigned char *c, + unsigned long long *clen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned long long clen = 0ULL; + int ret; + + if (mlen > crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX) { + sodium_misuse(); + } + ret = crypto_aead_chacha20poly1305_encrypt_detached(c, + c + mlen, NULL, + m, mlen, + ad, adlen, + nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + clen = mlen + crypto_aead_chacha20poly1305_ABYTES; + } + *clen_p = clen; + } + return ret; +} + +int +crypto_aead_chacha20poly1305_ietf_encrypt_detached(unsigned char *c, + unsigned char *mac, + unsigned long long *maclen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + crypto_onetimeauth_poly1305_state state; + unsigned char block0[64U]; + unsigned char slen[8U]; + + (void) nsec; + crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k); + crypto_onetimeauth_poly1305_init(&state, block0); + sodium_memzero(block0, sizeof block0); + + crypto_onetimeauth_poly1305_update(&state, ad, adlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); + + crypto_stream_chacha20_ietf_xor_ic(c, m, mlen, npub, 1U, k); + + crypto_onetimeauth_poly1305_update(&state, c, mlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); + + STORE64_LE(slen, (uint64_t) adlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + STORE64_LE(slen, (uint64_t) mlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_onetimeauth_poly1305_final(&state, mac); + sodium_memzero(&state, sizeof state); + + if (maclen_p != NULL) { + *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES; + } + return 0; +} + +int +crypto_aead_chacha20poly1305_ietf_encrypt(unsigned char *c, + unsigned long long *clen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned long long clen = 0ULL; + int ret; + + if (mlen > crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX) { + sodium_misuse(); + } + ret = crypto_aead_chacha20poly1305_ietf_encrypt_detached(c, + c + mlen, NULL, + m, mlen, + ad, adlen, + nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + clen = mlen + crypto_aead_chacha20poly1305_ietf_ABYTES; + } + *clen_p = clen; + } + return ret; +} + +int +crypto_aead_chacha20poly1305_decrypt_detached(unsigned char *m, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *mac, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + crypto_onetimeauth_poly1305_state state; + unsigned char block0[64U]; + unsigned char slen[8U]; + unsigned char computed_mac[crypto_aead_chacha20poly1305_ABYTES]; + unsigned long long mlen; + int ret; + + (void) nsec; + crypto_stream_chacha20(block0, sizeof block0, npub, k); + crypto_onetimeauth_poly1305_init(&state, block0); + sodium_memzero(block0, sizeof block0); + + crypto_onetimeauth_poly1305_update(&state, ad, adlen); + STORE64_LE(slen, (uint64_t) adlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + mlen = clen; + crypto_onetimeauth_poly1305_update(&state, c, mlen); + STORE64_LE(slen, (uint64_t) mlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_onetimeauth_poly1305_final(&state, computed_mac); + sodium_memzero(&state, sizeof state); + + COMPILER_ASSERT(sizeof computed_mac == 16U); + ret = crypto_verify_16(computed_mac, mac); + sodium_memzero(computed_mac, sizeof computed_mac); + if (m == NULL) { + return ret; + } + if (ret != 0) { + memset(m, 0, mlen); + return -1; + } + crypto_stream_chacha20_xor_ic(m, c, mlen, npub, 1U, k); + + return 0; +} + +int +crypto_aead_chacha20poly1305_decrypt(unsigned char *m, + unsigned long long *mlen_p, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= crypto_aead_chacha20poly1305_ABYTES) { + ret = crypto_aead_chacha20poly1305_decrypt_detached + (m, nsec, + c, clen - crypto_aead_chacha20poly1305_ABYTES, + c + clen - crypto_aead_chacha20poly1305_ABYTES, + ad, adlen, npub, k); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - crypto_aead_chacha20poly1305_ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +int +crypto_aead_chacha20poly1305_ietf_decrypt_detached(unsigned char *m, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *mac, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + crypto_onetimeauth_poly1305_state state; + unsigned char block0[64U]; + unsigned char slen[8U]; + unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES]; + unsigned long long mlen; + int ret; + + (void) nsec; + crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k); + crypto_onetimeauth_poly1305_init(&state, block0); + sodium_memzero(block0, sizeof block0); + + crypto_onetimeauth_poly1305_update(&state, ad, adlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); + + mlen = clen; + crypto_onetimeauth_poly1305_update(&state, c, mlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); + + STORE64_LE(slen, (uint64_t) adlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + STORE64_LE(slen, (uint64_t) mlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_onetimeauth_poly1305_final(&state, computed_mac); + sodium_memzero(&state, sizeof state); + + COMPILER_ASSERT(sizeof computed_mac == 16U); + ret = crypto_verify_16(computed_mac, mac); + sodium_memzero(computed_mac, sizeof computed_mac); + if (m == NULL) { + return ret; + } + if (ret != 0) { + memset(m, 0, mlen); + return -1; + } + crypto_stream_chacha20_ietf_xor_ic(m, c, mlen, npub, 1U, k); + + return 0; +} + +int +crypto_aead_chacha20poly1305_ietf_decrypt(unsigned char *m, + unsigned long long *mlen_p, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= crypto_aead_chacha20poly1305_ietf_ABYTES) { + ret = crypto_aead_chacha20poly1305_ietf_decrypt_detached + (m, nsec, + c, clen - crypto_aead_chacha20poly1305_ietf_ABYTES, + c + clen - crypto_aead_chacha20poly1305_ietf_ABYTES, + ad, adlen, npub, k); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - crypto_aead_chacha20poly1305_ietf_ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +size_t +crypto_aead_chacha20poly1305_ietf_keybytes(void) +{ + return crypto_aead_chacha20poly1305_ietf_KEYBYTES; +} + +size_t +crypto_aead_chacha20poly1305_ietf_npubbytes(void) +{ + return crypto_aead_chacha20poly1305_ietf_NPUBBYTES; +} + +size_t +crypto_aead_chacha20poly1305_ietf_nsecbytes(void) +{ + return crypto_aead_chacha20poly1305_ietf_NSECBYTES; +} + +size_t +crypto_aead_chacha20poly1305_ietf_abytes(void) +{ + return crypto_aead_chacha20poly1305_ietf_ABYTES; +} + +size_t +crypto_aead_chacha20poly1305_ietf_messagebytes_max(void) +{ + return crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX; +} + +void +crypto_aead_chacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_chacha20poly1305_ietf_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_chacha20poly1305_ietf_KEYBYTES); +} + +size_t +crypto_aead_chacha20poly1305_keybytes(void) +{ + return crypto_aead_chacha20poly1305_KEYBYTES; +} + +size_t +crypto_aead_chacha20poly1305_npubbytes(void) +{ + return crypto_aead_chacha20poly1305_NPUBBYTES; +} + +size_t +crypto_aead_chacha20poly1305_nsecbytes(void) +{ + return crypto_aead_chacha20poly1305_NSECBYTES; +} + +size_t +crypto_aead_chacha20poly1305_abytes(void) +{ + return crypto_aead_chacha20poly1305_ABYTES; +} + +size_t +crypto_aead_chacha20poly1305_messagebytes_max(void) +{ + return crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX; +} + +void +crypto_aead_chacha20poly1305_keygen(unsigned char k[crypto_aead_chacha20poly1305_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_chacha20poly1305_KEYBYTES); +} diff --git a/libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c b/libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c deleted file mode 100644 index ce51546200..0000000000 --- a/libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c +++ /dev/null @@ -1,400 +0,0 @@ - -#include -#include -#include -#include - -#include "core.h" -#include "crypto_aead_chacha20poly1305.h" -#include "crypto_onetimeauth_poly1305.h" -#include "crypto_stream_chacha20.h" -#include "crypto_verify_16.h" -#include "randombytes.h" -#include "utils.h" - -#include "private/chacha20_ietf_ext.h" -#include "private/common.h" - -static const unsigned char _pad0[16] = { 0 }; - -int -crypto_aead_chacha20poly1305_encrypt_detached(unsigned char *c, - unsigned char *mac, - unsigned long long *maclen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - crypto_onetimeauth_poly1305_state state; - unsigned char block0[64U]; - unsigned char slen[8U]; - - (void) nsec; - crypto_stream_chacha20(block0, sizeof block0, npub, k); - crypto_onetimeauth_poly1305_init(&state, block0); - sodium_memzero(block0, sizeof block0); - - crypto_onetimeauth_poly1305_update(&state, ad, adlen); - STORE64_LE(slen, (uint64_t) adlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_stream_chacha20_xor_ic(c, m, mlen, npub, 1U, k); - - crypto_onetimeauth_poly1305_update(&state, c, mlen); - STORE64_LE(slen, (uint64_t) mlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_onetimeauth_poly1305_final(&state, mac); - sodium_memzero(&state, sizeof state); - - if (maclen_p != NULL) { - *maclen_p = crypto_aead_chacha20poly1305_ABYTES; - } - return 0; -} - -int -crypto_aead_chacha20poly1305_encrypt(unsigned char *c, - unsigned long long *clen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned long long clen = 0ULL; - int ret; - - if (mlen > crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX) { - sodium_misuse(); - } - ret = crypto_aead_chacha20poly1305_encrypt_detached(c, - c + mlen, NULL, - m, mlen, - ad, adlen, - nsec, npub, k); - if (clen_p != NULL) { - if (ret == 0) { - clen = mlen + crypto_aead_chacha20poly1305_ABYTES; - } - *clen_p = clen; - } - return ret; -} - -int -crypto_aead_chacha20poly1305_ietf_encrypt_detached(unsigned char *c, - unsigned char *mac, - unsigned long long *maclen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - crypto_onetimeauth_poly1305_state state; - unsigned char block0[64U]; - unsigned char slen[8U]; - - (void) nsec; - crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k); - crypto_onetimeauth_poly1305_init(&state, block0); - sodium_memzero(block0, sizeof block0); - - crypto_onetimeauth_poly1305_update(&state, ad, adlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); - - crypto_stream_chacha20_ietf_xor_ic(c, m, mlen, npub, 1U, k); - - crypto_onetimeauth_poly1305_update(&state, c, mlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); - - STORE64_LE(slen, (uint64_t) adlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - STORE64_LE(slen, (uint64_t) mlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_onetimeauth_poly1305_final(&state, mac); - sodium_memzero(&state, sizeof state); - - if (maclen_p != NULL) { - *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES; - } - return 0; -} - -int -crypto_aead_chacha20poly1305_ietf_encrypt(unsigned char *c, - unsigned long long *clen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned long long clen = 0ULL; - int ret; - - if (mlen > crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX) { - sodium_misuse(); - } - ret = crypto_aead_chacha20poly1305_ietf_encrypt_detached(c, - c + mlen, NULL, - m, mlen, - ad, adlen, - nsec, npub, k); - if (clen_p != NULL) { - if (ret == 0) { - clen = mlen + crypto_aead_chacha20poly1305_ietf_ABYTES; - } - *clen_p = clen; - } - return ret; -} - -int -crypto_aead_chacha20poly1305_decrypt_detached(unsigned char *m, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - crypto_onetimeauth_poly1305_state state; - unsigned char block0[64U]; - unsigned char slen[8U]; - unsigned char computed_mac[crypto_aead_chacha20poly1305_ABYTES]; - unsigned long long mlen; - int ret; - - (void) nsec; - crypto_stream_chacha20(block0, sizeof block0, npub, k); - crypto_onetimeauth_poly1305_init(&state, block0); - sodium_memzero(block0, sizeof block0); - - crypto_onetimeauth_poly1305_update(&state, ad, adlen); - STORE64_LE(slen, (uint64_t) adlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - mlen = clen; - crypto_onetimeauth_poly1305_update(&state, c, mlen); - STORE64_LE(slen, (uint64_t) mlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_onetimeauth_poly1305_final(&state, computed_mac); - sodium_memzero(&state, sizeof state); - - COMPILER_ASSERT(sizeof computed_mac == 16U); - ret = crypto_verify_16(computed_mac, mac); - sodium_memzero(computed_mac, sizeof computed_mac); - if (m == NULL) { - return ret; - } - if (ret != 0) { - memset(m, 0, mlen); - return -1; - } - crypto_stream_chacha20_xor_ic(m, c, mlen, npub, 1U, k); - - return 0; -} - -int -crypto_aead_chacha20poly1305_decrypt(unsigned char *m, - unsigned long long *mlen_p, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned long long mlen = 0ULL; - int ret = -1; - - if (clen >= crypto_aead_chacha20poly1305_ABYTES) { - ret = crypto_aead_chacha20poly1305_decrypt_detached - (m, nsec, - c, clen - crypto_aead_chacha20poly1305_ABYTES, - c + clen - crypto_aead_chacha20poly1305_ABYTES, - ad, adlen, npub, k); - } - if (mlen_p != NULL) { - if (ret == 0) { - mlen = clen - crypto_aead_chacha20poly1305_ABYTES; - } - *mlen_p = mlen; - } - return ret; -} - -int -crypto_aead_chacha20poly1305_ietf_decrypt_detached(unsigned char *m, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - crypto_onetimeauth_poly1305_state state; - unsigned char block0[64U]; - unsigned char slen[8U]; - unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES]; - unsigned long long mlen; - int ret; - - (void) nsec; - crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k); - crypto_onetimeauth_poly1305_init(&state, block0); - sodium_memzero(block0, sizeof block0); - - crypto_onetimeauth_poly1305_update(&state, ad, adlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); - - mlen = clen; - crypto_onetimeauth_poly1305_update(&state, c, mlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); - - STORE64_LE(slen, (uint64_t) adlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - STORE64_LE(slen, (uint64_t) mlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_onetimeauth_poly1305_final(&state, computed_mac); - sodium_memzero(&state, sizeof state); - - COMPILER_ASSERT(sizeof computed_mac == 16U); - ret = crypto_verify_16(computed_mac, mac); - sodium_memzero(computed_mac, sizeof computed_mac); - if (m == NULL) { - return ret; - } - if (ret != 0) { - memset(m, 0, mlen); - return -1; - } - crypto_stream_chacha20_ietf_xor_ic(m, c, mlen, npub, 1U, k); - - return 0; -} - -int -crypto_aead_chacha20poly1305_ietf_decrypt(unsigned char *m, - unsigned long long *mlen_p, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned long long mlen = 0ULL; - int ret = -1; - - if (clen >= crypto_aead_chacha20poly1305_ietf_ABYTES) { - ret = crypto_aead_chacha20poly1305_ietf_decrypt_detached - (m, nsec, - c, clen - crypto_aead_chacha20poly1305_ietf_ABYTES, - c + clen - crypto_aead_chacha20poly1305_ietf_ABYTES, - ad, adlen, npub, k); - } - if (mlen_p != NULL) { - if (ret == 0) { - mlen = clen - crypto_aead_chacha20poly1305_ietf_ABYTES; - } - *mlen_p = mlen; - } - return ret; -} - -size_t -crypto_aead_chacha20poly1305_ietf_keybytes(void) -{ - return crypto_aead_chacha20poly1305_ietf_KEYBYTES; -} - -size_t -crypto_aead_chacha20poly1305_ietf_npubbytes(void) -{ - return crypto_aead_chacha20poly1305_ietf_NPUBBYTES; -} - -size_t -crypto_aead_chacha20poly1305_ietf_nsecbytes(void) -{ - return crypto_aead_chacha20poly1305_ietf_NSECBYTES; -} - -size_t -crypto_aead_chacha20poly1305_ietf_abytes(void) -{ - return crypto_aead_chacha20poly1305_ietf_ABYTES; -} - -size_t -crypto_aead_chacha20poly1305_ietf_messagebytes_max(void) -{ - return crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX; -} - -void -crypto_aead_chacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_chacha20poly1305_ietf_KEYBYTES]) -{ - randombytes_buf(k, crypto_aead_chacha20poly1305_ietf_KEYBYTES); -} - -size_t -crypto_aead_chacha20poly1305_keybytes(void) -{ - return crypto_aead_chacha20poly1305_KEYBYTES; -} - -size_t -crypto_aead_chacha20poly1305_npubbytes(void) -{ - return crypto_aead_chacha20poly1305_NPUBBYTES; -} - -size_t -crypto_aead_chacha20poly1305_nsecbytes(void) -{ - return crypto_aead_chacha20poly1305_NSECBYTES; -} - -size_t -crypto_aead_chacha20poly1305_abytes(void) -{ - return crypto_aead_chacha20poly1305_ABYTES; -} - -size_t -crypto_aead_chacha20poly1305_messagebytes_max(void) -{ - return crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX; -} - -void -crypto_aead_chacha20poly1305_keygen(unsigned char k[crypto_aead_chacha20poly1305_KEYBYTES]) -{ - randombytes_buf(k, crypto_aead_chacha20poly1305_KEYBYTES); -} diff --git a/libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c b/libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c new file mode 100644 index 0000000000..07e3655731 --- /dev/null +++ b/libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c @@ -0,0 +1,262 @@ + +#include +#include +#include +#include + +#include "core.h" +#include "crypto_aead_chacha20poly1305.h" +#include "crypto_aead_xchacha20poly1305.h" +#include "crypto_core_hchacha20.h" +#include "crypto_onetimeauth_poly1305.h" +#include "crypto_stream_chacha20.h" +#include "crypto_verify_16.h" +#include "randombytes.h" +#include "utils.h" + +#include "private/chacha20_ietf_ext.h" +#include "private/common.h" + +static const unsigned char _pad0[16] = { 0 }; + +static int +_encrypt_detached(unsigned char *c, + unsigned char *mac, + unsigned long long *maclen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + crypto_onetimeauth_poly1305_state state; + unsigned char block0[64U]; + unsigned char slen[8U]; + + (void) nsec; + crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k); + crypto_onetimeauth_poly1305_init(&state, block0); + sodium_memzero(block0, sizeof block0); + + crypto_onetimeauth_poly1305_update(&state, ad, adlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); + + crypto_stream_chacha20_ietf_ext_xor_ic(c, m, mlen, npub, 1U, k); + + crypto_onetimeauth_poly1305_update(&state, c, mlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); + + STORE64_LE(slen, (uint64_t) adlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + STORE64_LE(slen, (uint64_t) mlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_onetimeauth_poly1305_final(&state, mac); + sodium_memzero(&state, sizeof state); + + if (maclen_p != NULL) { + *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES; + } + return 0; +} + +static int +_decrypt_detached(unsigned char *m, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *mac, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + crypto_onetimeauth_poly1305_state state; + unsigned char block0[64U]; + unsigned char slen[8U]; + unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES]; + unsigned long long mlen; + int ret; + + (void) nsec; + crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k); + crypto_onetimeauth_poly1305_init(&state, block0); + sodium_memzero(block0, sizeof block0); + + crypto_onetimeauth_poly1305_update(&state, ad, adlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); + + mlen = clen; + crypto_onetimeauth_poly1305_update(&state, c, mlen); + crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); + + STORE64_LE(slen, (uint64_t) adlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + STORE64_LE(slen, (uint64_t) mlen); + crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); + + crypto_onetimeauth_poly1305_final(&state, computed_mac); + sodium_memzero(&state, sizeof state); + + COMPILER_ASSERT(sizeof computed_mac == 16U); + ret = crypto_verify_16(computed_mac, mac); + sodium_memzero(computed_mac, sizeof computed_mac); + if (m == NULL) { + return ret; + } + if (ret != 0) { + memset(m, 0, mlen); + return -1; + } + crypto_stream_chacha20_ietf_ext_xor_ic(m, c, mlen, npub, 1U, k); + + return 0; +} + +int +crypto_aead_xchacha20poly1305_ietf_encrypt_detached(unsigned char *c, + unsigned char *mac, + unsigned long long *maclen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES]; + unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 }; + int ret; + + crypto_core_hchacha20(k2, npub, k, NULL); + memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES, + crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4); + ret = _encrypt_detached(c, mac, maclen_p, m, mlen, ad, adlen, + nsec, npub2, k2); + sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES); + + return ret; +} + +int +crypto_aead_xchacha20poly1305_ietf_encrypt(unsigned char *c, + unsigned long long *clen_p, + const unsigned char *m, + unsigned long long mlen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *nsec, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned long long clen = 0ULL; + int ret; + + if (mlen > crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX) { + sodium_misuse(); + } + ret = crypto_aead_xchacha20poly1305_ietf_encrypt_detached + (c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, k); + if (clen_p != NULL) { + if (ret == 0) { + clen = mlen + crypto_aead_xchacha20poly1305_ietf_ABYTES; + } + *clen_p = clen; + } + return ret; +} + +int +crypto_aead_xchacha20poly1305_ietf_decrypt_detached(unsigned char *m, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *mac, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES]; + unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 }; + int ret; + + crypto_core_hchacha20(k2, npub, k, NULL); + memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES, + crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4); + ret = _decrypt_detached(m, nsec, c, clen, mac, ad, adlen, npub2, k2); + sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES); + + return ret; +} + +int +crypto_aead_xchacha20poly1305_ietf_decrypt(unsigned char *m, + unsigned long long *mlen_p, + unsigned char *nsec, + const unsigned char *c, + unsigned long long clen, + const unsigned char *ad, + unsigned long long adlen, + const unsigned char *npub, + const unsigned char *k) +{ + unsigned long long mlen = 0ULL; + int ret = -1; + + if (clen >= crypto_aead_xchacha20poly1305_ietf_ABYTES) { + ret = crypto_aead_xchacha20poly1305_ietf_decrypt_detached + (m, nsec, + c, clen - crypto_aead_xchacha20poly1305_ietf_ABYTES, + c + clen - crypto_aead_xchacha20poly1305_ietf_ABYTES, + ad, adlen, npub, k); + } + if (mlen_p != NULL) { + if (ret == 0) { + mlen = clen - crypto_aead_xchacha20poly1305_ietf_ABYTES; + } + *mlen_p = mlen; + } + return ret; +} + +size_t +crypto_aead_xchacha20poly1305_ietf_keybytes(void) +{ + return crypto_aead_xchacha20poly1305_ietf_KEYBYTES; +} + +size_t +crypto_aead_xchacha20poly1305_ietf_npubbytes(void) +{ + return crypto_aead_xchacha20poly1305_ietf_NPUBBYTES; +} + +size_t +crypto_aead_xchacha20poly1305_ietf_nsecbytes(void) +{ + return crypto_aead_xchacha20poly1305_ietf_NSECBYTES; +} + +size_t +crypto_aead_xchacha20poly1305_ietf_abytes(void) +{ + return crypto_aead_xchacha20poly1305_ietf_ABYTES; +} + +size_t +crypto_aead_xchacha20poly1305_ietf_messagebytes_max(void) +{ + return crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX; +} + +void +crypto_aead_xchacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_xchacha20poly1305_ietf_KEYBYTES]) +{ + randombytes_buf(k, crypto_aead_xchacha20poly1305_ietf_KEYBYTES); +} diff --git a/libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c b/libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c deleted file mode 100644 index 61ccc84c8c..0000000000 --- a/libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c +++ /dev/null @@ -1,262 +0,0 @@ - -#include -#include -#include -#include - -#include "core.h" -#include "crypto_aead_chacha20poly1305.h" -#include "crypto_aead_xchacha20poly1305.h" -#include "crypto_core_hchacha20.h" -#include "crypto_onetimeauth_poly1305.h" -#include "crypto_stream_chacha20.h" -#include "crypto_verify_16.h" -#include "randombytes.h" -#include "utils.h" - -#include "private/chacha20_ietf_ext.h" -#include "private/common.h" - -static const unsigned char _pad0[16] = { 0 }; - -static int -_encrypt_detached(unsigned char *c, - unsigned char *mac, - unsigned long long *maclen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - crypto_onetimeauth_poly1305_state state; - unsigned char block0[64U]; - unsigned char slen[8U]; - - (void) nsec; - crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k); - crypto_onetimeauth_poly1305_init(&state, block0); - sodium_memzero(block0, sizeof block0); - - crypto_onetimeauth_poly1305_update(&state, ad, adlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); - - crypto_stream_chacha20_ietf_ext_xor_ic(c, m, mlen, npub, 1U, k); - - crypto_onetimeauth_poly1305_update(&state, c, mlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); - - STORE64_LE(slen, (uint64_t) adlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - STORE64_LE(slen, (uint64_t) mlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_onetimeauth_poly1305_final(&state, mac); - sodium_memzero(&state, sizeof state); - - if (maclen_p != NULL) { - *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES; - } - return 0; -} - -static int -_decrypt_detached(unsigned char *m, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - crypto_onetimeauth_poly1305_state state; - unsigned char block0[64U]; - unsigned char slen[8U]; - unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES]; - unsigned long long mlen; - int ret; - - (void) nsec; - crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k); - crypto_onetimeauth_poly1305_init(&state, block0); - sodium_memzero(block0, sizeof block0); - - crypto_onetimeauth_poly1305_update(&state, ad, adlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf); - - mlen = clen; - crypto_onetimeauth_poly1305_update(&state, c, mlen); - crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf); - - STORE64_LE(slen, (uint64_t) adlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - STORE64_LE(slen, (uint64_t) mlen); - crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen); - - crypto_onetimeauth_poly1305_final(&state, computed_mac); - sodium_memzero(&state, sizeof state); - - COMPILER_ASSERT(sizeof computed_mac == 16U); - ret = crypto_verify_16(computed_mac, mac); - sodium_memzero(computed_mac, sizeof computed_mac); - if (m == NULL) { - return ret; - } - if (ret != 0) { - memset(m, 0, mlen); - return -1; - } - crypto_stream_chacha20_ietf_ext_xor_ic(m, c, mlen, npub, 1U, k); - - return 0; -} - -int -crypto_aead_xchacha20poly1305_ietf_encrypt_detached(unsigned char *c, - unsigned char *mac, - unsigned long long *maclen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES]; - unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 }; - int ret; - - crypto_core_hchacha20(k2, npub, k, NULL); - memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES, - crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4); - ret = _encrypt_detached(c, mac, maclen_p, m, mlen, ad, adlen, - nsec, npub2, k2); - sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES); - - return ret; -} - -int -crypto_aead_xchacha20poly1305_ietf_encrypt(unsigned char *c, - unsigned long long *clen_p, - const unsigned char *m, - unsigned long long mlen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *nsec, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned long long clen = 0ULL; - int ret; - - if (mlen > crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX) { - sodium_misuse(); - } - ret = crypto_aead_xchacha20poly1305_ietf_encrypt_detached - (c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, k); - if (clen_p != NULL) { - if (ret == 0) { - clen = mlen + crypto_aead_xchacha20poly1305_ietf_ABYTES; - } - *clen_p = clen; - } - return ret; -} - -int -crypto_aead_xchacha20poly1305_ietf_decrypt_detached(unsigned char *m, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *mac, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES]; - unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 }; - int ret; - - crypto_core_hchacha20(k2, npub, k, NULL); - memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES, - crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4); - ret = _decrypt_detached(m, nsec, c, clen, mac, ad, adlen, npub2, k2); - sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES); - - return ret; -} - -int -crypto_aead_xchacha20poly1305_ietf_decrypt(unsigned char *m, - unsigned long long *mlen_p, - unsigned char *nsec, - const unsigned char *c, - unsigned long long clen, - const unsigned char *ad, - unsigned long long adlen, - const unsigned char *npub, - const unsigned char *k) -{ - unsigned long long mlen = 0ULL; - int ret = -1; - - if (clen >= crypto_aead_xchacha20poly1305_ietf_ABYTES) { - ret = crypto_aead_xchacha20poly1305_ietf_decrypt_detached - (m, nsec, - c, clen - crypto_aead_xchacha20poly1305_ietf_ABYTES, - c + clen - crypto_aead_xchacha20poly1305_ietf_ABYTES, - ad, adlen, npub, k); - } - if (mlen_p != NULL) { - if (ret == 0) { - mlen = clen - crypto_aead_xchacha20poly1305_ietf_ABYTES; - } - *mlen_p = mlen; - } - return ret; -} - -size_t -crypto_aead_xchacha20poly1305_ietf_keybytes(void) -{ - return crypto_aead_xchacha20poly1305_ietf_KEYBYTES; -} - -size_t -crypto_aead_xchacha20poly1305_ietf_npubbytes(void) -{ - return crypto_aead_xchacha20poly1305_ietf_NPUBBYTES; -} - -size_t -crypto_aead_xchacha20poly1305_ietf_nsecbytes(void) -{ - return crypto_aead_xchacha20poly1305_ietf_NSECBYTES; -} - -size_t -crypto_aead_xchacha20poly1305_ietf_abytes(void) -{ - return crypto_aead_xchacha20poly1305_ietf_ABYTES; -} - -size_t -crypto_aead_xchacha20poly1305_ietf_messagebytes_max(void) -{ - return crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX; -} - -void -crypto_aead_xchacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_xchacha20poly1305_ietf_KEYBYTES]) -{ - randombytes_buf(k, crypto_aead_xchacha20poly1305_ietf_KEYBYTES); -} -- cgit v1.2.3