author    George Hazan <george.hazan@gmail.com>  2024-06-21 14:29:17 +0300
committer George Hazan <george.hazan@gmail.com>  2024-06-21 14:29:17 +0300
commit    46ea86584a9787c8b9dc3983cf23d9b5b93b5841 (patch)
tree      fbaf3793ae2170f7982f08a62c028a23cd9afedd /libs/libsodium/src/crypto_aead
parent    82e75be329dd0f30c0281ef9c3c08488b89d109f (diff)
fixes #4477 (libsodium: update to 1.0.20)
Diffstat (limited to 'libs/libsodium/src/crypto_aead')
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c | 159
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c | 70
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h | 8
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h | 8
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h | 249
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c | 59
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h | 8
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis128l/implementations.h | 17
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c | 158
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c | 65
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h | 8
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h | 8
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h | 232
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c | 54
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h | 8
-rw-r--r--  libs/libsodium/src/crypto_aead/aegis256/implementations.h | 17
-rw-r--r--  libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c | 157
-rw-r--r--  libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c | 1784
-rw-r--r--  libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c | 1033
-rw-r--r--  libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c (renamed from libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c) | 800
-rw-r--r--  libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c (renamed from libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c) | 524
21 files changed, 3840 insertions, 1586 deletions
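
The headline change is the addition of the AEGIS-128L and AEGIS-256 AEAD constructions introduced in libsodium 1.0.20. For orientation, a minimal combined-mode sketch against the new API (function and constant names are taken from the headers in this diff; `msg`, `MSG_LEN`, `ad` and `AD_LEN` are illustrative placeholders):

    /* Minimal AEGIS-256 combined-mode sketch; assumes sodium_init() succeeded. */
    unsigned char      k[crypto_aead_aegis256_KEYBYTES];
    unsigned char      npub[crypto_aead_aegis256_NPUBBYTES];
    unsigned char      c[MSG_LEN + crypto_aead_aegis256_ABYTES]; /* ciphertext || tag */
    unsigned long long clen;

    crypto_aead_aegis256_keygen(k);
    randombytes_buf(npub, sizeof npub);
    crypto_aead_aegis256_encrypt(c, &clen, msg, MSG_LEN, ad, AD_LEN,
                                 NULL, npub, k); /* nsec is always NULL */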
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c b/libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c
new file mode 100644
index 0000000000..ab2596e685
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aead_aegis128l.c
@@ -0,0 +1,159 @@
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "core.h"
+#include "crypto_aead_aegis128l.h"
+#include "private/common.h"
+#include "private/implementations.h"
+#include "randombytes.h"
+#include "runtime.h"
+
+#include "aegis128l_soft.h"
+
+#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
+#include "aegis128l_armcrypto.h"
+#endif
+
+#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
+#include "aegis128l_aesni.h"
+#endif
+
+static const aegis128l_implementation *implementation = &aegis128l_soft_implementation;
+
+size_t
+crypto_aead_aegis128l_keybytes(void)
+{
+ return crypto_aead_aegis128l_KEYBYTES;
+}
+
+size_t
+crypto_aead_aegis128l_nsecbytes(void)
+{
+ return crypto_aead_aegis128l_NSECBYTES;
+}
+
+size_t
+crypto_aead_aegis128l_npubbytes(void)
+{
+ return crypto_aead_aegis128l_NPUBBYTES;
+}
+
+size_t
+crypto_aead_aegis128l_abytes(void)
+{
+ return crypto_aead_aegis128l_ABYTES;
+}
+
+size_t
+crypto_aead_aegis128l_messagebytes_max(void)
+{
+ return crypto_aead_aegis128l_MESSAGEBYTES_MAX;
+}
+
+void
+crypto_aead_aegis128l_keygen(unsigned char k[crypto_aead_aegis128l_KEYBYTES])
+{
+ randombytes_buf(k, crypto_aead_aegis128l_KEYBYTES);
+}
+
+int
+crypto_aead_aegis128l_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ unsigned long long clen = 0ULL;
+ int ret;
+
+ ret = crypto_aead_aegis128l_encrypt_detached(c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub,
+ k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ clen = mlen + crypto_aead_aegis128l_ABYTES;
+ }
+ *clen_p = clen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_aegis128l_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k)
+{
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= crypto_aead_aegis128l_ABYTES) {
+ ret = crypto_aead_aegis128l_decrypt_detached(
+ m, nsec, c, clen - crypto_aead_aegis128l_ABYTES,
+ c + clen - crypto_aead_aegis128l_ABYTES, ad, adlen, npub, k);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - crypto_aead_aegis128l_ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_aegis128l_encrypt_detached(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ const size_t maclen = crypto_aead_aegis128l_ABYTES;
+
+ if (maclen_p != NULL) {
+ *maclen_p = maclen;
+ }
+ if (mlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX ||
+ adlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX) {
+ sodium_misuse();
+ }
+ return implementation->encrypt_detached(c, mac, maclen, m, (size_t) mlen, ad, (size_t) adlen,
+ npub, k);
+}
+
+int
+crypto_aead_aegis128l_decrypt_detached(unsigned char *m, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const unsigned char *k)
+{
+ const size_t maclen = crypto_aead_aegis128l_ABYTES;
+
+ if (clen > crypto_aead_aegis128l_MESSAGEBYTES_MAX ||
+ adlen > crypto_aead_aegis128l_MESSAGEBYTES_MAX) {
+ return -1;
+ }
+ return implementation->decrypt_detached(m, c, (size_t) clen, mac, maclen, ad, (size_t) adlen,
+ npub, k);
+}
+
+int
+_crypto_aead_aegis128l_pick_best_implementation(void)
+{
+ implementation = &aegis128l_soft_implementation;
+
+#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
+ if (sodium_runtime_has_armcrypto()) {
+ implementation = &aegis128l_armcrypto_implementation;
+ return 0;
+ }
+#endif
+
+#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
+ if (sodium_runtime_has_aesni() & sodium_runtime_has_avx()) {
+ implementation = &aegis128l_aesni_implementation;
+ return 0;
+ }
+#endif
+ return 0; /* LCOV_EXCL_LINE */
+}
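aead_aegis128l.c is a thin dispatcher: the public functions validate lengths, then forward through a function-pointer table that defaults to the portable soft implementation and is upgraded by _crypto_aead_aegis128l_pick_best_implementation() once CPU features have been probed. A reduced sketch of the pattern (names taken from the file above; not itself part of the diff):

    /* Runtime backend selection via a function-pointer table. */
    static const aegis128l_implementation *impl = &aegis128l_soft_implementation;

    int pick_best(void) {
        if (sodium_runtime_has_aesni() && sodium_runtime_has_avx()) {
            impl = &aegis128l_aesni_implementation; /* hardware path */
        }
        return 0; /* the soft fallback always works */
    }

Note also that the combined-mode wrapper writes the tag immediately after the ciphertext (c + mlen), so the caller's output buffer must hold mlen + crypto_aead_aegis128l_ABYTES bytes.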
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c
new file mode 100644
index 0000000000..93782ce288
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.c
@@ -0,0 +1,70 @@
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_aegis128l.h"
+#include "crypto_verify_16.h"
+#include "crypto_verify_32.h"
+#include "export.h"
+#include "utils.h"
+
+#include "private/common.h"
+
+#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
+
+#include "aegis128l_aesni.h"
+
+#ifdef __clang__
+#pragma clang attribute push(__attribute__((target("aes,avx"))), apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC target("aes,avx")
+#endif
+
+#include "private/sse2_64_32.h"
+#include <immintrin.h>
+#include <wmmintrin.h>
+
+#define AES_BLOCK_LENGTH 16
+
+typedef __m128i aes_block_t;
+#define AES_BLOCK_XOR(A, B) _mm_xor_si128((A), (B))
+#define AES_BLOCK_AND(A, B) _mm_and_si128((A), (B))
+#define AES_BLOCK_LOAD(A) _mm_loadu_si128((const aes_block_t *) (const void *) (A))
+#define AES_BLOCK_LOAD_64x2(A, B) _mm_set_epi64x((long long) (A), (long long) (B))
+#define AES_BLOCK_STORE(A, B) _mm_storeu_si128((aes_block_t *) (void *) (A), (B))
+#define AES_ENC(A, B) _mm_aesenc_si128((A), (B))
+
+static inline void
+aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2)
+{
+ aes_block_t tmp;
+
+ tmp = state[7];
+ state[7] = AES_ENC(state[6], state[7]);
+ state[6] = AES_ENC(state[5], state[6]);
+ state[5] = AES_ENC(state[4], state[5]);
+ state[4] = AES_ENC(state[3], state[4]);
+ state[3] = AES_ENC(state[2], state[3]);
+ state[2] = AES_ENC(state[1], state[2]);
+ state[1] = AES_ENC(state[0], state[1]);
+ state[0] = AES_ENC(tmp, state[0]);
+
+ state[0] = AES_BLOCK_XOR(state[0], d1);
+ state[4] = AES_BLOCK_XOR(state[4], d2);
+}
+
+#include "aegis128l_common.h"
+
+struct aegis128l_implementation aegis128l_aesni_implementation = { SODIUM_C99(.encrypt_detached =)
+ encrypt_detached,
+ SODIUM_C99(.decrypt_detached =)
+ decrypt_detached };
+
+#ifdef __clang__
+#pragma clang attribute pop
+#endif
+
+#endif
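Both this AES-NI file and the soft backend below follow the same template-by-include scheme: each backend defines aes_block_t plus the AES_BLOCK_*/AES_ENC macros and its aegis128l_update() round, then includes aegis128l_common.h at the bottom to instantiate the shared encrypt_detached/decrypt_detached core against those primitives. Schematically (a sketch, not code from the diff):

    /* Template-by-include: backend-specific primitives first ... */
    #define AES_BLOCK_LENGTH 16
    typedef backend_block aes_block_t;             /* e.g. __m128i or SoftAesBlock */
    #define AES_ENC(A, B) backend_aesenc((A), (B)) /* backend round function */
    /* ... remaining AES_BLOCK_* macros and aegis128l_update() ... */

    /* ... then the generic AEGIS core is stamped out against them. */
    #include "aegis128l_common.h"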
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h
new file mode 100644
index 0000000000..65e52dab1b
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_aesni.h
@@ -0,0 +1,8 @@
+#ifndef aegis128l_aesni_H
+#define aegis128l_aesni_H
+
+#include "implementations.h"
+
+extern struct aegis128l_implementation aegis128l_aesni_implementation;
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h
new file mode 100644
index 0000000000..41ad43cba0
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_armcrypto.h
@@ -0,0 +1,8 @@
+#ifndef aegis128l_armcrypto_H
+#define aegis128l_armcrypto_H
+
+#include "implementations.h"
+
+extern struct aegis128l_implementation aegis128l_armcrypto_implementation;
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h
new file mode 100644
index 0000000000..6e503dc35a
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_common.h
@@ -0,0 +1,249 @@
+#define RATE 32
+
+static void
+aegis128l_init(const uint8_t *key, const uint8_t *nonce, aes_block_t *const state)
+{
+ static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
+ const uint8_t c0_[AES_BLOCK_LENGTH] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+ 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 };
+ static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
+ const uint8_t c1_[AES_BLOCK_LENGTH] = { 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+ 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd };
+
+ const aes_block_t c0 = AES_BLOCK_LOAD(c0_);
+ const aes_block_t c1 = AES_BLOCK_LOAD(c1_);
+ aes_block_t k;
+ aes_block_t n;
+ int i;
+
+ k = AES_BLOCK_LOAD(key);
+ n = AES_BLOCK_LOAD(nonce);
+
+ state[0] = AES_BLOCK_XOR(k, n);
+ state[1] = c1;
+ state[2] = c0;
+ state[3] = c1;
+ state[4] = AES_BLOCK_XOR(k, n);
+ state[5] = AES_BLOCK_XOR(k, c0);
+ state[6] = AES_BLOCK_XOR(k, c1);
+ state[7] = AES_BLOCK_XOR(k, c0);
+ for (i = 0; i < 10; i++) {
+ aegis128l_update(state, n, k);
+ }
+}
+
+static int
+aegis128l_mac(uint8_t *mac, size_t maclen, size_t adlen, size_t mlen, aes_block_t *const state)
+{
+ aes_block_t tmp;
+ int i;
+
+ tmp = AES_BLOCK_LOAD_64x2(((uint64_t) mlen) << 3, ((uint64_t) adlen) << 3);
+ tmp = AES_BLOCK_XOR(tmp, state[2]);
+
+ for (i = 0; i < 7; i++) {
+ aegis128l_update(state, tmp, tmp);
+ }
+
+ if (maclen == 16) {
+ tmp = AES_BLOCK_XOR(state[6], AES_BLOCK_XOR(state[5], state[4]));
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[3], state[2]));
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0]));
+ AES_BLOCK_STORE(mac, tmp);
+ } else if (maclen == 32) {
+ tmp = AES_BLOCK_XOR(state[3], state[2]);
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0]));
+ AES_BLOCK_STORE(mac, tmp);
+ tmp = AES_BLOCK_XOR(state[7], state[6]);
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[5], state[4]));
+ AES_BLOCK_STORE(mac + 16, tmp);
+ } else {
+ memset(mac, 0, maclen);
+ return -1;
+ }
+ return 0;
+}
+
+static inline void
+aegis128l_absorb(const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg0, msg1;
+
+ msg0 = AES_BLOCK_LOAD(src);
+ msg1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH);
+ aegis128l_update(state, msg0, msg1);
+}
+
+static inline void
+aegis128l_absorb2(const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg0, msg1, msg2, msg3;
+
+ msg0 = AES_BLOCK_LOAD(src + 0 * AES_BLOCK_LENGTH);
+ msg1 = AES_BLOCK_LOAD(src + 1 * AES_BLOCK_LENGTH);
+ msg2 = AES_BLOCK_LOAD(src + 2 * AES_BLOCK_LENGTH);
+ msg3 = AES_BLOCK_LOAD(src + 3 * AES_BLOCK_LENGTH);
+ aegis128l_update(state, msg0, msg1);
+ aegis128l_update(state, msg2, msg3);
+}
+
+static void
+aegis128l_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg0, msg1;
+ aes_block_t tmp0, tmp1;
+
+ msg0 = AES_BLOCK_LOAD(src);
+ msg1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH);
+ tmp0 = AES_BLOCK_XOR(msg0, state[6]);
+ tmp0 = AES_BLOCK_XOR(tmp0, state[1]);
+ tmp1 = AES_BLOCK_XOR(msg1, state[5]);
+ tmp1 = AES_BLOCK_XOR(tmp1, state[2]);
+ tmp0 = AES_BLOCK_XOR(tmp0, AES_BLOCK_AND(state[2], state[3]));
+ tmp1 = AES_BLOCK_XOR(tmp1, AES_BLOCK_AND(state[6], state[7]));
+ AES_BLOCK_STORE(dst, tmp0);
+ AES_BLOCK_STORE(dst + AES_BLOCK_LENGTH, tmp1);
+
+ aegis128l_update(state, msg0, msg1);
+}
+
+static void
+aegis128l_dec(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg0, msg1;
+
+ msg0 = AES_BLOCK_LOAD(src);
+ msg1 = AES_BLOCK_LOAD(src + AES_BLOCK_LENGTH);
+ msg0 = AES_BLOCK_XOR(msg0, state[6]);
+ msg0 = AES_BLOCK_XOR(msg0, state[1]);
+ msg1 = AES_BLOCK_XOR(msg1, state[5]);
+ msg1 = AES_BLOCK_XOR(msg1, state[2]);
+ msg0 = AES_BLOCK_XOR(msg0, AES_BLOCK_AND(state[2], state[3]));
+ msg1 = AES_BLOCK_XOR(msg1, AES_BLOCK_AND(state[6], state[7]));
+ AES_BLOCK_STORE(dst, msg0);
+ AES_BLOCK_STORE(dst + AES_BLOCK_LENGTH, msg1);
+
+ aegis128l_update(state, msg0, msg1);
+}
+
+static void
+aegis128l_declast(uint8_t *const dst, const uint8_t *const src, size_t len,
+ aes_block_t *const state)
+{
+ uint8_t pad[RATE];
+ aes_block_t msg0, msg1;
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, src, len);
+
+ msg0 = AES_BLOCK_LOAD(pad);
+ msg1 = AES_BLOCK_LOAD(pad + AES_BLOCK_LENGTH);
+ msg0 = AES_BLOCK_XOR(msg0, state[6]);
+ msg0 = AES_BLOCK_XOR(msg0, state[1]);
+ msg1 = AES_BLOCK_XOR(msg1, state[5]);
+ msg1 = AES_BLOCK_XOR(msg1, state[2]);
+ msg0 = AES_BLOCK_XOR(msg0, AES_BLOCK_AND(state[2], state[3]));
+ msg1 = AES_BLOCK_XOR(msg1, AES_BLOCK_AND(state[6], state[7]));
+ AES_BLOCK_STORE(pad, msg0);
+ AES_BLOCK_STORE(pad + AES_BLOCK_LENGTH, msg1);
+
+ memset(pad + len, 0, sizeof pad - len);
+ memcpy(dst, pad, len);
+
+ msg0 = AES_BLOCK_LOAD(pad);
+ msg1 = AES_BLOCK_LOAD(pad + AES_BLOCK_LENGTH);
+
+ aegis128l_update(state, msg0, msg1);
+}
+
+static int
+encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
+ const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
+{
+ aes_block_t state[8];
+ CRYPTO_ALIGN(RATE) uint8_t src[RATE];
+ CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
+ size_t i;
+
+ aegis128l_init(k, npub, state);
+
+ for (i = 0; i + RATE * 2 <= adlen; i += RATE * 2) {
+ aegis128l_absorb2(ad + i, state);
+ }
+ for (; i + RATE <= adlen; i += RATE) {
+ aegis128l_absorb(ad + i, state);
+ }
+ if (adlen % RATE) {
+ memset(src, 0, RATE);
+ memcpy(src, ad + i, adlen % RATE);
+ aegis128l_absorb(src, state);
+ }
+ for (i = 0; i + RATE <= mlen; i += RATE) {
+ aegis128l_enc(c + i, m + i, state);
+ }
+ if (mlen % RATE) {
+ memset(src, 0, RATE);
+ memcpy(src, m + i, mlen % RATE);
+ aegis128l_enc(dst, src, state);
+ memcpy(c + i, dst, mlen % RATE);
+ }
+
+ return aegis128l_mac(mac, maclen, adlen, mlen, state);
+}
+
+static int
+decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, size_t maclen,
+ const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
+{
+ aes_block_t state[8];
+ CRYPTO_ALIGN(RATE) uint8_t src[RATE];
+ CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
+ CRYPTO_ALIGN(16) uint8_t computed_mac[32];
+ const size_t mlen = clen;
+ size_t i;
+ int ret;
+
+ aegis128l_init(k, npub, state);
+
+ for (i = 0; i + RATE * 2 <= adlen; i += RATE * 2) {
+ aegis128l_absorb2(ad + i, state);
+ }
+ for (; i + RATE <= adlen; i += RATE) {
+ aegis128l_absorb(ad + i, state);
+ }
+ if (adlen % RATE) {
+ memset(src, 0, RATE);
+ memcpy(src, ad + i, adlen % RATE);
+ aegis128l_absorb(src, state);
+ }
+ if (m != NULL) {
+ for (i = 0; i + RATE <= mlen; i += RATE) {
+ aegis128l_dec(m + i, c + i, state);
+ }
+ } else {
+ for (i = 0; i + RATE <= mlen; i += RATE) {
+ aegis128l_dec(dst, c + i, state);
+ }
+ }
+ if (mlen % RATE) {
+ if (m != NULL) {
+ aegis128l_declast(m + i, c + i, mlen % RATE, state);
+ } else {
+ aegis128l_declast(dst, c + i, mlen % RATE, state);
+ }
+ }
+
+ COMPILER_ASSERT(sizeof computed_mac >= 32);
+ ret = -1;
+ if (aegis128l_mac(computed_mac, maclen, adlen, mlen, state) == 0) {
+ if (maclen == 16) {
+ ret = crypto_verify_16(computed_mac, mac);
+ } else if (maclen == 32) {
+ ret = crypto_verify_32(computed_mac, mac);
+ }
+ }
+ if (ret != 0 && m != NULL) {
+ memset(m, 0, mlen);
+ }
+ return ret;
+}
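decrypt_detached above recomputes the tag over the whole ciphertext and only accepts it through crypto_verify_16/crypto_verify_32; on mismatch, any plaintext already written to m is wiped before returning -1. Those verifiers compare in constant time. A minimal sketch in that style, assuming the usual accumulate-then-mask construction (an illustration, not libsodium's exact source):

    /* Constant-time 16-byte comparison: 0 if equal, -1 otherwise,
       with no data-dependent branches or early exits. */
    static int
    verify16_sketch(const unsigned char x[16], const unsigned char y[16])
    {
        unsigned int d = 0U;
        int          i;

        for (i = 0; i < 16; i++) {
            d |= (unsigned int) (x[i] ^ y[i]);
        }
        return (int) (1 & ((d - 1U) >> 8)) - 1;
    }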
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c
new file mode 100644
index 0000000000..e1d60ecb4f
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.c
@@ -0,0 +1,59 @@
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_aegis128l.h"
+#include "crypto_verify_16.h"
+#include "crypto_verify_32.h"
+#include "export.h"
+#include "utils.h"
+
+#include "private/common.h"
+
+#include "crypto_aead_aegis128l.h"
+#include "private/softaes.h"
+
+#if 1
+
+#include "aegis128l_soft.h"
+
+#define AES_BLOCK_LENGTH 16
+
+typedef SoftAesBlock aes_block_t;
+#define AES_BLOCK_XOR(A, B) softaes_block_xor((A), (B))
+#define AES_BLOCK_AND(A, B) softaes_block_and((A), (B))
+#define AES_BLOCK_LOAD(A) softaes_block_load(A)
+#define AES_BLOCK_LOAD_64x2(A, B) softaes_block_load64x2((A), (B))
+#define AES_BLOCK_STORE(A, B) softaes_block_store((A), (B))
+#define AES_ENC(A, B) softaes_block_encrypt((A), (B))
+
+static inline void
+aegis128l_update(aes_block_t *const state, const aes_block_t d1, const aes_block_t d2)
+{
+ aes_block_t tmp;
+
+ tmp = state[7];
+ state[7] = AES_ENC(state[6], state[7]);
+ state[6] = AES_ENC(state[5], state[6]);
+ state[5] = AES_ENC(state[4], state[5]);
+ state[4] = AES_ENC(state[3], state[4]);
+ state[3] = AES_ENC(state[2], state[3]);
+ state[2] = AES_ENC(state[1], state[2]);
+ state[1] = AES_ENC(state[0], state[1]);
+ state[0] = AES_ENC(tmp, state[0]);
+
+ state[0] = AES_BLOCK_XOR(state[0], d1);
+ state[4] = AES_BLOCK_XOR(state[4], d2);
+}
+
+#include "aegis128l_common.h"
+
+struct aegis128l_implementation aegis128l_soft_implementation = { SODIUM_C99(.encrypt_detached =)
+ encrypt_detached,
+ SODIUM_C99(.decrypt_detached =)
+ decrypt_detached };
+
+#endif
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h
new file mode 100644
index 0000000000..df8ddece08
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/aegis128l_soft.h
@@ -0,0 +1,8 @@
+#ifndef aegis128l_soft_H
+#define aegis128l_soft_H
+
+#include "implementations.h"
+
+extern struct aegis128l_implementation aegis128l_soft_implementation;
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis128l/implementations.h b/libs/libsodium/src/crypto_aead/aegis128l/implementations.h
new file mode 100644
index 0000000000..29e7b1cb88
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis128l/implementations.h
@@ -0,0 +1,17 @@
+#ifndef aegis128l_implementations_H
+#define aegis128l_implementations_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "crypto_aead_aegis128l.h"
+
+typedef struct aegis128l_implementation {
+ int (*encrypt_detached)(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
+ const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k);
+ int (*decrypt_detached)(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac,
+ size_t maclen, const uint8_t *ad, size_t adlen, const uint8_t *npub,
+ const uint8_t *k);
+} aegis128l_implementation;
+
+#endif
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c b/libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c
new file mode 100644
index 0000000000..0fd8f966cf
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aead_aegis256.c
@@ -0,0 +1,158 @@
+
+#include <errno.h>
+#include <stdlib.h>
+
+#include "core.h"
+#include "crypto_aead_aegis256.h"
+#include "private/common.h"
+#include "private/implementations.h"
+#include "randombytes.h"
+#include "runtime.h"
+
+#include "aegis256_soft.h"
+
+#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
+#include "aegis256_armcrypto.h"
+#endif
+
+#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
+#include "aegis256_aesni.h"
+#endif
+
+static const aegis256_implementation *implementation = &aegis256_soft_implementation;
+
+size_t
+crypto_aead_aegis256_keybytes(void)
+{
+ return crypto_aead_aegis256_KEYBYTES;
+}
+
+size_t
+crypto_aead_aegis256_nsecbytes(void)
+{
+ return crypto_aead_aegis256_NSECBYTES;
+}
+
+size_t
+crypto_aead_aegis256_npubbytes(void)
+{
+ return crypto_aead_aegis256_NPUBBYTES;
+}
+
+size_t
+crypto_aead_aegis256_abytes(void)
+{
+ return crypto_aead_aegis256_ABYTES;
+}
+
+size_t
+crypto_aead_aegis256_messagebytes_max(void)
+{
+ return crypto_aead_aegis256_MESSAGEBYTES_MAX;
+}
+
+void
+crypto_aead_aegis256_keygen(unsigned char k[crypto_aead_aegis256_KEYBYTES])
+{
+ randombytes_buf(k, crypto_aead_aegis256_KEYBYTES);
+}
+
+int
+crypto_aead_aegis256_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ unsigned long long clen = 0ULL;
+ int ret;
+
+ ret =
+ crypto_aead_aegis256_encrypt_detached(c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ clen = mlen + crypto_aead_aegis256_ABYTES;
+ }
+ *clen_p = clen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_aegis256_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k)
+{
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= crypto_aead_aegis256_ABYTES) {
+ ret = crypto_aead_aegis256_decrypt_detached(m, nsec, c, clen - crypto_aead_aegis256_ABYTES,
+ c + clen - crypto_aead_aegis256_ABYTES, ad,
+ adlen, npub, k);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - crypto_aead_aegis256_ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_aegis256_encrypt_detached(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ const size_t maclen = crypto_aead_aegis256_ABYTES;
+
+ if (maclen_p != NULL) {
+ *maclen_p = maclen;
+ }
+ if (mlen > crypto_aead_aegis256_MESSAGEBYTES_MAX ||
+ adlen > crypto_aead_aegis256_MESSAGEBYTES_MAX) {
+ sodium_misuse();
+ }
+ return implementation->encrypt_detached(c, mac, maclen, m, (size_t) mlen, ad, (size_t) adlen,
+ npub, k);
+}
+
+int
+crypto_aead_aegis256_decrypt_detached(unsigned char *m, unsigned char *nsec, const unsigned char *c,
+ unsigned long long clen, const unsigned char *mac,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k)
+{
+ const size_t maclen = crypto_aead_aegis256_ABYTES;
+
+ if (clen > crypto_aead_aegis256_MESSAGEBYTES_MAX ||
+ adlen > crypto_aead_aegis256_MESSAGEBYTES_MAX) {
+ return -1;
+ }
+ return implementation->decrypt_detached(m, c, (size_t) clen, mac, maclen, ad, (size_t) adlen,
+ npub, k);
+}
+
+int
+_crypto_aead_aegis256_pick_best_implementation(void)
+{
+ implementation = &aegis256_soft_implementation;
+
+#if defined(HAVE_ARMCRYPTO) && defined(NATIVE_LITTLE_ENDIAN)
+ if (sodium_runtime_has_armcrypto()) {
+ implementation = &aegis256_armcrypto_implementation;
+ return 0;
+ }
+#endif
+
+#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
+ if (sodium_runtime_has_aesni() & sodium_runtime_has_avx()) {
+ implementation = &aegis256_aesni_implementation;
+ return 0;
+ }
+#endif
+ return 0; /* LCOV_EXCL_LINE */
+}
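The AEGIS-256 wrapper mirrors the 128L one, with one practical difference worth noting: the public nonce is 32 bytes (crypto_aead_aegis256_NPUBBYTES), large enough that drawing it at random per message is safe in practice. A decryption-side sketch (CLEN and AD_LEN are placeholders):

    unsigned long long mlen;
    unsigned char      m[CLEN - crypto_aead_aegis256_ABYTES];

    if (crypto_aead_aegis256_decrypt(m, &mlen, NULL, c, CLEN,
                                     ad, AD_LEN, npub, k) != 0) {
        /* truncated, forged or corrupted input: m has been wiped, discard it */
    }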
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c
new file mode 100644
index 0000000000..96aa0036ba
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.c
@@ -0,0 +1,65 @@
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_aegis256.h"
+#include "crypto_verify_16.h"
+#include "crypto_verify_32.h"
+#include "export.h"
+#include "utils.h"
+
+#include "private/common.h"
+
+#if defined(HAVE_AVXINTRIN_H) && defined(HAVE_WMMINTRIN_H)
+
+#include "aegis256_aesni.h"
+
+#ifdef __clang__
+#pragma clang attribute push(__attribute__((target("aes,avx"))), apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC target("aes,avx")
+#endif
+
+#include "private/sse2_64_32.h"
+#include <immintrin.h>
+#include <wmmintrin.h>
+
+#define AES_BLOCK_LENGTH 16
+
+typedef __m128i aes_block_t;
+#define AES_BLOCK_XOR(A, B) _mm_xor_si128((A), (B))
+#define AES_BLOCK_AND(A, B) _mm_and_si128((A), (B))
+#define AES_BLOCK_LOAD(A) _mm_loadu_si128((const aes_block_t *) (const void *) (A))
+#define AES_BLOCK_LOAD_64x2(A, B) _mm_set_epi64x((long long) (A), (long long) (B))
+#define AES_BLOCK_STORE(A, B) _mm_storeu_si128((aes_block_t *) (void *) (A), (B))
+#define AES_ENC(A, B) _mm_aesenc_si128((A), (B))
+
+static inline void
+aegis256_update(aes_block_t *const state, const aes_block_t d)
+{
+ aes_block_t tmp;
+
+ tmp = state[5];
+ state[5] = AES_ENC(state[4], state[5]);
+ state[4] = AES_ENC(state[3], state[4]);
+ state[3] = AES_ENC(state[2], state[3]);
+ state[2] = AES_ENC(state[1], state[2]);
+ state[1] = AES_ENC(state[0], state[1]);
+ state[0] = AES_BLOCK_XOR(AES_ENC(tmp, state[0]), d);
+}
+
+#include "aegis256_common.h"
+
+struct aegis256_implementation aegis256_aesni_implementation = { SODIUM_C99(.encrypt_detached =)
+ encrypt_detached,
+ SODIUM_C99(.decrypt_detached =)
+ decrypt_detached };
+
+#ifdef __clang__
+#pragma clang attribute pop
+#endif
+
+#endif
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h
new file mode 100644
index 0000000000..21f4d819b9
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_aesni.h
@@ -0,0 +1,8 @@
+#ifndef aegis256_aesni_H
+#define aegis256_aesni_H
+
+#include "implementations.h"
+
+extern struct aegis256_implementation aegis256_aesni_implementation;
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h
new file mode 100644
index 0000000000..a9bd4ad392
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_armcrypto.h
@@ -0,0 +1,8 @@
+#ifndef aegis256_armcrypto_H
+#define aegis256_armcrypto_H
+
+#include "implementations.h"
+
+extern struct aegis256_implementation aegis256_armcrypto_implementation;
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h
new file mode 100644
index 0000000000..adf837a922
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_common.h
@@ -0,0 +1,232 @@
+#define RATE 16
+
+static void
+aegis256_init(const uint8_t *key, const uint8_t *nonce, aes_block_t *const state)
+{
+ static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
+ const uint8_t c0_[AES_BLOCK_LENGTH] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+ 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 };
+ static CRYPTO_ALIGN(AES_BLOCK_LENGTH)
+ const uint8_t c1_[AES_BLOCK_LENGTH] = { 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+ 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd };
+
+ const aes_block_t c0 = AES_BLOCK_LOAD(c0_);
+ const aes_block_t c1 = AES_BLOCK_LOAD(c1_);
+ const aes_block_t k0 = AES_BLOCK_LOAD(key);
+ const aes_block_t k1 = AES_BLOCK_LOAD(key + AES_BLOCK_LENGTH);
+ const aes_block_t n0 = AES_BLOCK_LOAD(nonce);
+ const aes_block_t n1 = AES_BLOCK_LOAD(nonce + AES_BLOCK_LENGTH);
+ const aes_block_t k0_n0 = AES_BLOCK_XOR(k0, n0);
+ const aes_block_t k1_n1 = AES_BLOCK_XOR(k1, n1);
+ int i;
+
+ state[0] = k0_n0;
+ state[1] = k1_n1;
+ state[2] = c1;
+ state[3] = c0;
+ state[4] = AES_BLOCK_XOR(k0, c0);
+ state[5] = AES_BLOCK_XOR(k1, c1);
+ for (i = 0; i < 4; i++) {
+ aegis256_update(state, k0);
+ aegis256_update(state, k1);
+ aegis256_update(state, k0_n0);
+ aegis256_update(state, k1_n1);
+ }
+}
+
+static int
+aegis256_mac(uint8_t *mac, size_t maclen, size_t adlen, size_t mlen, aes_block_t *const state)
+{
+ aes_block_t tmp;
+ int i;
+
+ tmp = AES_BLOCK_LOAD_64x2(((uint64_t) mlen) << 3, ((uint64_t) adlen) << 3);
+ tmp = AES_BLOCK_XOR(tmp, state[3]);
+
+ for (i = 0; i < 7; i++) {
+ aegis256_update(state, tmp);
+ }
+
+ if (maclen == 16) {
+ tmp = AES_BLOCK_XOR(state[5], state[4]);
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[3], state[2]));
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_XOR(state[1], state[0]));
+ AES_BLOCK_STORE(mac, tmp);
+ } else if (maclen == 32) {
+ tmp = AES_BLOCK_XOR(AES_BLOCK_XOR(state[2], state[1]), state[0]);
+ AES_BLOCK_STORE(mac, tmp);
+ tmp = AES_BLOCK_XOR(AES_BLOCK_XOR(state[5], state[4]), state[3]);
+ AES_BLOCK_STORE(mac + 16, tmp);
+ } else {
+ memset(mac, 0, maclen);
+ return -1;
+ }
+ return 0;
+}
+
+static inline void
+aegis256_absorb(const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg;
+
+ msg = AES_BLOCK_LOAD(src);
+ aegis256_update(state, msg);
+}
+
+static inline void
+aegis256_absorb2(const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg, msg2;
+
+ msg = AES_BLOCK_LOAD(src + 0 * AES_BLOCK_LENGTH);
+ msg2 = AES_BLOCK_LOAD(src + 1 * AES_BLOCK_LENGTH);
+ aegis256_update(state, msg);
+ aegis256_update(state, msg2);
+}
+
+static void
+aegis256_enc(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg;
+ aes_block_t tmp;
+
+ msg = AES_BLOCK_LOAD(src);
+ tmp = AES_BLOCK_XOR(msg, state[5]);
+ tmp = AES_BLOCK_XOR(tmp, state[4]);
+ tmp = AES_BLOCK_XOR(tmp, state[1]);
+ tmp = AES_BLOCK_XOR(tmp, AES_BLOCK_AND(state[2], state[3]));
+ AES_BLOCK_STORE(dst, tmp);
+
+ aegis256_update(state, msg);
+}
+
+static void
+aegis256_dec(uint8_t *const dst, const uint8_t *const src, aes_block_t *const state)
+{
+ aes_block_t msg;
+
+ msg = AES_BLOCK_LOAD(src);
+ msg = AES_BLOCK_XOR(msg, state[5]);
+ msg = AES_BLOCK_XOR(msg, state[4]);
+ msg = AES_BLOCK_XOR(msg, state[1]);
+ msg = AES_BLOCK_XOR(msg, AES_BLOCK_AND(state[2], state[3]));
+ AES_BLOCK_STORE(dst, msg);
+
+ aegis256_update(state, msg);
+}
+
+static void
+aegis256_declast(uint8_t *const dst, const uint8_t *const src, size_t len, aes_block_t *const state)
+{
+ uint8_t pad[RATE];
+ aes_block_t msg;
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, src, len);
+
+ msg = AES_BLOCK_LOAD(pad);
+ msg = AES_BLOCK_XOR(msg, state[5]);
+ msg = AES_BLOCK_XOR(msg, state[4]);
+ msg = AES_BLOCK_XOR(msg, state[1]);
+ msg = AES_BLOCK_XOR(msg, AES_BLOCK_AND(state[2], state[3]));
+ AES_BLOCK_STORE(pad, msg);
+
+ memset(pad + len, 0, sizeof pad - len);
+ memcpy(dst, pad, len);
+
+ msg = AES_BLOCK_LOAD(pad);
+
+ aegis256_update(state, msg);
+}
+
+static int
+encrypt_detached(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
+ const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
+{
+ aes_block_t state[6];
+ CRYPTO_ALIGN(RATE) uint8_t src[RATE];
+ CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
+ size_t i;
+
+ aegis256_init(k, npub, state);
+
+ for (i = 0; i + 2 * RATE <= adlen; i += 2 * RATE) {
+ aegis256_absorb2(ad + i, state);
+ }
+ for (; i + RATE <= adlen; i += RATE) {
+ aegis256_absorb(ad + i, state);
+ }
+ if (adlen % RATE) {
+ memset(src, 0, RATE);
+ memcpy(src, ad + i, adlen % RATE);
+ aegis256_absorb(src, state);
+ }
+ for (i = 0; i + RATE <= mlen; i += RATE) {
+ aegis256_enc(c + i, m + i, state);
+ }
+ if (mlen % RATE) {
+ memset(src, 0, RATE);
+ memcpy(src, m + i, mlen % RATE);
+ aegis256_enc(dst, src, state);
+ memcpy(c + i, dst, mlen % RATE);
+ }
+
+ return aegis256_mac(mac, maclen, adlen, mlen, state);
+}
+
+static int
+decrypt_detached(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac, size_t maclen,
+ const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k)
+{
+ aes_block_t state[6];
+ CRYPTO_ALIGN(RATE) uint8_t src[RATE];
+ CRYPTO_ALIGN(RATE) uint8_t dst[RATE];
+ CRYPTO_ALIGN(16) uint8_t computed_mac[32];
+ const size_t mlen = clen;
+ size_t i;
+ int ret;
+
+ aegis256_init(k, npub, state);
+
+ for (i = 0; i + 2 * RATE <= adlen; i += 2 * RATE) {
+ aegis256_absorb2(ad + i, state);
+ }
+ for (; i + RATE <= adlen; i += RATE) {
+ aegis256_absorb(ad + i, state);
+ }
+ if (adlen % RATE) {
+ memset(src, 0, RATE);
+ memcpy(src, ad + i, adlen % RATE);
+ aegis256_absorb(src, state);
+ }
+ if (m != NULL) {
+ for (i = 0; i + RATE <= mlen; i += RATE) {
+ aegis256_dec(m + i, c + i, state);
+ }
+ } else {
+ for (i = 0; i + RATE <= mlen; i += RATE) {
+ aegis256_dec(dst, c + i, state);
+ }
+ }
+ if (mlen % RATE) {
+ if (m != NULL) {
+ aegis256_declast(m + i, c + i, mlen % RATE, state);
+ } else {
+ aegis256_declast(dst, c + i, mlen % RATE, state);
+ }
+ }
+
+ COMPILER_ASSERT(sizeof computed_mac >= 32);
+ ret = -1;
+ if (aegis256_mac(computed_mac, maclen, adlen, mlen, state) == 0) {
+ if (maclen == 16) {
+ ret = crypto_verify_16(computed_mac, mac);
+ } else if (maclen == 32) {
+ ret = crypto_verify_32(computed_mac, mac);
+ }
+ }
+ if (ret != 0 && m != NULL) {
+ memset(m, 0, mlen);
+ }
+ return ret;
+}
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c
new file mode 100644
index 0000000000..38024d17ad
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.c
@@ -0,0 +1,54 @@
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_aegis256.h"
+#include "crypto_verify_16.h"
+#include "crypto_verify_32.h"
+#include "export.h"
+#include "utils.h"
+
+#include "private/common.h"
+
+#include "crypto_aead_aegis256.h"
+#include "private/softaes.h"
+
+#if 1
+
+#include "aegis256_soft.h"
+
+#define AES_BLOCK_LENGTH 16
+
+typedef SoftAesBlock aes_block_t;
+#define AES_BLOCK_XOR(A, B) softaes_block_xor((A), (B))
+#define AES_BLOCK_AND(A, B) softaes_block_and((A), (B))
+#define AES_BLOCK_LOAD(A) softaes_block_load(A)
+#define AES_BLOCK_LOAD_64x2(A, B) softaes_block_load64x2((A), (B))
+#define AES_BLOCK_STORE(A, B) softaes_block_store((A), (B))
+#define AES_ENC(A, B) softaes_block_encrypt((A), (B))
+
+static inline void
+aegis256_update(aes_block_t *const state, const aes_block_t d)
+{
+ aes_block_t tmp;
+
+ tmp = state[5];
+ state[5] = AES_ENC(state[4], state[5]);
+ state[4] = AES_ENC(state[3], state[4]);
+ state[3] = AES_ENC(state[2], state[3]);
+ state[2] = AES_ENC(state[1], state[2]);
+ state[1] = AES_ENC(state[0], state[1]);
+ state[0] = AES_BLOCK_XOR(AES_ENC(tmp, state[0]), d);
+}
+
+#include "aegis256_common.h"
+
+struct aegis256_implementation aegis256_soft_implementation = { SODIUM_C99(.encrypt_detached =)
+ encrypt_detached,
+ SODIUM_C99(.decrypt_detached =)
+ decrypt_detached };
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h
new file mode 100644
index 0000000000..c20198de3f
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/aegis256_soft.h
@@ -0,0 +1,8 @@
+#ifndef aegis256_soft_H
+#define aegis256_soft_H
+
+#include "implementations.h"
+
+extern struct aegis256_implementation aegis256_soft_implementation;
+
+#endif
\ No newline at end of file
diff --git a/libs/libsodium/src/crypto_aead/aegis256/implementations.h b/libs/libsodium/src/crypto_aead/aegis256/implementations.h
new file mode 100644
index 0000000000..9efbf38763
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aegis256/implementations.h
@@ -0,0 +1,17 @@
+#ifndef aegis256_implementations_H
+#define aegis256_implementations_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "crypto_aead_aegis256.h"
+
+typedef struct aegis256_implementation {
+ int (*encrypt_detached)(uint8_t *c, uint8_t *mac, size_t maclen, const uint8_t *m, size_t mlen,
+ const uint8_t *ad, size_t adlen, const uint8_t *npub, const uint8_t *k);
+ int (*decrypt_detached)(uint8_t *m, const uint8_t *c, size_t clen, const uint8_t *mac,
+ size_t maclen, const uint8_t *ad, size_t adlen, const uint8_t *npub,
+ const uint8_t *k);
+} aegis256_implementation;
+
+#endif
diff --git a/libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c b/libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c
new file mode 100644
index 0000000000..2946ba873b
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aes256gcm/aead_aes256gcm.c
@@ -0,0 +1,157 @@
+#include <errno.h>
+#include <stdlib.h>
+
+#include "crypto_aead_aes256gcm.h"
+#include "private/common.h"
+#include "randombytes.h"
+
+size_t
+crypto_aead_aes256gcm_keybytes(void)
+{
+ return crypto_aead_aes256gcm_KEYBYTES;
+}
+
+size_t
+crypto_aead_aes256gcm_nsecbytes(void)
+{
+ return crypto_aead_aes256gcm_NSECBYTES;
+}
+
+size_t
+crypto_aead_aes256gcm_npubbytes(void)
+{
+ return crypto_aead_aes256gcm_NPUBBYTES;
+}
+
+size_t
+crypto_aead_aes256gcm_abytes(void)
+{
+ return crypto_aead_aes256gcm_ABYTES;
+}
+
+size_t
+crypto_aead_aes256gcm_statebytes(void)
+{
+ return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U;
+}
+
+size_t
+crypto_aead_aes256gcm_messagebytes_max(void)
+{
+ return crypto_aead_aes256gcm_MESSAGEBYTES_MAX;
+}
+
+void
+crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES])
+{
+ randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES);
+}
+
+#if !((defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN)) || \
+ (defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H)))
+
+#ifndef ENOSYS
+#define ENOSYS ENXIO
+#endif
+
+int
+crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const unsigned char *k)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long mlen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *nsec,
+ const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p,
+ const unsigned char *m, unsigned long long mlen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *nsec, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
+ unsigned char *nsec, const unsigned char *c,
+ unsigned long long clen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+crypto_aead_aes256gcm_is_available(void)
+{
+ return 0;
+}
+
+#endif
\ No newline at end of file
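When neither the ARM-Crypto path nor the x86 intrinsic headers are available, this portable fallback stubs out every crypto_aead_aes256gcm_* entry point with errno = ENOSYS and a -1 return, and crypto_aead_aes256gcm_is_available() reports 0. Callers are expected to probe before use; a hedged sketch:

    /* Probe for hardware AES256-GCM support before relying on it. */
    if (crypto_aead_aes256gcm_is_available() != 1) {
        /* No accelerated AES-GCM on this build/CPU; fall back to another
           AEAD, e.g. the XChaCha20-Poly1305 construction in this same tree. */
    }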
diff --git a/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c b/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
index c0d8674af6..7d9cfd12e9 100644
--- a/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
+++ b/libs/libsodium/src/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
@@ -1,17 +1,12 @@
-
-/*
- * AES256-GCM, based on the "Intel Carry-Less Multiplication Instruction and its Usage for Computing
- * the GCM Mode" paper and reference code, using the aggregated reduction method.
- * Originally adapted by Romain Dolbeau.
- */
-
#include <errno.h>
+#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "crypto_aead_aes256gcm.h"
+#include "crypto_verify_16.h"
#include "export.h"
#include "private/common.h"
#include "private/sse2_64_32.h"
@@ -21,976 +16,803 @@
#if defined(HAVE_TMMINTRIN_H) && defined(HAVE_WMMINTRIN_H)
-# ifdef __GNUC__
-# pragma GCC target("ssse3")
-# pragma GCC target("aes")
-# pragma GCC target("pclmul")
+# ifdef __clang__
+# pragma clang attribute push(__attribute__((target("aes,avx,pclmul"))), apply_to = function)
+# elif defined(__GNUC__)
+# pragma GCC target("aes,avx,pclmul")
# endif
+#if !defined(_MSC_VER) || _MSC_VER < 1800
+#define __vectorcall
+#endif
+
#include <tmmintrin.h>
#include <wmmintrin.h>
-#ifndef ENOSYS
-# define ENOSYS ENXIO
-#endif
-
-#if defined(__INTEL_COMPILER) || defined(_bswap64)
-#elif defined(_MSC_VER)
-# define _bswap64(a) _byteswap_uint64(a)
-#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2))
-# define _bswap64(a) __builtin_bswap64(a)
-#else
-static inline uint64_t
-_bswap64(const uint64_t x)
-{
- return
- ((x << 56) & 0xFF00000000000000UL) | ((x << 40) & 0x00FF000000000000UL) |
- ((x << 24) & 0x0000FF0000000000UL) | ((x << 8) & 0x000000FF00000000UL) |
- ((x >> 8) & 0x00000000FF000000UL) | ((x >> 24) & 0x0000000000FF0000UL) |
- ((x >> 40) & 0x000000000000FF00UL) | ((x >> 56) & 0x00000000000000FFUL);
+#define ABYTES crypto_aead_aes256gcm_ABYTES
+#define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES
+#define KEYBYTES crypto_aead_aes256gcm_KEYBYTES
+
+#define PARALLEL_BLOCKS 7
+#undef USE_KARATSUBA_MULTIPLICATION
+
+typedef __m128i BlockVec;
+
+#define LOAD128(a) _mm_loadu_si128((const BlockVec *) (a))
+#define STORE128(a, b) _mm_storeu_si128((BlockVec *) (a), (b))
+#define AES_ENCRYPT(block_vec, rkey) _mm_aesenc_si128((block_vec), (rkey))
+#define AES_ENCRYPTLAST(block_vec, rkey) _mm_aesenclast_si128((block_vec), (rkey))
+#define AES_KEYGEN(block_vec, rc) _mm_aeskeygenassist_si128((block_vec), (rc))
+#define XOR128(a, b) _mm_xor_si128((a), (b))
+#define AND128(a, b) _mm_and_si128((a), (b))
+#define OR128(a, b) _mm_or_si128((a), (b))
+#define SET64x2(a, b) _mm_set_epi64x((uint64_t) (a), (uint64_t) (b))
+#define ZERO128 _mm_setzero_si128()
+#define ONE128 SET64x2(0, 1)
+#define ADD64x2(a, b) _mm_add_epi64((a), (b))
+#define SUB64x2(a, b) _mm_sub_epi64((a), (b))
+#define SHL64x2(a, b) _mm_slli_epi64((a), (b))
+#define SHR64x2(a, b) _mm_srli_epi64((a), (b))
+#define REV128(x) \
+ _mm_shuffle_epi8((x), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
+#define SHUFFLE32x4(x, a, b, c, d) _mm_shuffle_epi32((x), _MM_SHUFFLE((d), (c), (b), (a)))
+#define BYTESHL128(a, b) _mm_slli_si128(a, b)
+#define BYTESHR128(a, b) _mm_srli_si128(a, b)
+#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b)))
+#define CLMULLO128(a, b) _mm_clmulepi64_si128((a), (b), 0x00)
+#define CLMULHI128(a, b) _mm_clmulepi64_si128((a), (b), 0x11)
+#define CLMULLOHI128(a, b) _mm_clmulepi64_si128((a), (b), 0x10)
+#define CLMULHILO128(a, b) _mm_clmulepi64_si128((a), (b), 0x01)
+#define PREFETCH_READ(x) _mm_prefetch((x), _MM_HINT_T1)
+#define PREFETCH_WRITE(x) _mm_prefetch((x), _MM_HINT_T1)
+
+#define ROUNDS 14
+
+#define PC_COUNT (2 * PARALLEL_BLOCKS)
+
+typedef struct I256 {
+ BlockVec hi;
+ BlockVec lo;
+ BlockVec mid;
+} I256;
+
+typedef BlockVec Precomp;
+
+typedef struct GHash {
+ BlockVec acc;
+} GHash;
+
+typedef struct State {
+ BlockVec rkeys[ROUNDS + 1];
+ Precomp hx[PC_COUNT];
+} State;
+
+static void __vectorcall expand256(const unsigned char key[KEYBYTES], BlockVec rkeys[1 + ROUNDS])
+{
+ BlockVec t1, t2, s;
+ size_t i = 0;
+
+#define EXPAND_KEY_1(RC) \
+ rkeys[i++] = t2; \
+ s = AES_KEYGEN(t2, RC); \
+ t1 = XOR128(t1, BYTESHL128(t1, 4)); \
+ t1 = XOR128(t1, BYTESHL128(t1, 8)); \
+ t1 = XOR128(t1, SHUFFLE32x4(s, 3, 3, 3, 3));
+
+#define EXPAND_KEY_2(RC) \
+ rkeys[i++] = t1; \
+ s = AES_KEYGEN(t1, RC); \
+ t2 = XOR128(t2, BYTESHL128(t2, 4)); \
+ t2 = XOR128(t2, BYTESHL128(t2, 8)); \
+ t2 = XOR128(t2, SHUFFLE32x4(s, 2, 2, 2, 2));
+
+ t1 = LOAD128(&key[0]);
+ t2 = LOAD128(&key[16]);
+
+ rkeys[i++] = t1;
+ EXPAND_KEY_1(0x01);
+ EXPAND_KEY_2(0x01);
+ EXPAND_KEY_1(0x02);
+ EXPAND_KEY_2(0x02);
+ EXPAND_KEY_1(0x04);
+ EXPAND_KEY_2(0x04);
+ EXPAND_KEY_1(0x08);
+ EXPAND_KEY_2(0x08);
+ EXPAND_KEY_1(0x10);
+ EXPAND_KEY_2(0x10);
+ EXPAND_KEY_1(0x20);
+ EXPAND_KEY_2(0x20);
+ EXPAND_KEY_1(0x40);
+ rkeys[i++] = t1;
}
-#endif
-typedef struct aes256gcm_state {
- __m128i rkeys[16];
- unsigned char H[16];
-} aes256gcm_state;
+/* Encrypt a single AES block */
static inline void
-aesni_key256_expand(const unsigned char *key, __m128i * const rkeys)
+encrypt(const State *st, unsigned char dst[16], const unsigned char src[16])
{
- __m128i X0, X1, X2, X3;
- int i = 0;
-
- X0 = _mm_loadu_si128((const __m128i *) &key[0]);
- rkeys[i++] = X0;
-
- X2 = _mm_loadu_si128((const __m128i *) &key[16]);
- rkeys[i++] = X2;
-
-#define EXPAND_KEY_1(S) do { \
- X1 = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(X2, (S)), 0xff); \
- X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X0), 0x10)); \
- X0 = _mm_xor_si128(X0, X3); \
- X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X0), 0x8c)); \
- X0 = _mm_xor_si128(_mm_xor_si128(X0, X3), X1); \
- rkeys[i++] = X0; \
-} while (0)
-
-#define EXPAND_KEY_2(S) do { \
- X1 = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(X0, (S)), 0xaa); \
- X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X2), 0x10)); \
- X2 = _mm_xor_si128(X2, X3); \
- X3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(X3), _mm_castsi128_ps(X2), 0x8c)); \
- X2 = _mm_xor_si128(_mm_xor_si128(X2, X3), X1); \
- rkeys[i++] = X2; \
-} while (0)
-
- X3 = _mm_setzero_si128();
- EXPAND_KEY_1(0x01); EXPAND_KEY_2(0x01);
- EXPAND_KEY_1(0x02); EXPAND_KEY_2(0x02);
- EXPAND_KEY_1(0x04); EXPAND_KEY_2(0x04);
- EXPAND_KEY_1(0x08); EXPAND_KEY_2(0x08);
- EXPAND_KEY_1(0x10); EXPAND_KEY_2(0x10);
- EXPAND_KEY_1(0x20); EXPAND_KEY_2(0x20);
- EXPAND_KEY_1(0x40);
+ BlockVec t;
+
+ size_t i;
+
+ t = XOR128(LOAD128(src), st->rkeys[0]);
+ for (i = 1; i < ROUNDS; i++) {
+ t = AES_ENCRYPT(t, st->rkeys[i]);
+ }
+ t = AES_ENCRYPTLAST(t, st->rkeys[ROUNDS]);
+ STORE128(dst, t);
}
-/** single, by-the-book AES encryption with AES-NI */
-static inline void
-aesni_encrypt1(unsigned char *out, __m128i nv, const __m128i *rkeys)
+/* Encrypt and add a single AES block */
+
+static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char dst[16],
+ const unsigned char src[16],
+ const BlockVec counter)
{
- __m128i temp = _mm_xor_si128(nv, rkeys[0]);
-
- temp = _mm_aesenc_si128(temp, rkeys[1]);
- temp = _mm_aesenc_si128(temp, rkeys[2]);
- temp = _mm_aesenc_si128(temp, rkeys[3]);
- temp = _mm_aesenc_si128(temp, rkeys[4]);
- temp = _mm_aesenc_si128(temp, rkeys[5]);
- temp = _mm_aesenc_si128(temp, rkeys[6]);
- temp = _mm_aesenc_si128(temp, rkeys[7]);
- temp = _mm_aesenc_si128(temp, rkeys[8]);
- temp = _mm_aesenc_si128(temp, rkeys[9]);
- temp = _mm_aesenc_si128(temp, rkeys[10]);
- temp = _mm_aesenc_si128(temp, rkeys[11]);
- temp = _mm_aesenc_si128(temp, rkeys[12]);
- temp = _mm_aesenc_si128(temp, rkeys[13]);
-
- temp = _mm_aesenclast_si128(temp, rkeys[14]);
- _mm_storeu_si128((__m128i *) out, temp);
-}
+ BlockVec ts;
+ size_t i;
-/** multiple-blocks-at-once AES encryption with AES-NI ;
- on Haswell, aesenc has a latency of 7 and a throughput of 1
- so the sequence of aesenc should be bubble-free if you
- have at least 8 blocks. Let's build an arbitratry-sized
- function */
-/* Step 1 : loading the nonce */
-/* load & increment the n vector (non-vectorized, unused for now) */
-#define NVDECLx(a) \
- __m128i nv##a
-
-#define NVx(a) \
- nv##a = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) n), pt); \
- n[3]++
-
-/* Step 2 : define value in round one (xor with subkey #0, aka key) */
-#define TEMPDECLx(a) \
- __m128i temp##a
-
-#define TEMPx(a) \
- temp##a = _mm_xor_si128(nv##a, rkeys[0])
-
-/* Step 3: one round of AES */
-#define AESENCx(a) \
- temp##a = _mm_aesenc_si128(temp##a, rkeys[roundctr])
-
-/* Step 4: last round of AES */
-#define AESENCLASTx(a) \
- temp##a = _mm_aesenclast_si128(temp##a, rkeys[14])
-
-/* Step 5: store result */
-#define STOREx(a) \
- _mm_storeu_si128((__m128i *) (out + (a * 16)), temp##a)
-
-/* all the MAKE* macros are for automatic explicit unrolling */
-#define MAKE4(X) \
- X(0); \
- X(1); \
- X(2); \
- X(3)
-
-#define MAKE8(X) \
- X(0); \
- X(1); \
- X(2); \
- X(3); \
- X(4); \
- X(5); \
- X(6); \
- X(7)
-
-#define COUNTER_INC2(N) (N)[3] += 2
-
-/* create a function of unrolling N ; the MAKEN is the unrolling
- macro, defined above. The N in MAKEN must match N, obviously. */
-#define FUNC(N, MAKEN) \
- static inline void aesni_encrypt##N(unsigned char *out, uint32_t *n, const __m128i *rkeys) \
- { \
- const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- int roundctr; \
- MAKEN(NVDECLx); \
- MAKEN(TEMPDECLx); \
- \
- MAKEN(NVx); \
- MAKEN(TEMPx); \
- for (roundctr = 1; roundctr < 14; roundctr++) { \
- MAKEN(AESENCx); \
- } \
- MAKEN(AESENCLASTx); \
- MAKEN(STOREx); \
+ ts = XOR128(counter, st->rkeys[0]);
+ for (i = 1; i < ROUNDS; i++) {
+ ts = AES_ENCRYPT(ts, st->rkeys[i]);
}
+ ts = AES_ENCRYPTLAST(ts, st->rkeys[i]);
+ ts = XOR128(ts, LOAD128(src));
+ STORE128(dst, ts);
+}
-FUNC(8, MAKE8)
-
-/* all GF(2^128) fnctions are by the book, meaning this one:
- <https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf>
-*/
+/* Encrypt and add PARALLEL_BLOCKS AES blocks */
-static inline void
-addmul(unsigned char *c, const unsigned char *a, unsigned int xlen, const unsigned char *b)
+static inline void __vectorcall encrypt_xor_wide(const State *st,
+ unsigned char dst[16 * PARALLEL_BLOCKS],
+ const unsigned char src[16 * PARALLEL_BLOCKS],
+ const BlockVec counters[PARALLEL_BLOCKS])
{
- const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
- __m128i A, B, C;
- __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9;
- __m128i tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18;
- __m128i tmp19, tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
- __m128i tmp28, tmp29, tmp30, tmp31, tmp32, tmp33, tmp34, tmp35, tmp36;
-
- if (xlen >= 16) {
- A = _mm_loadu_si128((const __m128i *) a);
- } else {
- CRYPTO_ALIGN(16) unsigned char padded[16];
- unsigned int i;
+ BlockVec ts[PARALLEL_BLOCKS];
+ size_t i, j;
- memset(padded, 0, 16);
- for (i = 0; i < xlen; i++) {
- padded[i] = a[i];
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ ts[j] = XOR128(counters[j], st->rkeys[0]);
+ }
+ for (i = 1; i < ROUNDS; i++) {
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ ts[j] = AES_ENCRYPT(ts[j], st->rkeys[i]);
}
- A = _mm_load_si128((const __m128i *) padded);
}
- A = _mm_shuffle_epi8(A, rev);
- B = _mm_loadu_si128((const __m128i *) b);
- C = _mm_loadu_si128((const __m128i *) c);
- A = _mm_xor_si128(A, C);
- tmp3 = _mm_clmulepi64_si128(A, B, 0x00);
- tmp4 = _mm_clmulepi64_si128(A, B, 0x10);
- tmp5 = _mm_clmulepi64_si128(A, B, 0x01);
- tmp6 = _mm_clmulepi64_si128(A, B, 0x11);
- tmp10 = _mm_xor_si128(tmp4, tmp5);
- tmp13 = _mm_slli_si128(tmp10, 8);
- tmp11 = _mm_srli_si128(tmp10, 8);
- tmp15 = _mm_xor_si128(tmp3, tmp13);
- tmp17 = _mm_xor_si128(tmp6, tmp11);
- tmp7 = _mm_srli_epi32(tmp15, 31);
- tmp8 = _mm_srli_epi32(tmp17, 31);
- tmp16 = _mm_slli_epi32(tmp15, 1);
- tmp18 = _mm_slli_epi32(tmp17, 1);
- tmp9 = _mm_srli_si128(tmp7, 12);
- tmp22 = _mm_slli_si128(tmp8, 4);
- tmp25 = _mm_slli_si128(tmp7, 4);
- tmp29 = _mm_or_si128(tmp16, tmp25);
- tmp19 = _mm_or_si128(tmp18, tmp22);
- tmp20 = _mm_or_si128(tmp19, tmp9);
- tmp26 = _mm_slli_epi32(tmp29, 31);
- tmp23 = _mm_slli_epi32(tmp29, 30);
- tmp32 = _mm_slli_epi32(tmp29, 25);
- tmp27 = _mm_xor_si128(tmp26, tmp23);
- tmp28 = _mm_xor_si128(tmp27, tmp32);
- tmp24 = _mm_srli_si128(tmp28, 4);
- tmp33 = _mm_slli_si128(tmp28, 12);
- tmp30 = _mm_xor_si128(tmp29, tmp33);
- tmp2 = _mm_srli_epi32(tmp30, 1);
- tmp12 = _mm_srli_epi32(tmp30, 2);
- tmp14 = _mm_srli_epi32(tmp30, 7);
- tmp34 = _mm_xor_si128(tmp2, tmp12);
- tmp35 = _mm_xor_si128(tmp34, tmp14);
- tmp36 = _mm_xor_si128(tmp35, tmp24);
- tmp31 = _mm_xor_si128(tmp30, tmp36);
- tmp21 = _mm_xor_si128(tmp20, tmp31);
- _mm_storeu_si128((__m128i *) c, tmp21);
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ ts[j] = AES_ENCRYPTLAST(ts[j], st->rkeys[i]);
+ ts[j] = XOR128(ts[j], LOAD128(&src[16 * j]));
+ }
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ STORE128(&dst[16 * j], ts[j]);
+ }
}
-/* pure multiplication, for pre-computing powers of H */
-static inline __m128i
-mulv(__m128i A, __m128i B)
+/* Square a field element */
+
+static inline I256 __vectorcall clsq128(const BlockVec x)
{
- __m128i tmp3 = _mm_clmulepi64_si128(A, B, 0x00);
- __m128i tmp4 = _mm_clmulepi64_si128(A, B, 0x10);
- __m128i tmp5 = _mm_clmulepi64_si128(A, B, 0x01);
- __m128i tmp6 = _mm_clmulepi64_si128(A, B, 0x11);
- __m128i tmp10 = _mm_xor_si128(tmp4, tmp5);
- __m128i tmp13 = _mm_slli_si128(tmp10, 8);
- __m128i tmp11 = _mm_srli_si128(tmp10, 8);
- __m128i tmp15 = _mm_xor_si128(tmp3, tmp13);
- __m128i tmp17 = _mm_xor_si128(tmp6, tmp11);
- __m128i tmp7 = _mm_srli_epi32(tmp15, 31);
- __m128i tmp8 = _mm_srli_epi32(tmp17, 31);
- __m128i tmp16 = _mm_slli_epi32(tmp15, 1);
- __m128i tmp18 = _mm_slli_epi32(tmp17, 1);
- __m128i tmp9 = _mm_srli_si128(tmp7, 12);
- __m128i tmp22 = _mm_slli_si128(tmp8, 4);
- __m128i tmp25 = _mm_slli_si128(tmp7, 4);
- __m128i tmp29 = _mm_or_si128(tmp16, tmp25);
- __m128i tmp19 = _mm_or_si128(tmp18, tmp22);
- __m128i tmp20 = _mm_or_si128(tmp19, tmp9);
- __m128i tmp26 = _mm_slli_epi32(tmp29, 31);
- __m128i tmp23 = _mm_slli_epi32(tmp29, 30);
- __m128i tmp32 = _mm_slli_epi32(tmp29, 25);
- __m128i tmp27 = _mm_xor_si128(tmp26, tmp23);
- __m128i tmp28 = _mm_xor_si128(tmp27, tmp32);
- __m128i tmp24 = _mm_srli_si128(tmp28, 4);
- __m128i tmp33 = _mm_slli_si128(tmp28, 12);
- __m128i tmp30 = _mm_xor_si128(tmp29, tmp33);
- __m128i tmp2 = _mm_srli_epi32(tmp30, 1);
- __m128i tmp12 = _mm_srli_epi32(tmp30, 2);
- __m128i tmp14 = _mm_srli_epi32(tmp30, 7);
- __m128i tmp34 = _mm_xor_si128(tmp2, tmp12);
- __m128i tmp35 = _mm_xor_si128(tmp34, tmp14);
- __m128i tmp36 = _mm_xor_si128(tmp35, tmp24);
- __m128i tmp31 = _mm_xor_si128(tmp30, tmp36);
- __m128i C = _mm_xor_si128(tmp20, tmp31);
-
- return C;
+ const BlockVec r_lo = CLMULLO128(x, x);
+ const BlockVec r_hi = CLMULHI128(x, x);
+
+ return (I256) {
+ SODIUM_C99(.hi =) r_hi,
+ SODIUM_C99(.lo =) r_lo,
+ SODIUM_C99(.mid =) ZERO128,
+ };
}
-/* 4 multiply-accumulate at once; again
- <https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf>
- for the Aggregated Reduction Method & sample code.
- Algorithm by Krzysztof Jankowski, Pierre Laurent - Intel */
-
-#define RED_DECL(a) __m128i H##a##_X##a##_lo, H##a##_X##a##_hi, tmp##a, tmp##a##B
-#define RED_SHUFFLE(a) X##a = _mm_shuffle_epi8(X##a, rev)
-#define RED_MUL_LOW(a) H##a##_X##a##_lo = _mm_clmulepi64_si128(H##a, X##a, 0x00)
-#define RED_MUL_HIGH(a) H##a##_X##a##_hi = _mm_clmulepi64_si128(H##a, X##a, 0x11)
-#define RED_MUL_MID(a) \
- tmp##a = _mm_shuffle_epi32(H##a, 0x4e); \
- tmp##a##B = _mm_shuffle_epi32(X##a, 0x4e); \
- tmp##a = _mm_xor_si128(tmp##a, H##a); \
- tmp##a##B = _mm_xor_si128(tmp##a##B, X##a); \
- tmp##a = _mm_clmulepi64_si128(tmp##a, tmp##a##B, 0x00)
-
-#define MULREDUCE4(rev, H0_, H1_, H2_, H3_, X0_, X1_, X2_, X3_, accv) \
-do { \
- MAKE4(RED_DECL); \
- __m128i lo, hi; \
- __m128i tmp8, tmp9; \
- __m128i H0 = H0_; \
- __m128i H1 = H1_; \
- __m128i H2 = H2_; \
- __m128i H3 = H3_; \
- __m128i X0 = X0_; \
- __m128i X1 = X1_; \
- __m128i X2 = X2_; \
- __m128i X3 = X3_; \
-\
-/* byte-revert the inputs & xor the first one into the accumulator */ \
-\
- MAKE4(RED_SHUFFLE); \
- X3 = _mm_xor_si128(X3, accv); \
-\
-/* 4 low H*X (x0*h0) */ \
-\
- MAKE4(RED_MUL_LOW); \
- lo = _mm_xor_si128(H0_X0_lo, H1_X1_lo); \
- lo = _mm_xor_si128(lo, H2_X2_lo); \
- lo = _mm_xor_si128(lo, H3_X3_lo); \
-\
-/* 4 high H*X (x1*h1) */ \
-\
- MAKE4(RED_MUL_HIGH); \
- hi = _mm_xor_si128(H0_X0_hi, H1_X1_hi); \
- hi = _mm_xor_si128(hi, H2_X2_hi); \
- hi = _mm_xor_si128(hi, H3_X3_hi); \
-\
-/* 4 middle H*X, using Karatsuba, i.e. \
- x1*h0+x0*h1 =(x1+x0)*(h1+h0)-x1*h1-x0*h0 \
- we already have all x1y1 & x0y0 (accumulated in hi & lo) \
- (0 is low half and 1 is high half) \
- */ \
-/* permute the high and low 64 bits in H1 & X1, \
- so create (h0,h1) from (h1,h0) and (x0,x1) from (x1,x0), \
- then compute (h0+h1,h1+h0) and (x0+x1,x1+x0), \
- and finally multiply \
- */ \
- MAKE4(RED_MUL_MID); \
-\
-/* substracts x1*h1 and x0*h0 */ \
- tmp0 = _mm_xor_si128(tmp0, lo); \
- tmp0 = _mm_xor_si128(tmp0, hi); \
- tmp0 = _mm_xor_si128(tmp1, tmp0); \
- tmp0 = _mm_xor_si128(tmp2, tmp0); \
- tmp0 = _mm_xor_si128(tmp3, tmp0);\
-\
- /* reduction */ \
- tmp0B = _mm_slli_si128(tmp0, 8); \
- tmp0 = _mm_srli_si128(tmp0, 8); \
- lo = _mm_xor_si128(tmp0B, lo); \
- hi = _mm_xor_si128(tmp0, hi); \
- tmp3 = lo; \
- tmp2B = hi; \
- tmp3B = _mm_srli_epi32(tmp3, 31); \
- tmp8 = _mm_srli_epi32(tmp2B, 31); \
- tmp3 = _mm_slli_epi32(tmp3, 1); \
- tmp2B = _mm_slli_epi32(tmp2B, 1); \
- tmp9 = _mm_srli_si128(tmp3B, 12); \
- tmp8 = _mm_slli_si128(tmp8, 4); \
- tmp3B = _mm_slli_si128(tmp3B, 4); \
- tmp3 = _mm_or_si128(tmp3, tmp3B); \
- tmp2B = _mm_or_si128(tmp2B, tmp8); \
- tmp2B = _mm_or_si128(tmp2B, tmp9); \
- tmp3B = _mm_slli_epi32(tmp3, 31); \
- tmp8 = _mm_slli_epi32(tmp3, 30); \
- tmp9 = _mm_slli_epi32(tmp3, 25); \
- tmp3B = _mm_xor_si128(tmp3B, tmp8); \
- tmp3B = _mm_xor_si128(tmp3B, tmp9); \
- tmp8 = _mm_srli_si128(tmp3B, 4); \
- tmp3B = _mm_slli_si128(tmp3B, 12); \
- tmp3 = _mm_xor_si128(tmp3, tmp3B); \
- tmp2 = _mm_srli_epi32(tmp3, 1); \
- tmp0B = _mm_srli_epi32(tmp3, 2); \
- tmp1B = _mm_srli_epi32(tmp3, 7); \
- tmp2 = _mm_xor_si128(tmp2, tmp0B); \
- tmp2 = _mm_xor_si128(tmp2, tmp1B); \
- tmp2 = _mm_xor_si128(tmp2, tmp8); \
- tmp3 = _mm_xor_si128(tmp3, tmp2); \
- tmp2B = _mm_xor_si128(tmp2B, tmp3); \
-\
- accv = tmp2B; \
-} while(0)
-
-#define XORx(a) \
- temp##a = _mm_xor_si128(temp##a, \
- _mm_loadu_si128((const __m128i *) (in + a * 16)))
-
-#define LOADx(a) \
- __m128i in##a = _mm_loadu_si128((const __m128i *) (in + a * 16))
-
-/* full encrypt & checksum 8 blocks at once */
-#define aesni_encrypt8full(out_, n_, rkeys, in_, accum, hv_, h2v_, h3v_, h4v_, rev) \
-do { \
- unsigned char *out = out_; \
- uint32_t *n = n_; \
- const unsigned char *in = in_; \
- const __m128i hv = hv_; \
- const __m128i h2v = h2v_; \
- const __m128i h3v = h3v_; \
- const __m128i h4v = h4v_; \
- const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- __m128i accv_; \
- int roundctr; \
- \
- MAKE8(NVDECLx); \
- MAKE8(TEMPDECLx); \
- MAKE8(NVx); \
- MAKE8(TEMPx); \
- for (roundctr = 1; roundctr < 14; roundctr++) { \
- MAKE8(AESENCx); \
- } \
- MAKE8(AESENCLASTx); \
- MAKE8(XORx); \
- MAKE8(STOREx); \
- accv_ = _mm_load_si128((const __m128i *) accum); \
- MULREDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv_); \
- MULREDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv_); \
- _mm_store_si128((__m128i *) accum, accv_); \
-} while(0)
-
-/* checksum 8 blocks at once */
-#define aesni_addmul8full(in_, accum, hv_, h2v_, h3v_, h4v_, rev) \
-do { \
- const unsigned char *in = in_; \
- const __m128i hv = hv_; \
- const __m128i h2v = h2v_; \
- const __m128i h3v = h3v_; \
- const __m128i h4v = h4v_; \
- __m128i accv_; \
- \
- MAKE8(LOADx); \
- accv_ = _mm_load_si128((const __m128i *) accum); \
- MULREDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv_); \
- MULREDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv_); \
- _mm_store_si128((__m128i *) accum, accv_); \
-} while(0)
-
-/* decrypt 8 blocks at once */
-#define aesni_decrypt8full(out_, n_, rkeys, in_) \
-do { \
- unsigned char *out = out_; \
- uint32_t *n = n_; \
- const unsigned char *in = in_; \
- const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
- int roundctr; \
-\
- MAKE8(NVDECLx); \
- MAKE8(TEMPDECLx); \
- MAKE8(NVx); \
- MAKE8(TEMPx); \
- for (roundctr = 1; roundctr < 14; roundctr++) { \
- MAKE8(AESENCx); \
- } \
- MAKE8(AESENCLASTx); \
- MAKE8(XORx); \
- MAKE8(STOREx); \
-} while(0)
+/* Multiply two field elements -- Textbook multiplication is faster than Karatsuba on some recent
+ * CPUs */
-int
-crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *ctx_,
- const unsigned char *k)
+static inline I256 __vectorcall clmul128(const BlockVec x, const BlockVec y)
+{
+#ifdef USE_KARATSUBA_MULTIPLICATION
+ const BlockVec x_hi = BYTESHR128(x, 8);
+ const BlockVec y_hi = BYTESHR128(y, 8);
+ const BlockVec r_lo = CLMULLO128(x, y);
+ const BlockVec r_hi = CLMULHI128(x, y);
+ const BlockVec r_mid = XOR128(CLMULLO128(XOR128(x, x_hi), XOR128(y, y_hi)), XOR128(r_lo, r_hi));
+
+ return (I256) {
+ SODIUM_C99(.hi =) r_hi,
+ SODIUM_C99(.lo =) r_lo,
+ SODIUM_C99(.mid =) r_mid,
+ };
+#else
+ const BlockVec r_hi = CLMULHI128(x, y);
+ const BlockVec r_lo = CLMULLO128(x, y);
+ const BlockVec r_mid = XOR128(CLMULHILO128(x, y), CLMULLOHI128(x, y));
+
+ return (I256) {
+ SODIUM_C99(.hi =) r_hi,
+ SODIUM_C99(.lo =) r_lo,
+ SODIUM_C99(.mid =) r_mid,
+ };
+#endif
+}
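+
+/* Writing x = x1*2^64 ^ x0 and y = y1*2^64 ^ y0 (carry-less), the 256-bit
+ * product is r_hi*2^128 ^ r_mid*2^64 ^ r_lo with r_lo = x0*y0, r_hi = x1*y1
+ * and r_mid = x0*y1 ^ x1*y0; Karatsuba derives r_mid instead as
+ * (x0 ^ x1)*(y0 ^ y1) ^ r_lo ^ r_hi, trading one CLMUL for extra XORs. */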
+
+/* Merge the middle word and reduce a field element */
+
+static inline BlockVec __vectorcall gcm_reduce(const I256 x)
{
- aes256gcm_state *ctx = (aes256gcm_state *) (void *) ctx_;
- unsigned char *H = ctx->H;
- __m128i *rkeys = ctx->rkeys;
- __m128i zero = _mm_setzero_si128();
+ const BlockVec hi = XOR128(x.hi, BYTESHR128(x.mid, 8));
+ const BlockVec lo = XOR128(x.lo, BYTESHL128(x.mid, 8));
- COMPILER_ASSERT((sizeof *ctx_) >= (sizeof *ctx));
- aesni_key256_expand(k, rkeys);
- aesni_encrypt1(H, zero, rkeys);
+ const BlockVec p64 = SET64x2(0, 0xc200000000000000);
+ const BlockVec a = CLMULLO128(lo, p64);
+ const BlockVec b = XOR128(SHUFFLE32x4(lo, 2, 3, 0, 1), a);
+ const BlockVec c = CLMULLO128(b, p64);
+ const BlockVec d = XOR128(SHUFFLE32x4(b, 2, 3, 0, 1), c);
- return 0;
+ return XOR128(d, hi);
}
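+
+/* All GHASH values live in the bit-reflected domain; 0xc200000000000000 is
+ * the reflected upper half of the polynomial x^128 + x^7 + x^2 + x + 1, and
+ * the two CLMUL folding steps reduce the 256-bit product to 128 bits. */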
-int
-crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c,
- unsigned char *mac, unsigned long long *maclen_p,
- const unsigned char *m, unsigned long long mlen,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+/* Precompute powers of H from `from` to `to` */
+
+static inline void __vectorcall precomp(Precomp hx[PC_COUNT], const size_t from, const size_t to)
{
- const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
- const aes256gcm_state *ctx = (const aes256gcm_state *) (const void *) ctx_;
- const __m128i *rkeys = ctx->rkeys;
- __m128i Hv, H2v, H3v, H4v, accv;
- unsigned long long i, j;
- unsigned long long adlen_rnd64 = adlen & ~63ULL;
- unsigned long long mlen_rnd128 = mlen & ~127ULL;
- CRYPTO_ALIGN(16) uint32_t n2[4];
- CRYPTO_ALIGN(16) unsigned char H[16];
- CRYPTO_ALIGN(16) unsigned char T[16];
- CRYPTO_ALIGN(16) unsigned char accum[16];
- CRYPTO_ALIGN(16) unsigned char fb[16];
+ const Precomp h = hx[0];
+ size_t i;
- (void) nsec;
- memcpy(H, ctx->H, sizeof H);
- if (mlen > crypto_aead_aes256gcm_MESSAGEBYTES_MAX) {
- sodium_misuse(); /* LCOV_EXCL_LINE */
+ for (i = from & ~1U; i < to; i += 2) {
+ hx[i] = gcm_reduce(clmul128(hx[i - 1], h));
+ hx[i + 1] = gcm_reduce(clsq128(hx[i / 2]));
}
- memcpy(&n2[0], npub, 3 * 4);
- n2[3] = 0x01000000;
- aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys);
- {
- uint64_t x;
- x = _bswap64((uint64_t) (8 * adlen));
- memcpy(&fb[0], &x, sizeof x);
- x = _bswap64((uint64_t) (8 * mlen));
- memcpy(&fb[8], &x, sizeof x);
+}
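+
+/* On return, hx[i] holds H^(i+1): even slots multiply the previous power by
+ * H, odd slots square H^((i+2)/2), which clsq128() does with no middle-word
+ * CLMUL at all. */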
+
+/* Precompute powers of H given a key and a block count */
+
+static void __vectorcall precomp_for_block_count(Precomp hx[PC_COUNT],
+ const unsigned char gh_key[16],
+ const size_t block_count)
+{
+ const BlockVec h0 = REV128(LOAD128(gh_key));
+ BlockVec carry = SET64x2(0xc200000000000000, 1);
+ BlockVec mask = SUB64x2(ZERO128, SHR64x2(h0, 63));
+ BlockVec h0_shifted;
+ BlockVec h;
+
+ mask = SHUFFLE32x4(mask, 3, 3, 3, 3);
+ carry = AND128(carry, mask);
+ h0_shifted = SHL128(h0, 1);
+ h = XOR128(h0_shifted, carry);
+
+ hx[0] = h;
+ hx[1] = gcm_reduce(clsq128(hx[0]));
+
+ if (block_count >= PC_COUNT) {
+ precomp(hx, 2, PC_COUNT);
+ } else {
+ precomp(hx, 2, block_count);
}
- /* we store H (and it's power) byte-reverted once and for all */
- Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev);
- _mm_store_si128((__m128i *) H, Hv);
- H2v = mulv(Hv, Hv);
- H3v = mulv(H2v, Hv);
- H4v = mulv(H3v, Hv);
-
- accv = _mm_setzero_si128();
- /* unrolled by 4 GCM (by 8 doesn't improve using MULREDUCE4) */
- for (i = 0; i < adlen_rnd64; i += 64) {
- __m128i X4_ = _mm_loadu_si128((const __m128i *) (ad + i + 0));
- __m128i X3_ = _mm_loadu_si128((const __m128i *) (ad + i + 16));
- __m128i X2_ = _mm_loadu_si128((const __m128i *) (ad + i + 32));
- __m128i X1_ = _mm_loadu_si128((const __m128i *) (ad + i + 48));
- MULREDUCE4(rev, Hv, H2v, H3v, H4v, X1_, X2_, X3_, X4_, accv);
+}
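+
+/* The conditional-carry left shift premultiplies H by x modulo the reflected
+ * polynomial -- a common trick that lets gcm_reduce() fold with the plain
+ * 0xc2.. constant instead of bit-shifting every multiplication result. */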
+
+/* Initialize a GHash */
+
+static inline void
+gh_init(GHash *sth)
+{
+ sth->acc = ZERO128;
+}
+
+static inline I256 __vectorcall gh_update0(const GHash *const sth, const unsigned char *const p,
+ const Precomp hn)
+{
+ const BlockVec m = REV128(LOAD128(p));
+ return clmul128(XOR128(sth->acc, m), hn);
+}
+
+static inline void __vectorcall gh_update(I256 *const u, const unsigned char *p, const Precomp hn)
+{
+ const BlockVec m = REV128(LOAD128(p));
+ const I256 t = clmul128(m, hn);
+ *u = (I256) { SODIUM_C99(.hi =) XOR128(u->hi, t.hi), SODIUM_C99(.lo =) XOR128(u->lo, t.lo),
+ SODIUM_C99(.mid =) XOR128(u->mid, t.mid) };
+}
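+
+/* gh_update0() starts a fresh 256-bit accumulator from (acc ^ block) * H^n,
+ * and gh_update() XORs further unreduced products into it; deferring
+ * gcm_reduce() to once per batch is the usual aggregated-reduction trick. */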
+
+/* Absorb ad_len bytes of associated data; ad_len must be a multiple of 16
+ * (callers zero-pad any trailing partial block first). */
+
+static inline void
+gh_ad_blocks(const State *st, GHash *sth, const unsigned char *ad, size_t ad_len)
+{
+ size_t i;
+
+ i = (size_t) 0U;
+ for (; i + PC_COUNT * 16 <= ad_len; i += PC_COUNT * 16) {
+ I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT - 1 - 0]);
+ size_t j;
+
+ for (j = 1; j < PC_COUNT; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
}
- _mm_store_si128((__m128i *) accum, accv);
+ for (; i + PC_COUNT * 16 / 2 <= ad_len; i += PC_COUNT * 16 / 2) {
+ I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT / 2 - 1 - 0]);
+ size_t j;
- /* GCM remainder loop */
- for (i = adlen_rnd64; i < adlen; i += 16) {
- unsigned int blocklen = 16;
+ for (j = 1; j < PC_COUNT / 2; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT / 2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+ for (; i + 4 * 16 <= ad_len; i += 4 * 16) {
+ size_t j;
+ I256 u = gh_update0(sth, ad + i, st->hx[4 - 1 - 0]);
- if (i + (unsigned long long) blocklen > adlen) {
- blocklen = (unsigned int) (adlen - i);
+ for (j = 1; j < 4; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[4 - 1 - j]);
}
- addmul(accum, ad + i, blocklen, H);
+ sth->acc = gcm_reduce(u);
}
+ for (; i + 2 * 16 <= ad_len; i += 2 * 16) {
+ size_t j;
+ I256 u = gh_update0(sth, ad + i, st->hx[2 - 1 - 0]);
-/* this only does 8 full blocks, so no fancy bounds checking is necessary*/
-#define LOOPRND128 \
- do { \
- const int iter = 8; \
- const int lb = iter * 16; \
- \
- for (i = 0; i < mlen_rnd128; i += lb) { \
- aesni_encrypt8full(c + i, n2, rkeys, m + i, accum, Hv, H2v, H3v, H4v, rev); \
- } \
- } while(0)
-
-/* remainder loop, with the slower GCM update to accommodate partial blocks */
-#define LOOPRMD128 \
- do { \
- const int iter = 8; \
- const int lb = iter * 16; \
- \
- for (i = mlen_rnd128; i < mlen; i += lb) { \
- CRYPTO_ALIGN(16) unsigned char outni[8 * 16]; \
- unsigned long long mj = lb; \
- \
- aesni_encrypt8(outni, n2, rkeys); \
- if ((i + mj) >= mlen) { \
- mj = mlen - i; \
- } \
- for (j = 0; j < mj; j++) { \
- c[i + j] = m[i + j] ^ outni[j]; \
- } \
- for (j = 0; j < mj; j += 16) { \
- unsigned int bl = 16; \
- \
- if (j + (unsigned long long) bl >= mj) { \
- bl = (unsigned int) (mj - j); \
- } \
- addmul(accum, c + i + j, bl, H); \
- } \
- } \
- } while(0)
-
- n2[3] &= 0x00ffffff;
- COUNTER_INC2(n2);
- LOOPRND128;
- LOOPRMD128;
-
- addmul(accum, fb, 16, H);
-
- for (i = 0; i < 16; ++i) {
- mac[i] = T[i] ^ accum[15 - i];
+ for (j = 1; j < 2; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
}
- if (maclen_p != NULL) {
- *maclen_p = 16;
+ if (i < ad_len) {
+ I256 u = gh_update0(sth, ad + i, st->hx[0]);
+ sth->acc = gcm_reduce(u);
}
- return 0;
}
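+
+/* The cascade of batch widths (PC_COUNT, PC_COUNT / 2, 4, 2, 1 blocks) pairs
+ * every block in a batch with a decreasing power of H, so a single reduction
+ * per batch matches block-at-a-time Horner evaluation exactly. */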
-int
-crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p,
- const unsigned char *m, unsigned long long mlen,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+/* Increment counters */
+
+static inline BlockVec __vectorcall incr_counters(BlockVec rev_counters[], BlockVec counter,
+ const size_t n)
{
- int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c,
- c + mlen, NULL,
- m, mlen,
- ad, adlen,
- nsec, npub, ctx_);
- if (clen_p != NULL) {
- *clen_p = mlen + crypto_aead_aes256gcm_ABYTES;
+ size_t i;
+
+ const BlockVec one = ONE128;
+ for (i = 0; i < n; i++) {
+ rev_counters[i] = REV128(counter);
+ counter = ADD64x2(counter, one);
}
- return ret;
+ return counter;
}
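+
+/* The counter is kept in host order so that ADD64x2 on the low lane
+ * increments it; required_blocks() caps the block count below 2^32 - 2, so
+ * the addition can never carry out of the 32-bit counter field. REV128 then
+ * yields the big-endian counter blocks fed to the AES-CTR core. */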
-int
-crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
- const unsigned char *c, unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+/* Compute the number of GHASH blocks required to encrypt and authenticate `ad_len` bytes of
+ * associated data and `m_len` bytes of message. Return `0` if the limits would be exceeded. */
+
+static inline size_t
+required_blocks(const size_t ad_len, const size_t m_len)
{
- const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
- const aes256gcm_state *ctx = (const aes256gcm_state *) (const void *) ctx_;
- const __m128i *rkeys = ctx->rkeys;
- __m128i Hv, H2v, H3v, H4v, accv;
- unsigned long long i, j;
- unsigned long long adlen_rnd64 = adlen & ~63ULL;
- unsigned long long mlen;
- unsigned long long mlen_rnd128;
- CRYPTO_ALIGN(16) uint32_t n2[4];
- CRYPTO_ALIGN(16) unsigned char H[16];
- CRYPTO_ALIGN(16) unsigned char T[16];
- CRYPTO_ALIGN(16) unsigned char accum[16];
- CRYPTO_ALIGN(16) unsigned char fb[16];
+ const size_t ad_blocks = (ad_len + 15) / 16;
+ const size_t m_blocks = (m_len + 15) / 16;
- (void) nsec;
- if (clen > crypto_aead_aes256gcm_MESSAGEBYTES_MAX) {
- sodium_misuse(); /* LCOV_EXCL_LINE */
+ if (ad_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 ||
+ m_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || ad_len < ad_blocks || m_len < m_blocks ||
+ m_blocks >= (1ULL << 32) - 2) {
+ return 0;
}
- mlen = clen;
-
- memcpy(&n2[0], npub, 3 * 4);
- n2[3] = 0x01000000;
- aesni_encrypt1(T, _mm_load_si128((const __m128i *) n2), rkeys);
-
- {
- uint64_t x;
- x = _bswap64((uint64_t)(8 * adlen));
- memcpy(&fb[0], &x, sizeof x);
- x = _bswap64((uint64_t)(8 * mlen));
- memcpy(&fb[8], &x, sizeof x);
+ return ad_blocks + m_blocks + 1;
+}
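+
+/* The m_blocks bound mirrors GCM's limit of 2^32 - 2 counter values per
+ * nonce (NIST SP 800-38D); the SIZE_MAX margins keep the callers' index
+ * arithmetic, which looks ahead by 2 * PARALLEL_BLOCKS * 16 bytes, from
+ * overflowing. */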
+
+/* Generic AES-GCM encryption. "Generic" as it can handle arbitrary input sizes,
+unlike a length-limited version that would precompute all the required powers of H */
+
+static void
+aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst,
+ const unsigned char *src, size_t src_len, const unsigned char *ad,
+ size_t ad_len, unsigned char counter_[16])
+{
+ CRYPTO_ALIGN(32) I256 u;
+ CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16];
+ const BlockVec one = ONE128;
+ BlockVec final_block;
+ BlockVec rev_counters[PARALLEL_BLOCKS];
+ BlockVec counter;
+ size_t i;
+ size_t j;
+ size_t left;
+ size_t pi;
+
+ COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0);
+
+ /* Associated data */
+
+ if (ad != NULL && ad_len != 0) {
+ gh_ad_blocks(st, sth, ad, ad_len & ~15);
+ left = ad_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, ad + ad_len - left, left);
+ gh_ad_blocks(st, sth, pad, sizeof pad);
+ }
}
- memcpy(H, ctx->H, sizeof H);
- Hv = _mm_shuffle_epi8(_mm_load_si128((const __m128i *) H), rev);
- _mm_store_si128((__m128i *) H, Hv);
- H2v = mulv(Hv, Hv);
- H3v = mulv(H2v, Hv);
- H4v = mulv(H3v, Hv);
-
- accv = _mm_setzero_si128();
- for (i = 0; i < adlen_rnd64; i += 64) {
- __m128i X4_ = _mm_loadu_si128((const __m128i *) (ad + i + 0));
- __m128i X3_ = _mm_loadu_si128((const __m128i *) (ad + i + 16));
- __m128i X2_ = _mm_loadu_si128((const __m128i *) (ad + i + 32));
- __m128i X1_ = _mm_loadu_si128((const __m128i *) (ad + i + 48));
- MULREDUCE4(rev, Hv, H2v, H3v, H4v, X1_, X2_, X3_, X4_, accv);
+ /* Encrypted data */
+
+ counter = REV128(LOAD128(counter_));
+ i = 0;
+
+ /* 2*PARALLEL_BLOCKS aggregation */
+
+ if (src_len - i >= 2 * PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ i += PARALLEL_BLOCKS * 16;
+
+ for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+
+ PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16);
+#if PARALLEL_BLOCKS >= 64 / 16
+ PREFETCH_READ(src + i + PARALLEL_BLOCKS * 16 + 64);
+#endif
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]);
+ }
+
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16,
+ rev_counters);
+
+ PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16);
+#if PARALLEL_BLOCKS >= 64 / 16
+ PREFETCH_READ(src + i + 2 * PARALLEL_BLOCKS * 16 + 64);
+#endif
+ pi = i;
+ for (j = 0; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
}
- _mm_store_si128((__m128i *) accum, accv);
- for (i = adlen_rnd64; i < adlen; i += 16) {
- unsigned int blocklen = 16;
- if (i + (unsigned long long) blocklen > adlen) {
- blocklen = (unsigned int) (adlen - i);
+ /* PARALLEL_BLOCKS aggregation */
+
+ if (src_len - i >= PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ i += PARALLEL_BLOCKS * 16;
+
+ for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
}
- addmul(accum, ad + i, blocklen, H);
+ sth->acc = gcm_reduce(u);
}
- mlen_rnd128 = mlen & ~127ULL;
-
-#define LOOPACCUMDRND128 \
- do { \
- const int iter = 8; \
- const int lb = iter * 16; \
- for (i = 0; i < mlen_rnd128; i += lb) { \
- aesni_addmul8full(c + i, accum, Hv, H2v, H3v, H4v, rev); \
- } \
- } while(0)
-
-#define LOOPDRND128 \
- do { \
- const int iter = 8; \
- const int lb = iter * 16; \
- \
- for (i = 0; i < mlen_rnd128; i += lb) { \
- aesni_decrypt8full(m + i, n2, rkeys, c + i); \
- } \
- } while(0)
-
-#define LOOPACCUMDRMD128 \
- do { \
- const int iter = 8; \
- const int lb = iter * 16; \
- \
- for (i = mlen_rnd128; i < mlen; i += lb) { \
- unsigned long long mj = lb; \
- \
- if ((i + mj) >= mlen) { \
- mj = mlen - i; \
- } \
- for (j = 0; j < mj; j += 16) { \
- unsigned int bl = 16; \
- \
- if (j + (unsigned long long) bl >= mj) { \
- bl = (unsigned int) (mj - j); \
- } \
- addmul(accum, c + i + j, bl, H); \
- } \
- } \
- } while(0)
-
-#define LOOPDRMD128 \
- do { \
- const int iter = 8; \
- const int lb = iter * 16; \
- \
- for (i = mlen_rnd128; i < mlen; i += lb) { \
- CRYPTO_ALIGN(16) unsigned char outni[8 * 16]; \
- unsigned long long mj = lb; \
- \
- if ((i + mj) >= mlen) { \
- mj = mlen - i; \
- } \
- aesni_encrypt8(outni, n2, rkeys); \
- for (j = 0; j < mj; j++) { \
- m[i + j] = c[i + j] ^ outni[j]; \
- } \
- } \
- } while(0)
-
- n2[3] &= 0x00ffffff;
-
- COUNTER_INC2(n2);
- LOOPACCUMDRND128;
- LOOPACCUMDRMD128;
- addmul(accum, fb, 16, H);
- {
- unsigned char d = 0;
-
- for (i = 0; i < 16; i++) {
- d |= (mac[i] ^ (T[i] ^ accum[15 - i]));
+ /* 4-blocks aggregation */
+
+ for (; i + 4 * 16 <= src_len; i += 4 * 16) {
+ counter = incr_counters(rev_counters, counter, 4);
+ for (j = 0; j < 4; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
}
- if (d != 0) {
- if (m != NULL) {
- memset(m, 0, mlen);
- }
- return -1;
+
+ u = gh_update0(sth, dst + i, st->hx[4 - 1 - 0]);
+ for (j = 1; j < 4; j += 1) {
+ gh_update(&u, dst + i + j * 16, st->hx[4 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ /* 2-blocks aggregation */
+
+ for (; i + 2 * 16 <= src_len; i += 2 * 16) {
+ counter = incr_counters(rev_counters, counter, 2);
+ for (j = 0; j < 2; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
}
- if (m == NULL) {
- return 0;
+
+ u = gh_update0(sth, dst + i, st->hx[2 - 1 - 0]);
+ for (j = 1; j < 2; j += 1) {
+ gh_update(&u, dst + i + j * 16, st->hx[2 - 1 - j]);
}
+ sth->acc = gcm_reduce(u);
}
- n2[3] = 0U;
- COUNTER_INC2(n2);
- LOOPDRND128;
- LOOPDRMD128;
- return 0;
+ /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the
+ full block authenticated along with the final block, hence < and not <= */
+
+ for (; i + 16 < src_len; i += 16) {
+ encrypt_xor_block(st, dst + i, src + i, REV128(counter));
+ u = gh_update0(sth, dst + i, st->hx[1 - 1 - 0]);
+ sth->acc = gcm_reduce(u);
+ counter = ADD64x2(counter, one);
+ }
+
+ /* Authenticate both the last block of the message and the final block */
+
+ final_block = REV128(SET64x2(ad_len * 8, src_len * 8));
+ STORE32_BE(counter_ + NPUBBYTES, 1);
+ encrypt(st, mac, counter_);
+ left = src_len - i;
+ if (left != 0) {
+ for (j = 0; j < left; j++) {
+ last_blocks[j] = src[i + j];
+ }
+ STORE128(last_blocks + 16, final_block);
+ encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter));
+ for (; j < 16; j++) {
+ last_blocks[j] = 0;
+ }
+ for (j = 0; j < left; j++) {
+ dst[i + j] = last_blocks[j];
+ }
+ gh_ad_blocks(st, sth, last_blocks, 32);
+ } else {
+ STORE128(last_blocks, final_block);
+ gh_ad_blocks(st, sth, last_blocks, 16);
+ }
+ STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc)));
}
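+
+/* Note the software pipelining in the wide loops above: each batch is hashed
+ * one batch behind the CTR encryption (pi = i - PARALLEL_BLOCKS * 16), so
+ * the AES and carry-less multiplication units can run concurrently. */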
-int
-crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
- unsigned char *nsec,
- const unsigned char *c, unsigned long long clen,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+/* Generic AES-GCM decryption. "Generic" as it can handle arbitrary input sizes,
+unlike a length-limited version that would precompute all the required powers of H */
+
+static void
+aes_gcm_decrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst,
+ const unsigned char *src, size_t src_len, const unsigned char *ad,
+ size_t ad_len, unsigned char counter_[16])
{
- unsigned long long mlen = 0ULL;
- int ret = -1;
+ CRYPTO_ALIGN(32) I256 u;
+ CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16];
+ const BlockVec one = ONE128;
+ BlockVec final_block;
+ BlockVec rev_counters[PARALLEL_BLOCKS];
+ BlockVec counter;
+ size_t i;
+ size_t j;
+ size_t left;
+
+ COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0);
- if (clen >= crypto_aead_aes256gcm_ABYTES) {
- ret = crypto_aead_aes256gcm_decrypt_detached_afternm
- (m, nsec, c, clen - crypto_aead_aes256gcm_ABYTES,
- c + clen - crypto_aead_aes256gcm_ABYTES,
- ad, adlen, npub, ctx_);
+ /* Associated data */
+
+ if (ad != NULL && ad_len != 0) {
+ gh_ad_blocks(st, sth, ad, ad_len & ~15);
+ left = ad_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, ad + ad_len - left, left);
+ gh_ad_blocks(st, sth, pad, sizeof pad);
+ }
}
- if (mlen_p != NULL) {
- if (ret == 0) {
- mlen = clen - crypto_aead_aes256gcm_ABYTES;
+
+ /* Encrypted data */
+
+ counter = REV128(LOAD128(counter_));
+ i = 0;
+
+ /* 2*PARALLEL_BLOCKS aggregation */
+
+ while (i + 2 * PARALLEL_BLOCKS * 16 <= src_len) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+
+ u = gh_update0(sth, src + i, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]);
}
- *mlen_p = mlen;
+
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+
+ i += PARALLEL_BLOCKS * 16;
+ for (j = 0; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ i += PARALLEL_BLOCKS * 16;
}
- return ret;
-}
-int
-crypto_aead_aes256gcm_encrypt_detached(unsigned char *c,
- unsigned char *mac,
- unsigned long long *maclen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;
+ /* PARALLEL_BLOCKS aggregation */
- crypto_aead_aes256gcm_beforenm(&ctx, k);
+ for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
- return crypto_aead_aes256gcm_encrypt_detached_afternm
- (c, mac, maclen_p, m, mlen, ad, adlen, nsec, npub,
- (const crypto_aead_aes256gcm_state *) &ctx);
-}
+ u = gh_update0(sth, src + i, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
-int
-crypto_aead_aes256gcm_encrypt(unsigned char *c,
- unsigned long long *clen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;
- int ret;
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ }
- crypto_aead_aes256gcm_beforenm(&ctx, k);
+ /* 4-blocks aggregation */
- ret = crypto_aead_aes256gcm_encrypt_afternm
- (c, clen_p, m, mlen, ad, adlen, nsec, npub,
- (const crypto_aead_aes256gcm_state *) &ctx);
- sodium_memzero(&ctx, sizeof ctx);
+ for (; i + 4 * 16 <= src_len; i += 4 * 16) {
+ counter = incr_counters(rev_counters, counter, 4);
- return ret;
-}
+ u = gh_update0(sth, src + i, st->hx[4 - 1 - 0]);
+ for (j = 1; j < 4; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[4 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
-int
-crypto_aead_aes256gcm_decrypt_detached(unsigned char *m,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;
+ for (j = 0; j < 4; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
+ }
+ }
- crypto_aead_aes256gcm_beforenm(&ctx, k);
+ /* 2-blocks aggregation */
- return crypto_aead_aes256gcm_decrypt_detached_afternm
- (m, nsec, c, clen, mac, ad, adlen, npub,
- (const crypto_aead_aes256gcm_state *) &ctx);
-}
+ for (; i + 2 * 16 <= src_len; i += 2 * 16) {
+ counter = incr_counters(rev_counters, counter, 2);
-int
-crypto_aead_aes256gcm_decrypt(unsigned char *m,
- unsigned long long *mlen_p,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state ctx;
- int ret;
+ u = gh_update0(sth, src + i, st->hx[2 - 1 - 0]);
+ for (j = 1; j < 2; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
- crypto_aead_aes256gcm_beforenm(&ctx, k);
+ for (j = 0; j < 2; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
+ }
+ }
- ret = crypto_aead_aes256gcm_decrypt_afternm
- (m, mlen_p, nsec, c, clen, ad, adlen, npub,
- (const crypto_aead_aes256gcm_state *) &ctx);
- sodium_memzero(&ctx, sizeof ctx);
+ /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the
+ full block authenticated along with the final block, hence < and not <= */
- return ret;
+ for (; i + 16 < src_len; i += 16) {
+ u = gh_update0(sth, src + i, st->hx[1 - 1 - 0]);
+ sth->acc = gcm_reduce(u);
+ encrypt_xor_block(st, dst + i, src + i, REV128(counter));
+ counter = ADD64x2(counter, one);
+ }
+
+ /* Authenticate both the last block of the message and the final block */
+
+ final_block = REV128(SET64x2(ad_len * 8, src_len * 8));
+ STORE32_BE(counter_ + NPUBBYTES, 1);
+ encrypt(st, mac, counter_);
+ left = src_len - i;
+ if (left != 0) {
+ for (j = 0; j < left; j++) {
+ last_blocks[j] = src[i + j];
+ }
+ for (; j < 16; j++) {
+ last_blocks[j] = 0;
+ }
+ STORE128(last_blocks + 16, final_block);
+ gh_ad_blocks(st, sth, last_blocks, 32);
+ encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter));
+ for (j = 0; j < left; j++) {
+ dst[i + j] = last_blocks[j];
+ }
+ } else {
+ STORE128(last_blocks, final_block);
+ gh_ad_blocks(st, sth, last_blocks, 16);
+ }
+ STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc)));
}
int
-crypto_aead_aes256gcm_is_available(void)
+crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k)
{
- return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni();
-}
+ State *st = (State *) (void *) st_;
+ CRYPTO_ALIGN(16) unsigned char h[16];
-#else
+ COMPILER_ASSERT(sizeof *st_ >= sizeof *st);
-int
-crypto_aead_aes256gcm_encrypt_detached(unsigned char *c,
- unsigned char *mac,
- unsigned long long *maclen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- errno = ENOSYS;
- return -1;
-}
+ expand256(k, st->rkeys);
+ memset(h, 0, sizeof h);
+ encrypt(st, h, h);
-int
-crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p,
- const unsigned char *m, unsigned long long mlen,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *nsec, const unsigned char *npub,
- const unsigned char *k)
-{
- errno = ENOSYS;
- return -1;
-}
+ precomp_for_block_count(st->hx, h, PC_COUNT);
-int
-crypto_aead_aes256gcm_decrypt_detached(unsigned char *m,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- errno = ENOSYS;
- return -1;
+ return 0;
}
int
-crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p,
- unsigned char *nsec, const unsigned char *c,
- unsigned long long clen, const unsigned char *ad,
- unsigned long long adlen, const unsigned char *npub,
- const unsigned char *k)
-{
- errno = ENOSYS;
- return -1;
+crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long m_len_, const unsigned char *ad,
+ unsigned long long ad_len_,
+ const unsigned char *nsec, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ const State *st = (const State *) (const void *) st_;
+ GHash sth;
+ CRYPTO_ALIGN(16) unsigned char j[16];
+ size_t gh_required_blocks;
+ const size_t ad_len = (size_t) ad_len_;
+ const size_t m_len = (size_t) m_len_;
+
+ (void) nsec;
+ if (maclen_p != NULL) {
+ *maclen_p = 0;
+ }
+ if (ad_len_ > SODIUM_SIZE_MAX || m_len_ > SODIUM_SIZE_MAX) {
+ sodium_misuse();
+ }
+ gh_required_blocks = required_blocks(ad_len, m_len);
+ if (gh_required_blocks == 0) {
+ memset(mac, 0xd0, ABYTES);
+ memset(c, 0, m_len);
+ return -1;
+ }
+
+ gh_init(&sth);
+
+ memcpy(j, npub, NPUBBYTES);
+ STORE32_BE(j + NPUBBYTES, 2);
+
+ aes_gcm_encrypt_generic(st, &sth, mac, c, m, m_len, ad, ad_len, j);
+
+ if (maclen_p != NULL) {
+ *maclen_p = ABYTES;
+ }
+ return 0;
}
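+
+/* Counter-block layout: j = 96-bit nonce || 32-bit big-endian counter.
+ * Value 2 is used for the first block of data; value 1 is reserved for J0,
+ * which aes_gcm_encrypt_generic() encrypts into `mac` as the tag mask -- the
+ * standard GCM construction for 96-bit IVs. */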
int
-crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *ctx_,
- const unsigned char *k)
+crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
+ unsigned long long m_len, const unsigned char *ad,
+ unsigned long long ad_len, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
{
- errno = ENOSYS;
- return -1;
+ const int ret = crypto_aead_aes256gcm_encrypt_detached(c, c + m_len, NULL, m, m_len, ad, ad_len,
+ nsec, npub, k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ *clen_p = m_len + crypto_aead_aes256gcm_ABYTES;
+ } else {
+ *clen_p = 0;
+ }
+ }
+ return ret;
}
int
-crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c,
- unsigned char *mac, unsigned long long *maclen_p,
- const unsigned char *m, unsigned long long mlen,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long m_len, const unsigned char *ad,
+ unsigned long long ad_len, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
{
- errno = ENOSYS;
- return -1;
+ CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st;
+ int ret;
+
+ PREFETCH_WRITE(c);
+ PREFETCH_READ(m);
+ PREFETCH_READ(ad);
+
+ crypto_aead_aes256gcm_beforenm(&st, k);
+ ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, mac, maclen_p, m, m_len, ad, ad_len,
+ nsec, npub, &st);
+ sodium_memzero(&st, sizeof st);
+
+ return ret;
}
int
@@ -998,82 +820,196 @@ crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen
const unsigned char *m, unsigned long long mlen,
const unsigned char *ad, unsigned long long adlen,
const unsigned char *nsec, const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+ const crypto_aead_aes256gcm_state *st_)
{
- errno = ENOSYS;
- return -1;
+ int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, c + mlen, NULL, m, mlen, ad, adlen,
+ nsec, npub, st_);
+ if (clen_p != NULL) {
+ *clen_p = mlen + crypto_aead_aes256gcm_ABYTES;
+ }
+ return ret;
+}
+
+static int
+crypto_aead_aes256gcm_verify_mac(unsigned char *nsec, const unsigned char *c,
+ unsigned long long c_len_, const unsigned char *mac,
+ const unsigned char *ad, unsigned long long ad_len_,
+ const unsigned char *npub, const crypto_aead_aes256gcm_state *st_)
+{
+ const State *st = (const State *) (const void *) st_;
+ GHash sth;
+ BlockVec final_block;
+ CRYPTO_ALIGN(16) unsigned char j[16];
+ CRYPTO_ALIGN(16) unsigned char computed_mac[16];
+ CRYPTO_ALIGN(16) unsigned char last_block[16];
+ size_t gh_required_blocks;
+ size_t left;
+ const size_t ad_len = (size_t) ad_len_;
+ const size_t c_len = (size_t) c_len_;
+ int ret;
+
+ (void) nsec;
+ if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) {
+ sodium_misuse();
+ }
+ gh_required_blocks = required_blocks(ad_len, c_len);
+ if (gh_required_blocks == 0) {
+ return -1;
+ }
+
+ gh_init(&sth);
+
+ memcpy(j, npub, NPUBBYTES);
+ STORE32_BE(j + NPUBBYTES, 2);
+
+ gh_ad_blocks(st, &sth, ad, ad_len & ~15);
+ left = ad_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, ad + ad_len - left, left);
+ gh_ad_blocks(st, &sth, pad, sizeof pad);
+ }
+
+ gh_ad_blocks(st, &sth, c, c_len & ~15);
+ left = c_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, c + c_len - left, left);
+ gh_ad_blocks(st, &sth, pad, sizeof pad);
+ }
+ final_block = REV128(SET64x2(ad_len * 8, c_len * 8));
+ STORE32_BE(j + NPUBBYTES, 1);
+ encrypt(st, computed_mac, j);
+ STORE128(last_block, final_block);
+ gh_ad_blocks(st, &sth, last_block, 16);
+ STORE128(computed_mac, XOR128(LOAD128(computed_mac), REV128(sth.acc)));
+
+ ret = crypto_verify_16(mac, computed_mac);
+ sodium_memzero(computed_mac, sizeof computed_mac);
+
+ return ret;
}
int
crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
- const unsigned char *c, unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
-{
- errno = ENOSYS;
- return -1;
+ const unsigned char *c, unsigned long long c_len_,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long ad_len_,
+ const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ const State *st = (const State *) (const void *) st_;
+ GHash sth;
+ CRYPTO_ALIGN(16) unsigned char j[16];
+ unsigned char computed_mac[16];
+ size_t gh_required_blocks;
+ const size_t ad_len = (size_t) ad_len_;
+ const size_t c_len = (size_t) c_len_;
+ const size_t m_len = c_len;
+
+ (void) nsec;
+ if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) {
+ sodium_misuse();
+ }
+ if (m == NULL) {
+ return crypto_aead_aes256gcm_verify_mac(nsec, c, c_len, mac, ad, ad_len, npub, st_);
+ }
+ gh_required_blocks = required_blocks(ad_len, m_len);
+ if (gh_required_blocks == 0) {
+ return -1;
+ }
+
+ gh_init(&sth);
+
+ memcpy(j, npub, NPUBBYTES);
+ STORE32_BE(j + NPUBBYTES, 2);
+
+ aes_gcm_decrypt_generic(st, &sth, computed_mac, m, c, m_len, ad, ad_len, j);
+
+ if (crypto_verify_16(mac, computed_mac) != 0) {
+ sodium_memzero(computed_mac, sizeof computed_mac);
+ memset(m, 0xd0, m_len);
+ return -1;
+ }
+ return 0;
}
int
crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
- unsigned char *nsec,
- const unsigned char *c, unsigned long long clen,
- const unsigned char *ad, unsigned long long adlen,
- const unsigned char *npub,
- const crypto_aead_aes256gcm_state *ctx_)
+ unsigned char *nsec, const unsigned char *c,
+ unsigned long long clen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
{
- errno = ENOSYS;
- return -1;
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= ABYTES) {
+ ret = crypto_aead_aes256gcm_decrypt_detached_afternm(
+ m, nsec, c, clen - ABYTES, c + clen - ABYTES, ad, adlen, npub, st_);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
}
int
-crypto_aead_aes256gcm_is_available(void)
+crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const unsigned char *k)
{
- return 0;
-}
+ CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st;
-#endif
+ PREFETCH_WRITE(m);
+ PREFETCH_READ(c);
+ PREFETCH_READ(ad);
-size_t
-crypto_aead_aes256gcm_keybytes(void)
-{
- return crypto_aead_aes256gcm_KEYBYTES;
-}
+ crypto_aead_aes256gcm_beforenm(&st, k);
-size_t
-crypto_aead_aes256gcm_nsecbytes(void)
-{
- return crypto_aead_aes256gcm_NSECBYTES;
+ return crypto_aead_aes256gcm_decrypt_detached_afternm(
+ m, nsec, c, clen, mac, ad, adlen, npub, (const crypto_aead_aes256gcm_state *) &st);
}
-size_t
-crypto_aead_aes256gcm_npubbytes(void)
+int
+crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k)
{
- return crypto_aead_aes256gcm_NPUBBYTES;
-}
+ CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st;
+ int ret;
-size_t
-crypto_aead_aes256gcm_abytes(void)
-{
- return crypto_aead_aes256gcm_ABYTES;
-}
+ PREFETCH_WRITE(m);
+ PREFETCH_READ(c);
+ PREFETCH_READ(ad);
-size_t
-crypto_aead_aes256gcm_statebytes(void)
-{
- return (sizeof(crypto_aead_aes256gcm_state) + (size_t) 15U) & ~(size_t) 15U;
-}
+ crypto_aead_aes256gcm_beforenm(&st, k);
-size_t
-crypto_aead_aes256gcm_messagebytes_max(void)
-{
- return crypto_aead_aes256gcm_MESSAGEBYTES_MAX;
+ ret = crypto_aead_aes256gcm_decrypt_afternm(m, mlen_p, nsec, c, clen, ad, adlen, npub,
+ (const crypto_aead_aes256gcm_state *) &st);
+ sodium_memzero(&st, sizeof st);
+
+ return ret;
}
-void
-crypto_aead_aes256gcm_keygen(unsigned char k[crypto_aead_aes256gcm_KEYBYTES])
+int
+crypto_aead_aes256gcm_is_available(void)
{
- randombytes_buf(k, crypto_aead_aes256gcm_KEYBYTES);
+ return sodium_runtime_has_pclmul() & sodium_runtime_has_aesni() & sodium_runtime_has_avx();
}
+
+#ifdef __clang__
+# pragma clang attribute pop
+#endif
+
+#endif
diff --git a/libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c b/libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c
new file mode 100644
index 0000000000..8f9bba6d74
--- /dev/null
+++ b/libs/libsodium/src/crypto_aead/aes256gcm/armcrypto/aead_aes256gcm_armcrypto.c
@@ -0,0 +1,1033 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_aes256gcm.h"
+#include "crypto_verify_16.h"
+#include "export.h"
+#include "private/common.h"
+#include "randombytes.h"
+#include "runtime.h"
+#include "utils.h"
+
+#if defined(HAVE_ARMCRYPTO) && defined(__clang__) && defined(NATIVE_LITTLE_ENDIAN)
+
+#if !defined(_MSC_VER) || _MSC_VER < 1800
+#define __vectorcall
+#endif
+
+#ifndef __ARM_FEATURE_CRYPTO
+#define __ARM_FEATURE_CRYPTO 1
+#endif
+#ifndef __ARM_FEATURE_AES
+#define __ARM_FEATURE_AES 1
+#endif
+
+#include <arm_neon.h>
+
+#ifdef __clang__
+#pragma clang attribute push(__attribute__((target("neon,crypto,aes"))), apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC target("+simd+crypto")
+#endif
+
+#define ABYTES crypto_aead_aes256gcm_ABYTES
+#define NPUBBYTES crypto_aead_aes256gcm_NPUBBYTES
+#define KEYBYTES crypto_aead_aes256gcm_KEYBYTES
+
+#define PARALLEL_BLOCKS 6
+#undef USE_KARATSUBA_MULTIPLICATION
+
+typedef uint64x2_t BlockVec;
+
+#define LOAD128(a) vld1q_u64((const uint64_t *) (const void *) (a))
+#define STORE128(a, b) vst1q_u64((uint64_t *) (void *) (a), (b))
+#define AES_XENCRYPT(block_vec, rkey) \
+ vreinterpretq_u64_u8( \
+ vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey)))
+#define AES_XENCRYPTLAST(block_vec, rkey) \
+ vreinterpretq_u64_u8(vaeseq_u8(vreinterpretq_u8_u64(block_vec), rkey))
+#define XOR128(a, b) veorq_u64((a), (b))
+#define AND128(a, b) vandq_u64((a), (b))
+#define OR128(a, b) vorrq_u64((a), (b))
+#define SET64x2(a, b) vsetq_lane_u64((uint64_t) (a), vmovq_n_u64((uint64_t) (b)), 1)
+#define ZERO128 vmovq_n_u8(0)
+#define ONE128 SET64x2(0, 1)
+#define ADD64x2(a, b) vaddq_u64((a), (b))
+#define SUB64x2(a, b) vsubq_u64((a), (b))
+#define SHL64x2(a, b) vshlq_n_u64((a), (b))
+#define SHR64x2(a, b) vshrq_n_u64((a), (b))
+#define REV128(x) \
+ vreinterpretq_u64_u8(__builtin_shufflevector(vreinterpretq_u8_u64(x), vreinterpretq_u8_u64(x), \
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, \
+ 1, 0))
+#define SHUFFLE32x4(x, a, b, c, d) \
+ vreinterpretq_u64_u32(__builtin_shufflevector(vreinterpretq_u32_u64(x), \
+ vreinterpretq_u32_u64(x), (a), (b), (c), (d)))
+#define BYTESHL128(a, b) vreinterpretq_u64_u8(vextq_s8(vdupq_n_s8(0), (int8x16_t) a, 16 - (b)))
+#define BYTESHR128(a, b) vreinterpretq_u64_u8(vextq_s8((int8x16_t) a, vdupq_n_s8(0), (b)))
+
+#define SHL128(a, b) OR128(SHL64x2((a), (b)), SHR64x2(BYTESHL128((a), 8), 64 - (b)))
+#define CLMULLO128(a, b) \
+ vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_low_u64(b)))
+#define CLMULHI128(a, b) \
+ vreinterpretq_u64_p128(vmull_high_p64(vreinterpretq_p64_s64(a), vreinterpretq_p64_s64(b)))
+#define CLMULLOHI128(a, b) \
+ vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_low_u64(a), (poly64_t) vget_high_u64(b)))
+#define CLMULHILO128(a, b) \
+ vreinterpretq_u64_p128(vmull_p64((poly64_t) vget_high_u64(a), (poly64_t) vget_low_u64(b)))
+#define PREFETCH_READ(x) __builtin_prefetch((x), 0, 2)
+#define PREFETCH_WRITE(x) __builtin_prefetch((x), 1, 2)
+
+static inline BlockVec
+AES_KEYGEN(BlockVec block_vec, const int rc)
+{
+ uint8x16_t a = vaeseq_u8(vreinterpretq_u8_u64(block_vec), vmovq_n_u8(0));
+ const uint8x16_t b =
+ __builtin_shufflevector(a, a, 4, 1, 14, 11, 1, 14, 11, 4, 12, 9, 6, 3, 9, 6, 3, 12);
+ const uint64x2_t c = SET64x2((uint64_t) rc << 32, (uint64_t) rc << 32);
+ return XOR128(b, c);
+}
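+
+/* Mirrors the x86 AESKEYGENASSIST step: AESE with an all-zero round key
+ * applies SubBytes and ShiftRows only, and the shuffle both undoes ShiftRows
+ * and replicates the SubWord/RotWord lanes before the round constant is
+ * XORed in. */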
+
+#define ROUNDS 14
+
+#define PC_COUNT (2 * PARALLEL_BLOCKS)
+
+typedef struct I256 {
+ BlockVec hi;
+ BlockVec lo;
+ BlockVec mid;
+} I256;
+
+typedef BlockVec Precomp;
+
+typedef struct GHash {
+ BlockVec acc;
+} GHash;
+
+typedef struct State {
+ BlockVec rkeys[ROUNDS + 1];
+ Precomp hx[PC_COUNT];
+} State;
+
+static void __vectorcall expand256(const unsigned char key[KEYBYTES], BlockVec rkeys[1 + ROUNDS])
+{
+ BlockVec t1, t2, s;
+ size_t i = 0;
+
+#define EXPAND_KEY_1(RC) \
+ rkeys[i++] = t2; \
+ s = AES_KEYGEN(t2, RC); \
+ t1 = XOR128(t1, BYTESHL128(t1, 4)); \
+ t1 = XOR128(t1, BYTESHL128(t1, 8)); \
+ t1 = XOR128(t1, SHUFFLE32x4(s, 3, 3, 3, 3));
+
+#define EXPAND_KEY_2(RC) \
+ rkeys[i++] = t1; \
+ s = AES_KEYGEN(t1, RC); \
+ t2 = XOR128(t2, BYTESHL128(t2, 4)); \
+ t2 = XOR128(t2, BYTESHL128(t2, 8)); \
+ t2 = XOR128(t2, SHUFFLE32x4(s, 2, 2, 2, 2));
+
+ t1 = LOAD128(&key[0]);
+ t2 = LOAD128(&key[16]);
+
+ rkeys[i++] = t1;
+ EXPAND_KEY_1(0x01);
+ EXPAND_KEY_2(0x01);
+ EXPAND_KEY_1(0x02);
+ EXPAND_KEY_2(0x02);
+ EXPAND_KEY_1(0x04);
+ EXPAND_KEY_2(0x04);
+ EXPAND_KEY_1(0x08);
+ EXPAND_KEY_2(0x08);
+ EXPAND_KEY_1(0x10);
+ EXPAND_KEY_2(0x10);
+ EXPAND_KEY_1(0x20);
+ EXPAND_KEY_2(0x20);
+ EXPAND_KEY_1(0x40);
+ rkeys[i++] = t1;
+}
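+
+/* Standard AES-256 schedule producing ROUNDS + 1 = 15 round keys: the
+ * EXPAND_KEY_1 steps consume the RotWord+SubWord+rcon lane (shuffle index 3)
+ * and the EXPAND_KEY_2 steps the SubWord-only lane (shuffle index 2). */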
+
+/* Encrypt a single AES block */
+
+static inline void
+encrypt(const State *st, unsigned char dst[16], const unsigned char src[16])
+{
+ BlockVec t;
+
+ size_t i;
+
+ t = AES_XENCRYPT(LOAD128(src), st->rkeys[0]);
+ for (i = 1; i < ROUNDS - 1; i++) {
+ t = AES_XENCRYPT(t, st->rkeys[i]);
+ }
+ t = AES_XENCRYPTLAST(t, st->rkeys[i]);
+ t = XOR128(t, st->rkeys[ROUNDS]);
+ STORE128(dst, t);
+}
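+
+/* ARMv8 AESE adds the round key *before* SubBytes/ShiftRows -- the opposite
+ * of x86 AESENC -- so the AES_XENCRYPT macros consume rkeys[i] one round
+ * early and the last round key is applied with an explicit final XOR128. */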
+
+/* Encrypt and add a single AES block */
+
+static inline void __vectorcall encrypt_xor_block(const State *st, unsigned char dst[16],
+ const unsigned char src[16],
+ const BlockVec counter)
+{
+ BlockVec ts;
+ size_t i;
+
+ ts = AES_XENCRYPT(counter, st->rkeys[0]);
+ for (i = 1; i < ROUNDS - 1; i++) {
+ ts = AES_XENCRYPT(ts, st->rkeys[i]);
+ }
+ ts = AES_XENCRYPTLAST(ts, st->rkeys[i]);
+ ts = XOR128(ts, XOR128(st->rkeys[ROUNDS], LOAD128(src)));
+ STORE128(dst, ts);
+}
+
+/* Encrypt and add PARALLEL_BLOCKS AES blocks */
+
+static inline void __vectorcall encrypt_xor_wide(const State *st,
+ unsigned char dst[16 * PARALLEL_BLOCKS],
+ const unsigned char src[16 * PARALLEL_BLOCKS],
+ const BlockVec counters[PARALLEL_BLOCKS])
+{
+ BlockVec ts[PARALLEL_BLOCKS];
+ size_t i, j;
+
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ ts[j] = AES_XENCRYPT(counters[j], st->rkeys[0]);
+ }
+ for (i = 1; i < ROUNDS - 1; i++) {
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ ts[j] = AES_XENCRYPT(ts[j], st->rkeys[i]);
+ }
+ }
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ ts[j] = AES_XENCRYPTLAST(ts[j], st->rkeys[i]);
+ ts[j] = XOR128(ts[j], XOR128(st->rkeys[ROUNDS], LOAD128(&src[16 * j])));
+ }
+ for (j = 0; j < PARALLEL_BLOCKS; j++) {
+ STORE128(&dst[16 * j], ts[j]);
+ }
+}
+
+/* Square a field element */
+
+static inline I256 __vectorcall clsq128(const BlockVec x)
+{
+ const BlockVec r_lo = CLMULLO128(x, x);
+ const BlockVec r_hi = CLMULHI128(x, x);
+
+ return (I256) {
+ SODIUM_C99(.hi =) r_hi,
+ SODIUM_C99(.lo =) r_lo,
+ SODIUM_C99(.mid =) ZERO128,
+ };
+}
+
+/* Multiply two field elements -- Textbook multiplication is faster than Karatsuba on some recent
+ * CPUs */
+
+static inline I256 __vectorcall clmul128(const BlockVec x, const BlockVec y)
+{
+#ifdef USE_KARATSUBA_MULTIPLICATION
+ const BlockVec x_hi = BYTESHR128(x, 8);
+ const BlockVec y_hi = BYTESHR128(y, 8);
+ const BlockVec r_lo = CLMULLO128(x, y);
+ const BlockVec r_hi = CLMULHI128(x, y);
+ const BlockVec r_mid = XOR128(CLMULLO128(XOR128(x, x_hi), XOR128(y, y_hi)), XOR128(r_lo, r_hi));
+
+ return (I256) {
+ SODIUM_C99(.hi =) r_hi,
+ SODIUM_C99(.lo =) r_lo,
+ SODIUM_C99(.mid =) r_mid,
+ };
+#else
+ const BlockVec r_hi = CLMULHI128(x, y);
+ const BlockVec r_lo = CLMULLO128(x, y);
+ const BlockVec r_mid = XOR128(CLMULHILO128(x, y), CLMULLOHI128(x, y));
+
+ return (I256) {
+ SODIUM_C99(.hi =) r_hi,
+ SODIUM_C99(.lo =) r_lo,
+ SODIUM_C99(.mid =) r_mid,
+ };
+#endif
+}
+
+/* Merge the middle word and reduce a field element */
+
+static inline BlockVec __vectorcall gcm_reduce(const I256 x)
+{
+ const BlockVec hi = XOR128(x.hi, BYTESHR128(x.mid, 8));
+ const BlockVec lo = XOR128(x.lo, BYTESHL128(x.mid, 8));
+
+ const BlockVec p64 = SET64x2(0, 0xc200000000000000);
+ const BlockVec a = CLMULLO128(lo, p64);
+ const BlockVec b = XOR128(SHUFFLE32x4(lo, 2, 3, 0, 1), a);
+ const BlockVec c = CLMULLO128(b, p64);
+ const BlockVec d = XOR128(SHUFFLE32x4(b, 2, 3, 0, 1), c);
+
+ return XOR128(d, hi);
+}
+
+/* Precompute powers of H from `from` to `to` */
+
+static inline void __vectorcall precomp(Precomp hx[PC_COUNT], const size_t from, const size_t to)
+{
+ const Precomp h = hx[0];
+ size_t i;
+
+ for (i = from & ~1U; i < to; i += 2) {
+ hx[i] = gcm_reduce(clmul128(hx[i - 1], h));
+ hx[i + 1] = gcm_reduce(clsq128(hx[i / 2]));
+ }
+}
+
+/* Precompute powers of H given a key and a block count */
+
+static void __vectorcall precomp_for_block_count(Precomp hx[PC_COUNT],
+ const unsigned char gh_key[16],
+ const size_t block_count)
+{
+ const BlockVec h0 = REV128(LOAD128(gh_key));
+ BlockVec carry = SET64x2(0xc200000000000000, 1);
+ BlockVec mask = SUB64x2(ZERO128, SHR64x2(h0, 63));
+ BlockVec h0_shifted;
+ BlockVec h;
+
+ mask = SHUFFLE32x4(mask, 3, 3, 3, 3);
+ carry = AND128(carry, mask);
+ h0_shifted = SHL128(h0, 1);
+ h = XOR128(h0_shifted, carry);
+
+ hx[0] = h;
+ hx[1] = gcm_reduce(clsq128(hx[0]));
+
+ if (block_count >= PC_COUNT) {
+ precomp(hx, 2, PC_COUNT);
+ } else {
+ precomp(hx, 2, block_count);
+ }
+}
+
+/* Initialize a GHash */
+
+static inline void
+gh_init(GHash *sth)
+{
+ sth->acc = ZERO128;
+}
+
+static inline I256 __vectorcall gh_update0(const GHash *const sth, const unsigned char *const p,
+ const Precomp hn)
+{
+ const BlockVec m = REV128(LOAD128(p));
+ return clmul128(XOR128(sth->acc, m), hn);
+}
+
+static inline void __vectorcall gh_update(I256 *const u, const unsigned char *p, const Precomp hn)
+{
+ const BlockVec m = REV128(LOAD128(p));
+ const I256 t = clmul128(m, hn);
+ *u = (I256) { SODIUM_C99(.hi =) XOR128(u->hi, t.hi), SODIUM_C99(.lo =) XOR128(u->lo, t.lo),
+ SODIUM_C99(.mid =) XOR128(u->mid, t.mid) };
+}
+
+/* Absorb ad_len bytes of associated data; ad_len must be a multiple of 16
+ * (callers zero-pad any trailing partial block first). */
+
+static inline void
+gh_ad_blocks(const State *st, GHash *sth, const unsigned char *ad, size_t ad_len)
+{
+ size_t i;
+
+ i = (size_t) 0U;
+ for (; i + PC_COUNT * 16 <= ad_len; i += PC_COUNT * 16) {
+ I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT - 1 - 0]);
+ size_t j;
+
+ for (j = 1; j < PC_COUNT; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+ for (; i + PC_COUNT * 16 / 2 <= ad_len; i += PC_COUNT * 16 / 2) {
+ I256 u = gh_update0(sth, ad + i, st->hx[PC_COUNT / 2 - 1 - 0]);
+ size_t j;
+
+ for (j = 1; j < PC_COUNT / 2; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[PC_COUNT / 2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+ for (; i + 4 * 16 <= ad_len; i += 4 * 16) {
+ size_t j;
+ I256 u = gh_update0(sth, ad + i, st->hx[4 - 1 - 0]);
+
+ for (j = 1; j < 4; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[4 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+ for (; i + 2 * 16 <= ad_len; i += 2 * 16) {
+ size_t j;
+ I256 u = gh_update0(sth, ad + i, st->hx[2 - 1 - 0]);
+
+ for (j = 1; j < 2; j += 1) {
+ gh_update(&u, ad + i + j * 16, st->hx[2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+ if (i < ad_len) {
+ I256 u = gh_update0(sth, ad + i, st->hx[0]);
+ sth->acc = gcm_reduce(u);
+ }
+}
+
+/* Increment counters */
+
+static inline BlockVec __vectorcall incr_counters(BlockVec rev_counters[], BlockVec counter,
+ const size_t n)
+{
+ size_t i;
+
+ const BlockVec one = ONE128;
+ for (i = 0; i < n; i++) {
+ rev_counters[i] = REV128(counter);
+ counter = ADD64x2(counter, one);
+ }
+ return counter;
+}
+
+/* Compute the number of GHASH blocks required to encrypt and authenticate `ad_len` bytes of
+ * associated data and `m_len` bytes of message. Return `0` if the limits would be exceeded. */
+
+static inline size_t
+required_blocks(const size_t ad_len, const size_t m_len)
+{
+ const size_t ad_blocks = (ad_len + 15) / 16;
+ const size_t m_blocks = (m_len + 15) / 16;
+
+ if (ad_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 ||
+ m_len > SIZE_MAX - 2 * PARALLEL_BLOCKS * 16 || ad_len < ad_blocks || m_len < m_blocks ||
+ m_blocks >= (1ULL << 32) - 2) {
+ return 0;
+ }
+ return ad_blocks + m_blocks + 1;
+}
+
+/* Generic AES-GCM encryption. "Generic" as it can handle arbitrary input sizes,
+unlike a length-limited version that would precompute all the required powers of H */
+
+static void
+aes_gcm_encrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst,
+ const unsigned char *src, size_t src_len, const unsigned char *ad,
+ size_t ad_len, unsigned char counter_[16])
+{
+ CRYPTO_ALIGN(32) I256 u;
+ CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16];
+ const BlockVec one = ONE128;
+ BlockVec final_block;
+ BlockVec rev_counters[PARALLEL_BLOCKS];
+ BlockVec counter;
+ size_t i;
+ size_t j;
+ size_t left;
+ size_t pi;
+
+ COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0);
+
+ /* Associated data */
+
+ if (ad != NULL && ad_len != 0) {
+ gh_ad_blocks(st, sth, ad, ad_len & ~15);
+ left = ad_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, ad + ad_len - left, left);
+ gh_ad_blocks(st, sth, pad, sizeof pad);
+ }
+ }
+
+ /* Encrypted data */
+
+ counter = REV128(LOAD128(counter_));
+ i = 0;
+
+ /* 2*PARALLEL_BLOCKS aggregation */
+
+ if (src_len - i >= 2 * PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ i += PARALLEL_BLOCKS * 16;
+
+ for (; i + 2 * PARALLEL_BLOCKS * 16 <= src_len; i += 2 * PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]);
+ }
+
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i + PARALLEL_BLOCKS * 16, src + i + PARALLEL_BLOCKS * 16,
+ rev_counters);
+
+ pi = i;
+ for (j = 0; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ /* PARALLEL_BLOCKS aggregation */
+
+ if (src_len - i >= PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ i += PARALLEL_BLOCKS * 16;
+
+ for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ pi = i - PARALLEL_BLOCKS * 16;
+ u = gh_update0(sth, dst + pi, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, dst + pi + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ /* 4-blocks aggregation */
+
+ for (; i + 4 * 16 <= src_len; i += 4 * 16) {
+ counter = incr_counters(rev_counters, counter, 4);
+ for (j = 0; j < 4; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
+ }
+
+ u = gh_update0(sth, dst + i, st->hx[4 - 1 - 0]);
+ for (j = 1; j < 4; j += 1) {
+ gh_update(&u, dst + i + j * 16, st->hx[4 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ /* 2-blocks aggregation */
+
+ for (; i + 2 * 16 <= src_len; i += 2 * 16) {
+ counter = incr_counters(rev_counters, counter, 2);
+ for (j = 0; j < 2; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
+ }
+
+ u = gh_update0(sth, dst + i, st->hx[2 - 1 - 0]);
+ for (j = 1; j < 2; j += 1) {
+ gh_update(&u, dst + i + j * 16, st->hx[2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+ }
+
+ /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the
+ full block authenticated along with the final block, hence < and not <= */
+
+ for (; i + 16 < src_len; i += 16) {
+ encrypt_xor_block(st, dst + i, src + i, REV128(counter));
+ u = gh_update0(sth, dst + i, st->hx[1 - 1 - 0]);
+ sth->acc = gcm_reduce(u);
+ counter = ADD64x2(counter, one);
+ }
+
+ /* Authenticate both the last block of the message and the final block */
+
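+    /* Per the GCM spec, the final block encodes the bit lengths of the
+     * associated data and of the ciphertext; encrypting the counter block
+     * with counter value 1 yields the keystream that masks the tag. */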
+ final_block = REV128(SET64x2(ad_len * 8, src_len * 8));
+ STORE32_BE(counter_ + NPUBBYTES, 1);
+ encrypt(st, mac, counter_);
+ left = src_len - i;
+ if (left != 0) {
+ for (j = 0; j < left; j++) {
+ last_blocks[j] = src[i + j];
+ }
+ STORE128(last_blocks + 16, final_block);
+ encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter));
+ for (; j < 16; j++) {
+ last_blocks[j] = 0;
+ }
+ for (j = 0; j < left; j++) {
+ dst[i + j] = last_blocks[j];
+ }
+ gh_ad_blocks(st, sth, last_blocks, 32);
+ } else {
+ STORE128(last_blocks, final_block);
+ gh_ad_blocks(st, sth, last_blocks, 16);
+ }
+ STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc)));
+}
+
+/* Generic AES-GCM decryption. "Generic" as it can handle arbitrary input sizes,
+unlike a length-limited version that would precompute all the required powers of H */
+
+static void
+aes_gcm_decrypt_generic(const State *st, GHash *sth, unsigned char mac[ABYTES], unsigned char *dst,
+ const unsigned char *src, size_t src_len, const unsigned char *ad,
+ size_t ad_len, unsigned char counter_[16])
+{
+ CRYPTO_ALIGN(32) I256 u;
+ CRYPTO_ALIGN(16) unsigned char last_blocks[2 * 16];
+ const BlockVec one = ONE128;
+ BlockVec final_block;
+ BlockVec rev_counters[PARALLEL_BLOCKS];
+ BlockVec counter;
+ size_t i;
+ size_t j;
+ size_t left;
+
+ COMPILER_ASSERT(PC_COUNT % PARALLEL_BLOCKS == 0);
+
+ /* Associated data */
+
+ if (ad != NULL && ad_len != 0) {
+ gh_ad_blocks(st, sth, ad, ad_len & ~15);
+ left = ad_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, ad + ad_len - left, left);
+ gh_ad_blocks(st, sth, pad, sizeof pad);
+ }
+ }
+
+ /* Encrypted data */
+
+ counter = REV128(LOAD128(counter_));
+ i = 0;
+
+ /* 2*PARALLEL_BLOCKS aggregation */
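+    /* On decryption the GHASH input is the ciphertext itself, so batches are
+     * hashed straight from src while they are being decrypted; no one-batch
+     * lag is needed here. */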
+
+ while (i + 2 * PARALLEL_BLOCKS * 16 <= src_len) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+
+ u = gh_update0(sth, src + i, st->hx[2 * PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[2 * PARALLEL_BLOCKS - 1 - j]);
+ }
+
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+
+ i += PARALLEL_BLOCKS * 16;
+ for (j = 0; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ i += PARALLEL_BLOCKS * 16;
+ }
+
+ /* PARALLEL_BLOCKS aggregation */
+
+ for (; i + PARALLEL_BLOCKS * 16 <= src_len; i += PARALLEL_BLOCKS * 16) {
+ counter = incr_counters(rev_counters, counter, PARALLEL_BLOCKS);
+
+ u = gh_update0(sth, src + i, st->hx[PARALLEL_BLOCKS - 1 - 0]);
+ for (j = 1; j < PARALLEL_BLOCKS; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[PARALLEL_BLOCKS - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+
+ encrypt_xor_wide(st, dst + i, src + i, rev_counters);
+ }
+
+ /* 4-blocks aggregation */
+
+ for (; i + 4 * 16 <= src_len; i += 4 * 16) {
+ counter = incr_counters(rev_counters, counter, 4);
+
+ u = gh_update0(sth, src + i, st->hx[4 - 1 - 0]);
+ for (j = 1; j < 4; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[4 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+
+ for (j = 0; j < 4; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
+ }
+ }
+
+ /* 2-blocks aggregation */
+
+ for (; i + 2 * 16 <= src_len; i += 2 * 16) {
+ counter = incr_counters(rev_counters, counter, 2);
+
+ u = gh_update0(sth, src + i, st->hx[2 - 1 - 0]);
+ for (j = 1; j < 2; j += 1) {
+ gh_update(&u, src + i + j * 16, st->hx[2 - 1 - j]);
+ }
+ sth->acc = gcm_reduce(u);
+
+ for (j = 0; j < 2; j++) {
+ encrypt_xor_block(st, dst + i + j * 16, src + i + j * 16, rev_counters[j]);
+ }
+ }
+
+ /* Remaining *partial* blocks; if we have 16 bytes left, we want to keep the
+ full block authenticated along with the final block, hence < and not <= */
+
+ for (; i + 16 < src_len; i += 16) {
+ u = gh_update0(sth, src + i, st->hx[1 - 1 - 0]);
+ sth->acc = gcm_reduce(u);
+ encrypt_xor_block(st, dst + i, src + i, REV128(counter));
+ counter = ADD64x2(counter, one);
+ }
+
+ /* Authenticate both the last block of the message and the final block */
+
+ final_block = REV128(SET64x2(ad_len * 8, src_len * 8));
+ STORE32_BE(counter_ + NPUBBYTES, 1);
+ encrypt(st, mac, counter_);
+ left = src_len - i;
+ if (left != 0) {
+ for (j = 0; j < left; j++) {
+ last_blocks[j] = src[i + j];
+ }
+ for (; j < 16; j++) {
+ last_blocks[j] = 0;
+ }
+ STORE128(last_blocks + 16, final_block);
+ gh_ad_blocks(st, sth, last_blocks, 32);
+ encrypt_xor_block(st, last_blocks, last_blocks, REV128(counter));
+ for (j = 0; j < left; j++) {
+ dst[i + j] = last_blocks[j];
+ }
+ } else {
+ STORE128(last_blocks, final_block);
+ gh_ad_blocks(st, sth, last_blocks, 16);
+ }
+ STORE128(mac, XOR128(LOAD128(mac), REV128(sth->acc)));
+}
+
+int
+crypto_aead_aes256gcm_beforenm(crypto_aead_aes256gcm_state *st_, const unsigned char *k)
+{
+ State *st = (State *) (void *) st_;
+ CRYPTO_ALIGN(16) unsigned char h[16];
+
+ COMPILER_ASSERT(sizeof *st_ >= sizeof *st);
+
+ expand256(k, st->rkeys);
+ memset(h, 0, sizeof h);
+ encrypt(st, h, h);
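+    /* H = AES-256_k(0^128) is the GHASH key; it is expanded below into
+     * PC_COUNT precomputed powers so block batches can be aggregated. */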
+
+ precomp_for_block_count(st->hx, h, PC_COUNT);
+
+ return 0;
+}
+
+int
+crypto_aead_aes256gcm_encrypt_detached_afternm(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long m_len_, const unsigned char *ad,
+ unsigned long long ad_len_,
+ const unsigned char *nsec, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ const State *st = (const State *) (const void *) st_;
+ GHash sth;
+ CRYPTO_ALIGN(16) unsigned char j[16];
+ size_t gh_required_blocks;
+ const size_t ad_len = (size_t) ad_len_;
+ const size_t m_len = (size_t) m_len_;
+
+ (void) nsec;
+ if (maclen_p != NULL) {
+ *maclen_p = 0;
+ }
+ if (ad_len_ > SODIUM_SIZE_MAX || m_len_ > SODIUM_SIZE_MAX) {
+ sodium_misuse();
+ }
+ gh_required_blocks = required_blocks(ad_len, m_len);
+ if (gh_required_blocks == 0) {
+ memset(mac, 0xd0, ABYTES);
+ memset(c, 0, m_len);
+ return -1;
+ }
+
+ gh_init(&sth);
+
+ memcpy(j, npub, NPUBBYTES);
+ STORE32_BE(j + NPUBBYTES, 2);
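+    /* The block counter starts at 2: value 1 is reserved for the keystream
+     * block that masks the authentication tag. */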
+
+ aes_gcm_encrypt_generic(st, &sth, mac, c, m, m_len, ad, ad_len, j);
+
+ if (maclen_p != NULL) {
+ *maclen_p = ABYTES;
+ }
+ return 0;
+}
+
+int
+crypto_aead_aes256gcm_encrypt(unsigned char *c, unsigned long long *clen_p, const unsigned char *m,
+ unsigned long long m_len, const unsigned char *ad,
+ unsigned long long ad_len, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ const int ret = crypto_aead_aes256gcm_encrypt_detached(c, c + m_len, NULL, m, m_len, ad, ad_len,
+ nsec, npub, k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ *clen_p = m_len + crypto_aead_aes256gcm_ABYTES;
+ } else {
+ *clen_p = 0;
+ }
+ }
+ return ret;
+}
+
+int
+crypto_aead_aes256gcm_encrypt_detached(unsigned char *c, unsigned char *mac,
+ unsigned long long *maclen_p, const unsigned char *m,
+ unsigned long long m_len, const unsigned char *ad,
+ unsigned long long ad_len, const unsigned char *nsec,
+ const unsigned char *npub, const unsigned char *k)
+{
+ CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st;
+ int ret;
+
+ PREFETCH_WRITE(c);
+ PREFETCH_READ(m);
+ PREFETCH_READ(ad);
+
+ crypto_aead_aes256gcm_beforenm(&st, k);
+ ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, mac, maclen_p, m, m_len, ad, ad_len,
+ nsec, npub, &st);
+ sodium_memzero(&st, sizeof st);
+
+ return ret;
+}
+
+int
+crypto_aead_aes256gcm_encrypt_afternm(unsigned char *c, unsigned long long *clen_p,
+ const unsigned char *m, unsigned long long mlen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *nsec, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ int ret = crypto_aead_aes256gcm_encrypt_detached_afternm(c, c + mlen, NULL, m, mlen, ad, adlen,
+ nsec, npub, st_);
+ if (clen_p != NULL) {
+ *clen_p = mlen + crypto_aead_aes256gcm_ABYTES;
+ }
+ return ret;
+}
+
+static int
+crypto_aead_aes256gcm_verify_mac(unsigned char *nsec, const unsigned char *c,
+ unsigned long long c_len_, const unsigned char *mac,
+ const unsigned char *ad, unsigned long long ad_len_,
+ const unsigned char *npub, const crypto_aead_aes256gcm_state *st_)
+{
+ const State *st = (const State *) (const void *) st_;
+ GHash sth;
+ BlockVec final_block;
+ CRYPTO_ALIGN(16) unsigned char j[16];
+ CRYPTO_ALIGN(16) unsigned char computed_mac[16];
+ CRYPTO_ALIGN(16) unsigned char last_block[16];
+ size_t gh_required_blocks;
+ size_t left;
+ const size_t ad_len = (size_t) ad_len_;
+ const size_t c_len = (size_t) c_len_;
+ int ret;
+
+ (void) nsec;
+ if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) {
+ sodium_misuse();
+ }
+ gh_required_blocks = required_blocks(ad_len, c_len);
+ if (gh_required_blocks == 0) {
+ return -1;
+ }
+
+ gh_init(&sth);
+
+ memcpy(j, npub, NPUBBYTES);
+ STORE32_BE(j + NPUBBYTES, 2);
+
+ gh_ad_blocks(st, &sth, ad, ad_len & ~15);
+ left = ad_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, ad + ad_len - left, left);
+ gh_ad_blocks(st, &sth, pad, sizeof pad);
+ }
+
+ gh_ad_blocks(st, &sth, c, c_len & ~15);
+ left = c_len & 15;
+ if (left != 0) {
+ unsigned char pad[16];
+
+ memset(pad, 0, sizeof pad);
+ memcpy(pad, c + c_len - left, left);
+ gh_ad_blocks(st, &sth, pad, sizeof pad);
+ }
+ final_block = REV128(SET64x2(ad_len * 8, c_len * 8));
+ STORE32_BE(j + NPUBBYTES, 1);
+ encrypt(st, computed_mac, j);
+ STORE128(last_block, final_block);
+ gh_ad_blocks(st, &sth, last_block, 16);
+ STORE128(computed_mac, XOR128(LOAD128(computed_mac), REV128(sth.acc)));
+
+ ret = crypto_verify_16(mac, computed_mac);
+ sodium_memzero(computed_mac, sizeof computed_mac);
+
+ return ret;
+}
+
+int
+crypto_aead_aes256gcm_decrypt_detached_afternm(unsigned char *m, unsigned char *nsec,
+ const unsigned char *c, unsigned long long c_len_,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long ad_len_,
+ const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ const State *st = (const State *) (const void *) st_;
+ GHash sth;
+ CRYPTO_ALIGN(16) unsigned char j[16];
+ unsigned char computed_mac[16];
+ size_t gh_required_blocks;
+ const size_t ad_len = (size_t) ad_len_;
+ const size_t c_len = (size_t) c_len_;
+ const size_t m_len = c_len;
+
+ (void) nsec;
+ if (ad_len_ > SODIUM_SIZE_MAX || c_len_ > SODIUM_SIZE_MAX) {
+ sodium_misuse();
+ }
+ if (m == NULL) {
+ return crypto_aead_aes256gcm_verify_mac(nsec, c, c_len, mac, ad, ad_len, npub, st_);
+ }
+ gh_required_blocks = required_blocks(ad_len, m_len);
+ if (gh_required_blocks == 0) {
+ return -1;
+ }
+
+ gh_init(&sth);
+
+ memcpy(j, npub, NPUBBYTES);
+ STORE32_BE(j + NPUBBYTES, 2);
+
+ aes_gcm_decrypt_generic(st, &sth, computed_mac, m, c, m_len, ad, ad_len, j);
+
+ if (crypto_verify_16(mac, computed_mac) != 0) {
+ sodium_memzero(computed_mac, sizeof computed_mac);
+ memset(m, 0xd0, m_len);
+ return -1;
+ }
+ return 0;
+}
+
+int
+crypto_aead_aes256gcm_decrypt_afternm(unsigned char *m, unsigned long long *mlen_p,
+ unsigned char *nsec, const unsigned char *c,
+ unsigned long long clen, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const crypto_aead_aes256gcm_state *st_)
+{
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= ABYTES) {
+ ret = crypto_aead_aes256gcm_decrypt_detached_afternm(
+ m, nsec, c, clen - ABYTES, c + clen - ABYTES, ad, adlen, npub, st_);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_aes256gcm_decrypt_detached(unsigned char *m, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *mac, const unsigned char *ad,
+ unsigned long long adlen, const unsigned char *npub,
+ const unsigned char *k)
+{
+ CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st;
+
+ PREFETCH_WRITE(m);
+ PREFETCH_READ(c);
+ PREFETCH_READ(ad);
+
+ crypto_aead_aes256gcm_beforenm(&st, k);
+
+ return crypto_aead_aes256gcm_decrypt_detached_afternm(
+ m, nsec, c, clen, mac, ad, adlen, npub, (const crypto_aead_aes256gcm_state *) &st);
+}
+
+int
+crypto_aead_aes256gcm_decrypt(unsigned char *m, unsigned long long *mlen_p, unsigned char *nsec,
+ const unsigned char *c, unsigned long long clen,
+ const unsigned char *ad, unsigned long long adlen,
+ const unsigned char *npub, const unsigned char *k)
+{
+ CRYPTO_ALIGN(16) crypto_aead_aes256gcm_state st;
+ int ret;
+
+ PREFETCH_WRITE(m);
+ PREFETCH_READ(c);
+ PREFETCH_READ(ad);
+
+ crypto_aead_aes256gcm_beforenm(&st, k);
+
+ ret = crypto_aead_aes256gcm_decrypt_afternm(m, mlen_p, nsec, c, clen, ad, adlen, npub,
+ (const crypto_aead_aes256gcm_state *) &st);
+ sodium_memzero(&st, sizeof st);
+
+ return ret;
+}
+
+int
+crypto_aead_aes256gcm_is_available(void)
+{
+ return sodium_runtime_has_armcrypto();
+}
+
+#ifdef __clang__
+#pragma clang attribute pop
+#endif
+
+#endif
diff --git a/libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c b/libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c
index ce51546200..c354087975 100644
--- a/libs/libsodium/src/crypto_aead/chacha20poly1305/sodium/aead_chacha20poly1305.c
+++ b/libs/libsodium/src/crypto_aead/chacha20poly1305/aead_chacha20poly1305.c
@@ -1,400 +1,400 @@
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-
-#include "core.h"
-#include "crypto_aead_chacha20poly1305.h"
-#include "crypto_onetimeauth_poly1305.h"
-#include "crypto_stream_chacha20.h"
-#include "crypto_verify_16.h"
-#include "randombytes.h"
-#include "utils.h"
-
-#include "private/chacha20_ietf_ext.h"
-#include "private/common.h"
-
-static const unsigned char _pad0[16] = { 0 };
-
-int
-crypto_aead_chacha20poly1305_encrypt_detached(unsigned char *c,
- unsigned char *mac,
- unsigned long long *maclen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- crypto_onetimeauth_poly1305_state state;
- unsigned char block0[64U];
- unsigned char slen[8U];
-
- (void) nsec;
- crypto_stream_chacha20(block0, sizeof block0, npub, k);
- crypto_onetimeauth_poly1305_init(&state, block0);
- sodium_memzero(block0, sizeof block0);
-
- crypto_onetimeauth_poly1305_update(&state, ad, adlen);
- STORE64_LE(slen, (uint64_t) adlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_stream_chacha20_xor_ic(c, m, mlen, npub, 1U, k);
-
- crypto_onetimeauth_poly1305_update(&state, c, mlen);
- STORE64_LE(slen, (uint64_t) mlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_onetimeauth_poly1305_final(&state, mac);
- sodium_memzero(&state, sizeof state);
-
- if (maclen_p != NULL) {
- *maclen_p = crypto_aead_chacha20poly1305_ABYTES;
- }
- return 0;
-}
-
-int
-crypto_aead_chacha20poly1305_encrypt(unsigned char *c,
- unsigned long long *clen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned long long clen = 0ULL;
- int ret;
-
- if (mlen > crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX) {
- sodium_misuse();
- }
- ret = crypto_aead_chacha20poly1305_encrypt_detached(c,
- c + mlen, NULL,
- m, mlen,
- ad, adlen,
- nsec, npub, k);
- if (clen_p != NULL) {
- if (ret == 0) {
- clen = mlen + crypto_aead_chacha20poly1305_ABYTES;
- }
- *clen_p = clen;
- }
- return ret;
-}
-
-int
-crypto_aead_chacha20poly1305_ietf_encrypt_detached(unsigned char *c,
- unsigned char *mac,
- unsigned long long *maclen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- crypto_onetimeauth_poly1305_state state;
- unsigned char block0[64U];
- unsigned char slen[8U];
-
- (void) nsec;
- crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k);
- crypto_onetimeauth_poly1305_init(&state, block0);
- sodium_memzero(block0, sizeof block0);
-
- crypto_onetimeauth_poly1305_update(&state, ad, adlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
-
- crypto_stream_chacha20_ietf_xor_ic(c, m, mlen, npub, 1U, k);
-
- crypto_onetimeauth_poly1305_update(&state, c, mlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
-
- STORE64_LE(slen, (uint64_t) adlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- STORE64_LE(slen, (uint64_t) mlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_onetimeauth_poly1305_final(&state, mac);
- sodium_memzero(&state, sizeof state);
-
- if (maclen_p != NULL) {
- *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES;
- }
- return 0;
-}
-
-int
-crypto_aead_chacha20poly1305_ietf_encrypt(unsigned char *c,
- unsigned long long *clen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned long long clen = 0ULL;
- int ret;
-
- if (mlen > crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX) {
- sodium_misuse();
- }
- ret = crypto_aead_chacha20poly1305_ietf_encrypt_detached(c,
- c + mlen, NULL,
- m, mlen,
- ad, adlen,
- nsec, npub, k);
- if (clen_p != NULL) {
- if (ret == 0) {
- clen = mlen + crypto_aead_chacha20poly1305_ietf_ABYTES;
- }
- *clen_p = clen;
- }
- return ret;
-}
-
-int
-crypto_aead_chacha20poly1305_decrypt_detached(unsigned char *m,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- crypto_onetimeauth_poly1305_state state;
- unsigned char block0[64U];
- unsigned char slen[8U];
- unsigned char computed_mac[crypto_aead_chacha20poly1305_ABYTES];
- unsigned long long mlen;
- int ret;
-
- (void) nsec;
- crypto_stream_chacha20(block0, sizeof block0, npub, k);
- crypto_onetimeauth_poly1305_init(&state, block0);
- sodium_memzero(block0, sizeof block0);
-
- crypto_onetimeauth_poly1305_update(&state, ad, adlen);
- STORE64_LE(slen, (uint64_t) adlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- mlen = clen;
- crypto_onetimeauth_poly1305_update(&state, c, mlen);
- STORE64_LE(slen, (uint64_t) mlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_onetimeauth_poly1305_final(&state, computed_mac);
- sodium_memzero(&state, sizeof state);
-
- COMPILER_ASSERT(sizeof computed_mac == 16U);
- ret = crypto_verify_16(computed_mac, mac);
- sodium_memzero(computed_mac, sizeof computed_mac);
- if (m == NULL) {
- return ret;
- }
- if (ret != 0) {
- memset(m, 0, mlen);
- return -1;
- }
- crypto_stream_chacha20_xor_ic(m, c, mlen, npub, 1U, k);
-
- return 0;
-}
-
-int
-crypto_aead_chacha20poly1305_decrypt(unsigned char *m,
- unsigned long long *mlen_p,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned long long mlen = 0ULL;
- int ret = -1;
-
- if (clen >= crypto_aead_chacha20poly1305_ABYTES) {
- ret = crypto_aead_chacha20poly1305_decrypt_detached
- (m, nsec,
- c, clen - crypto_aead_chacha20poly1305_ABYTES,
- c + clen - crypto_aead_chacha20poly1305_ABYTES,
- ad, adlen, npub, k);
- }
- if (mlen_p != NULL) {
- if (ret == 0) {
- mlen = clen - crypto_aead_chacha20poly1305_ABYTES;
- }
- *mlen_p = mlen;
- }
- return ret;
-}
-
-int
-crypto_aead_chacha20poly1305_ietf_decrypt_detached(unsigned char *m,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- crypto_onetimeauth_poly1305_state state;
- unsigned char block0[64U];
- unsigned char slen[8U];
- unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES];
- unsigned long long mlen;
- int ret;
-
- (void) nsec;
- crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k);
- crypto_onetimeauth_poly1305_init(&state, block0);
- sodium_memzero(block0, sizeof block0);
-
- crypto_onetimeauth_poly1305_update(&state, ad, adlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
-
- mlen = clen;
- crypto_onetimeauth_poly1305_update(&state, c, mlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
-
- STORE64_LE(slen, (uint64_t) adlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- STORE64_LE(slen, (uint64_t) mlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_onetimeauth_poly1305_final(&state, computed_mac);
- sodium_memzero(&state, sizeof state);
-
- COMPILER_ASSERT(sizeof computed_mac == 16U);
- ret = crypto_verify_16(computed_mac, mac);
- sodium_memzero(computed_mac, sizeof computed_mac);
- if (m == NULL) {
- return ret;
- }
- if (ret != 0) {
- memset(m, 0, mlen);
- return -1;
- }
- crypto_stream_chacha20_ietf_xor_ic(m, c, mlen, npub, 1U, k);
-
- return 0;
-}
-
-int
-crypto_aead_chacha20poly1305_ietf_decrypt(unsigned char *m,
- unsigned long long *mlen_p,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned long long mlen = 0ULL;
- int ret = -1;
-
- if (clen >= crypto_aead_chacha20poly1305_ietf_ABYTES) {
- ret = crypto_aead_chacha20poly1305_ietf_decrypt_detached
- (m, nsec,
- c, clen - crypto_aead_chacha20poly1305_ietf_ABYTES,
- c + clen - crypto_aead_chacha20poly1305_ietf_ABYTES,
- ad, adlen, npub, k);
- }
- if (mlen_p != NULL) {
- if (ret == 0) {
- mlen = clen - crypto_aead_chacha20poly1305_ietf_ABYTES;
- }
- *mlen_p = mlen;
- }
- return ret;
-}
-
-size_t
-crypto_aead_chacha20poly1305_ietf_keybytes(void)
-{
- return crypto_aead_chacha20poly1305_ietf_KEYBYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_ietf_npubbytes(void)
-{
- return crypto_aead_chacha20poly1305_ietf_NPUBBYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_ietf_nsecbytes(void)
-{
- return crypto_aead_chacha20poly1305_ietf_NSECBYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_ietf_abytes(void)
-{
- return crypto_aead_chacha20poly1305_ietf_ABYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_ietf_messagebytes_max(void)
-{
- return crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX;
-}
-
-void
-crypto_aead_chacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_chacha20poly1305_ietf_KEYBYTES])
-{
- randombytes_buf(k, crypto_aead_chacha20poly1305_ietf_KEYBYTES);
-}
-
-size_t
-crypto_aead_chacha20poly1305_keybytes(void)
-{
- return crypto_aead_chacha20poly1305_KEYBYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_npubbytes(void)
-{
- return crypto_aead_chacha20poly1305_NPUBBYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_nsecbytes(void)
-{
- return crypto_aead_chacha20poly1305_NSECBYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_abytes(void)
-{
- return crypto_aead_chacha20poly1305_ABYTES;
-}
-
-size_t
-crypto_aead_chacha20poly1305_messagebytes_max(void)
-{
- return crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX;
-}
-
-void
-crypto_aead_chacha20poly1305_keygen(unsigned char k[crypto_aead_chacha20poly1305_KEYBYTES])
-{
- randombytes_buf(k, crypto_aead_chacha20poly1305_KEYBYTES);
-}
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_chacha20poly1305.h"
+#include "crypto_onetimeauth_poly1305.h"
+#include "crypto_stream_chacha20.h"
+#include "crypto_verify_16.h"
+#include "randombytes.h"
+#include "utils.h"
+
+#include "private/chacha20_ietf_ext.h"
+#include "private/common.h"
+
+static const unsigned char _pad0[16] = { 0 };
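+/* Zero padding used by the IETF construction to align the associated data and
+ * the ciphertext to 16-byte Poly1305 block boundaries. */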
+
+int
+crypto_aead_chacha20poly1305_encrypt_detached(unsigned char *c,
+ unsigned char *mac,
+ unsigned long long *maclen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ crypto_onetimeauth_poly1305_state state;
+ unsigned char block0[64U];
+ unsigned char slen[8U];
+
+ (void) nsec;
+ crypto_stream_chacha20(block0, sizeof block0, npub, k);
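+    /* block0 is the first ChaCha20 keystream block; its first 32 bytes serve
+     * as the one-time Poly1305 key and the remainder is discarded. */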
+ crypto_onetimeauth_poly1305_init(&state, block0);
+ sodium_memzero(block0, sizeof block0);
+
+ crypto_onetimeauth_poly1305_update(&state, ad, adlen);
+ STORE64_LE(slen, (uint64_t) adlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_stream_chacha20_xor_ic(c, m, mlen, npub, 1U, k);
+
+ crypto_onetimeauth_poly1305_update(&state, c, mlen);
+ STORE64_LE(slen, (uint64_t) mlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_onetimeauth_poly1305_final(&state, mac);
+ sodium_memzero(&state, sizeof state);
+
+ if (maclen_p != NULL) {
+ *maclen_p = crypto_aead_chacha20poly1305_ABYTES;
+ }
+ return 0;
+}
+
+int
+crypto_aead_chacha20poly1305_encrypt(unsigned char *c,
+ unsigned long long *clen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned long long clen = 0ULL;
+ int ret;
+
+ if (mlen > crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX) {
+ sodium_misuse();
+ }
+ ret = crypto_aead_chacha20poly1305_encrypt_detached(c,
+ c + mlen, NULL,
+ m, mlen,
+ ad, adlen,
+ nsec, npub, k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ clen = mlen + crypto_aead_chacha20poly1305_ABYTES;
+ }
+ *clen_p = clen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_chacha20poly1305_ietf_encrypt_detached(unsigned char *c,
+ unsigned char *mac,
+ unsigned long long *maclen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ crypto_onetimeauth_poly1305_state state;
+ unsigned char block0[64U];
+ unsigned char slen[8U];
+
+ (void) nsec;
+ crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k);
+ crypto_onetimeauth_poly1305_init(&state, block0);
+ sodium_memzero(block0, sizeof block0);
+
+ crypto_onetimeauth_poly1305_update(&state, ad, adlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
+
+ crypto_stream_chacha20_ietf_xor_ic(c, m, mlen, npub, 1U, k);
+
+ crypto_onetimeauth_poly1305_update(&state, c, mlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
+
+ STORE64_LE(slen, (uint64_t) adlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ STORE64_LE(slen, (uint64_t) mlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_onetimeauth_poly1305_final(&state, mac);
+ sodium_memzero(&state, sizeof state);
+
+ if (maclen_p != NULL) {
+ *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES;
+ }
+ return 0;
+}
+
+int
+crypto_aead_chacha20poly1305_ietf_encrypt(unsigned char *c,
+ unsigned long long *clen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned long long clen = 0ULL;
+ int ret;
+
+ if (mlen > crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX) {
+ sodium_misuse();
+ }
+ ret = crypto_aead_chacha20poly1305_ietf_encrypt_detached(c,
+ c + mlen, NULL,
+ m, mlen,
+ ad, adlen,
+ nsec, npub, k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ clen = mlen + crypto_aead_chacha20poly1305_ietf_ABYTES;
+ }
+ *clen_p = clen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_chacha20poly1305_decrypt_detached(unsigned char *m,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *mac,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ crypto_onetimeauth_poly1305_state state;
+ unsigned char block0[64U];
+ unsigned char slen[8U];
+ unsigned char computed_mac[crypto_aead_chacha20poly1305_ABYTES];
+ unsigned long long mlen;
+ int ret;
+
+ (void) nsec;
+ crypto_stream_chacha20(block0, sizeof block0, npub, k);
+ crypto_onetimeauth_poly1305_init(&state, block0);
+ sodium_memzero(block0, sizeof block0);
+
+ crypto_onetimeauth_poly1305_update(&state, ad, adlen);
+ STORE64_LE(slen, (uint64_t) adlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ mlen = clen;
+ crypto_onetimeauth_poly1305_update(&state, c, mlen);
+ STORE64_LE(slen, (uint64_t) mlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_onetimeauth_poly1305_final(&state, computed_mac);
+ sodium_memzero(&state, sizeof state);
+
+ COMPILER_ASSERT(sizeof computed_mac == 16U);
+ ret = crypto_verify_16(computed_mac, mac);
+ sodium_memzero(computed_mac, sizeof computed_mac);
+ if (m == NULL) {
+ return ret;
+ }
+ if (ret != 0) {
+ memset(m, 0, mlen);
+ return -1;
+ }
+ crypto_stream_chacha20_xor_ic(m, c, mlen, npub, 1U, k);
+
+ return 0;
+}
+
+int
+crypto_aead_chacha20poly1305_decrypt(unsigned char *m,
+ unsigned long long *mlen_p,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= crypto_aead_chacha20poly1305_ABYTES) {
+ ret = crypto_aead_chacha20poly1305_decrypt_detached
+ (m, nsec,
+ c, clen - crypto_aead_chacha20poly1305_ABYTES,
+ c + clen - crypto_aead_chacha20poly1305_ABYTES,
+ ad, adlen, npub, k);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - crypto_aead_chacha20poly1305_ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_chacha20poly1305_ietf_decrypt_detached(unsigned char *m,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *mac,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ crypto_onetimeauth_poly1305_state state;
+ unsigned char block0[64U];
+ unsigned char slen[8U];
+ unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES];
+ unsigned long long mlen;
+ int ret;
+
+ (void) nsec;
+ crypto_stream_chacha20_ietf(block0, sizeof block0, npub, k);
+ crypto_onetimeauth_poly1305_init(&state, block0);
+ sodium_memzero(block0, sizeof block0);
+
+ crypto_onetimeauth_poly1305_update(&state, ad, adlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
+
+ mlen = clen;
+ crypto_onetimeauth_poly1305_update(&state, c, mlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
+
+ STORE64_LE(slen, (uint64_t) adlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ STORE64_LE(slen, (uint64_t) mlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_onetimeauth_poly1305_final(&state, computed_mac);
+ sodium_memzero(&state, sizeof state);
+
+ COMPILER_ASSERT(sizeof computed_mac == 16U);
+ ret = crypto_verify_16(computed_mac, mac);
+ sodium_memzero(computed_mac, sizeof computed_mac);
+ if (m == NULL) {
+ return ret;
+ }
+ if (ret != 0) {
+ memset(m, 0, mlen);
+ return -1;
+ }
+ crypto_stream_chacha20_ietf_xor_ic(m, c, mlen, npub, 1U, k);
+
+ return 0;
+}
+
+int
+crypto_aead_chacha20poly1305_ietf_decrypt(unsigned char *m,
+ unsigned long long *mlen_p,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= crypto_aead_chacha20poly1305_ietf_ABYTES) {
+ ret = crypto_aead_chacha20poly1305_ietf_decrypt_detached
+ (m, nsec,
+ c, clen - crypto_aead_chacha20poly1305_ietf_ABYTES,
+ c + clen - crypto_aead_chacha20poly1305_ietf_ABYTES,
+ ad, adlen, npub, k);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - crypto_aead_chacha20poly1305_ietf_ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
+}
+
+size_t
+crypto_aead_chacha20poly1305_ietf_keybytes(void)
+{
+ return crypto_aead_chacha20poly1305_ietf_KEYBYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_ietf_npubbytes(void)
+{
+ return crypto_aead_chacha20poly1305_ietf_NPUBBYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_ietf_nsecbytes(void)
+{
+ return crypto_aead_chacha20poly1305_ietf_NSECBYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_ietf_abytes(void)
+{
+ return crypto_aead_chacha20poly1305_ietf_ABYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_ietf_messagebytes_max(void)
+{
+ return crypto_aead_chacha20poly1305_ietf_MESSAGEBYTES_MAX;
+}
+
+void
+crypto_aead_chacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_chacha20poly1305_ietf_KEYBYTES])
+{
+ randombytes_buf(k, crypto_aead_chacha20poly1305_ietf_KEYBYTES);
+}
+
+size_t
+crypto_aead_chacha20poly1305_keybytes(void)
+{
+ return crypto_aead_chacha20poly1305_KEYBYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_npubbytes(void)
+{
+ return crypto_aead_chacha20poly1305_NPUBBYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_nsecbytes(void)
+{
+ return crypto_aead_chacha20poly1305_NSECBYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_abytes(void)
+{
+ return crypto_aead_chacha20poly1305_ABYTES;
+}
+
+size_t
+crypto_aead_chacha20poly1305_messagebytes_max(void)
+{
+ return crypto_aead_chacha20poly1305_MESSAGEBYTES_MAX;
+}
+
+void
+crypto_aead_chacha20poly1305_keygen(unsigned char k[crypto_aead_chacha20poly1305_KEYBYTES])
+{
+ randombytes_buf(k, crypto_aead_chacha20poly1305_KEYBYTES);
+}
diff --git a/libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c b/libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c
index 61ccc84c8c..07e3655731 100644
--- a/libs/libsodium/src/crypto_aead/xchacha20poly1305/sodium/aead_xchacha20poly1305.c
+++ b/libs/libsodium/src/crypto_aead/xchacha20poly1305/aead_xchacha20poly1305.c
@@ -1,262 +1,262 @@
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <limits.h>
-#include <string.h>
-
-#include "core.h"
-#include "crypto_aead_chacha20poly1305.h"
-#include "crypto_aead_xchacha20poly1305.h"
-#include "crypto_core_hchacha20.h"
-#include "crypto_onetimeauth_poly1305.h"
-#include "crypto_stream_chacha20.h"
-#include "crypto_verify_16.h"
-#include "randombytes.h"
-#include "utils.h"
-
-#include "private/chacha20_ietf_ext.h"
-#include "private/common.h"
-
-static const unsigned char _pad0[16] = { 0 };
-
-static int
-_encrypt_detached(unsigned char *c,
- unsigned char *mac,
- unsigned long long *maclen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- crypto_onetimeauth_poly1305_state state;
- unsigned char block0[64U];
- unsigned char slen[8U];
-
- (void) nsec;
- crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k);
- crypto_onetimeauth_poly1305_init(&state, block0);
- sodium_memzero(block0, sizeof block0);
-
- crypto_onetimeauth_poly1305_update(&state, ad, adlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
-
- crypto_stream_chacha20_ietf_ext_xor_ic(c, m, mlen, npub, 1U, k);
-
- crypto_onetimeauth_poly1305_update(&state, c, mlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
-
- STORE64_LE(slen, (uint64_t) adlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- STORE64_LE(slen, (uint64_t) mlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_onetimeauth_poly1305_final(&state, mac);
- sodium_memzero(&state, sizeof state);
-
- if (maclen_p != NULL) {
- *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES;
- }
- return 0;
-}
-
-static int
-_decrypt_detached(unsigned char *m,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- crypto_onetimeauth_poly1305_state state;
- unsigned char block0[64U];
- unsigned char slen[8U];
- unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES];
- unsigned long long mlen;
- int ret;
-
- (void) nsec;
- crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k);
- crypto_onetimeauth_poly1305_init(&state, block0);
- sodium_memzero(block0, sizeof block0);
-
- crypto_onetimeauth_poly1305_update(&state, ad, adlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
-
- mlen = clen;
- crypto_onetimeauth_poly1305_update(&state, c, mlen);
- crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
-
- STORE64_LE(slen, (uint64_t) adlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- STORE64_LE(slen, (uint64_t) mlen);
- crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
-
- crypto_onetimeauth_poly1305_final(&state, computed_mac);
- sodium_memzero(&state, sizeof state);
-
- COMPILER_ASSERT(sizeof computed_mac == 16U);
- ret = crypto_verify_16(computed_mac, mac);
- sodium_memzero(computed_mac, sizeof computed_mac);
- if (m == NULL) {
- return ret;
- }
- if (ret != 0) {
- memset(m, 0, mlen);
- return -1;
- }
- crypto_stream_chacha20_ietf_ext_xor_ic(m, c, mlen, npub, 1U, k);
-
- return 0;
-}
-
-int
-crypto_aead_xchacha20poly1305_ietf_encrypt_detached(unsigned char *c,
- unsigned char *mac,
- unsigned long long *maclen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES];
- unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 };
- int ret;
-
- crypto_core_hchacha20(k2, npub, k, NULL);
- memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES,
- crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4);
- ret = _encrypt_detached(c, mac, maclen_p, m, mlen, ad, adlen,
- nsec, npub2, k2);
- sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES);
-
- return ret;
-}
-
-int
-crypto_aead_xchacha20poly1305_ietf_encrypt(unsigned char *c,
- unsigned long long *clen_p,
- const unsigned char *m,
- unsigned long long mlen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *nsec,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned long long clen = 0ULL;
- int ret;
-
- if (mlen > crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX) {
- sodium_misuse();
- }
- ret = crypto_aead_xchacha20poly1305_ietf_encrypt_detached
- (c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, k);
- if (clen_p != NULL) {
- if (ret == 0) {
- clen = mlen + crypto_aead_xchacha20poly1305_ietf_ABYTES;
- }
- *clen_p = clen;
- }
- return ret;
-}
-
-int
-crypto_aead_xchacha20poly1305_ietf_decrypt_detached(unsigned char *m,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *mac,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES];
- unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 };
- int ret;
-
- crypto_core_hchacha20(k2, npub, k, NULL);
- memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES,
- crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4);
- ret = _decrypt_detached(m, nsec, c, clen, mac, ad, adlen, npub2, k2);
- sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES);
-
- return ret;
-}
-
-int
-crypto_aead_xchacha20poly1305_ietf_decrypt(unsigned char *m,
- unsigned long long *mlen_p,
- unsigned char *nsec,
- const unsigned char *c,
- unsigned long long clen,
- const unsigned char *ad,
- unsigned long long adlen,
- const unsigned char *npub,
- const unsigned char *k)
-{
- unsigned long long mlen = 0ULL;
- int ret = -1;
-
- if (clen >= crypto_aead_xchacha20poly1305_ietf_ABYTES) {
- ret = crypto_aead_xchacha20poly1305_ietf_decrypt_detached
- (m, nsec,
- c, clen - crypto_aead_xchacha20poly1305_ietf_ABYTES,
- c + clen - crypto_aead_xchacha20poly1305_ietf_ABYTES,
- ad, adlen, npub, k);
- }
- if (mlen_p != NULL) {
- if (ret == 0) {
- mlen = clen - crypto_aead_xchacha20poly1305_ietf_ABYTES;
- }
- *mlen_p = mlen;
- }
- return ret;
-}
-
-size_t
-crypto_aead_xchacha20poly1305_ietf_keybytes(void)
-{
- return crypto_aead_xchacha20poly1305_ietf_KEYBYTES;
-}
-
-size_t
-crypto_aead_xchacha20poly1305_ietf_npubbytes(void)
-{
- return crypto_aead_xchacha20poly1305_ietf_NPUBBYTES;
-}
-
-size_t
-crypto_aead_xchacha20poly1305_ietf_nsecbytes(void)
-{
- return crypto_aead_xchacha20poly1305_ietf_NSECBYTES;
-}
-
-size_t
-crypto_aead_xchacha20poly1305_ietf_abytes(void)
-{
- return crypto_aead_xchacha20poly1305_ietf_ABYTES;
-}
-
-size_t
-crypto_aead_xchacha20poly1305_ietf_messagebytes_max(void)
-{
- return crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX;
-}
-
-void
-crypto_aead_xchacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_xchacha20poly1305_ietf_KEYBYTES])
-{
- randombytes_buf(k, crypto_aead_xchacha20poly1305_ietf_KEYBYTES);
-}
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+
+#include "core.h"
+#include "crypto_aead_chacha20poly1305.h"
+#include "crypto_aead_xchacha20poly1305.h"
+#include "crypto_core_hchacha20.h"
+#include "crypto_onetimeauth_poly1305.h"
+#include "crypto_stream_chacha20.h"
+#include "crypto_verify_16.h"
+#include "randombytes.h"
+#include "utils.h"
+
+#include "private/chacha20_ietf_ext.h"
+#include "private/common.h"
+
+static const unsigned char _pad0[16] = { 0 };
+
+static int
+_encrypt_detached(unsigned char *c,
+ unsigned char *mac,
+ unsigned long long *maclen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ crypto_onetimeauth_poly1305_state state;
+ unsigned char block0[64U];
+ unsigned char slen[8U];
+
+ (void) nsec;
+ crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k);
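+    /* crypto_stream_chacha20_ietf_ext is a private libsodium variant of IETF
+     * ChaCha20 with an extended block counter, so message length is not capped
+     * by the standard construction's 32-bit counter. */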
+ crypto_onetimeauth_poly1305_init(&state, block0);
+ sodium_memzero(block0, sizeof block0);
+
+ crypto_onetimeauth_poly1305_update(&state, ad, adlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
+
+ crypto_stream_chacha20_ietf_ext_xor_ic(c, m, mlen, npub, 1U, k);
+
+ crypto_onetimeauth_poly1305_update(&state, c, mlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
+
+ STORE64_LE(slen, (uint64_t) adlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ STORE64_LE(slen, (uint64_t) mlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_onetimeauth_poly1305_final(&state, mac);
+ sodium_memzero(&state, sizeof state);
+
+ if (maclen_p != NULL) {
+ *maclen_p = crypto_aead_chacha20poly1305_ietf_ABYTES;
+ }
+ return 0;
+}
+
+static int
+_decrypt_detached(unsigned char *m,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *mac,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ crypto_onetimeauth_poly1305_state state;
+ unsigned char block0[64U];
+ unsigned char slen[8U];
+ unsigned char computed_mac[crypto_aead_chacha20poly1305_ietf_ABYTES];
+ unsigned long long mlen;
+ int ret;
+
+ (void) nsec;
+ crypto_stream_chacha20_ietf_ext(block0, sizeof block0, npub, k);
+ crypto_onetimeauth_poly1305_init(&state, block0);
+ sodium_memzero(block0, sizeof block0);
+
+ crypto_onetimeauth_poly1305_update(&state, ad, adlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - adlen) & 0xf);
+
+ mlen = clen;
+ crypto_onetimeauth_poly1305_update(&state, c, mlen);
+ crypto_onetimeauth_poly1305_update(&state, _pad0, (0x10 - mlen) & 0xf);
+
+ STORE64_LE(slen, (uint64_t) adlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ STORE64_LE(slen, (uint64_t) mlen);
+ crypto_onetimeauth_poly1305_update(&state, slen, sizeof slen);
+
+ crypto_onetimeauth_poly1305_final(&state, computed_mac);
+ sodium_memzero(&state, sizeof state);
+
+ COMPILER_ASSERT(sizeof computed_mac == 16U);
+ ret = crypto_verify_16(computed_mac, mac);
+ sodium_memzero(computed_mac, sizeof computed_mac);
+ if (m == NULL) {
+ return ret;
+ }
+ if (ret != 0) {
+ memset(m, 0, mlen);
+ return -1;
+ }
+ crypto_stream_chacha20_ietf_ext_xor_ic(m, c, mlen, npub, 1U, k);
+
+ return 0;
+}
+
+int
+crypto_aead_xchacha20poly1305_ietf_encrypt_detached(unsigned char *c,
+ unsigned char *mac,
+ unsigned long long *maclen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES];
+ unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 };
+ int ret;
+
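+    /* XChaCha20 nonce extension: HChaCha20 derives a subkey from the first
+     * 16 bytes of the 24-byte nonce; the remaining 8 bytes, prefixed with
+     * 4 zero bytes, form the 12-byte nonce of the inner IETF construction. */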
+ crypto_core_hchacha20(k2, npub, k, NULL);
+ memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES,
+ crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4);
+ ret = _encrypt_detached(c, mac, maclen_p, m, mlen, ad, adlen,
+ nsec, npub2, k2);
+ sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES);
+
+ return ret;
+}
+
+int
+crypto_aead_xchacha20poly1305_ietf_encrypt(unsigned char *c,
+ unsigned long long *clen_p,
+ const unsigned char *m,
+ unsigned long long mlen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *nsec,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned long long clen = 0ULL;
+ int ret;
+
+ if (mlen > crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX) {
+ sodium_misuse();
+ }
+ ret = crypto_aead_xchacha20poly1305_ietf_encrypt_detached
+ (c, c + mlen, NULL, m, mlen, ad, adlen, nsec, npub, k);
+ if (clen_p != NULL) {
+ if (ret == 0) {
+ clen = mlen + crypto_aead_xchacha20poly1305_ietf_ABYTES;
+ }
+ *clen_p = clen;
+ }
+ return ret;
+}
+
+int
+crypto_aead_xchacha20poly1305_ietf_decrypt_detached(unsigned char *m,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *mac,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned char k2[crypto_core_hchacha20_OUTPUTBYTES];
+ unsigned char npub2[crypto_aead_chacha20poly1305_ietf_NPUBBYTES] = { 0 };
+ int ret;
+
+ crypto_core_hchacha20(k2, npub, k, NULL);
+ memcpy(npub2 + 4, npub + crypto_core_hchacha20_INPUTBYTES,
+ crypto_aead_chacha20poly1305_ietf_NPUBBYTES - 4);
+ ret = _decrypt_detached(m, nsec, c, clen, mac, ad, adlen, npub2, k2);
+ sodium_memzero(k2, crypto_core_hchacha20_OUTPUTBYTES);
+
+ return ret;
+}
+
+int
+crypto_aead_xchacha20poly1305_ietf_decrypt(unsigned char *m,
+ unsigned long long *mlen_p,
+ unsigned char *nsec,
+ const unsigned char *c,
+ unsigned long long clen,
+ const unsigned char *ad,
+ unsigned long long adlen,
+ const unsigned char *npub,
+ const unsigned char *k)
+{
+ unsigned long long mlen = 0ULL;
+ int ret = -1;
+
+ if (clen >= crypto_aead_xchacha20poly1305_ietf_ABYTES) {
+ ret = crypto_aead_xchacha20poly1305_ietf_decrypt_detached
+ (m, nsec,
+ c, clen - crypto_aead_xchacha20poly1305_ietf_ABYTES,
+ c + clen - crypto_aead_xchacha20poly1305_ietf_ABYTES,
+ ad, adlen, npub, k);
+ }
+ if (mlen_p != NULL) {
+ if (ret == 0) {
+ mlen = clen - crypto_aead_xchacha20poly1305_ietf_ABYTES;
+ }
+ *mlen_p = mlen;
+ }
+ return ret;
+}
+
+size_t
+crypto_aead_xchacha20poly1305_ietf_keybytes(void)
+{
+ return crypto_aead_xchacha20poly1305_ietf_KEYBYTES;
+}
+
+size_t
+crypto_aead_xchacha20poly1305_ietf_npubbytes(void)
+{
+ return crypto_aead_xchacha20poly1305_ietf_NPUBBYTES;
+}
+
+size_t
+crypto_aead_xchacha20poly1305_ietf_nsecbytes(void)
+{
+ return crypto_aead_xchacha20poly1305_ietf_NSECBYTES;
+}
+
+size_t
+crypto_aead_xchacha20poly1305_ietf_abytes(void)
+{
+ return crypto_aead_xchacha20poly1305_ietf_ABYTES;
+}
+
+size_t
+crypto_aead_xchacha20poly1305_ietf_messagebytes_max(void)
+{
+ return crypto_aead_xchacha20poly1305_ietf_MESSAGEBYTES_MAX;
+}
+
+void
+crypto_aead_xchacha20poly1305_ietf_keygen(unsigned char k[crypto_aead_xchacha20poly1305_ietf_KEYBYTES])
+{
+ randombytes_buf(k, crypto_aead_xchacha20poly1305_ietf_KEYBYTES);
+}