summaryrefslogtreecommitdiff
path: root/libs/libsodium/src/crypto_generichash/blake2b
diff options
context:
space:
mode:
authoraunsane <aunsane@gmail.com>2017-12-15 01:05:56 +0300
committeraunsane <aunsane@gmail.com>2017-12-15 01:05:56 +0300
commite124aa3611f38573898aa79c6eabe77bc874e58f (patch)
tree819464260f758bbc002b23c0c8a77f93751dcb42 /libs/libsodium/src/crypto_generichash/blake2b
parentbbd9647d47f20d10b39570def918a0ac68c305c9 (diff)
preparing to build tox from sources
Diffstat (limited to 'libs/libsodium/src/crypto_generichash/blake2b')
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/generichash_blake2.c55
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2.h109
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.c49
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.h140
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ref.c93
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c87
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.h103
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c90
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h103
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-avx2.h340
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse2.h164
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse41.h307
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-ref.c436
-rw-r--r--libs/libsodium/src/crypto_generichash/blake2b/ref/generichash_blake2b.c111
14 files changed, 2187 insertions, 0 deletions
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/generichash_blake2.c b/libs/libsodium/src/crypto_generichash/blake2b/generichash_blake2.c
new file mode 100644
index 0000000000..781d4c584e
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/generichash_blake2.c
@@ -0,0 +1,55 @@
+#include "crypto_generichash_blake2b.h"
+#include "randombytes.h"
+
+size_t
+crypto_generichash_blake2b_bytes_min(void) {
+ return crypto_generichash_blake2b_BYTES_MIN;
+}
+
+size_t
+crypto_generichash_blake2b_bytes_max(void) {
+ return crypto_generichash_blake2b_BYTES_MAX;
+}
+
+size_t
+crypto_generichash_blake2b_bytes(void) {
+ return crypto_generichash_blake2b_BYTES;
+}
+
+size_t
+crypto_generichash_blake2b_keybytes_min(void) {
+ return crypto_generichash_blake2b_KEYBYTES_MIN;
+}
+
+size_t
+crypto_generichash_blake2b_keybytes_max(void) {
+ return crypto_generichash_blake2b_KEYBYTES_MAX;
+}
+
+size_t
+crypto_generichash_blake2b_keybytes(void) {
+ return crypto_generichash_blake2b_KEYBYTES;
+}
+
+size_t
+crypto_generichash_blake2b_saltbytes(void) {
+ return crypto_generichash_blake2b_SALTBYTES;
+}
+
+size_t
+crypto_generichash_blake2b_personalbytes(void) {
+ return crypto_generichash_blake2b_PERSONALBYTES;
+}
+
+size_t
+crypto_generichash_blake2b_statebytes(void)
+{
+ return (sizeof(crypto_generichash_blake2b_state) + (size_t) 63U)
+ & ~(size_t) 63U;
+}
+
+void
+crypto_generichash_blake2b_keygen(unsigned char k[crypto_generichash_blake2b_KEYBYTES])
+{
+ randombytes_buf(k, crypto_generichash_blake2b_KEYBYTES);
+}
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2.h
new file mode 100644
index 0000000000..c6c4fccbb7
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2.h
@@ -0,0 +1,109 @@
+/*
+ BLAKE2 reference source code package - reference C implementations
+
+ Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ All code is triple-licensed under the
+ [CC0](http://creativecommons.org/publicdomain/zero/1.0), the
+ [OpenSSL Licence](https://www.openssl.org/source/license.html), or
+ the [Apache Public License 2.0](http://www.apache.org/licenses/LICENSE-2.0),
+ at your choosing.
+ */
+
+#ifndef blake2_H
+#define blake2_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "crypto_generichash_blake2b.h"
+#include "export.h"
+
+#define blake2b_init_param crypto_generichash_blake2b__init_param
+#define blake2b_init crypto_generichash_blake2b__init
+#define blake2b_init_salt_personal \
+ crypto_generichash_blake2b__init_salt_personal
+#define blake2b_init_key crypto_generichash_blake2b__init_key
+#define blake2b_init_key_salt_personal \
+ crypto_generichash_blake2b__init_key_salt_personal
+#define blake2b_update crypto_generichash_blake2b__update
+#define blake2b_final crypto_generichash_blake2b__final
+#define blake2b crypto_generichash_blake2b__blake2b
+#define blake2b_salt_personal crypto_generichash_blake2b__blake2b_salt_personal
+#define blake2b_pick_best_implementation \
+ crypto_generichash_blake2b__pick_best_implementation
+
+enum blake2b_constant {
+ BLAKE2B_BLOCKBYTES = 128,
+ BLAKE2B_OUTBYTES = 64,
+ BLAKE2B_KEYBYTES = 64,
+ BLAKE2B_SALTBYTES = 16,
+ BLAKE2B_PERSONALBYTES = 16
+};
+
+#if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+#pragma pack(1)
+#else
+#pragma pack(push, 1)
+#endif
+
+typedef struct blake2b_param_ {
+ uint8_t digest_length; /* 1 */
+ uint8_t key_length; /* 2 */
+ uint8_t fanout; /* 3 */
+ uint8_t depth; /* 4 */
+ uint8_t leaf_length[4]; /* 8 */
+ uint8_t node_offset[8]; /* 16 */
+ uint8_t node_depth; /* 17 */
+ uint8_t inner_length; /* 18 */
+ uint8_t reserved[14]; /* 32 */
+ uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
+ uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
+} blake2b_param;
+
+typedef crypto_generichash_blake2b_state blake2b_state;
+
+#if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC)
+#pragma pack()
+#else
+#pragma pack(pop)
+#endif
+
+/* Streaming API */
+int blake2b_init(blake2b_state *S, const uint8_t outlen);
+int blake2b_init_salt_personal(blake2b_state *S, const uint8_t outlen,
+ const void *salt, const void *personal);
+int blake2b_init_key(blake2b_state *S, const uint8_t outlen, const void *key,
+ const uint8_t keylen);
+int blake2b_init_key_salt_personal(blake2b_state *S, const uint8_t outlen,
+ const void *key, const uint8_t keylen,
+ const void *salt, const void *personal);
+int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
+int blake2b_update(blake2b_state *S, const uint8_t *in, uint64_t inlen);
+int blake2b_final(blake2b_state *S, uint8_t *out, uint8_t outlen);
+
+/* Simple API */
+int blake2b(uint8_t *out, const void *in, const void *key, const uint8_t outlen,
+ const uint64_t inlen, uint8_t keylen);
+int blake2b_salt_personal(uint8_t *out, const void *in, const void *key,
+ const uint8_t outlen, const uint64_t inlen,
+ uint8_t keylen, const void *salt,
+ const void *personal);
+
+typedef int (*blake2b_compress_fn)(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES]);
+int blake2b_pick_best_implementation(void);
+int blake2b_compress_ref(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES]);
+int blake2b_compress_ssse3(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES]);
+int blake2b_compress_sse41(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES]);
+int blake2b_compress_avx2(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES]);
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.c b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.c
new file mode 100644
index 0000000000..7cb41fb6e7
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.c
@@ -0,0 +1,49 @@
+
+#define BLAKE2_USE_SSSE3
+#define BLAKE2_USE_SSE41
+#define BLAKE2_USE_AVX2
+
+#include <stdint.h>
+#include <string.h>
+
+#include "blake2.h"
+#include "private/common.h"
+#include "private/sse2_64_32.h"
+
+#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && \
+ defined(HAVE_TMMINTRIN_H) && defined(HAVE_SMMINTRIN_H)
+
+# ifdef __GNUC__
+# pragma GCC target("sse2")
+# pragma GCC target("ssse3")
+# pragma GCC target("sse4.1")
+# pragma GCC target("avx2")
+# endif
+
+# include <emmintrin.h>
+# include <immintrin.h>
+# include <smmintrin.h>
+# include <tmmintrin.h>
+
+# include "blake2b-compress-avx2.h"
+
+CRYPTO_ALIGN(64)
+static const uint64_t blake2b_IV[8] = {
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+int
+blake2b_compress_avx2(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES])
+{
+ __m256i a = LOADU(&S->h[0]);
+ __m256i b = LOADU(&S->h[4]);
+ BLAKE2B_COMPRESS_V1(a, b, block, S->t[0], S->t[1], S->f[0], S->f[1]);
+ STOREU(&S->h[0], a);
+ STOREU(&S->h[4], b);
+
+ return 0;
+}
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.h
new file mode 100644
index 0000000000..21acb2fa0c
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-avx2.h
@@ -0,0 +1,140 @@
+
+#ifndef blake2b_compress_avx2_H
+#define blake2b_compress_avx2_H
+
+#define LOAD128(p) _mm_load_si128((__m128i *) (p))
+#define STORE128(p, r) _mm_store_si128((__m128i *) (p), r)
+
+#define LOADU128(p) _mm_loadu_si128((__m128i *) (p))
+#define STOREU128(p, r) _mm_storeu_si128((__m128i *) (p), r)
+
+#define LOAD(p) _mm256_load_si256((__m256i *) (p))
+#define STORE(p, r) _mm256_store_si256((__m256i *) (p), r)
+
+#define LOADU(p) _mm256_loadu_si256((__m256i *) (p))
+#define STOREU(p, r) _mm256_storeu_si256((__m256i *) (p), r)
+
+static inline uint64_t
+LOADU64(const void *p)
+{
+ uint64_t v;
+ memcpy(&v, p, sizeof v);
+ return v;
+}
+
+#define ROTATE16 \
+ _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, \
+ 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)
+
+#define ROTATE24 \
+ _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, \
+ 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)
+
+#define ADD(a, b) _mm256_add_epi64(a, b)
+#define SUB(a, b) _mm256_sub_epi64(a, b)
+
+#define XOR(a, b) _mm256_xor_si256(a, b)
+#define AND(a, b) _mm256_and_si256(a, b)
+#define OR(a, b) _mm256_or_si256(a, b)
+
+#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
+#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
+#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
+#define ROT63(x) _mm256_or_si256(_mm256_srli_epi64((x), 63), ADD((x), (x)))
+
+#define BLAKE2B_G1_V1(a, b, c, d, m) \
+ do { \
+ a = ADD(a, m); \
+ a = ADD(a, b); \
+ d = XOR(d, a); \
+ d = ROT32(d); \
+ c = ADD(c, d); \
+ b = XOR(b, c); \
+ b = ROT24(b); \
+ } while (0)
+
+#define BLAKE2B_G2_V1(a, b, c, d, m) \
+ do { \
+ a = ADD(a, m); \
+ a = ADD(a, b); \
+ d = XOR(d, a); \
+ d = ROT16(d); \
+ c = ADD(c, d); \
+ b = XOR(b, c); \
+ b = ROT63(b); \
+ } while (0)
+
+#define BLAKE2B_DIAG_V1(a, b, c, d) \
+ do { \
+ d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); \
+ c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
+ b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); \
+ } while (0)
+
+#define BLAKE2B_UNDIAG_V1(a, b, c, d) \
+ do { \
+ d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); \
+ c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); \
+ b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); \
+ } while (0)
+
+#include "blake2b-load-avx2.h"
+
+#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) \
+ do { \
+ __m256i b0; \
+ BLAKE2B_LOAD_MSG_##r##_1(b0); \
+ BLAKE2B_G1_V1(a, b, c, d, b0); \
+ BLAKE2B_LOAD_MSG_##r##_2(b0); \
+ BLAKE2B_G2_V1(a, b, c, d, b0); \
+ BLAKE2B_DIAG_V1(a, b, c, d); \
+ BLAKE2B_LOAD_MSG_##r##_3(b0); \
+ BLAKE2B_G1_V1(a, b, c, d, b0); \
+ BLAKE2B_LOAD_MSG_##r##_4(b0); \
+ BLAKE2B_G2_V1(a, b, c, d, b0); \
+ BLAKE2B_UNDIAG_V1(a, b, c, d); \
+ } while (0)
+
+#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \
+ do { \
+ BLAKE2B_ROUND_V1(a, b, c, d, 0, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 1, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 2, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 3, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 4, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 5, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 6, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 7, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 8, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 9, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
+ BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
+ } while (0)
+
+#define DECLARE_MESSAGE_WORDS(m) \
+ const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) + 0)); \
+ const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) + 16)); \
+ const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) + 32)); \
+ const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) + 48)); \
+ const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) + 64)); \
+ const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) + 80)); \
+ const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) + 96)); \
+ const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
+ __m256i t0, t1;
+
+#define BLAKE2B_COMPRESS_V1(a, b, m, t0, t1, f0, f1) \
+ do { \
+ DECLARE_MESSAGE_WORDS(m) \
+ const __m256i iv0 = a; \
+ const __m256i iv1 = b; \
+ __m256i c = LOAD(&blake2b_IV[0]); \
+ __m256i d = \
+ XOR(LOAD(&blake2b_IV[4]), _mm256_set_epi64x(f1, f0, t1, t0)); \
+ BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
+ a = XOR(a, c); \
+ b = XOR(b, d); \
+ a = XOR(a, iv0); \
+ b = XOR(b, iv1); \
+ } while (0)
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ref.c b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ref.c
new file mode 100644
index 0000000000..614fa34af7
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ref.c
@@ -0,0 +1,93 @@
+
+#include <stdint.h>
+#include <string.h>
+
+#include "blake2.h"
+#include "private/common.h"
+
+CRYPTO_ALIGN(64)
+static const uint64_t blake2b_IV[8] = {
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+static const uint8_t blake2b_sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+};
+
+int
+blake2b_compress_ref(blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES])
+{
+ uint64_t m[16];
+ uint64_t v[16];
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ m[i] = LOAD64_LE(block + i * sizeof(m[i]));
+
+ for (i = 0; i < 8; ++i)
+ v[i] = S->h[i];
+
+ v[8] = blake2b_IV[0];
+ v[9] = blake2b_IV[1];
+ v[10] = blake2b_IV[2];
+ v[11] = blake2b_IV[3];
+ v[12] = S->t[0] ^ blake2b_IV[4];
+ v[13] = S->t[1] ^ blake2b_IV[5];
+ v[14] = S->f[0] ^ blake2b_IV[6];
+ v[15] = S->f[1] ^ blake2b_IV[7];
+#define G(r, i, a, b, c, d) \
+ do { \
+ a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
+ d = ROTR64(d ^ a, 32); \
+ c = c + d; \
+ b = ROTR64(b ^ c, 24); \
+ a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
+ d = ROTR64(d ^ a, 16); \
+ c = c + d; \
+ b = ROTR64(b ^ c, 63); \
+ } while (0)
+#define ROUND(r) \
+ do { \
+ G(r, 0, v[0], v[4], v[8], v[12]); \
+ G(r, 1, v[1], v[5], v[9], v[13]); \
+ G(r, 2, v[2], v[6], v[10], v[14]); \
+ G(r, 3, v[3], v[7], v[11], v[15]); \
+ G(r, 4, v[0], v[5], v[10], v[15]); \
+ G(r, 5, v[1], v[6], v[11], v[12]); \
+ G(r, 6, v[2], v[7], v[8], v[13]); \
+ G(r, 7, v[3], v[4], v[9], v[14]); \
+ } while (0)
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+
+ for (i = 0; i < 8; ++i) {
+ S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
+ }
+
+#undef G
+#undef ROUND
+ return 0;
+}
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c
new file mode 100644
index 0000000000..9e5c0c5081
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.c
@@ -0,0 +1,87 @@
+
+#define BLAKE2_USE_SSSE3
+#define BLAKE2_USE_SSE41
+
+#include <stdint.h>
+#include <string.h>
+
+#include "blake2.h"
+#include "private/common.h"
+#include "private/sse2_64_32.h"
+
+#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
+ defined(HAVE_SMMINTRIN_H)
+
+# ifdef __GNUC__
+# pragma GCC target("sse2")
+# pragma GCC target("ssse3")
+# pragma GCC target("sse4.1")
+# endif
+
+# include <emmintrin.h>
+# include <smmintrin.h>
+# include <tmmintrin.h>
+
+# include "blake2b-compress-sse41.h"
+
+CRYPTO_ALIGN(64)
+static const uint64_t blake2b_IV[8] = {
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+int
+blake2b_compress_sse41(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES])
+{
+ __m128i row1l, row1h;
+ __m128i row2l, row2h;
+ __m128i row3l, row3h;
+ __m128i row4l, row4h;
+ __m128i b0, b1;
+ __m128i t0, t1;
+ const __m128i r16 =
+ _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
+ const __m128i r24 =
+ _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
+ const __m128i m0 = LOADU(block + 00);
+ const __m128i m1 = LOADU(block + 16);
+ const __m128i m2 = LOADU(block + 32);
+ const __m128i m3 = LOADU(block + 48);
+ const __m128i m4 = LOADU(block + 64);
+ const __m128i m5 = LOADU(block + 80);
+ const __m128i m6 = LOADU(block + 96);
+ const __m128i m7 = LOADU(block + 112);
+ row1l = LOADU(&S->h[0]);
+ row1h = LOADU(&S->h[2]);
+ row2l = LOADU(&S->h[4]);
+ row2h = LOADU(&S->h[6]);
+ row3l = LOADU(&blake2b_IV[0]);
+ row3h = LOADU(&blake2b_IV[2]);
+ row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
+ row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+ row1l = _mm_xor_si128(row3l, row1l);
+ row1h = _mm_xor_si128(row3h, row1h);
+ STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
+ STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
+ row2l = _mm_xor_si128(row4l, row2l);
+ row2h = _mm_xor_si128(row4h, row2h);
+ STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
+ STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
+ return 0;
+}
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.h
new file mode 100644
index 0000000000..ac78e5bb1e
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-sse41.h
@@ -0,0 +1,103 @@
+
+#ifndef blake2b_compress_sse41_H
+#define blake2b_compress_sse41_H
+
+#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p))
+#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r)
+
+#define _mm_roti_epi64(x, c) \
+ (-(c) == 32) \
+ ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
+ : (-(c) == 24) \
+ ? _mm_shuffle_epi8((x), r24) \
+ : (-(c) == 16) \
+ ? _mm_shuffle_epi8((x), r16) \
+ : (-(c) == 63) \
+ ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+ _mm_add_epi64((x), (x))) \
+ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+ _mm_slli_epi64((x), 64 - (-(c))))
+
+#define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
+ \
+ row4l = _mm_xor_si128(row4l, row1l); \
+ row4h = _mm_xor_si128(row4h, row1h); \
+ \
+ row4l = _mm_roti_epi64(row4l, -32); \
+ row4h = _mm_roti_epi64(row4h, -32); \
+ \
+ row3l = _mm_add_epi64(row3l, row4l); \
+ row3h = _mm_add_epi64(row3h, row4h); \
+ \
+ row2l = _mm_xor_si128(row2l, row3l); \
+ row2h = _mm_xor_si128(row2h, row3h); \
+ \
+ row2l = _mm_roti_epi64(row2l, -24); \
+ row2h = _mm_roti_epi64(row2h, -24);
+
+#define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
+ \
+ row4l = _mm_xor_si128(row4l, row1l); \
+ row4h = _mm_xor_si128(row4h, row1h); \
+ \
+ row4l = _mm_roti_epi64(row4l, -16); \
+ row4h = _mm_roti_epi64(row4h, -16); \
+ \
+ row3l = _mm_add_epi64(row3l, row4l); \
+ row3h = _mm_add_epi64(row3h, row4h); \
+ \
+ row2l = _mm_xor_si128(row2l, row3l); \
+ row2h = _mm_xor_si128(row2h, row3h); \
+ \
+ row2l = _mm_roti_epi64(row2l, -63); \
+ row2h = _mm_roti_epi64(row2h, -63);
+
+#define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
+ t0 = _mm_alignr_epi8(row2h, row2l, 8); \
+ t1 = _mm_alignr_epi8(row2l, row2h, 8); \
+ row2l = t0; \
+ row2h = t1; \
+ \
+ t0 = row3l; \
+ row3l = row3h; \
+ row3h = t0; \
+ \
+ t0 = _mm_alignr_epi8(row4h, row4l, 8); \
+ t1 = _mm_alignr_epi8(row4l, row4h, 8); \
+ row4l = t1; \
+ row4h = t0;
+
+#define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
+ t0 = _mm_alignr_epi8(row2l, row2h, 8); \
+ t1 = _mm_alignr_epi8(row2h, row2l, 8); \
+ row2l = t0; \
+ row2h = t1; \
+ \
+ t0 = row3l; \
+ row3l = row3h; \
+ row3h = t0; \
+ \
+ t0 = _mm_alignr_epi8(row4l, row4h, 8); \
+ t1 = _mm_alignr_epi8(row4h, row4l, 8); \
+ row4l = t1; \
+ row4h = t0;
+
+#include "blake2b-load-sse41.h"
+
+#define ROUND(r) \
+ LOAD_MSG_##r##_1(b0, b1); \
+ G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ LOAD_MSG_##r##_2(b0, b1); \
+ G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \
+ LOAD_MSG_##r##_3(b0, b1); \
+ G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ LOAD_MSG_##r##_4(b0, b1); \
+ G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h);
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c
new file mode 100644
index 0000000000..a207a64d40
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.c
@@ -0,0 +1,90 @@
+
+#include <stdint.h>
+#include <string.h>
+
+#include "blake2.h"
+#include "private/common.h"
+#include "private/sse2_64_32.h"
+
+#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
+
+# ifdef __GNUC__
+# pragma GCC target("sse2")
+# pragma GCC target("ssse3")
+# endif
+
+# include <emmintrin.h>
+# include <tmmintrin.h>
+
+# include "blake2b-compress-ssse3.h"
+
+CRYPTO_ALIGN(64)
+static const uint64_t blake2b_IV[8] = {
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+int
+blake2b_compress_ssse3(blake2b_state *S,
+ const uint8_t block[BLAKE2B_BLOCKBYTES])
+{
+ __m128i row1l, row1h;
+ __m128i row2l, row2h;
+ __m128i row3l, row3h;
+ __m128i row4l, row4h;
+ __m128i b0, b1;
+ __m128i t0, t1;
+ const __m128i r16 =
+ _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
+ const __m128i r24 =
+ _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
+ const uint64_t m0 = ((uint64_t *) block)[0];
+ const uint64_t m1 = ((uint64_t *) block)[1];
+ const uint64_t m2 = ((uint64_t *) block)[2];
+ const uint64_t m3 = ((uint64_t *) block)[3];
+ const uint64_t m4 = ((uint64_t *) block)[4];
+ const uint64_t m5 = ((uint64_t *) block)[5];
+ const uint64_t m6 = ((uint64_t *) block)[6];
+ const uint64_t m7 = ((uint64_t *) block)[7];
+ const uint64_t m8 = ((uint64_t *) block)[8];
+ const uint64_t m9 = ((uint64_t *) block)[9];
+ const uint64_t m10 = ((uint64_t *) block)[10];
+ const uint64_t m11 = ((uint64_t *) block)[11];
+ const uint64_t m12 = ((uint64_t *) block)[12];
+ const uint64_t m13 = ((uint64_t *) block)[13];
+ const uint64_t m14 = ((uint64_t *) block)[14];
+ const uint64_t m15 = ((uint64_t *) block)[15];
+
+ row1l = LOADU(&S->h[0]);
+ row1h = LOADU(&S->h[2]);
+ row2l = LOADU(&S->h[4]);
+ row2h = LOADU(&S->h[6]);
+ row3l = LOADU(&blake2b_IV[0]);
+ row3h = LOADU(&blake2b_IV[2]);
+ row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&S->t[0]));
+ row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&S->f[0]));
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+ row1l = _mm_xor_si128(row3l, row1l);
+ row1h = _mm_xor_si128(row3h, row1h);
+ STOREU(&S->h[0], _mm_xor_si128(LOADU(&S->h[0]), row1l));
+ STOREU(&S->h[2], _mm_xor_si128(LOADU(&S->h[2]), row1h));
+ row2l = _mm_xor_si128(row4l, row2l);
+ row2h = _mm_xor_si128(row4h, row2h);
+ STOREU(&S->h[4], _mm_xor_si128(LOADU(&S->h[4]), row2l));
+ STOREU(&S->h[6], _mm_xor_si128(LOADU(&S->h[6]), row2h));
+ return 0;
+}
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h
new file mode 100644
index 0000000000..9a7164fe25
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-compress-ssse3.h
@@ -0,0 +1,103 @@
+
+#ifndef blake2b_compress_ssse3_H
+#define blake2b_compress_ssse3_H
+
+#define LOADU(p) _mm_loadu_si128((const __m128i *) (const void *) (p))
+#define STOREU(p, r) _mm_storeu_si128((__m128i *) (void *) (p), r)
+
+#define _mm_roti_epi64(x, c) \
+ (-(c) == 32) \
+ ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
+ : (-(c) == 24) \
+ ? _mm_shuffle_epi8((x), r24) \
+ : (-(c) == 16) \
+ ? _mm_shuffle_epi8((x), r16) \
+ : (-(c) == 63) \
+ ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+ _mm_add_epi64((x), (x))) \
+ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
+ _mm_slli_epi64((x), 64 - (-(c))))
+
+#define G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
+ \
+ row4l = _mm_xor_si128(row4l, row1l); \
+ row4h = _mm_xor_si128(row4h, row1h); \
+ \
+ row4l = _mm_roti_epi64(row4l, -32); \
+ row4h = _mm_roti_epi64(row4h, -32); \
+ \
+ row3l = _mm_add_epi64(row3l, row4l); \
+ row3h = _mm_add_epi64(row3h, row4h); \
+ \
+ row2l = _mm_xor_si128(row2l, row3l); \
+ row2h = _mm_xor_si128(row2h, row3h); \
+ \
+ row2l = _mm_roti_epi64(row2l, -24); \
+ row2h = _mm_roti_epi64(row2h, -24);
+
+#define G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1) \
+ row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
+ row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
+ \
+ row4l = _mm_xor_si128(row4l, row1l); \
+ row4h = _mm_xor_si128(row4h, row1h); \
+ \
+ row4l = _mm_roti_epi64(row4l, -16); \
+ row4h = _mm_roti_epi64(row4h, -16); \
+ \
+ row3l = _mm_add_epi64(row3l, row4l); \
+ row3h = _mm_add_epi64(row3h, row4h); \
+ \
+ row2l = _mm_xor_si128(row2l, row3l); \
+ row2h = _mm_xor_si128(row2h, row3h); \
+ \
+ row2l = _mm_roti_epi64(row2l, -63); \
+ row2h = _mm_roti_epi64(row2h, -63);
+
+#define DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
+ t0 = _mm_alignr_epi8(row2h, row2l, 8); \
+ t1 = _mm_alignr_epi8(row2l, row2h, 8); \
+ row2l = t0; \
+ row2h = t1; \
+ \
+ t0 = row3l; \
+ row3l = row3h; \
+ row3h = t0; \
+ \
+ t0 = _mm_alignr_epi8(row4h, row4l, 8); \
+ t1 = _mm_alignr_epi8(row4l, row4h, 8); \
+ row4l = t1; \
+ row4h = t0;
+
+#define UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h) \
+ t0 = _mm_alignr_epi8(row2l, row2h, 8); \
+ t1 = _mm_alignr_epi8(row2h, row2l, 8); \
+ row2l = t0; \
+ row2h = t1; \
+ \
+ t0 = row3l; \
+ row3l = row3h; \
+ row3h = t0; \
+ \
+ t0 = _mm_alignr_epi8(row4l, row4h, 8); \
+ t1 = _mm_alignr_epi8(row4h, row4l, 8); \
+ row4l = t1; \
+ row4h = t0;
+
+#include "blake2b-load-sse2.h"
+
+#define ROUND(r) \
+ LOAD_MSG_##r##_1(b0, b1); \
+ G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ LOAD_MSG_##r##_2(b0, b1); \
+ G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ DIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); \
+ LOAD_MSG_##r##_3(b0, b1); \
+ G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ LOAD_MSG_##r##_4(b0, b1); \
+ G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \
+ UNDIAGONALIZE(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h);
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-avx2.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-avx2.h
new file mode 100644
index 0000000000..8c15f177c7
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-avx2.h
@@ -0,0 +1,340 @@
+#ifndef blake2b_load_avx2_H
+#define blake2b_load_avx2_H
+
+#define BLAKE2B_LOAD_MSG_0_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m0, m1); \
+ t1 = _mm256_unpacklo_epi64(m2, m3); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_0_2(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m0, m1); \
+ t1 = _mm256_unpackhi_epi64(m2, m3); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_0_3(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m4, m5); \
+ t1 = _mm256_unpacklo_epi64(m6, m7); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_0_4(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m4, m5); \
+ t1 = _mm256_unpackhi_epi64(m6, m7); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_1_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m7, m2); \
+ t1 = _mm256_unpackhi_epi64(m4, m6); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_1_2(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m5, m4); \
+ t1 = _mm256_alignr_epi8(m3, m7, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_1_3(b0) \
+ do { \
+ t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ t1 = _mm256_unpackhi_epi64(m5, m2); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_1_4(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m6, m1); \
+ t1 = _mm256_unpackhi_epi64(m3, m1); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_2_1(b0) \
+ do { \
+ t0 = _mm256_alignr_epi8(m6, m5, 8); \
+ t1 = _mm256_unpackhi_epi64(m2, m7); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_2_2(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m4, m0); \
+ t1 = _mm256_blend_epi32(m6, m1, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_2_3(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m1, m5, 0x33); \
+ t1 = _mm256_unpackhi_epi64(m3, m4); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_2_4(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m7, m3); \
+ t1 = _mm256_alignr_epi8(m2, m0, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_3_1(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m3, m1); \
+ t1 = _mm256_unpackhi_epi64(m6, m5); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_3_2(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m4, m0); \
+ t1 = _mm256_unpacklo_epi64(m6, m7); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_3_3(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m2, m1, 0x33); \
+ t1 = _mm256_blend_epi32(m7, m2, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_3_4(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m3, m5); \
+ t1 = _mm256_unpacklo_epi64(m0, m4); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_4_1(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m4, m2); \
+ t1 = _mm256_unpacklo_epi64(m1, m5); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_4_2(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m3, m0, 0x33); \
+ t1 = _mm256_blend_epi32(m7, m2, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_4_3(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m5, m7, 0x33); \
+ t1 = _mm256_blend_epi32(m1, m3, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_4_4(b0) \
+ do { \
+ t0 = _mm256_alignr_epi8(m6, m0, 8); \
+ t1 = _mm256_blend_epi32(m6, m4, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_5_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m1, m3); \
+ t1 = _mm256_unpacklo_epi64(m0, m4); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_5_2(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m6, m5); \
+ t1 = _mm256_unpackhi_epi64(m5, m1); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_5_3(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m3, m2, 0x33); \
+ t1 = _mm256_unpackhi_epi64(m7, m0); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_5_4(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m6, m2); \
+ t1 = _mm256_blend_epi32(m4, m7, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_6_1(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m0, m6, 0x33); \
+ t1 = _mm256_unpacklo_epi64(m7, m2); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_6_2(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m2, m7); \
+ t1 = _mm256_alignr_epi8(m5, m6, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_6_3(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m0, m3); \
+ t1 = _mm256_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_6_4(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m3, m1); \
+ t1 = _mm256_blend_epi32(m5, m1, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_7_1(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m6, m3); \
+ t1 = _mm256_blend_epi32(m1, m6, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_7_2(b0) \
+ do { \
+ t0 = _mm256_alignr_epi8(m7, m5, 8); \
+ t1 = _mm256_unpackhi_epi64(m0, m4); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_7_3(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m2, m7); \
+ t1 = _mm256_unpacklo_epi64(m4, m1); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_7_4(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m0, m2); \
+ t1 = _mm256_unpacklo_epi64(m3, m5); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_8_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m3, m7); \
+ t1 = _mm256_alignr_epi8(m0, m5, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_8_2(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m7, m4); \
+ t1 = _mm256_alignr_epi8(m4, m1, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_8_3(b0) \
+ do { \
+ t0 = m6; \
+ t1 = _mm256_alignr_epi8(m5, m0, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_8_4(b0) \
+ do { \
+ t0 = _mm256_blend_epi32(m3, m1, 0x33); \
+ t1 = m2; \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_9_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m5, m4); \
+ t1 = _mm256_unpackhi_epi64(m3, m0); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_9_2(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m1, m2); \
+ t1 = _mm256_blend_epi32(m2, m3, 0x33); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_9_3(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m7, m4); \
+ t1 = _mm256_unpackhi_epi64(m1, m6); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_9_4(b0) \
+ do { \
+ t0 = _mm256_alignr_epi8(m7, m5, 8); \
+ t1 = _mm256_unpacklo_epi64(m6, m0); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_10_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m0, m1); \
+ t1 = _mm256_unpacklo_epi64(m2, m3); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_10_2(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m0, m1); \
+ t1 = _mm256_unpackhi_epi64(m2, m3); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_10_3(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m4, m5); \
+ t1 = _mm256_unpacklo_epi64(m6, m7); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_10_4(b0) \
+ do { \
+ t0 = _mm256_unpackhi_epi64(m4, m5); \
+ t1 = _mm256_unpackhi_epi64(m6, m7); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_11_1(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m7, m2); \
+ t1 = _mm256_unpackhi_epi64(m4, m6); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_11_2(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m5, m4); \
+ t1 = _mm256_alignr_epi8(m3, m7, 8); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_11_3(b0) \
+ do { \
+ t0 = _mm256_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ t1 = _mm256_unpackhi_epi64(m5, m2); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#define BLAKE2B_LOAD_MSG_11_4(b0) \
+ do { \
+ t0 = _mm256_unpacklo_epi64(m6, m1); \
+ t1 = _mm256_unpackhi_epi64(m3, m1); \
+ b0 = _mm256_blend_epi32(t0, t1, 0xF0); \
+ } while (0)
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse2.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse2.h
new file mode 100644
index 0000000000..8e67421aca
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse2.h
@@ -0,0 +1,164 @@
+/*
+ BLAKE2 reference source code package - optimized C implementations
+
+ Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along
+ with
+ this software. If not, see
+ <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+
+#ifndef blake2b_load_sse2_H
+#define blake2b_load_sse2_H
+
+#define LOAD_MSG_0_1(b0, b1) \
+ b0 = _mm_set_epi64x(m2, m0); \
+ b1 = _mm_set_epi64x(m6, m4)
+#define LOAD_MSG_0_2(b0, b1) \
+ b0 = _mm_set_epi64x(m3, m1); \
+ b1 = _mm_set_epi64x(m7, m5)
+#define LOAD_MSG_0_3(b0, b1) \
+ b0 = _mm_set_epi64x(m10, m8); \
+ b1 = _mm_set_epi64x(m14, m12)
+#define LOAD_MSG_0_4(b0, b1) \
+ b0 = _mm_set_epi64x(m11, m9); \
+ b1 = _mm_set_epi64x(m15, m13)
+#define LOAD_MSG_1_1(b0, b1) \
+ b0 = _mm_set_epi64x(m4, m14); \
+ b1 = _mm_set_epi64x(m13, m9)
+#define LOAD_MSG_1_2(b0, b1) \
+ b0 = _mm_set_epi64x(m8, m10); \
+ b1 = _mm_set_epi64x(m6, m15)
+#define LOAD_MSG_1_3(b0, b1) \
+ b0 = _mm_set_epi64x(m0, m1); \
+ b1 = _mm_set_epi64x(m5, m11)
+#define LOAD_MSG_1_4(b0, b1) \
+ b0 = _mm_set_epi64x(m2, m12); \
+ b1 = _mm_set_epi64x(m3, m7)
+#define LOAD_MSG_2_1(b0, b1) \
+ b0 = _mm_set_epi64x(m12, m11); \
+ b1 = _mm_set_epi64x(m15, m5)
+#define LOAD_MSG_2_2(b0, b1) \
+ b0 = _mm_set_epi64x(m0, m8); \
+ b1 = _mm_set_epi64x(m13, m2)
+#define LOAD_MSG_2_3(b0, b1) \
+ b0 = _mm_set_epi64x(m3, m10); \
+ b1 = _mm_set_epi64x(m9, m7)
+#define LOAD_MSG_2_4(b0, b1) \
+ b0 = _mm_set_epi64x(m6, m14); \
+ b1 = _mm_set_epi64x(m4, m1)
+#define LOAD_MSG_3_1(b0, b1) \
+ b0 = _mm_set_epi64x(m3, m7); \
+ b1 = _mm_set_epi64x(m11, m13)
+#define LOAD_MSG_3_2(b0, b1) \
+ b0 = _mm_set_epi64x(m1, m9); \
+ b1 = _mm_set_epi64x(m14, m12)
+#define LOAD_MSG_3_3(b0, b1) \
+ b0 = _mm_set_epi64x(m5, m2); \
+ b1 = _mm_set_epi64x(m15, m4)
+#define LOAD_MSG_3_4(b0, b1) \
+ b0 = _mm_set_epi64x(m10, m6); \
+ b1 = _mm_set_epi64x(m8, m0)
+#define LOAD_MSG_4_1(b0, b1) \
+ b0 = _mm_set_epi64x(m5, m9); \
+ b1 = _mm_set_epi64x(m10, m2)
+#define LOAD_MSG_4_2(b0, b1) \
+ b0 = _mm_set_epi64x(m7, m0); \
+ b1 = _mm_set_epi64x(m15, m4)
+#define LOAD_MSG_4_3(b0, b1) \
+ b0 = _mm_set_epi64x(m11, m14); \
+ b1 = _mm_set_epi64x(m3, m6)
+#define LOAD_MSG_4_4(b0, b1) \
+ b0 = _mm_set_epi64x(m12, m1); \
+ b1 = _mm_set_epi64x(m13, m8)
+#define LOAD_MSG_5_1(b0, b1) \
+ b0 = _mm_set_epi64x(m6, m2); \
+ b1 = _mm_set_epi64x(m8, m0)
+#define LOAD_MSG_5_2(b0, b1) \
+ b0 = _mm_set_epi64x(m10, m12); \
+ b1 = _mm_set_epi64x(m3, m11)
+#define LOAD_MSG_5_3(b0, b1) \
+ b0 = _mm_set_epi64x(m7, m4); \
+ b1 = _mm_set_epi64x(m1, m15)
+#define LOAD_MSG_5_4(b0, b1) \
+ b0 = _mm_set_epi64x(m5, m13); \
+ b1 = _mm_set_epi64x(m9, m14)
+#define LOAD_MSG_6_1(b0, b1) \
+ b0 = _mm_set_epi64x(m1, m12); \
+ b1 = _mm_set_epi64x(m4, m14)
+#define LOAD_MSG_6_2(b0, b1) \
+ b0 = _mm_set_epi64x(m15, m5); \
+ b1 = _mm_set_epi64x(m10, m13)
+#define LOAD_MSG_6_3(b0, b1) \
+ b0 = _mm_set_epi64x(m6, m0); \
+ b1 = _mm_set_epi64x(m8, m9)
+#define LOAD_MSG_6_4(b0, b1) \
+ b0 = _mm_set_epi64x(m3, m7); \
+ b1 = _mm_set_epi64x(m11, m2)
+#define LOAD_MSG_7_1(b0, b1) \
+ b0 = _mm_set_epi64x(m7, m13); \
+ b1 = _mm_set_epi64x(m3, m12)
+#define LOAD_MSG_7_2(b0, b1) \
+ b0 = _mm_set_epi64x(m14, m11); \
+ b1 = _mm_set_epi64x(m9, m1)
+#define LOAD_MSG_7_3(b0, b1) \
+ b0 = _mm_set_epi64x(m15, m5); \
+ b1 = _mm_set_epi64x(m2, m8)
+#define LOAD_MSG_7_4(b0, b1) \
+ b0 = _mm_set_epi64x(m4, m0); \
+ b1 = _mm_set_epi64x(m10, m6)
+#define LOAD_MSG_8_1(b0, b1) \
+ b0 = _mm_set_epi64x(m14, m6); \
+ b1 = _mm_set_epi64x(m0, m11)
+#define LOAD_MSG_8_2(b0, b1) \
+ b0 = _mm_set_epi64x(m9, m15); \
+ b1 = _mm_set_epi64x(m8, m3)
+#define LOAD_MSG_8_3(b0, b1) \
+ b0 = _mm_set_epi64x(m13, m12); \
+ b1 = _mm_set_epi64x(m10, m1)
+#define LOAD_MSG_8_4(b0, b1) \
+ b0 = _mm_set_epi64x(m7, m2); \
+ b1 = _mm_set_epi64x(m5, m4)
+#define LOAD_MSG_9_1(b0, b1) \
+ b0 = _mm_set_epi64x(m8, m10); \
+ b1 = _mm_set_epi64x(m1, m7)
+#define LOAD_MSG_9_2(b0, b1) \
+ b0 = _mm_set_epi64x(m4, m2); \
+ b1 = _mm_set_epi64x(m5, m6)
+#define LOAD_MSG_9_3(b0, b1) \
+ b0 = _mm_set_epi64x(m9, m15); \
+ b1 = _mm_set_epi64x(m13, m3)
+#define LOAD_MSG_9_4(b0, b1) \
+ b0 = _mm_set_epi64x(m14, m11); \
+ b1 = _mm_set_epi64x(m0, m12)
+#define LOAD_MSG_10_1(b0, b1) \
+ b0 = _mm_set_epi64x(m2, m0); \
+ b1 = _mm_set_epi64x(m6, m4)
+#define LOAD_MSG_10_2(b0, b1) \
+ b0 = _mm_set_epi64x(m3, m1); \
+ b1 = _mm_set_epi64x(m7, m5)
+#define LOAD_MSG_10_3(b0, b1) \
+ b0 = _mm_set_epi64x(m10, m8); \
+ b1 = _mm_set_epi64x(m14, m12)
+#define LOAD_MSG_10_4(b0, b1) \
+ b0 = _mm_set_epi64x(m11, m9); \
+ b1 = _mm_set_epi64x(m15, m13)
+#define LOAD_MSG_11_1(b0, b1) \
+ b0 = _mm_set_epi64x(m4, m14); \
+ b1 = _mm_set_epi64x(m13, m9)
+#define LOAD_MSG_11_2(b0, b1) \
+ b0 = _mm_set_epi64x(m8, m10); \
+ b1 = _mm_set_epi64x(m6, m15)
+#define LOAD_MSG_11_3(b0, b1) \
+ b0 = _mm_set_epi64x(m0, m1); \
+ b1 = _mm_set_epi64x(m5, m11)
+#define LOAD_MSG_11_4(b0, b1) \
+ b0 = _mm_set_epi64x(m2, m12); \
+ b1 = _mm_set_epi64x(m3, m7)
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse41.h b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse41.h
new file mode 100644
index 0000000000..31745fc139
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-load-sse41.h
@@ -0,0 +1,307 @@
+/*
+ BLAKE2 reference source code package - optimized C implementations
+
+ Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along
+ with
+ this software. If not, see
+ <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+
+#ifndef blake2b_load_sse41_H
+#define blake2b_load_sse41_H
+
+#define LOAD_MSG_0_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m0, m1); \
+ b1 = _mm_unpacklo_epi64(m2, m3); \
+ } while (0)
+
+#define LOAD_MSG_0_2(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m0, m1); \
+ b1 = _mm_unpackhi_epi64(m2, m3); \
+ } while (0)
+
+#define LOAD_MSG_0_3(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m4, m5); \
+ b1 = _mm_unpacklo_epi64(m6, m7); \
+ } while (0)
+
+#define LOAD_MSG_0_4(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m4, m5); \
+ b1 = _mm_unpackhi_epi64(m6, m7); \
+ } while (0)
+
+#define LOAD_MSG_1_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m7, m2); \
+ b1 = _mm_unpackhi_epi64(m4, m6); \
+ } while (0)
+
+#define LOAD_MSG_1_2(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m5, m4); \
+ b1 = _mm_alignr_epi8(m3, m7, 8); \
+ } while (0)
+
+#define LOAD_MSG_1_3(b0, b1) \
+ do { \
+ b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ b1 = _mm_unpackhi_epi64(m5, m2); \
+ } while (0)
+
+#define LOAD_MSG_1_4(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m6, m1); \
+ b1 = _mm_unpackhi_epi64(m3, m1); \
+ } while (0)
+
+#define LOAD_MSG_2_1(b0, b1) \
+ do { \
+ b0 = _mm_alignr_epi8(m6, m5, 8); \
+ b1 = _mm_unpackhi_epi64(m2, m7); \
+ } while (0)
+
+#define LOAD_MSG_2_2(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m4, m0); \
+ b1 = _mm_blend_epi16(m1, m6, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_2_3(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m5, m1, 0xF0); \
+ b1 = _mm_unpackhi_epi64(m3, m4); \
+ } while (0)
+
+#define LOAD_MSG_2_4(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m7, m3); \
+ b1 = _mm_alignr_epi8(m2, m0, 8); \
+ } while (0)
+
+#define LOAD_MSG_3_1(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m3, m1); \
+ b1 = _mm_unpackhi_epi64(m6, m5); \
+ } while (0)
+
+#define LOAD_MSG_3_2(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m4, m0); \
+ b1 = _mm_unpacklo_epi64(m6, m7); \
+ } while (0)
+
+#define LOAD_MSG_3_3(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m1, m2, 0xF0); \
+ b1 = _mm_blend_epi16(m2, m7, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_3_4(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m3, m5); \
+ b1 = _mm_unpacklo_epi64(m0, m4); \
+ } while (0)
+
+#define LOAD_MSG_4_1(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m4, m2); \
+ b1 = _mm_unpacklo_epi64(m1, m5); \
+ } while (0)
+
+#define LOAD_MSG_4_2(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m0, m3, 0xF0); \
+ b1 = _mm_blend_epi16(m2, m7, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_4_3(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m7, m5, 0xF0); \
+ b1 = _mm_blend_epi16(m3, m1, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_4_4(b0, b1) \
+ do { \
+ b0 = _mm_alignr_epi8(m6, m0, 8); \
+ b1 = _mm_blend_epi16(m4, m6, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_5_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m1, m3); \
+ b1 = _mm_unpacklo_epi64(m0, m4); \
+ } while (0)
+
+#define LOAD_MSG_5_2(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m6, m5); \
+ b1 = _mm_unpackhi_epi64(m5, m1); \
+ } while (0)
+
+#define LOAD_MSG_5_3(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m2, m3, 0xF0); \
+ b1 = _mm_unpackhi_epi64(m7, m0); \
+ } while (0)
+
+#define LOAD_MSG_5_4(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m6, m2); \
+ b1 = _mm_blend_epi16(m7, m4, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_6_1(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m6, m0, 0xF0); \
+ b1 = _mm_unpacklo_epi64(m7, m2); \
+ } while (0)
+
+#define LOAD_MSG_6_2(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m2, m7); \
+ b1 = _mm_alignr_epi8(m5, m6, 8); \
+ } while (0)
+
+#define LOAD_MSG_6_3(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m0, m3); \
+ b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \
+ } while (0)
+
+#define LOAD_MSG_6_4(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m3, m1); \
+ b1 = _mm_blend_epi16(m1, m5, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_7_1(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m6, m3); \
+ b1 = _mm_blend_epi16(m6, m1, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_7_2(b0, b1) \
+ do { \
+ b0 = _mm_alignr_epi8(m7, m5, 8); \
+ b1 = _mm_unpackhi_epi64(m0, m4); \
+ } while (0)
+
+#define LOAD_MSG_7_3(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m2, m7); \
+ b1 = _mm_unpacklo_epi64(m4, m1); \
+ } while (0)
+
+#define LOAD_MSG_7_4(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m0, m2); \
+ b1 = _mm_unpacklo_epi64(m3, m5); \
+ } while (0)
+
+#define LOAD_MSG_8_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m3, m7); \
+ b1 = _mm_alignr_epi8(m0, m5, 8); \
+ } while (0)
+
+#define LOAD_MSG_8_2(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m7, m4); \
+ b1 = _mm_alignr_epi8(m4, m1, 8); \
+ } while (0)
+
+#define LOAD_MSG_8_3(b0, b1) \
+ do { \
+ b0 = m6; \
+ b1 = _mm_alignr_epi8(m5, m0, 8); \
+ } while (0)
+
+#define LOAD_MSG_8_4(b0, b1) \
+ do { \
+ b0 = _mm_blend_epi16(m1, m3, 0xF0); \
+ b1 = m2; \
+ } while (0)
+
+#define LOAD_MSG_9_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m5, m4); \
+ b1 = _mm_unpackhi_epi64(m3, m0); \
+ } while (0)
+
+#define LOAD_MSG_9_2(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m1, m2); \
+ b1 = _mm_blend_epi16(m3, m2, 0xF0); \
+ } while (0)
+
+#define LOAD_MSG_9_3(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m7, m4); \
+ b1 = _mm_unpackhi_epi64(m1, m6); \
+ } while (0)
+
+#define LOAD_MSG_9_4(b0, b1) \
+ do { \
+ b0 = _mm_alignr_epi8(m7, m5, 8); \
+ b1 = _mm_unpacklo_epi64(m6, m0); \
+ } while (0)
+
+#define LOAD_MSG_10_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m0, m1); \
+ b1 = _mm_unpacklo_epi64(m2, m3); \
+ } while (0)
+
+#define LOAD_MSG_10_2(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m0, m1); \
+ b1 = _mm_unpackhi_epi64(m2, m3); \
+ } while (0)
+
+#define LOAD_MSG_10_3(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m4, m5); \
+ b1 = _mm_unpacklo_epi64(m6, m7); \
+ } while (0)
+
+#define LOAD_MSG_10_4(b0, b1) \
+ do { \
+ b0 = _mm_unpackhi_epi64(m4, m5); \
+ b1 = _mm_unpackhi_epi64(m6, m7); \
+ } while (0)
+
+#define LOAD_MSG_11_1(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m7, m2); \
+ b1 = _mm_unpackhi_epi64(m4, m6); \
+ } while (0)
+
+#define LOAD_MSG_11_2(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m5, m4); \
+ b1 = _mm_alignr_epi8(m3, m7, 8); \
+ } while (0)
+
+#define LOAD_MSG_11_3(b0, b1) \
+ do { \
+ b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \
+ b1 = _mm_unpackhi_epi64(m5, m2); \
+ } while (0)
+
+#define LOAD_MSG_11_4(b0, b1) \
+ do { \
+ b0 = _mm_unpacklo_epi64(m6, m1); \
+ b1 = _mm_unpackhi_epi64(m3, m1); \
+ } while (0)
+
+#endif
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-ref.c b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-ref.c
new file mode 100644
index 0000000000..91435a1b16
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/blake2b-ref.c
@@ -0,0 +1,436 @@
+/*
+ BLAKE2 reference source code package - C implementations
+
+ Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
+
+ To the extent possible under law, the author(s) have dedicated all copyright
+ and related and neighboring rights to this software to the public domain
+ worldwide. This software is distributed without any warranty.
+
+ You should have received a copy of the CC0 Public Domain Dedication along
+ with
+ this software. If not, see
+ <http://creativecommons.org/publicdomain/zero/1.0/>.
+*/
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "blake2.h"
+#include "core.h"
+#include "private/common.h"
+#include "runtime.h"
+#include "utils.h"
+
+static blake2b_compress_fn blake2b_compress = blake2b_compress_ref;
+
+static const uint64_t blake2b_IV[8] = {
+ 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL,
+ 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
+ 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
+};
+
+/* LCOV_EXCL_START */
+static inline int
+blake2b_set_lastnode(blake2b_state *S)
+{
+ S->f[1] = -1;
+ return 0;
+}
+/* LCOV_EXCL_STOP */
+
+static inline int
+blake2b_is_lastblock(const blake2b_state *S)
+{
+ return S->f[0] != 0;
+}
+
+static inline int
+blake2b_set_lastblock(blake2b_state *S)
+{
+ if (S->last_node)
+ blake2b_set_lastnode(S);
+
+ S->f[0] = -1;
+ return 0;
+}
+
+static inline int
+blake2b_increment_counter(blake2b_state *S, const uint64_t inc)
+{
+#ifdef HAVE_TI_MODE
+ uint128_t t = ((uint128_t) S->t[1] << 64) | S->t[0];
+ t += inc;
+ S->t[0] = (uint64_t)(t >> 0);
+ S->t[1] = (uint64_t)(t >> 64);
+#else
+ S->t[0] += inc;
+ S->t[1] += (S->t[0] < inc);
+#endif
+ return 0;
+}
+
+/* Parameter-related functions */
+static inline int
+blake2b_param_set_salt(blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES])
+{
+ memcpy(P->salt, salt, BLAKE2B_SALTBYTES);
+ return 0;
+}
+
+static inline int
+blake2b_param_set_personal(blake2b_param *P,
+ const uint8_t personal[BLAKE2B_PERSONALBYTES])
+{
+ memcpy(P->personal, personal, BLAKE2B_PERSONALBYTES);
+ return 0;
+}
+
+static inline int
+blake2b_init0(blake2b_state *S)
+{
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ S->h[i] = blake2b_IV[i];
+ }
+ memset(S->t, 0, offsetof(blake2b_state, last_node) + sizeof(S->last_node)
+ - offsetof(blake2b_state, t));
+ return 0;
+}
+
+/* init xors IV with input parameter block */
+int
+blake2b_init_param(blake2b_state *S, const blake2b_param *P)
+{
+ size_t i;
+ const uint8_t *p;
+
+ COMPILER_ASSERT(sizeof *P == 64);
+ blake2b_init0(S);
+ p = (const uint8_t *) (P);
+
+ /* IV XOR ParamBlock */
+ for (i = 0; i < 8; i++) {
+ S->h[i] ^= LOAD64_LE(p + sizeof(S->h[i]) * i);
+ }
+ return 0;
+}
+
+int
+blake2b_init(blake2b_state *S, const uint8_t outlen)
+{
+ blake2b_param P[1];
+
+ if ((!outlen) || (outlen > BLAKE2B_OUTBYTES)) {
+ sodium_misuse();
+ }
+ P->digest_length = outlen;
+ P->key_length = 0;
+ P->fanout = 1;
+ P->depth = 1;
+ STORE32_LE(P->leaf_length, 0);
+ STORE64_LE(P->node_offset, 0);
+ P->node_depth = 0;
+ P->inner_length = 0;
+ memset(P->reserved, 0, sizeof(P->reserved));
+ memset(P->salt, 0, sizeof(P->salt));
+ memset(P->personal, 0, sizeof(P->personal));
+ return blake2b_init_param(S, P);
+}
+
+int
+blake2b_init_salt_personal(blake2b_state *S, const uint8_t outlen,
+ const void *salt, const void *personal)
+{
+ blake2b_param P[1];
+
+ if ((!outlen) || (outlen > BLAKE2B_OUTBYTES)) {
+ sodium_misuse();
+ }
+ P->digest_length = outlen;
+ P->key_length = 0;
+ P->fanout = 1;
+ P->depth = 1;
+ STORE32_LE(P->leaf_length, 0);
+ STORE64_LE(P->node_offset, 0);
+ P->node_depth = 0;
+ P->inner_length = 0;
+ memset(P->reserved, 0, sizeof(P->reserved));
+ if (salt != NULL) {
+ blake2b_param_set_salt(P, (const uint8_t *) salt);
+ } else {
+ memset(P->salt, 0, sizeof(P->salt));
+ }
+ if (personal != NULL) {
+ blake2b_param_set_personal(P, (const uint8_t *) personal);
+ } else {
+ memset(P->personal, 0, sizeof(P->personal));
+ }
+ return blake2b_init_param(S, P);
+}
+
+int
+blake2b_init_key(blake2b_state *S, const uint8_t outlen, const void *key,
+ const uint8_t keylen)
+{
+ blake2b_param P[1];
+
+ if ((!outlen) || (outlen > BLAKE2B_OUTBYTES)) {
+ sodium_misuse();
+ }
+ if (!key || !keylen || keylen > BLAKE2B_KEYBYTES) {
+ sodium_misuse();
+ }
+ P->digest_length = outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+ STORE32_LE(P->leaf_length, 0);
+ STORE64_LE(P->node_offset, 0);
+ P->node_depth = 0;
+ P->inner_length = 0;
+ memset(P->reserved, 0, sizeof(P->reserved));
+ memset(P->salt, 0, sizeof(P->salt));
+ memset(P->personal, 0, sizeof(P->personal));
+
+ if (blake2b_init_param(S, P) < 0) {
+ sodium_misuse();
+ }
+ {
+ uint8_t block[BLAKE2B_BLOCKBYTES];
+ memset(block, 0, BLAKE2B_BLOCKBYTES);
+ memcpy(block, key, keylen); /* keylen cannot be 0 */
+ blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
+ sodium_memzero(block, BLAKE2B_BLOCKBYTES); /* Burn the key from stack */
+ }
+ return 0;
+}
+
+int
+blake2b_init_key_salt_personal(blake2b_state *S, const uint8_t outlen,
+ const void *key, const uint8_t keylen,
+ const void *salt, const void *personal)
+{
+ blake2b_param P[1];
+
+ if ((!outlen) || (outlen > BLAKE2B_OUTBYTES)) {
+ sodium_misuse();
+ }
+ if (!key || !keylen || keylen > BLAKE2B_KEYBYTES) {
+ sodium_misuse();
+ }
+ P->digest_length = outlen;
+ P->key_length = keylen;
+ P->fanout = 1;
+ P->depth = 1;
+ STORE32_LE(P->leaf_length, 0);
+ STORE64_LE(P->node_offset, 0);
+ P->node_depth = 0;
+ P->inner_length = 0;
+ memset(P->reserved, 0, sizeof(P->reserved));
+ if (salt != NULL) {
+ blake2b_param_set_salt(P, (const uint8_t *) salt);
+ } else {
+ memset(P->salt, 0, sizeof(P->salt));
+ }
+ if (personal != NULL) {
+ blake2b_param_set_personal(P, (const uint8_t *) personal);
+ } else {
+ memset(P->personal, 0, sizeof(P->personal));
+ }
+
+ if (blake2b_init_param(S, P) < 0) {
+ sodium_misuse();
+ }
+ {
+ uint8_t block[BLAKE2B_BLOCKBYTES];
+ memset(block, 0, BLAKE2B_BLOCKBYTES);
+ memcpy(block, key, keylen); /* keylen cannot be 0 */
+ blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
+ sodium_memzero(block, BLAKE2B_BLOCKBYTES); /* Burn the key from stack */
+ }
+ return 0;
+}
+
+/* inlen now in bytes */
+int
+blake2b_update(blake2b_state *S, const uint8_t *in, uint64_t inlen)
+{
+ while (inlen > 0) {
+ size_t left = S->buflen;
+ size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;
+
+ if (inlen > fill) {
+ memcpy(S->buf + left, in, fill); /* Fill buffer */
+ S->buflen += fill;
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, S->buf); /* Compress */
+ memcpy(S->buf, S->buf + BLAKE2B_BLOCKBYTES,
+ BLAKE2B_BLOCKBYTES); /* Shift buffer left */
+ S->buflen -= BLAKE2B_BLOCKBYTES;
+ in += fill;
+ inlen -= fill;
+ } else /* inlen <= fill */
+ {
+ memcpy(S->buf + left, in, inlen);
+ S->buflen += inlen; /* Be lazy, do not compress */
+ in += inlen;
+ inlen -= inlen;
+ }
+ }
+
+ return 0;
+}
+
+int
+blake2b_final(blake2b_state *S, uint8_t *out, uint8_t outlen)
+{
+ unsigned char buffer[BLAKE2B_OUTBYTES];
+
+ if (!outlen || outlen > BLAKE2B_OUTBYTES) {
+ sodium_misuse();
+ }
+ if (blake2b_is_lastblock(S)) {
+ return -1;
+ }
+ if (S->buflen > BLAKE2B_BLOCKBYTES) {
+ blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
+ blake2b_compress(S, S->buf);
+ S->buflen -= BLAKE2B_BLOCKBYTES;
+ assert(S->buflen <= BLAKE2B_BLOCKBYTES);
+ memcpy(S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen);
+ }
+
+ blake2b_increment_counter(S, S->buflen);
+ blake2b_set_lastblock(S);
+ memset(S->buf + S->buflen, 0,
+ 2 * BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
+ blake2b_compress(S, S->buf);
+
+ COMPILER_ASSERT(sizeof buffer == 64U);
+ STORE64_LE(buffer + 8 * 0, S->h[0]);
+ STORE64_LE(buffer + 8 * 1, S->h[1]);
+ STORE64_LE(buffer + 8 * 2, S->h[2]);
+ STORE64_LE(buffer + 8 * 3, S->h[3]);
+ STORE64_LE(buffer + 8 * 4, S->h[4]);
+ STORE64_LE(buffer + 8 * 5, S->h[5]);
+ STORE64_LE(buffer + 8 * 6, S->h[6]);
+ STORE64_LE(buffer + 8 * 7, S->h[7]);
+ memcpy(out, buffer, outlen); /* outlen <= BLAKE2B_OUTBYTES (64) */
+
+ sodium_memzero(S->h, sizeof S->h);
+ sodium_memzero(S->buf, sizeof S->buf);
+
+ return 0;
+}
+
+/* inlen, at least, should be uint64_t. Others can be size_t. */
+int
+blake2b(uint8_t *out, const void *in, const void *key, const uint8_t outlen,
+ const uint64_t inlen, uint8_t keylen)
+{
+ blake2b_state S[1];
+
+ /* Verify parameters */
+ if (NULL == in && inlen > 0) {
+ sodium_misuse();
+ }
+ if (NULL == out) {
+ sodium_misuse();
+ }
+ if (!outlen || outlen > BLAKE2B_OUTBYTES) {
+ sodium_misuse();
+ }
+ if (NULL == key && keylen > 0) {
+ sodium_misuse();
+ }
+ if (keylen > BLAKE2B_KEYBYTES) {
+ sodium_misuse();
+ }
+ if (keylen > 0) {
+ if (blake2b_init_key(S, outlen, key, keylen) < 0) {
+ sodium_misuse();
+ }
+ } else {
+ if (blake2b_init(S, outlen) < 0) {
+ sodium_misuse();
+ }
+ }
+
+ blake2b_update(S, (const uint8_t *) in, inlen);
+ blake2b_final(S, out, outlen);
+ return 0;
+}
+
+int
+blake2b_salt_personal(uint8_t *out, const void *in, const void *key,
+ const uint8_t outlen, const uint64_t inlen,
+ uint8_t keylen, const void *salt, const void *personal)
+{
+ blake2b_state S[1];
+
+ /* Verify parameters */
+ if (NULL == in && inlen > 0) {
+ sodium_misuse();
+ }
+ if (NULL == out) {
+ sodium_misuse();
+ }
+ if (!outlen || outlen > BLAKE2B_OUTBYTES) {
+ sodium_misuse();
+ }
+ if (NULL == key && keylen > 0) {
+ sodium_misuse();
+ }
+ if (keylen > BLAKE2B_KEYBYTES) {
+ sodium_misuse();
+ }
+ if (keylen > 0) {
+ if (blake2b_init_key_salt_personal(S, outlen, key, keylen, salt,
+ personal) < 0) {
+ sodium_misuse();
+ }
+ } else {
+ if (blake2b_init_salt_personal(S, outlen, salt, personal) < 0) {
+ sodium_misuse();
+ }
+ }
+
+ blake2b_update(S, (const uint8_t *) in, inlen);
+ blake2b_final(S, out, outlen);
+ return 0;
+}
+
+int
+blake2b_pick_best_implementation(void)
+{
+/* LCOV_EXCL_START */
+#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
+ defined(HAVE_SMMINTRIN_H)
+ if (sodium_runtime_has_avx2()) {
+ blake2b_compress = blake2b_compress_avx2;
+ return 0;
+ }
+#endif
+#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
+ defined(HAVE_SMMINTRIN_H)
+ if (sodium_runtime_has_sse41()) {
+ blake2b_compress = blake2b_compress_sse41;
+ return 0;
+ }
+#endif
+#if defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H)
+ if (sodium_runtime_has_ssse3()) {
+ blake2b_compress = blake2b_compress_ssse3;
+ return 0;
+ }
+#endif
+ blake2b_compress = blake2b_compress_ref;
+
+ return 0;
+ /* LCOV_EXCL_STOP */
+}
diff --git a/libs/libsodium/src/crypto_generichash/blake2b/ref/generichash_blake2b.c b/libs/libsodium/src/crypto_generichash/blake2b/ref/generichash_blake2b.c
new file mode 100644
index 0000000000..4bd0855006
--- /dev/null
+++ b/libs/libsodium/src/crypto_generichash/blake2b/ref/generichash_blake2b.c
@@ -0,0 +1,111 @@
+
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+
+#include "blake2.h"
+#include "crypto_generichash_blake2b.h"
+#include "private/implementations.h"
+
+int
+crypto_generichash_blake2b(unsigned char *out, size_t outlen,
+ const unsigned char *in, unsigned long long inlen,
+ const unsigned char *key, size_t keylen)
+{
+ if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES ||
+ keylen > BLAKE2B_KEYBYTES || inlen > UINT64_MAX) {
+ return -1;
+ }
+ assert(outlen <= UINT8_MAX);
+ assert(keylen <= UINT8_MAX);
+
+ return blake2b((uint8_t *) out, in, key, (uint8_t) outlen, (uint64_t) inlen,
+ (uint8_t) keylen);
+}
+
+int
+crypto_generichash_blake2b_salt_personal(
+ unsigned char *out, size_t outlen, const unsigned char *in,
+ unsigned long long inlen, const unsigned char *key, size_t keylen,
+ const unsigned char *salt, const unsigned char *personal)
+{
+ if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES ||
+ keylen > BLAKE2B_KEYBYTES || inlen > UINT64_MAX) {
+ return -1;
+ }
+ assert(outlen <= UINT8_MAX);
+ assert(keylen <= UINT8_MAX);
+
+ return blake2b_salt_personal((uint8_t *) out, in, key, (uint8_t) outlen,
+ (uint64_t) inlen, (uint8_t) keylen, salt,
+ personal);
+}
+
+int
+crypto_generichash_blake2b_init(crypto_generichash_blake2b_state *state,
+ const unsigned char *key, const size_t keylen,
+ const size_t outlen)
+{
+ if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES ||
+ keylen > BLAKE2B_KEYBYTES) {
+ return -1;
+ }
+ assert(outlen <= UINT8_MAX);
+ assert(keylen <= UINT8_MAX);
+ if (key == NULL || keylen <= 0U) {
+ if (blake2b_init(state, (uint8_t) outlen) != 0) {
+ return -1; /* LCOV_EXCL_LINE */
+ }
+ } else if (blake2b_init_key(state, (uint8_t) outlen, key,
+ (uint8_t) keylen) != 0) {
+ return -1; /* LCOV_EXCL_LINE */
+ }
+ return 0;
+}
+
+int
+crypto_generichash_blake2b_init_salt_personal(
+ crypto_generichash_blake2b_state *state, const unsigned char *key,
+ const size_t keylen, const size_t outlen, const unsigned char *salt,
+ const unsigned char *personal)
+{
+ if (outlen <= 0U || outlen > BLAKE2B_OUTBYTES ||
+ keylen > BLAKE2B_KEYBYTES) {
+ return -1;
+ }
+ assert(outlen <= UINT8_MAX);
+ assert(keylen <= UINT8_MAX);
+ if (key == NULL || keylen <= 0U) {
+ if (blake2b_init_salt_personal(state, (uint8_t) outlen, salt,
+ personal) != 0) {
+ return -1; /* LCOV_EXCL_LINE */
+ }
+ } else if (blake2b_init_key_salt_personal(state, (uint8_t) outlen, key,
+ (uint8_t) keylen, salt,
+ personal) != 0) {
+ return -1; /* LCOV_EXCL_LINE */
+ }
+ return 0;
+}
+
+int
+crypto_generichash_blake2b_update(crypto_generichash_blake2b_state *state,
+ const unsigned char *in,
+ unsigned long long inlen)
+{
+ return blake2b_update(state, (const uint8_t *) in, (uint64_t) inlen);
+}
+
+int
+crypto_generichash_blake2b_final(crypto_generichash_blake2b_state *state,
+ unsigned char *out, const size_t outlen)
+{
+ assert(outlen <= UINT8_MAX);
+ return blake2b_final(state, (uint8_t *) out, (uint8_t) outlen);
+}
+
+int
+_crypto_generichash_blake2b_pick_best_implementation(void)
+{
+ return blake2b_pick_best_implementation();
+}