author    Georgi Gerganov <ggerganov@gmail.com>  2023-10-30 19:19:15 +0200
committer GitHub <noreply@github.com>            2023-10-30 19:19:15 +0200
commit    207b51900e15cc7f89763a3bb1c565fe11cbb45d (patch)
tree      0e37b11eafe495bfc66893f70e1e471bf52c3cd4
parent    6e08281e588bbba1a5d180290a94a43f167f3a1a (diff)
ggml : move FP16 <-> FP32 code to ggml-impl.h (#3861)
* ggml : move FP16 <-> FP32 stuff to ggml-impl.h
  ggml-ci
* tests : fix ARM build
* ggml : explicitly initialize deprecated type traits
* ggml : add math.h to ggml-impl.h
* ggml : remove duplicate static assert macros
* ggml : prefix lookup tables with ggml_
  ggml-ci
* ggml-impl : move extern "C" to start of file
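
As a quick reference, the conversion macros this commit relocates can be exercised with a minimal round-trip (a sketch, not part of the commit; it assumes ggml-impl.h is on the include path and uses the direct GGML_COMPUTE_* forms, since the GGML_FP16_TO_FP32 lookup variant also needs the table filled by ggml_init()):

    #include <stdio.h>
    #include "ggml-impl.h"

    int main(void) {
        const float x = 0.3333f;
        const ggml_fp16_t h = GGML_COMPUTE_FP32_TO_FP16(x);   // encode, no lookup table needed
        const float y = GGML_COMPUTE_FP16_TO_FP32(h);         // decode back to float
        printf("%f -> %f after the fp16 round-trip\n", x, y); // expect a small rounding gap
        return 0;
    }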
-rw-r--r--  ggml-impl.h                  237
-rw-r--r--  ggml-quants.c                350
-rw-r--r--  ggml-quants.h                 14
-rw-r--r--  ggml.c                       282
-rw-r--r--  llama.cpp                      2
-rw-r--r--  tests/test-double-float.cpp    2
-rw-r--r--  tests/test-quantize-fns.cpp    7
7 files changed, 470 insertions, 424 deletions
diff --git a/ggml-impl.h b/ggml-impl.h
new file mode 100644
index 00000000..5ec18a50
--- /dev/null
+++ b/ggml-impl.h
@@ -0,0 +1,237 @@
+#pragma once
+
+#include "ggml.h"
+
+// GGML internal header
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h> // memcpy
+#include <math.h> // fabsf
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// static_assert should be a #define, but if it's not,
+// fall back to the _Static_assert C11 keyword.
+// if C99 - static_assert is noop
+// ref: https://stackoverflow.com/a/53923785/4039976
+#ifndef static_assert
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
+#define static_assert(cond, msg) _Static_assert(cond, msg)
+#else
+#define static_assert(cond, msg) struct global_scope_noop_trick
+#endif
+#endif
+
+// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
+#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
+#ifndef __FMA__
+#define __FMA__
+#endif
+#ifndef __F16C__
+#define __F16C__
+#endif
+#ifndef __SSE3__
+#define __SSE3__
+#endif
+#endif
+
+#undef MIN
+#undef MAX
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+// 16-bit float
+// on Arm, we use __fp16
+// on x86, we use uint16_t
+#if defined(__ARM_NEON) && !defined(_MSC_VER)
+
+// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
+//
+// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
+//
+#include <arm_neon.h>
+
+#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
+#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
+
+#define GGML_FP16_TO_FP32(x) ((float) (x))
+#define GGML_FP32_TO_FP16(x) (x)
+
+#else
+
+#ifdef __wasm_simd128__
+#include <wasm_simd128.h>
+#else
+#ifdef __POWER9_VECTOR__
+#include <altivec.h>
+#undef bool
+#define bool _Bool
+#else
+#if defined(_MSC_VER) || defined(__MINGW32__)
+#include <intrin.h>
+#else
+#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
+#if !defined(__riscv)
+#include <immintrin.h>
+#endif
+#endif
+#endif
+#endif
+#endif
+
+#ifdef __riscv_v_intrinsic
+#include <riscv_vector.h>
+#endif
+
+#ifdef __F16C__
+
+#ifdef _MSC_VER
+#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
+#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
+#else
+#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
+#endif
+
+#elif defined(__POWER9_VECTOR__)
+
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
+/* the inline asm below is about 12% faster than the lookup method */
+#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
+#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
+
+static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
+ register float f;
+ register double d;
+ __asm__(
+ "mtfprd %0,%2\n"
+ "xscvhpdp %0,%0\n"
+ "frsp %1,%0\n" :
+ /* temp */ "=d"(d),
+ /* out */ "=f"(f):
+ /* in */ "r"(h));
+ return f;
+}
+
+static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
+ register double d;
+ register ggml_fp16_t r;
+ __asm__( /* xscvdphp can work on double or single precision */
+ "xscvdphp %0,%2\n"
+ "mffprd %1,%0\n" :
+ /* temp */ "=d"(d),
+ /* out */ "=r"(r):
+ /* in */ "f"(f));
+ return r;
+}
+
+#else
+
+// FP16 <-> FP32
+// ref: https://github.com/Maratyszcza/FP16
+
+static inline float fp32_from_bits(uint32_t w) {
+ union {
+ uint32_t as_bits;
+ float as_value;
+ } fp32;
+ fp32.as_bits = w;
+ return fp32.as_value;
+}
+
+static inline uint32_t fp32_to_bits(float f) {
+ union {
+ float as_value;
+ uint32_t as_bits;
+ } fp32;
+ fp32.as_value = f;
+ return fp32.as_bits;
+}
+
+static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
+ const uint32_t w = (uint32_t) h << 16;
+ const uint32_t sign = w & UINT32_C(0x80000000);
+ const uint32_t two_w = w + w;
+
+ const uint32_t exp_offset = UINT32_C(0xE0) << 23;
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+ const float exp_scale = 0x1.0p-112f;
+#else
+ const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
+#endif
+ const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
+
+ const uint32_t magic_mask = UINT32_C(126) << 23;
+ const float magic_bias = 0.5f;
+ const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
+
+ const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
+ const uint32_t result = sign |
+ (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
+ return fp32_from_bits(result);
+}
+
+static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
+ const float scale_to_inf = 0x1.0p+112f;
+ const float scale_to_zero = 0x1.0p-110f;
+#else
+ const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
+ const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
+#endif
+ float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
+
+ const uint32_t w = fp32_to_bits(f);
+ const uint32_t shl1_w = w + w;
+ const uint32_t sign = w & UINT32_C(0x80000000);
+ uint32_t bias = shl1_w & UINT32_C(0xFF000000);
+ if (bias < UINT32_C(0x71000000)) {
+ bias = UINT32_C(0x71000000);
+ }
+
+ base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
+ const uint32_t bits = fp32_to_bits(base);
+ const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
+ const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
+ const uint32_t nonsign = exp_bits + mantissa_bits;
+ return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
+}
+
+#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
+#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
+
+#endif // __F16C__
+
+#endif // __ARM_NEON
+
+// precomputed f32 table for f16 (256 KB)
+// defined in ggml.c, initialized in ggml_init()
+extern float ggml_table_f32_f16[1 << 16];
+
+// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
+// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
+// This is also true for POWER9.
+#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
+
+inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
+ uint16_t s;
+ memcpy(&s, &f, sizeof(uint16_t));
+ return ggml_table_f32_f16[s];
+}
+
+#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
+#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
+
+#endif
+
+ // TODO: backend v2 PR
+
+#ifdef __cplusplus
+}
+#endif
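
A note on the table declared above: on targets without native fp16 support, GGML_FP16_TO_FP32 is a single load from ggml_table_f32_f16, which ggml_init() fills once per process. A sketch of what that initialization amounts to (init_fp16_table is a hypothetical stand-in for the loop inside ggml_init(), written against the declarations in this header):

    #include <stdint.h>
    #include <string.h>
    #include "ggml-impl.h"

    static void init_fp16_table(void) {
        for (uint32_t i = 0; i < (1u << 16); ++i) {
            const uint16_t u = (uint16_t) i;
            ggml_fp16_t h;
            memcpy(&h, &u, sizeof(h)); // reinterpret the raw bits as an fp16 value
            ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(h);
        }
    }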
diff --git a/ggml-quants.c b/ggml-quants.c
index fd4ee1be..72159446 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -1,5 +1,5 @@
#include "ggml-quants.h"
-#include "ggml.h"
+#include "ggml-impl.h"
#include <math.h>
#include <string.h>
@@ -352,7 +352,7 @@ void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict
const float d = max / -8;
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
for (int j = 0; j < qk/2; ++j) {
const float x0 = x[i*qk + 0 + j]*id;
@@ -392,8 +392,8 @@ void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict
const float d = (max - min) / ((1 << 4) - 1);
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
- y[i].m = ggml_fp32_to_fp16(min);
+ y[i].d = GGML_FP32_TO_FP16(d);
+ y[i].m = GGML_FP32_TO_FP16(min);
for (int j = 0; j < qk/2; ++j) {
const float x0 = (x[i*qk + 0 + j] - min)*id;
@@ -434,7 +434,7 @@ void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict
const float d = max / -16;
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
uint32_t qh = 0;
@@ -481,8 +481,8 @@ void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict
const float d = (max - min) / ((1 << 5) - 1);
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
- y[i].m = ggml_fp32_to_fp16(min);
+ y[i].d = GGML_FP32_TO_FP16(d);
+ y[i].m = GGML_FP32_TO_FP16(min);
uint32_t qh = 0;
@@ -524,7 +524,7 @@ void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict
const float d = amax / ((1 << 7) - 1);
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
for (int j = 0; j < QK8_0; ++j) {
const float x0 = x[i*QK8_0 + j]*id;
@@ -559,7 +559,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
const float d = amax / ((1 << 7) - 1);
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
for (int j = 0; j < 8; j++) {
const float32x4_t v = vmulq_n_f32(srcv[j], id);
@@ -592,7 +592,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
const float d = amax / ((1 << 7) - 1);
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
for (int j = 0; j < 8; j++) {
const v128_t v = wasm_f32x4_mul(srcv[j], wasm_f32x4_splat(id));
@@ -627,7 +627,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
// Quantize these floats
const float d = maxScalar / 127.f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f;
const __m256 mul = _mm256_set1_ps( id );
@@ -704,7 +704,7 @@ void quantize_row_q8_0(const float * restrict x, void * restrict vy, int k) {
const float d = amax / ((1 << 7) - 1);
const float id = d ? 1.0f/d : 0.0f;
- y[i].d = ggml_fp32_to_fp16(d);
+ y[i].d = GGML_FP32_TO_FP16(d);
vfloat32m4_t x0 = __riscv_vfmul_vf_f32m4(v_x, id, vl);
@@ -982,7 +982,7 @@ void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int
const int nb = k / qk;
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
for (int j = 0; j < qk/2; ++j) {
const int x0 = (x[i].qs[j] & 0x0F) - 8;
@@ -1002,8 +1002,8 @@ void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int
const int nb = k / qk;
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
- const float m = ggml_fp16_to_fp32(x[i].m);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
+ const float m = GGML_FP16_TO_FP32(x[i].m);
for (int j = 0; j < qk/2; ++j) {
const int x0 = (x[i].qs[j] & 0x0F);
@@ -1023,7 +1023,7 @@ void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int
const int nb = k / qk;
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
uint32_t qh;
memcpy(&qh, x[i].qh, sizeof(qh));
@@ -1049,8 +1049,8 @@ void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int
const int nb = k / qk;
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
- const float m = ggml_fp16_to_fp32(x[i].m);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
+ const float m = GGML_FP16_TO_FP32(x[i].m);
uint32_t qh;
memcpy(&qh, x[i].qh, sizeof(qh));
@@ -1076,7 +1076,7 @@ void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int
const int nb = k / qk;
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
for (int j = 0; j < qk; ++j) {
y[i*qk + j] = x[i].qs[j]*d;
@@ -1387,10 +1387,10 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
int l = nearest_int(iscale*scales[j]);
y[i].scales[j] = l;
}
- y[i].d = ggml_fp32_to_fp16(max_scale/q4scale);
+ y[i].d = GGML_FP32_TO_FP16(max_scale/q4scale);
} else {
for (int j = 0; j < QK_K/16; ++j) y[i].scales[j] = 0;
- y[i].d = ggml_fp32_to_fp16(0.f);
+ y[i].d = GGML_FP32_TO_FP16(0.f);
}
if (max_min > 0) {
float iscale = q4scale/max_min;
@@ -1398,14 +1398,14 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
int l = nearest_int(iscale*mins[j]);
y[i].scales[j] |= (l << 4);
}
- y[i].dmin = ggml_fp32_to_fp16(max_min/q4scale);
+ y[i].dmin = GGML_FP32_TO_FP16(max_min/q4scale);
} else {
- y[i].dmin = ggml_fp32_to_fp16(0.f);
+ y[i].dmin = GGML_FP32_TO_FP16(0.f);
}
for (int j = 0; j < QK_K/16; ++j) {
- const float d = ggml_fp16_to_fp32(y[i].d) * (y[i].scales[j] & 0xF);
+ const float d = GGML_FP16_TO_FP32(y[i].d) * (y[i].scales[j] & 0xF);
if (!d) continue;
- const float dm = ggml_fp16_to_fp32(y[i].dmin) * (y[i].scales[j] >> 4);
+ const float dm = GGML_FP16_TO_FP32(y[i].dmin) * (y[i].scales[j] >> 4);
for (int ii = 0; ii < 16; ++ii) {
int l = nearest_int((x[16*j + ii] + dm)/d);
l = MAX(0, MIN(3, l));
@@ -1436,8 +1436,8 @@ void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
- const float min = ggml_fp16_to_fp32(x[i].dmin);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
+ const float min = GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * q = x[i].qs;
@@ -1526,16 +1526,16 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict
l >>= 4;
y[i].scales[j%4 + 8] |= (l << (2*(j/4)));
}
- y[i].d = ggml_fp32_to_fp16(1/iscale);
+ y[i].d = GGML_FP32_TO_FP16(1/iscale);
} else {
- y[i].d = ggml_fp32_to_fp16(0.f);
+ y[i].d = GGML_FP32_TO_FP16(0.f);
}
int8_t sc;
for (int j = 0; j < QK_K/16; ++j) {
sc = j < 8 ? y[i].scales[j] & 0xF : y[i].scales[j-8] >> 4;
sc = (sc | (((y[i].scales[8 + j%4] >> (2*(j/4))) & 3) << 4)) - 32;
- float d = ggml_fp16_to_fp32(y[i].d) * sc;
+ float d = GGML_FP16_TO_FP32(y[i].d) * sc;
if (!d) {
continue;
}
@@ -1555,16 +1555,16 @@ void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict
l2 = 8 + MAX(-8, MIN(7, l2));
y[i].scales[j/2] = l1 | (l2 << 4);
}
- y[i].d = ggml_fp32_to_fp16(1/iscale);
+ y[i].d = GGML_FP32_TO_FP16(1/iscale);
} else {
for (int j = 0; j < QK_K/16; j+=2) {
y[i].scales[j/2] = 0;
}
- y[i].d = ggml_fp32_to_fp16(0.f);
+ y[i].d = GGML_FP32_TO_FP16(0.f);
}
for (int j = 0; j < QK_K/16; ++j) {
int s = j%2 == 0 ? y[i].scales[j/2] & 0xF : y[i].scales[j/2] >> 4;
- float d = ggml_fp16_to_fp32(y[i].d) * (s - 8);
+ float d = GGML_FP16_TO_FP32(y[i].d) * (s - 8);
if (!d) {
continue;
}
@@ -1618,7 +1618,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
for (int i = 0; i < nb; i++) {
- const float d_all = ggml_fp16_to_fp32(x[i].d);
+ const float d_all = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q = x[i].qs;
const uint8_t * restrict hm = x[i].hmask;
@@ -1663,7 +1663,7 @@ void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int
for (int i = 0; i < nb; i++) {
- const float d_all = ggml_fp16_to_fp32(x[i].d);
+ const float d_all = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q = x[i].qs;
const uint8_t * restrict hm = x[i].hmask;
@@ -1753,15 +1753,15 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
y[i].scales[j-0] |= ((lm >> 4) << 6);
}
}
- y[i].d = ggml_fp32_to_fp16(max_scale/63.f);
- y[i].dmin = ggml_fp32_to_fp16(max_min/63.f);
+ y[i].d = GGML_FP32_TO_FP16(max_scale/63.f);
+ y[i].dmin = GGML_FP32_TO_FP16(max_min/63.f);
uint8_t sc, m;
for (int j = 0; j < QK_K/32; ++j) {
get_scale_min_k4(j, y[i].scales, &sc, &m);
- const float d = ggml_fp16_to_fp32(y[i].d) * sc;
+ const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
if (!d) continue;
- const float dm = ggml_fp16_to_fp32(y[i].dmin) * m;
+ const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
for (int ii = 0; ii < 32; ++ii) {
int l = nearest_int((x[32*j + ii] + dm)/d);
l = MAX(0, MIN(15, l));
@@ -1778,17 +1778,17 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
int m2 = nearest_int(inv_min*mins[1]);
y[i].scales[0] = d1 | (m1 << 4);
y[i].scales[1] = d2 | (m2 << 4);
- y[i].d[0] = ggml_fp32_to_fp16(max_scale/s_factor);
- y[i].d[1] = ggml_fp32_to_fp16(max_min/s_factor);
+ y[i].d[0] = GGML_FP32_TO_FP16(max_scale/s_factor);
+ y[i].d[1] = GGML_FP32_TO_FP16(max_min/s_factor);
float sumlx = 0;
int suml2 = 0;
for (int j = 0; j < QK_K/32; ++j) {
const uint8_t sd = y[i].scales[j] & 0xF;
const uint8_t sm = y[i].scales[j] >> 4;
- const float d = ggml_fp16_to_fp32(y[i].d[0]) * sd;
+ const float d = GGML_FP16_TO_FP32(y[i].d[0]) * sd;
if (!d) continue;
- const float m = ggml_fp16_to_fp32(y[i].d[1]) * sm;
+ const float m = GGML_FP16_TO_FP32(y[i].d[1]) * sm;
for (int ii = 0; ii < 32; ++ii) {
int l = nearest_int((x[32*j + ii] + m)/d);
l = MAX(0, MIN(15, l));
@@ -1798,7 +1798,7 @@ void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict
}
}
if (suml2) {
- y[i].d[0] = ggml_fp32_to_fp16(sumlx/suml2);
+ y[i].d[0] = GGML_FP32_TO_FP16(sumlx/suml2);
}
#endif
uint8_t * q = y[i].qs;
@@ -1822,8 +1822,8 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int
#if QK_K == 256
- const float d = ggml_fp16_to_fp32(x[i].d);
- const float min = ggml_fp16_to_fp32(x[i].dmin);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
+ const float min = GGML_FP16_TO_FP32(x[i].dmin);
int is = 0;
uint8_t sc, m;
@@ -1837,8 +1837,8 @@ void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int
q += 32; is += 2;
}
#else
- const float dall = ggml_fp16_to_fp32(x[i].d[0]);
- const float mall = ggml_fp16_to_fp32(x[i].d[1]);
+ const float dall = GGML_FP16_TO_FP32(x[i].d[0]);
+ const float mall = GGML_FP16_TO_FP32(x[i].d[1]);
const float d1 = dall * (x[i].scales[0] & 0xF), m1 = mall * (x[i].scales[0] >> 4);
const float d2 = dall * (x[i].scales[1] & 0xF), m2 = mall * (x[i].scales[1] >> 4);
for (int l = 0; l < 32; ++l) {
@@ -1924,15 +1924,15 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
y[i].scales[j-0] |= ((lm >> 4) << 6);
}
}
- y[i].d = ggml_fp32_to_fp16(max_scale/63.f);
- y[i].dmin = ggml_fp32_to_fp16(max_min/63.f);
+ y[i].d = GGML_FP32_TO_FP16(max_scale/63.f);
+ y[i].dmin = GGML_FP32_TO_FP16(max_min/63.f);
uint8_t sc, m;
for (int j = 0; j < QK_K/32; ++j) {
get_scale_min_k4(j, y[i].scales, &sc, &m);
- const float d = ggml_fp16_to_fp32(y[i].d) * sc;
+ const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
if (!d) continue;
- const float dm = ggml_fp16_to_fp32(y[i].dmin) * m;
+ const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
for (int ii = 0; ii < 32; ++ii) {
int l = nearest_int((x[32*j + ii] + dm)/d);
l = MAX(0, MIN(31, l));
@@ -1976,10 +1976,10 @@ void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict
int l = nearest_int(iscale*scales[j]);
y[i].scales[j] = MAX(-128, MIN(127, l));
}
- y[i].d = ggml_fp32_to_fp16(1/iscale);
+ y[i].d = GGML_FP32_TO_FP16(1/iscale);
for (int j = 0; j < QK_K/16; ++j) {
- const float d = ggml_fp16_to_fp32(y[i].d) * y[i].scales[j];
+ const float d = GGML_FP16_TO_FP32(y[i].d) * y[i].scales[j];
if (!d) continue;
for (int ii = 0; ii < 16; ++ii) {
int l = nearest_int(x[16*j + ii]/d);
@@ -2023,8 +2023,8 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int
#if QK_K == 256
- const float d = ggml_fp16_to_fp32(x[i].d);
- const float min = ggml_fp16_to_fp32(x[i].dmin);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
+ const float min = GGML_FP16_TO_FP32(x[i].dmin);
int is = 0;
uint8_t sc, m;
@@ -2040,7 +2040,7 @@ void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int
u1 <<= 2; u2 <<= 2;
}
#else
- float d = ggml_fp16_to_fp32(x[i].d);
+ float d = GGML_FP16_TO_FP32(x[i].d);
const int8_t * restrict s = x[i].scales;
for (int l = 0; l < 8; ++l) {
y[l+ 0] = d * s[0] * ((ql[l+ 0] & 0xF) - (qh[l] & 0x01 ? 0 : 16));
@@ -2103,19 +2103,19 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
if (!max_abs_scale) {
memset(&y[i], 0, sizeof(block_q6_K));
- y[i].d = ggml_fp32_to_fp16(0.f);
+ y[i].d = GGML_FP32_TO_FP16(0.f);
x += QK_K;
continue;
}
float iscale = -128.f/max_scale;
- y[i].d = ggml_fp32_to_fp16(1/iscale);
+ y[i].d = GGML_FP32_TO_FP16(1/iscale);
for (int ib = 0; ib < QK_K/16; ++ib) {
y[i].scales[ib] = MIN(127, nearest_int(iscale*scales[ib]));
}
for (int j = 0; j < QK_K/16; ++j) {
- float d = ggml_fp16_to_fp32(y[i].d) * y[i].scales[j];
+ float d = GGML_FP16_TO_FP32(y[i].d) * y[i].scales[j];
if (!d) {
continue;
}
@@ -2164,7 +2164,7 @@ void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int
for (int i = 0; i < nb; i++) {
- const float d = ggml_fp16_to_fp32(x[i].d);
+ const float d = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict ql = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -2371,8 +2371,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
const int32x4_t p_0 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_0ls, v1_0l), v0_0hs, v1_0h);
const int32x4_t p_1 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_1ls, v1_1l), v0_1hs, v1_1h);
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), ggml_fp16_to_fp32(x0->d)*ggml_fp16_to_fp32(y0->d));
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), ggml_fp16_to_fp32(x1->d)*ggml_fp16_to_fp32(y1->d));
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
#else
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0ls), vget_low_s8 (v1_0l));
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0ls), vget_high_s8(v1_0l));
@@ -2389,8 +2389,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h));
const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h));
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ggml_fp16_to_fp32(x0->d)*ggml_fp16_to_fp32(y0->d));
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ggml_fp16_to_fp32(x1->d)*ggml_fp16_to_fp32(y1->d));
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
#endif
}
@@ -2402,7 +2402,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
// Main loop
for (int i = 0; i < nb; ++i) {
/* Compute combined scale for the block */
- const __m256 d = _mm256_set1_ps( ggml_fp16_to_fp32(x[i].d) * ggml_fp16_to_fp32(y[i].d) );
+ const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) );
__m256i bx = bytes_from_nibbles_32(x[i].qs);
@@ -2426,7 +2426,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
// Main loop
for (int i = 0; i < nb; ++i) {
// Compute combined scale for the block
- const __m256 d = _mm256_set1_ps( ggml_fp16_to_fp32(x[i].d) * ggml_fp16_to_fp32(y[i].d) );
+ const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) );
const __m128i lowMask = _mm_set1_epi8(0xF);
const __m128i off = _mm_set1_epi8(8);
@@ -2468,7 +2468,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
_mm_prefetch(&y[0] + sizeof(block_q8_0), _MM_HINT_T0);
// Compute combined scale for the block 0 and 1
- const __m128 d_0_1 = _mm_set1_ps( ggml_fp16_to_fp32(x[0].d) * ggml_fp16_to_fp32(y[0].d) );
+ const __m128 d_0_1 = _mm_set1_ps( GGML_FP16_TO_FP32(x[0].d) * GGML_FP16_TO_FP32(y[0].d) );
const __m128i tmp_0_1 = _mm_loadu_si128((const __m128i *)x[0].qs);
@@ -2486,7 +2486,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
_mm_prefetch(&y[1] + sizeof(block_q8_0), _MM_HINT_T0);
// Compute combined scale for the block 2 and 3
- const __m128 d_2_3 = _mm_set1_ps( ggml_fp16_to_fp32(x[1].d) * ggml_fp16_to_fp32(y[1].d) );
+ const __m128 d_2_3 = _mm_set1_ps( GGML_FP16_TO_FP32(x[1].d) * GGML_FP16_TO_FP32(y[1].d) );
const __m128i tmp_2_3 = _mm_loadu_si128((const __m128i *)x[1].qs);
@@ -2521,7 +2521,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
_mm_prefetch(&y[i] + sizeof(block_q8_0), _MM_HINT_T0);
// Compute combined scale for the block 0 and 1
- const __m128 d_0_1 = _mm_set1_ps( ggml_fp16_to_fp32(x[i].d) * ggml_fp16_to_fp32(y[i].d) );
+ const __m128 d_0_1 = _mm_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) );
const __m128i tmp_0_1 = _mm_loadu_si128((const __m128i *)x[i].qs);
@@ -2539,7 +2539,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
_mm_prefetch(&y[i] + 2 * sizeof(block_q8_0), _MM_HINT_T0);
// Compute combined scale for the block 2 and 3
- const __m128 d_2_3 = _mm_set1_ps( ggml_fp16_to_fp32(x[i + 1].d) * ggml_fp16_to_fp32(y[i + 1].d) );
+ const __m128 d_2_3 = _mm_set1_ps( GGML_FP16_TO_FP32(x[i + 1].d) * GGML_FP16_TO_FP32(y[i + 1].d) );
const __m128i tmp_2_3 = _mm_loadu_si128((const __m128i *)x[i + 1].qs);
@@ -2606,7 +2606,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
- sumf += sumi*ggml_fp16_to_fp32(x[i].d)*ggml_fp16_to_fp32(y[i].d);
+ sumf += sumi*GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d);
}
*s = sumf;
@@ -2624,7 +2624,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx,
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
}
- sumf += sumi*ggml_fp16_to_fp32(x[i].d)*ggml_fp16_to_fp32(y[i].d);
+ sumf += sumi*GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d);
}
*s = sumf;
@@ -2655,7 +2655,7 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri
const block_q8_1 * restrict y0 = &y[i + 0];
const block_q8_1 * restrict y1 = &y[i + 1];
- summs += ggml_fp16_to_fp32(x0->m) * y0->s + ggml_fp16_to_fp32(x1->m) * y1->s;
+ summs += GGML_FP16_TO_FP32(x0->m) * y0->s + GGML_FP16_TO_FP32(x1->m) * y1->s;
const uint8x16_t m4b = vdupq_n_u8(0x0F);
@@ -2679,8 +2679,8 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri
const int32x4_t p_0 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h);
const int32x4_t p_1 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h);
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), ggml_fp16_to_fp32(x0->d)*y0->d);
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), ggml_fp16_to_fp32(x1->d)*y1->d);
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*y0->d);
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*y1->d);
#else
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0l), vget_low_s8 (v1_0l));
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0l), vget_high_s8(v1_0l));
@@ -2697,8 +2697,8 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri
const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h));
const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h));
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ggml_fp16_to_fp32(x0->d)*y0->d);
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ggml_fp16_to_fp32(x1->d)*y1->d);
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*y0->d);
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*y1->d);
#endif
}
@@ -2711,10 +2711,10 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri
// Main loop
for (int i = 0; i < nb; ++i) {
- const float d0 = ggml_fp16_to_fp32(x[i].d);
+ const float d0 = GGML_FP16_TO_FP32(x[i].d);
const float d1 = y[i].d;
- summs += ggml_fp16_to_fp32(x[i].m) * y[i].s;
+ summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
const __m256 d0v = _mm256_set1_ps( d0 );
const __m256 d1v = _mm256_set1_ps( d1 );
@@ -2766,7 +2766,7 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
- sumf += (ggml_fp16_to_fp32(x[i].d)*y[i].d)*sumi + ggml_fp16_to_fp32(x[i].m)*y[i].s;
+ sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
}
*s = sumf;
@@ -2784,7 +2784,7 @@ void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void * restri
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
}
- sumf += (ggml_fp16_to_fp32(x[i].d)*y[i].d)*sumi + ggml_fp16_to_fp32(x[i].m)*y[i].s;
+ sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
}
*s = sumf;
@@ -2864,10 +2864,10 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
#if defined(__ARM_FEATURE_DOTPROD)
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(
vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l),
- vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), ggml_fp16_to_fp32(x0->d)*ggml_fp16_to_fp32(y0->d));
+ vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(
vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l),
- vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), ggml_fp16_to_fp32(x1->d)*ggml_fp16_to_fp32(y1->d));
+ vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
#else
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lf), vget_low_s8 (v1_0l));
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lf), vget_high_s8(v1_0l));
@@ -2884,8 +2884,8 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h));
const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h));
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ggml_fp16_to_fp32(x0->d)*ggml_fp16_to_fp32(y0->d));
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ggml_fp16_to_fp32(x1->d)*ggml_fp16_to_fp32(y1->d));
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
#endif
}
@@ -2946,7 +2946,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
wasm_i32x4_dot_i16x8(v0lfh, v1lh)),
wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl),
wasm_i32x4_dot_i16x8(v0hfh, v1hh)))),
- wasm_f32x4_splat(ggml_fp16_to_fp32(x0->d) * ggml_fp16_to_fp32(y0->d))));
+ wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d))));
}
*s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
@@ -2958,7 +2958,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
// Main loop
for (int i = 0; i < nb; i++) {
/* Compute combined scale for the block */
- const __m256 d = _mm256_set1_ps(ggml_fp16_to_fp32(x[i].d) * ggml_fp16_to_fp32(y[i].d));
+ const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
__m256i bx = bytes_from_nibbles_32(x[i].qs);
__m256i bxhi = bytes_from_bits_32(x[i].qh);
@@ -2982,7 +2982,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
// Main loop
for (int i = 0; i < nb; i++) {
/* Compute combined scale for the block */
- const __m256 d = _mm256_set1_ps(ggml_fp16_to_fp32(x[i].d) * ggml_fp16_to_fp32(y[i].d));
+ const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
__m256i bx = bytes_from_nibbles_32(x[i].qs);
const __m256i bxhi = bytes_from_bits_32(x[i].qh);
@@ -3066,7 +3066,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
- sumf += (ggml_fp16_to_fp32(x[i].d)*ggml_fp16_to_fp32(y[i].d)) * sumi;
+ sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi;
}
*s = sumf;
@@ -3090,7 +3090,7 @@ void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void * restri
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
}
- sumf += (ggml_fp16_to_fp32(x[i].d)*ggml_fp16_to_fp32(y[i].d)) * sumi;
+ sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d)) * sumi;
}
*s = sumf;
@@ -3130,8 +3130,8 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
const uint8x16_t m4b = vdupq_n_u8(0x0F);
- summs0 += ggml_fp16_to_fp32(x0->m) * y0->s;
- summs1 += ggml_fp16_to_fp32(x1->m) * y1->s;
+ summs0 += GGML_FP16_TO_FP32(x0->m) * y0->s;
+ summs1 += GGML_FP16_TO_FP32(x1->m) * y1->s;
// extract the 5th bit via lookup table ((b) << 4)
memcpy(&qh0, x0->qh, sizeof(qh0));
@@ -3176,10 +3176,10 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
#if defined(__ARM_FEATURE_DOTPROD)
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(
vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l),
- vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), ggml_fp16_to_fp32(x0->d)*y0->d);
+ vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*y0->d);
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(
vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l),
- vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), ggml_fp16_to_fp32(x1->d)*y1->d);
+ vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*y1->d);
#else
const int16x8_t pl0l = vmull_s8(vget_low_s8 (v0_0lf), vget_low_s8 (v1_0l));
const int16x8_t pl0h = vmull_s8(vget_high_s8(v0_0lf), vget_high_s8(v1_0l));
@@ -3196,8 +3196,8 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
const int32x4_t pl1 = vaddq_s32(vpaddlq_s16(pl1l), vpaddlq_s16(pl1h));
const int32x4_t ph1 = vaddq_s32(vpaddlq_s16(ph1l), vpaddlq_s16(ph1h));
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), ggml_fp16_to_fp32(x0->d)*y0->d);
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), ggml_fp16_to_fp32(x1->d)*y1->d);
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(pl0, ph0)), GGML_FP16_TO_FP32(x0->d)*y0->d);
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(pl1, ph1)), GGML_FP16_TO_FP32(x1->d)*y1->d);
#endif
}
@@ -3215,7 +3215,7 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
const block_q5_1 * restrict x0 = &x[i];
const block_q8_1 * restrict y0 = &y[i];
- summs += ggml_fp16_to_fp32(x0->m) * y0->s;
+ summs += GGML_FP16_TO_FP32(x0->m) * y0->s;
const v128_t m4b = wasm_i8x16_splat(0x0F);
@@ -3262,7 +3262,7 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
wasm_i32x4_dot_i16x8(v0lfh, v1lh)),
wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl),
wasm_i32x4_dot_i16x8(v0hfh, v1hh)))),
- wasm_f32x4_splat(ggml_fp16_to_fp32(x0->d) * y0->d)));
+ wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * y0->d)));
}
*s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
@@ -3275,9 +3275,9 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
// Main loop
for (int i = 0; i < nb; i++) {
- const __m256 dx = _mm256_set1_ps(ggml_fp16_to_fp32(x[i].d));
+ const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
- summs += ggml_fp16_to_fp32(x[i].m) * y[i].s;
+ summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
__m256i bx = bytes_from_nibbles_32(x[i].qs);
__m256i bxhi = bytes_from_bits_32(x[i].qh);
@@ -3302,9 +3302,9 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
// Main loop
for (int i = 0; i < nb; i++) {
- const __m256 dx = _mm256_set1_ps(ggml_fp16_to_fp32(x[i].d));
+ const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
- summs += ggml_fp16_to_fp32(x[i].m) * y[i].s;
+ summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
__m256i bx = bytes_from_nibbles_32(x[i].qs);
const __m256i bxhi = bytes_from_bits_32(x[i].qh);
@@ -3385,7 +3385,7 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
- sumf += (ggml_fp16_to_fp32(x[i].d)*y[i].d)*sumi + ggml_fp16_to_fp32(x[i].m)*y[i].s;
+ sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
}
*s = sumf;
@@ -3409,7 +3409,7 @@ void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void * restri
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
}
- sumf += (ggml_fp16_to_fp32(x[i].d)*y[i].d)*sumi + ggml_fp16_to_fp32(x[i].m)*y[i].s;
+ sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
}
*s = sumf;
@@ -3451,11 +3451,11 @@ void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restri
#if defined(__ARM_FEATURE_DOTPROD)
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(
vdotq_s32(vdupq_n_s32(0), x0_0, y0_0),
- vdotq_s32(vdupq_n_s32(0), x0_1, y0_1))), ggml_fp16_to_fp32(x0->d)*ggml_fp16_to_fp32(y0->d));
+ vdotq_s32(vdupq_n_s32(0), x0_1, y0_1))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(
vdotq_s32(vdupq_n_s32(0), x1_0, y1_0),
- vdotq_s32(vdupq_n_s32(0), x1_1, y1_1))), ggml_fp16_to_fp32(x1->d)*ggml_fp16_to_fp32(y1->d));
+ vdotq_s32(vdupq_n_s32(0), x1_1, y1_1))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
#else
const int16x8_t p0_0 = vmull_s8(vget_low_s8 (x0_0), vget_low_s8 (y0_0));
@@ -3473,8 +3473,8 @@ void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restri
const int32x4_t p2 = vaddq_s32(vpaddlq_s16(p1_0), vpaddlq_s16(p1_1));
const int32x4_t p3 = vaddq_s32(vpaddlq_s16(p1_2), vpaddlq_s16(p1_3));
- sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(p0, p1)), ggml_fp16_to_fp32(x0->d)*ggml_fp16_to_fp32(y0->d));
- sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(p2, p3)), ggml_fp16_to_fp32(x1->d)*ggml_fp16_to_fp32(y1->d));
+ sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(p0, p1)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
+ sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(p2, p3)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
#endif
}
@@ -3486,7 +3486,7 @@ void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restri
// Main loop
for (int i = 0; i < nb; ++i) {
// Compute combined scale for the block
- const __m256 d = _mm256_set1_ps(ggml_fp16_to_fp32(x[i].d) * ggml_fp16_to_fp32(y[i].d));
+ const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
__m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
__m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
@@ -3517,7 +3517,7 @@ void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restri
int sumi = __riscv_vmv_x_s_i32m1_i32(v_sum);
- sumf += sumi*(ggml_fp16_to_fp32(x[i].d)*ggml_fp16_to_fp32(y[i].d));
+ sumf += sumi*(GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d));
}
*s = sumf;
@@ -3532,7 +3532,7 @@ void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void * restri
sumi += x[i].qs[j]*y[i].qs[j];
}
- sumf += sumi*(ggml_fp16_to_fp32(x[i].d)*ggml_fp16_to_fp32(y[i].d));
+ sumf += sumi*(GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d));
}
*s = sumf;
@@ -3562,8 +3562,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -3641,8 +3641,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -3708,8 +3708,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float dall = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -3816,8 +3816,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
const int8_t * q8 = y[i].qs;
const uint8_t * sc = x[i].scales;
- const float dall = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
size_t vl = 16;
@@ -3903,8 +3903,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
summs += y[i].bsums[j] * (sc[j] >> 4);
}
- const float dall = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
int isum = 0;
int is = 0;
@@ -4021,8 +4021,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -4073,8 +4073,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q2 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -4188,8 +4188,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
summs += y[i].bsums[j] * (sc[j] >> 4);
}
- const float dall = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
isum[0] = isum[1] = isum[2] = isum[3] = 0;
for (int l = 0; l < 16; ++l) {
@@ -4242,7 +4242,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q3 = x[i].qs;
const uint8_t * restrict qh = x[i].hmask;
@@ -4350,7 +4350,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q3 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -4455,7 +4455,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q3 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -4676,7 +4676,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
sumf += d*sum_t;
@@ -4741,7 +4741,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l];
q8 += 8; a += 8;
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
}
for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -4843,7 +4843,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q3 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -4914,7 +4914,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q3 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -5099,7 +5099,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
q8 += 8; a += 8;
for (int l = 0; l < 8; ++l) aux32[l] += scales[j] * aux16[l];
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
}
for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -5139,8 +5139,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8));
@@ -5222,8 +5222,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
memcpy(utmp, x[i].scales, 12);
utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
@@ -5288,8 +5288,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q4 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -5371,8 +5371,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
size_t vl = 8;
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
vint16mf2_t q8sums_0 = __riscv_vlse16_v_i16mf2(y[i].bsums, 4, vl);
vint16mf2_t q8sums_1 = __riscv_vlse16_v_i16mf2(y[i].bsums+1, 4, vl);
@@ -5482,9 +5482,9 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
q8 += 8; a += 8;
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
- const float dmin = ggml_fp16_to_fp32(x[i].dmin) * y[i].d;
+ const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d;
sumf -= dmin * sumi;
}
for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -5586,8 +5586,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = ggml_fp16_to_fp32(x[i].d[0]) * y[i].d;
- const float m = ggml_fp16_to_fp32(x[i].d[1]) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d[0]) * y[i].d;
+ const float m = GGML_FP16_TO_FP32(x[i].d[1]) * y[i].d;
const __m256 vd = _mm256_set1_ps(d);
const uint16_t * a = (const uint16_t *)x[i].scales;
@@ -5632,8 +5632,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = ggml_fp16_to_fp32(x[i].d[0]) * y[i].d;
- const float m = ggml_fp16_to_fp32(x[i].d[1]) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d[0]) * y[i].d;
+ const float m = GGML_FP16_TO_FP32(x[i].d[1]) * y[i].d;
const __m256 vd = _mm256_set1_ps(d);
const uint16_t * a = (const uint16_t *)x[i].scales;
@@ -5689,8 +5689,8 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
s16[0] = b[0] & 0x0f0f;
s16[1] = (b[0] >> 4) & 0x0f0f;
- sumf -= y[i].d * ggml_fp16_to_fp32(x[i].d[1]) * (scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]));
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d[0]);
+ sumf -= y[i].d * GGML_FP16_TO_FP32(x[i].d[1]) * (scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]));
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d[0]);
size_t vl = 32;
@@ -5739,9 +5739,9 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
s16[0] = b[0] & 0x0f0f;
s16[1] = (b[0] >> 4) & 0x0f0f;
- sumf -= y[i].d * ggml_fp16_to_fp32(x[i].d[1]) * (scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]));
+ sumf -= y[i].d * GGML_FP16_TO_FP32(x[i].d[1]) * (scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]));
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d[0]);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d[0]);
for (int j = 0; j < QK_K/32; ++j) {
for (int l = 0; l < 16; ++l) aux16[l] = q8[l] * a[l];
@@ -5789,8 +5789,8 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8));
@@ -5878,8 +5878,8 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
const int8_t * restrict q8 = y[i].qs;
#if QK_K == 256
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
memcpy(utmp, x[i].scales, 12);
utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
@@ -5960,8 +5960,8 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
- const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
+ const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
const uint8_t * restrict q5 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
@@ -6065,8 +6065,8 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
const uint8_t * restrict hm = x[i].qh;
const int8_t * restrict q8 = y[i].qs;
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
- const float dmin = ggml_fp16_to_fp32(x[i].dmin) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
+ const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d;
vint16mf2_t q8sums_0 = __riscv_vlse16_v_i16mf2(y[i].bsums, 4, vl);
vint16mf2_t q8sums_1 = __riscv_vlse16_v_i16mf2(y[i].bsums+1, 4, vl);
@@ -6188,9 +6188,9 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
q8 += 8; a += 8;
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
- const float dmin = ggml_fp16_to_fp32(x[i].dmin) * y[i].d;
+ const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d;
sumf -= dmin * sumi;
}
for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -6288,7 +6288,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
const uint8_t * restrict q5 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const __m256i q5bits = _mm256_loadu_si256((const __m256i*)q5);
@@ -6334,7 +6334,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
const uint8_t * restrict q5 = x[i].qs;
const int8_t * restrict q8 = y[i].qs;
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const __m256i q5bits = _mm256_loadu_si256((const __m256i*)q5);
@@ -6471,7 +6471,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
for (int l = 0; l < 8; ++l) a[8*is + l] -= (hm[l] & m ? 0 : 16);
}
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const int8_t * restrict sc = x[i].scales;
for (int j = 0; j < QK_K/16; ++j) {
@@ -6514,7 +6514,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d_all = ggml_fp16_to_fp32(x[i].d);
+ const float d_all = GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q6 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -6646,7 +6646,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q4 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -6726,7 +6726,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q4 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -6838,7 +6838,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
float sumf = 0;
for (int i = 0; i < nb; ++i) {
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
const uint8_t * restrict q6 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -6955,7 +6955,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
q8 += 8; a += 8;
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
}
for (int l = 0; l < 8; ++l) sumf += sums[l];
@@ -7053,7 +7053,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q4 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -7110,7 +7110,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int i = 0; i < nb; ++i) {
- const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
+ const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
const uint8_t * restrict q4 = x[i].ql;
const uint8_t * restrict qh = x[i].qh;
@@ -7269,7 +7269,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l];
q8 += 8; a += 8;
}
- const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
+ const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l];
}
for (int l = 0; l < 8; ++l) sumf += sums[l];
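
The pattern repeated throughout these hunks is the same: the per-block fp16 scale is decoded once with GGML_FP16_TO_FP32 and then multiplied into the integer quants. For Q4_0 the layout math boils down to the following sketch (dequant_q4_0_block is a hypothetical helper written against the dequantize_row_q4_0 hunk above):

    // one Q4_0 block: fp16 scale d + QK4_0/2 bytes, two signed 4-bit weights per byte
    static void dequant_q4_0_block(const block_q4_0 * b, float * out /* QK4_0 floats */) {
        const float d = GGML_FP16_TO_FP32(b->d);
        for (int j = 0; j < QK4_0/2; ++j) {
            out[j]           = ((b->qs[j] & 0x0F) - 8) * d; // low nibble  -> first half
            out[j + QK4_0/2] = ((b->qs[j] >>   4) - 8) * d; // high nibble -> second half
        }
    }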
diff --git a/ggml-quants.h b/ggml-quants.h
index d88f99e3..70c12c27 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -1,22 +1,12 @@
#pragma once
-// This is a private API for quantization and dequantization
-// Should not be used directly, use ggml.h instead
+#include "ggml-impl.h"
-#include "ggml.h"
+// GGML internal header
#include <stdint.h>
-#include <assert.h>
#include <stddef.h>
-#ifndef static_assert
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
-#define static_assert(cond, msg) _Static_assert(cond, msg)
-#else
-#define static_assert(cond, msg) struct global_scope_noop_trick
-#endif
-#endif
-
#define QK4_0 32
typedef struct {
ggml_fp16_t d; // delta
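With its local shim deleted, ggml-quants.h now picks up static_assert handling from ggml-impl.h, so the block layouts it declares keep their compile-time size checks even on pre-C11 compilers. A sketch of the kind of check this preserves; the assertion message is illustrative, and the fallback shown is the one this commit consolidates in ggml-impl.h:

#include <stdint.h>

// fallback consolidated in ggml-impl.h: real static_assert on C11,
// a harmless no-op tag declaration on C99
#ifndef static_assert
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
#define static_assert(cond, msg) _Static_assert(cond, msg)
#else
#define static_assert(cond, msg) struct global_scope_noop_trick
#endif
#endif

#define QK4_0 32
typedef uint16_t ggml_fp16_t;

typedef struct {
    ggml_fp16_t d;          // delta
    uint8_t qs[QK4_0 / 2];  // nibbles / quants
} block_q4_0;

// catches accidental padding in the serialized block layout at compile time
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2,
              "wrong q4_0 block size/padding");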
diff --git a/ggml.c b/ggml.c
index 95f72c35..84407b12 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1,6 +1,6 @@
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
-#include "ggml.h"
+#include "ggml-impl.h"
#include "ggml-quants.h"
#if defined(_MSC_VER) || defined(__MINGW32__)
@@ -27,18 +27,6 @@
#include <unistd.h>
#endif
-// static_assert should be a #define, but if it's not,
-// fall back to the _Static_assert C11 keyword.
-// if C99 - static_assert is noop
-// ref: https://stackoverflow.com/a/53923785/4039976
-#ifndef static_assert
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
-#define static_assert(cond, msg) _Static_assert(cond, msg)
-#else
-#define static_assert(cond, msg) struct global_scope_noop_trick
-#endif
-#endif
-
#if defined(_MSC_VER)
// disable "possible loss of data" to avoid hundreds of casts
// we should just be careful :)
@@ -106,23 +94,11 @@ typedef void * thread_ret_t;
#include <unistd.h>
#endif
+
#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#endif
-// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
-#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
-#ifndef __FMA__
-#define __FMA__
-#endif
-#ifndef __F16C__
-#define __F16C__
-#endif
-#ifndef __SSE3__
-#define __SSE3__
-#endif
-#endif
-
/*#define GGML_PERF*/
#define GGML_DEBUG 0
#define GGML_GELU_FP16
@@ -248,213 +224,27 @@ inline static void * ggml_aligned_malloc(size_t size) {
#include "ggml-opencl.h"
#endif
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
// floating point type used to accumulate sums
typedef double ggml_float;
-// 16-bit float
-// on Arm, we use __fp16
-// on x86, we use uint16_t
-#if defined(__ARM_NEON) && !defined(_MSC_VER)
-
-// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
-//
-// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
-//
-#include <arm_neon.h>
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
-#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
-
-#define GGML_FP16_TO_FP32(x) ((float) (x))
-#define GGML_FP32_TO_FP16(x) (x)
-
-#else
-
-#ifdef __wasm_simd128__
-#include <wasm_simd128.h>
-#else
-#ifdef __POWER9_VECTOR__
-#include <altivec.h>
-#undef bool
-#define bool _Bool
-#else
-#if defined(_MSC_VER) || defined(__MINGW32__)
-#include <intrin.h>
-#else
-#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
-#if !defined(__riscv)
-#include <immintrin.h>
-#endif
-#endif
-#endif
-#endif
-#endif
-
-#ifdef __riscv_v_intrinsic
-#include <riscv_vector.h>
-#endif
-
-#ifdef __F16C__
-
-#ifdef _MSC_VER
-#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
-#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
-#else
-#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
-#endif
-
-#elif defined(__POWER9_VECTOR__)
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-/* the inline asm below is about 12% faster than the lookup method */
-#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
- register float f;
- register double d;
- __asm__(
- "mtfprd %0,%2\n"
- "xscvhpdp %0,%0\n"
- "frsp %1,%0\n" :
- /* temp */ "=d"(d),
- /* out */ "=f"(f):
- /* in */ "r"(h));
- return f;
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
- register double d;
- register ggml_fp16_t r;
- __asm__( /* xscvdphp can work on double or single precision */
- "xscvdphp %0,%2\n"
- "mffprd %1,%0\n" :
- /* temp */ "=d"(d),
- /* out */ "=r"(r):
- /* in */ "f"(f));
- return r;
-}
-
-#else
-
-// FP16 <-> FP32
-// ref: https://github.com/Maratyszcza/FP16
-
-static inline float fp32_from_bits(uint32_t w) {
- union {
- uint32_t as_bits;
- float as_value;
- } fp32;
- fp32.as_bits = w;
- return fp32.as_value;
-}
-
-static inline uint32_t fp32_to_bits(float f) {
- union {
- float as_value;
- uint32_t as_bits;
- } fp32;
- fp32.as_value = f;
- return fp32.as_bits;
-}
-
-static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
- const uint32_t w = (uint32_t) h << 16;
- const uint32_t sign = w & UINT32_C(0x80000000);
- const uint32_t two_w = w + w;
-
- const uint32_t exp_offset = UINT32_C(0xE0) << 23;
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
- const float exp_scale = 0x1.0p-112f;
-#else
- const float exp_scale = fp32_from_bits(UINT32_C(0x7800000));
-#endif
- const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;
-
- const uint32_t magic_mask = UINT32_C(126) << 23;
- const float magic_bias = 0.5f;
- const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;
-
- const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
- const uint32_t result = sign |
- (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value));
- return fp32_from_bits(result);
-}
-
-static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
- const float scale_to_inf = 0x1.0p+112f;
- const float scale_to_zero = 0x1.0p-110f;
-#else
- const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
- const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
-#endif
- float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
-
- const uint32_t w = fp32_to_bits(f);
- const uint32_t shl1_w = w + w;
- const uint32_t sign = w & UINT32_C(0x80000000);
- uint32_t bias = shl1_w & UINT32_C(0xFF000000);
- if (bias < UINT32_C(0x71000000)) {
- bias = UINT32_C(0x71000000);
- }
-
- base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
- const uint32_t bits = fp32_to_bits(base);
- const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00);
- const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
- const uint32_t nonsign = exp_bits + mantissa_bits;
- return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign);
-}
-
-#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
-#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
-
-#endif // __F16C__
-
-#endif // __ARM_NEON
-
//
// global data
//
// precomputed gelu table for f16 (128 KB)
-static ggml_fp16_t table_gelu_f16[1 << 16];
+static ggml_fp16_t ggml_table_gelu_f16[1 << 16];
// precomputed quick gelu table for f16 (128 KB)
-static ggml_fp16_t table_gelu_quick_f16[1 << 16];
+static ggml_fp16_t ggml_table_gelu_quick_f16[1 << 16];
// precomputed silu table for f16 (128 KB)
-static ggml_fp16_t table_silu_f16[1 << 16];
+static ggml_fp16_t ggml_table_silu_f16[1 << 16];
// precomputed exp table for f16 (128 KB)
-static ggml_fp16_t table_exp_f16[1 << 16];
-
-// precomputed f32 table for f16 (256 KB)
-static float table_f32_f16[1 << 16];
+static ggml_fp16_t ggml_table_exp_f16[1 << 16];
-// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
-// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
-// This is also true for POWER9.
-#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
-
-inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
- uint16_t s;
- memcpy(&s, &f, sizeof(uint16_t));
- return table_f32_f16[s];
-}
-
-#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
-#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
-
-#endif
+// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
+float ggml_table_f32_f16[1 << 16];
// note: do not use these inside ggml.c
// these are meant to be used via the ggml.h API
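The net effect of this hunk: the FP16 machinery moves behind ggml-impl.h, the lookup tables get a ggml_ prefix, and ggml_table_f32_f16 loses its static so that a single definition in ggml.c can back an extern declaration in the internal header. A minimal sketch of that one-definition/shared-lookup arrangement; demo_ names are stand-ins, and the memcpy mirrors the real helper, which cannot index directly because ggml_fp16_t is __fp16 on ARM:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef uint16_t ggml_fp16_t;

// header side: declaration plus inline lookup (cf. ggml-impl.h)
extern float demo_table_f32_f16[1 << 16];

static inline float demo_lookup_fp16_to_fp32(ggml_fp16_t f) {
    uint16_t s;
    memcpy(&s, &f, sizeof(uint16_t)); // bit-cast the fp16 storage to an index
    return demo_table_f32_f16[s];     // one load instead of bit manipulation
}

// exactly one .c file provides the definition (cf. ggml.c)
float demo_table_f32_f16[1 << 16];

int main(void) {
    demo_table_f32_f16[0x3C00] = 1.0f; // stand-in for the init-time fill shown below
    printf("%f\n", demo_lookup_fp16_to_fp32((ggml_fp16_t) 0x3C00));
    return 0;
}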
@@ -632,6 +422,28 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = ggml_vec_dot_q4_1_q8_1,
.vec_dot_type = GGML_TYPE_Q8_1,
},
+ [4] = { // GGML_TYPE_Q4_2
+ .type_name = "DEPRECATED",
+ .blck_size = 0,
+ .type_size = 0,
+ .is_quantized = false,
+ .to_float = NULL,
+ .from_float = NULL,
+ .from_float_reference = NULL,
+ .vec_dot = NULL,
+ .vec_dot_type = GGML_TYPE_COUNT,
+ },
+ [5] = { // GGML_TYPE_Q4_3
+ .type_name = "DEPRECATED",
+ .blck_size = 0,
+ .type_size = 0,
+ .is_quantized = false,
+ .to_float = NULL,
+ .from_float = NULL,
+ .from_float_reference = NULL,
+ .vec_dot = NULL,
+ .vec_dot_type = GGML_TYPE_COUNT,
+ },
[GGML_TYPE_Q5_0] = {
.type_name = "q5_0",
.blck_size = QK5_0,
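Indices 4 and 5 are the retired GGML_TYPE_Q4_2/Q4_3 slots. Designated initializers would zero-fill them even if omitted, but spelling them out documents the gap and gives iterating code a printable name plus a reliable sentinel (blck_size == 0, which the test-quantize-fns.cpp change at the end of this commit keys on). A runnable sketch of the idea with a hypothetical five-entry table:

#include <stdio.h>

typedef struct {
    const char * type_name;
    int          blck_size;
} traits_t; // hypothetical, far smaller than ggml_type_traits_t

static const traits_t traits[5] = {
    [0] = { "f32",        1 },
    [1] = { "q4_0",      32 },
    // [2] left implicit: zero-filled, so type_name == NULL
    [3] = { "DEPRECATED", 0 }, // retired slot, made explicit
    [4] = { "q8_0",      32 },
};

int main(void) {
    for (int i = 0; i < 5; ++i) {
        if (traits[i].blck_size == 0) { // the sentinel the tests skip on
            printf("%d: %s (skipped)\n", i,
                   traits[i].type_name ? traits[i].type_name : "(unset)");
            continue;
        }
        printf("%d: %s, block size %d\n", i, traits[i].type_name, traits[i].blck_size);
    }
    return 0;
}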
@@ -1551,7 +1363,7 @@ inline static float ggml_gelu_f32(float x) {
inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
const uint16_t * i16 = (const uint16_t *) x;
for (int i = 0; i < n; ++i) {
- y[i] = table_gelu_f16[i16[i]];
+ y[i] = ggml_table_gelu_f16[i16[i]];
}
}
@@ -1561,7 +1373,7 @@ inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
for (int i = 0; i < n; ++i) {
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
memcpy(&t, &fp16, sizeof(uint16_t));
- y[i] = GGML_FP16_TO_FP32(table_gelu_f16[t]);
+ y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
}
}
#else
@@ -1579,7 +1391,7 @@ inline static float ggml_gelu_quick_f32(float x) {
//inline static void ggml_vec_gelu_quick_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
// const uint16_t * i16 = (const uint16_t *) x;
// for (int i = 0; i < n; ++i) {
-// y[i] = table_gelu_quick_f16[i16[i]];
+// y[i] = ggml_table_gelu_quick_f16[i16[i]];
// }
//}
@@ -1589,7 +1401,7 @@ inline static void ggml_vec_gelu_quick_f32(const int n, float * y, const float *
for (int i = 0; i < n; ++i) {
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
memcpy(&t, &fp16, sizeof(uint16_t));
- y[i] = GGML_FP16_TO_FP32(table_gelu_quick_f16[t]);
+ y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_quick_f16[t]);
}
}
#else
@@ -1608,7 +1420,7 @@ inline static float ggml_silu_f32(float x) {
//inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
// const uint16_t * i16 = (const uint16_t *) x;
// for (int i = 0; i < n; ++i) {
-// y[i] = table_silu_f16[i16[i]];
+// y[i] = ggml_table_silu_f16[i16[i]];
// }
//}
@@ -1618,7 +1430,7 @@ inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
for (int i = 0; i < n; ++i) {
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
memcpy(&t, &fp16, sizeof(uint16_t));
- y[i] = GGML_FP16_TO_FP32(table_silu_f16[t]);
+ y[i] = GGML_FP16_TO_FP32(ggml_table_silu_f16[t]);
}
}
#else
@@ -2334,11 +2146,11 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
for (int i = 0; i < (1 << 16); ++i) {
uint16_t ui = i;
memcpy(&ii, &ui, sizeof(ii));
- const float f = table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
- table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
- table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
- table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
- table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f));
+ const float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(ii);
+ ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
+ ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
+ ggml_table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
+ ggml_table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f));
}
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
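Why one loop can fill every table at once: fp16 has only 2^16 bit patterns, so any unary f32 function can be evaluated once per pattern at init and reduced to a single table load at runtime. A standalone sketch of the ggml_table_f32_f16 case, reusing the same plain branching decoder as the earlier sketch rather than ggml's branch-free GGML_COMPUTE_FP16_TO_FP32:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

static float demo_table_f32_f16[1 << 16];

// plain IEEE binary16 decode (see the earlier sketch)
static float fp16_bits_to_f32(uint16_t h) {
    const uint32_t sign = h >> 15;
    const uint32_t exp  = (h >> 10) & 0x1F;
    const uint32_t man  = h & 0x3FF;
    float v;
    if      (exp == 0)  v = ldexpf((float) man, -24);
    else if (exp == 31) v = man ? NAN : INFINITY;
    else                v = ldexpf((float) (man | 0x400), (int) exp - 25);
    return sign ? -v : v;
}

int main(void) {
    // one pass over every possible fp16 bit pattern, as in ggml_init above
    for (uint32_t i = 0; i < (1u << 16); ++i) {
        demo_table_f32_f16[i] = fp16_bits_to_f32((uint16_t) i);
    }
    printf("%f %f\n", demo_table_f32_f16[0x3C00],  //  1.0
                      demo_table_f32_f16[0xC000]); // -2.0
    return 0;
}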
@@ -10701,7 +10513,7 @@ static void ggml_compute_forward_soft_max_f32(
// const float val = (sp[i] == -INFINITY) ? 0.0 : exp(sp[i] - max);
ggml_fp16_t s = GGML_FP32_TO_FP16(sp[i] - max);
memcpy(&scvt, &s, sizeof(scvt));
- const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+ const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt]);
sum += (ggml_float)val;
dp[i] = val;
}
@@ -12990,7 +12802,7 @@ static void ggml_compute_forward_flash_attn_f32(
#else
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
memcpy(&scvt[j], &s, sizeof(uint16_t));
- const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+ const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt[j]]);
#endif
sump[j] += (ggml_float)val;
SS[j] = val;
@@ -13192,7 +13004,7 @@ static void ggml_compute_forward_flash_attn_f16(
} else {
ggml_fp16_t s = GGML_FP32_TO_FP16(SS[j] - max);
memcpy(&scvt[j], &s, sizeof(uint16_t));
- const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+ const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt[j]]);
sump[j] += (ggml_float)val;
SS[j] = val;
}
@@ -13643,7 +13455,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
#else
ggml_fp16_t s = GGML_FP32_TO_FP16(SR[j] - max);
memcpy(&scvt[j], &s, sizeof(uint16_t));
- const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt[j]]);
+ const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt[j]]);
#endif
sump[j] += (ggml_float)val;
SW[j] = val;
@@ -14393,7 +14205,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
#else
ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
memcpy(&scvt, &s, sizeof(scvt));
- const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+ const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt]);
#endif
sum += (ggml_float)val;
st[i] = val;
@@ -14507,7 +14319,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
#else
ggml_fp16_t s = GGML_FP32_TO_FP16(s0[i] - max);
memcpy(&scvt, &s, sizeof(scvt));
- const float val = GGML_FP16_TO_FP32(table_exp_f16[scvt]);
+ const float val = GGML_FP16_TO_FP32(ggml_table_exp_f16[scvt]);
#endif
sum += (ggml_float)val;
ds0[i] = val;
diff --git a/llama.cpp b/llama.cpp
index a4340d52..e599917a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1467,7 +1467,7 @@ static int32_t llama_kv_cache_cell_max(const struct llama_kv_cache & cache) {
}
static void llama_kv_cache_clear(struct llama_kv_cache & cache) {
- for (int32_t i = 0; i < cache.size; ++i) {
+ for (int32_t i = 0; i < (int32_t) cache.size; ++i) {
cache.cells[i].pos = -1;
cache.cells[i].seq_id.clear();
}
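The llama.cpp change is a warning fix: cache.size is an unsigned count, so comparing a plain int32_t against it trips -Wsign-compare (the comparison silently promotes i to unsigned). Casting size keeps the comparison signed. A tiny sketch, with the field type assumed from context:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t size = 4; // stands in for the unsigned cache.size
    for (int32_t i = 0; i < (int32_t) size; ++i) { // explicit cast, no warning
        printf("clearing cell %d\n", i);
    }
    return 0;
}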
diff --git a/tests/test-double-float.cpp b/tests/test-double-float.cpp
index afd7bf77..753dae91 100644
--- a/tests/test-double-float.cpp
+++ b/tests/test-double-float.cpp
@@ -4,7 +4,7 @@
#undef NDEBUG
#include <cassert>
-#if !defined(__riscv) && !defined(__s390__)
+#if !defined(__riscv) && !defined(__s390__) && !defined(__ARM_NEON)
#include <immintrin.h>
#endif
#include <cmath>
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 884af405..a2459a28 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -129,6 +129,13 @@ int main(int argc, char * argv[]) {
ggml_type type = (ggml_type) i;
ggml_type_traits_t qfns = ggml_internal_get_type_traits(type);
+ // deprecated - skip
+ if (qfns.blck_size == 0) {
+ continue;
+ }
+
+ printf("Testing %s\n", ggml_type_name((ggml_type) i));
+
if (qfns.from_float && qfns.to_float) {
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
const float max_quantization_error =