ggml : add Q8_0 quantization for intermediate results (#951)

* ggml : add Q8_0 quantization for intermediate results
* quantize-stats : fix test + add it to Makefile default
* Q8: use int8_t, AVX/AVX2 optimizations
* ggml : fix quantize_row_q8_0() ARM_NEON rounding
* minor : updates after rebase to latest master
* quantize-stats : delete obsolete strings
* ggml : fix q4_1 dot func

---------

Co-authored-by: Stephan Walter <stephan@walter.name>
author: Georgi Gerganov <ggerganov@gmail.com> 2023-04-15 17:53:22 +0300
committer: GitHub <noreply@github.com> 2023-04-15 17:53:22 +0300
commit: e95b6554b493e71a0275764342e09bd5784a7026 (patch)
tree: 6b9d3e9d4eb23b64ae76f0108b409aa5825cd1b8 /ggml.h
parent: aa485cee334e84437e21681c14b6f80b65876d8b (diff)
1 file changed, 2 insertions, 0 deletions
diff --git a/ggml.h b/ggml.h
index 617298a9..241e96a1 100644
--- a/ggml.h
+++ b/ggml.h
@@ -204,6 +204,7 @@ enum ggml_type {
     GGML_TYPE_F16  = 1,
     GGML_TYPE_Q4_0 = 2,
     GGML_TYPE_Q4_1 = 3,
+    GGML_TYPE_Q8_0 = 4,
     GGML_TYPE_I8,
     GGML_TYPE_I16,
     GGML_TYPE_I32,
@@ -836,6 +837,7 @@ typedef struct {
     dequantize_row_q_t dequantize_row_q;
     quantize_row_q_t   quantize_row_q;
     quantize_row_q_t   quantize_row_q_reference;
+    quantize_row_q_t   quantize_row_q_dot;
     vec_dot_q_t        vec_dot_q;
 } quantize_fns_t;
author	Georgi Gerganov <ggerganov@gmail.com>	2023-04-15 17:53:22 +0300
committer	GitHub <noreply@github.com>	2023-04-15 17:53:22 +0300
commit	e95b6554b493e71a0275764342e09bd5784a7026 (patch)
tree	6b9d3e9d4eb23b64ae76f0108b409aa5825cd1b8 /ggml.h
parent	aa485cee334e84437e21681c14b6f80b65876d8b (diff)