diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-15 17:53:22 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-15 17:53:22 +0300 |
commit | e95b6554b493e71a0275764342e09bd5784a7026 (patch) | |
tree | 6b9d3e9d4eb23b64ae76f0108b409aa5825cd1b8 /ggml.h | |
parent | aa485cee334e84437e21681c14b6f80b65876d8b (diff) |
ggml : add Q8_0 quantization for intermediate results (#951)
* ggml : add Q8_0 quantization for intermediate results
* quantize-stats : fix test + add it to Makefile default
* Q8: use int8_t, AVX/AVX2 optimizations
* ggml : fix quantize_row_q8_0() ARM_NEON rounding
* minor : updates after rebase to latest master
* quantize-stats : delete obsolete strings
* ggml : fix q4_1 dot func
---------
Co-authored-by: Stephan Walter <stephan@walter.name>
Diffstat (limited to 'ggml.h')
-rw-r--r-- | ggml.h | 2 |
1 files changed, 2 insertions, 0 deletions
```diff
@@ -204,6 +204,7 @@ enum ggml_type {
     GGML_TYPE_F16 = 1,
     GGML_TYPE_Q4_0 = 2,
     GGML_TYPE_Q4_1 = 3,
+    GGML_TYPE_Q8_0 = 4,
     GGML_TYPE_I8,
     GGML_TYPE_I16,
     GGML_TYPE_I32,
@@ -836,6 +837,7 @@ typedef struct {
     dequantize_row_q_t dequantize_row_q;
     quantize_row_q_t quantize_row_q;
     quantize_row_q_t quantize_row_q_reference;
+    quantize_row_q_t quantize_row_q_dot;
     vec_dot_q_t vec_dot_q;
 } quantize_fns_t;
```