ggml : quantization refactoring (#3833)

* ggml : factor all quantization code in ggml-quants

ggml-ci

* ggml-quants : fix Zig and Swift builds + quantize tool

ggml-ci

* quantize : --pure option for disabling k-quant mixtures

---------

Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
author: Georgi Gerganov <ggerganov@gmail.com> 2023-10-29 18:32:28 +0200
committer: GitHub <noreply@github.com> 2023-10-29 18:32:28 +0200
commit: d69d777c02b9ac405a95f3cbfba219a990caefff (patch)
tree: 89c43e860850c0647b41025442e61ffa8534c5d7 /ggml.h
parent: ff3bad83e29e3009010cbc923bebd769055eaa7f (diff)
1 files changed, 7 insertions, 0 deletions
diff --git a/ggml.h b/ggml.h
index 08bff551..8c954904 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1930,12 +1930,19 @@ extern "C" {
     // quantization
     //
 
+    // TODO: these would probably get removed in favor of the more general ggml_quantize_chunk
     GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
     GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
     GGML_API size_t ggml_quantize_q5_0(const float * src, void * dst, int n, int k, int64_t * hist);
     GGML_API size_t ggml_quantize_q5_1(const float * src, void * dst, int n, int k, int64_t * hist);
     GGML_API size_t ggml_quantize_q8_0(const float * src, void * dst, int n, int k, int64_t * hist);
 
+    GGML_API size_t ggml_quantize_q2_K(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q3_K(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
+    GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
+
     GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
 
     //
author	Georgi Gerganov <ggerganov@gmail.com>	2023-10-29 18:32:28 +0200
committer	GitHub <noreply@github.com>	2023-10-29 18:32:28 +0200
commit	d69d777c02b9ac405a95f3cbfba219a990caefff (patch)
tree	89c43e860850c0647b41025442e61ffa8534c5d7 /ggml.h
parent	ff3bad83e29e3009010cbc923bebd769055eaa7f (diff)