summary | refs | log | tree | commit | diff
path: root/ggml.h
diff options
context:
space:
mode:
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com>, 2024-01-14 09:45:56 +0200
committer: GitHub <noreply@github.com>, 2024-01-14 09:45:56 +0200
commit: 147b17ac94a24d524e367cda26a9ff6245689f34 (patch)
tree: 6bae34826f82aa28a60ccb26de8eda0464774110 /ggml.h
parent: 807179ec583dcb882f97d9704577c06beb2c5ec9 (diff)
2-bit quantizations (#4897)
* imatrix: load
* imatrix: WIP
* imatrix: Add Q2_K quantization
* imatrix: also guard against Q2_K_S quantization without importance matrix
* imatrix: guard even more against low-bit quantization misuse

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 9
1 files changed, 6 insertions, 3 deletions
diff --git a/ggml.h b/ggml.h
index b18ba781..1187074f 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2067,10 +2067,13 @@ extern "C" {
GGML_API size_t ggml_quantize_q4_K(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist);
GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist);
- GGML_API size_t ggml_quantize_iq2_xxs(const float * src, void * dst, int n, int k, int64_t * hist);
- GGML_API size_t ggml_quantize_iq2_xs (const float * src, void * dst, int n, int k, int64_t * hist);
- GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist);
+ GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst,
+ int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
+
+ // These are needed for IQ2_XS and IQ2_XXS quantizations
+ GGML_API void ggml_init_iq2_quantization(enum ggml_type type);
+ GGML_API void ggml_deinit_iq2_quantization(enum ggml_type type);
//
// Importance matrix