summary | refs | log | tree | commit | diff
path: root/ggml-quants.h
diff options
context:
space:
mode:
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-01-16 19:51:26 +0200
committer: GitHub <noreply@github.com> 2024-01-16 19:51:26 +0200
commit: 334a835a1ccc8106a5fa355683a965efb1bfa24b (patch)
tree: 83172d25be464c4f041dd43eb56592c372b2c784 /ggml-quants.h
parent: 4feb4b33eeb1756e46084a4db9230b279af1a480 (diff)
ggml : importance matrix support for legacy quants (#4969)
* imatrix: adding support for legacy quants
* imatrix: guard Q4_0/Q5_0 against ffn_down craziness
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml-quants.h')
-rw-r--r-- ggml-quants.h 4
1 files changed, 4 insertions, 0 deletions
diff --git a/ggml-quants.h b/ggml-quants.h
index 99467936..d7fefdb5 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -253,3 +253,7 @@ size_t quantize_q3_K (const float * src, void * dst, int nrows, int n_per_row,
size_t quantize_q4_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
size_t quantize_q5_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
size_t quantize_q6_K (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
+size_t quantize_q4_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
+size_t quantize_q4_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
+size_t quantize_q5_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
+size_t quantize_q5_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);