From 574406dc7e350ddbffaeca33bf0392b7bfeb1436 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 26 Apr 2023 23:14:13 +0300
Subject: ggml : add Q5_0 and Q5_1 quantization (#1187)

* ggml : add Q5_0 quantization (cuBLAS only)
* ggml : fix Q5_0 qh -> uint32_t
* ggml : fix q5_0 histogram stats
* ggml : q5_0 scalar dot product
* ggml : q5_0 ARM NEON dot
* ggml : q5_0 more efficient ARM NEON using uint64_t masks
* ggml : rename Q5_0 -> Q5_1
* ggml : adding Q5_0 mode
* quantize : add Q5_0 and Q5_1 to map
* ggml : AVX2 optimizations for Q5_0, Q5_1 (#1195)

---------

Co-authored-by: Stephan Walter
---
 examples/quantize/quantize.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'examples/quantize/quantize.cpp')

diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index ec7f91aa..60966595 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -10,6 +10,8 @@ static const std::map LLAMA_FTYPE_MAP = {
     {"q4_1", LLAMA_FTYPE_MOSTLY_Q4_1},
     {"q4_2", LLAMA_FTYPE_MOSTLY_Q4_2},
     {"q4_3", LLAMA_FTYPE_MOSTLY_Q4_3},
+    {"q5_0", LLAMA_FTYPE_MOSTLY_Q5_0},
+    {"q5_1", LLAMA_FTYPE_MOSTLY_Q5_1},
     {"q8_0", LLAMA_FTYPE_MOSTLY_Q8_0},
 };
 
-- 
cgit v1.2.3