From 76aa30a26353f597e4fbe3cf776772ae812af89a Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Thu, 21 Mar 2024 08:27:57 +0100 Subject: Add ability to use Q5_0, Q5_1, and IQ4_NL for quantized K cache (#6183) * k_cache: be able to use Q5_0 * k_cache: be able to use Q5_1 on CUDA * k_cache: be able to use Q5_0 on Metal * k_cache: be able to use Q5_1 on Metal * k_cache: be able to use IQ4_NL - just CUDA for now * k_cache: be able to use IQ4_NL on Metal * k_cache: add newly added supported types to llama-bench and CUDA supports_op --------- Co-authored-by: Iwan Kawrakow --- common/common.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'common/common.cpp') diff --git a/common/common.cpp b/common/common.cpp index 5f10718e..192182d0 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1590,6 +1590,9 @@ static ggml_type kv_cache_type_from_str(const std::string & s) { if (s == "q4_1") { return GGML_TYPE_Q4_1; } + if (s == "iq4_nl") { + return GGML_TYPE_IQ4_NL; + } if (s == "q5_0") { return GGML_TYPE_Q5_0; } -- cgit v1.2.3