From 76aa30a26353f597e4fbe3cf776772ae812af89a Mon Sep 17 00:00:00 2001
From: Kawrakow <48489457+ikawrakow@users.noreply.github.com>
Date: Thu, 21 Mar 2024 08:27:57 +0100
Subject: Add ability to use Q5_0, Q5_1, and IQ4_NL for quantized K cache
 (#6183)

* k_cache: be able to use Q5_0

* k_cache: be able to use Q5_1 on CUDA

* k_cache: be able to use Q5_0 on Metal

* k_cache: be able to use Q5_1 on Metal

* k_cache: be able to use IQ4_NL - just CUDA for now

* k_cache: be able to use IQ4_NL on Metal

* k_cache: add newly added supported types to llama-bench and CUDA supports_op

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
---
 examples/llama-bench/llama-bench.cpp | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'examples')

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 4cb23080..82413b79 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -249,6 +249,9 @@ static ggml_type ggml_type_from_name(const std::string & s) {
     if (s == "q5_1") {
         return GGML_TYPE_Q5_1;
     }
+    if (s == "iq4_nl") {
+        return GGML_TYPE_IQ4_NL;
+    }
 
     return GGML_TYPE_COUNT;
 }
-- 
cgit v1.2.3