From c7ecd4e23acb42f1150abf0b118e0a2c7b8dc959 Mon Sep 17 00:00:00 2001 From: Nexes the Elder <124105151+Nexesenex@users.noreply.github.com> Date: Sat, 24 May 2025 10:49:10 +0200 Subject: Legacy quants conversion schemes in convert_hf_to_gguf.py (#449) * Legacy quants conversion schemes in convert_hf_to_gguf.py This is notably intended to allow smaller conversions for generating an iMatrix file. `Q4_0` and `Q4_1` here use q5_0 for embeddings, output, attn_k and attn_v. `Q5_0` and `Q5_1` here use q8_0 for embeddings, output, attn_k and attn_v. Adapted from the following llama.cpp mainline PR: https://github.com/ggml-org/llama.cpp/pull/9022 Original author: @chentyjpm. Also adds 2 forgotten mentions of FTYPE IQ3_KL in the llama.cpp file. * Add the forgotten IQ5_KS case mention --- ggml/src/ggml-cuda/mmvq.cu | 1 + 1 file changed, 1 insertion(+) (limited to 'ggml/src') diff --git a/ggml/src/ggml-cuda/mmvq.cu b/ggml/src/ggml-cuda/mmvq.cu index 30a6a58b..89b74f4b 100644 --- a/ggml/src/ggml-cuda/mmvq.cu +++ b/ggml/src/ggml-cuda/mmvq.cu @@ -652,6 +652,7 @@ bool ggml_cuda_mmvq_type_supported(ggml_type src0_type) { case GGML_TYPE_IQ4_KSS: case GGML_TYPE_IQ2_KS: case GGML_TYPE_IQ5_K: + case GGML_TYPE_IQ5_KS: case GGML_TYPE_IQ6_K: case GGML_TYPE_IQ3_S: return true; -- cgit v1.2.3