From 469e75d0a35b08de549a4fd87f082ca7a8a539ba Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Thu, 11 Jan 2024 20:43:15 +0100 Subject: llama : restore intended k-quants mixes for MoE models (#4872) * Restore intended k-quants quantization mixes for MoE models * Update Q2_K_S values in the quantize tool Still using LLaMA-v1 PPL values in the quant description today does not make much sense. But let's leave this update for another PR. --------- Co-authored-by: Iwan Kawrakow Co-authored-by: Georgi Gerganov --- llama.h | 1 + 1 file changed, 1 insertion(+) (limited to 'llama.h') diff --git a/llama.h b/llama.h index 6fde113f..43d41b8f 100644 --- a/llama.h +++ b/llama.h @@ -105,6 +105,7 @@ extern "C" { LLAMA_FTYPE_MOSTLY_Q6_K = 18, // except 1d tensors LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file }; -- cgit v1.2.3