From e0305ead3a072db9c08b35c9600c49273b38a4b5 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 20 Apr 2023 20:35:53 +0300
Subject: ggml : add Q4_3 quantization (#1082)

---
 llama.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index 208b03d1..011e34c0 100644
--- a/llama.h
+++ b/llama.h
@@ -73,6 +73,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
         LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // except 1d tensors
     };
 
     LLAMA_API struct llama_context_params llama_context_default_params();
-- 
cgit v1.2.3