summaryrefslogtreecommitdiff
path: root/llama.h
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2024-03-11 09:56:47 +0200
committerGeorgi Gerganov <ggerganov@gmail.com>2024-03-11 09:56:47 +0200
commitee35600b9061b1ea0c4ea87fce6844297632b2a8 (patch)
tree5782738d8cae052eff621785d0297e17daaac2ba /llama.h
parentbe858f620508385ad12d0e5e862010e666ca729c (diff)
llama : fix F16/F32 downcast + improve names (#5980)
Diffstat (limited to 'llama.h')
-rw-r--r--llama.h2
1 file changed, 1 insertion, 1 deletion
diff --git a/llama.h b/llama.h
index c8e05aad..ccf65ca4 100644
--- a/llama.h
+++ b/llama.h
@@ -278,7 +278,7 @@ extern "C" {
bool allow_requantize; // allow quantizing non-f32/f16 tensors
bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
- bool pure; // disable k-quant mixtures and quantize all tensors to the same type
+ bool pure; // quantize all tensors to the default type
void * imatrix; // pointer to importance matrix data
} llama_model_quantize_params;