From 1966eb2615242f224bf9ca939db8905ab6a174a0 Mon Sep 17 00:00:00 2001
From: jiez <373447296@qq.com>
Date: Thu, 25 Apr 2024 18:29:35 +0800
Subject: quantize : add '--keep-split' to quantize model into shards (#6688)

* Implement '--keep-split' to quantize model into several shards

* Add test script

* Update examples/quantize/quantize.cpp

Co-authored-by: Georgi Gerganov

* Split model correctly even if tensor id is out-of-order

* Update llama_model_quantize_params

* Fix preci failures

---------

Co-authored-by: z5269887
Co-authored-by: Georgi Gerganov
---
 llama.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index 0eb2a1e9..8aa76367 100644
--- a/llama.h
+++ b/llama.h
@@ -288,6 +288,7 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
         bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         bool pure; // quantize all tensors to the default type
+        bool keep_split; // quantize to the same number of shards
         void * imatrix; // pointer to importance matrix data
         void * kv_overrides; // pointer to vector containing overrides
     } llama_model_quantize_params;
--
cgit v1.2.3