From 91c736015b66ba1d0b82cbae6313b6d5eaa61b68 Mon Sep 17 00:00:00 2001
From: jiez <373447296@qq.com>
Date: Fri, 12 Apr 2024 18:45:06 +0800
Subject: llama : add gguf_remove_key + remove split meta during quantize (#6591)

* Remove split metadata when quantize model shards

* Find metadata key by enum

* Correct loop range for gguf_remove_key and code format

* Free kv memory

---------

Co-authored-by: z5269887
---
 llama.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index dad2c4fb..83dd55ef 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     gguf_set_kv     (ctx_out, ml.meta);
     gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
     gguf_set_val_u32(ctx_out, "general.file_type", ftype);
+    // Remove split metadata
+    gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
+    gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str());
+    gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str());
 
     if (params->kv_overrides) {
         const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;
-- 
cgit v1.2.3
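
The hunk above is only the llama.cpp side of the change; the gguf_remove_key() function itself was added to the GGUF API in ggml by the same commit (not shown here, since this view is limited to 'llama.cpp'). The standalone sketch below illustrates the same pattern against the public gguf API. It is not code from the patch: it assumes the literal key strings "split.no", "split.count" and "split.tensors.count" are what ml.llm_kv(LLM_KV_SPLIT_*) resolves to, and that the gguf functions are exported by ggml.h as they were at the time of this commit.

// Hypothetical standalone sketch (not part of the patch): strip split metadata
// from a GGUF KV set using gguf_remove_key(), mirroring the quantize change above.
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // Stand-in for the metadata copied from an input shard via gguf_set_kv(ctx_out, ml.meta).
    struct gguf_context * ctx_out = gguf_init_empty();
    gguf_set_val_u16(ctx_out, "split.no",            0);
    gguf_set_val_u16(ctx_out, "split.count",         4);
    gguf_set_val_i32(ctx_out, "split.tensors.count", 291);
    gguf_set_val_u32(ctx_out, "general.file_type",   10);

    // Same pattern as the patch: drop the split keys before writing the
    // single quantized output file.
    const char * split_keys[] = { "split.no", "split.count", "split.tensors.count" };
    for (size_t i = 0; i < sizeof(split_keys)/sizeof(split_keys[0]); ++i) {
        gguf_remove_key(ctx_out, split_keys[i]);  // no-op if the key is absent
    }

    // Verify: only general.file_type should remain.
    printf("n_kv after removal: %d\n", gguf_get_n_kv(ctx_out));
    printf("split.no present:   %s\n", gguf_find_key(ctx_out, "split.no") >= 0 ? "yes" : "no");

    gguf_free(ctx_out);
    return 0;
}

Removing the keys outright, rather than overwriting them, means a loader reading the quantized single-file output will not mistake it for one shard of a split model.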