author    jiez <373447296@qq.com>       2024-04-12 18:45:06 +0800
committer GitHub <noreply@github.com>   2024-04-12 13:45:06 +0300
commit    91c736015b66ba1d0b82cbae6313b6d5eaa61b68 (patch)
tree      098b60b95e78a1062daf0fe2b362de506eb23df7
parent    5c4d767ac028c0f9c31cba3fceaf765c6097abfc (diff)
llama : add gguf_remove_key + remove split meta during quantize (#6591)
* Remove split metadata when quantizing model shards

* Find metadata key by enum

* Correct loop range for gguf_remove_key and code format

* Free kv memory

---------

Co-authored-by: z5269887 <z5269887@unsw.edu.au>
-rw-r--r--  ggml.c      65
-rw-r--r--  ggml.h       3
-rw-r--r--  llama.cpp    4
3 files changed, 47 insertions, 25 deletions
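As a quick illustration (not part of the patch), the sketch below exercises the new gguf_remove_key entry point against an in-memory GGUF context built with the existing gguf_init_empty / gguf_set_val_u32 API. The literal "split.count" key is an assumption used for illustration, standing in for whatever ml.llm_kv(LLM_KV_SPLIT_COUNT) resolves to in llama.cpp.

```c
// Minimal sketch: behaviour of the new gguf_remove_key API.
// Assumes ggml.h from a build that already contains this commit.
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct gguf_context * ctx = gguf_init_empty();

    // add a couple of keys, much like the quantize path does via gguf_set_val_*
    gguf_set_val_u32(ctx, "general.quantization_version", 2);
    gguf_set_val_u32(ctx, "split.count", 4); // hypothetical split metadata key

    printf("n_kv before: %d\n", gguf_get_n_kv(ctx)); // 2

    // removing an existing key frees it and shrinks the kv array;
    // removing a missing key is a no-op
    gguf_remove_key(ctx, "split.count");
    gguf_remove_key(ctx, "does.not.exist");

    printf("n_kv after:  %d\n", gguf_get_n_kv(ctx)); // 1
    printf("split.count present: %s\n",
           gguf_find_key(ctx, "split.count") >= 0 ? "yes" : "no");

    gguf_free(ctx);
    return 0;
}
```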
diff --git a/ggml.c b/ggml.c
index 793b67f4..90af4342 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20550,6 +20550,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
return ok;
}
+static void gguf_free_kv(struct gguf_kv * kv) {
+ if (kv->key.data) {
+ GGML_FREE(kv->key.data);
+ }
+
+ if (kv->type == GGUF_TYPE_STRING) {
+ if (kv->value.str.data) {
+ GGML_FREE(kv->value.str.data);
+ }
+ }
+
+ if (kv->type == GGUF_TYPE_ARRAY) {
+ if (kv->value.arr.data) {
+ if (kv->value.arr.type == GGUF_TYPE_STRING) {
+ for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
+ struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
+ if (str->data) {
+ GGML_FREE(str->data);
+ }
+ }
+ }
+ GGML_FREE(kv->value.arr.data);
+ }
+ }
+}
+
struct gguf_context * gguf_init_empty(void) {
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
@@ -20899,31 +20925,7 @@ void gguf_free(struct gguf_context * ctx) {
if (ctx->kv) {
// free string memory - not great..
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
- struct gguf_kv * kv = &ctx->kv[i];
-
- if (kv->key.data) {
- GGML_FREE(kv->key.data);
- }
-
- if (kv->type == GGUF_TYPE_STRING) {
- if (kv->value.str.data) {
- GGML_FREE(kv->value.str.data);
- }
- }
-
- if (kv->type == GGUF_TYPE_ARRAY) {
- if (kv->value.arr.data) {
- if (kv->value.arr.type == GGUF_TYPE_STRING) {
- for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
- struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
- if (str->data) {
- GGML_FREE(str->data);
- }
- }
- }
- GGML_FREE(kv->value.arr.data);
- }
- }
+ gguf_free_kv(&ctx->kv[i]);
}
GGML_FREE(ctx->kv);
@@ -21148,6 +21150,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
return n_kv;
}
+void gguf_remove_key(struct gguf_context * ctx, const char * key) {
+ const int idx = gguf_find_key(ctx, key);
+ if (idx >= 0) {
+ const int n_kv = gguf_get_n_kv(ctx);
+ gguf_free_kv(&ctx->kv[idx]);
+ for (int i = idx; i < n_kv-1; ++i) {
+ ctx->kv[i] = ctx->kv[i+1];
+ }
+ ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
+ ctx->header.n_kv--;
+ }
+}
+
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
diff --git a/ggml.h b/ggml.h
index abe3767f..e9ed8eee 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2289,6 +2289,9 @@ extern "C" {
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
+ // removes key if it exists
+ GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
+
// overrides existing values or adds a new one
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
diff --git a/llama.cpp b/llama.cpp
index dad2c4fb..83dd55ef 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
gguf_set_kv (ctx_out, ml.meta);
gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
gguf_set_val_u32(ctx_out, "general.file_type", ftype);
+ // Remove split metadata
+ gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
+ gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str());
+ gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str());
if (params->kv_overrides) {
const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;
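A minimal sketch (an assumption, not shipped with this commit) of how one might verify that a quantized single-file output no longer carries the split metadata. The literal key names below mirror what the LLM_KV_SPLIT_NO / LLM_KV_SPLIT_COUNT / LLM_KV_SPLIT_TENSORS_COUNT entries removed above are expected to resolve to; only metadata is loaded, no tensor data is allocated.

```c
// Minimal sketch: inspect a quantized .gguf file and report whether the
// split metadata keys removed by this commit are still present.
#include <stdio.h>
#include "ggml.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model-quant.gguf\n", argv[0]);
        return 1;
    }

    struct gguf_init_params params = {
        /*.no_alloc =*/ true,   // metadata only, do not allocate tensor data
        /*.ctx      =*/ NULL,
    };

    struct gguf_context * ctx = gguf_init_from_file(argv[1], params);
    if (!ctx) {
        fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // assumed string forms of the LLM_KV_SPLIT_* keys
    const char * split_keys[] = { "split.no", "split.count", "split.tensors.count" };
    for (int i = 0; i < 3; ++i) {
        const int idx = gguf_find_key(ctx, split_keys[i]);
        printf("%-20s : %s\n", split_keys[i], idx >= 0 ? "still present" : "removed");
    }

    gguf_free(ctx);
    return 0;
}
```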