path: root/llama.h
author     Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>  2023-12-05 10:19:18 -0700
committer  GitHub <noreply@github.com>  2023-12-05 19:19:18 +0200
commit     5aa365d88fdb8fdd430ef3fc141c7a5fd37c3502
tree       b9653918c6b5cd09b5a981927b143d3011456acb /llama.h
parent     52c8bc3cf312e1caf02d37bfb9d9d865cbe33594
llama : allow overriding GGUF metadata when loading model (#4092)
* feat: Allow overriding GGUF metadata when loading model
* Fix the one time GCC is stricter than clang about something
* Step1
* Refactor... basically everything!
* Nuke obsolete GetArrayLen struct
* simplify std::string specialization
* Various cleanups
  Add informational output when overrides are applied
  Warn user when an override with the wrong type is specified
* Fix broken logic for parsing bool KV overrides
  Fix issue where overrides didn't apply when key missing in GGUF metadata
  Resolve merge changes
* llama : rearrange model params
* Update new GET_KEY call
  Add note that metadata KV overrides aren't reflected in initial metadata KV info dump

---------

Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
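The message above describes applying an override only when its declared type matches
what the loader expects, warning otherwise. A rough sketch of that idea in C follows;
this is illustrative only, not the actual llama.cpp loader code, and the expected-type
and target arguments are hypothetical stand-ins for the loader's internal state (the
override types themselves are declared in the llama.h diff below):

    /* Illustrative sketch: apply one override if its declared type matches
     * the type the loader expects for that key; warn and skip otherwise. */
    #include <stdio.h>
    #include "llama.h"

    static void apply_kv_override(const struct llama_model_kv_override * ov,
                                  enum llama_model_kv_override_type expected,
                                  void * target) {
        if (ov->tag != expected) {
            fprintf(stderr, "warn: override for key '%s' has wrong type, ignoring\n", ov->key);
            return;
        }
        switch (ov->tag) {
            case LLAMA_KV_OVERRIDE_INT:   *(int64_t *) target = ov->int_value;   break;
            case LLAMA_KV_OVERRIDE_FLOAT: *(double  *) target = ov->float_value; break;
            case LLAMA_KV_OVERRIDE_BOOL:  *(bool    *) target = ov->bool_value;  break;
        }
    }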
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h  20
1 file changed, 20 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index 89cb6198..517245a3 100644
--- a/llama.h
+++ b/llama.h
@@ -158,6 +158,22 @@ extern "C" {
         llama_seq_id all_seq_id; // used if seq_id == NULL
     } llama_batch;
 
+    enum llama_model_kv_override_type {
+        LLAMA_KV_OVERRIDE_INT,
+        LLAMA_KV_OVERRIDE_FLOAT,
+        LLAMA_KV_OVERRIDE_BOOL,
+    };
+
+    struct llama_model_kv_override {
+        char key[128];
+        enum llama_model_kv_override_type tag;
+        union {
+            int64_t int_value;
+            double  float_value;
+            bool    bool_value;
+        };
+    };
+
     struct llama_model_params {
         int32_t n_gpu_layers; // number of layers to store in VRAM
         int32_t main_gpu;     // the GPU that is used for scratch and small tensors
@@ -165,9 +181,13 @@ extern "C" {
 
         // called with a progress value between 0 and 1, pass NULL to disable
         llama_progress_callback progress_callback;
+
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
 
+        // override key-value pairs of the model meta data
+        const struct llama_model_kv_override * kv_overrides;
+
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible
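
For reference, a minimal sketch of how a caller might use the new kv_overrides field.
It assumes the array is terminated by an entry with an empty key (the convention the
loader side of this commit appears to use, though the terminator is not visible in this
header diff); llama_model_default_params() and llama_load_model_from_file() are the
existing entry points in llama.h at this revision:

    #include <string.h>
    #include "llama.h"

    struct llama_model * load_with_override(const char * path) {
        // Two slots: one real override plus a zeroed terminator entry.
        struct llama_model_kv_override overrides[2];
        memset(overrides, 0, sizeof(overrides));

        // Override the context length recorded in the model's GGUF metadata.
        strncpy(overrides[0].key, "llama.context_length", sizeof(overrides[0].key) - 1);
        overrides[0].tag       = LLAMA_KV_OVERRIDE_INT;
        overrides[0].int_value = 4096;

        // overrides[1] stays zeroed: its empty key marks the end of the array.
        struct llama_model_params mparams = llama_model_default_params();
        mparams.kv_overrides = overrides;

        return llama_load_model_from_file(path, mparams);
    }

Using a fixed-size key buffer and a tagged union keeps each entry a plain C value type,
so the array can be passed across the C ABI without any ownership or lifetime concerns.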