path: root/llama.h
author     Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>  2023-12-05 10:19:18 -0700
committer  GitHub <noreply@github.com>  2023-12-05 19:19:18 +0200
commit     5aa365d88fdb8fdd430ef3fc141c7a5fd37c3502
tree       b9653918c6b5cd09b5a981927b143d3011456acb /llama.h
parent     52c8bc3cf312e1caf02d37bfb9d9d865cbe33594
llama : allow overriding GGUF metadata when loading model (#4092)
* feat: Allow overriding GGUF metadata when loading model
* Fix the one time GCC is stricter than clang about something
* Step1
* Refactor... basically everything!
* Nuke obsolete GetArrayLen struct
* simplify std::string specialization
* Various cleanups
  Add informational output when overrides are applied
  Warn user when an override with the wrong type is specified
* Fix broken logic for parsing bool KV overrides
  Fix issue where overrides didn't apply when key missing in GGUF metadata
  Resolve merge changes
* llama : rearrange model params
* Update new GET_KEY call
  Add note that metadata KV overrides aren't reflected in initial metadata KV info dump

---------

Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
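The message above describes applying an override only when its declared type matches
what the loader expects, warning otherwise. A rough sketch of that idea in C follows;
this is illustrative only, not the actual llama.cpp loader code, and the expected-type
and target arguments are hypothetical stand-ins for the loader's internal state (the
override types themselves are declared in the llama.h diff below):

    /* Illustrative sketch: apply one override if its declared type matches
     * the type the loader expects for that key; warn and skip otherwise. */
    #include <stdio.h>
    #include "llama.h"

    static void apply_kv_override(const struct llama_model_kv_override * ov,
                                  enum llama_model_kv_override_type expected,
                                  void * target) {
        if (ov->tag != expected) {
            fprintf(stderr, "warn: override for key '%s' has wrong type, ignoring\n", ov->key);
            return;
        }
        switch (ov->tag) {
            case LLAMA_KV_OVERRIDE_INT:   *(int64_t *) target = ov->int_value;   break;
            case LLAMA_KV_OVERRIDE_FLOAT: *(double  *) target = ov->float_value; break;
            case LLAMA_KV_OVERRIDE_BOOL:  *(bool    *) target = ov->bool_value;  break;
        }
    }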
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h  20
1 file changed, 20 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index 89cb6198..517245a3 100644
--- a/llama.h
+++ b/llama.h
@@ -158,6 +158,22 @@ extern "C" {
         llama_seq_id all_seq_id; // used if seq_id == NULL
     } llama_batch;
 
+    enum llama_model_kv_override_type {
+        LLAMA_KV_OVERRIDE_INT,
+        LLAMA_KV_OVERRIDE_FLOAT,
+        LLAMA_KV_OVERRIDE_BOOL,
+    };
+
+    struct llama_model_kv_override {
+        char key[128];
+        enum llama_model_kv_override_type tag;
+        union {
+            int64_t int_value;
+            double  float_value;
+            bool    bool_value;
+        };
+    };
+
     struct llama_model_params {
         int32_t n_gpu_layers; // number of layers to store in VRAM
         int32_t main_gpu;     // the GPU that is used for scratch and small tensors
@@ -165,9 +181,13 @@ extern "C" {
 
         // called with a progress value between 0 and 1, pass NULL to disable
         llama_progress_callback progress_callback;
+
         // context pointer passed to the progress callback
         void * progress_callback_user_data;
 
+        // override key-value pairs of the model meta data
+        const struct llama_model_kv_override * kv_overrides;
+
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible
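
For reference, a minimal sketch of how a caller might use the new kv_overrides field.
It assumes the array is terminated by an entry with an empty key (the convention the
loader side of this commit appears to use, though the terminator is not visible in this
header diff); llama_model_default_params() and llama_load_model_from_file() are the
existing entry points in llama.h at this revision:

    #include <string.h>
    #include "llama.h"

    struct llama_model * load_with_override(const char * path) {
        // Two slots: one real override plus a zeroed terminator entry.
        struct llama_model_kv_override overrides[2];
        memset(overrides, 0, sizeof(overrides));

        // Override the context length recorded in the model's GGUF metadata.
        strncpy(overrides[0].key, "llama.context_length", sizeof(overrides[0].key) - 1);
        overrides[0].tag       = LLAMA_KV_OVERRIDE_INT;
        overrides[0].int_value = 4096;

        // overrides[1] stays zeroed: its empty key marks the end of the array.
        struct llama_model_params mparams = llama_model_default_params();
        mparams.kv_overrides = overrides;

        return llama_load_model_from_file(path, mparams);
    }

Using a fixed-size key buffer and a tagged union keeps each entry a plain C value type,
so the array can be passed across the C ABI without any ownership or lifetime concerns.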