diff options
author | Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> | 2023-12-05 10:19:18 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-05 19:19:18 +0200 |
commit | 5aa365d88fdb8fdd430ef3fc141c7a5fd37c3502 (patch) | |
tree | b9653918c6b5cd09b5a981927b143d3011456acb /common/common.cpp | |
parent | 52c8bc3cf312e1caf02d37bfb9d9d865cbe33594 (diff) |
llama : allow overriding GGUF metadata when loading model (#4092)
* feat: Allow overriding GGUF metadata when loading model
* Fix the one time GCC is stricter than clang about something
* Step1
* Refactor... basically everything!
* Nuke obsolete GetArrayLen struct
* simplify std::string specialization
* Various cleanups
Add informational output when overrides are applied
Warn user when an override with the wrong type is specified
* Fix broken logic for parsing bool KV overrides
Fix issue where overrides didn't apply when key missing in GGUF metadata
Resolve merge changes
* llama : rearrange model params
* Update new GET_KEY call
Add note that metadata KV overrides aren't reflected in initial metadata KV info dump
---------
Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'common/common.cpp')
-rw-r--r-- | common/common.cpp | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/common/common.cpp b/common/common.cpp index 8e6d74d0..4e823c52 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -690,6 +690,47 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { std::istreambuf_iterator<char>(), std::back_inserter(sparams.grammar) ); + } else if (arg == "--override-kv") { + if (++i >= argc) { + invalid_param = true; + break; + } + char * sep = strchr(argv[i], '='); + if (sep == nullptr || sep - argv[i] >= 128) { + fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]); + invalid_param = true; + break; + } + struct llama_model_kv_override kvo; + std::strncpy(kvo.key, argv[i], sep - argv[i]); + kvo.key[sep - argv[i]] = 0; + sep++; + if (strncmp(sep, "int:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_INT; + kvo.int_value = std::atol(sep); + } else if (strncmp(sep, "float:", 6) == 0) { + sep += 6; + kvo.tag = LLAMA_KV_OVERRIDE_FLOAT; + kvo.float_value = std::atof(sep); + } else if (strncmp(sep, "bool:", 5) == 0) { + sep += 5; + kvo.tag = LLAMA_KV_OVERRIDE_BOOL; + if (std::strcmp(sep, "true") == 0) { + kvo.bool_value = true; + } else if (std::strcmp(sep, "false") == 0) { + kvo.bool_value = false; + } else { + fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]); + invalid_param = true; + break; + } + } else { + fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]); + invalid_param = true; + break; + } + params.kv_overrides.push_back(kvo); #ifndef LOG_DISABLE_LOGS // Parse args for logging parameters } else if ( log_param_single_parse( argv[i] ) ) { @@ -733,6 +774,11 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { } } + if (!params.kv_overrides.empty()) { + params.kv_overrides.emplace_back(llama_model_kv_override()); + params.kv_overrides.back().key[0] = 0; + } + return true; } @@ -864,6 +910,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" draft model for speculative decoding (default: %s)\n", params.model.c_str()); printf(" -ld LOGDIR, --logdir LOGDIR\n"); printf(" path under which to save YAML logs (no logging if unset)\n"); + printf(" --override-kv KEY=TYPE:VALUE\n"); + printf(" advanced option to override model metadata by key. may be specified multiple times.\n"); + printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); printf("\n"); #ifndef LOG_DISABLE_LOGS log_print_usage(); @@ -956,6 +1005,12 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & mparams.tensor_split = params.tensor_split; mparams.use_mmap = params.use_mmap; mparams.use_mlock = params.use_mlock; + if (params.kv_overrides.empty()) { + mparams.kv_overrides = NULL; + } else { + GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key"); + mparams.kv_overrides = params.kv_overrides.data(); + } return mparams; } |