summaryrefslogtreecommitdiff
path: root/examples/server/server.cpp
diff options
context:
space:
mode:
authorPierrick Hymbert <pierrick.hymbert@gmail.com>2024-04-26 20:06:33 +0200
committerGitHub <noreply@github.com>2024-04-26 20:06:33 +0200
commit0c4d489e29e53589bf13a801fe7c94b7b546d8f6 (patch)
treefc83fade919050b3a9471dd892d8aef438c39aaf /examples/server/server.cpp
parent017e6999b5184234370b22a2f868e1be911e8d88 (diff)
quantize: add imatrix and dataset metadata in GGUF (#6658)
* imatrix: save the dataset file used in the output file * llama: support kv overrides type string string * common: factorize KV Overrides parsing between common and server * quantize: add imatrix n entries and dataset KV metadata quantize: factorize KV Overrides parsing between common #6656 * llama: remove kv override str_value initialization as it does not compile on some toolchain * quantize: add imatrix m_last_call as `quantize.imatrix.chunks_count` * quantize: add imatrix filename in KV * llama: add llama_model_kv_override_free * common: add llama_model_kv_override_free common: free kv override if used after model loading * llama: finally move the string KV override value to the stack * llama : minor * no need to add a NUL to the std::vector, std::string can be initialized from a pair of iterators. Co-authored-by: slaren <slarengh@gmail.com> * kv override: ensure string termination --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: slaren <slarengh@gmail.com>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r--examples/server/server.cpp36
1 files changed, 2 insertions, 34 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 48ef8ff2..6f8ba3fc 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2392,7 +2392,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict);
printf(" --override-kv KEY=TYPE:VALUE\n");
printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
- printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
+ printf(" types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`\n");
printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`\n");
printf(" --chat-template JINJA_TEMPLATE\n");
@@ -2823,43 +2823,11 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
invalid_param = true;
break;
}
- char * sep = strchr(argv[i], '=');
- if (sep == nullptr || sep - argv[i] >= 128) {
- fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
- invalid_param = true;
- break;
- }
-
- struct llama_model_kv_override kvo;
- std::strncpy(kvo.key, argv[i], sep - argv[i]);
- kvo.key[sep - argv[i]] = 0;
- sep++;
- if (strncmp(sep, "int:", 4) == 0) {
- sep += 4;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
- kvo.int_value = std::atol(sep);
- } else if (strncmp(sep, "float:", 6) == 0) {
- sep += 6;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
- kvo.float_value = std::atof(sep);
- } else if (strncmp(sep, "bool:", 5) == 0) {
- sep += 5;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
- if (std::strcmp(sep, "true") == 0) {
- kvo.bool_value = true;
- } else if (std::strcmp(sep, "false") == 0) {
- kvo.bool_value = false;
- } else {
- fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
- invalid_param = true;
- break;
- }
- } else {
+ if (!parse_kv_override(argv[i], params.kv_overrides)) {
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
invalid_param = true;
break;
}
- params.kv_overrides.push_back(kvo);
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
server_print_usage(argv[0], default_params, default_sparams);