author     Xuan Son Nguyen <thichthat@gmail.com>    2024-02-20 15:58:27 +0100
committer  GitHub <noreply@github.com>              2024-02-20 15:58:27 +0100
commit     9c405c9f9a7cfd23511fd6b2de05dc72481119b4 (patch)
tree       694b5a169d63eb4640df2d6f536d384cc481b300 /llama.cpp
parent     5207b3fbc500f89dfe528693e96540956dbaed96 (diff)
Server: use llama_chat_apply_template (#5593)
* server: use llama_chat_apply_template
* server: remove trailing space
* server: fix format_chat
* server: fix help message

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* server: fix formatted_chat

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp  2
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llama.cpp b/llama.cpp
index 5de07dfa..4296eca3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12602,7 +12602,7 @@ LLAMA_API int32_t llama_chat_apply_template(
// load template from model
std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
std::string template_key = "tokenizer.chat_template";
- int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), curr_tmpl.size());
+ int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
if (res < 0) {
// worst case: there is no information about template, we will use chatml by default
curr_tmpl = "<|im_start|>"; // see llama_chat_apply_template_internal
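For context: before this patch, the buf_size argument passed to llama_model_meta_val_str was curr_tmpl.size() -- the length of the output std::string, which holds no template yet in this branch -- rather than the capacity of the 2048-byte model_template buffer, so the metadata lookup was given the wrong (effectively zero) destination size and the chat template stored in the model could not be read. Below is a minimal standalone sketch (not part of the patch) of the corrected call pattern against the public llama.h API; the model path is a placeholder and error handling is reduced to the essentials.

// sketch: read tokenizer.chat_template from a model's GGUF metadata,
// passing the destination buffer's own size as buf_size
#include <cstdio>
#include <vector>

#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params params = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", params); // placeholder path
    if (model == nullptr) {
        return 1;
    }

    std::vector<char> buf(2048, 0); // longest known template is about 1200 bytes
    // buf.size() is the capacity of the buffer being written into;
    // passing the size of any other string here is the bug the patch fixes
    int32_t res = llama_model_meta_val_str(model, "tokenizer.chat_template", buf.data(), buf.size());
    if (res < 0) {
        printf("model stores no chat template; fall back to chatml\n");
    } else {
        printf("chat template: %s\n", buf.data());
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}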