From 9c405c9f9a7cfd23511fd6b2de05dc72481119b4 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 20 Feb 2024 15:58:27 +0100
Subject: Server: use llama_chat_apply_template (#5593)

* server: use llama_chat_apply_template

* server: remove trailing space

* server: fix format_chat

* server: fix help message

Co-authored-by: Georgi Gerganov

* server: fix formatted_chat

---------

Co-authored-by: Georgi Gerganov
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 5de07dfa..4296eca3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12602,7 +12602,7 @@ LLAMA_API int32_t llama_chat_apply_template(
         // load template from model
         std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
         std::string template_key = "tokenizer.chat_template";
-        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), curr_tmpl.size());
+        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
         if (res < 0) {
             // worst case: there is no information about template, we will use chatml by default
             curr_tmpl = "<|im_start|>"; // see llama_chat_apply_template_internal
--
cgit v1.2.3
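
For context, the one-line fix above corrects the buffer-size argument: llama_model_meta_val_str() writes at most `buf_size` bytes into `buf`, so the size passed must describe the destination buffer (model_template), not the unrelated string curr_tmpl. A minimal sketch of the corrected call pattern, wrapped in a hypothetical helper (load_chat_template is not part of the llama.cpp API; the variable names follow the diff):

    #include <string>
    #include <vector>
    #include "llama.h"

    // Sketch: fetch "tokenizer.chat_template" from model metadata.
    // llama_model_meta_val_str() returns the value's length on success,
    // or a negative value if the key is absent from the model.
    static std::string load_chat_template(const struct llama_model * model) {
        std::vector<char> model_template(2048, 0); // longest known template is ~1200 bytes
        int32_t res = llama_model_meta_val_str(model, "tokenizer.chat_template",
                                               model_template.data(), model_template.size());
        if (res < 0) {
            // no template in the model metadata: fall back to chatml,
            // which llama_chat_apply_template_internal detects by this marker
            return "<|im_start|>";
        }
        return std::string(model_template.data(), res);
    }

With the old curr_tmpl.size() argument, the size reflected whatever string happened to be in curr_tmpl rather than the 2048-byte capacity of model_template, so the metadata read could be truncated or rejected even when the model carried a valid chat template.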