From 9c405c9f9a7cfd23511fd6b2de05dc72481119b4 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 20 Feb 2024 15:58:27 +0100
Subject: Server: use llama_chat_apply_template (#5593)

* server: use llama_chat_apply_template

* server: remove trailing space

* server: fix format_chat

* server: fix help message

Co-authored-by: Georgi Gerganov

* server: fix formatted_chat

---------

Co-authored-by: Georgi Gerganov
---
 examples/server/oai.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'examples/server/oai.hpp')

diff --git a/examples/server/oai.hpp b/examples/server/oai.hpp
index 2eca8a9f..ff4ad699 100644
--- a/examples/server/oai.hpp
+++ b/examples/server/oai.hpp
@@ -15,13 +15,11 @@ using json = nlohmann::json;
 
 inline static json oaicompat_completion_params_parse(
+    const struct llama_model * model,
     const json &body, /* openai api json semantics */
     const std::string &chat_template)
 {
     json llama_params;
 
-    std::string formatted_prompt = chat_template == "chatml"
-        ? format_chatml(body["messages"]) // OpenAI 'messages' to chatml (with <|im_start|>,...)
-        : format_llama2(body["messages"]); // OpenAI 'messages' to llama2 (with [INST],...)
 
     llama_params["__oaicompat"] = true;
 
@@ -34,7 +32,7 @@ inline static json oaicompat_completion_params_parse(
     // https://platform.openai.com/docs/api-reference/chat/create
     llama_sampling_params default_sparams;
     llama_params["model"] = json_value(body, "model", std::string("unknown"));
-    llama_params["prompt"] = formatted_prompt;
+    llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
     llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
     llama_params["temperature"] = json_value(body, "temperature", 0.0);
     llama_params["top_k"] = json_value(body, "top_k", default_sparams.top_k);
-- 
cgit v1.2.3
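
The patch replaces the hardcoded chatml/llama2 branching with a single format_chat(model, chat_template, body["messages"]) call, which delegates prompt rendering to llama_chat_apply_template so any template known to the library (or embedded in the model's GGUF metadata) works. The helper below is a minimal sketch of how such a wrapper can drive that API; it is not the server's actual format_chat from examples/server/utils.hpp (which also unpacks the OpenAI "messages" JSON first), and the function name and initial buffer size are illustrative. The resize-and-retry step relies on llama_chat_apply_template returning the total number of bytes the formatted prompt requires.

// Sketch only: formats a chat via llama_chat_apply_template using the
// public llama.h API. Helper name and initial buffer size are assumptions.
#include <cstdint>
#include <string>
#include <vector>

#include "llama.h"

static std::string apply_chat_template_sketch(
        const struct llama_model * model,
        const std::string & tmpl, // empty -> use the model's built-in template
        const std::vector<llama_chat_message> & chat) {
    const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();

    // First pass with a guessed buffer; the return value is the total
    // number of bytes the formatted prompt needs.
    std::vector<char> buf(1024);
    int32_t res = llama_chat_apply_template(
        model, ptr_tmpl, chat.data(), chat.size(),
        /*add_ass =*/ true, buf.data(), (int32_t) buf.size());

    // If the buffer turned out to be too small, grow it and format again.
    if (res > (int32_t) buf.size()) {
        buf.resize(res);
        res = llama_chat_apply_template(
            model, ptr_tmpl, chat.data(), chat.size(),
            /*add_ass =*/ true, buf.data(), (int32_t) buf.size());
    }

    return std::string(buf.data(), res);
}

A caller would build a std::vector<llama_chat_message> (role/content pairs) from the OpenAI "messages" array and pass an empty template string to let the model's own chat template take effect, which is what allows the server to drop the chatml/llama2 special-casing removed above.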