diff options
author | Xuan Son Nguyen <thichthat@gmail.com> | 2024-02-22 09:33:24 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-22 10:33:24 +0200 |
commit | a46f50747b2028f7f9c9883b26bfba12bf92556e (patch) | |
tree | 48cdc5ae91b49c9982122e710fffb1ad210f7dbe /llama.cpp | |
parent | c5688c6250430d2b8e0259efcf26c16dfa4c1f46 (diff) |
server : fallback to chatml, add AlphaMonarch chat template (#5628)
* server: fallback to chatml
* add new chat template
* server: add AlphaMonarch to test chat template
* server: only check model template if there is no custom tmpl
* remove TODO
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 9 |
1 file changed, 9 insertions, 0 deletions
@@ -12773,6 +12773,15 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|assistant|>\n";
         }
+    } else if (tmpl.find("bos_token + message['role']") != std::string::npos) {
+        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
+        for (auto message : chat) {
+            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
+            ss << bos << message->role << "\n" << message->content << "</s>\n";
+        }
+        if (add_ass) {
+            ss << "<s>assistant\n";
+        }
     } else {
         // template not supported
         return -1;