From b97bc3966e852adb626c90be64fd48282800f504 Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Sun, 21 Apr 2024 13:50:41 +0200 Subject: llama : support Llama 3 HF conversion (#6745) * Support Llama 3 conversion The tokenizer is BPE. * style * Accept suggestion Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> * llama : add llama_token_is_eog() ggml-ci * llama : auto-detect more EOT tokens when missing in KV data * convert : replacing EOS token is a hack * llama : fix codegemma EOT token + add TODOs * llama : fix model type string for 8B model --------- Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --- examples/server/server.cpp | 2 +- examples/server/utils.hpp | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'examples/server') diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 634e653a..25bc2963 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1201,7 +1201,7 @@ struct server_context { }); } - if (result.tok == llama_token_eos(model)) { + if (llama_token_is_eog(model, result.tok)) { slot.stopped_eos = true; slot.has_next_token = false; diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index a8d43ac6..1a221250 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -381,10 +381,6 @@ static json oaicompat_completion_params_parse( } else { llama_params["stop"] = json_value(body, "stop", json::array()); } - // Some chat templates don't use EOS token to stop generation - // We must add their end sequences to list of stop words - llama_params["stop"].push_back("<|im_end|>"); // chatml - llama_params["stop"].push_back("<end_of_turn>"); // gemma // Handle "response_format" field if (body.contains("response_format")) { -- cgit v1.2.3