From b97bc3966e852adb626c90be64fd48282800f504 Mon Sep 17 00:00:00 2001
From: Pedro Cuenca <pedro@huggingface.co>
Date: Sun, 21 Apr 2024 13:50:41 +0200
Subject: llama : support Llama 3 HF conversion (#6745)

* Support Llama 3 conversion

The tokenizer is BPE.

* style

* Accept suggestion

Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>

* llama : add llama_token_is_eog()

ggml-ci

* llama : auto-detect more EOT tokens when missing in KV data

* convert : replacing EOS token is a hack

* llama : fix codegemma EOT token + add TODOs

* llama : fix model type string for 8B model

---------

Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 examples/infill/infill.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'examples/infill/infill.cpp')

diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index c69dcd06..afac145f 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -586,7 +586,7 @@ int main(int argc, char ** argv) {
 
             // deal with eot token in infill mode
             if ((llama_sampling_last(ctx_sampling) == llama_token_eot(model) || is_interacting) && params.interactive){
-                if(is_interacting && !params.interactive_first) {
+                if (is_interacting && !params.interactive_first) {
                     // print an eot token
                     printf("%s", llama_token_to_piece(ctx, llama_token_eot(model)).c_str());
                 }
@@ -651,8 +651,8 @@ int main(int argc, char ** argv) {
                 // LOG_TEE("took new input\n");
                 is_interacting = false;
             }
-            // deal with end of text token in interactive mode
-            else if (llama_sampling_last(ctx_sampling) == llama_token_eos(model)) {
+            // deal with end of generation tokens in interactive mode
+            else if (llama_token_is_eog(model, llama_sampling_last(ctx_sampling))) {
                 LOG("found EOS token\n");
 
                 if (params.interactive) {
@@ -731,8 +731,8 @@ int main(int argc, char ** argv) {
             }
         }
 
-        // end of text token
-        if (!embd.empty() && embd.back() == llama_token_eos(model) && !params.interactive) {
+        // end of generation
+        if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !params.interactive) {
             break;
         }
 
-- 
cgit v1.2.3