llama : support Llama 3 HF conversion (#6745)

* Support Llama 3 conversion

The tokenizer is BPE.

* style

* Accept suggestion

Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>

* llama : add llama_token_is_eog()

ggml-ci

* llama : auto-detect more EOT tokens when missing in KV data

* convert : replacing EOS token is a hack

* llama : fix codegemma EOT token + add TODOs

* llama : fix model type string for 8B model

---------

Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: Pedro Cuenca <pedro@huggingface.co> 2024-04-21 13:50:41 +0200
committer: GitHub <noreply@github.com> 2024-04-21 14:50:41 +0300
commit: b97bc3966e852adb626c90be64fd48282800f504 (patch)
tree: 178656d15821205889fa03ec603c7327facbb265 /examples/main/main.cpp
parent: b8109bc0139f15a5b321909f47510b89dca47ffc (diff)
1 files changed, 4 insertions, 4 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 249fc2bb..1180734b 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -795,8 +795,8 @@ int main(int argc, char ** argv) {
                 }
             }
 
-            // deal with end of text token in interactive mode
-            if (llama_sampling_last(ctx_sampling) == llama_token_eos(model)) {
+            // deal with end of generation tokens in interactive mode
+            if (llama_token_is_eog(model, llama_sampling_last(ctx_sampling))) {
                 LOG("found EOS token\n");
 
                 if (params.interactive) {
@@ -920,8 +920,8 @@ int main(int argc, char ** argv) {
             }
         }
 
-        // end of text token
-        if (!embd.empty() && embd.back() == llama_token_eos(model) && !(params.instruct || params.interactive || params.chatml)) {
+        // end of generation
+        if (!embd.empty() && llama_token_is_eog(model, embd.back()) && !(params.instruct || params.interactive || params.chatml)) {
             LOG_TEE(" [end of text]\n");
             break;
         }
author	Pedro Cuenca <pedro@huggingface.co>	2024-04-21 13:50:41 +0200
committer	GitHub <noreply@github.com>	2024-04-21 14:50:41 +0300
commit	b97bc3966e852adb626c90be64fd48282800f504 (patch)
tree	178656d15821205889fa03ec603c7327facbb265 /examples/main/main.cpp
parent	b8109bc0139f15a5b321909f47510b89dca47ffc (diff)