summary | refs | log | tree | commit | diff
path: root/examples/server
diff options
context:
space:
mode:
author: Pedro Cuenca <pedro@huggingface.co> 2024-04-21 13:50:41 +0200
committer: GitHub <noreply@github.com> 2024-04-21 14:50:41 +0300
commit: b97bc3966e852adb626c90be64fd48282800f504 (patch)
tree: 178656d15821205889fa03ec603c7327facbb265 /examples/server
parent: b8109bc0139f15a5b321909f47510b89dca47ffc (diff)
llama : support Llama 3 HF conversion (#6745)
* Support Llama 3 conversion

  The tokenizer is BPE.

* style

* Accept suggestion

  Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>

* llama : add llama_token_is_eog()

  ggml-ci

* llama : auto-detect more EOT tokens when missing in KV data

* convert : replacing EOS token is a hack

* llama : fix codegemma EOT token + add TODOs

* llama : fix model type string for 8B model

---------

Co-authored-by: Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server')
-rw-r--r-- examples/server/server.cpp | 2
-rw-r--r-- examples/server/utils.hpp | 4
2 files changed, 1 insertion, 5 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 634e653a..25bc2963 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1201,7 +1201,7 @@ struct server_context {
});
}
- if (result.tok == llama_token_eos(model)) {
+ if (llama_token_is_eog(model, result.tok)) {
slot.stopped_eos = true;
slot.has_next_token = false;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index a8d43ac6..1a221250 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -381,10 +381,6 @@ static json oaicompat_completion_params_parse(
} else {
llama_params["stop"] = json_value(body, "stop", json::array());
}
- // Some chat templates don't use EOS token to stop generation
- // We must add their end sequences to list of stop words
- llama_params["stop"].push_back("<|im_end|>"); // chatml
- llama_params["stop"].push_back("<end_of_turn>"); // gemma
// Handle "response_format" field
if (body.contains("response_format")) {