summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com>, 2024-04-22 15:41:11 +0300
committer: Georgi Gerganov <ggerganov@gmail.com>, 2024-04-22 15:41:11 +0300
commit: 8960fe86ae075c846c5df8848230d1904ba8877f (patch)
tree: 9ab4c48f1bdefd57eca0f73a71c9257d45c525c8
parent: c0956b09ba845a7cd787d5580d7c8b96e80f40f5 (diff)
llama : fix typo in <|im_end|> token text (#6745)
-rw-r--r--  llama.cpp  4
1 files changed, 2 insertions, 2 deletions
diff --git a/llama.cpp b/llama.cpp
index 7440c740..a25d115c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4340,7 +4340,7 @@ static void llm_load_vocab(
}
}
- // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+ // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
//
// TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
// for now, we apply this workaround to find the EOT token based on its text
@@ -4351,7 +4351,7 @@ static void llm_load_vocab(
// need to fix convert script
//vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
(t.first == "<|eot_id|>" ||
- t.first == "<|im_emd|>" ||
+ t.first == "<|im_end|>" ||
t.first == "<end_of_turn>"
)
) {