summary | refs | log | tree | commit | diff
path: root/src/llama-vocab.h
diff options
context:
space:
mode:
author Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-08-12 15:14:32 +0200
committer GitHub <noreply@github.com> 2024-08-12 15:14:32 +0200
commit 8f43e551038af2547b5c01d0e9edd641c0e4bd29 (patch)
tree 07a4373620a9381d0b5c7189a475990a6feb48a5 /src/llama-vocab.h
parent f5d1af61d79fb53ccfbac2e665e43208c07b083d (diff)
Merge mainline - Aug 12 2024 (#17)
* Merge mainline
* Fix after merge
* Remove CI check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'src/llama-vocab.h')
-rw-r--r-- src/llama-vocab.h 2
1 files changed, 2 insertions, 0 deletions
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 30b565d5..7adfc16d 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -45,6 +45,7 @@ struct llama_vocab {
id special_suffix_id = -1;
id special_middle_id = -1;
id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
+ id special_eom_id = -1;
// tokenizer flags
bool tokenizer_add_space_prefix = false;
@@ -101,6 +102,7 @@ llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
llama_token llama_token_eot_impl (const struct llama_vocab & vocab);
+llama_token llama_token_eom_impl (const struct llama_vocab & vocab);
int32_t llama_tokenize_impl(
const struct llama_vocab & vocab,