summary | refs | log | tree | commit | diff
path: root/tests/test-tokenizer-0.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test-tokenizer-0.cpp')
-rw-r--r-- tests/test-tokenizer-0.cpp | 3
1 file changed, 2 insertions, 1 deletion
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index f3ee851a..7e9ac918 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -100,7 +100,8 @@ int main(int argc, char **argv) {
bool success = true;
for (const auto & test_kv : k_tests()) {
- std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, true);
+ // Add a space in front of the first character to match OG llama tokenizer behavior
+ std::vector<llama_token> res = llama_tokenize(ctx, " " + test_kv.first, true);
fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
__func__, test_kv.first.c_str(), unescape_whitespace(ctx, res).c_str());