summary refs log tree commit diff
path: root/tests/test-tokenizer-0-falcon.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test-tokenizer-0-falcon.cpp')
-rw-r--r-- tests/test-tokenizer-0-falcon.cpp 8
1 files changed, 4 insertions, 4 deletions
diff --git a/tests/test-tokenizer-0-falcon.cpp b/tests/test-tokenizer-0-falcon.cpp
index 0f3c50bc..a4e9d2b9 100644
--- a/tests/test-tokenizer-0-falcon.cpp
+++ b/tests/test-tokenizer-0-falcon.cpp
@@ -36,6 +36,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests() {
{ " Hello" , { 258, 23090, }, },
{ " Hello" , { 466, 23090, }, },
{ " Hello\n Hello" , { 466, 23090, 742, 23090, }, },
+ { "\n =" , { 1212, 40, }, },
+ { "' era" , { 18, 4932, }, },
};
return _k_tests;
@@ -155,7 +157,7 @@ int main(int argc, char **argv) {
fprintf(stderr, "%s : text size: %zu\n", __func__, text.size());
- const std::vector<llama_token> res = llama_tokenize(ctx, text, true);
+ const std::vector<llama_token> res = llama_tokenize(ctx, text, false);
fprintf(stderr, "%s : tokens: %zu\n", __func__, res.size());
@@ -169,10 +171,8 @@ int main(int argc, char **argv) {
}
for (const auto & tok : res) {
- ofs << tok << " ";
+ ofs << tok << " '" << llama_detokenize_bpe(ctx, std::vector<int>{tok}) << "'" << std::endl;
}
-
- ofs << "\n";
}
fprintf(stderr, "%s : tokens written to '%s'\n", __func__, (fname_text + ".tokcpp").c_str());