author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-07-27 07:55:01 +0200
---|---|---
committer | GitHub <noreply@github.com> | 2024-07-27 07:55:01 +0200
commit | 154e0d75fccf1784fe9ff6fd76a630b66563da3d |
tree | 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /tests/test-tokenizer-0.cpp |
parent | 0684c3e9c70d49323b4fc517128cbe222cab7f96 |
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP
AVX2 and CUDA appear to work.
CUDA performance seems slightly (~1-2%) lower, as is so often
the case with llama.cpp/ggml after "improvements" have been made.
* Merging mainline - fix Metal
* Remove check
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'tests/test-tokenizer-0.cpp')
-rw-r--r-- | tests/test-tokenizer-0.cpp | 12 lines changed
1 file changed, 6 insertions, 6 deletions
```diff
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index d478f104..d3d21331 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -195,11 +195,11 @@ int main(int argc, char **argv) {
     const bool add_special = false;
 
     for (const auto & test_kv : k_tests) {
-        const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, add_special);
+        const std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, add_special, false);
 
         printf("\n");
         printf("src: '%s'\n", test_kv.first.c_str());
-        printf("res: '%s'\n", llama_detokenize_bpe(ctx, res).c_str());
+        printf("res: '%s'\n", llama_detokenize(ctx, res).c_str());
         printf("tok: ");
         for (const auto & tok : res) {
             printf("%d ", tok);
@@ -216,8 +216,8 @@ int main(int argc, char **argv) {
         if (!correct) {
             fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
             fprintf(stderr, "%s : detokenized to: '%s' instead of '%s'\n", __func__,
-                llama_detokenize_bpe(ctx, res).c_str(),
-                llama_detokenize_bpe(ctx, test_kv.second).c_str());
+                llama_detokenize(ctx, res).c_str(),
+                llama_detokenize(ctx, test_kv.second).c_str());
             fprintf(stderr, "%s : expected tokens: ", __func__);
             for (const auto & t : test_kv.second) {
                 fprintf(stderr, "%6d '%s', ", t, llama_token_to_piece(ctx, t).c_str());
@@ -253,7 +253,7 @@ int main(int argc, char **argv) {
         {
             const auto t_start = ggml_time_us();
 
-            res = llama_tokenize(ctx, text, add_special);
+            res = llama_tokenize(ctx, text, add_special, false);
 
             const auto t_end = ggml_time_us();
 
@@ -272,7 +272,7 @@ int main(int argc, char **argv) {
         }
 
         for (const auto & tok : res) {
-            //ofs << tok << " '" << string_strip(llama_detokenize_bpe(ctx, std::vector<int>{tok})) << "'" << std::endl;
+            //ofs << tok << " '" << string_strip(llama_detokenize(ctx, std::vector<int>{tok})) << "'" << std::endl;
             ofs << tok << "\n";
         }
     }
```
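For callers updating to the merged API, the sketch below shows the new call pattern this test exercises: `llama_tokenize` now takes a fourth boolean argument (passed as `false` here; the diff shows only the literal, but in mainline llama.cpp's common library this position controls whether special tokens embedded in the text are parsed), and the tokenizer-specific `llama_detokenize_bpe` is replaced by the generic `llama_detokenize`. The `round_trip` helper is hypothetical and assumes the common-library declarations as of this merge.

```cpp
// Hypothetical helper illustrating the updated tokenize/detokenize calls.
// Assumes llama.cpp's common library headers as of this merge.
#include "common.h"
#include "llama.h"

#include <cstdio>
#include <string>
#include <vector>

static void round_trip(llama_context * ctx, const std::string & text) {
    const bool add_special = false; // do not add BOS/EOS, as in the test

    // New fourth argument (false here, matching the test); assumed to select
    // whether special tokens embedded in the text are parsed.
    const std::vector<llama_token> toks = llama_tokenize(ctx, text, add_special, false);

    // llama_detokenize replaces the tokenizer-specific llama_detokenize_bpe
    // and works regardless of the model's tokenizer type.
    printf("src: '%s'\n", text.c_str());
    printf("res: '%s'\n", llama_detokenize(ctx, toks).c_str());

    printf("tok: ");
    for (const llama_token tok : toks) {
        printf("%d ", tok);
    }
    printf("\n");
}
```

Because detokenization no longer depends on the tokenizer family, the same test loop now covers both SPM and BPE vocabularies without branching on the model type.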