diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-05-21 19:53:48 +0300 |
---|---|---|
committer | Georgi Gerganov <ggerganov@gmail.com> | 2024-05-21 19:53:48 +0300 |
commit | c3f8d583560b4f261fa21c976793e538c60cd66c (patch) | |
tree | 32be46859a6703b3a91f7c1318225ec032d89649 /tests/test-tokenizer-0.sh | |
parent | 11474e756de3f56b760986e73086d40e787e52f8 (diff) |
tests : test-tokenizer-0.sh print more info (#7402)
Diffstat (limited to 'tests/test-tokenizer-0.sh')
-rwxr-xr-x | tests/test-tokenizer-0.sh | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/tests/test-tokenizer-0.sh b/tests/test-tokenizer-0.sh
index 2fb8632d..1fec8bbf 100755
--- a/tests/test-tokenizer-0.sh
+++ b/tests/test-tokenizer-0.sh
@@ -17,10 +17,15 @@ make -j tests/test-tokenizer-0
 
 printf "Testing %s on %s ...\n" $name $input
 
+set -e
+
+printf "Tokenizing using (py) Python AutoTokenizer ...\n"
 python3 ./tests/test-tokenizer-0.py ./models/tokenizers/$name --fname-tok $input > /tmp/test-tokenizer-0-$name-py.log 2>&1
-cat /tmp/test-tokenizer-0-$name-py.log | grep "tokenized in"
 
+printf "Tokenizing using (cpp) llama.cpp ...\n"
 ./tests/test-tokenizer-0 ./models/ggml-vocab-$name.gguf $input > /tmp/test-tokenizer-0-$name-cpp.log 2>&1
+
+cat /tmp/test-tokenizer-0-$name-py.log | grep "tokenized in"
 cat /tmp/test-tokenizer-0-$name-cpp.log | grep "tokenized in"
 
 diff $input.tok $input.tokcpp > /dev/null 2>&1
```