summaryrefslogtreecommitdiff
path: root/tests/test-tokenizer-0-llama.py
diff options
context:
space:
mode:
Diffstat (limited to 'tests/test-tokenizer-0-llama.py')
-rw-r--r--tests/test-tokenizer-0-llama.py52
1 files changed, 26 insertions, 26 deletions
diff --git a/tests/test-tokenizer-0-llama.py b/tests/test-tokenizer-0-llama.py
index 21df8e6e..f3d4d7e3 100644
--- a/tests/test-tokenizer-0-llama.py
+++ b/tests/test-tokenizer-0-llama.py
@@ -14,32 +14,32 @@ dir_tokenizer = args.dir_tokenizer
tokenizer = SentencePieceProcessor(dir_tokenizer + '/tokenizer.model')
tests = [
- "",
- " ",
- " ",
- " ",
- "\t",
- "\n",
- "\t\n",
- "Hello world",
- " Hello world",
- "Hello World",
- " Hello World",
- " Hello World!",
- "Hello, world!",
- " Hello, world!",
- " this is πŸ¦™.cpp",
- "w048 7tuijk dsdfhu",
- "Π½Π΅Ρ‰ΠΎ Π½Π° Π‘ΡŠΠ»Π³Π°Ρ€ΡΠΊΠΈ",
- "αž€αžΆαž“αŸ‹αžαŸ‚αž–αž·αžŸαŸαžŸαž’αžΆαž…αžαž›αž…αŸαž‰",
- "πŸš€ (normal) πŸ˜Άβ€πŸŒ«οΈ (multiple emojis concatenated) βœ… (only emoji that has its own token)",
- "Hello",
- " Hello",
- " Hello",
- " Hello",
- " Hello",
- " Hello\n Hello",
- ]
+ "",
+ " ",
+ " ",
+ " ",
+ "\t",
+ "\n",
+ "\t\n",
+ "Hello world",
+ " Hello world",
+ "Hello World",
+ " Hello World",
+ " Hello World!",
+ "Hello, world!",
+ " Hello, world!",
+ " this is πŸ¦™.cpp",
+ "w048 7tuijk dsdfhu",
+ "Π½Π΅Ρ‰ΠΎ Π½Π° Π‘ΡŠΠ»Π³Π°Ρ€ΡΠΊΠΈ",
+ "αž€αžΆαž“αŸ‹αžαŸ‚αž–αž·αžŸαŸαžŸαž’αžΆαž…αžαž›αž…αŸαž‰",
+ "πŸš€ (normal) πŸ˜Άβ€πŸŒ«οΈ (multiple emojis concatenated) βœ… (only emoji that has its own token)",
+ "Hello",
+ " Hello",
+ " Hello",
+ " Hello",
+ " Hello",
+ " Hello\n Hello",
+]
for text in tests: