From b08e75baea294e366628b898e85c0bd359b58115 Mon Sep 17 00:00:00 2001 From: goerch Date: Sat, 16 Sep 2023 13:41:33 +0200 Subject: Fixing the last deviations from sentencepiece indicated by test-tokenizer-1 (#3170) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix für #2721 * Reenable tokenizer test for LLaMa * Add `console.cpp` dependency * Fix dependency to `common` * Fixing wrong fix. * Make console usage platform specific Work on compiler warnings. * Adapting makefile * Remove trailing whitespace * Adapting the other parts of the makefile * Fix typo. * Fixing the last deviations from sentencepiece indicated by test-tokenizer-1 * Simplify logic * Add missing change... * Fix ugly compiler warning * llama_tokenize should accept strings containing NUL now * Adding huichen's test case --- tests/test-tokenizer-0-llama.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'tests/test-tokenizer-0-llama.cpp') diff --git a/tests/test-tokenizer-0-llama.cpp b/tests/test-tokenizer-0-llama.cpp index edbd86f8..dfb2e81a 100644 --- a/tests/test-tokenizer-0-llama.cpp +++ b/tests/test-tokenizer-0-llama.cpp @@ -36,6 +36,7 @@ static const std::map> & k_tests() { { " Hello" , { 1678, 15043, }, }, { " Hello" , { 268, 15043, }, }, { " Hello\n Hello" , { 268, 15043, 13, 1678, 15043, }, }, + { " (" , { 29871, 313, }, }, }; return _k_tests; -- cgit v1.2.3