From 889bdd76866ea31a7625ec2dcea63ff469f3e981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?DAN=E2=84=A2?= Date: Sun, 5 May 2024 01:19:30 -0400 Subject: command-r : add BPE pre-tokenization (#7063) * Add BPE pre-tokenization for Command-R/R+. * Bump transformers convert requirement. * command-r : add individual digits regex --------- Co-authored-by: Georgi Gerganov --- tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'tests') diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cad703fc..208f0cf7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -83,6 +83,7 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge ARGS ${CMAKE llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf) llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-2.gguf) llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf) +llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf) # build test-tokenizer-1-bpe target once and add many tests add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp) -- cgit v1.2.3