From 4cd621c26de2095cd7c4464bdec5fe2e696ef3f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?=
Date: Wed, 8 May 2024 06:43:23 -0400
Subject: convert : add BPE pre-tokenization for DBRX (#7132)

* Add BPE pre-tokenization for DBRX.

* Add vocab GGUFs.

* Remove test.

* Remove GGUFs.
---
 convert-hf-to-gguf-update.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'convert-hf-to-gguf-update.py')

diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index a3fe67ee..b5101098 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -68,6 +68,7 @@ models = [
     {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
     {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
     {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
+    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
 ]
 
 # make directory "models/tokenizers" if it doesn't exist
-- 
cgit v1.2.3