summary | refs | log | tree | commit | diff
path: root/convert-hf-to-gguf-update.py
diff options
context:
space:
mode:
author: Joan Fontanals <joan.fontanals.martinez@jina.ai> 2024-05-13 10:35:14 +0200
committer: GitHub <noreply@github.com> 2024-05-13 11:35:14 +0300
commit: 9aa672490c848e45eaa704a554e0f1f6df995fc8 (patch)
tree: 674957b27a03ce726c364471c4382d0398c1d58c /convert-hf-to-gguf-update.py
parent: b1f8af1886e8187db6bb2a9b87cfc1c0f175f629 (diff)
llama : rename jina tokenizers to v2 (#7249)
* refactor: rename jina tokenizers to v2
* refactor: keep refactoring non-breaking
Diffstat (limited to 'convert-hf-to-gguf-update.py')
-rwxr-xr-x convert-hf-to-gguf-update.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index cd2674a0..14aa0c45 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -74,9 +74,9 @@ models = [
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
{"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
{"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
- {"name": "jina-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
- {"name": "jina-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
- {"name": "jina-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
+ {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
+ {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
+ {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
]
# make directory "models/tokenizers" if it doesn't exist