summary | refs | log | tree | commit | diff
path: root/convert-hf-to-gguf.py
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-05-04 08:32:32 +0300
committer: GitHub <noreply@github.com> 2024-05-04 08:32:32 +0300
commit: 92139b90af4841d7fd060b526bdd443b621770ff (patch)
tree: 9679c3de1b39970ca73b5bd988c63ddac0359ca6 /convert-hf-to-gguf.py
parent: a2ac89d6efb41b535778bfeaecaae8fe295b6ed3 (diff)
tests : add test-tokenizer-0.sh + fix some tokenizers (#7036)
* tests : add test-tokenizer-0.sh
* unicode : add all unicode number ranges
* starcoder : fix pre-tokenizer
* tests : add test that fails with DeepSeek tokenizers
* falcon : fix regex
* unicode : regenerate unicode tables
* refact : add tokenizer model
* lint : fix
* tests : disable failing tests ggml-ci
* refact : add tests files ggml-ci
* convert : print -> logging ggml-ci
* lint : fix
* unicode : digit -> number
* phi-3 : update
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x convert-hf-to-gguf.py 5
1 file changed, 4 insertions, 1 deletion
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 52932628..88c16676 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -308,6 +308,9 @@ class Model(ABC):
if chkhsh == "3ce83efda5659b07b1ad37ca97ca5797ea4285d9b9ab0dc679e4a720c9da7454":
# ref: https://huggingface.co/openai-community/gpt2
res = "gpt-2"
+ if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
+ # ref: https://huggingface.co/smallcloudai/Refact-1_6-base
+ res = "refact"
if res is None:
logger.warning("\n")
@@ -324,7 +327,7 @@ class Model(ABC):
logger.warning("\n")
raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
- logger.debug(f"tokenizer.ggml.pre: {res}")
+ logger.debug(f"tokenizer.ggml.pre: {repr(res)}")
logger.debug(f"chkhsh: {chkhsh}")
return res