author    CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>  2024-05-11 10:18:35 +0200
committer GitHub <noreply@github.com>  2024-05-11 11:18:35 +0300
commit    3292733f95d4632a956890a438af5192e7031c12
tree      e77246f1d4e513b82f97d9e2ea0cf060307d2705 /convert-hf-to-gguf-update.py
parent    988631335a20d06497f58be0b8ba13adb4323a22
convert : skip unaccessible HF repos (#7210)
Diffstat (limited to 'convert-hf-to-gguf-update.py')
-rwxr-xr-x  convert-hf-to-gguf-update.py  22
1 file changed, 20 insertions(+), 2 deletions(-)
diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index e757d5cc..cd2674a0 100755
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -145,8 +145,17 @@ for model in models:
     if tokt == TOKENIZER_TYPE.SPM:
         continue
 
+    # Skip if the tokenizer folder does not exist, e.g. because an earlier download attempt failed
+    if not os.path.exists(f"models/tokenizers/{name}"):
+        logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+        continue
+
     # create the tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    except OSError as e:
+        logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
+        continue  # Skip to the next model if the tokenizer can't be loaded
 
     chktok = tokenizer.encode(chktxt)
     chkhsh = sha256(str(chktok).encode()).hexdigest()
@@ -287,8 +296,17 @@ for model in models:
     name = model["name"]
     tokt = model["tokt"]
 
+    # Skip if the tokenizer folder does not exist, e.g. because an earlier download attempt failed
+    if not os.path.exists(f"models/tokenizers/{name}"):
+        logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
+        continue
+
     # create the tokenizer
-    tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+    except OSError as e:
+        logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
+        continue  # Skip this model and continue with the next one in the loop
 
     with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
         for text in tests:
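
Both hunks apply the same pattern: verify that the local tokenizer directory exists, then treat a failed AutoTokenizer.from_pretrained call as a per-model error rather than a fatal one. Below is a minimal standalone sketch of that pattern; the models list and logger setup here are illustrative stand-ins, not part of the patch.

import logging
import os

from transformers import AutoTokenizer

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("convert-hf-to-gguf-update")

# Illustrative stand-in for the script's real model list; only "name" matters here.
models = [
    {"name": "llama-bpe"},
    {"name": "some-missing-model"},
]

for model in models:
    name = model["name"]

    # First guard: the download step may have failed earlier, leaving no folder at all.
    if not os.path.exists(f"models/tokenizers/{name}"):
        logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
        continue

    # Second guard: from_pretrained raises OSError for unreadable or incomplete
    # tokenizer files (and for nonexistent or gated repos when loading from the Hub).
    try:
        tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
    except OSError as e:
        logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
        continue

    # ... tokenizer-dependent work for this model goes here ...
    logger.info(f"Loaded tokenizer for {name}: {type(tokenizer).__name__}")

Catching only OSError keeps the failure handling narrow: genuine bugs such as a TypeError still abort the script, while repos that are missing or inaccessible merely downgrade to a logged warning and the loop moves on to the next model.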