Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x  convert-hf-to-gguf.py  95
1 file changed, 50 insertions, 45 deletions
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3557a825..51724c0d 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -242,7 +242,7 @@ class Model:
         tokens: list[bytearray] = []
         toktypes: list[int] = []
 
-        from transformers import AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(dir_model)
         vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
         assert max(tokenizer.vocab.values()) < vocab_size
@@ -856,7 +856,7 @@ class StableLMModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -902,7 +902,7 @@ class QwenModel(Model):
         tokens: list[bytearray] = []
         toktypes: list[int] = []
 
-        from transformers import AutoTokenizer  # type: ignore[attr-defined]
+        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
         vocab_size = hparams["vocab_size"]
         assert max(tokenizer.get_vocab().values()) < vocab_size
@@ -1185,57 +1185,62 @@ def parse_args() -> argparse.Namespace:
     return parser.parse_args()
 
 
-args = parse_args()
+def main() -> None:
+    args = parse_args()
 
-dir_model = args.model
+    dir_model = args.model
 
-if args.awq_path:
-    sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
-    from awq.apply_awq import add_scale_weights
-    tmp_model_path = args.model / "weighted_model"
-    dir_model = tmp_model_path
-    if tmp_model_path.is_dir():
-        print(f"{tmp_model_path} exists as a weighted model.")
+    if args.awq_path:
+        sys.path.insert(1, str(Path(__file__).parent / 'awq-py'))
+        from awq.apply_awq import add_scale_weights
+        tmp_model_path = args.model / "weighted_model"
+        dir_model = tmp_model_path
+        if tmp_model_path.is_dir():
+            print(f"{tmp_model_path} exists as a weighted model.")
+        else:
+            tmp_model_path.mkdir(parents=True, exist_ok=True)
+            print("Saving new weighted model ...")
+            add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
+            print(f"Saved weighted model at {tmp_model_path}.")
+
+    if not dir_model.is_dir():
+        print(f'Error: {args.model} is not a directory', file=sys.stderr)
+        sys.exit(1)
+
+    ftype_map = {
+        "f32": gguf.GGMLQuantizationType.F32,
+        "f16": gguf.GGMLQuantizationType.F16,
+    }
+
+    if args.outfile is not None:
+        fname_out = args.outfile
     else:
-        tmp_model_path.mkdir(parents=True, exist_ok=True)
-        print("Saving new weighted model ...")
-        add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path))
-        print(f"Saved weighted model at {tmp_model_path}.")
-
-if not dir_model.is_dir():
-    print(f'Error: {args.model} is not a directory', file=sys.stderr)
-    sys.exit(1)
+        # output in the same directory as the model by default
+        fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
 
-ftype_map = {
-    "f32": gguf.GGMLQuantizationType.F32,
-    "f16": gguf.GGMLQuantizationType.F16,
-}
+    print(f"Loading model: {dir_model.name}")
 
-if args.outfile is not None:
-    fname_out = args.outfile
-else:
-    # output in the same directory as the model by default
-    fname_out = dir_model / f'ggml-model-{args.outtype}.gguf'
+    hparams = Model.load_hparams(dir_model)
 
-print(f"Loading model: {dir_model.name}")
+    with torch.inference_mode():
+        model_class = Model.from_model_architecture(hparams["architectures"][0])
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
 
-hparams = Model.load_hparams(dir_model)
+        print("Set model parameters")
+        model_instance.set_gguf_parameters()
 
-with torch.inference_mode():
-    model_class = Model.from_model_architecture(hparams["architectures"][0])
-    model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
+        print("Set model tokenizer")
+        model_instance.set_vocab()
 
-    print("Set model parameters")
-    model_instance.set_gguf_parameters()
+        if args.vocab_only:
+            print(f"Exporting model vocab to '{fname_out}'")
+            model_instance.write_vocab()
+        else:
+            print(f"Exporting model to '{fname_out}'")
+            model_instance.write()
 
-    print("Set model tokenizer")
-    model_instance.set_vocab()
+        print(f"Model successfully exported to '{fname_out}'")
 
-    if args.vocab_only:
-        print(f"Exporting model vocab to '{fname_out}'")
-        model_instance.write_vocab()
-    else:
-        print(f"Exporting model to '{fname_out}'")
-        model_instance.write()
 
-    print(f"Model successfully exported to '{fname_out}'")
+if __name__ == '__main__':
+    main()
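
Note: the refactor does not change how the converter is invoked from the command line (the model directory plus the options corresponding to args.outfile, args.outtype, args.vocab_only and args.awq_path seen in the hunk). What it does change is that loading the file no longer parses sys.argv or starts a conversion, because all of that now happens inside main() behind the __main__ guard. A minimal sketch of the programmatic reuse this enables, assuming a llama.cpp checkout as the working directory; the module name "convert_hf_to_gguf" and the model path are placeholders, not part of the patch:

# Sketch only (not part of the patch): load convert-hf-to-gguf.py as a module and
# reuse its classes without running a conversion. This is only safe because the
# diff above moves the top-level statements into main() behind the __main__ guard;
# before the change, executing the file would parse sys.argv and start converting.
import importlib.util
from pathlib import Path

spec = importlib.util.spec_from_file_location("convert_hf_to_gguf", "convert-hf-to-gguf.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)  # runs imports and class definitions only; main() is not called

model_dir = Path("path/to/hf-model")  # placeholder for a local Hugging Face model directory
hparams = module.Model.load_hparams(model_dir)  # same call main() makes before converting
print(hparams["architectures"][0])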