diff options
author | Nam D. Tran <42194884+namtranase@users.noreply.github.com> | 2023-12-27 22:39:45 +0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-27 17:39:45 +0200 |
commit | f6793491b5af6da75edad34d6f503ef86d31b09f (patch) | |
tree | ba50b7ae1aba91cb465a06970a11137baab7afcf /convert-hf-to-gguf.py | |
parent | 879b690a9e1eb1ab0a29b58236fc76978fb4d902 (diff) |
llama : add AWQ for llama, llama2, mpt, and mistral models (#4593)
* update: awq support llama-7b model
* update: change order
* update: benchmark results for llama2-7b
* update: mistral 7b v1 benchmark
* update: support 4 models
* fix: Readme
* update: ready for PR
* update: readme
* fix: readme
* update: change order import
* black
* format code
* update: work for bot mpt and awqmpt
* update: readme
* Rename to llm_build_ffn_mpt_awq
* Formatted other files
* Fixed params count
* fix: remove code
* update: more detail for mpt
* fix: readme
* fix: readme
* update: change folder architecture
* fix: common.cpp
* fix: readme
* fix: remove ggml_repeat
* update: cicd
* update: cicd
* uppdate: remove use_awq arg
* update: readme
* llama : adapt plamo to new ffn
ggml-ci
---------
Co-authored-by: Trần Đức Nam <v.namtd12@vinai.io>
Co-authored-by: Le Hoang Anh <v.anhlh33@vinai.io>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x | convert-hf-to-gguf.py | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 303d0817..7dbc2814 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -46,7 +46,7 @@ class Model: self.part_names = self._get_part_names() self.hparams = Model.load_hparams(self.dir_model) self.model_arch = self._get_model_architecture() - self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess) + self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False) def set_vocab(self): self._set_vocab_gpt2() @@ -59,7 +59,7 @@ class Model: from safetensors import safe_open ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu")) else: - ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True)) + ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", weights_only=True)) with ctx as model_part: for name in model_part.keys(): @@ -464,7 +464,11 @@ class MPTModel(Model): data = data_torch.squeeze().numpy() # map tensor names - new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) + if "scales" in name: + new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias", ".scales")) + new_name = new_name.replace("scales", "act.scales") + else: + new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias")) if new_name is None: print(f"Can not map tensor {name!r}") sys.exit() @@ -1096,6 +1100,9 @@ def parse_args() -> argparse.Namespace: help="extract only the vocab", ) parser.add_argument( + "--awq-path", type=Path, default=None, + help="Path to scale awq cache file") + parser.add_argument( "--outfile", type=Path, help="path to write to; default: based on input", ) @@ -1115,6 +1122,20 @@ def parse_args() -> argparse.Namespace: args = parse_args() dir_model = args.model + +if args.awq_path: + sys.path.insert(1, str(Path(__file__).parent / 'awq-py')) + from awq.apply_awq import add_scale_weights + tmp_model_path = args.model / "weighted_model" + dir_model = tmp_model_path + if tmp_model_path.is_dir(): + print(f"{tmp_model_path} exists as a weighted model.") + else: + tmp_model_path.mkdir(parents=True, exist_ok=True) + print("Saving new weighted model ...") + add_scale_weights(str(args.model), str(args.awq_path), str(tmp_model_path)) + print(f"Saved weighted model at {tmp_model_path}.") + if not dir_model.is_dir(): print(f'Error: {args.model} is not a directory', file=sys.stderr) sys.exit(1) |