author     Brian <mofosyne@gmail.com>              2024-05-04 05:36:41 +1000
committer  GitHub <noreply@github.com>             2024-05-03 22:36:41 +0300
commit     a2ac89d6efb41b535778bfeaecaae8fe295b6ed3 (patch)
tree       584a6f5316a627e64bfbc3aa5e098b911aef285a /convert-lora-to-ggml.py
parent     433def286e98751bf17db75dce53847d075c0be5 (diff)
convert.py : add python logging instead of print() (#6511)
* convert.py: add python logging instead of print()
* convert.py: verbose flag takes priority over dump flag log suppression
* convert.py: named instance logging
* convert.py: use explicit logger id string
* convert.py: convert extra print() to named logger
* convert.py: sys.stderr.write --> logger.error
* *.py: Convert all python scripts to use logging module
* requirements.txt: remove extra line
* flake8: update flake8 ignore and exclude to match ci settings
* gh-actions: add flake8-no-print to flake8 lint step
* pre-commit: add flake8-no-print to flake8 and also update pre-commit version
* convert-hf-to-gguf.py: print() to logger conversion
* *.py: logging basiconfig refactor to use conditional expression
* *.py: removed commented out logging
* fixup! *.py: logging basiconfig refactor to use conditional expression
* constant.py: logger.error then exit should be a raise exception instead
* *.py: Convert logger error and sys.exit() into a raise exception (for atypical error)
* gguf-convert-endian.py: refactor convert_byteorder() to use tqdm progressbar
* verify-checksum-model.py: This is the result of the program, it should be printed to stdout.
* compare-llama-bench.py: add blank line for readability during missing repo response
* reader.py: read_gguf_file() use print() over logging
* convert.py: warning goes to stderr and won't hurt the dump output
* gguf-dump.py: dump_metadata() should print to stdout
* convert-hf-to-gguf.py: print --> logger.debug or ValueError()
* verify-checksum-models.py: use print() for printing table
* *.py: refactor logging.basicConfig()
* gguf-py/gguf/*.py: use __name__ as logger name
  Since they will be imported and not run directly.
* python-lint.yml: use .flake8 file instead
* constants.py: logger no longer required
* convert-hf-to-gguf.py: add additional logging
* convert-hf-to-gguf.py: print() --> logger
* *.py: fix flake8 warnings
* revert changes to convert-hf-to-gguf.py for get_name()
* convert-hf-to-gguf-update.py: use triple quoted f-string instead
* *.py: accidentally corrected the wrong line
* *.py: add compilade warning suggestions and style fixes
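Sketched below, for orientation, is the error-handling convention those bullets describe: a module-level named logger, logger.error() plus sys.exit(1) for expected user-facing failures, and a plain raise for atypical internal errors. The require() helper is hypothetical, not code from the commit.

    import logging
    import sys

    # Scripts that are run directly get an explicit logger id; the gguf-py
    # modules use __name__ instead, since they are imported, not executed.
    logger = logging.getLogger("lora-to-gguf")

    def require(cond: bool, msg: str) -> None:
        # Hypothetical helper: an expected, user-facing failure is logged
        # and then exits, exactly as the converted checks in the diff do.
        if not cond:
            logger.error(msg)
            sys.exit(1)

    # An atypical/internal error is raised instead of logged-then-exited:
    #     raise ValueError(f"unexpected value: {value!r}")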
Diffstat (limited to 'convert-lora-to-ggml.py')
-rwxr-xr-x  convert-lora-to-ggml.py | 31
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py
index 9a9936de..39536feb 100755
--- a/convert-lora-to-ggml.py
+++ b/convert-lora-to-ggml.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import json
 import os
 import struct
@@ -15,6 +16,8 @@ if 'NO_LOCAL_GGUF' not in os.environ:
     sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
 import gguf
 
+logger = logging.getLogger("lora-to-gguf")
+
 NUMPY_TYPE_TO_FTYPE: dict[str, int] = {"float32": 0, "float16": 1}
@@ -48,11 +51,9 @@ def write_tensor_header(fout: BinaryIO, name: str, shape: Sequence[int], data_ty
 if __name__ == '__main__':
     if len(sys.argv) < 2:
-        print(f"Usage: python {sys.argv[0]} <path> [arch]")
-        print(
-            "Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'"
-        )
-        print(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
+        logger.info(f"Usage: python {sys.argv[0]} <path> [arch]")
+        logger.info("Path must contain HuggingFace PEFT LoRA files 'adapter_config.json' and 'adapter_model.bin'")
+        logger.info(f"Arch must be one of {list(gguf.MODEL_ARCH_NAMES.values())} (default: llama)")
         sys.exit(1)
 
     input_json = os.path.join(sys.argv[1], "adapter_config.json")
@@ -70,7 +71,7 @@ if __name__ == '__main__':
     arch_name = sys.argv[2] if len(sys.argv) == 3 else "llama"
 
     if arch_name not in gguf.MODEL_ARCH_NAMES.values():
-        print(f"Error: unsupported architecture {arch_name}")
+        logger.error(f"Error: unsupported architecture {arch_name}")
         sys.exit(1)
 
     arch = list(gguf.MODEL_ARCH_NAMES.keys())[list(gguf.MODEL_ARCH_NAMES.values()).index(arch_name)]
@@ -80,21 +81,21 @@
         params = json.load(f)
 
     if params["peft_type"] != "LORA":
-        print(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
+        logger.error(f"Error: unsupported adapter type {params['peft_type']}, expected LORA")
         sys.exit(1)
 
     if params["fan_in_fan_out"] is True:
-        print("Error: param fan_in_fan_out is not supported")
+        logger.error("Error: param fan_in_fan_out is not supported")
         sys.exit(1)
 
     if params["bias"] is not None and params["bias"] != "none":
-        print("Error: param bias is not supported")
+        logger.error("Error: param bias is not supported")
         sys.exit(1)
 
     # TODO: these seem to be layers that have been trained but without lora.
     # doesn't seem widely used but eventually should be supported
     if params["modules_to_save"] is not None and len(params["modules_to_save"]) > 0:
-        print("Error: param modules_to_save is not supported")
+        logger.error("Error: param modules_to_save is not supported")
         sys.exit(1)
 
     with open(output_path, "wb") as fout:
@@ -125,13 +126,13 @@ if __name__ == '__main__':
            suffix = k[-len(lora_suffixes[0]):]
            k = k[: -len(lora_suffixes[0])]
         else:
-            print(f"Error: unrecognized tensor name {orig_k}")
+            logger.error(f"Error: unrecognized tensor name {orig_k}")
             sys.exit(1)
 
         tname = name_map.get_name(k)
         if tname is None:
-            print(f"Error: could not map tensor name {orig_k}")
-            print(" Note: the arch parameter must be specified if the model is not llama")
+            logger.error(f"Error: could not map tensor name {orig_k}")
+            logger.error(" Note: the arch parameter must be specified if the model is not llama")
             sys.exit(1)
 
         if suffix == ".lora_A.weight":
@@ -141,8 +142,8 @@ if __name__ == '__main__':
         else:
             assert False
 
-        print(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
+        logger.info(f"{k} => {tname} {t.shape} {t.dtype} {t.nbytes/1024/1024:.2f}MB")
         write_tensor_header(fout, tname, t.shape, t.dtype)
         t.tofile(fout)
 
-    print(f"Converted {input_json} and {input_model} to {output_path}")
+    logger.info(f"Converted {input_json} and {input_model} to {output_path}")