author     Kawrakow <48489457+ikawrakow@users.noreply.github.com>  2024-07-27 07:55:01 +0200
committer  GitHub <noreply@github.com>                             2024-07-27 07:55:01 +0200
commit     154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch)
tree       81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /gguf-py/gguf/utility.py
parent     0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff)
Merge mainline llama.cpp (#3)
* Merging mainline - WIP

* Merging mainline - WIP

  AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower,
  as is so often the case with llama.cpp/ggml after some "improvements" have
  been made.

* Merging mainline - fix Metal

* Remove check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'gguf-py/gguf/utility.py')
-rw-r--r--  gguf-py/gguf/utility.py  69
1 file changed, 69 insertions, 0 deletions
diff --git a/gguf-py/gguf/utility.py b/gguf-py/gguf/utility.py
new file mode 100644
index 00000000..40d59b75
--- /dev/null
+++ b/gguf-py/gguf/utility.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+from typing import Literal
+
+
+def fill_templated_filename(filename: str, output_type: str | None) -> str:
+    # Given a file name, fill in any type templates, e.g. 'some-model-name.{ftype}.gguf'
+    ftype_lowercase: str = output_type.lower() if output_type is not None else ""
+    ftype_uppercase: str = output_type.upper() if output_type is not None else ""
+    return filename.format(ftype_lowercase,
+                           outtype=ftype_lowercase, ftype=ftype_lowercase,
+                           OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase)
+
+
+def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
+    if model_params_count > 1e12:
+        # Trillions of parameters
+        scaled_model_params = model_params_count * 1e-12
+        scale_suffix = "T"
+    elif model_params_count > 1e9:
+        # Billions of parameters
+        scaled_model_params = model_params_count * 1e-9
+        scale_suffix = "B"
+    elif model_params_count > 1e6:
+        # Millions of parameters
+        scaled_model_params = model_params_count * 1e-6
+        scale_suffix = "M"
+    else:
+        # Thousands of parameters
+        scaled_model_params = model_params_count * 1e-3
+        scale_suffix = "K"
+
+    fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)
+
+    return f"{scaled_model_params:.{fix}f}{scale_suffix}"
+
+
+def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
+
+    if expert_count > 0:
+        pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
+        size_class = f"{expert_count}x{pretty_size}"
+    else:
+        size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)
+
+    return size_class
+
+
+def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
+    # Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention
+
+    if base_name is not None:
+        name = base_name.strip().replace(' ', '-').replace('/', '-')
+    elif model_name is not None:
+        name = model_name.strip().replace(' ', '-').replace('/', '-')
+    else:
+        name = "ggml-model"
+
+    parameters = f"-{size_label}" if size_label is not None else ""
+
+    finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else ""
+
+    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""
+
+    encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""
+
+    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
+
+    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
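For illustration, a minimal sketch of the rounding behaviour in model_weight_count_rounded_notation (the parameter counts here are hypothetical; assumes the gguf package from gguf-py is importable):

    from gguf.utility import model_weight_count_rounded_notation

    # One significant digit before the point -> padded with one decimal (fix = 2 - 1)
    print(model_weight_count_rounded_notation(6_738_415_616))   # '6.7B'
    # Two digits already -> no decimals needed (fix = 0)
    print(model_weight_count_rounded_notation(13_015_864_320))  # '13B'
    # Counts at or below a million fall through to the thousands branch
    print(model_weight_count_rounded_notation(99_000))          # '99K'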
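And a sketch of how size_label and naming_convention compose into a GGUF file-name stem; the model names and parameter figures below are made up for the example:

    from gguf.utility import fill_templated_filename, naming_convention, size_label

    # Dense model: the label comes from the total parameter count
    label = size_label(total_params=6_738_415_616, shared_params=0,
                       expert_params=0, expert_count=0)            # -> '6.7B'

    # Mixture-of-experts: '<expert_count>x<shared + per-expert params>'
    moe = size_label(total_params=46_702_792_704, shared_params=1_820_000_000,
                     expert_params=5_610_000_000, expert_count=8)  # -> '8x7.4B'

    stem = naming_convention(model_name=None, base_name='Mixtral',
                             finetune_string='Instruct', version_string='v0.1',
                             size_label=label, output_type='F16')
    print(stem)  # 'Mixtral-6.7B-Instruct-v0.1-F16'

    # fill_templated_filename lower/upper-cases the type into '{ftype}'-style slots
    print(fill_templated_filename('some-model-name.{ftype}.gguf', 'Q8_0'))
    # 'some-model-name.q8_0.gguf'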