summaryrefslogtreecommitdiff
path: root/gguf-py
diff options
context:
space:
mode:
authorsaood06 <saood05@gmail.com>2025-05-09 09:17:41 -0500
committerGitHub <noreply@github.com>2025-05-09 09:17:41 -0500
commit967a2e1860482397a6a386952d972ac1205474ad (patch)
treee5fce0dba400eaf98ba343a06f007ddad408e5ce /gguf-py
parente5a4a3ce78ce96b6822dcd6138a98c4d237ecc9b (diff)
Fix missing rope_freqs with convert_hf_to_gguf (#402)
* lora : fix llama conversion script with ROPE_FREQS * convert : refactor rope_freqs generation This should also fix vocab-only conversion for Phi-3. * convert : adapt MiniCPM3 to separate rope_freqs insertion MiniCPM3's tokenizer is treated as a SentencePiece tokenizer to avoid having to run its custom Python code which mixes tokenization in the same file as tool calls. gguf-py : add long and short RoPE factors to tensor mappings Empty, but the key names are used to populate the mappings. --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co> Co-authored-by: Francis Couture-Harpin <git@compilade.net>
Diffstat (limited to 'gguf-py')
-rw-r--r--gguf-py/gguf/constants.py2
-rw-r--r--gguf-py/gguf/tensor_mapping.py3
2 files changed, 5 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index e2f4eb1a..6819979f 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -806,6 +806,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_Q,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 3ff70cd7..9688b02c 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -82,6 +82,9 @@ class TensorNameMap:
"rope.freqs", # llama-pth
"rotary_pos_emb.inv_freq", # chatglm
),
+
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
}
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {