From bc6ae515ceb14eeaf198e00251a9689539cea176 Mon Sep 17 00:00:00 2001
From: saood06
Date: Fri, 9 May 2025 02:09:59 -0500
Subject: Support for Llama-3-Nemotron models (#377)

* conflict resolution
* Changes to make work and add longrope support
* Changes to n_attention_wv rule
* Untested support of 253B
* DeciLMCausalModel now reads rope_theta from config.json properly
* Remove errant Granite mentions
* Better n_attention_vw rule
* Update vocab.py

---------

Co-authored-by: Yee Man Chan
Co-authored-by: Iwan Kawrakow
---
 gguf-py/gguf/tensor_mapping.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'gguf-py/gguf/tensor_mapping.py')

diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 1dea6a82..3ff70cd7 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -176,7 +176,8 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",     # mpt
             "transformer.h.{bid}.self_attention.dense",   # falcon
             "h.{bid}.self_attention.dense",               # bloom
-            "model.layers.{bid}.self_attn.o_proj",        # llama-hf
+            "model.layers.{bid}.self_attn.o_proj",        # llama-hf nemotron olmoe olmo2
+            "model.layers.{bid}.self_attn.linear_attn",   # deci
             "layers.{bid}.attention.wo",                  # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert
             "transformer.h.{bid}.attn.out_proj",          # gpt-j
--
cgit v1.2.3
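
As a quick sanity check of what the mapping change does, the sketch below (not part of the patch) resolves the DeciLM-style `linear_attn` name through gguf-py's TensorNameMap. The choice of MODEL_ARCH.LLAMA and the block count are illustrative assumptions; the Nemotron/DeciLM converter may register its own architecture, but any architecture whose tensor set includes ATTN_OUT resolves the same way once this patch is applied.

# Illustrative only: assumes the patch above is applied; MODEL_ARCH.LLAMA is
# used purely as an example architecture that includes the ATTN_OUT tensor.
from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

n_blocks = 32  # illustrative block count; use the model's actual layer count
name_map = get_tensor_name_map(MODEL_ARCH.LLAMA, n_blocks)

# DeciLM/Nemotron checkpoints that store the attention output projection as
# `linear_attn` now map to the same GGUF tensor as the usual `o_proj` name.
print(name_map.get_name("model.layers.0.self_attn.linear_attn.weight",
                        try_suffixes=(".weight", ".bias")))
# -> blk.0.attn_output.weight
print(name_map.get_name("model.layers.0.self_attn.o_proj.weight",
                        try_suffixes=(".weight", ".bias")))
# -> blk.0.attn_output.weight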