diff options
Diffstat (limited to 'gguf-py/gguf/tensor_mapping.py')
-rw-r--r-- | gguf-py/gguf/tensor_mapping.py | 16 |
1 files changed, 6 insertions, 10 deletions
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 11fd34b8..93a5a455 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -231,9 +231,8 @@ class TensorNameMap: ), MODEL_TENSOR.FFN_UP_EXP: ( - "layers.{bid}.feed_forward.experts.{xid}.w3", # mixtral - "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3", # mixtral - "transformer.decoder_layer.{bid}.moe.{xid}.linear_v", # Grok + "layers.{bid}.feed_forward.experts.w3", # mixtral (merged) + "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged) ), # AWQ-activation gate @@ -252,9 +251,8 @@ class TensorNameMap: ), MODEL_TENSOR.FFN_GATE_EXP: ( - "layers.{bid}.feed_forward.experts.{xid}.w1", # mixtral - "model.layers.{bid}.block_sparse_moe.experts.{xid}.w1", # mixtral - "transformer.decoder_layer.{bid}.moe.{xid}.linear" # Grok + "layers.{bid}.feed_forward.experts.w1", # mixtral (merged) + "transformer.decoder_layer.{bid}.moe.linear" # Grok (merged) ), # Feed-forward down @@ -280,10 +278,8 @@ class TensorNameMap: ), MODEL_TENSOR.FFN_DOWN_EXP: ( - "layers.{bid}.feed_forward.experts.{xid}.w2", # mixtral - "model.layers.{bid}.block_sparse_moe.experts.{xid}.w2", # mixtral - "transformer.decoder_layer.{bid}.moe.{xid}.linear_1", # Grok - + "layers.{bid}.feed_forward.experts.w2", # mixtral (merged) + "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged) ), MODEL_TENSOR.ATTN_Q_NORM: ( |