summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py 6
-rw-r--r-- gguf-py/gguf/tensor_mapping.py 16
-rw-r--r-- gguf-py/pyproject.toml 2
3 files changed, 10 insertions, 14 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 27eaf723..f468802d 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -221,9 +221,9 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
- MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate.{xid}",
- MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
- MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
+ MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
+ MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
+ MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 11fd34b8..93a5a455 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -231,9 +231,8 @@ class TensorNameMap:
),
MODEL_TENSOR.FFN_UP_EXP: (
- "layers.{bid}.feed_forward.experts.{xid}.w3", # mixtral
- "model.layers.{bid}.block_sparse_moe.experts.{xid}.w3", # mixtral
- "transformer.decoder_layer.{bid}.moe.{xid}.linear_v", # Grok
+ "layers.{bid}.feed_forward.experts.w3", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
),
# AWQ-activation gate
@@ -252,9 +251,8 @@ class TensorNameMap:
),
MODEL_TENSOR.FFN_GATE_EXP: (
- "layers.{bid}.feed_forward.experts.{xid}.w1", # mixtral
- "model.layers.{bid}.block_sparse_moe.experts.{xid}.w1", # mixtral
- "transformer.decoder_layer.{bid}.moe.{xid}.linear" # Grok
+ "layers.{bid}.feed_forward.experts.w1", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear" # Grok (merged)
),
# Feed-forward down
@@ -280,10 +278,8 @@ class TensorNameMap:
),
MODEL_TENSOR.FFN_DOWN_EXP: (
- "layers.{bid}.feed_forward.experts.{xid}.w2", # mixtral
- "model.layers.{bid}.block_sparse_moe.experts.{xid}.w2", # mixtral
- "transformer.decoder_layer.{bid}.moe.{xid}.linear_1", # Grok
-
+ "layers.{bid}.feed_forward.experts.w2", # mixtral (merged)
+ "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
),
MODEL_TENSOR.ATTN_Q_NORM: (
diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index 96396e04..13cbfffb 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gguf"
-version = "0.8.0"
+version = "0.9.0"
description = "Read and write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"]
packages = [