summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
author: Ștefan-Gabriel Muscalu <legraphista@users.noreply.github.com> 2024-06-17 22:08:46 +0300
committer: GitHub <noreply@github.com> 2024-06-17 21:08:46 +0200
commit: a94e6ff8774b7c9f950d9545baf0ce35e8d1ed2f (patch)
tree: abfa71d6bf6b3743185ead9f9c337c80c49acc04 /gguf-py
parent: 5b6da187508f49a9fa9d95fa22ae804a0780d256 (diff)
update: support Qwen2-57B-A14B (#7835)
* update: convert-hf-to-gguf.py to support Qwen2-57B-A14B
* fix: QWEN2MOE support for expert_feed_forward_length
  previously, expert ff was taken from n_ff (intermediate size) but it is now properly taken from LLM_KV_EXPERT_FEED_FORWARD_LENGTH
  n_ff_exp and n_ff_shared_exp are now properly calculated
* update: convert-hf-to-gguf.py cleanup for Qwen2MoeForCausalLM
* fix: QWEN2MOE support for expert_feed_forward_length
  previously, expert ff was taken from n_ff (intermediate size) but it is now properly taken from LLM_KV_EXPERT_FEED_FORWARD_LENGTH
  n_ff_exp and n_ff_shexp are now properly calculated
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py 31
-rw-r--r-- gguf-py/gguf/gguf_writer.py 3
2 files changed, 19 insertions, 15 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 8908585c..fb20cfab 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -33,21 +33,22 @@ class Keys:
FILE_TYPE = "general.file_type"
class LLM:
- VOCAB_SIZE = "{arch}.vocab_size"
- CONTEXT_LENGTH = "{arch}.context_length"
- EMBEDDING_LENGTH = "{arch}.embedding_length"
- BLOCK_COUNT = "{arch}.block_count"
- LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
- FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
- EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
- USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
- TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
- EXPERT_COUNT = "{arch}.expert_count"
- EXPERT_USED_COUNT = "{arch}.expert_used_count"
- EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
- EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
- POOLING_TYPE = "{arch}.pooling_type"
- LOGIT_SCALE = "{arch}.logit_scale"
+ VOCAB_SIZE = "{arch}.vocab_size"
+ CONTEXT_LENGTH = "{arch}.context_length"
+ EMBEDDING_LENGTH = "{arch}.embedding_length"
+ BLOCK_COUNT = "{arch}.block_count"
+ LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
+ FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
+ EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
+ EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
+ USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+ TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
+ EXPERT_COUNT = "{arch}.expert_count"
+ EXPERT_USED_COUNT = "{arch}.expert_used_count"
+ EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
+ EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
+ POOLING_TYPE = "{arch}.pooling_type"
+ LOGIT_SCALE = "{arch}.logit_scale"
class Attention:
HEAD_COUNT = "{arch}.attention.head_count"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index ed56abfb..a697f657 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -394,6 +394,9 @@ class GGUFWriter:
def add_expert_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+ def add_expert_shared_feed_forward_length(self, length: int) -> None:
+ self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+
def add_parallel_residual(self, use: bool) -> None:
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)