From a94e6ff8774b7c9f950d9545baf0ce35e8d1ed2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C8=98tefan-Gabriel=20Muscalu?= Date: Mon, 17 Jun 2024 22:08:46 +0300 Subject: update: support Qwen2-57B-A14B (#7835) * update: convert-hf-to-gguf.py to support Qwen2-57B-A14B * fix: QWEN2MOE support for expert_feed_forward_length previously, expert ff was taken from n_ff (intermediate size) but it is now properly taken from LLM_KV_EXPERT_FEED_FORWARD_LENGTH n_ff_exp and n_ff_shared_exp are now properly calculated * update: convert-hf-to-gguf.py cleanup for Qwen2MoeForCausalLM * fix: QWEN2MOE support for expert_feed_forward_length previously, expert ff was taken from n_ff (intermediate size) but it is now properly taken from LLM_KV_EXPERT_FEED_FORWARD_LENGTH n_ff_exp and n_ff_shexp are now properly calculated --- gguf-py/gguf/constants.py | 31 ++++++++++++++++--------------- gguf-py/gguf/gguf_writer.py | 3 +++ 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'gguf-py') diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 8908585c..fb20cfab 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -33,21 +33,22 @@ class Keys: FILE_TYPE = "general.file_type" class LLM: - VOCAB_SIZE = "{arch}.vocab_size" - CONTEXT_LENGTH = "{arch}.context_length" - EMBEDDING_LENGTH = "{arch}.embedding_length" - BLOCK_COUNT = "{arch}.block_count" - LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count" - FEED_FORWARD_LENGTH = "{arch}.feed_forward_length" - EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length" - USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual" - TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout" - EXPERT_COUNT = "{arch}.expert_count" - EXPERT_USED_COUNT = "{arch}.expert_used_count" - EXPERT_SHARED_COUNT = "{arch}.expert_shared_count" - EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale" - POOLING_TYPE = "{arch}.pooling_type" - LOGIT_SCALE = "{arch}.logit_scale" + VOCAB_SIZE = "{arch}.vocab_size" + CONTEXT_LENGTH = "{arch}.context_length" + EMBEDDING_LENGTH = "{arch}.embedding_length" + BLOCK_COUNT = "{arch}.block_count" + LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count" + FEED_FORWARD_LENGTH = "{arch}.feed_forward_length" + EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length" + EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length" + USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual" + TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout" + EXPERT_COUNT = "{arch}.expert_count" + EXPERT_USED_COUNT = "{arch}.expert_used_count" + EXPERT_SHARED_COUNT = "{arch}.expert_shared_count" + EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale" + POOLING_TYPE = "{arch}.pooling_type" + LOGIT_SCALE = "{arch}.logit_scale" class Attention: HEAD_COUNT = "{arch}.attention.head_count" diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index ed56abfb..a697f657 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -394,6 +394,9 @@ class GGUFWriter: def add_expert_feed_forward_length(self, length: int) -> None: self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length) + def add_expert_shared_feed_forward_length(self, length: int) -> None: + self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length) + def add_parallel_residual(self, use: bool) -> None: self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use) -- cgit v1.2.3