summary refs log tree commit diff
path: root/gguf-py/gguf
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py/gguf')
-rw-r--r-- gguf-py/gguf/constants.py 31
-rw-r--r-- gguf-py/gguf/gguf_writer.py 3
2 files changed, 19 insertions, 15 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 8908585c..fb20cfab 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -33,21 +33,22 @@ class Keys:
FILE_TYPE = "general.file_type"
class LLM:
- VOCAB_SIZE = "{arch}.vocab_size"
- CONTEXT_LENGTH = "{arch}.context_length"
- EMBEDDING_LENGTH = "{arch}.embedding_length"
- BLOCK_COUNT = "{arch}.block_count"
- LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
- FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
- EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
- USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
- TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
- EXPERT_COUNT = "{arch}.expert_count"
- EXPERT_USED_COUNT = "{arch}.expert_used_count"
- EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
- EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
- POOLING_TYPE = "{arch}.pooling_type"
- LOGIT_SCALE = "{arch}.logit_scale"
+ VOCAB_SIZE = "{arch}.vocab_size"
+ CONTEXT_LENGTH = "{arch}.context_length"
+ EMBEDDING_LENGTH = "{arch}.embedding_length"
+ BLOCK_COUNT = "{arch}.block_count"
+ LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
+ FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
+ EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
+ EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
+ USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
+ TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
+ EXPERT_COUNT = "{arch}.expert_count"
+ EXPERT_USED_COUNT = "{arch}.expert_used_count"
+ EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
+ EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
+ POOLING_TYPE = "{arch}.pooling_type"
+ LOGIT_SCALE = "{arch}.logit_scale"
class Attention:
HEAD_COUNT = "{arch}.attention.head_count"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index ed56abfb..a697f657 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -394,6 +394,9 @@ class GGUFWriter:
def add_expert_feed_forward_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+ def add_expert_shared_feed_forward_length(self, length: int) -> None:
+ self.add_uint32(Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch), length)
+
def add_parallel_residual(self, use: bool) -> None:
self.add_bool(Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch), use)