summary refs log tree commit diff
path: root/convert_hf_to_gguf.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert_hf_to_gguf.py')
-rw-r--r-- convert_hf_to_gguf.py 28
1 files changed, 28 insertions, 0 deletions
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 33be63fa..b0a82c80 100644
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3864,6 +3864,34 @@ class JaisModel(Model):
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
+@Model.register("Dots1ForCausalLM")  # registered under this name; presumably matched against the checkpoint's architecture string (confirm in Model.register)
+class Dots1Model(Qwen2MoeModel):  # Dots1 converter: reuses Qwen2MoeModel and overrides only the three methods below
+ model_arch = gguf.MODEL_ARCH.DOTS1  # architecture id this converter is tied to
+
+ def __init__(self, *args, **kwargs):  # takes whatever the parent constructor takes and forwards it unchanged
+ super().__init__(*args, **kwargs)
+ self.hparams["num_experts"] = self.hparams["n_routed_experts"]  # alias the Dots1 key; presumably "num_experts" is what the inherited code reads (confirm)
+
+ def set_gguf_parameters(self):  # parent's parameters first, then the Dots1-specific expert settings
+ super().set_gguf_parameters()
+ self.gguf_writer.add_leading_dense_block_count(self.hparams["first_k_dense_replace"])  # this and the next three calls pass hparams values straight to the writer
+ self.gguf_writer.add_expert_shared_count(self.hparams["n_shared_experts"])
+ self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"])
+ self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"])
+
+ if self.hparams["scoring_func"] == "sigmoid":  # "sigmoid" is the only scoring function handled here
+ self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)
+ else:
+ raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}")  # any other value is rejected rather than silently defaulted
+
+ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):  # renames/routes one tensor; bid is only passed through to the parent
+ if name.endswith("e_score_correction_bias"):  # checked on the suffix; the replace below rewrites every occurrence in the name
+ name = name.replace("e_score_correction_bias", "e_score_correction.bias")  # presumably the dotted ".bias" form is what the tensor-name map expects (confirm)
+ if "shared_experts" in name:  # shared-expert tensors skip the parent's handling
+ return [(self.map_tensor_name(name), data_torch)]  # one-element list: (mapped name, tensor data unchanged)
+ return super().modify_tensors(data_torch, name, bid)  # everything else, including the renamed bias, goes to the parent implementation
+
+
@Model.register("ChatGLMModel", "ChatGLMForConditionalGeneration")
class ChatGLMModel(Model):
model_arch = gguf.MODEL_ARCH.CHATGLM