Apply Qwen3 PR from llama.cpp (#355)

author: Ben Harris <mail@bharr.is> 2025-04-29 16:02:08 +0800
committer: GitHub <noreply@github.com> 2025-04-29 10:02:08 +0200
commit: 1064f5bc312f61e5a1b7ef3fef918be300f74641 (patch)
tree: bdcb5b56d509ec24395c8c4a18af48634d91a8b8 /convert_hf_to_gguf.py
parent: 99b87a375fdf8cc409c4a95cf451f0462f56f71b (diff)
1 file changed, 7 insertions, 0 deletions
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index a6ab09c0..20d27a5c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1938,6 +1938,13 @@ class Qwen2MoeModel(Model):
             if len(experts) > 0:
                 raise ValueError(f"Unprocessed experts: {experts}")
 
+@Model.register("Qwen3ForCausalLM")  # passes the name "Qwen3ForCausalLM" to Model.register (presumably the checkpoint's architecture string -- confirm)
+class Qwen3Model(Qwen2Model):  # adds nothing beyond Qwen2Model except the model_arch override below
+    model_arch = gguf.MODEL_ARCH.QWEN3  # NOTE(review): presumably the architecture id written to the GGUF output -- confirm against Model
+
+@Model.register("Qwen3MoeForCausalLM")  # passes the name "Qwen3MoeForCausalLM" to Model.register (presumably the checkpoint's architecture string -- confirm)
+class Qwen3MoeModel(Qwen2MoeModel):  # adds nothing beyond Qwen2MoeModel except the model_arch override below
+    model_arch = gguf.MODEL_ARCH.QWEN3MOE  # NOTE(review): presumably the architecture id written to the GGUF output -- confirm against Model
 
 @Model.register("GPT2LMHeadModel")
 class GPT2Model(Model):
author	Ben Harris <mail@bharr.is>	2025-04-29 16:02:08 +0800
committer	GitHub <noreply@github.com>	2025-04-29 10:02:08 +0200
commit	1064f5bc312f61e5a1b7ef3fef918be300f74641 (patch)
tree	bdcb5b56d509ec24395c8c4a18af48634d91a8b8 /convert_hf_to_gguf.py
parent	99b87a375fdf8cc409c4a95cf451f0462f56f71b (diff)