py : fix StableLM conversion after config.json changes (#5703)

* Fix issues during StableLM models conversion * Fix hard coded layer_norm_eps * Support layer_norm_eps for LlavaStableLM Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Add missing parenthesis Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * Support rotary_factor for LlavaStableLM Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * fix typo * Add StableLMEpochForCausalLM for safety Co-authored-by: compilade <113953597+compilade@users.noreply.github.com> * Add StableLMEpochForCausalLM for safety 2 Co-authored-by: compilade <113953597+compilade@users.noreply.github.com> --------- Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> Co-authored-by: Jared Van Bortel <jared@nomic.ai> Co-authored-by: compilade <113953597+compilade@users.noreply.github.com>
author: Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> 2024-02-25 10:54:04 +0100
committer: GitHub <noreply@github.com> 2024-02-25 11:54:04 +0200
commit: 69917dfa55674c608360638bb4d6a12a315e2810 (patch)
tree: 76bf37a8e692bd3109787ee045350abba93b48a8 /convert-hf-to-gguf.py
parent: 9e359a4f47c1b2dceb99e29706c9f7403d32ab5e (diff)
1 files changed, 5 insertions, 4 deletions
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 32d54b45..ae30b2a7 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -192,7 +192,7 @@ class Model:
             return RefactModel
         if model_architecture == "PersimmonForCausalLM":
             return PersimmonModel
-        if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
+        if model_architecture in ("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
             return StableLMModel
         if model_architecture == "QWenLMHeadModel":
             return QwenModel
@@ -253,7 +253,7 @@ class Model:
             return gguf.MODEL_ARCH.REFACT
         if arch == "PersimmonForCausalLM":
             return gguf.MODEL_ARCH.PERSIMMON
-        if arch in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
+        if arch in ("StableLmForCausalLM", "StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
             return gguf.MODEL_ARCH.STABLELM
         if arch == "QWenLMHeadModel":
             return gguf.MODEL_ARCH.QWEN
@@ -1074,10 +1074,11 @@ class StableLMModel(Model):
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
-        self.gguf_writer.add_rope_dimension_count(int(hparams["rope_pct"] * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+        rotary_factor = self.find_hparam(["partial_rotary_factor", "rope_pct"])
+        self.gguf_writer.add_rope_dimension_count(int(rotary_factor * (hparams["hidden_size"] // hparams["num_attention_heads"])))
         self.gguf_writer.add_head_count(hparams["num_attention_heads"])
         self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
-        self.gguf_writer.add_layer_norm_eps(1e-5)
+        self.gguf_writer.add_layer_norm_eps(self.find_hparam(["layer_norm_eps", "norm_eps"]))
 
 
 class MixtralModel(Model):
author	Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>	2024-02-25 10:54:04 +0100
committer	GitHub <noreply@github.com>	2024-02-25 11:54:04 +0200
commit	69917dfa55674c608360638bb4d6a12a315e2810 (patch)
tree	76bf37a8e692bd3109787ee045350abba93b48a8 /convert-hf-to-gguf.py
parent	9e359a4f47c1b2dceb99e29706c9f7403d32ab5e (diff)