summaryrefslogtreecommitdiff
path: root/convert-hf-to-gguf.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-xconvert-hf-to-gguf.py30
1 files changed, 19 insertions, 11 deletions
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index f7fe29fd..e7db7591 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -150,8 +150,6 @@ class Model:
@staticmethod
def from_model_architecture(model_architecture):
- if model_architecture == "StableLMEpochForCausalLM":
- return StableLMModel
if model_architecture == "GPTNeoXForCausalLM":
return GPTNeoXModel
if model_architecture == "BloomForCausalLM":
@@ -168,6 +166,8 @@ class Model:
return RefactModel
if model_architecture == "PersimmonForCausalLM":
return PersimmonModel
+ if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
+ return StableLMModel
return Model
def _is_model_safetensors(self) -> bool:
@@ -201,6 +201,8 @@ class Model:
return gguf.MODEL_ARCH.REFACT
if arch == "PersimmonForCausalLM":
return gguf.MODEL_ARCH.PERSIMMON
+ if arch in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
+ return gguf.MODEL_ARCH.STABLELM
raise NotImplementedError(f'Architecture "{arch}" not supported!')
@@ -294,15 +296,6 @@ class Model:
special_vocab.add_to_gguf(self.gguf_writer)
-class StableLMModel(Model):
- def set_gguf_parameters(self):
- super().set_gguf_parameters()
- self.gguf_writer.add_rope_dimension_count(
- int(self.hparams["rope_pct"] * (self.hparams["hidden_size"] // self.hparams["num_attention_heads"])),
- )
- self.gguf_writer.add_layer_norm_eps(1e-5)
-
-
class GPTNeoXModel(Model):
def set_gguf_parameters(self):
block_count = self.hparams["num_hidden_layers"]
@@ -824,6 +817,21 @@ class PersimmonModel(Model):
self.gguf_writer.add_tensor(new_name, data)
+class StableLMModel(Model):
+ def set_gguf_parameters(self):
+ hparams = self.hparams
+ block_count = hparams["num_hidden_layers"]
+
+ self.gguf_writer.add_name(dir_model.name)
+ self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+ self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+ self.gguf_writer.add_rope_dimension_count(int(hparams["rope_pct"]*(hparams["hidden_size"] // hparams["num_attention_heads"])))
+ self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+ self.gguf_writer.add_parallel_residual(hparams["use_parallel_residual"] if "use_parallel_residual" in hparams else True)
+ self.gguf_writer.add_layer_norm_eps(1e-5)
+
###### CONVERSION LOGIC ######
def parse_args() -> argparse.Namespace: