diff options
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x | convert-hf-to-gguf.py | 30 |
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index f7fe29fd..e7db7591 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -150,8 +150,6 @@ class Model:
 
     @staticmethod
     def from_model_architecture(model_architecture):
-        if model_architecture == "StableLMEpochForCausalLM":
-            return StableLMModel
         if model_architecture == "GPTNeoXForCausalLM":
             return GPTNeoXModel
         if model_architecture == "BloomForCausalLM":
@@ -168,6 +166,8 @@ class Model:
             return RefactModel
         if model_architecture == "PersimmonForCausalLM":
             return PersimmonModel
+        if model_architecture in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
+            return StableLMModel
         return Model
 
     def _is_model_safetensors(self) -> bool:
@@ -201,6 +201,8 @@ class Model:
             return gguf.MODEL_ARCH.REFACT
         if arch == "PersimmonForCausalLM":
             return gguf.MODEL_ARCH.PERSIMMON
+        if arch in ("StableLMEpochForCausalLM", "LlavaStableLMEpochForCausalLM"):
+            return gguf.MODEL_ARCH.STABLELM
 
         raise NotImplementedError(f'Architecture "{arch}" not supported!')
 
@@ -294,15 +296,6 @@ class Model:
         special_vocab.add_to_gguf(self.gguf_writer)
 
 
-class StableLMModel(Model):
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        self.gguf_writer.add_rope_dimension_count(
-            int(self.hparams["rope_pct"] * (self.hparams["hidden_size"] // self.hparams["num_attention_heads"])),
-        )
-        self.gguf_writer.add_layer_norm_eps(1e-5)
-
-
 class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]
@@ -824,6 +817,23 @@ class PersimmonModel(Model):
         self.gguf_writer.add_tensor(new_name, data)
 
 
+class StableLMModel(Model):
+    def set_gguf_parameters(self):
+        hparams = self.hparams
+        block_count = hparams["num_hidden_layers"]
+
+        # Read the model directory from the instance; a bare `dir_model` would
+        # resolve to a script-level global (NameError when used as a library).
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
+        self.gguf_writer.add_embedding_length(hparams["hidden_size"])
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
+        self.gguf_writer.add_rope_dimension_count(int(hparams["rope_pct"] * (hparams["hidden_size"] // hparams["num_attention_heads"])))
+        self.gguf_writer.add_head_count(hparams["num_attention_heads"])
+        self.gguf_writer.add_parallel_residual(hparams.get("use_parallel_residual", True))
+        self.gguf_writer.add_layer_norm_eps(1e-5)
+
 ###### CONVERSION LOGIC ######
 
 def parse_args() -> argparse.Namespace: