summaryrefslogtreecommitdiff
path: root/convert-hf-to-gguf.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-xconvert-hf-to-gguf.py67
1 files changed, 67 insertions, 0 deletions
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1178d63a..aae3a5e8 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -197,6 +197,8 @@ class Model:
return Phi2Model
if model_architecture == "PlamoForCausalLM":
return PlamoModel
+ if model_architecture == "CodeShellForCausalLM":
+ return CodeShellModel
return Model
def _is_model_safetensors(self) -> bool:
@@ -242,6 +244,8 @@ class Model:
return gguf.MODEL_ARCH.PHI2
if arch == "PlamoForCausalLM":
return gguf.MODEL_ARCH.PLAMO
+ if arch == "CodeShellForCausalLM":
+ return gguf.MODEL_ARCH.CODESHELL
raise NotImplementedError(f'Architecture "{arch}" not supported!')
@@ -1175,6 +1179,69 @@ class PlamoModel(Model):
self.gguf_writer.add_tensor(new_name, data)
+class CodeShellModel(Model):
+ def set_gguf_parameters(self):
+ block_count = self.hparams["n_layer"]
+
+ self.gguf_writer.add_name("CodeShell")
+ self.gguf_writer.add_context_length(self.hparams["n_positions"])
+ self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+ self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(self.hparams["n_head"])
+ self.gguf_writer.add_head_count_kv(self.hparams["num_query_groups"])
+ self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+ self.gguf_writer.add_file_type(self.ftype)
+ self.gguf_writer.add_rope_freq_base(10000.0)
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(1.0)
+
+ def write_tensors(self):
+ block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
+ tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+ tensors = dict(self.get_tensors())
+ has_lm_head = "lm_head.weight" in tensors.keys() or "output.weight" in tensors.keys()
+ for name, data_torch in tensors.items():
+ # we don't need these
+ if name.endswith((".attn.rotary_emb.inv_freq")):
+ continue
+
+ old_dtype = data_torch.dtype
+
+ # convert any unsupported data types to float32
+ if data_torch.dtype not in (torch.float16, torch.float32):
+ data_torch = data_torch.to(torch.float32)
+
+ data = data_torch.squeeze().numpy()
+
+ # map tensor names
+ new_name = tensor_map.get_name(name, try_suffixes=(".weight", ".bias"))
+ if new_name is None:
+ print(f"Can not map tensor {name!r}")
+ sys.exit()
+
+ n_dims = len(data.shape)
+ data_dtype = data.dtype
+
+ # if f32 desired, convert any float16 to float32
+ if self.ftype == 0 and data_dtype == np.float16:
+ data = data.astype(np.float32)
+
+ # TODO: Why cant we use these float16 as-is? There should be not reason to store float16 as float32
+ if self.ftype == 1 and data_dtype == np.float16 and n_dims == 1:
+ data = data.astype(np.float32)
+
+ # if f16 desired, convert any float32 2-dim weight tensors to float16
+ if self.ftype == 1 and data_dtype == np.float32 and name.endswith(".weight") and n_dims == 2:
+ data = data.astype(np.float16)
+
+ print(f"{new_name}, n_dims = {n_dims}, {old_dtype} --> {data.dtype}")
+
+ self.gguf_writer.add_tensor(new_name, data)
+
+ if not has_lm_head and name == "transformer.wte.weight":
+ self.gguf_writer.add_tensor("output.weight", data)
+ print(name, f"=> output.weight, shape = {data.shape}, {old_dtype} --> {data.dtype}")
###### CONVERSION LOGIC ######