author | jameswu2014 <545426914@qq.com> | 2023-08-29 17:48:41 +0800
---|---|---
committer | GitHub <noreply@github.com> | 2023-08-29 12:48:41 +0300
commit | bcce96ba4dd95482824700c4ce2455fe8c49055a (patch) | |
tree | e33ac58e1179748991bca7518407d5a1f52f8ae2 | |
parent | 74e0caeb82fc9db77fa2cc93070bb919a9a935dd (diff) | |
convert.py : fix baichuan7B support (#2870)
* [Fix]: convert.py support baichuan7B
* convert.py : fix trailing whitespaces
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
-rwxr-xr-x | convert.py | 7 |
1 file changed, 4 insertions, 3 deletions
```diff
@@ -469,7 +469,7 @@ class UnquantizedTensor(Tensor):
 
     def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
         r = self.ndarray.shape[0] // 3
-        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
+        return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head, n_head))
 
     def part(self, n_part: int) -> 'UnquantizedTensor':
         r = self.ndarray.shape[0] // 3
@@ -952,9 +952,10 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
             #tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
             print(f"Unpacking and permuting layer {i}")
-            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
-            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
+            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
+            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
+            del tmp[f"model.layers.{i}.self_attn.W_pack.weight"]
         else:
             break
 
```
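For context on what this diff does: Baichuan-7B stores the query, key, and value projections stacked row-wise in a single `W_pack` tensor, and it uses plain multi-head attention, so the key permutation should use `n_head` rather than a separate grouped-query (`n_head_kv`) head count; the added `del` presumably also drops the packed tensor from the output dict so it is not emitted alongside the unpacked projections. Below is a minimal numpy sketch of the unpack-and-permute step under those assumptions; `permute` mirrors the Q/K row interleaving convert.py applies for GGML's rotary-embedding layout, but `unpack_w_pack` and the toy shapes are illustrative, not the exact convert.py API:

```python
import numpy as np

def permute(weights: np.ndarray, n_head: int, n_head_kv: int) -> np.ndarray:
    # Interleave each head's rows into the pairwise order GGML expects for RoPE.
    if n_head_kv is not None and n_head != n_head_kv:
        n_head //= n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
                   .swapaxes(1, 2)
                   .reshape(weights.shape))

def unpack_w_pack(w_pack: np.ndarray, n_head: int) -> dict:
    # Hypothetical helper: W_pack stacks Q, K, V along dim 0,
    # so each projection is one third of the rows.
    r = w_pack.shape[0] // 3
    q, k, v = w_pack[0:r], w_pack[r:2 * r], w_pack[2 * r:]
    # Baichuan-7B has no grouped-query attention, hence n_head is passed
    # for both head counts, matching the patched permute_part call.
    return {
        "q_proj.weight": permute(q, n_head, n_head),
        "k_proj.weight": permute(k, n_head, n_head),
        "v_proj.weight": v,  # V is split out as-is, with no permutation
    }

# Toy usage: 4 heads of dim 8, so n_embd = 32 and W_pack is (96, 32).
n_head, head_dim = 4, 8
n_embd = n_head * head_dim
w_pack = np.arange(3 * n_embd * n_embd, dtype=np.float32).reshape(3 * n_embd, n_embd)
for name, t in unpack_w_pack(w_pack, n_head).items():
    print(name, t.shape)  # each projection comes out as (n_embd, n_embd)
```

The detail the patch restores is that `permute` takes two head counts even when they are equal: the call sites drop the extra argument and `permute_part` duplicates `n_head` internally, keeping the non-GQA 7B path consistent with the GQA-aware `permute` signature.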