From 36680f6e40e4440c3ec3385d0b7e5ca8bb6c37f7 Mon Sep 17 00:00:00 2001
From: Judd
Date: Fri, 7 Jul 2023 00:23:49 +0800
Subject: convert : update for baichuan (#2081)

1. guess n_layers;
2. relax warnings on context size;
3. add a note that its derivations are also supported.

Co-authored-by: Judd
---
 convert.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'convert.py')

diff --git a/convert.py b/convert.py
index 14269277..66509b99 100644
--- a/convert.py
+++ b/convert.py
@@ -154,9 +154,15 @@ class Params:
         # try transformer naming first
         if "model.layers.0.self_attn.q_proj.weight" in model:
             n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
+        elif "model.layers.0.self_attn.W_pack.weight" in model: # next: try baichuan naming
+            n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
         else:
             n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
 
+        if n_layer < 1:
+            raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
+                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
+
         n_head=n_embd // 128 # guessed
 
         return Params(
-- 
cgit v1.2.3