From 4fe09dfe665c58a753dc9eb638dd4dca1cd35488 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sat, 16 Sep 2023 03:02:13 +0800 Subject: llama : add support for StarCoder model architectures (#3187) * add placeholder of starcoder in gguf / llama.cpp * support convert starcoder weights to gguf * convert MQA to MHA * fix ffn_down name * add LLM_ARCH_STARCODER to llama.cpp * set head_count_kv = 1 * load starcoder weight * add max_position_embeddings * set n_positions to max_positioin_embeddings * properly load all starcoder params * fix head count kv * fix comments * fix vram calculation for starcoder * store mqa directly * add input embeddings handling * add TBD * working in cpu, metal buggy * cleanup useless code * metal : fix out-of-bounds access in soft_max kernels * llama : make starcoder graph build more consistent with others * refactor: cleanup comments a bit * add other starcoder models: 3B, 7B, 15B * support-mqa-directly * fix: remove max_position_embeddings, use n_train_ctx * Update llama.cpp Co-authored-by: Georgi Gerganov * Update llama.cpp Co-authored-by: Georgi Gerganov * Apply suggestions from code review Co-authored-by: Georgi Gerganov * fix: switch to space from tab --------- Co-authored-by: Georgi Gerganov --- gguf-py/gguf/gguf.py | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'gguf-py') diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py index 7f7204ea..e0e0dbcb 100644 --- a/gguf-py/gguf/gguf.py +++ b/gguf-py/gguf/gguf.py @@ -77,13 +77,14 @@ KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world" class MODEL_ARCH(IntEnum): - LLAMA : int = auto() - FALCON : int = auto() - BAICHUAN:int = auto() - GPT2 : int = auto() - GPTJ : int = auto() - GPTNEOX: int = auto() - MPT : int = auto() + LLAMA : int = auto() + FALCON : int = auto() + BAICHUAN : int = auto() + GPT2 : int = auto() + GPTJ : int = auto() + GPTNEOX : int = auto() + MPT : int = auto() + STARCODER : int = auto() class MODEL_TENSOR(IntEnum): @@ -107,13 +108,14 @@ class MODEL_TENSOR(IntEnum): MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { - MODEL_ARCH.LLAMA: "llama", - MODEL_ARCH.FALCON: "falcon", - MODEL_ARCH.BAICHUAN:"baichuan", - MODEL_ARCH.GPT2: "gpt2", - MODEL_ARCH.GPTJ: "gptj", - MODEL_ARCH.GPTNEOX: "gptneox", - MODEL_ARCH.MPT: "mpt", + MODEL_ARCH.LLAMA: "llama", + MODEL_ARCH.FALCON: "falcon", + MODEL_ARCH.BAICHUAN: "baichuan", + MODEL_ARCH.GPT2: "gpt2", + MODEL_ARCH.GPTJ: "gptj", + MODEL_ARCH.GPTNEOX: "gptneox", + MODEL_ARCH.MPT: "mpt", + MODEL_ARCH.STARCODER: "starcoder", } MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = { @@ -171,6 +173,18 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = { MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", }, + MODEL_ARCH.STARCODER: { + MODEL_TENSOR.TOKEN_EMBD: "token_embd", + MODEL_TENSOR.POS_EMBD: "position_embd", + MODEL_TENSOR.OUTPUT_NORM: "output_norm", + MODEL_TENSOR.OUTPUT: "output", + MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm", + MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv", + MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output", + MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm", + MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", + MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", + }, MODEL_ARCH.GPT2: { # TODO }, -- cgit v1.2.3