summaryrefslogtreecommitdiff
path: root/gguf-py
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py')
-rw-r--r--gguf-py/gguf/gguf.py42
1 files changed, 28 insertions, 14 deletions
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 7f7204ea..e0e0dbcb 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -77,13 +77,14 @@ KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"
class MODEL_ARCH(IntEnum):
- LLAMA : int = auto()
- FALCON : int = auto()
- BAICHUAN:int = auto()
- GPT2 : int = auto()
- GPTJ : int = auto()
- GPTNEOX: int = auto()
- MPT : int = auto()
+ LLAMA : int = auto()
+ FALCON : int = auto()
+ BAICHUAN : int = auto()
+ GPT2 : int = auto()
+ GPTJ : int = auto()
+ GPTNEOX : int = auto()
+ MPT : int = auto()
+ STARCODER : int = auto()
class MODEL_TENSOR(IntEnum):
@@ -107,13 +108,14 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
- MODEL_ARCH.LLAMA: "llama",
- MODEL_ARCH.FALCON: "falcon",
- MODEL_ARCH.BAICHUAN:"baichuan",
- MODEL_ARCH.GPT2: "gpt2",
- MODEL_ARCH.GPTJ: "gptj",
- MODEL_ARCH.GPTNEOX: "gptneox",
- MODEL_ARCH.MPT: "mpt",
+ MODEL_ARCH.LLAMA: "llama",
+ MODEL_ARCH.FALCON: "falcon",
+ MODEL_ARCH.BAICHUAN: "baichuan",
+ MODEL_ARCH.GPT2: "gpt2",
+ MODEL_ARCH.GPTJ: "gptj",
+ MODEL_ARCH.GPTNEOX: "gptneox",
+ MODEL_ARCH.MPT: "mpt",
+ MODEL_ARCH.STARCODER: "starcoder",
}
MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
@@ -171,6 +173,18 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
},
+ MODEL_ARCH.STARCODER: {
+ MODEL_TENSOR.TOKEN_EMBD: "token_embd",
+ MODEL_TENSOR.POS_EMBD: "position_embd",
+ MODEL_TENSOR.OUTPUT_NORM: "output_norm",
+ MODEL_TENSOR.OUTPUT: "output",
+ MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
+ MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
+ MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
+ MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
+ MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
+ MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
+ },
MODEL_ARCH.GPT2: {
# TODO
},