diff options
author | Nam D. Tran <42194884+namtranase@users.noreply.github.com> | 2023-12-27 22:39:45 +0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-27 17:39:45 +0200 |
commit | f6793491b5af6da75edad34d6f503ef86d31b09f (patch) | |
tree | ba50b7ae1aba91cb465a06970a11137baab7afcf /gguf-py/gguf/constants.py | |
parent | 879b690a9e1eb1ab0a29b58236fc76978fb4d902 (diff) |
llama : add AWQ for llama, llama2, mpt, and mistral models (#4593)
* update: awq support llama-7b model
* update: change order
* update: benchmark results for llama2-7b
* update: mistral 7b v1 benchmark
* update: support 4 models
* fix: Readme
* update: ready for PR
* update: readme
* fix: readme
* update: change order import
* black
* format code
* update: work for both mpt and awqmpt
* update: readme
* Rename to llm_build_ffn_mpt_awq
* Formatted other files
* Fixed params count
* fix: remove code
* update: more detail for mpt
* fix: readme
* fix: readme
* update: change folder architecture
* fix: common.cpp
* fix: readme
* fix: remove ggml_repeat
* update: cicd
* update: cicd
* update: remove use_awq arg
* update: readme
* llama : adapt plamo to new ffn
ggml-ci
---------
Co-authored-by: Trần Đức Nam <v.namtd12@vinai.io>
Co-authored-by: Le Hoang Anh <v.anhlh33@vinai.io>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 3 |
1 files changed, 3 insertions, 0 deletions
```diff
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 4cd87cdd..c9be2111 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -120,6 +120,7 @@ class MODEL_TENSOR(IntEnum):
     FFN_GATE = auto()
     FFN_DOWN = auto()
     FFN_UP = auto()
+    FFN_ACT = auto()
     FFN_GATE_EXP = auto()
     FFN_DOWN_EXP = auto()
     FFN_UP_EXP = auto()
@@ -169,6 +170,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
     MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
     MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
+    MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
     MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate.{xid}",
     MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}",
     MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}",
@@ -269,6 +271,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_NORM,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_ACT,
     ],
     MODEL_ARCH.GPTJ: [
         MODEL_TENSOR.TOKEN_EMBD,
```