From f6793491b5af6da75edad34d6f503ef86d31b09f Mon Sep 17 00:00:00 2001 From: "Nam D. Tran" <42194884+namtranase@users.noreply.github.com> Date: Wed, 27 Dec 2023 22:39:45 +0700 Subject: llama : add AWQ for llama, llama2, mpt, and mistral models (#4593) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update: awq support llama-7b model * update: change order * update: benchmark results for llama2-7b * update: mistral 7b v1 benchmark * update: support 4 models * fix: Readme * update: ready for PR * update: readme * fix: readme * update: change order import * black * format code * update: work for bot mpt and awqmpt * update: readme * Rename to llm_build_ffn_mpt_awq * Formatted other files * Fixed params count * fix: remove code * update: more detail for mpt * fix: readme * fix: readme * update: change folder architecture * fix: common.cpp * fix: readme * fix: remove ggml_repeat * update: cicd * update: cicd * uppdate: remove use_awq arg * update: readme * llama : adapt plamo to new ffn ggml-ci --------- Co-authored-by: Trần Đức Nam Co-authored-by: Le Hoang Anh Co-authored-by: Georgi Gerganov --- gguf-py/gguf/constants.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'gguf-py/gguf/constants.py') diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 4cd87cdd..c9be2111 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -120,6 +120,7 @@ class MODEL_TENSOR(IntEnum): FFN_GATE = auto() FFN_DOWN = auto() FFN_UP = auto() + FFN_ACT = auto() FFN_GATE_EXP = auto() FFN_DOWN_EXP = auto() FFN_UP_EXP = auto() @@ -169,6 +170,7 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = { MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate", MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", + MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn", MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate.{xid}", MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down.{xid}", MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up.{xid}", @@ -269,6 +271,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_NORM, MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_ACT, ], MODEL_ARCH.GPTJ: [ MODEL_TENSOR.TOKEN_EMBD, -- cgit v1.2.3