diff options
author | Shijie <821898965@qq.com> | 2023-12-02 02:16:31 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-01 20:16:31 +0200 |
commit | 37c746d687d877bc11803e96b4dc5f378b83c0a0 (patch) | |
tree | 00976a7933be847bcb58e24c54d8a22c5bb0125b /gguf-py/gguf/constants.py | |
parent | 880f57973b8e0091d0f9f50eb5ab4cd4e31582ca (diff) |
llama : add Qwen support (#4281)
* enable qwen to llama.cpp
* llama : do not GPU split bias tensors
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 8bd82dac..685c88f1 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -92,6 +92,7 @@ class MODEL_ARCH(IntEnum): BERT = auto() BLOOM = auto() STABLELM = auto() + QWEN = auto() class MODEL_TENSOR(IntEnum): @@ -132,6 +133,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH.BERT: "bert", MODEL_ARCH.BLOOM: "bloom", MODEL_ARCH.STABLELM: "stablelm", + MODEL_ARCH.QWEN: "qwen", } TENSOR_NAMES: dict[MODEL_TENSOR, str] = { @@ -317,6 +319,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], + MODEL_ARCH.QWEN: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_QKV, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], MODEL_ARCH.GPT2: [ # TODO ], @@ -336,6 +352,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_ARCH.PERSIMMON: [ MODEL_TENSOR.ROPE_FREQS, ], + MODEL_ARCH.QWEN: [ + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_ROT_EMBD, + ], } # |