diff options
author | saood06 <saood05@gmail.com> | 2025-07-10 02:37:36 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-10 02:37:36 -0500 |
commit | c53cb65251168006b29da91fea7e5e42d815af15 (patch) | |
tree | 0afdec44454a7e3dc379102de8346bbe6c92cd91 /gguf-py/gguf/constants.py | |
parent | 283753cabcabd30eb2cfb93739d9c1679200bf1f (diff) |
Support for dots.llm1 models (#573)
* Add llama.cpp changes for dots1 support
* Add python changes for dots1 support
* Fix to make it convert
* Remove V reshaping, remove BOS by default for dots1 and fix warmup to handle models without BOS
* Minor fix
* Remove commented lines
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 26 |
1 file changed, 26 insertions, 0 deletions
```diff
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 489714c4..b3b2bc50 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -226,6 +226,7 @@ class MODEL_ARCH(IntEnum):
     T5 = auto()
     T5ENCODER = auto()
     JAIS = auto()
+    DOTS1 = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -362,6 +363,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.T5: "t5",
     MODEL_ARCH.T5ENCODER: "t5encoder",
     MODEL_ARCH.JAIS: "jais",
+    MODEL_ARCH.DOTS1: "dots1",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -1164,6 +1166,30 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.DOTS1: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_EXP_PROBS_B,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_GATE_EXP,
+        MODEL_TENSOR.FFN_GATE_INP,
+        MODEL_TENSOR.FFN_GATE_SHEXP,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_DOWN_EXP,
+        MODEL_TENSOR.FFN_DOWN_SHEXP,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.FFN_UP_EXP,
+        MODEL_TENSOR.FFN_UP_SHEXP,
+    ],
     # TODO
 }
 
```