summaryrefslogtreecommitdiff
path: root/gguf-py/gguf/constants.py
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r--gguf-py/gguf/constants.py49
1 files changed, 48 insertions, 1 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index e343c2ef..1bea66aa 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -161,6 +161,7 @@ class Keys:
SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
MIDDLE_ID = "tokenizer.ggml.middle_token_id"
EOT_ID = "tokenizer.ggml.eot_token_id"
+ EOM_ID = "tokenizer.ggml.eom_token_id"
class Adapter:
TYPE = "adapter.type"
@@ -216,6 +217,7 @@ class MODEL_ARCH(IntEnum):
CHATGLM = auto()
BITNET = auto()
T5 = auto()
+ T5ENCODER = auto()
JAIS = auto()
@@ -343,6 +345,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.CHATGLM: "chatglm",
MODEL_ARCH.BITNET: "bitnet",
MODEL_ARCH.T5: "t5",
+ MODEL_ARCH.T5ENCODER: "t5encoder",
MODEL_ARCH.JAIS: "jais",
}
@@ -1035,6 +1038,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.ENC_FFN_UP,
MODEL_TENSOR.ENC_OUTPUT_NORM,
],
+ MODEL_ARCH.T5ENCODER: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ENC_ATTN_NORM,
+ MODEL_TENSOR.ENC_ATTN_Q,
+ MODEL_TENSOR.ENC_ATTN_K,
+ MODEL_TENSOR.ENC_ATTN_V,
+ MODEL_TENSOR.ENC_ATTN_OUT,
+ MODEL_TENSOR.ENC_ATTN_REL_B,
+ MODEL_TENSOR.ENC_FFN_NORM,
+ MODEL_TENSOR.ENC_FFN_GATE,
+ MODEL_TENSOR.ENC_FFN_DOWN,
+ MODEL_TENSOR.ENC_FFN_UP,
+ MODEL_TENSOR.ENC_OUTPUT_NORM,
+ ],
MODEL_ARCH.JAIS: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
@@ -1145,6 +1163,19 @@ class GGMLQuantizationType(IntEnum):
F64 = 28
IQ1_M = 29
BF16 = 30
+ Q4_0_4_4 = 31
+ Q4_0_4_8 = 32
+ Q4_0_8_8 = 33
+ IQ1_BN = 34
+ IQ2_BN = 35
+ Q8_K64 = 36
+ IQ2_K = 37
+ IQ3_K = 38
+ IQ4_K = 39
+ IQ5_K = 40
+ IQ6_K = 41
+ IQ2_TN = 42
+
# TODO: add GGMLFileType from ggml_ftype in ggml.h
@@ -1157,7 +1188,7 @@ class LlamaFileType(IntEnum):
MOSTLY_F16 = 1 # except 1d tensors
MOSTLY_Q4_0 = 2 # except 1d tensors
MOSTLY_Q4_1 = 3 # except 1d tensors
- MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
+ # MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
# MOSTLY_Q4_2 = 5 # support has been removed
# MOSTLY_Q4_3 = 6 # support has been removed
MOSTLY_Q8_0 = 7 # except 1d tensors
@@ -1186,6 +1217,18 @@ class LlamaFileType(IntEnum):
MOSTLY_IQ4_XS = 30 # except 1d tensors
MOSTLY_IQ1_M = 31 # except 1d tensors
MOSTLY_BF16 = 32 # except 1d tensors
+ MOSTLY_Q4_0_4_4 = 33 # except 1d tensors
+ MOSTLY_Q4_0_4_8 = 34 # except 1d tensors
+ MOSTLY_Q4_0_8_8 = 35 # except 1d tensors
+ MOSTLY_IQ1_BN = 36 # except 1d tensors
+ MOSTLY_IQ2_BN = 37 # except 1d tensors
+ MOSTLY_IQ2_K = 38 # except 1d tensors
+ MOSTLY_IQ3_K = 39 # except 1d tensors
+ MOSTLY_IQ4_K = 40 # except 1d tensors
+ MOSTLY_IQ5_K = 41 # except 1d tensors
+ MOSTLY_IQ6_K = 42 # except 1d tensors
+ MOSTLY_IQ2_TN = 43 # except 1d tensors
+
GUESSED = 1024 # not specified in the model file
@@ -1259,6 +1302,9 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
GGMLQuantizationType.F64: (1, 8),
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
GGMLQuantizationType.BF16: (1, 2),
+ GGMLQuantizationType.Q4_0_4_4: (32, 2 + 16),
+ GGMLQuantizationType.Q4_0_4_8: (32, 2 + 16),
+ GGMLQuantizationType.Q4_0_8_8: (32, 2 + 16),
}
@@ -1327,3 +1373,4 @@ KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID