diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2025-05-25 09:55:36 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-25 09:55:36 +0300 |
commit | 24c010b3916b5f1bb9d712d610d1fe9308ef7df4 (patch) | |
tree | 892902985664a19b75c89781987594391face168 | |
parent | c7ecd4e23acb42f1150abf0b118e0a2c7b8dc959 (diff) |
Add missing gguf-py constants (#458)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- | gguf-py/gguf/constants.py | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 6819979f..ecb8234a 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -1290,6 +1290,10 @@ class GGMLQuantizationType(IntEnum): Q8_KR8 = 149 Q8_K128 = 150 Q8_KV = 151 + IQ5_KS = 152 + IQ2_KT = 153 + IQ3_KT = 154 + IQ4_KT = 155 Q4_0_R8 = 202 Q5_0_R4 = 206 Q8_0_R8 = 208 @@ -1315,6 +1319,7 @@ class GGMLQuantizationType(IntEnum): IQ4_K_R4 = 339 IQ5_K_R4 = 340 IQ4_KS_R4 = 344 + IQ5_KS_R4 = 352 Q8_KV_R8 = 398 Q8_K_R8 = 399 @@ -1368,6 +1373,10 @@ class LlamaFileType(IntEnum): MOSTLY_IQ2_KS = 138 #except 1d tensors MOSTLY_IQ4_KSS = 139 #except 1d tensors MOSTLY_Q8_KV = 140 #except 1d tensors + MOSTLY_IQ5_KS = 141 #except 1d tensors + MOSTLY_IQ2_KT = 142 #except 1d tensors + MOSTLY_IQ3_KT = 143 #except 1d tensors + MOSTLY_IQ4_KT = 144 #except 1d tensors MOSTLY_Q4_0_R8 = 202 #except 1d tensors MOSTLY_Q8_0_R8 = 207 #except 1d tensors MOSTLY_Q5_0_R4 = 208 #except 1d tensors @@ -1393,6 +1402,7 @@ class LlamaFileType(IntEnum): MOSTLY_IQ4_K_R4 = 332 #except 1d tensors MOSTLY_IQ5_K_R4 = 333 #except 1d tensors MOSTLY_IQ4_KS_R4 = 337 #except 1d tensors + MOSTLY_IQ5_KS_R4 = 341 #except 1d tensors MOSTLY_Q8_KV_R8 = 398 #except 1d tensors MOSTLY_Q8_K_R8 = 399 #except 1d tensors @@ -1495,6 +1505,10 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = { GGMLQuantizationType.Q8_KR8 : ( 256, 292), GGMLQuantizationType.Q8_K128 : ( 128, 140), GGMLQuantizationType.Q8_KV : ( 32, 32), + GGMLQuantizationType.IQ5_KS : ( 256, 168), + GGMLQuantizationType.IQ2_KT : ( 256, 68), + GGMLQuantizationType.IQ3_KT : ( 256, 100), + GGMLQuantizationType.IQ4_KT : ( 256, 128), GGMLQuantizationType.Q4_0_R8 : ( 32, 18), GGMLQuantizationType.Q5_0_R4 : ( 32, 22), GGMLQuantizationType.Q8_0_R8 : ( 32, 34), @@ -1520,6 +1534,7 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = { GGMLQuantizationType.IQ4_K_R4 : ( 256, 144), GGMLQuantizationType.IQ5_K_R4 : ( 256, 176), 
GGMLQuantizationType.IQ4_KS_R4 : ( 256, 136), + GGMLQuantizationType.IQ5_KS_R4 : ( 256, 168), GGMLQuantizationType.Q8_KV_R8 : ( 32, 32), GGMLQuantizationType.Q8_K_R8 : ( 256, 258), } |