summary | refs | log | tree | commit | diff
path: root/ggml.c
diff options
context:
space:
mode:
Diffstat (limited to 'ggml.c')
-rw-r--r-- ggml.c 26
1 files changed, 26 insertions, 0 deletions
diff --git a/ggml.c b/ggml.c
index 62f0f18e..adb38710 100644
--- a/ggml.c
+++ b/ggml.c
@@ -573,6 +573,17 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = ggml_vec_dot_q6_K_q8_K,
.vec_dot_type = GGML_TYPE_Q8_K,
},
+ [GGML_TYPE_IQ2_XXS] = {
+ .type_name = "iq2_xxs",
+ .blck_size = QK_K,
+ .type_size = sizeof(block_iq2_xxs),
+ .is_quantized = true,
+ .to_float = (ggml_to_float_t) dequantize_row_iq2_xxs,
+ .from_float = quantize_row_iq2_xxs,
+ .from_float_reference = (ggml_from_float_t) quantize_row_iq2_xxs_reference,
+ .vec_dot = ggml_vec_dot_iq2_xxs_q8_K,
+ .vec_dot_type = GGML_TYPE_Q8_K,
+ },
[GGML_TYPE_Q8_K] = {
.type_name = "q8_K",
.blck_size = QK_K,
@@ -2111,6 +2122,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
case GGML_FTYPE_MOSTLY_Q4_K: wtype = GGML_TYPE_Q4_K; break;
case GGML_FTYPE_MOSTLY_Q5_K: wtype = GGML_TYPE_Q5_K; break;
case GGML_FTYPE_MOSTLY_Q6_K: wtype = GGML_TYPE_Q6_K; break;
+ case GGML_FTYPE_MOSTLY_IQ2_XXS: wtype = GGML_TYPE_IQ2_XXS; break;
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
}
@@ -7436,6 +7448,7 @@ static void ggml_compute_forward_add(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
{
ggml_compute_forward_add_q_f32(params, src0, src1, dst);
} break;
@@ -7700,6 +7713,7 @@ static void ggml_compute_forward_add1(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
{
ggml_compute_forward_add1_q_f32(params, src0, src1, dst);
} break;
@@ -7814,6 +7828,7 @@ static void ggml_compute_forward_acc(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
default:
{
GGML_ASSERT(false);
@@ -10455,6 +10470,7 @@ static void ggml_compute_forward_out_prod(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
{
ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
} break;
@@ -10629,6 +10645,7 @@ static void ggml_compute_forward_set(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
default:
{
GGML_ASSERT(false);
@@ -10823,6 +10840,7 @@ static void ggml_compute_forward_get_rows(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
{
ggml_compute_forward_get_rows_q(params, src0, src1, dst);
} break;
@@ -11459,6 +11477,7 @@ static void ggml_compute_forward_alibi(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
case GGML_TYPE_Q8_K:
case GGML_TYPE_I8:
case GGML_TYPE_I16:
@@ -11533,6 +11552,7 @@ static void ggml_compute_forward_clamp(
case GGML_TYPE_Q4_K:
case GGML_TYPE_Q5_K:
case GGML_TYPE_Q6_K:
+ case GGML_TYPE_IQ2_XXS:
case GGML_TYPE_Q8_K:
case GGML_TYPE_I8:
case GGML_TYPE_I16:
@@ -18648,6 +18668,12 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
block_q6_K * block = (block_q6_K*)dst + start / QK_K;
result = ggml_quantize_q6_K(src + start, block, n, n, hist);
} break;
+ case GGML_TYPE_IQ2_XXS:
+ {
+ GGML_ASSERT(start % QK_K == 0);
+ block_iq2_xxs * block = (block_iq2_xxs*)dst + start / QK_K;
+ result = ggml_quantize_iq2_xxs(src + start, block, n, n, hist);
+ } break;
case GGML_TYPE_F16:
{
int elemsize = sizeof(ggml_fp16_t);