diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2025-06-23 15:50:24 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-23 15:50:24 +0200 |
commit | ddda4d9e64fa889389b784f28da6453f14137452 (patch) | |
tree | a5e58bf26d55e181c9ee10b74f328281dbe5df37 /ggml/src/iqk/iqk_mul_mat.cpp | |
parent | 4776dd280976784eb0abd743186cc30370104b78 (diff) |
Much faster prompt processing for I-quants (ARM_NEON) (#550)
* iq2_xxs
55.8 -> 167.5 t/s. iq2_xxs_r4 is at 93.7 t/s.
* iq2_xs
46.4 -> 166.6 t/s. iq2_xs_r4 is at 72.3 t/s.
* iq2_s
42.8 t/s -> 166.8 t/s. iq2_s_r4 is at 71.1 t/s.
* iq3_xxs
51.8 t/s -> 165.6 t/s. iq3_xxs_r4 is at 84.6 t/s.
* iq3_s
46.0 t/s -> 162.0 t/s. iq3_s_r4 is at 79.4 t/s.
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src/iqk/iqk_mul_mat.cpp')
-rw-r--r-- | ggml/src/iqk/iqk_mul_mat.cpp | 5 |
1 file changed, 5 insertions, 0 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 4d7083e2..47edcc98 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -271,6 +271,11 @@ struct MulMat { } #else switch (type) { + case GGML_TYPE_IQ2_XXS: return nrc_y >= 32 ? GGML_TYPE_Q8_K_R8 : type; + case GGML_TYPE_IQ2_XS : return nrc_y >= 32 ? GGML_TYPE_Q8_K_R8 : type; + case GGML_TYPE_IQ2_S : return nrc_y >= 32 ? GGML_TYPE_Q8_K_R8 : type; + case GGML_TYPE_IQ3_XXS: return nrc_y >= 32 ? GGML_TYPE_Q8_K_R8 : type; + case GGML_TYPE_IQ3_S : return nrc_y >= 32 ? GGML_TYPE_Q8_K_R8 : type; case GGML_TYPE_Q4_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; case GGML_TYPE_Q4_1 : return nrc_y >= 32 ? GGML_TYPE_Q8_1 : type; case GGML_TYPE_Q5_0 : return nrc_y >= 32 ? GGML_TYPE_Q8_0_R8 : type; |