diff options
-rw-r--r-- | ggml/src/iqk/iqk_gemm_iqk_quants.cpp | 17 | ||||
-rw-r--r-- | ggml/src/iqk/iqk_gemm_iquants.cpp | 14 | ||||
-rw-r--r-- | ggml/src/iqk/iqk_gemm_kquants.cpp | 14 | ||||
-rw-r--r-- | ggml/src/iqk/iqk_gemm_ktquants.cpp | 14 | ||||
-rw-r--r-- | ggml/src/iqk/iqk_gemm_legacy_quants.cpp | 15 |
5 files changed, 67 insertions, 7 deletions
diff --git a/ggml/src/iqk/iqk_gemm_iqk_quants.cpp b/ggml/src/iqk/iqk_gemm_iqk_quants.cpp index a01d7e4c..f4352ff1 100644 --- a/ggml/src/iqk/iqk_gemm_iqk_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_iqk_quants.cpp @@ -3908,6 +3908,23 @@ void mul_mat_iq5_k_r4_q8_k(int n, const void * vx, size_t bx, const DataInfo& in } +bool iqk_convert_iqk_quants_q80_r8([[maybe_unused]] int type, int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, int nrc_x) { + if (n%QK_K != 0 || nrc_x%8 != 0) return false; + return false; + //switch (ggml_type(type)) { + // case GGML_TYPE_IQ2_KS : iqk_convert_iq2_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ2_K : iqk_convert_iq2_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ3_K : iqk_convert_iq3_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ4_KS : iqk_convert_iq4_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ4_K : iqk_convert_iq4_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ5_KS : iqk_convert_iq5_ks_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ5_K : iqk_convert_iq5_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ6_K : iqk_convert_iq6_k_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // default: return false; + //} + //return true; +} + bool iqk_set_kernels_iqk_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, [[maybe_unused]] mul_mat_t& func16) { if (ne00%QK_K != 0 || ggml_type(typeB) != GGML_TYPE_Q8_K) { diff --git a/ggml/src/iqk/iqk_gemm_iquants.cpp b/ggml/src/iqk/iqk_gemm_iquants.cpp index c8688dc6..44e73f9a 100644 --- a/ggml/src/iqk/iqk_gemm_iquants.cpp +++ b/ggml/src/iqk/iqk_gemm_iquants.cpp @@ -3350,6 +3350,20 @@ static void mul_mat_iq3_s_r4_q8_k(int n, const void * vx, size_t bx, const DataI } +bool iqk_convert_iquants_q80_r8([[maybe_unused]] int type, int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, int nrc_x) { + if (n%QK_K != 0 || nrc_x%8 != 0) return false; + return false; + //switch (ggml_type(type)) { + // case GGML_TYPE_IQ2_XXS: iqk_convert_iq2_xxs_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ2_XS : iqk_convert_iq2_xs_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ2_S : iqk_convert_iq2_s_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ3_XXS: iqk_convert_iq3_xxs_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ3_S : iqk_convert_iq3_s_q8_k_r8 (n, vx, bx, vy, nrc_x); break; + // default: return false; + //} + //return true; +} + bool iqk_set_kernels_iquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16) { if (ne00%QK_K != 0 || ggml_type(typeB) != GGML_TYPE_Q8_K) { diff --git a/ggml/src/iqk/iqk_gemm_kquants.cpp b/ggml/src/iqk/iqk_gemm_kquants.cpp index 79e619b8..86f0b102 100644 --- a/ggml/src/iqk/iqk_gemm_kquants.cpp +++ b/ggml/src/iqk/iqk_gemm_kquants.cpp @@ -3704,6 +3704,20 @@ void mul_mat_q8_KV_r8_q8_KV(int n, const void * vx, size_t bx, const DataInfo& i } +bool iqk_convert_kquants_q8X_r8([[maybe_unused]] int type, [[maybe_unused]] int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, [[maybe_unused]] int nrc_x) { + return false; + //switch (ggml_type(type)) { + // case GGML_TYPE_Q2_K: iqk_convert_q2_k_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q3_K: iqk_convert_q3_k_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q4_K: iqk_convert_q4_k_q8_1_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q5_K: iqk_convert_q5_k_q8_1_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q6_K: iqk_convert_q6_k_q8_0_r8(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ4_XS: iqk_convert_iq4_xs_q8_k_r8(n, vx, bx, vy, nrc_x); break; + // default: return false; + //} + //return true; +} + bool iqk_set_kernels_kquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, [[maybe_unused]] mul_mat_t& func16) { auto etypeA = ggml_type(typeA); diff --git a/ggml/src/iqk/iqk_gemm_ktquants.cpp b/ggml/src/iqk/iqk_gemm_ktquants.cpp index 7e895f27..e69e3561 100644 --- a/ggml/src/iqk/iqk_gemm_ktquants.cpp +++ b/ggml/src/iqk/iqk_gemm_ktquants.cpp @@ -1615,17 +1615,17 @@ struct Trellis3 { return result; } inline int8x16x2_t next32(const uint16_t * val, uint32_t v0) const { - auto vka3 = vdupq_n_u32(ka3), vkb3 = vdupq_n_u32(kb3); + auto vka3 = vdupq_n_u32(ka3); int8x16x2_t result = {vdupq_n_s8(-126), vdupq_n_s8(-126)}; int8x16x2_t i8; for (int i = 0; i < 2; ++i) { i8.val[0] = vmulq_u32(mka, vdupq_n_u32(val[2*i+0]+v0)); - i8.val[1] = vmlaq_u32(vkb3, vka3, i8.val[0]); + i8.val[1] = vmulq_u32(vka3, i8.val[0]); i8.val[0] = vandq_u32(i8.val[0], vdupq_n_u32(0x3f3f3f3f)); i8.val[1] = vandq_u32(i8.val[1], vdupq_n_u32(0x3f3f3f3f)); auto s1 = vpaddq_s8(vreinterpretq_s8_u32(i8.val[0]), vreinterpretq_s8_u32(i8.val[1])); i8.val[0] = vmulq_u32(mka, vdupq_n_u32(val[2*i+1]+v0)); - i8.val[1] = vmlaq_u32(vkb3, vka3, i8.val[0]); + i8.val[1] = vmulq_u32(vka3, i8.val[0]); i8.val[0] = vandq_u32(i8.val[0], vdupq_n_u32(0x3f3f3f3f)); i8.val[1] = vandq_u32(i8.val[1], vdupq_n_u32(0x3f3f3f3f)); auto s2 = vpaddq_s8(vreinterpretq_s8_u32(i8.val[0]), vreinterpretq_s8_u32(i8.val[1])); @@ -1634,11 +1634,11 @@ struct Trellis3 { return result; } inline int8x16x4_t next64(const uint32_t * val) const { - auto vka3 = vdupq_n_u32(ka3), vkb3 = vdupq_n_u32(kb3); + auto vka3 = vdupq_n_u32(ka3); int8x16x4_t result = {vdupq_n_s8(-126), vdupq_n_s8(-126), vdupq_n_s8(-126), vdupq_n_s8(-126)}; for (int i = 0; i < 2; ++i) { auto i8_1 = next8(val[4*i+0], val[4*i+1]); - int8x16x2_t i8_2{vmlaq_u32(vkb3, vka3, i8_1.val[0]), vmlaq_u32(vkb3, vka3, i8_1.val[1])}; + int8x16x2_t i8_2{vmulq_u32(vka3, i8_1.val[0]), vmulq_u32(vka3, i8_1.val[1])}; i8_1.val[0] = vandq_u32(i8_1.val[0], vdupq_n_u32(0x3f3f3f3f)); i8_1.val[1] = vandq_u32(i8_1.val[1], vdupq_n_u32(0x3f3f3f3f)); i8_2.val[0] = vandq_u32(i8_2.val[0], vdupq_n_u32(0x3f3f3f3f)); @@ -1646,8 +1646,8 @@ struct Trellis3 { auto s1_1 = vpaddq_s8(vreinterpretq_s8_u32(i8_1.val[0]), vreinterpretq_s8_u32(i8_1.val[1])); auto s1_2 = vpaddq_s8(vreinterpretq_s8_u32(i8_2.val[0]), vreinterpretq_s8_u32(i8_2.val[1])); i8_1 = next8(val[4*i+2], val[4*i+3]); - i8_2.val[0] = vmlaq_u32(vkb3, vka3, i8_1.val[0]); - i8_2.val[1] = vmlaq_u32(vkb3, vka3, i8_1.val[1]); + i8_2.val[0] = vmulq_u32(vka3, i8_1.val[0]); + i8_2.val[1] = vmulq_u32(vka3, i8_1.val[1]); i8_1.val[0] = vandq_u32(i8_1.val[0], vdupq_n_u32(0x3f3f3f3f)); i8_1.val[1] = vandq_u32(i8_1.val[1], vdupq_n_u32(0x3f3f3f3f)); i8_2.val[0] = vandq_u32(i8_2.val[0], vdupq_n_u32(0x3f3f3f3f)); diff --git a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp index 76f8db09..312c556e 100644 --- a/ggml/src/iqk/iqk_gemm_legacy_quants.cpp +++ b/ggml/src/iqk/iqk_gemm_legacy_quants.cpp @@ -2784,6 +2784,21 @@ void mul_mat_q8_0_r8_q8_0(int n, const void * vx, size_t bx, const DataInfo& inf } +bool iqk_convert_legacy_quants_q8_r8([[maybe_unused]] int type, [[maybe_unused]] int n, [[maybe_unused]] const void * vx, [[maybe_unused]] size_t bx, [[maybe_unused]] void * vy, [[maybe_unused]] int nrc_x) { + return false; + //switch (type) { + // case GGML_TYPE_Q4_0 : iqk_convert_qX_q80_r8<block_q4_0, Q4_0_Dequantizer>(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q4_1 : iqk_convert_qX_1_q8_1_r8<block_q4_1, Q4_1_Dequantizer>(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q5_0 : iqk_convert_qX_q80_r8<block_q5_0, Q5_0_Dequantizer>(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q5_1 : iqk_convert_qX_1_q8_1_r8<block_q5_1, Q5_1_Dequantizer<block_q5_1>>(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q6_0 : iqk_convert_qX_q80_r8<block_q6_0, Q6_0_Dequantizer>(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_IQ4_NL: iqk_convert_qX_q80_r8<block_iq4_nl, IQ4_NL0_Dequantizer>(n, vx, bx, vy, nrc_x); break; + // case GGML_TYPE_Q8_0 : iqk_convert_q80_q80_r8(n, vx, bx, vy, nrc_x); break; + // default: return false; + //} + //return true; +} + bool iqk_set_kernels_legacy_quants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16) { if (ne00%QK8_0 != 0) return false; |