summaryrefslogtreecommitdiff
path: root/ggml
diff options
context:
space:
mode:
Diffstat (limited to 'ggml')
-rw-r--r--ggml/src/ggml.c2
-rw-r--r--ggml/src/iqk/iqk_mul_mat.cpp49
2 files changed, 48 insertions, 3 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index ad9393cc..88013f74 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -1289,7 +1289,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.from_float_ref = (ggml_from_float_t)quantize_row_iq4_nl_ref,
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
#if GGML_USE_IQK_MULMAT
-#if defined __AVX2__
+#if defined HAVE_FANCY_SIMD
.vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 45d804a4..e7ab2e5b 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -1750,6 +1750,15 @@ __m256i inline load_iq4nl_values_256() {
return MM256_SET_M128I(val128, val128);
}
+__m128i inline load_iq4k_values_128() {
+ return _mm_loadu_si128((const __m128i *)iq4k_values);
+}
+
+__m256i inline load_iq4k_values_256() {
+ auto val128 = load_iq4k_values_128();
+ return MM256_SET_M128I(val128, val128);
+}
+
#ifdef HAVE_FANCY_SIMD
//====================================== Zen4 ==================================================
@@ -8519,7 +8528,11 @@ struct Q4_0_1_Dequantizer {
struct IQ4_NL_Dequantizer {
Dequantizer4bit b4;
+#ifdef HAVE_FANCY_SIMD
const __m256i values = load_iq4nl_values_256();
+#else
+ const __m256i values = load_iq4k_values_256();
+#endif
inline __m256i dequant(const block_iq4_nl * x) const {
return _mm256_shuffle_epi8(values, b4.dequant(x->qs));
}
@@ -8630,11 +8643,19 @@ struct Q4_0_1_Unpacker final : public Q_Unpacker<block_q4_0, ScaleHelperQ_0_1<8>
using Sum4T = Sum4TypeQ82;
inline static int block_size() { return QK4_0; }
};
+#ifdef HAVE_FANCY_SIMD
struct IQ4_NL_Unpacker final : public Q_Unpacker<block_iq4_nl, ScaleHelperQ_0_1<128>, IQ4_NL_Dequantizer> {
IQ4_NL_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {}
using Sum4T = Sum4TypeQ82;
inline static int block_size() { return QK4_NL; }
};
+#else
+struct IQ4_NL_Unpacker final : public Q_Unpacker<block_iq4_nl, ScaleHelperQ_0, IQ4_NL_Dequantizer> {
+ IQ4_NL_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {}
+ using Sum4T = Sum4TypeQ80;
+ inline static int block_size() { return QK4_NL; }
+};
+#endif
struct Q5_0_Unpacker final : public Q_Unpacker<block_q5_0, ScaleHelperQ_0, Q5_0_Dequantizer> {
Q5_0_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {}
using Sum4T = Sum4TypeQ80;
@@ -9155,9 +9176,29 @@ template <typename Dequantizer> void MulMat::set_functions(MulMat& m) {
m.funcs[6] = mul_mat_qX_1_q8_2_T<Dequantizer, 7>;
m.funcs[7] = mul_mat_qX_1_q8_2_T<Dequantizer, 8>;
}
+ else if constexpr (std::is_same_v<Dequantizer, IQ4_NL_Unpacker>) {
+#ifdef HAVE_FANCY_SIMD
+ m.funcs[0] = mul_mat_qX_1_q8_2_T<Dequantizer, 1>;
+ m.funcs[1] = mul_mat_qX_1_q8_2_T<Dequantizer, 2>;
+ m.funcs[2] = mul_mat_qX_1_q8_2_T<Dequantizer, 3>;
+ m.funcs[3] = mul_mat_qX_1_q8_2_T<Dequantizer, 4>;
+ m.funcs[4] = mul_mat_qX_1_q8_2_T<Dequantizer, 5>;
+ m.funcs[5] = mul_mat_qX_1_q8_2_T<Dequantizer, 6>;
+ m.funcs[6] = mul_mat_qX_1_q8_2_T<Dequantizer, 7>;
+ m.funcs[7] = mul_mat_qX_1_q8_2_T<Dequantizer, 8>;
+#else
+ m.funcs[0] = mul_mat_qX_0_q8_0_T<Dequantizer, 1>;
+ m.funcs[1] = mul_mat_qX_0_q8_0_T<Dequantizer, 2>;
+ m.funcs[2] = mul_mat_qX_0_q8_0_T<Dequantizer, 3>;
+ m.funcs[3] = mul_mat_qX_0_q8_0_T<Dequantizer, 4>;
+ m.funcs[4] = mul_mat_qX_0_q8_0_T<Dequantizer, 5>;
+ m.funcs[5] = mul_mat_qX_0_q8_0_T<Dequantizer, 6>;
+ m.funcs[6] = mul_mat_qX_0_q8_0_T<Dequantizer, 7>;
+ m.funcs[7] = mul_mat_qX_0_q8_0_T<Dequantizer, 8>;
+#endif
+ }
else if constexpr (std::is_same_v<Dequantizer, Q8_0_1_Unpacker> || std::is_same_v<Dequantizer, Q4_0_1_Unpacker> ||
- std::is_same_v<Dequantizer, Q5_0_1_Unpacker> || std::is_same_v<Dequantizer, IQ4_NL_Unpacker> ||
- std::is_same_v<Dequantizer, Q6_0_1_Unpacker>) {
+ std::is_same_v<Dequantizer, Q5_0_1_Unpacker> || std::is_same_v<Dequantizer, Q6_0_1_Unpacker>) {
m.funcs[0] = mul_mat_qX_1_q8_2_T<Dequantizer, 1>;
m.funcs[1] = mul_mat_qX_1_q8_2_T<Dequantizer, 2>;
m.funcs[2] = mul_mat_qX_1_q8_2_T<Dequantizer, 3>;
@@ -9476,7 +9517,11 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) {
case GGML_TYPE_IQ4_NL:
assert (ne00 % QK4_NL == 0);
MulMat::set_functions<IQ4_NL_Unpacker>(mm);
+#ifdef HAVE_FANCY_SIMD
expected_typeB = GGML_TYPE_Q8_2_X4;
+#else
+ expected_typeB = GGML_TYPE_Q8_0_X4;
+#endif
break;
case GGML_TYPE_IQ4_NL_R4:
assert (ne00 % QK4_NL == 0);