diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-05-27 19:04:25 +0200 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:49 +0300 |
commit | 9ac9e928d55f4c3e557d0054e94945672ed8631b (patch) | |
tree | 3577864e88ec861699b004f233eb9577a4dd2fc3 | |
parent | 3f996d0c70b8ac5368f60ee3ebb1ac636ce8cf91 (diff) |
Add Q8_0
-rw-r--r-- | iqk_mul_mat.cpp | 28 |
1 file changed, 27 insertions, 1 deletion
```diff
diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index c10ca438..cc4c9c30 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -2404,6 +2404,27 @@ struct DequantizerQ50 final : public BaseLegacyDequantizer<block_q5_0> {
 
 };
 
+struct DequantizerQ80 final : public BaseLegacyDequantizer<block_q8_0> {
+
+    DequantizerQ80(const void * vx, size_t bx) : BaseLegacyDequantizer(vx, bx) {}
+
+    inline void prepare1(int i) {
+        bits.b[0] = vld1q_s8(x[i].qs);
+        bits.b[1] = vld1q_s8(x[i].qs+16);
+    }
+
+    inline float16x4_t new_block(int i) {
+        ggml_half aux[4];
+        for (int k = 0; k < 4; ++k) {
+            aux[k] = x[4*i+k].d;
+            bits.b[2*k+0] = vld1q_s8(x[4*i+k].qs);
+            bits.b[2*k+1] = vld1q_s8(x[4*i+k].qs+16);
+        }
+        return vld1_f16((const float16_t *)aux);
+    }
+
+};
+
 struct DequantizerQ51 final : public BaseLegacyDequantizer<block_q5_1> {
 
     DequantizerQ51(const void * vx, size_t bx) : BaseLegacyDequantizer(vx, bx) {}
@@ -2541,7 +2562,8 @@ static void mul_mat_qX_0_q8_0_1(int n, const void * vx, size_t bx, const DataInf
 }
 
 template <typename Dequantizer> void MulMat::set_functions(MulMat& m) {
-    if constexpr (std::is_same_v<Dequantizer, DequantizerQ40> || std::is_same_v<Dequantizer, DequantizerQ50>) {
+    if constexpr (std::is_same_v<Dequantizer, DequantizerQ40> || std::is_same_v<Dequantizer, DequantizerQ50> ||
+                  std::is_same_v<Dequantizer, DequantizerQ80>) {
         m.funcs[0] = mul_mat_qX_0_q8_0<Dequantizer, 1>;
         m.funcs[1] = mul_mat_qX_0_q8_0<Dequantizer, 2>;
         m.funcs[2] = mul_mat_qX_0_q8_0<Dequantizer, 3>;
@@ -2614,6 +2636,10 @@ bool MulMat::set_mul_mat(int typeA, int ne00, MulMat& m, int& row_size_q8, int /
             MulMat::set_functions<DequantizerQ51>(m);
             row_size_q8 = ggml_row_size(GGML_TYPE_Q8_1, ne00);
             break;
+        case GGML_TYPE_Q8_0:
+            MulMat::set_functions<DequantizerQ80>(m);
+            row_size_q8 = ggml_row_size(GGML_TYPE_Q8_0, ne00);
+            break;
         default:
             return false;
     }
```