diff options
Diffstat (limited to 'ggml/src/iqk')
| -rw-r--r-- | ggml/src/iqk/iqk_mul_mat.cpp | 69 | ||||
| -rw-r--r-- | ggml/src/iqk/iqk_quantize.cpp | 23 |
2 files changed, 44 insertions, 48 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 7543d895..33b0a0d5 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -140,8 +140,9 @@ bool iqk_mul_mat(long Nx, long Ny, long ne00, return false; } - auto row_size_qx = strideA*ggml_type_size(ggml_type(typeA)); - auto row_size_qy = strideB*ggml_type_size(ggml_type(typeB)); + size_t row_size_qx = strideA; //*ggml_type_size(ggml_type(typeA)); + size_t row_size_qy = strideB; //*ggml_type_size(ggml_type(typeB)); + //if (ith == 0) printf("%s: ne00 = %d, row_size_qx = %d, strideA = %d\n", __func__, int(ne00), int(row_size_qx), int(strideA)); auto nrc_x = (Nx + nth - 1)/nth; auto first_x = ith*nrc_x; @@ -165,8 +166,8 @@ bool iqk_mul_mat_moe(long Nx, long Ny, long ne00, int ne11, if (!MulMat::prepare(typeA, typeB, ne00, mm, Ny)) { return false; } - auto row_size_qx = strideA*ggml_type_size(ggml_type(typeA)); - auto row_size_qy = strideB*ggml_type_size(ggml_type(typeB)); + size_t row_size_qx = strideA; //*ggml_type_size(ggml_type(typeA)); + size_t row_size_qy = strideB; //*ggml_type_size(ggml_type(typeB)); int nrc_x = (Nx + nth - 1)/nth; int first_x = ith*nrc_x; if (first_x + nrc_x > Nx) nrc_x = Nx - first_x; @@ -378,11 +379,17 @@ struct ScaleIQ4XS { const __m128i m32 = _mm_set1_epi16(-32); }; -template <typename Block> +template <typename Block, bool per_row_scale = false> struct BaseDequantizer { BaseDequantizer(const void * vx, size_t bx) : vx(vx), bx(bx) {} inline void new_row(int ix) { - x = (const Block *)((const char *)vx + bx*ix); + if constexpr (per_row_scale) { + const float * dptr = (const float *)((const char *)vx + bx*ix); + d = *dptr; + x = (const Block *)(dptr + 1); + } else { + x = (const Block *)((const char *)vx + bx*ix); + } } const void * vx; @@ -700,14 +707,13 @@ struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> { }; -struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> { +struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn, true> { DequantizerIQ2TN(const void * vx, size_t bx) : BaseDequantizer(vx, bx) {} template <typename Q8> inline void new_block(int i, [[maybe_unused]] const Q8& q8, [[maybe_unused]] __m256 * accm, [[maybe_unused]] __m512i * scales) { new_block(i); } inline void new_block(int i) { - d = GGML_FP16_TO_FP32(x[i].d); bits.prepare(x[i].qs); } Q2Bits bits; @@ -1158,7 +1164,7 @@ static void mul_mat_iq2tn_q8_K_AVX512(int n, const void * vx, size_t bx, const D deq1.new_block(i); deq2.new_block(i); - float d = 0.5f*(deq1.d + deq2.d); // The scale is supposed to be per per tensor, so we can use the same scale for both rows + //float d = 0.5f*(deq1.d + deq2.d); // The scale is supposed to be per per tensor, so we can use the same scale for both rows for (int iy = 0; iy < nrc_y; ++iy) { auto sumi_scales_256 = _mm256_madd_epi16(_mm256_set1_epi16(-1), q8.load_bsums(iy, i)); @@ -1176,7 +1182,7 @@ static void mul_mat_iq2tn_q8_K_AVX512(int n, const void * vx, size_t bx, const D sumi_1 = _mm512_dpbusd_epi32(sumi_1, deq1.bits.values[3], q8q); sumi_2 = _mm512_dpbusd_epi32(sumi_2, deq2.bits.values[3], q8q); // The scale is supposed to be per per tensor, so we can use the same scale - auto vd = _mm512_set1_ps(d*q8.scale(iy, i)); + auto vd = _mm512_set1_ps(/*d* */q8.scale(iy, i)); accd[2*iy+0] = _mm512_fmadd_ps(vd, _mm512_cvtepi32_ps(sumi_1), accd[2*iy+0]); accd[2*iy+1] = _mm512_fmadd_ps(vd, _mm512_cvtepi32_ps(sumi_2), accd[2*iy+1]); // Leaving this here just in case ternary models start using per row scales @@ -1187,8 +1193,8 @@ static void mul_mat_iq2tn_q8_K_AVX512(int n, const void * vx, size_t bx, const D } for (int iy = 0; iy < nrc_y; ++iy) { - info.store(ix+0, iy, _mm512_reduce_add_ps(accd[2*iy+0])); - info.store(ix+1, iy, _mm512_reduce_add_ps(accd[2*iy+1])); + info.store(ix+0, iy, deq1.d*_mm512_reduce_add_ps(accd[2*iy+0])); + info.store(ix+1, iy, deq2.d*_mm512_reduce_add_ps(accd[2*iy+1])); } } @@ -4104,14 +4110,23 @@ struct Q2bits { } }; -template <typename block_q> +template <typename block_q, bool has_row_scale = false> struct BaseDequantizer { BaseDequantizer(const void * vx, size_t bx, int nrc) : vx(vx), x(nullptr), bx(bx), nrc(nrc) {} - inline void new_row(int ix) { x = (const block_q *)((const char *)vx + ix*bx); } + inline void new_row(int ix) { + if constexpr (has_row_scale) { + const float * dptr = (const float *)((const char *)vx + ix*bx); + d = *dptr; + x = (const block_q *)(dptr + 1); + } else { + x = (const block_q *)((const char *)vx + ix*bx); + } + } const void * vx; const block_q * x; const size_t bx; const int nrc; + float d; }; struct DequantizerQ4K final : public BaseDequantizer<block_q4_K> { @@ -4133,7 +4148,6 @@ struct DequantizerQ4K final : public BaseDequantizer<block_q4_K> { Q4bits bits; Scales8 s8; - float d; }; struct HighBit5 { @@ -4202,7 +4216,6 @@ struct DequantizerQ5K final : public BaseDequantizer<block_q5_K> { uint8x16x2_t hbits; - float d; }; inline int32x4x4_t make_wider(const int16x8x2_t& scales16) { @@ -4256,7 +4269,6 @@ struct DequantizerQ6K final : public BaseDequantizer<block_q6_K> { const uint8x16_t mhb = vdupq_n_u8(0x30); - float d; }; struct DequantizerQ3K final : public BaseDequantizer<block_q3_K> { @@ -4317,7 +4329,6 @@ struct DequantizerQ3K final : public BaseDequantizer<block_q3_K> { uint8x16_t mask; HighBit3 h; - float d; }; struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> { @@ -4389,7 +4400,6 @@ struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> { Q2bits bits; - float d; }; // ============================= i-quants @@ -4453,7 +4463,6 @@ struct DequantizerIQ4K final : public BaseDequantizer<block_iq4_k> { const int8x16_t values; const uint8x16_t hshuff = vreinterpretq_u8_u32(uint32x4_t{0x09010800, 0x0b030a02, 0x0d050c04, 0x0f070e06}); - float d; }; struct DequantizerIQ5K final : public BaseDequantizer<block_iq5_k> { @@ -4503,7 +4512,6 @@ struct DequantizerIQ5K final : public BaseDequantizer<block_iq5_k> { const uint8x16_t hm = vdupq_n_u8(0x10); uint8x16x2_t hbits; - float d; }; struct DequantizerIQ6K final : public BaseDequantizer<block_iq6_k> { @@ -4538,7 +4546,6 @@ struct DequantizerIQ6K final : public BaseDequantizer<block_iq6_k> { const int8x16x4_t values; const uint8x16_t hm = vdupq_n_u8(0x30); - float d; }; struct DequantizerIQ2K final : public BaseDequantizer<block_iq2_k> { @@ -4570,7 +4577,6 @@ struct DequantizerIQ2K final : public BaseDequantizer<block_iq2_k> { const int8x16_t values = vreinterpretq_s8_u64(vdupq_n_u64(0x000000001101f3e1)); const uint8x16_t hshuff = vreinterpretq_u8_u32(uint32x4_t{0x09010800, 0x0b030a02, 0x0d050c04, 0x0f070e06}); - float d; }; struct DequantizerIQ3K final : public BaseDequantizer<block_iq3_k> { @@ -4630,7 +4636,6 @@ struct DequantizerIQ3K final : public BaseDequantizer<block_iq3_k> { const uint8x16_t sign_mask = vreinterpretq_u8_u64(uint64x2_t{0x0808040402020101, 0x8080404020201010}); const uint8x16_t sign_shuffle = load_sign_shuffle(); - float d; }; struct DequantizerIQ4XS final : public BaseDequantizer<block_iq4_xs> { @@ -4688,7 +4693,6 @@ struct DequantizerIQ4XS final : public BaseDequantizer<block_iq4_xs> { constexpr static uint32x2_t hshuff = {0x05010400, 0x07030602}; - float d; }; struct SimpleBits { @@ -4747,7 +4751,6 @@ struct DequantizerIQ2XXS final : public BaseDequantizer<block_iq2_xxs> { uint32x4x4_t data; SimpleBits bits; - float d; }; inline int32x4x4_t prepare_4bit_scales16(const uint8_t * sc) { @@ -4793,7 +4796,6 @@ struct DequantizerIQ2XS final : public BaseDequantizer<block_iq2_xs> { SimpleBits bits; - float d; }; @@ -4857,7 +4859,6 @@ struct DequantizerIQ2S final : public BaseDequantizer<block_iq2_s> { SimpleBits bits; SignHelper sh; - float d; }; @@ -4891,8 +4892,6 @@ struct DequantizerIQ3XXS final : public BaseDequantizer<block_iq3_xxs> { SimpleBits bits; uint32x4x2_t gas; - float d; - }; struct DequantizerIQ3S final : public BaseDequantizer<block_iq3_s> { @@ -4951,11 +4950,9 @@ struct DequantizerIQ3S final : public BaseDequantizer<block_iq3_s> { SignHelper sh; uint32x4x2_t gas; - float d; - }; -struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> { +struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn, true> { DequantizerIQ2TN(const void * vx, size_t bx, int nrc) : BaseDequantizer(vx, bx, nrc) {} constexpr static int num_blocks() { return 16; } @@ -4966,9 +4963,7 @@ struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> { // d = GGML_FP16_TO_FP32(x[i].d); //} - inline void new_block(int i) { - d = GGML_FP16_TO_FP32(x[i].d); - } + inline void new_block(int) { } template <typename Q8> inline void compute(const Q8& q8, int i, int j, int32x4_t * sumi) { @@ -5019,8 +5014,6 @@ struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> { } Q2bits bits; - - float d; }; template <int nrc_y> diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp index 42441584..28bad18e 100644 --- a/ggml/src/iqk/iqk_quantize.cpp +++ b/ggml/src/iqk/iqk_quantize.cpp @@ -1972,15 +1972,15 @@ void quantize_row_iq2_tn_ref(const float * x, block_iq2_tn * y, int64_t k) { auto quantize = [] (float xmax, float x) { return x < -0.5f*xmax ? 0 : x < 0.5f*xmax ? 1 : 2; }; + int n = k; + float max = x[0]; + for (int j = 1; j < n; ++j) max = std::max(max, fabsf(x[j])); + + *(float *)y = max; + y = (block_iq2_tn *)((float *)y + 1); for (int ibl = 0; ibl < nb; ++ibl) { auto xb = x + QK_K*ibl; - float max = xb[0]; - for (int j = 0; j < QK_K; ++j) { - float ax = fabsf(xb[j]); - max = std::max(ax, max); - } - y[ibl].d = GGML_FP32_TO_FP16(max); auto qs = y[ibl].qs; for (int l = 0; l < QK_K/128; ++l) { for (int j = 0; j < 32; ++j) { @@ -1992,7 +1992,7 @@ void quantize_row_iq2_tn_ref(const float * x, block_iq2_tn * y, int64_t k) { } } -void quantize_row_iq2_tn(const float * x, void * y, int64_t k) { +void quantize_row_iq2_tn(const float * x, void * y, int64_t k) { quantize_row_iq2_tn_ref(x, (block_iq2_tn *)y, k); } @@ -2009,9 +2009,11 @@ size_t quantize_iq2_tn(const float * src, void * dst, int64_t nrows, int64_t n_p void dequantize_row_iq2_tn(const block_iq2_tn * x, float * y, int64_t k) { GGML_ASSERT(k%QK_K == 0); + const float * dptr = (const float *)x; + float d = *dptr; + x = (const block_iq2_tn *)(dptr + 1); int nb = k/QK_K; for (int ibl = 0; ibl < nb; ++ibl) { - float d = GGML_FP16_TO_FP32(x[ibl].d); auto qs = x[ibl].qs; for (int l = 0; l < QK_K/128; ++l) { for (int j = 0; j < 32; ++j) { @@ -2039,13 +2041,14 @@ void vec_dot_iq2_tn_q8_k(int n, float * s, size_t bs, const void * vx, size_t const int nb = n / QK_K; - const block_iq2_tn * x = (const block_iq2_tn *)vx; + const float * dptr = (const float *)vx; + const float d = *dptr; + const block_iq2_tn * x = (const block_iq2_tn *)(dptr + 1); const block_q8_K * y = (const block_q8_K *)vy; float sumf = 0; for (int i = 0; i < nb; i++) { - float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; auto qs = x[i].qs; auto q8 = y[i].qs; int sumi1 = 0, sumi2 = 0, sumi3 = 0,sumi4 = 0; |
