summaryrefslogtreecommitdiff
path: root/ggml/src/iqk
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/iqk')
-rw-r--r--ggml/src/iqk/iqk_mul_mat.cpp69
-rw-r--r--ggml/src/iqk/iqk_quantize.cpp23
2 files changed, 44 insertions, 48 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 7543d895..33b0a0d5 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -140,8 +140,9 @@ bool iqk_mul_mat(long Nx, long Ny, long ne00,
return false;
}
- auto row_size_qx = strideA*ggml_type_size(ggml_type(typeA));
- auto row_size_qy = strideB*ggml_type_size(ggml_type(typeB));
+ size_t row_size_qx = strideA; //*ggml_type_size(ggml_type(typeA));
+ size_t row_size_qy = strideB; //*ggml_type_size(ggml_type(typeB));
+ //if (ith == 0) printf("%s: ne00 = %d, row_size_qx = %d, strideA = %d\n", __func__, int(ne00), int(row_size_qx), int(strideA));
auto nrc_x = (Nx + nth - 1)/nth;
auto first_x = ith*nrc_x;
@@ -165,8 +166,8 @@ bool iqk_mul_mat_moe(long Nx, long Ny, long ne00, int ne11,
if (!MulMat::prepare(typeA, typeB, ne00, mm, Ny)) {
return false;
}
- auto row_size_qx = strideA*ggml_type_size(ggml_type(typeA));
- auto row_size_qy = strideB*ggml_type_size(ggml_type(typeB));
+ size_t row_size_qx = strideA; //*ggml_type_size(ggml_type(typeA));
+ size_t row_size_qy = strideB; //*ggml_type_size(ggml_type(typeB));
int nrc_x = (Nx + nth - 1)/nth;
int first_x = ith*nrc_x;
if (first_x + nrc_x > Nx) nrc_x = Nx - first_x;
@@ -378,11 +379,17 @@ struct ScaleIQ4XS {
const __m128i m32 = _mm_set1_epi16(-32);
};
-template <typename Block>
+template <typename Block, bool per_row_scale = false>
struct BaseDequantizer {
BaseDequantizer(const void * vx, size_t bx) : vx(vx), bx(bx) {}
inline void new_row(int ix) {
- x = (const Block *)((const char *)vx + bx*ix);
+ if constexpr (per_row_scale) {
+ const float * dptr = (const float *)((const char *)vx + bx*ix);
+ d = *dptr;
+ x = (const Block *)(dptr + 1);
+ } else {
+ x = (const Block *)((const char *)vx + bx*ix);
+ }
}
const void * vx;
@@ -700,14 +707,13 @@ struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> {
};
-struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> {
+struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn, true> {
DequantizerIQ2TN(const void * vx, size_t bx) : BaseDequantizer(vx, bx) {}
template <typename Q8>
inline void new_block(int i, [[maybe_unused]] const Q8& q8, [[maybe_unused]] __m256 * accm, [[maybe_unused]] __m512i * scales) {
new_block(i);
}
inline void new_block(int i) {
- d = GGML_FP16_TO_FP32(x[i].d);
bits.prepare(x[i].qs);
}
Q2Bits bits;
@@ -1158,7 +1164,7 @@ static void mul_mat_iq2tn_q8_K_AVX512(int n, const void * vx, size_t bx, const D
deq1.new_block(i);
deq2.new_block(i);
- float d = 0.5f*(deq1.d + deq2.d); // The scale is supposed to be per per tensor, so we can use the same scale for both rows
+ //float d = 0.5f*(deq1.d + deq2.d); // The scale is supposed to be per per tensor, so we can use the same scale for both rows
for (int iy = 0; iy < nrc_y; ++iy) {
auto sumi_scales_256 = _mm256_madd_epi16(_mm256_set1_epi16(-1), q8.load_bsums(iy, i));
@@ -1176,7 +1182,7 @@ static void mul_mat_iq2tn_q8_K_AVX512(int n, const void * vx, size_t bx, const D
sumi_1 = _mm512_dpbusd_epi32(sumi_1, deq1.bits.values[3], q8q);
sumi_2 = _mm512_dpbusd_epi32(sumi_2, deq2.bits.values[3], q8q);
// The scale is supposed to be per per tensor, so we can use the same scale
- auto vd = _mm512_set1_ps(d*q8.scale(iy, i));
+ auto vd = _mm512_set1_ps(/*d* */q8.scale(iy, i));
accd[2*iy+0] = _mm512_fmadd_ps(vd, _mm512_cvtepi32_ps(sumi_1), accd[2*iy+0]);
accd[2*iy+1] = _mm512_fmadd_ps(vd, _mm512_cvtepi32_ps(sumi_2), accd[2*iy+1]);
// Leaving this here just in case ternary models start using per row scales
@@ -1187,8 +1193,8 @@ static void mul_mat_iq2tn_q8_K_AVX512(int n, const void * vx, size_t bx, const D
}
for (int iy = 0; iy < nrc_y; ++iy) {
- info.store(ix+0, iy, _mm512_reduce_add_ps(accd[2*iy+0]));
- info.store(ix+1, iy, _mm512_reduce_add_ps(accd[2*iy+1]));
+ info.store(ix+0, iy, deq1.d*_mm512_reduce_add_ps(accd[2*iy+0]));
+ info.store(ix+1, iy, deq2.d*_mm512_reduce_add_ps(accd[2*iy+1]));
}
}
@@ -4104,14 +4110,23 @@ struct Q2bits {
}
};
-template <typename block_q>
+template <typename block_q, bool has_row_scale = false>
struct BaseDequantizer {
BaseDequantizer(const void * vx, size_t bx, int nrc) : vx(vx), x(nullptr), bx(bx), nrc(nrc) {}
- inline void new_row(int ix) { x = (const block_q *)((const char *)vx + ix*bx); }
+ inline void new_row(int ix) {
+ if constexpr (has_row_scale) {
+ const float * dptr = (const float *)((const char *)vx + ix*bx);
+ d = *dptr;
+ x = (const block_q *)(dptr + 1);
+ } else {
+ x = (const block_q *)((const char *)vx + ix*bx);
+ }
+ }
const void * vx;
const block_q * x;
const size_t bx;
const int nrc;
+ float d;
};
struct DequantizerQ4K final : public BaseDequantizer<block_q4_K> {
@@ -4133,7 +4148,6 @@ struct DequantizerQ4K final : public BaseDequantizer<block_q4_K> {
Q4bits bits;
Scales8 s8;
- float d;
};
struct HighBit5 {
@@ -4202,7 +4216,6 @@ struct DequantizerQ5K final : public BaseDequantizer<block_q5_K> {
uint8x16x2_t hbits;
- float d;
};
inline int32x4x4_t make_wider(const int16x8x2_t& scales16) {
@@ -4256,7 +4269,6 @@ struct DequantizerQ6K final : public BaseDequantizer<block_q6_K> {
const uint8x16_t mhb = vdupq_n_u8(0x30);
- float d;
};
struct DequantizerQ3K final : public BaseDequantizer<block_q3_K> {
@@ -4317,7 +4329,6 @@ struct DequantizerQ3K final : public BaseDequantizer<block_q3_K> {
uint8x16_t mask;
HighBit3 h;
- float d;
};
struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> {
@@ -4389,7 +4400,6 @@ struct DequantizerQ2K final : public BaseDequantizer<block_q2_K> {
Q2bits bits;
- float d;
};
// ============================= i-quants
@@ -4453,7 +4463,6 @@ struct DequantizerIQ4K final : public BaseDequantizer<block_iq4_k> {
const int8x16_t values;
const uint8x16_t hshuff = vreinterpretq_u8_u32(uint32x4_t{0x09010800, 0x0b030a02, 0x0d050c04, 0x0f070e06});
- float d;
};
struct DequantizerIQ5K final : public BaseDequantizer<block_iq5_k> {
@@ -4503,7 +4512,6 @@ struct DequantizerIQ5K final : public BaseDequantizer<block_iq5_k> {
const uint8x16_t hm = vdupq_n_u8(0x10);
uint8x16x2_t hbits;
- float d;
};
struct DequantizerIQ6K final : public BaseDequantizer<block_iq6_k> {
@@ -4538,7 +4546,6 @@ struct DequantizerIQ6K final : public BaseDequantizer<block_iq6_k> {
const int8x16x4_t values;
const uint8x16_t hm = vdupq_n_u8(0x30);
- float d;
};
struct DequantizerIQ2K final : public BaseDequantizer<block_iq2_k> {
@@ -4570,7 +4577,6 @@ struct DequantizerIQ2K final : public BaseDequantizer<block_iq2_k> {
const int8x16_t values = vreinterpretq_s8_u64(vdupq_n_u64(0x000000001101f3e1));
const uint8x16_t hshuff = vreinterpretq_u8_u32(uint32x4_t{0x09010800, 0x0b030a02, 0x0d050c04, 0x0f070e06});
- float d;
};
struct DequantizerIQ3K final : public BaseDequantizer<block_iq3_k> {
@@ -4630,7 +4636,6 @@ struct DequantizerIQ3K final : public BaseDequantizer<block_iq3_k> {
const uint8x16_t sign_mask = vreinterpretq_u8_u64(uint64x2_t{0x0808040402020101, 0x8080404020201010});
const uint8x16_t sign_shuffle = load_sign_shuffle();
- float d;
};
struct DequantizerIQ4XS final : public BaseDequantizer<block_iq4_xs> {
@@ -4688,7 +4693,6 @@ struct DequantizerIQ4XS final : public BaseDequantizer<block_iq4_xs> {
constexpr static uint32x2_t hshuff = {0x05010400, 0x07030602};
- float d;
};
struct SimpleBits {
@@ -4747,7 +4751,6 @@ struct DequantizerIQ2XXS final : public BaseDequantizer<block_iq2_xxs> {
uint32x4x4_t data;
SimpleBits bits;
- float d;
};
inline int32x4x4_t prepare_4bit_scales16(const uint8_t * sc) {
@@ -4793,7 +4796,6 @@ struct DequantizerIQ2XS final : public BaseDequantizer<block_iq2_xs> {
SimpleBits bits;
- float d;
};
@@ -4857,7 +4859,6 @@ struct DequantizerIQ2S final : public BaseDequantizer<block_iq2_s> {
SimpleBits bits;
SignHelper sh;
- float d;
};
@@ -4891,8 +4892,6 @@ struct DequantizerIQ3XXS final : public BaseDequantizer<block_iq3_xxs> {
SimpleBits bits;
uint32x4x2_t gas;
- float d;
-
};
struct DequantizerIQ3S final : public BaseDequantizer<block_iq3_s> {
@@ -4951,11 +4950,9 @@ struct DequantizerIQ3S final : public BaseDequantizer<block_iq3_s> {
SignHelper sh;
uint32x4x2_t gas;
- float d;
-
};
-struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> {
+struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn, true> {
DequantizerIQ2TN(const void * vx, size_t bx, int nrc) : BaseDequantizer(vx, bx, nrc) {}
constexpr static int num_blocks() { return 16; }
@@ -4966,9 +4963,7 @@ struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> {
// d = GGML_FP16_TO_FP32(x[i].d);
//}
- inline void new_block(int i) {
- d = GGML_FP16_TO_FP32(x[i].d);
- }
+ inline void new_block(int) { }
template <typename Q8>
inline void compute(const Q8& q8, int i, int j, int32x4_t * sumi) {
@@ -5019,8 +5014,6 @@ struct DequantizerIQ2TN final : public BaseDequantizer<block_iq2_tn> {
}
Q2bits bits;
-
- float d;
};
template <int nrc_y>
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index 42441584..28bad18e 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -1972,15 +1972,15 @@ void quantize_row_iq2_tn_ref(const float * x, block_iq2_tn * y, int64_t k) {
auto quantize = [] (float xmax, float x) {
return x < -0.5f*xmax ? 0 : x < 0.5f*xmax ? 1 : 2;
};
+ int n = k;
+ float max = x[0];
+ for (int j = 1; j < n; ++j) max = std::max(max, fabsf(x[j]));
+
+ *(float *)y = max;
+ y = (block_iq2_tn *)((float *)y + 1);
for (int ibl = 0; ibl < nb; ++ibl) {
auto xb = x + QK_K*ibl;
- float max = xb[0];
- for (int j = 0; j < QK_K; ++j) {
- float ax = fabsf(xb[j]);
- max = std::max(ax, max);
- }
- y[ibl].d = GGML_FP32_TO_FP16(max);
auto qs = y[ibl].qs;
for (int l = 0; l < QK_K/128; ++l) {
for (int j = 0; j < 32; ++j) {
@@ -1992,7 +1992,7 @@ void quantize_row_iq2_tn_ref(const float * x, block_iq2_tn * y, int64_t k) {
}
}
-void quantize_row_iq2_tn(const float * x, void * y, int64_t k) {
+void quantize_row_iq2_tn(const float * x, void * y, int64_t k) {
quantize_row_iq2_tn_ref(x, (block_iq2_tn *)y, k);
}
@@ -2009,9 +2009,11 @@ size_t quantize_iq2_tn(const float * src, void * dst, int64_t nrows, int64_t n_p
void dequantize_row_iq2_tn(const block_iq2_tn * x, float * y, int64_t k) {
GGML_ASSERT(k%QK_K == 0);
+ const float * dptr = (const float *)x;
+ float d = *dptr;
+ x = (const block_iq2_tn *)(dptr + 1);
int nb = k/QK_K;
for (int ibl = 0; ibl < nb; ++ibl) {
- float d = GGML_FP16_TO_FP32(x[ibl].d);
auto qs = x[ibl].qs;
for (int l = 0; l < QK_K/128; ++l) {
for (int j = 0; j < 32; ++j) {
@@ -2039,13 +2041,14 @@ void vec_dot_iq2_tn_q8_k(int n, float * s, size_t bs, const void * vx, size_t
const int nb = n / QK_K;
- const block_iq2_tn * x = (const block_iq2_tn *)vx;
+ const float * dptr = (const float *)vx;
+ const float d = *dptr;
+ const block_iq2_tn * x = (const block_iq2_tn *)(dptr + 1);
const block_q8_K * y = (const block_q8_K *)vy;
float sumf = 0;
for (int i = 0; i < nb; i++) {
- float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d;
auto qs = x[i].qs;
auto q8 = y[i].qs;
int sumi1 = 0, sumi2 = 0, sumi3 = 0,sumi4 = 0;