summaryrefslogtreecommitdiff
path: root/ggml/src/iqk/iqk_quantize.cpp
diff options
context:
space:
mode:
authorKawrakow <iwankawrakow@gmail.com>2024-12-15 09:54:21 +0100
committerGitHub <noreply@github.com>2024-12-15 09:54:21 +0100
commit85c5a1a99569ccc00c280835fe3a69b4af02c43b (patch)
treeda421487d5ddd0467b2bfd6cbbfb2666406c46f1 /ggml/src/iqk/iqk_quantize.cpp
parent20758edcae65213b2f575b6d23dfea67ad9dd0e0 (diff)
BF16_R16 - 16 interleaved bf16 rows (#142)
* Not working bf16_r4 * Adding bf16_r8 Small performance gain compared to bf16 - 258 t/s vs 234 t/s. I guess, this is still sub-optimal. * bf16_rx: Very slightly faster by interleaving 16 rows 258 t/s -> 263 t/s * Rename bf16_r4 to bf16_r16 We are interleaving 16 rows now. * Cleanup unused stuff --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src/iqk/iqk_quantize.cpp')
-rw-r--r--ggml/src/iqk/iqk_quantize.cpp38
1 files changed, 37 insertions, 1 deletions
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index de8c0d99..abe81858 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -4708,7 +4708,7 @@ static void repack_q8_k(int nrows, int n_per_row, const block_q8_K * x, block_q8
}
}
}
- x += 4*nblock;
+ x += 8*nblock;
y += nblock;
}
}
@@ -4759,3 +4759,39 @@ void vec_dot_q8_k_r8_q8_k(int n, float * s, size_t bs, const void * vx, size_t b
GGML_UNUSED(by);
}
+//
+// ========================================= bf16_r16
+//
+namespace { // file-local helpers for the 16-row interleaved bf16 repacking
+inline ggml_bf16_t to_bf16(const float& x) { // float -> bf16 by keeping only the upper 16 bits of the float's bit pattern
+ union { float f; uint32_t u; } helper; // lets the float's bits be read back as a 32-bit integer
+ helper.f = x;
+ return ggml_bf16_t{(uint16_t)(helper.u >> 16)}; // the low 16 bits are dropped (plain truncation, no rounding step)
+}
+inline ggml_bf16_t to_bf16(const ggml_bf16_t& x) { return x; } // identity overload so repack_bf16 also works on bf16 input
+template <typename T>
+void repack_bf16(int nrows, int n_per_row, const T * x, ggml_bf16_t * y) { // interleaves each group of 16 rows of x into y, converting every element via to_bf16
+ GGML_ASSERT(nrows%16 == 0); // rows are consumed 16 at a time
+ GGML_ASSERT(n_per_row%2 == 0); // elements are copied in pairs
+ for (int row = 0; row < nrows; row += 16) { // one iteration per group of 16 rows; x and y are advanced at the end of the body
+ for (int k = 0; k < 16; ++k) { // k = row index within the current group
+ auto x8 = x + k*n_per_row; // start of row k of the current group (rows read n_per_row elements apart)
+ for (int ib = 0; ib < n_per_row/2; ++ib) { // ib = index of the element pair within the row
+ y[32*ib + 2*k + 0] = to_bf16(x8[2*ib+0]); // pair ib of all 16 rows fills y[32*ib .. 32*ib+31]; row k takes slots 2*k and 2*k+1
+ y[32*ib + 2*k + 1] = to_bf16(x8[2*ib+1]);
+ }
+ }
+ x += 16*n_per_row; // step past the 16 input rows just read
+ y += 16*n_per_row; // the output group holds the same number of elements
+ }
+}
+}
+
+void repack_f32_bf16_r16(const void * src, void * dst, int64_t nrows, int64_t n_per_row) { // reads src as float rows and writes the 16-row interleaved bf16 layout to dst
+ repack_bf16(nrows, n_per_row, (const float *)src, (ggml_bf16_t *)dst); // note: repack_bf16 takes int, so the int64_t sizes are converted to int here
+}
+
+void repack_bf16_bf16_r16(const void * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrows, int64_t n_per_row) { // reads src as bf16 rows and writes the 16-row interleaved bf16 layout to dst (values copied unchanged)
+ repack_bf16(nrows, n_per_row, (const ggml_bf16_t *)src, (ggml_bf16_t *)dst); // note: repack_bf16 takes int, so the int64_t sizes are converted to int here
+}
+