From 22c84a126f50146a851641ccaa6e8a24f0985d79 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Wed, 19 Mar 2025 15:47:24 +0100 Subject: Fix ggml_compute_forward_dup_q (#269) I broke it with PR #265. I was testing with a model where the wk_b and wk_v tensors were already present and therefore did not need to be computed, so I did not notice that the change I made to ggml_compute_forward_dup_q breaks that computation. Co-authored-by: Iwan Kawrakow --- ggml/src/ggml.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'ggml/src/ggml.c') diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 1552d91b..faf1902d 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -10576,6 +10576,11 @@ static void ggml_compute_forward_dup_q( if (dst->type == GGML_TYPE_Q8_0 && dst->src[0]->type == GGML_TYPE_Q8_0 && ggml_are_same_shape(dst, dst->src[0])) { + if (dst->src[0]->nb[0] == sizeof(block_q8_0) && dst->nb[0] == sizeof(block_q8_0)) { + ggml_compute_forward_dup_bytes(params, dst); + return; + } + // we assume src is transposed and that's why we are here GGML_ASSERT(dst->ne[0] % QK8_0 == 0); -- cgit v1.2.3