summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author slaren <slarengh@gmail.com> 2024-02-01 18:30:17 +0100
committer GitHub <noreply@github.com> 2024-02-01 18:30:17 +0100
commit 8ca511cadee2c67f0bd8c7034a2513778ee9a1b7 (patch)
tree 689adb7b7d0f0210a43175b9f896627cff175ec7
parent d71ac90985854b0905e1abba778e407e17f9f887 (diff)
cuda : fix LLAMA_CUDA_F16 (#5262)
-rw-r--r-- ggml-cuda.cu 6
1 files changed, 3 insertions, 3 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index e5659574..3242a0b4 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(
if (src1_convert_f16) {
src1_dfloat = src1_dfloat_a.alloc(ne00);
- ggml_cpy_f32_f16_cuda((const char *) src1_ddf_i, (char *) src1_dfloat, ne00,
- ne00, 1, sizeof(float), 0, 0,
- ne00, 1, sizeof(half), 0, 0, stream);
+ const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
+ GGML_ASSERT(to_fp16_cuda != nullptr);
+ to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream);
}
#else
const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion