Fix f16_sycl cpy call from Arc (#5411)

* fix f16_sycl cpy call
* rm old logic
* add fp16 build CI
* use macro
* format fix
author: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> 2024-02-08 22:39:10 +0530
committer: GitHub <noreply@github.com> 2024-02-08 22:39:10 +0530
commit: 6e99f2a04f1871d637dd77eb4d81de31a5510253 (patch)
tree: ef11166429d2b6bb6163ec07bdd58047db78b40b /ggml-sycl.cpp
parent: ff4ff05c5ff4311c05a8ce1f984c7d8def4f07a5 (diff)
1 files changed, 5 insertions, 3 deletions
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index a03df4c6..dd562a89 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -12148,7 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
     const int64_t src1_ncols, const int64_t src1_padded_row_size,
     const dpct::queue_ptr &stream) {
 
-    const int64_t ne00 = src0->ne[0];
+    GGML_TENSOR_BINARY_OP_LOCALS
+
     const int64_t row_diff = row_high - row_low;
 
     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12167,8 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
         } else {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-                                  ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
-                                  sizeof(sycl::half), 0, 0, stream);
+                                  ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+                                  nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+                                  nb13, stream);
         }
     }
 #else
author	Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>	2024-02-08 22:39:10 +0530
committer	GitHub <noreply@github.com>	2024-02-08 22:39:10 +0530
commit	6e99f2a04f1871d637dd77eb4d81de31a5510253 (patch)
tree	ef11166429d2b6bb6163ec07bdd58047db78b40b /ggml-sycl.cpp
parent	ff4ff05c5ff4311c05a8ce1f984c7d8def4f07a5 (diff)