summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .github/workflows/build.yml 41
-rw-r--r-- ggml-sycl.cpp 8
2 files changed, 46 insertions, 3 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f4c374ce..ed292d6b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -184,6 +184,47 @@ jobs:
cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
cmake --build . --config Release -j $(nproc)
+ ubuntu-22-cmake-sycl-fp16:
+ runs-on: ubuntu-22.04
+
+ continue-on-error: true
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: add oneAPI to apt
+ shell: bash
+ run: |
+ cd /tmp
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+ - name: install oneAPI dpcpp compiler
+ shell: bash
+ run: |
+ sudo apt update
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+ - name: install oneAPI MKL library
+ shell: bash
+ run: |
+ sudo apt install intel-oneapi-mkl-devel
+
+ - name: Clone
+ id: checkout
+ uses: actions/checkout@v3
+
+ - name: Build
+ id: cmake_build
+ run: |
+ source /opt/intel/oneapi/setvars.sh
+ mkdir build
+ cd build
+ cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON ..
+ cmake --build . --config Release -j $(nproc)
+
# TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index a03df4c6..dd562a89 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -12148,7 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
const int64_t src1_ncols, const int64_t src1_padded_row_size,
const dpct::queue_ptr &stream) {
- const int64_t ne00 = src0->ne[0];
+ GGML_TENSOR_BINARY_OP_LOCALS
+
const int64_t row_diff = row_high - row_low;
// on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12167,8 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
} else {
src1_dfloat = src1_dfloat_a.alloc(ne00);
ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
- ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
- sizeof(sycl::half), 0, 0, stream);
+ ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+ nb13, stream);
}
}
#else