Diffstat (limited to 'scripts')
-rw-r--r--  scripts/LlamaConfig.cmake.in   |  5 -----
-rwxr-xr-x  scripts/compare-llama-bench.py |  6 +++---
-rw-r--r--  scripts/server-llm.sh          | 11 +++--------
-rwxr-xr-x  scripts/sync-ggml-am.sh        |  4 ----
-rwxr-xr-x  scripts/sync-ggml.sh           |  2 --
5 files changed, 6 insertions, 22 deletions
diff --git a/scripts/LlamaConfig.cmake.in b/scripts/LlamaConfig.cmake.in
index 92e39708..9311055d 100644
--- a/scripts/LlamaConfig.cmake.in
+++ b/scripts/LlamaConfig.cmake.in
@@ -5,7 +5,6 @@ set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
 set(LLAMA_BLAS @LLAMA_BLAS@)
 set(LLAMA_CUDA @LLAMA_CUDA@)
 set(LLAMA_METAL @LLAMA_METAL@)
-set(LLAMA_CLBLAST @LLAMA_CLBLAST@)
 set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
 set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
 
@@ -36,10 +35,6 @@ if (LLAMA_METAL)
     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
 endif()
 
-if (LLAMA_CLBLAST)
-    find_package(CLBlast REQUIRED)
-endif()
-
 if (LLAMA_HIPBLAS)
     find_package(hip REQUIRED)
     find_package(hipblas REQUIRED)
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index 6016eb2c..513dde5e 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -19,17 +19,17 @@ logger = logging.getLogger("compare-llama-bench")
 # Properties by which to differentiate results per commit:
 KEY_PROPERTIES = [
-    "cpu_info", "gpu_info", "n_gpu_layers", "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
+    "cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
     "blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "embeddings",
     "n_threads", "type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split",
     "flash_attn", "n_prompt", "n_gen"
 ]
 
 # Properties that are boolean and are converted to Yes/No for the table:
-BOOL_PROPERTIES = ["cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
+BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
 
 # Header names for the table:
 PRETTY_NAMES = {
-    "cuda": "CUDA", "opencl": "OpenCL", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
+    "cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
     "gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
     "model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
     "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh
index eb6ce458..b3715e20 100644
--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@@ -3,7 +3,7 @@
 # Helper script for deploying llama.cpp server with a single Bash command
 #
 # - Works on Linux and macOS
-# - Supports: CPU, CUDA, Metal, OpenCL
+# - Supports: CPU, CUDA, Metal
 # - Can run all GGUF models from HuggingFace
 # - Can serve requests in parallel
 # - Always builds latest llama.cpp from GitHub
@@ -19,7 +19,7 @@
 #   --port:       port number, default is 8888
 #   --repo:       path to a repo containing GGUF model files
 #   --wtype:      weights type (f16, q8_0, q4_0, q4_1), default is user-input
-#   --backend:    cpu, cuda, metal, opencl, depends on the OS
+#   --backend:    cpu, cuda, metal, depends on the OS
 #   --gpu-id:     gpu id, default is 0
 #   --n-parallel: number of parallel requests, default is 8
 #   --n-kv:       KV cache size, default is 4096
@@ -72,7 +72,7 @@ function print_usage {
     printf "  --port:       port number, default is 8888\n"
     printf "  --repo:       path to a repo containing GGUF model files\n"
     printf "  --wtype:      weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
-    printf "  --backend:    cpu, cuda, metal, opencl, depends on the OS\n"
+    printf "  --backend:    cpu, cuda, metal, depends on the OS\n"
     printf "  --gpu-id:     gpu id, default is 0\n"
     printf "  --n-parallel: number of parallel requests, default is 8\n"
     printf "  --n-kv:       KV cache size, default is 4096\n"
@@ -387,9 +387,6 @@ elif [[ "$backend" == "cpu" ]]; then
 elif [[ "$backend" == "metal" ]]; then
     printf "[+] Building with Metal backend\n"
     make -j server $log
-elif [[ "$backend" == "opencl" ]]; then
-    printf "[+] Building with OpenCL backend\n"
-    LLAMA_CLBLAST=1 make -j server $log
 else
     printf "[-] Unknown backend: %s\n" "$backend"
     exit 1
@@ -407,8 +404,6 @@ elif [[ "$backend" == "cpu" ]]; then
     args="-ngl 0"
 elif [[ "$backend" == "metal" ]]; then
     args="-ngl 999"
-elif [[ "$backend" == "opencl" ]]; then
-    args="-ngl 999"
 else
     printf "[-] Unknown backend: %s\n" "$backend"
     exit 1
diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
index 3f8ddf37..9e34dc8b 100755
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@@ -106,8 +106,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
     # src/ggml-kompute.h -> ggml-kompute.h
     # src/ggml-metal.h -> ggml-metal.h
     # src/ggml-metal.m -> ggml-metal.m
-    # src/ggml-opencl.cpp -> ggml-opencl.cpp
-    # src/ggml-opencl.h -> ggml-opencl.h
     # src/ggml-quants.c -> ggml-quants.c
     # src/ggml-quants.h -> ggml-quants.h
     # src/ggml-rpc.cpp -> ggml-rpc.cpp
@@ -143,8 +141,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
     -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
     -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
     -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
-    -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
-    -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
     -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
     -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
     -e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
index fbae6b7f..4843f8a4 100755
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@@ -14,8 +14,6 @@ cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h
 cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
 cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
 cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
-cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
-cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
 cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
 cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
 cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml-rpc.cpp
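Taken together, these hunks remove every scripted reference to the OpenCL (CLBlast) backend: the CMake package config no longer exports LLAMA_CLBLAST, the bench comparison script drops the "opencl" property, server-llm.sh no longer accepts --backend opencl, and the ggml sync scripts stop copying ggml-opencl.cpp/.h. As a quick sanity check after applying the diff, a one-liner along these lines (a sketch, not part of the change itself; the patterns are illustrative) can confirm nothing was missed:

    # scan the scripts tree for leftover OpenCL/CLBlast references;
    # grep exits non-zero when nothing matches, so "clean" is printed only then
    grep -rn -e 'LLAMA_CLBLAST' -e 'ggml-opencl' -e 'opencl' scripts/ || echo "clean: no OpenCL references remain"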