Diffstat (limited to 'scripts')
-rw-r--r--  scripts/LlamaConfig.cmake.in   |  5 -----
-rwxr-xr-x  scripts/compare-llama-bench.py |  6 +++---
-rw-r--r--  scripts/server-llm.sh          | 11 +++--------
-rwxr-xr-x  scripts/sync-ggml-am.sh        |  4 ----
-rwxr-xr-x  scripts/sync-ggml.sh           |  2 --
5 files changed, 6 insertions, 22 deletions
diff --git a/scripts/LlamaConfig.cmake.in b/scripts/LlamaConfig.cmake.in
index 92e39708..9311055d 100644
--- a/scripts/LlamaConfig.cmake.in
+++ b/scripts/LlamaConfig.cmake.in
@@ -5,7 +5,6 @@ set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
 set(LLAMA_BLAS @LLAMA_BLAS@)
 set(LLAMA_CUDA @LLAMA_CUDA@)
 set(LLAMA_METAL @LLAMA_METAL@)
-set(LLAMA_CLBLAST @LLAMA_CLBLAST@)
 set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
 set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
 
@@ -36,10 +35,6 @@ if (LLAMA_METAL)
     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
 endif()
 
-if (LLAMA_CLBLAST)
-    find_package(CLBlast REQUIRED)
-endif()
-
 if (LLAMA_HIPBLAS)
     find_package(hip REQUIRED)
     find_package(hipblas REQUIRED)
diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py
index 6016eb2c..513dde5e 100755
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@@ -19,17 +19,17 @@ logger = logging.getLogger("compare-llama-bench")
 # Properties by which to differentiate results per commit:
 KEY_PROPERTIES = [
-    "cpu_info", "gpu_info", "n_gpu_layers", "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
+    "cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
     "blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "embeddings",
     "n_threads", "type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split",
     "flash_attn", "n_prompt", "n_gen"
 ]
 
 # Properties that are boolean and are converted to Yes/No for the table:
-BOOL_PROPERTIES = ["cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
+BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
 
 # Header names for the table:
 PRETTY_NAMES = {
-    "cuda": "CUDA", "opencl": "OpenCL", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
+    "cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
     "gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
     "model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
     "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh
index eb6ce458..b3715e20 100644
--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@@ -3,7 +3,7 @@
 # Helper script for deploying llama.cpp server with a single Bash command
 #
 # - Works on Linux and macOS
-# - Supports: CPU, CUDA, Metal, OpenCL
+# - Supports: CPU, CUDA, Metal
 # - Can run all GGUF models from HuggingFace
 # - Can serve requests in parallel
 # - Always builds latest llama.cpp from GitHub
@@ -19,7 +19,7 @@
 #   --port:       port number, default is 8888
 #   --repo:       path to a repo containing GGUF model files
 #   --wtype:      weights type (f16, q8_0, q4_0, q4_1), default is user-input
-#   --backend:    cpu, cuda, metal, opencl, depends on the OS
+#   --backend:    cpu, cuda, metal, depends on the OS
 #   --gpu-id:     gpu id, default is 0
 #   --n-parallel: number of parallel requests, default is 8
 #   --n-kv:       KV cache size, default is 4096
@@ -72,7 +72,7 @@ function print_usage {
     printf "  --port:       port number, default is 8888\n"
     printf "  --repo:       path to a repo containing GGUF model files\n"
     printf "  --wtype:      weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
-    printf "  --backend:    cpu, cuda, metal, opencl, depends on the OS\n"
+    printf "  --backend:    cpu, cuda, metal, depends on the OS\n"
     printf "  --gpu-id:     gpu id, default is 0\n"
     printf "  --n-parallel: number of parallel requests, default is 8\n"
     printf "  --n-kv:       KV cache size, default is 4096\n"
@@ -387,9 +387,6 @@ elif [[ "$backend" == "cpu" ]]; then
 elif [[ "$backend" == "metal" ]]; then
     printf "[+] Building with Metal backend\n"
     make -j server $log
-elif [[ "$backend" == "opencl" ]]; then
-    printf "[+] Building with OpenCL backend\n"
-    LLAMA_CLBLAST=1 make -j server $log
 else
     printf "[-] Unknown backend: %s\n" "$backend"
     exit 1
@@ -407,8 +404,6 @@ elif [[ "$backend" == "cpu" ]]; then
     args="-ngl 0"
 elif [[ "$backend" == "metal" ]]; then
     args="-ngl 999"
-elif [[ "$backend" == "opencl" ]]; then
-    args="-ngl 999"
 else
     printf "[-] Unknown backend: %s\n" "$backend"
     exit 1
diff --git a/scripts/sync-ggml-am.sh b/scripts/sync-ggml-am.sh
index 3f8ddf37..9e34dc8b 100755
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@@ -106,8 +106,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
     # src/ggml-kompute.h -> ggml-kompute.h
     # src/ggml-metal.h -> ggml-metal.h
     # src/ggml-metal.m -> ggml-metal.m
-    # src/ggml-opencl.cpp -> ggml-opencl.cpp
-    # src/ggml-opencl.h -> ggml-opencl.h
     # src/ggml-quants.c -> ggml-quants.c
     # src/ggml-quants.h -> ggml-quants.h
     # src/ggml-rpc.cpp -> ggml-rpc.cpp
@@ -143,8 +141,6 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
     -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
     -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
     -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
-    -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
-    -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
     -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
     -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
     -e 's/src\/ggml-rpc\.cpp/ggml-rpc.cpp/g' \
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
index fbae6b7f..4843f8a4 100755
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@@ -14,8 +14,6 @@ cp -rpv ../ggml/src/ggml-kompute.h ./ggml-kompute.h
 cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
 cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
 cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
-cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
-cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
 cp -rpv ../ggml/src/ggml-quants.c ./ggml-quants.c
 cp -rpv ../ggml/src/ggml-quants.h ./ggml-quants.h
 cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml-rpc.cpp
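Taken together, these hunks remove every scripted reference to the OpenCL (CLBlast) backend: the CMake package config no longer exports LLAMA_CLBLAST, the bench comparison script drops the "opencl" property, server-llm.sh no longer accepts --backend opencl, and the ggml sync scripts stop copying ggml-opencl.cpp/.h. As a quick sanity check after applying the diff, a one-liner along these lines (a sketch, not part of the change itself; the patterns are illustrative) can confirm nothing was missed:

    # scan the scripts tree for leftover OpenCL/CLBlast references;
    # grep exits non-zero when nothing matches, so "clean" is printed only then
    grep -rn -e 'LLAMA_CLBLAST' -e 'ggml-opencl' -e 'opencl' scripts/ || echo "clean: no OpenCL references remain"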