diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2023-08-13 00:24:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-13 00:24:45 +0200 |
commit | f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e (patch) | |
tree | 09dbac6ef1057f2cb8bbce5e45faa94b2135b5ec /CMakeLists.txt | |
parent | b19edd54d51cef5e3616c18b1d0d8626895b2cba (diff) |
CUDA: Fixed OpenLLaMA 3b mmq, reduced compile time (#2590)
Diffstat (limited to 'CMakeLists.txt')
-rw-r--r-- | CMakeLists.txt | 2 |
1 file changed, 0 insertions, 2 deletions
```diff
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d085bc83..dff4942c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,7 +69,6 @@ option(LLAMA_BLAS "llama: use BLAS"
 set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
 option(LLAMA_CUBLAS "llama: use CUDA" OFF)
 #option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
-set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels")
 option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
 set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
 set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
@@ -256,7 +255,6 @@ if (LLAMA_CUBLAS)
 # if (LLAMA_CUDA_CUBLAS)
 # add_compile_definitions(GGML_CUDA_CUBLAS)
 # endif()
- add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
 if (LLAMA_CUDA_FORCE_DMMV)
 add_compile_definitions(GGML_CUDA_FORCE_DMMV)
 endif()
```