diff options
author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-07-27 07:55:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-27 07:55:01 +0200 |
commit | 154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch) | |
tree | 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /examples/sycl | |
parent | 0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff) |
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP
AVX2 and CUDA appear to work.
CUDA performance seems slightly (~1-2%) lower as it is so often
the case with llama.cpp/ggml after some "improvements" have been made.
* Merging mainline - fix Metal
* Remove check
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'examples/sycl')
-rwxr-xr-x | examples/sycl/build.sh | 4 | ||||
-rwxr-xr-x | examples/sycl/run-llama2.sh | 1 | ||||
-rw-r--r-- | examples/sycl/win-build-sycl.bat | 5 | ||||
-rw-r--r-- | examples/sycl/win-run-llama2.bat | 2 |
4 files changed, 4 insertions, 8 deletions
diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh index db46d57c..8fe0a679 100755 --- a/examples/sycl/build.sh +++ b/examples/sycl/build.sh @@ -8,10 +8,10 @@ cd build source /opt/intel/oneapi/setvars.sh #for FP16 -#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference +#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON # faster for long-prompt inference #for FP32 -cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx +cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx #build example/main #cmake --build . --config Release --target main diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh index da0e4aab..111366fb 100755 --- a/examples/sycl/run-llama2.sh +++ b/examples/sycl/run-llama2.sh @@ -34,4 +34,3 @@ fi #use multiple GPUs with same max compute units #ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 - diff --git a/examples/sycl/win-build-sycl.bat b/examples/sycl/win-build-sycl.bat index 027173b0..17dd1ff5 100644 --- a/examples/sycl/win-build-sycl.bat +++ b/examples/sycl/win-build-sycl.bat @@ -13,10 +13,10 @@ if %errorlevel% neq 0 goto ERROR :: for FP16 :: faster for long-prompt inference -:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON +:: cmake -G "MinGW Makefiles" .. -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON :: for FP32 -cmake -G "Ninja" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release +cmake -G "Ninja" .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release if %errorlevel% neq 0 goto ERROR :: build example/main only :: make main @@ -31,4 +31,3 @@ exit /B 0 :ERROR echo comomand error: %errorlevel% exit /B %errorlevel% - diff --git a/examples/sycl/win-run-llama2.bat b/examples/sycl/win-run-llama2.bat index 1d4d7d2c..f0385cdf 100644 --- a/examples/sycl/win-run-llama2.bat +++ b/examples/sycl/win-run-llama2.bat @@ -7,5 +7,3 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" .\build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 33 -s 0 - - |