From af3ba5d94627d337e32a95129e31a3064c459f6b Mon Sep 17 00:00:00 2001 From: Neo Zhang Jianyu Date: Fri, 2 Feb 2024 15:53:27 +0800 Subject: [SYCL] update guide of SYCL backend (#5254) * update guide for make installation, memory, gguf model link, rm todo for windows build * add vs install requirement * update for gpu device check * update help of llama-bench * fix grammer issues --- examples/llama-bench/README.md | 34 +++++++++++++++++++++------------- examples/sycl/win-run-llama2.bat | 2 +- 2 files changed, 22 insertions(+), 14 deletions(-) (limited to 'examples') diff --git a/examples/llama-bench/README.md b/examples/llama-bench/README.md index d02824bf..374e40a7 100644 --- a/examples/llama-bench/README.md +++ b/examples/llama-bench/README.md @@ -23,19 +23,23 @@ usage: ./llama-bench [options] options: -h, --help - -m, --model (default: models/7B/ggml-model-q4_0.gguf) - -p, --n-prompt (default: 512) - -n, --n-gen (default: 128) - -b, --batch-size (default: 512) - --memory-f32 <0|1> (default: 0) - -t, --threads (default: 16) - -ngl N, --n-gpu-layers (default: 99) - -mg i, --main-gpu (default: 0) - -mmq, --mul-mat-q <0|1> (default: 1) - -ts, --tensor_split - -r, --repetitions (default: 5) - -o, --output (default: md) - -v, --verbose (default: 0) + -m, --model (default: models/7B/ggml-model-q4_0.gguf) + -p, --n-prompt (default: 512) + -n, --n-gen (default: 128) + -b, --batch-size (default: 512) + -ctk , --cache-type-k (default: f16) + -ctv , --cache-type-v (default: f16) + -t, --threads (default: 112) + -ngl, --n-gpu-layers (default: 99) + -sm, --split-mode (default: layer) + -mg, --main-gpu (default: 0) + -nkvo, --no-kv-offload <0|1> (default: 0) + -mmp, --mmap <0|1> (default: 1) + -mmq, --mul-mat-q <0|1> (default: 1) + -ts, --tensor_split (default: 0) + -r, --repetitions (default: 5) + -o, --output (default: md) + -v, --verbose (default: 0) Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times. ``` @@ -51,6 +55,10 @@ Each test is repeated the number of times given by `-r`, and the results are ave For a description of the other options, see the [main example](../main/README.md). +Note: + +- When using SYCL backend, there would be hang issue in some cases. Please set `--mmp 0`. + ## Examples ### Text generation with different models diff --git a/examples/sycl/win-run-llama2.bat b/examples/sycl/win-run-llama2.bat index 28d93554..cf621c67 100644 --- a/examples/sycl/win-run-llama2.bat +++ b/examples/sycl/win-run-llama2.bat @@ -2,7 +2,7 @@ :: Copyright (C) 2024 Intel Corporation :: SPDX-License-Identifier: MIT -INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" +set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force -- cgit v1.2.3