From af3ba5d94627d337e32a95129e31a3064c459f6b Mon Sep 17 00:00:00 2001
From: Neo Zhang Jianyu <jianyu.zhang@intel.com>
Date: Fri, 2 Feb 2024 15:53:27 +0800
Subject: [SYCL] update guide of SYCL backend (#5254)

* update guide for make installation, memory, gguf model link,  rm todo for windows build

* add vs install requirement

* update for gpu device check

* update help of llama-bench

* fix grammar issues
---
 examples/llama-bench/README.md   | 34 +++++++++++++++++++++-------------
 examples/sycl/win-run-llama2.bat |  2 +-
 2 files changed, 22 insertions(+), 14 deletions(-)

(limited to 'examples')
diff --git a/examples/llama-bench/README.md b/examples/llama-bench/README.md
index d02824bf..374e40a7 100644
--- a/examples/llama-bench/README.md
+++ b/examples/llama-bench/README.md
@@ -23,19 +23,23 @@ usage: ./llama-bench [options]
 
 options:
   -h, --help
-  -m, --model <filename>            (default: models/7B/ggml-model-q4_0.gguf)
-  -p, --n-prompt <n>                (default: 512)
-  -n, --n-gen <n>                   (default: 128)
-  -b, --batch-size <n>              (default: 512)
-  --memory-f32 <0|1>                (default: 0)
-  -t, --threads <n>                 (default: 16)
-  -ngl N, --n-gpu-layers <n>        (default: 99)
-  -mg i, --main-gpu <i>             (default: 0)
-  -mmq, --mul-mat-q <0|1>           (default: 1)
-  -ts, --tensor_split <ts0/ts1/..>
-  -r, --repetitions <n>             (default: 5)
-  -o, --output <csv|json|md|sql>    (default: md)
-  -v, --verbose                     (default: 0)
+  -m, --model <filename>              (default: models/7B/ggml-model-q4_0.gguf)
+  -p, --n-prompt <n>                  (default: 512)
+  -n, --n-gen <n>                     (default: 128)
+  -b, --batch-size <n>                (default: 512)
+  -ctk <t>, --cache-type-k <t>        (default: f16)
+  -ctv <t>, --cache-type-v <t>        (default: f16)
+  -t, --threads <n>                   (default: 112)
+  -ngl, --n-gpu-layers <n>            (default: 99)
+  -sm, --split-mode <none|layer|row>  (default: layer)
+  -mg, --main-gpu <i>                 (default: 0)
+  -nkvo, --no-kv-offload <0|1>        (default: 0)
+  -mmp, --mmap <0|1>                  (default: 1)
+  -mmq, --mul-mat-q <0|1>             (default: 1)
+  -ts, --tensor_split <ts0/ts1/..>    (default: 0)
+  -r, --repetitions <n>               (default: 5)
+  -o, --output <csv|json|md|sql>      (default: md)
+  -v, --verbose                       (default: 0)
 
 Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.
 ```
@@ -51,6 +55,10 @@ Each test is repeated the number of times given by `-r`, and the results are ave
 
 For a description of the other options, see the [main example](../main/README.md).
 
+Note:
+
+- When using the SYCL backend, llama-bench may hang in some cases. If it does, disable memory mapping with `--mmap 0` (short form: `-mmp 0`).
+
 ## Examples
 
 ### Text generation with different models
diff --git a/examples/sycl/win-run-llama2.bat b/examples/sycl/win-run-llama2.bat
index 28d93554..cf621c67 100644
--- a/examples/sycl/win-run-llama2.bat
+++ b/examples/sycl/win-run-llama2.bat
@@ -2,7 +2,7 @@
 ::  Copyright (C) 2024 Intel Corporation
 ::  SPDX-License-Identifier: MIT
 
-INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
+set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
 @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
 
 
-- 
cgit v1.2.3