summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2024-02-05 09:48:03 +0200
committerGeorgi Gerganov <ggerganov@gmail.com>2024-02-05 09:48:03 +0200
commit30679d438d5225b3aecf5cec6482cbc9f8f87ba5 (patch)
treeef33f23cd2e9a84566d2638edf4f1876c0b795b8
parent4be04c8965578edc09194fab769b4b922b8444f5 (diff)
scripts : fix typos, cleanup (#5303)
-rw-r--r--scripts/server-llm.sh23
1 file changed, 12 insertions, 11 deletions
diff --git a/scripts/server-llm.sh b/scripts/server-llm.sh
index 062b7049..30bbac32 100644
--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@@ -14,16 +14,17 @@
# - Might be unstable!
#
# Usage:
-# ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]
+# ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [--non-interactive]
#
-# --port: port number, default is 8888
-# --repo: path to a repo containing GGUF model files
-# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input
-# --backend: cpu, cuda, metal, opencl, depends on the OS
-# --gpu-id: gpu id, default is 0
-# --n-parallel: number of parallel requests, default is 8
-# --n-kv: KV cache size, default is 4096
-# --verbose: verbose output
+# --port: port number, default is 8888
+# --repo: path to a repo containing GGUF model files
+# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input
+# --backend: cpu, cuda, metal, opencl, depends on the OS
+# --gpu-id: gpu id, default is 0
+# --n-parallel: number of parallel requests, default is 8
+# --n-kv: KV cache size, default is 4096
+# --verbose: verbose output
+# --non-interactive: run without asking for permission to run
#
# Example:
#
@@ -67,8 +68,7 @@ verbose=0
function print_usage {
printf "Usage:\n"
- printf " ./server-llm.sh [-interactive] [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose]\n\n"
- printf " --non-interactive: run without asking a permision to run\n"
+ printf " ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [--non-interactive]\n\n"
printf " --port: port number, default is 8888\n"
printf " --repo: path to a repo containing GGUF model files\n"
printf " --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
@@ -77,6 +77,7 @@ function print_usage {
printf " --n-parallel: number of parallel requests, default is 8\n"
printf " --n-kv: KV cache size, default is 4096\n"
printf " --verbose: verbose output\n\n"
+ printf " --non-interactive: run without asking for permission to run\n"
printf "Example:\n\n"
printf ' bash -c "$(curl -s https://ggml.ai/server-llm.sh)"\n\n'
}