author    Johannes Gäßler <johannesg@5d6.de>    2023-08-28 17:59:39 +0200
committer GitHub <noreply@github.com>           2023-08-28 17:59:39 +0200
commit    6b73ef120114beb5664ea94aab48d07ed248ee52 (patch)
tree      6d9c777a34a43f7b3ad6185df9639bab9be5c5cd /examples/server
parent    75fafcbcccc280a5b3883bc76d0a2dabf474d094 (diff)
YAML result logging + preset script (#2657)
Diffstat (limited to 'examples/server')
-rw-r--r--  examples/server/server.cpp  2
1 file changed, 1 insertion, 1 deletion
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 89a3311f..b485a5ea 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -719,7 +719,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     fprintf(stdout, " -ts SPLIT --tensor-split SPLIT\n");
     fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
     fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
-    fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
+    fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
     fprintf(stdout, " -nommq, --no-mul-mat-q\n");
     fprintf(stdout, " use cuBLAS instead of custom mul_mat_q CUDA kernels.\n");
     fprintf(stdout, " Not recommended since this is both slower and uses more VRAM.\n");