diff options
| author | Johannes Gäßler <johannesg@5d6.de> | 2023-08-28 17:59:39 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-08-28 17:59:39 +0200 |
| commit | 6b73ef120114beb5664ea94aab48d07ed248ee52 (patch) | |
| tree | 6d9c777a34a43f7b3ad6185df9639bab9be5c5cd /examples/server | |
| parent | 75fafcbcccc280a5b3883bc76d0a2dabf474d094 (diff) | |
YAML result logging + preset script (#2657)
Diffstat (limited to 'examples/server')
| -rw-r--r-- | examples/server/server.cpp | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 89a3311f..b485a5ea 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -719,7 +719,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params, fprintf(stdout, "  -ts SPLIT --tensor-split SPLIT\n"); fprintf(stdout, "                        how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); fprintf(stdout, "  -mg i, --main-gpu i   the GPU to use for scratch and small tensors\n"); - fprintf(stdout, "  -lv, --low-vram don't allocate VRAM scratch buffer\n"); + fprintf(stdout, "  -lv, --low-vram       don't allocate VRAM scratch buffer\n"); fprintf(stdout, "  -nommq, --no-mul-mat-q\n"); fprintf(stdout, "                        use cuBLAS instead of custom mul_mat_q CUDA kernels.\n"); fprintf(stdout, "                        Not recommended since this is both slower and uses more VRAM.\n"); |
