Diffstat (limited to 'examples/common.cpp')
 examples/common.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/examples/common.cpp b/examples/common.cpp
index 478dbafc..32247cef 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -289,7 +289,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
+#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
             params.n_gpu_layers = std::stoi(argv[i]);
+#else
+            fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n");
+            fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+#endif
         } else if (arg == "--no-mmap") {
             params.use_mmap = false;
         } else if (arg == "--mtest") {
@@ -416,7 +421,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
- fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value\n");
+ fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
+ fprintf(stderr, " not recommended: doubles context memory required and no measurable increase in quality\n");
fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp);
fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
@@ -427,8 +433,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     if (llama_mmap_supported()) {
         fprintf(stderr, "  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
     }
+#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
     fprintf(stderr, "  -ngl N, --n-gpu-layers N\n");
     fprintf(stderr, "                        number of layers to store in VRAM\n");
+#endif
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
     fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");