summaryrefslogtreecommitdiff
path: root/common/common.cpp
diff options
context:
space:
mode:
author	FK <sozforex@gmail.com>	2023-09-13 08:50:46 +0200
committer	GitHub <noreply@github.com>	2023-09-13 08:50:46 +0200
commit	84e723653ca99d51a74b454984acf2c077468561 (patch)
tree	62ddb7a849eb2ecf10dc831bf4ea960320e4dd5f /common/common.cpp
parent	b52b29ab9d601bb298050bcd2261169bc917ba2c (diff)
speculative: add --n-gpu-layers-draft option (#3063)
Diffstat (limited to 'common/common.cpp')
-rw-r--r--common/common.cpp13
1 file changed, 13 insertions(+), 0 deletions(-)
diff --git a/common/common.cpp b/common/common.cpp
index 6e5d5b4d..afc9b8a5 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -375,6 +375,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n");
fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
#endif
+ } else if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
+ if (++i >= argc) {
+ invalid_param = true;
+ break;
+ }
+#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
+ params.n_gpu_layers_draft = std::stoi(argv[i]);
+#else
+ fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n");
+ fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+#endif
} else if (arg == "--main-gpu" || arg == "-mg") {
if (++i >= argc) {
invalid_param = true;
@@ -664,6 +675,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
printf(" -ngl N, --n-gpu-layers N\n");
printf(" number of layers to store in VRAM\n");
+ printf(" -ngld N, --n-gpu-layers-draft N\n");
+ printf(" number of layers to store in VRAM for the draft model\n");
printf(" -ts SPLIT --tensor-split SPLIT\n");
printf(" how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
printf(" -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");