diff options
author | FK <sozforex@gmail.com> | 2023-09-13 08:50:46 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-13 08:50:46 +0200 |
commit | 84e723653ca99d51a74b454984acf2c077468561 (patch) | |
tree | 62ddb7a849eb2ecf10dc831bf4ea960320e4dd5f /common/common.cpp | |
parent | b52b29ab9d601bb298050bcd2261169bc917ba2c (diff) |
speculative: add --n-gpu-layers-draft option (#3063)
Diffstat (limited to 'common/common.cpp')
-rw-r--r-- | common/common.cpp | 13 |
1 file changed, 13 insertions, 0 deletions
```diff
diff --git a/common/common.cpp b/common/common.cpp
index 6e5d5b4d..afc9b8a5 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -375,6 +375,17 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n");
             fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
 #endif
+        } else if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
+            params.n_gpu_layers_draft = std::stoi(argv[i]);
+#else
+            fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers-draft option will be ignored\n");
+            fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n");
+#endif
         } else if (arg == "--main-gpu" || arg == "-mg") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -664,6 +675,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
 #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
     printf(" -ngl N, --n-gpu-layers N\n");
     printf(" number of layers to store in VRAM\n");
+    printf(" -ngld N, --n-gpu-layers-draft N\n");
+    printf(" number of layers to store in VRAM for the draft model\n");
     printf(" -ts SPLIT --tensor-split SPLIT\n");
     printf(" how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
     printf(" -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
```