From 84e723653ca99d51a74b454984acf2c077468561 Mon Sep 17 00:00:00 2001
From: FK <sozforex@gmail.com>
Date: Wed, 13 Sep 2023 08:50:46 +0200
Subject: speculative: add --n-gpu-layers-draft option (#3063)

---
 common/common.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'common/common.h')

diff --git a/common/common.h b/common/common.h
index 012bf5e1..238635ae 100644
--- a/common/common.h
+++ b/common/common.h
@@ -38,6 +38,7 @@ struct gpt_params {
     int32_t n_draft                         = 16;   // number of tokens to draft during speculative decoding
     int32_t n_chunks                        = -1;   // max number of chunks to process (-1 = unlimited)
     int32_t n_gpu_layers                    = -1;   // number of layers to store in VRAM (-1 - use default)
+    int32_t n_gpu_layers_draft              = -1;   // number of layers to store in VRAM for the draft model (-1 - use default)
     int32_t main_gpu                        = 0;    // the GPU that is used for scratch and small tensors
     float   tensor_split[LLAMA_MAX_DEVICES] = {0};  // how split tensors should be distributed across GPUs
     int32_t n_probs                         = 0;    // if greater than 0, output the probabilities of top n_probs tokens.
-- 
cgit v1.2.3