path: root/common/common.h
author    Georgi Gerganov <ggerganov@gmail.com>  2023-09-04 22:26:24 +0300
committer GitHub <noreply@github.com>            2023-09-04 22:26:24 +0300
commit    e36ecdccc8754783f93ad3ac8a09e540101f2ca0 (patch)
tree      160ce80ac89ad8d938c5a58bcb5aae4cdb020636 /common/common.h
parent    bd33e5ab92e7f214205792fc1cd9ca28e810f897 (diff)
build : on Mac OS enable Metal by default (#2901)
* build : on Mac OS enable Metal by default
* make : try to fix build on Linux
* make : move targets back to the top
* make : fix target clean
* llama : enable GPU inference by default with Metal
* llama : fix vocab_only logic when GPU is enabled
* common : better `n_gpu_layers` assignment
* readme : update Metal instructions
* make : fix merge conflict remnants
* gitignore : metal
Diffstat (limited to 'common/common.h')
-rw-r--r--  common/common.h  2
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/common.h b/common/common.h
index 105fb09e..85ac0df9 100644
--- a/common/common.h
+++ b/common/common.h
@@ -34,7 +34,7 @@ struct gpt_params {
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
     int32_t n_draft = 16; // number of tokens to draft during speculative decoding
     int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
-    int32_t n_gpu_layers = 0; // number of layers to store in VRAM
+    int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
     int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
     float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
     int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
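
The substance of the patch is that the default for `n_gpu_layers` moves from `0` to `-1`, so `-1` acts as a "use the build's default" sentinel; per the commit message, Metal builds then enable GPU inference by default while an explicit user value still takes precedence. The sketch below is a minimal, illustrative example of how such a sentinel can be resolved at startup; `resolve_n_gpu_layers` and `kDefaultMetalLayers` are hypothetical names for this example, not identifiers from the repository, and the real resolution logic lives elsewhere in llama.cpp.

// Illustrative sketch only, assuming a -1 "use default" sentinel as introduced
// by this commit. Not the upstream llama.cpp implementation.
#include <cstdint>
#include <cstdio>

// Hypothetical helper: map the sentinel to a build-dependent default.
static int32_t resolve_n_gpu_layers(int32_t requested) {
    if (requested >= 0) {
        return requested; // explicit user choice (e.g. --n-gpu-layers 35) wins
    }
#if defined(GGML_USE_METAL)
    const int32_t kDefaultMetalLayers = 1; // assumed default when Metal is compiled in
    return kDefaultMetalLayers;
#else
    return 0; // no GPU backend available: keep all layers on the CPU
#endif
}

int main() {
    int32_t n_gpu_layers = -1; // the new default value from this patch
    n_gpu_layers = resolve_n_gpu_layers(n_gpu_layers);
    std::printf("resolved n_gpu_layers = %d\n", n_gpu_layers);
    return 0;
}

The design point is that `0` previously meant both "user asked for CPU-only" and "no preference", so the build could not safely enable Metal offloading by default; reserving `-1` for "no preference" removes that ambiguity.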