From e36ecdccc8754783f93ad3ac8a09e540101f2ca0 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 4 Sep 2023 22:26:24 +0300
Subject: build : on Mac OS enable Metal by default (#2901)

* build : on Mac OS enable Metal by default

* make : try to fix build on Linux

* make : move targets back to the top

* make : fix target clean

* llama : enable GPU inference by default with Metal

* llama : fix vocab_only logic when GPU is enabled

* common : better `n_gpu_layers` assignment

* readme : update Metal instructions

* make : fix merge conflict remnants

* gitignore : metal
---
 examples/main/main.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 922b9a98..9201b53b 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -151,14 +151,6 @@ int main(int argc, char ** argv) {
         LOG_TEE("%s: warning: scaling RoPE frequency by %g (default 1.0)\n", __func__, params.rope_freq_scale);
     }
 
-    if (params.n_ctx > 2048) {
-        // TODO: determine the actual max context of the model (e.g. 4096 for LLaMA v2) and use that instead of 2048
-        LOG_TEE("%s: warning: base model only supports context sizes no greater than 2048 tokens (%d specified)\n", __func__, params.n_ctx);
-    } else if (params.n_ctx < 8) {
-        LOG_TEE("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
-        params.n_ctx = 8;
-    }
-
     LOG_TEE("%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
     if (params.seed == LLAMA_DEFAULT_SEED) {
@@ -194,6 +186,13 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (params.n_ctx > llama_n_ctx(ctx)) {
+        LOG_TEE("%s: warning: base model only supports context sizes no greater than %d tokens (%d specified)\n", __func__, llama_n_ctx(ctx), params.n_ctx);
+    } else if (params.n_ctx < 8) {
+        LOG_TEE("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
+        params.n_ctx = 8;
+    }
+
     // print system information
     {
         LOG_TEE("\n");
--
cgit v1.2.3
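
Note on the relocated check: the warning now compares the requested n_ctx against whatever the created context reports via llama_n_ctx(ctx), rather than the old hardcoded 2048, which is why the block had to move below the model/context initialization. Below is a minimal standalone sketch of that logic; model_n_ctx and the plain printf logging are illustrative stand-ins, not the llama.cpp API.

// Standalone sketch of the relocated context-size check.
// model_n_ctx stands in for llama_n_ctx(ctx), which is only known
// after the model and context have been created.
#include <cstdio>

struct gpt_params_sketch {
    int n_ctx = 4096; // requested context size (e.g. from --ctx-size)
};

int main() {
    gpt_params_sketch params;
    const int model_n_ctx = 2048; // stand-in for llama_n_ctx(ctx)

    if (params.n_ctx > model_n_ctx) {
        // warn only: the request is kept, but the model was not trained for it
        printf("warning: model only supports context sizes up to %d tokens (%d specified)\n",
               model_n_ctx, params.n_ctx);
    } else if (params.n_ctx < 8) {
        // too small to be usable: clamp to the minimum
        printf("warning: minimum context size is 8, using minimum size\n");
        params.n_ctx = 8;
    }

    printf("n_ctx = %d\n", params.n_ctx);
    return 0;
}

As in the patched code, an oversized request is only warned about, while an undersized one is clamped up to 8.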