From e59fcb2bc129881f4a269fee748fb38bce0a64de Mon Sep 17 00:00:00 2001
From: Christian Demsar
Date: Thu, 10 Aug 2023 10:28:27 -0400
Subject: Add --n-predict -2 for stopping generation on full context (#2565)

---
 examples/common.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'examples/common.cpp')

diff --git a/examples/common.cpp b/examples/common.cpp
index 4d3ba9bb..9f8aab9a 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -543,7 +543,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
     fprintf(stdout, " -f FNAME, --file FNAME\n");
     fprintf(stdout, " prompt file to start generation.\n");
-    fprintf(stdout, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict);
+    fprintf(stdout, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
     fprintf(stdout, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
     fprintf(stdout, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stdout, " -gqa N, --gqa N grouped-query attention factor (TEMP!!! use 8 for LLaMAv2 70B) (default: %d)\n", params.n_gqa);
--
cgit v1.2.3