summary | refs | log | tree | commit | diff
path: root/examples/common.cpp
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2023-07-18 14:24:43 +0300
committer: GitHub <noreply@github.com> 2023-07-18 14:24:43 +0300
commit: d01bccde9f759b24449fdaa16306b406a50eb367 (patch)
tree: a1c351c6732f399f540a1e91f957eb61db535bbc /examples/common.cpp
parent: 6cbf9dfb32f0e23ed3afd02d30ab066ed53e2c4d (diff)
ci : integrate with ggml-org/ci (#2250)
* ci : run ctest ggml-ci
* ci : add open llama 3B-v2 tests ggml-ci
* ci : disable wget progress output ggml-ci
* ci : add open llama 3B-v2 tg tests for q4 and q5 quantizations ggml-ci
* tests : try to fix tail free sampling test ggml-ci
* ci : add K-quants ggml-ci
* ci : add short perplexity tests ggml-ci
* ci : add README.md
* ppl : add --chunks argument to limit max number of chunks ggml-ci
* ci : update README
Diffstat (limited to 'examples/common.cpp')
-rw-r--r-- examples/common.cpp 7
1 files changed, 7 insertions, 0 deletions
diff --git a/examples/common.cpp b/examples/common.cpp
index 8705127c..fd6dbc0e 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -279,6 +279,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
break;
}
params.n_keep = std::stoi(argv[i]);
+ } else if (arg == "--chunks") {
+ if (++i >= argc) {
+ invalid_param = true;
+ break;
+ }
+ params.n_chunks = std::stoi(argv[i]);
} else if (arg == "-m" || arg == "--model") {
if (++i >= argc) {
invalid_param = true;
@@ -515,6 +521,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
+ fprintf(stderr, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
if (llama_mlock_supported()) {
fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
}