ci : integrate with ggml-org/ci (#2250)

* ci : run ctest ggml-ci
* ci : add open llama 3B-v2 tests ggml-ci
* ci : disable wget progress output ggml-ci
* ci : add open llama 3B-v2 tg tests for q4 and q5 quantizations ggml-ci
* tests : try to fix tail free sampling test ggml-ci
* ci : add K-quants ggml-ci
* ci : add short perplexity tests ggml-ci
* ci : add README.md
* ppl : add --chunks argument to limit max number of chunks ggml-ci
* ci : update README
author: Georgi Gerganov <ggerganov@gmail.com> 2023-07-18 14:24:43 +0300
committer: GitHub <noreply@github.com> 2023-07-18 14:24:43 +0300
commit: d01bccde9f759b24449fdaa16306b406a50eb367 (patch)
tree: a1c351c6732f399f540a1e91f957eb61db535bbc /examples/common.cpp
parent: 6cbf9dfb32f0e23ed3afd02d30ab066ed53e2c4d (diff)
1 files changed, 7 insertions, 0 deletions
diff --git a/examples/common.cpp b/examples/common.cpp
index 8705127c..fd6dbc0e 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -279,6 +279,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.n_keep = std::stoi(argv[i]);
+        } else if (arg == "--chunks") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.n_chunks = std::stoi(argv[i]);
         } else if (arg == "-m" || arg == "--model") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -515,6 +521,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
+    fprintf(stderr, "  --chunks N            max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
     if (llama_mlock_supported()) {
         fprintf(stderr, "  --mlock               force system to keep model in RAM rather than swapping or compressing\n");
     }
author	Georgi Gerganov <ggerganov@gmail.com>	2023-07-18 14:24:43 +0300
committer	GitHub <noreply@github.com>	2023-07-18 14:24:43 +0300
commit	d01bccde9f759b24449fdaa16306b406a50eb367 (patch)
tree	a1c351c6732f399f540a1e91f957eb61db535bbc /examples/common.cpp
parent	6cbf9dfb32f0e23ed3afd02d30ab066ed53e2c4d (diff)