author    Georgi Gerganov <ggerganov@gmail.com>      2023-07-18 14:24:43 +0300
committer GitHub <noreply@github.com>                2023-07-18 14:24:43 +0300
commit    d01bccde9f759b24449fdaa16306b406a50eb367 (patch)
tree      a1c351c6732f399f540a1e91f957eb61db535bbc /examples/common.h
parent    6cbf9dfb32f0e23ed3afd02d30ab066ed53e2c4d (diff)
ci : integrate with ggml-org/ci (#2250)
* ci : run ctest ggml-ci
* ci : add open llama 3B-v2 tests ggml-ci
* ci : disable wget progress output ggml-ci
* ci : add open llama 3B-v2 tg tests for q4 and q5 quantizations ggml-ci
* tests : try to fix tail free sampling test ggml-ci
* ci : add K-quants ggml-ci
* ci : add short perplexity tests ggml-ci
* ci : add README.md
* ppl : add --chunks argument to limit max number of chunks ggml-ci
* ci : update README
Diffstat (limited to 'examples/common.h')
-rw-r--r--  examples/common.h | 1 +
1 file changed, 1 insertion(+), 0 deletions(-)
diff --git a/examples/common.h b/examples/common.h
index f52fef62..037a4eec 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -28,6 +28,7 @@ struct gpt_params {
int32_t n_ctx = 512; // context size
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
+ int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
int32_t n_gpu_layers = 0; // number of layers to store in VRAM
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
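
For context, a minimal sketch of how a field like n_chunks is typically consumed on the perplexity side; the helper name compute_chunk_count and its surrounding setup are hypothetical illustrations, not the actual code in examples/perplexity:

    #include <algorithm>
    #include <cstdint>
    #include <cstddef>

    // Hypothetical helper: clamp the number of perplexity chunks to n_chunks.
    // A negative value (the -1 default above) means "process every available chunk".
    static int compute_chunk_count(int32_t n_chunks, size_t n_tokens, int32_t n_ctx) {
        const int n_chunk_max = (int) (n_tokens / n_ctx); // full context-sized chunks in the input
        return n_chunks < 0 ? n_chunk_max
                            : std::min((int) n_chunks, n_chunk_max);
    }

Under this reading, passing --chunks 4 to the perplexity tool would evaluate only the first 4 context-sized chunks, which is what keeps the short perplexity tests added in this commit cheap enough to run in CI.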