diff options
Diffstat (limited to 'examples/batched')
-rw-r--r-- | examples/batched/batched.cpp | 3 |
1 files changed, 2 insertions, 1 deletions
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 9be7eb56..dde4d5a0 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -80,6 +80,7 @@ int main(int argc, char ** argv) {
     ctx_params.seed = 1234;
     ctx_params.n_ctx = n_kv_req;
     ctx_params.n_batch = std::max(n_len, n_parallel);
+    ctx_params.n_parallel = n_parallel;
     ctx_params.n_threads = params.n_threads;
     ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
 
@@ -132,7 +133,7 @@ int main(int argc, char ** argv) {
     // assign the system KV cache to all parallel sequences
     // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
     for (int32_t i = 1; i < n_parallel; ++i) {
-        llama_kv_cache_seq_cp(ctx, 0, i, 0, batch.n_tokens);
+        llama_kv_cache_seq_cp(ctx, 0, i, -1, -1);
     }
 
     if (n_parallel > 1) {