diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-10-20 21:07:23 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-20 21:07:23 +0300 |
commit | d1031cf49c3b958b915fd558e23453471c29ac33 (patch) | |
tree | 14fa2bc6d54d5e27bd1e8bfd6fa4dbf894dbe6b9 /examples/parallel/parallel.cpp | |
parent | 8cf19d60dc93809db8e51fedc811595eed9134c5 (diff) |
sampling : refactor init to use llama_sampling_params (#3696)
* sampling : refactor init to use llama_sampling_params
* llama : combine repetition, frequency and presence penalties in 1 call
* examples : remove embd-input and gptneox-wip
* sampling : rename penalty params + reduce size of "prev" vector
* sampling : add llama_sampling_print helper
* sampling : hide prev behind API and apply #3661
ggml-ci
Diffstat (limited to 'examples/parallel/parallel.cpp')
-rw-r--r-- | examples/parallel/parallel.cpp | 4 |
1 file changed, 2 insertions, 2 deletions
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index 69f9526a..eb64adef 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -157,7 +157,7 @@ int main(int argc, char ** argv) { for (size_t i = 0; i < clients.size(); ++i) { auto & client = clients[i]; client.id = i; - client.ctx_sampling = llama_sampling_init(params); + client.ctx_sampling = llama_sampling_init(params.sparams); } std::vector<llama_token> tokens_system; @@ -330,7 +330,7 @@ int main(int argc, char ** argv) { const llama_token id = llama_sampling_sample(client.ctx_sampling, ctx, NULL, client.i_batch - i); - llama_sampling_accept(client.ctx_sampling, ctx, id); + llama_sampling_accept(client.ctx_sampling, ctx, id, true); if (client.n_decoded == 1) { // start measuring generation time after the first token to make sure all concurrent clients |