Server: fix seed for multiple slots (#6835)

* Server: add tests for consistent results
* sampling: separate rng per sampling context
author: Johannes Gäßler <johannesg@5d6.de> 2024-04-24 11:08:36 +0200
committer: GitHub <noreply@github.com> 2024-04-24 11:08:36 +0200
commit: 28103f4832e301a9c84d44ff0df9d75d46ab6c76 (patch)
tree: 8ba391e3a7e0ce9a20d4b41782ef133bd7e32738 /llama.cpp
parent: c0d1b3e03e27634ac2871761f5033cf9324d472d (diff)
1 files changed, 5 insertions, 2 deletions
diff --git a/llama.cpp b/llama.cpp
index e4ca34bd..3a4a03d8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13667,7 +13667,7 @@ llama_token llama_sample_token_greedy(struct llama_context * ctx, llama_token_da
     return result;
 }
 
-llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates) {
+llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_data_array * candidates, std::mt19937 & rng) {
     GGML_ASSERT(ctx);
 
     const int64_t t_start_sample_us = ggml_time_us();
@@ -13680,7 +13680,6 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
     }
 
     std::discrete_distribution<> dist(probs.begin(), probs.end());
-    auto & rng = ctx->rng;
     int idx = dist(rng);
 
     llama_token result = candidates->data[idx].id;
@@ -13690,6 +13689,10 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
     return result;
 }
 
+llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates) {
+    return llama_sample_token_with_rng(ctx, candidates, ctx->rng);
+}
+
 void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token) {
     const int64_t t_start_sample_us = ggml_time_us();
author	Johannes Gäßler <johannesg@5d6.de>	2024-04-24 11:08:36 +0200
committer	GitHub <noreply@github.com>	2024-04-24 11:08:36 +0200
commit	28103f4832e301a9c84d44ff0df9d75d46ab6c76 (patch)
tree	8ba391e3a7e0ce9a20d4b41782ef133bd7e32738 /llama.cpp
parent	c0d1b3e03e27634ac2871761f5033cf9324d472d (diff)