diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-04-24 11:08:36 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-24 11:08:36 +0200 |
commit | 28103f4832e301a9c84d44ff0df9d75d46ab6c76 (patch) | |
tree | 8ba391e3a7e0ce9a20d4b41782ef133bd7e32738 /llama.h | |
parent | c0d1b3e03e27634ac2871761f5033cf9324d472d (diff) |
Server: fix seed for multiple slots (#6835)
* Server: add tests for consistent results
* sampling: separate rng per sampling context
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 9 |
1 file changed, 7 insertions, 2 deletions
@@ -987,7 +987,7 @@ extern "C" {
             struct llama_context * ctx,
           llama_token_data_array * candidates);

-    /// @details Randomly selects a token from the candidates based on their probabilities.
+    /// @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx.
     LLAMA_API llama_token llama_sample_token(
             struct llama_context * ctx,
           llama_token_data_array * candidates);
@@ -1074,8 +1074,9 @@ extern "C" {
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL

-#include <vector>
+#include <random>
 #include <string>
+#include <vector>

 struct ggml_tensor;

@@ -1112,6 +1113,10 @@ std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,
         llama_partial_utf8   partial_start);

+// Randomly selects a token from the candidates based on their probabilities using given std::mt19937.
+// This is a temporary workaround in order to fix race conditions when sampling with multiple sequences.
+llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_data_array * candidates, std::mt19937 & rng);
+
 #endif // LLAMA_API_INTERNAL

 #endif // LLAMA_H