summaryrefslogtreecommitdiff
path: root/llama.h
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2023-10-20 21:07:23 +0300
committerGitHub <noreply@github.com>2023-10-20 21:07:23 +0300
commitd1031cf49c3b958b915fd558e23453471c29ac33 (patch)
tree14fa2bc6d54d5e27bd1e8bfd6fa4dbf894dbe6b9 /llama.h
parent8cf19d60dc93809db8e51fedc811595eed9134c5 (diff)
sampling : refactor init to use llama_sampling_params (#3696)
* sampling : refactor init to use llama_sampling_params * llama : combine repetition, frequency and presence penalties in 1 call * examples : remove embd-input and gptneox-wip * sampling : rename penalty params + reduce size of "prev" vector * sampling : add llama_sampling_print helper * sampling : hide prev behind API and apply #3661 ggml-ci
Diffstat (limited to 'llama.h')
-rw-r--r--llama.h16
1 files changed, 5 insertions, 11 deletions
diff --git a/llama.h b/llama.h
index 51010e03..306f5b38 100644
--- a/llama.h
+++ b/llama.h
@@ -560,21 +560,15 @@ extern "C" {
LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, uint32_t seed);
/// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
- LLAMA_API void llama_sample_repetition_penalty(
- struct llama_context * ctx,
- llama_token_data_array * candidates,
- const llama_token * last_tokens,
- size_t last_tokens_size,
- float penalty);
-
/// @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
- LLAMA_API void llama_sample_frequency_and_presence_penalties(
+ LLAMA_API void llama_sample_repetition_penalties(
struct llama_context * ctx,
llama_token_data_array * candidates,
const llama_token * last_tokens,
- size_t last_tokens_size,
- float alpha_frequency,
- float alpha_presence);
+ size_t penalty_last_n,
+ float penalty_repeat,
+ float penalty_freq,
+ float penalty_present);
/// @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, the logits must be directly extracted from the original generation context without being sorted.