summary refs log tree commit diff
path: root/src/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/llama.cpp')
-rw-r--r-- src/llama.cpp 45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/llama.cpp b/src/llama.cpp
index af8ef9be..c0f147b9 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -20849,6 +20849,10 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
return model->vocab.type;
}
+// Returns a pointer to the vocabulary stored inside `model`.
+// The pointer refers to the model's own `vocab` member; nothing is copied.
+const struct llama_vocab* llama_get_model_vocab(const struct llama_model* model) {
+    const struct llama_vocab& model_vocab = model->vocab;
+    return &model_vocab;
+}
+
enum llama_rope_type llama_rope_type(const struct llama_model * model) {
switch (model->arch) {
// these models do not use RoPE
@@ -23280,6 +23284,11 @@ void llama_sample_top_n_sigma(struct llama_context * ctx, llama_token_data_array
llama_sample_top_n_sigma_impl(ctx ? &ctx->sampling : nullptr, candidates_p, top_n_sigma);
}
+
+// Runs the DRY sampler `smpl` over `candidates_p` by forwarding to
+// llama_sampler_dry_apply().
+//
+// `ctx` comes first in the parameter list, like in the neighbouring
+// llama_sample_* functions, but this function does not read it.
+void llama_sample_dry(struct llama_context* ctx, struct llama_sampler_dry* smpl, llama_token_data_array* candidates_p) {
+    (void) ctx; // intentionally unused
+    llama_sampler_dry_apply(smpl, candidates_p);
+}
+
void llama_sample_repetition_penalties(
struct llama_context * ctx,
llama_token_data_array * candidates,
@@ -23327,6 +23336,42 @@ int llama_split_path(char * split_path, size_t maxlen, const char * path_prefix,
return 0;
}
+// Creates a DRY sampler by delegating to llama_sampler_init_dry_impl().
+//
+// `vocab` is dereferenced unconditionally (both to pass it by reference and to
+// query its token count), so it must not be nullptr. The remaining arguments
+// are forwarded unchanged.
+struct llama_sampler_dry * llama_sampler_init_dry(const struct llama_vocab* vocab, float dry_multiplier, float dry_base, int32_t dry_allowed_length, int32_t dry_penalty_last_n, const char** seq_breakers, size_t num_breakers) {
+    const auto vocab_token_count = vocab->n_tokens();
+    return llama_sampler_init_dry_impl(
+        *vocab,
+        vocab_token_count,
+        dry_multiplier,
+        dry_base,
+        dry_allowed_length,
+        dry_penalty_last_n,
+        seq_breakers,
+        num_breakers);
+}
+
+// Discards the sampler's accumulated state: the per-token repeat bookkeeping
+// (`dry_max_token_repeat`, `dry_repeat_count`) and the recorded token history
+// (`last_tokens`). The processed sequence breakers are not touched.
+void llama_sampler_dry_reset(struct llama_sampler_dry* smpl) {
+    auto& sampler = *smpl;
+    sampler.dry_max_token_repeat.clear();
+    sampler.dry_repeat_count.clear();
+    sampler.last_tokens.clear();
+}
+
+// Destroys a DRY sampler with `delete`. Passing nullptr is a no-op.
+void llama_sampler_dry_free(struct llama_sampler_dry* smpl) {
+    if (smpl == nullptr) {
+        return;
+    }
+    delete smpl;
+}
+
+// Returns an independent copy of `smpl`: its parameters, the already-processed
+// sequence breakers, the repeat bookkeeping and the token history.
+//
+// The copy is made by copy-construction rather than by going through
+// llama_sampler_init_dry(): that function dereferences its vocab argument, so
+// it cannot be called without a vocabulary, and the breakers have already been
+// processed, so no vocabulary is needed here anyway.
+//
+// Returns nullptr when `smpl` is nullptr. The result is allocated with `new`
+// and should be released with llama_sampler_dry_free().
+struct llama_sampler_dry* llama_sampler_dry_clone(struct llama_sampler_dry* smpl) {
+    if (smpl == nullptr) {
+        return nullptr;
+    }
+    // NOTE(review): relies on llama_sampler_dry being copy-constructible
+    // (its containers are already copy-assigned elsewhere) - confirm against
+    // the struct definition.
+    return new llama_sampler_dry(*smpl);
+}
+
+// Records `token` in the sampler's token history (`last_tokens`).
+//
+// Nothing is recorded when the sampler's parameters make this function skip
+// the history: a multiplier of exactly 0, a base below 1, or a
+// `dry_penalty_last_n` of 0.
+void llama_sampler_dry_accept(struct llama_sampler_dry* smpl, llama_token token) {
+    // Written as !(base < 1) rather than (base >= 1) so a NaN base is treated
+    // the same way as before.
+    const bool records_history =
+        smpl->dry_multiplier != 0.0f &&
+        !(smpl->dry_base < 1.0f) &&
+        smpl->dry_penalty_last_n != 0;
+
+    if (records_history) {
+        smpl->last_tokens.push_back(token);
+    }
+}
+
int llama_split_prefix(char * dest, size_t maxlen, const char * split_path, int split_no, int split_count) {
std::string str_split_path(split_path);
char postfix[32];