summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 23
1 files changed, 19 insertions, 4 deletions
diff --git a/llama.h b/llama.h
index 90aa5372..40dcf54e 100644
--- a/llama.h
+++ b/llama.h
@@ -388,6 +388,7 @@ extern "C" {
LLAMA_API int32_t llama_n_vocab (const struct llama_model * model);
LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
LLAMA_API int32_t llama_n_embd (const struct llama_model * model);
+ LLAMA_API int32_t llama_n_layer (const struct llama_model * model);
// Get the model's RoPE frequency scaling factor
LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
@@ -435,10 +436,24 @@ extern "C" {
// Returns 0 on success
LLAMA_API int32_t llama_model_apply_lora_from_file(
const struct llama_model * model,
- const char * path_lora,
- float scale,
- const char * path_base_model,
- int32_t n_threads);
+ const char * path_lora,
+ float scale,
+ const char * path_base_model,
+ int32_t n_threads);
+
+ // Apply a loaded control vector to a llama_context, or if data is NULL, clear
+ // the currently loaded vector.
+ // n_embd should be the size of a single layer's control, and data should point
+ // to an n_embd x n_layers buffer starting from layer 1.
+ // il_start and il_end are the layer range the vector should apply to (both inclusive)
+ // See llama_control_vector_load in common to load a control vector.
+ LLAMA_API int32_t llama_control_vector_apply(
+ struct llama_context * lctx,
+ const float * data,
+ size_t len,
+ int32_t n_embd,
+ int32_t il_start,
+ int32_t il_end);
//
// KV cache