Diffstat (limited to 'llama.h')
 llama.h | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)
@@ -388,6 +388,7 @@ extern "C" {
     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
     LLAMA_API int32_t llama_n_embd     (const struct llama_model * model);
+    LLAMA_API int32_t llama_n_layer    (const struct llama_model * model);
 
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
@@ -435,10 +436,24 @@ extern "C" {
     // Returns 0 on success
     LLAMA_API int32_t llama_model_apply_lora_from_file(
             const struct llama_model * model,
-                         const char * path_lora,
-                                float scale,
-                         const char * path_base_model,
-                              int32_t n_threads);
+                          const char * path_lora,
+                                 float scale,
+                          const char * path_base_model,
+                               int32_t n_threads);
+
+    // Apply a loaded control vector to a llama_context, or if data is NULL, clear
+    // the currently loaded vector.
+    // n_embd should be the size of a single layer's control, and data should point
+    // to an n_embd x n_layers buffer starting from layer 1.
+    // il_start and il_end are the layer range the vector should apply to (both inclusive)
+    // See llama_control_vector_load in common to load a control vector.
+    LLAMA_API int32_t llama_control_vector_apply(
+            struct llama_context * lctx,
+                     const float * data,
+                            size_t len,
+                           int32_t n_embd,
+                          int32_t il_start,
+                           int32_t il_end);
 
     //
     // KV cache
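For reference, a minimal sketch of how a caller might use the two new entry points together. The wrapper function, its name, and the exact-size check are illustrative assumptions and not part of this patch; only llama_n_embd, llama_n_layer, and llama_control_vector_apply come from the header above.

#include "llama.h"

// Hypothetical helper (not in the patch): applies a control vector to every
// layer of a context. data/len come from the caller's own loader;
// llama_control_vector_load in common/ is the helper the header comment cites.
static int32_t apply_control_vector(struct llama_context * ctx,
                                    const struct llama_model * model,
                                    const float * data, size_t len) {
    const int32_t n_embd  = llama_n_embd (model);
    const int32_t n_layer = llama_n_layer(model);

    // assume the buffer holds one n_embd-sized control per layer, layers 1..n_layer
    if (len != (size_t) n_embd * (size_t) n_layer) {
        return 1; // unexpected buffer size for this model
    }

    // il_start/il_end are both inclusive; here the vector covers all layers
    return llama_control_vector_apply(ctx, data, len, n_embd, 1, n_layer);
}

// Per the header comment, passing NULL clears the currently loaded vector:
//     llama_control_vector_apply(ctx, NULL, 0, 0, 0, 0);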