diff options
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 6 |
1 files changed, 6 insertions, 0 deletions
@@ -759,6 +759,12 @@ extern "C" { // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens) LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch); + // Get the number of threads used for generation of a single token. + LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx); + + // Get the number of threads used for prompt and batch processing (multiple tokens). + LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx); + // Set whether to use causal attention or not // If set to true, the model will only attend to the past tokens LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn); |