diff options
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 57 |
1 files changed, 54 insertions, 3 deletions
@@ -361,9 +361,60 @@ extern "C" { // KV cache // - // Returns the number of tokens in the KV cache - LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx), - "avoid using this, it will be removed in the future, instead - count the tokens in user code"); + // Information associated with an individual cell in the KV cache view. + struct llama_kv_cache_view_cell { + // The position for this cell. Takes KV cache shifts into account. + // May be negative if the cell is not populated. + llama_pos pos; + }; + + // An updateable view of the KV cache. + struct llama_kv_cache_view { + // Number of KV cache cells. This will be the same as the context size. + int32_t n_cells; + + // Maximum number of sequences that can exist in a cell. It's not an error + // if there are more sequences in a cell than this value, however they will + // not be visible in the view cells_sequences. + int32_t n_max_seq; + + // Number of tokens in the cache. For example, if there are two populated + // cells, the first with 1 sequence id in it and the second with 2 sequence + // ids then you'll have 3 tokens. + int32_t token_count; + + // Number of populated cache cells. + int32_t used_cells; + + // Maximum contiguous empty slots in the cache. + int32_t max_contiguous; + + // Index to the start of the max_contiguous slot range. Can be negative + // when cache is full. + int32_t max_contiguous_idx; + + // Information for an individual cell. + struct llama_kv_cache_view_cell * cells; + + // The sequences for each cell. There will be n_max_seq items per cell. + llama_seq_id * cells_sequences; + }; + + // Create an empty KV cache view. (use only for debugging purposes) + LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq); + + // Free a KV cache view. (use only for debugging purposes) + LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view); + + // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes) + LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view); + + // Returns the number of tokens in the KV cache (slow, use only for debug) + // If a KV cell has multiple sequences assigned to it, it will be counted multiple times + LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx); + + // Returns the number of used KV cells (i.e. have at least one sequence assigned to them) + LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx); // Clear the KV cache LLAMA_API void llama_kv_cache_clear( |