summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/llama.h8
1 file changed, 8 insertions, 0 deletions
diff --git a/include/llama.h b/include/llama.h
index 133c2f0e..b2906693 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -361,6 +361,14 @@ extern "C" {
enum llama_ftype ftype; // quantize to this llama_ftype
enum ggml_type output_tensor_type; // output tensor type
enum ggml_type token_embedding_type; // token embeddings tensor type
+ enum ggml_type attn_q_type; // attention query tensor type
+ enum ggml_type attn_k_type; // attention key tensor type
+ enum ggml_type attn_v_type; // attention value tensor type
+ enum ggml_type attn_qkv_type; // attention query-key-value tensor type
+ enum ggml_type attn_output_type; // attention output tensor type
+ enum ggml_type ffn_gate_type; // feedforward network gate type
+ enum ggml_type ffn_down_type; // feedforward network down type
+ enum ggml_type ffn_up_type; // feedforward network up type
bool allow_requantize; // allow quantizing non-f32/f16 tensors
bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored