Diffstat (limited to 'include/llama.h')
-rw-r--r-- | include/llama.h | 8
1 files changed, 8 insertions, 0 deletions
diff --git a/include/llama.h b/include/llama.h
index 133c2f0e..b2906693 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -361,6 +361,14 @@ extern "C" {
         enum llama_ftype ftype;              // quantize to this llama_ftype
         enum ggml_type output_tensor_type;   // output tensor type
         enum ggml_type token_embedding_type; // token embeddings tensor type
+        enum ggml_type attn_q_type;          // attention query tensor type
+        enum ggml_type attn_k_type;          // attention key tensor type
+        enum ggml_type attn_v_type;          // attention value tensor type
+        enum ggml_type attn_qkv_type;        // attention query-key-value tensor type
+        enum ggml_type attn_output_type;     // attention output tensor type
+        enum ggml_type ffn_gate_type;        // feedforward network gate type
+        enum ggml_type ffn_down_type;        // feedforward network down type
+        enum ggml_type ffn_up_type;          // feedforward network up type
         bool allow_requantize;               // allow quantizing non-f32/f16 tensors
         bool quantize_output_tensor;         // quantize output.weight
         bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
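
For context, a minimal sketch of how a caller might use these new per-tensor overrides through the existing llama_model_quantize() API. This is illustrative only: the file names are placeholders, and it assumes the new fields follow the same convention as output_tensor_type / token_embedding_type, where the value returned by llama_model_quantize_default_params() (GGML_TYPE_COUNT, i.e. "unset") leaves the tensor to the mix chosen by ftype.

#include "llama.h"

int main(void) {
    llama_backend_init();

    // Start from the library defaults, then override selected tensor classes.
    // Hypothetical choices: fields left untouched are assumed to keep the
    // behaviour implied by params.ftype.
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype                = LLAMA_FTYPE_MOSTLY_Q4_K_M; // base quantization mix
    params.attn_v_type          = GGML_TYPE_Q6_K;            // keep attention V at higher precision
    params.ffn_down_type        = GGML_TYPE_Q5_K;            // likewise for the FFN down projection
    params.token_embedding_type = GGML_TYPE_Q8_0;

    // Placeholder file names; llama_model_quantize() returns 0 on success.
    uint32_t rc = llama_model_quantize("model-f16.gguf", "model-custom.gguf", &params);

    llama_backend_free();
    return rc == 0 ? 0 : 1;
}

The separate attn_q_type / attn_k_type / attn_v_type fields presumably apply to models whose attention projections are stored as distinct tensors, while attn_qkv_type covers architectures with a fused QKV tensor; whichever fields stay unset fall back to the ftype-driven defaults.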