diff options
Diffstat (limited to 'ggml.h')
-rw-r--r-- | ggml.h | 53 |
1 files changed, 49 insertions, 4 deletions
@@ -283,6 +283,20 @@ const type prefix##3 = (pointer)->array[3]; \ GGML_UNUSED(prefix##3); +#define GGML_TENSOR_UNARY_OP_LOCALS \ + GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \ + GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \ + GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ + GGML_TENSOR_LOCALS(size_t, nb, dst, nb) + +#define GGML_TENSOR_BINARY_OP_LOCALS \ + GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \ + GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \ + GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \ + GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \ + GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ + GGML_TENSOR_LOCALS(size_t, nb, dst, nb) + #ifdef __cplusplus extern "C" { #endif @@ -381,6 +395,7 @@ extern "C" { GGML_OP_GROUP_NORM, GGML_OP_MUL_MAT, + GGML_OP_MUL_MAT_ID, GGML_OP_OUT_PROD, GGML_OP_SCALE, @@ -407,8 +422,8 @@ extern "C" { GGML_OP_CONV_TRANSPOSE_2D, GGML_OP_POOL_1D, GGML_OP_POOL_2D, - GGML_OP_UPSCALE, // nearest interpolate + GGML_OP_ARGSORT, GGML_OP_FLASH_ATTN, GGML_OP_FLASH_FF, @@ -448,7 +463,9 @@ extern "C" { GGML_UNARY_OP_GELU, GGML_UNARY_OP_GELU_QUICK, GGML_UNARY_OP_SILU, - GGML_UNARY_OP_LEAKY + GGML_UNARY_OP_LEAKY, + + GGML_UNARY_OP_COUNT, }; enum ggml_object_type { @@ -631,6 +648,9 @@ extern "C" { GGML_API const char * ggml_op_name (enum ggml_op op); GGML_API const char * ggml_op_symbol(enum ggml_op op); + GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op); + GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name + GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor); GGML_API bool ggml_is_quantized(enum ggml_type type); @@ -1027,6 +1047,15 @@ extern "C" { struct ggml_tensor * a, struct ggml_tensor * b); + // indirect matrix multiplication + // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b) + GGML_API struct ggml_tensor * ggml_mul_mat_id( + struct ggml_context * ctx, + struct ggml_tensor * as[], + struct ggml_tensor * ids, + int id, + struct ggml_tensor * b); + // A: m columns, n rows, // B: p columns, n rows, // result is m columns, p rows @@ -1520,6 +1549,23 @@ extern "C" { struct ggml_tensor * a, int scale_factor); + // sort rows + enum ggml_sort_order { + GGML_SORT_ASC, + GGML_SORT_DESC, + }; + + GGML_API struct ggml_tensor * ggml_argsort( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_sort_order order); + + // top k elements per row + GGML_API struct ggml_tensor * ggml_top_k( + struct ggml_context * ctx, + struct ggml_tensor * a, + int k); + GGML_API struct ggml_tensor * ggml_flash_attn( struct ggml_context * ctx, struct ggml_tensor * q, @@ -1581,7 +1627,6 @@ extern "C" { int kh); // used in sam - GGML_API struct ggml_tensor * ggml_add_rel_pos( struct ggml_context * ctx, struct ggml_tensor * a, @@ -1756,7 +1801,7 @@ extern "C" { GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads); GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph); - GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1); + GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1); GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst); GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph); |