summary | refs | log | tree | commit | diff
path: root/ggml.h
diff options
context:
space:
mode:
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 53
1 files changed, 49 insertions, 4 deletions
diff --git a/ggml.h b/ggml.h
index 2f6787d4..a8f10cbd 100644
--- a/ggml.h
+++ b/ggml.h
@@ -283,6 +283,20 @@
const type prefix##3 = (pointer)->array[3]; \
GGML_UNUSED(prefix##3);
+#define GGML_TENSOR_UNARY_OP_LOCALS \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
+
+#define GGML_TENSOR_BINARY_OP_LOCALS \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -381,6 +395,7 @@ extern "C" {
GGML_OP_GROUP_NORM,
GGML_OP_MUL_MAT,
+ GGML_OP_MUL_MAT_ID,
GGML_OP_OUT_PROD,
GGML_OP_SCALE,
@@ -407,8 +422,8 @@ extern "C" {
GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
-
GGML_OP_UPSCALE, // nearest interpolate
+ GGML_OP_ARGSORT,
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
@@ -448,7 +463,9 @@ extern "C" {
GGML_UNARY_OP_GELU,
GGML_UNARY_OP_GELU_QUICK,
GGML_UNARY_OP_SILU,
- GGML_UNARY_OP_LEAKY
+ GGML_UNARY_OP_LEAKY,
+
+ GGML_UNARY_OP_COUNT,
};
enum ggml_object_type {
@@ -631,6 +648,9 @@ extern "C" {
GGML_API const char * ggml_op_name (enum ggml_op op);
GGML_API const char * ggml_op_symbol(enum ggml_op op);
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
+
GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_quantized(enum ggml_type type);
@@ -1027,6 +1047,15 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
+ // indirect matrix multiplication
+ // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
+ GGML_API struct ggml_tensor * ggml_mul_mat_id(
+ struct ggml_context * ctx,
+ struct ggml_tensor * as[],
+ struct ggml_tensor * ids,
+ int id,
+ struct ggml_tensor * b);
+
// A: m columns, n rows,
// B: p columns, n rows,
// result is m columns, p rows
@@ -1520,6 +1549,23 @@ extern "C" {
struct ggml_tensor * a,
int scale_factor);
+ // sort rows
+ enum ggml_sort_order {
+ GGML_SORT_ASC,
+ GGML_SORT_DESC,
+ };
+
+ GGML_API struct ggml_tensor * ggml_argsort(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ enum ggml_sort_order order);
+
+ // top k elements per row
+ GGML_API struct ggml_tensor * ggml_top_k(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int k);
+
GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx,
struct ggml_tensor * q,
@@ -1581,7 +1627,6 @@ extern "C" {
int kh);
// used in sam
-
GGML_API struct ggml_tensor * ggml_add_rel_pos(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -1756,7 +1801,7 @@ extern "C" {
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
- GGML_API struct ggml_cgraph * ggml_graph_view (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
+ GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);