Diffstat (limited to 'ggml.h')
-rw-r--r--  ggml.h  200
1 file changed, 193 insertions, 7 deletions
@@ -192,7 +192,7 @@
 #define GGML_MAX_DIMS          4
 #define GGML_MAX_NODES         4096
-#define GGML_MAX_PARAMS        16
+#define GGML_MAX_PARAMS        256
 #define GGML_MAX_CONTEXTS      64
 #define GGML_MAX_OPT           4
 #define GGML_DEFAULT_N_THREADS 4
@@ -262,12 +262,16 @@ extern "C" {
         GGML_OP_DUP,
         GGML_OP_ADD,
+        GGML_OP_ADD1,
+        GGML_OP_ACC,
         GGML_OP_SUB,
         GGML_OP_MUL,
         GGML_OP_DIV,
         GGML_OP_SQR,
         GGML_OP_SQRT,
+        GGML_OP_LOG,
         GGML_OP_SUM,
+        GGML_OP_SUM_ROWS,
         GGML_OP_MEAN,
         GGML_OP_REPEAT,
         GGML_OP_ABS,
@@ -277,12 +281,15 @@ extern "C" {
         GGML_OP_RELU,
         GGML_OP_GELU,
         GGML_OP_SILU,
+        GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
+        GGML_OP_RMS_NORM_BACK,
 
         GGML_OP_MUL_MAT,
 
         GGML_OP_SCALE,
+        GGML_OP_SET,
         GGML_OP_CPY,
         GGML_OP_CONT,
         GGML_OP_RESHAPE,
@@ -290,9 +297,13 @@ extern "C" {
         GGML_OP_PERMUTE,
         GGML_OP_TRANSPOSE,
         GGML_OP_GET_ROWS,
+        GGML_OP_GET_ROWS_BACK,
+        GGML_OP_DIAG,
         GGML_OP_DIAG_MASK_INF,
+        GGML_OP_DIAG_MASK_ZERO,
         GGML_OP_SOFT_MAX,
         GGML_OP_ROPE,
+        GGML_OP_ROPE_BACK,
         GGML_OP_ALIBI,
         GGML_OP_CONV_1D_1S,
         GGML_OP_CONV_1D_2S,
@@ -496,6 +507,29 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_add1(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    GGML_API struct ggml_tensor * ggml_acc(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                nb1,
+            size_t                nb2,
+            size_t                nb3,
+            size_t                offset);
+
+    GGML_API struct ggml_tensor * ggml_acc_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                nb1,
+            size_t                nb2,
+            size_t                nb3,
+            size_t                offset);
+
     GGML_API struct ggml_tensor * ggml_sub(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -519,12 +553,24 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_log(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    GGML_API struct ggml_tensor * ggml_log_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return scalar
-    // TODO: compute sum along rows
     GGML_API struct ggml_tensor * ggml_sum(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // sums along rows, with input shape [a,b,c,d] return shape [1,b,c,d]
+    GGML_API struct ggml_tensor * ggml_sum_rows(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // mean along rows
     GGML_API struct ggml_tensor * ggml_mean(
             struct ggml_context * ctx,
@@ -566,6 +612,13 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // a - x
+    // b - dy
+    GGML_API struct ggml_tensor * ggml_silu_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // normalize along rows
     // TODO: eps is hardcoded to 1e-5 for now
     GGML_API struct ggml_tensor * ggml_norm(
@@ -576,6 +629,13 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    // a - x
+    // b - dy
+    GGML_API struct ggml_tensor * ggml_rms_norm_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // A: m rows, n columns
     // B: p rows, n columns (i.e. we transpose it internally)
     // result is m columns, p rows
@@ -588,12 +648,66 @@ extern "C" {
     // operations on tensors without backpropagation
     //
 
-    // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_scale(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_scale_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
+    // b -> view(a,offset,nb1,nb2,nb3), return modified a
+    GGML_API struct ggml_tensor * ggml_set(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                nb1,
+            size_t                nb2,
+            size_t                nb3,
+            size_t                offset);
+
+    // b -> view(a,offset,nb1,nb2,nb3), return view(a)
+    GGML_API struct ggml_tensor * ggml_set_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                nb1,
+            size_t                nb2,
+            size_t                nb3,
+            size_t                offset);
+
+    GGML_API struct ggml_tensor * ggml_set_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                offset);
+
+    GGML_API struct ggml_tensor * ggml_set_1d_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                offset);
+
+    // b -> view(a,offset,nb1,nb2,nb3), return modified a
+    GGML_API struct ggml_tensor * ggml_set_2d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                nb1,
+            size_t                offset);
+
+    // b -> view(a,offset,nb1,nb2,nb3), return view(a)
+    GGML_API struct ggml_tensor * ggml_set_2d_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            size_t                nb1,
+            size_t                offset);
+
     // a -> b, return view(b)
     GGML_API struct ggml_tensor * ggml_cpy(
             struct ggml_context * ctx,
@@ -614,6 +728,11 @@ extern "C" {
     // return view(a)
     // TODO: when we start computing gradient, make a copy instead of view
+    GGML_API struct ggml_tensor * ggml_reshape_1d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0);
+
     GGML_API struct ggml_tensor * ggml_reshape_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -629,6 +748,14 @@ extern "C" {
             int64_t               ne1,
             int64_t               ne2);
 
+    GGML_API struct ggml_tensor * ggml_reshape_4d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3);
+
     // offset in bytes
     GGML_API struct ggml_tensor * ggml_view_1d(
             struct ggml_context * ctx,
@@ -654,6 +781,18 @@ extern "C" {
             size_t                nb2, // slice stride in bytes
             size_t                offset);
 
+    GGML_API struct ggml_tensor * ggml_view_4d(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int64_t               ne0,
+            int64_t               ne1,
+            int64_t               ne2,
+            int64_t               ne3,
+            size_t                nb1, // row   stride in bytes
+            size_t                nb2, // slice stride in bytes
+            size_t                nb3,
+            size_t                offset);
+
     GGML_API struct ggml_tensor * ggml_permute(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -672,20 +811,50 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);
 
+    GGML_API struct ggml_tensor * ggml_get_rows_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * c);
+
+    GGML_API struct ggml_tensor * ggml_diag(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // set elements above the diagonal to -INF
-    // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_diag_mask_inf(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             int                   n_past);
 
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_diag_mask_inf_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past);
+
+    // set elements above the diagonal to 0
+    GGML_API struct ggml_tensor * ggml_diag_mask_zero(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past);
+
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_diag_mask_zero_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past);
+
     GGML_API struct ggml_tensor * ggml_soft_max(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
-    // rotary position embedding
     // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_soft_max_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
+    // rotary position embedding
     // if mode & 1 == 1, skip n_past elements
     // if mode & 2 == 1, GPT-NeoX style
     // TODO: avoid creating a new tensor every time
@@ -696,6 +865,23 @@ extern "C" {
             int                   n_dims,
             int                   mode);
 
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_rope_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past,
+            int                   n_dims,
+            int                   mode);
+
+    // rotary position embedding backward, i.e. compute dx from dy
+    // a - dy
+    GGML_API struct ggml_tensor * ggml_rope_back(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   n_past,
+            int                   n_dims,
+            int                   mode);
+
     // alibi position embedding
     // in-place, returns view(a)
     struct ggml_tensor * ggml_alibi(
@@ -740,13 +926,13 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
-            const ggml_unary_op_f32_t fun);
+            ggml_unary_op_f32_t fun);
 
     GGML_API struct ggml_tensor * ggml_map_binary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
-            const ggml_binary_op_f32_t fun);
+            ggml_binary_op_f32_t fun);
 
     //
    // automatic differentiation
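
Notes

Taken together, the new backward operators (ggml_silu_back, ggml_rms_norm_back, ggml_rope_back, ggml_get_rows_back, the *_BACK enum entries) and the GGML_MAX_PARAMS bump from 16 to 256 flesh out ggml's training path. Below is a minimal, hypothetical sketch of driving that machinery; it is not part of this diff. It uses only entry points this header already declares elsewhere (ggml_init, ggml_set_param, ggml_build_forward, ggml_build_backward, ggml_graph_reset, ggml_graph_compute), and the buffer size and values are arbitrary.

    #include "ggml.h"
    #include <stdio.h>

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        // trainable tensor; a graph may now carry up to GGML_MAX_PARAMS = 256 of these
        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        ggml_set_param(ctx, x);

        // loss = sum(x^2), a scalar
        struct ggml_tensor * loss = ggml_sum(ctx, ggml_sqr(ctx, x));

        struct ggml_cgraph gf = ggml_build_forward(loss);
        struct ggml_cgraph gb = ggml_build_backward(ctx, &gf, /*keep =*/ false);

        ggml_set_f32(x, 1.5f);
        ggml_graph_compute(ctx, &gf);

        ggml_graph_reset(&gf);          // zero all gradients
        ggml_set_f32(loss->grad, 1.0f); // seed d(loss)/d(loss)
        ggml_graph_compute(ctx, &gb);

        // d(sum(x^2))/dx_i = 2*x_i = 3.0
        printf("dloss/dx[0] = %f\n", ggml_get_f32_1d(x->grad, 0));

        ggml_free(ctx);
        return 0;
    }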
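The nb1/nb2/nb3/offset parameters of ggml_set and ggml_acc are byte strides, matching the nb[] array of the destination tensor, and the ops build graph nodes rather than writing immediately. A sketch, assuming a context ctx set up as in the previous example (tensor names are illustrative):

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4); // 4 columns, 4 rows
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 2); // 4 columns, 2 rows

    // overwrite rows 1..2 of a with b: pass a's own byte strides and start
    // the destination view at byte offset 1*a->nb[1] (i.e. at row 1)
    struct ggml_tensor * set = ggml_set(ctx, a, b,
            a->nb[1], a->nb[2], a->nb[3], 1*a->nb[1]);

    // same view contract, but ggml_acc adds b into the view instead of overwriting
    struct ggml_tensor * acc = ggml_acc(ctx, a, b,
            a->nb[1], a->nb[2], a->nb[3], 1*a->nb[1]);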
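The *_inplace variants return view(a) instead of allocating a fresh tensor. One plausible use, sketched here with assumed names, is the usual transformer attention sequence, where masking and softmax can reuse the scores buffer:

    // kq holds raw attention scores and n_past counts cached positions;
    // both names are assumptions for illustration
    struct ggml_tensor * kq_masked = ggml_diag_mask_inf_inplace(ctx, kq, n_past);
    struct ggml_tensor * kq_soft   = ggml_soft_max_inplace(ctx, kq_masked);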
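The final hunk only drops the const qualifier from the function-pointer parameters of ggml_map_unary_f32/ggml_map_binary_f32; callers are unaffected. A hypothetical callback matching the ggml_unary_op_f32_t typedef (const int n, float * dst, const float * src):

    // element-wise y = 2*x, applied row by row by the map op
    static void scale_by_two(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; ++i) {
            dst[i] = 2.0f*src[i];
        }
    }

    // builds a custom unary node in the graph: y = scale_by_two(x)
    struct ggml_tensor * y = ggml_map_unary_f32(ctx, x, scale_by_two);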