summary refs log tree commit diff
path: root/ggml.h
diff options
context:
space:
mode:
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 115
1 files changed, 109 insertions, 6 deletions
diff --git a/ggml.h b/ggml.h
index 0ec7ec5b..3c48fd27 100644
--- a/ggml.h
+++ b/ggml.h
@@ -211,6 +211,7 @@
#define GGML_MAX_OP_PARAMS 32
#define GGML_DEFAULT_N_THREADS 4
+
#define GGML_EXIT_SUCCESS 0
#define GGML_EXIT_ABORTED 1
@@ -345,10 +346,12 @@ extern "C" {
GGML_OP_ARGMAX,
GGML_OP_REPEAT,
GGML_OP_REPEAT_BACK,
+ GGML_OP_CONCAT,
GGML_OP_SILU_BACK,
GGML_OP_NORM, // normalize
GGML_OP_RMS_NORM,
GGML_OP_RMS_NORM_BACK,
+ GGML_OP_GROUP_NORM,
GGML_OP_MUL_MAT,
GGML_OP_OUT_PROD,
@@ -374,14 +377,19 @@ extern "C" {
GGML_OP_CLAMP,
GGML_OP_CONV_1D,
GGML_OP_CONV_2D,
+ GGML_OP_CONV_TRANSPOSE_2D,
GGML_OP_POOL_1D,
GGML_OP_POOL_2D,
+ GGML_OP_UPSCALE, // nearest interpolate
+
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
GGML_OP_FLASH_ATTN_BACK,
GGML_OP_WIN_PART,
GGML_OP_WIN_UNPART,
+ GGML_OP_GET_REL_POS,
+ GGML_OP_ADD_REL_POS,
GGML_OP_UNARY,
@@ -805,6 +813,13 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
+ // concatenate tensors a and b along dimension 2
+ // used in stable-diffusion
+ GGML_API struct ggml_tensor * ggml_concat(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
GGML_API struct ggml_tensor * ggml_abs(
struct ggml_context * ctx,
struct ggml_tensor * a);
@@ -913,6 +928,19 @@ extern "C" {
struct ggml_tensor * a,
float eps);
+ // group normalization: normalizes along ne0*ne1*n_groups
+ // used in stable-diffusion
+ // TODO: make eps configurable; it is currently hardcoded to 1e-6
+ GGML_API struct ggml_tensor * ggml_group_norm(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_groups);
+
+ GGML_API struct ggml_tensor * ggml_group_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_groups);
+
// a - x
// b - dy
// TODO: update with configurable eps
@@ -1213,6 +1241,15 @@ extern "C" {
float freq_base,
float freq_scale);
+ // xPos variant of rotary position embedding (RoPE); in-place, returns view(a)
+ GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int n_past,
+ int n_dims,
+ float base,
+ bool down);
+
// rotary position embedding backward, i.e compute dx from dy
// a - dy
GGML_API struct ggml_tensor * ggml_rope_back(
@@ -1221,7 +1258,11 @@ extern "C" {
int n_past,
int n_dims,
int mode,
- int n_ctx);
+ int n_ctx,
+ float freq_base,
+ float freq_scale,
+ float xpos_base,
+ bool xpos_down);
// alibi position embedding
// in-place, returns view(a)
@@ -1248,6 +1289,15 @@ extern "C" {
int p0, // padding
int d0); // dilation
+ // conv_1d with padding = half of a->ne[0]
+ // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+ GGML_API struct ggml_tensor* ggml_conv_1d_ph(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ int s, // forwarded to ggml_conv_1d ahead of the padding argument (presumably the stride - confirm)
+ int d); // forwarded to ggml_conv_1d after the padding argument (presumably the dilation - confirm)
+
GGML_API struct ggml_tensor * ggml_conv_2d(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -1259,14 +1309,38 @@ extern "C" {
int d0,
int d1);
- // conv_1d with padding = half
- // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
- GGML_API struct ggml_tensor * ggml_conv_1d_ph(
+
+ // conv_2d variant: kernel size is a->ne[0] x a->ne[1]
+ // stride is equal to kernel size
+ // padding is zero
+ // example (shapes listed as ne[0] ne[1] ne[2] ne[3]):
+ // a: 16 16 3 768
+ // b: 1024 1024 3 1
+ // res: 64 64 768 1
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ // conv_2d variant: kernel size is a->ne[0] x a->ne[1]
+ // stride is 1
+ // padding is half
+ // example (shapes listed as ne[0] ne[1] ne[2] ne[3]):
+ // a: 3 3 256 256
+ // b: 64 64 256 1
+ // res: 64 64 256 1
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_conv_2d_s1_ph(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ GGML_API struct ggml_tensor * ggml_conv_transpose_2d_p0(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
- int s,
- int d);
+ int stride);
enum ggml_op_pool {
GGML_OP_POOL_MAX,
@@ -1293,6 +1367,13 @@ extern "C" {
int p0,
int p1);
+ // upscale a by scale_factor using nearest-neighbor interpolation
+ // used in stable-diffusion
+ GGML_API struct ggml_tensor * ggml_upscale(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int scale_factor);
+
GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx,
struct ggml_tensor * q,
@@ -1346,6 +1427,27 @@ extern "C" {
struct ggml_tensor * a,
enum ggml_unary_op op);
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_get_rel_pos(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int qh, // NOTE(review): presumably the query-side size - confirm against the implementation
+ int kh); // NOTE(review): presumably the key-side size - confirm against the implementation
+
+ // used in sam
+
+ GGML_API struct ggml_tensor * ggml_add_rel_pos(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * pw, // NOTE(review): presumably the relative-position term along width - confirm
+ struct ggml_tensor * ph); // NOTE(review): presumably the relative-position term along height - confirm
+
+ GGML_API struct ggml_tensor * ggml_add_rel_pos_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * pw,
+ struct ggml_tensor * ph);
+
// custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1500,6 +1602,7 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * tensor);
+
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);