Diffstat (limited to 'ggml.c')
-rw-r--r--  ggml.c  183
1 file changed, 152 insertions, 31 deletions
diff --git a/ggml.c b/ggml.c
index 66658ff4..29e18a24 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1395,7 +1395,7 @@ inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) {
inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
-inline static void ggml_vec_leaky_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.1f*x[i]; }
+inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
static const float GELU_COEF_A = 0.044715f;
static const float GELU_QUICK_COEF = -1.702f;
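With this change the vector op takes the negative slope as a parameter instead of the hard-coded 0.1f, computing y = max(x, 0) + ns * min(x, 0). A standalone sketch of the same loop body (illustration only, not part of the patch):

/* illustration of the new leaky-relu loop body, not part of ggml.c */
#include <stdio.h>

static void leaky_relu_f32(const int n, float * y, const float * x, const float ns) {
    for (int i = 0; i < n; ++i) {
        y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.f) ? x[i] : 0.f);
    }
}

int main(void) {
    const float x[4] = { -2.0f, -0.5f, 0.0f, 3.0f };
    float y[4];
    leaky_relu_f32(4, y, x, 0.1f);               // ns = 0.1, matching the old hard-coded slope
    for (int i = 0; i < 4; ++i) {
        printf("%5.2f -> %5.2f\n", x[i], y[i]);  // -2.00 -> -0.20, -0.50 -> -0.05, 0.00 -> 0.00, 3.00 -> 3.00
    }
    return 0;
}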
@@ -1623,7 +1623,9 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"POOL_1D",
"POOL_2D",
"UPSCALE",
+ "PAD",
"ARGSORT",
+ "LEAKY_RELU",
"FLASH_ATTN",
"FLASH_FF",
@@ -1650,7 +1652,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");
+static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -1707,7 +1709,9 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"pool_1d(x)",
"pool_2d(x)",
"upscale(x)",
+ "pad(x)",
"argsort(x)",
+ "leaky_relu(x)",
"flash_attn(x)",
"flash_ff(x)",
@@ -1734,7 +1738,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 70, "GGML_OP_COUNT != 70");
+static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -1750,10 +1754,9 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
"GELU",
"GELU_QUICK",
"SILU",
- "LEAKY",
};
-static_assert(GGML_UNARY_OP_COUNT == 11, "GGML_UNARY_OP_COUNT != 11");
+static_assert(GGML_UNARY_OP_COUNT == 10, "GGML_UNARY_OP_COUNT != 10");
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
@@ -3830,12 +3833,25 @@ struct ggml_tensor * ggml_relu_inplace(
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
}
-// ggml_leaky
+// ggml_leaky_relu
-struct ggml_tensor * ggml_leaky(
+struct ggml_tensor * ggml_leaky_relu(
struct ggml_context * ctx,
- struct ggml_tensor * a) {
- return ggml_unary(ctx, a, GGML_UNARY_OP_LEAKY);
+ struct ggml_tensor * a, float negative_slope, bool inplace) {
+ bool is_node = false;
+
+ if (!inplace && (a->grad)) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+ ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
+
+ result->op = GGML_OP_LEAKY_RELU;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+
+ return result;
}
// ggml_gelu
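The unary-op entry point ggml_leaky(ctx, a) is replaced by a standalone op with a configurable slope and an inplace flag. A rough usage sketch of the new API (hypothetical host code, assuming the usual ggml context/graph helpers of this revision):

// hypothetical caller, not part of this patch
struct ggml_init_params ip = {
    /*.mem_size   =*/ 16*1024*1024,
    /*.mem_buffer =*/ NULL,
    /*.no_alloc   =*/ false,
};
struct ggml_context * ctx = ggml_init(ip);

struct ggml_tensor * a   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
struct ggml_tensor * out = ggml_leaky_relu(ctx, a, 0.01f /*negative_slope*/, false /*inplace*/);

struct ggml_cgraph * gf = ggml_new_graph(ctx);
ggml_build_forward_expand(gf, out);
ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

ggml_free(ctx);

Note that the slope is stored in op_params rather than as an extra source tensor, so the forward kernel (and an eventual backward pass, still unimplemented below) reads it back from there.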
@@ -4022,8 +4038,9 @@ static struct ggml_tensor * ggml_group_norm_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
- result->op = GGML_OP_GROUP_NORM;
result->op_params[0] = n_groups;
+
+ result->op = GGML_OP_GROUP_NORM;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = NULL; // TODO: maybe store epsilon here?
@@ -5523,6 +5540,30 @@ static struct ggml_tensor * ggml_upscale_impl(
return result;
}
+struct ggml_tensor * ggml_pad(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int p0, int p1, int p2, int p3) {
+ bool is_node = false;
+
+ if (a->grad) {
+ GGML_ASSERT(false); // TODO: implement backward
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
+ a->ne[0] + p0,
+ a->ne[1] + p1,
+ a->ne[2] + p2,
+ a->ne[3] + p3);
+
+ result->op = GGML_OP_PAD;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = a;
+
+ return result;
+}
+
struct ggml_tensor * ggml_upscale(
struct ggml_context * ctx,
struct ggml_tensor * a,
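ggml_pad above allocates a destination whose i-th dimension is extended by p_i; the forward kernel further down fills the added region with zeros. A minimal usage sketch (hypothetical, reusing a ggml_context ctx as in the ggml_leaky_relu sketch above):

// hypothetical example, not part of this patch:
// pad a 3x2 f32 tensor to 5x4 (2 extra columns, 2 extra rows, zero-filled on compute)
struct ggml_tensor * t      = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 2);
struct ggml_tensor * padded = ggml_pad(ctx, t, /*p0=*/2, /*p1=*/2, /*p2=*/0, /*p3=*/0);
// padded->ne == {5, 4, 1, 1}; elements outside the original 3x2 block are 0 after the graph is computed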
@@ -7718,8 +7759,10 @@ static void ggml_compute_forward_mul_f32(
const int ith = params->ith;
const int nth = params->nth;
+// TODO: OpenCL kernel needs to support broadcasting
#ifdef GGML_USE_CLBLAST
if (src1->backend == GGML_BACKEND_GPU) {
+ GGML_ASSERT(ggml_are_same_shape(src0, src1));
if (ith == 0) {
ggml_cl_mul(src0, src1, dst);
}
@@ -8985,10 +9028,9 @@ static void ggml_compute_forward_silu(
} break;
}
}
+// ggml_compute_forward_leaky_relu
-// ggml_compute_forward_leaky
-
-static void ggml_compute_forward_leaky_f32(
+static void ggml_compute_forward_leaky_relu_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
@@ -9002,24 +9044,27 @@ static void ggml_compute_forward_leaky_f32(
const int n = ggml_nrows(src0);
const int nc = src0->ne[0];
+ float negative_slope;
+ memcpy(&negative_slope, dst->op_params, sizeof(float));
+
assert(dst->nb[0] == sizeof(float));
assert(src0->nb[0] == sizeof(float));
for (int i = 0; i < n; i++) {
- ggml_vec_leaky_f32(nc,
+ ggml_vec_leaky_relu_f32(nc,
(float *) ((char *) dst->data + i*( dst->nb[1])),
- (float *) ((char *) src0->data + i*(src0->nb[1])));
+ (float *) ((char *) src0->data + i*(src0->nb[1])), negative_slope);
}
}
-static void ggml_compute_forward_leaky(
+static void ggml_compute_forward_leaky_relu(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_leaky_f32(params, src0, dst);
+ ggml_compute_forward_leaky_relu_f32(params, src0, dst);
} break;
default:
{
@@ -12158,6 +12203,7 @@ static void ggml_compute_forward_upscale_f32(
GGML_ASSERT(src0->nb[0] == sizeof(float));
const int ith = params->ith;
+ const int nth = params->nth;
GGML_TENSOR_UNARY_OP_LOCALS
@@ -12165,16 +12211,17 @@ static void ggml_compute_forward_upscale_f32(
// TODO: optimize
- for (int i03 = 0; i03 < ne03; i03++) {
- for (int i02 = ith; i02 < ne02; i02++) {
- for (int m = 0; m < dst->ne[1]; m++) {
- int i01 = m / scale_factor;
- for (int n = 0; n < dst->ne[0]; n++) {
- int i00 = n / scale_factor;
-
- const float * x = (float *)((char *) src0->data + i00 * nb00 +i01 * nb01 + i02 * nb02 + i03 * nb03);
+ for (int64_t i3 = 0; i3 < ne3; i3++) {
+ const int64_t i03 = i3;
+ for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+ const int64_t i02 = i2;
+ for (int64_t i1 = 0; i1 < ne1; i1++) {
+ const int64_t i01 = i1 / scale_factor;
+ for (int64_t i0 = 0; i0 < ne0; i0++) {
+ const int64_t i00 = i0 / scale_factor;
- float * y = (float *)((char *) dst->data + n * dst->nb[0] + m * dst->nb[1] + i02 * dst->nb[2] + i03 * dst->nb[3]);
+ const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+ float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
*y = *x;
}
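Besides switching the indices to int64_t, the rewrite fixes the work split: the old loop started each thread at i02 = ith but stepped by 1, so threads walked overlapping slices, while the new loop strides by nth so each i2 slice is visited by exactly one thread. A standalone sketch of that striding pattern (illustration only, not part of the patch):

/* illustration of the ith/nth striding used above, not part of ggml.c */
#include <stdio.h>

int main(void) {
    const int nth = 4;   // number of threads
    const int ne2 = 10;  // slices to split across them
    for (int ith = 0; ith < nth; ++ith) {
        printf("thread %d:", ith);
        for (int i2 = ith; i2 < ne2; i2 += nth) {
            printf(" %d", i2);           // disjoint slice set per thread
        }
        printf("\n");                    // thread 0: 0 4 8, thread 1: 1 5 9, ...
    }
    return 0;
}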
@@ -12199,6 +12246,64 @@ static void ggml_compute_forward_upscale(
}
}
+// ggml_compute_forward_pad
+
+static void ggml_compute_forward_pad_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_ASSERT(src0->nb[0] == sizeof(float));
+ GGML_ASSERT( dst->nb[0] == sizeof(float));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ float * dst_ptr = (float *) dst->data;
+
+ // TODO: optimize
+
+ for (int64_t i2 = 0; i2 < ne2; ++i2) {
+ for (int64_t i1 = ith; i1 < ne1; i1 += nth) {
+ for (int64_t i0 = 0; i0 < ne0; ++i0) {
+ for (int64_t i3 = 0; i3 < ne3; ++i3) {
+ const int64_t dst_idx = i3*(ne0*ne1*ne2) + i2*(ne0*ne1) + i1*ne0 + i0;
+
+ const float * src_ptr = (const float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
+
+ if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+ dst_ptr[dst_idx] = *src_ptr;
+ } else {
+ dst_ptr[dst_idx] = 0;
+ }
+ }
+ }
+ }
+ }
+}
+
+static void ggml_compute_forward_pad(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_pad_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
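ggml_compute_forward_pad_f32 writes dst as a contiguous f32 buffer: dst_idx is the flattened row-major index over the padded extents, and any coordinate that falls outside the source extents (ne00..ne03) is zero-filled. A standalone illustration of the same index/fill logic on a 2x3 matrix padded to 3x4 (not part of the patch):

/* illustration of the zero-fill indexing above, not part of ggml.c */
#include <stdio.h>

int main(void) {
    const int ne00 = 3, ne01 = 2;                  // source: 3 columns, 2 rows
    const int ne0  = 4, ne1  = 3;                  // dst:    padded by p0 = 1, p1 = 1
    const float src[2][3] = { {1, 2, 3}, {4, 5, 6} };
    float dst[3*4];

    for (int i1 = 0; i1 < ne1; ++i1) {
        for (int i0 = 0; i0 < ne0; ++i0) {
            const int dst_idx = i1*ne0 + i0;       // contiguous row-major index into dst
            dst[dst_idx] = (i0 < ne00 && i1 < ne01) ? src[i1][i0] : 0.0f;
        }
    }

    for (int i1 = 0; i1 < ne1; ++i1) {
        for (int i0 = 0; i0 < ne0; ++i0) {
            printf("%4.1f", dst[i1*ne0 + i0]);
        }
        printf("\n");                              // 1 2 3 0 / 4 5 6 0 / 0 0 0 0
    }
    return 0;
}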
// ggml_compute_forward_argsort
static void ggml_compute_forward_argsort_f32(
@@ -13406,10 +13511,6 @@ static void ggml_compute_forward_unary(
{
ggml_compute_forward_silu(params, src0, dst);
} break;
- case GGML_UNARY_OP_LEAKY:
- {
- ggml_compute_forward_leaky(params, src0, dst);
- } break;
default:
{
GGML_ASSERT(false);
@@ -14191,10 +14292,18 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_upscale(params, tensor->src[0], tensor);
} break;
+ case GGML_OP_PAD:
+ {
+ ggml_compute_forward_pad(params, tensor->src[0], tensor);
+ } break;
case GGML_OP_ARGSORT:
{
ggml_compute_forward_argsort(params, tensor->src[0], tensor);
} break;
+ case GGML_OP_LEAKY_RELU:
+ {
+ ggml_compute_forward_leaky_relu(params, tensor->src[0], tensor);
+ } break;
case GGML_OP_FLASH_ATTN:
{
const int32_t t = ggml_get_op_params_i32(tensor, 0);
@@ -15187,10 +15296,18 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
{
GGML_ASSERT(false); // TODO: not implemented
} break;
+ case GGML_OP_PAD:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
case GGML_OP_ARGSORT:
{
GGML_ASSERT(false); // TODO: not implemented
} break;
+ case GGML_OP_LEAKY_RELU:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
case GGML_OP_FLASH_ATTN:
{
struct ggml_tensor * flash_grad = NULL;
@@ -15796,6 +15913,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_OP_ARGMAX:
case GGML_OP_REPEAT:
case GGML_OP_REPEAT_BACK:
+ case GGML_OP_LEAKY_RELU:
{
n_tasks = 1;
} break;
@@ -15808,7 +15926,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_UNARY_OP_TANH:
case GGML_UNARY_OP_ELU:
case GGML_UNARY_OP_RELU:
- case GGML_UNARY_OP_LEAKY:
{
n_tasks = 1;
} break;
@@ -15927,6 +16044,10 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
{
n_tasks = n_threads;
} break;
+ case GGML_OP_PAD:
+ {
+ n_tasks = n_threads;
+ } break;
case GGML_OP_ARGSORT:
{
n_tasks = n_threads;