path: root/ggml.c
Diffstat (limited to 'ggml.c')
-rw-r--r--  ggml.c | 1150
1 file changed, 710 insertions(+), 440 deletions(-)
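
Most of this diff is mechanical: every ggml_compute_forward_* kernel drops its explicit src0/src1/... parameters and instead recovers its operands from dst->src[], so only params and dst cross the function boundary. A minimal sketch of the resulting shape, written as it would sit inside ggml.c, is below; the op name "my_op" and its body are hypothetical and only illustrate the convention.

// Sketch only: "my_op" is a made-up element-wise op showing the new
// calling convention used by the kernels in this diff. The source
// tensors are read from dst->src[i] instead of being passed in.
static void ggml_compute_forward_my_op_f32(
        const struct ggml_compute_params * params,
        struct ggml_tensor * dst) {

    const struct ggml_tensor * src0 = dst->src[0]; // previously a function argument

    GGML_ASSERT(ggml_are_same_shape(src0, dst));

    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return; // only the COMPUTE pass does work
    }

    const int n  = ggml_nrows(src0);
    const int nc = src0->ne[0];

    for (int i = 0; i < n; i++) {
        const float * x = (const float *) ((const char *) src0->data + i*src0->nb[1]);
        float       * y = (float *)       ((char *)       dst->data  + i*dst->nb[1]);
        for (int j = 0; j < nc; j++) {
            y[j] = x[j]; // hypothetical per-element body
        }
    }
}
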
diff --git a/ggml.c b/ggml.c
index 91adbb0a..5b9fa741 100644
--- a/ggml.c
+++ b/ggml.c
@@ -5644,7 +5644,9 @@ struct ggml_tensor * ggml_conv_2d(
ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
- result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], a->ne[3], im2col->ne[3]); // [N, OC, OH, OW]
+ result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
+ result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
+
return result;
}
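
The hunk above also changes the output layout of ggml_conv_2d: the mul_mat result is first reshaped to [OC, N, OH, OW] and then permuted back to [N, OC, OH, OW]. A hypothetical standalone check of the resulting shape, assuming only the public ggml.h API (tensor sizes and the 64 MB buffer are arbitrary), could look like this:

// Hypothetical shape check (not part of the patch): build a small graph
// with ggml_conv_2d and print result->ne. Expected ne, innermost first,
// is [OW, OH, OC, N], i.e. [N, OC, OH, OW] in the comment notation.
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 64*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // kernel a: [KW, KH, IC, OC] = [3, 3, 8, 16], input b: [W, H, IC, N] = [32, 32, 8, 2]
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F16,  3,  3, 8, 16);
    struct ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 32, 32, 8,  2);

    // stride 1, padding 1, dilation 1 => OW = OH = 32
    struct ggml_tensor * r = ggml_conv_2d(ctx, a, b, 1, 1, 1, 1, 1, 1);

    // expect: 32 32 16 2
    printf("%lld %lld %lld %lld\n",
           (long long) r->ne[0], (long long) r->ne[1],
           (long long) r->ne[2], (long long) r->ne[3]);

    ggml_free(ctx);
    return 0;
}
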
@@ -6650,8 +6652,10 @@ void ggml_set_param(
static void ggml_compute_forward_dup_same_cont(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
GGML_ASSERT(src0->type == dst->type);
@@ -6682,8 +6686,10 @@ static void ggml_compute_forward_dup_same_cont(
}
static void ggml_compute_forward_dup_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -6696,7 +6702,7 @@ static void ggml_compute_forward_dup_f16(
const int nth = params->nth; // number of threads
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, src0, dst);
+ ggml_compute_forward_dup_same_cont(params, dst);
return;
}
@@ -6953,8 +6959,10 @@ static void ggml_compute_forward_dup_f16(
static void ggml_compute_forward_dup_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -6967,7 +6975,7 @@ static void ggml_compute_forward_dup_f32(
const int nth = params->nth; // number of threads
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
- ggml_compute_forward_dup_same_cont(params, src0, dst);
+ ggml_compute_forward_dup_same_cont(params, dst);
return;
}
@@ -7203,8 +7211,10 @@ static void ggml_compute_forward_dup_f32(
// A simplified version of ggml_compute_forward_dup that doesn't do float upcasting, and just plain old memcpy.
static void ggml_compute_forward_dup_bytes(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
GGML_ASSERT(src0->type == dst->type);
@@ -7213,7 +7223,7 @@ static void ggml_compute_forward_dup_bytes(
}
if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
- ggml_compute_forward_dup_same_cont(params, src0, dst);
+ ggml_compute_forward_dup_same_cont(params, dst);
return;
}
@@ -7352,21 +7362,23 @@ static void ggml_compute_forward_dup_bytes(
static void ggml_compute_forward_dup(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (src0->type == dst->type) {
- ggml_compute_forward_dup_bytes(params, src0, dst);
+ ggml_compute_forward_dup_bytes(params, dst);
return;
}
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_dup_f16(params, src0, dst);
+ ggml_compute_forward_dup_f16(params, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_dup_f32(params, src0, dst);
+ ggml_compute_forward_dup_f32(params, dst);
} break;
default:
{
@@ -7379,9 +7391,11 @@ static void ggml_compute_forward_dup(
static void ggml_compute_forward_add_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7467,9 +7481,11 @@ static void ggml_compute_forward_add_f32(
static void ggml_compute_forward_add_f16_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7544,9 +7560,11 @@ static void ggml_compute_forward_add_f16_f32(
static void ggml_compute_forward_add_f16_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7598,9 +7616,11 @@ static void ggml_compute_forward_add_f16_f16(
static void ggml_compute_forward_add_q_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -7676,14 +7696,16 @@ static void ggml_compute_forward_add_q_f32(
static void ggml_compute_forward_add(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
if (src1->type == GGML_TYPE_F32) {
- ggml_compute_forward_add_f32(params, src0, src1, dst);
+ ggml_compute_forward_add_f32(params, dst);
}
else {
GGML_ASSERT(false);
@@ -7692,10 +7714,10 @@ static void ggml_compute_forward_add(
case GGML_TYPE_F16:
{
if (src1->type == GGML_TYPE_F16) {
- ggml_compute_forward_add_f16_f16(params, src0, src1, dst);
+ ggml_compute_forward_add_f16_f16(params, dst);
}
else if (src1->type == GGML_TYPE_F32) {
- ggml_compute_forward_add_f16_f32(params, src0, src1, dst);
+ ggml_compute_forward_add_f16_f32(params, dst);
}
else {
GGML_ASSERT(false);
@@ -7717,7 +7739,7 @@ static void ggml_compute_forward_add(
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ4_NL:
{
- ggml_compute_forward_add_q_f32(params, src0, src1, dst);
+ ggml_compute_forward_add_q_f32(params, dst);
} break;
default:
{
@@ -7730,9 +7752,11 @@ static void ggml_compute_forward_add(
static void ggml_compute_forward_add1_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
@@ -7782,9 +7806,11 @@ static void ggml_compute_forward_add1_f32(
static void ggml_compute_forward_add1_f16_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
@@ -7832,9 +7858,11 @@ static void ggml_compute_forward_add1_f16_f32(
static void ggml_compute_forward_add1_f16_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
@@ -7882,9 +7910,11 @@ static void ggml_compute_forward_add1_f16_f16(
static void ggml_compute_forward_add1_q_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_scalar(src1));
@@ -7949,21 +7979,23 @@ static void ggml_compute_forward_add1_q_f32(
static void ggml_compute_forward_add1(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_add1_f32(params, src0, src1, dst);
+ ggml_compute_forward_add1_f32(params, dst);
} break;
case GGML_TYPE_F16:
{
if (src1->type == GGML_TYPE_F16) {
- ggml_compute_forward_add1_f16_f16(params, src0, src1, dst);
+ ggml_compute_forward_add1_f16_f16(params, dst);
}
else if (src1->type == GGML_TYPE_F32) {
- ggml_compute_forward_add1_f16_f32(params, src0, src1, dst);
+ ggml_compute_forward_add1_f16_f32(params, dst);
}
else {
GGML_ASSERT(false);
@@ -7986,7 +8018,7 @@ static void ggml_compute_forward_add1(
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ4_NL:
{
- ggml_compute_forward_add1_q_f32(params, src0, src1, dst);
+ ggml_compute_forward_add1_q_f32(params, dst);
} break;
default:
{
@@ -7999,9 +8031,11 @@ static void ggml_compute_forward_add1(
static void ggml_compute_forward_acc_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
@@ -8081,14 +8115,14 @@ static void ggml_compute_forward_acc_f32(
static void ggml_compute_forward_acc(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_acc_f32(params, src0, src1, dst);
+ ggml_compute_forward_acc_f32(params, dst);
} break;
case GGML_TYPE_F16:
case GGML_TYPE_Q4_0:
@@ -8118,9 +8152,11 @@ static void ggml_compute_forward_acc(
static void ggml_compute_forward_sub_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
@@ -8178,13 +8214,14 @@ static void ggml_compute_forward_sub_f32(
static void ggml_compute_forward_sub(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_sub_f32(params, src0, src1, dst);
+ ggml_compute_forward_sub_f32(params, dst);
} break;
default:
{
@@ -8197,9 +8234,11 @@ static void ggml_compute_forward_sub(
static void ggml_compute_forward_mul_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8280,15 +8319,17 @@ static void ggml_compute_forward_mul_f32(
static void ggml_compute_forward_mul(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(src1->type == GGML_TYPE_F32 && "only f32 src1 supported for now");
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_mul_f32(params, src0, src1, dst);
+ ggml_compute_forward_mul_f32(params, dst);
} break;
default:
{
@@ -8301,9 +8342,11 @@ static void ggml_compute_forward_mul(
static void ggml_compute_forward_div_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8374,13 +8417,14 @@ static void ggml_compute_forward_div_f32(
static void ggml_compute_forward_div(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_div_f32(params, src0, src1, dst);
+ ggml_compute_forward_div_f32(params, dst);
} break;
default:
{
@@ -8393,8 +8437,10 @@ static void ggml_compute_forward_div(
static void ggml_compute_forward_sqr_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -8417,12 +8463,14 @@ static void ggml_compute_forward_sqr_f32(
static void ggml_compute_forward_sqr(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_sqr_f32(params, src0, dst);
+ ggml_compute_forward_sqr_f32(params, dst);
} break;
default:
{
@@ -8435,8 +8483,10 @@ static void ggml_compute_forward_sqr(
static void ggml_compute_forward_sqrt_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -8459,12 +8509,14 @@ static void ggml_compute_forward_sqrt_f32(
static void ggml_compute_forward_sqrt(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_sqrt_f32(params, src0, dst);
+ ggml_compute_forward_sqrt_f32(params, dst);
} break;
default:
{
@@ -8477,8 +8529,10 @@ static void ggml_compute_forward_sqrt(
static void ggml_compute_forward_log_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -8501,12 +8555,14 @@ static void ggml_compute_forward_log_f32(
static void ggml_compute_forward_log(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_log_f32(params, src0, dst);
+ ggml_compute_forward_log_f32(params, dst);
} break;
default:
{
@@ -8519,8 +8575,10 @@ static void ggml_compute_forward_log(
static void ggml_compute_forward_sum_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_is_scalar(dst));
@@ -8552,8 +8610,10 @@ static void ggml_compute_forward_sum_f32(
static void ggml_compute_forward_sum_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_is_scalar(dst));
@@ -8584,16 +8644,18 @@ static void ggml_compute_forward_sum_f16(
static void ggml_compute_forward_sum(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_sum_f32(params, src0, dst);
+ ggml_compute_forward_sum_f32(params, dst);
} break;
case GGML_TYPE_F16:
{
- ggml_compute_forward_sum_f16(params, src0, dst);
+ ggml_compute_forward_sum_f16(params, dst);
} break;
default:
{
@@ -8606,8 +8668,10 @@ static void ggml_compute_forward_sum(
static void ggml_compute_forward_sum_rows_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8639,12 +8703,14 @@ static void ggml_compute_forward_sum_rows_f32(
static void ggml_compute_forward_sum_rows(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_sum_rows_f32(params, src0, dst);
+ ggml_compute_forward_sum_rows_f32(params, dst);
} break;
default:
{
@@ -8657,8 +8723,10 @@ static void ggml_compute_forward_sum_rows(
static void ggml_compute_forward_mean_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8694,12 +8762,14 @@ static void ggml_compute_forward_mean_f32(
static void ggml_compute_forward_mean(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_mean_f32(params, src0, dst);
+ ggml_compute_forward_mean_f32(params, dst);
} break;
default:
{
@@ -8712,8 +8782,10 @@ static void ggml_compute_forward_mean(
static void ggml_compute_forward_argmax_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8740,12 +8812,14 @@ static void ggml_compute_forward_argmax_f32(
static void ggml_compute_forward_argmax(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_argmax_f32(params, src0, dst);
+ ggml_compute_forward_argmax_f32(params, dst);
} break;
default:
{
@@ -8758,8 +8832,10 @@ static void ggml_compute_forward_argmax(
static void ggml_compute_forward_repeat_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(src0, dst));
@@ -8801,8 +8877,10 @@ static void ggml_compute_forward_repeat_f32(
static void ggml_compute_forward_repeat_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(src0, dst));
@@ -8847,18 +8925,20 @@ static void ggml_compute_forward_repeat_f16(
static void ggml_compute_forward_repeat(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
case GGML_TYPE_I16:
{
- ggml_compute_forward_repeat_f16(params, src0, dst);
+ ggml_compute_forward_repeat_f16(params, dst);
} break;
case GGML_TYPE_F32:
case GGML_TYPE_I32:
{
- ggml_compute_forward_repeat_f32(params, src0, dst);
+ ggml_compute_forward_repeat_f32(params, dst);
} break;
default:
{
@@ -8871,8 +8951,10 @@ static void ggml_compute_forward_repeat(
static void ggml_compute_forward_repeat_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_can_repeat(dst, src0));
@@ -8928,12 +9010,14 @@ static void ggml_compute_forward_repeat_back_f32(
static void ggml_compute_forward_repeat_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_repeat_back_f32(params, src0, dst);
+ ggml_compute_forward_repeat_back_f32(params, dst);
} break;
default:
{
@@ -8946,10 +9030,11 @@ static void ggml_compute_forward_repeat_back(
static void ggml_compute_forward_concat_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -8994,14 +9079,15 @@ static void ggml_compute_forward_concat_f32(
static void ggml_compute_forward_concat(
const struct ggml_compute_params* params,
- const struct ggml_tensor* src0,
- const struct ggml_tensor* src1,
struct ggml_tensor* dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
case GGML_TYPE_I32:
{
- ggml_compute_forward_concat_f32(params, src0, src1, dst);
+ ggml_compute_forward_concat_f32(params, dst);
} break;
default:
{
@@ -9014,8 +9100,10 @@ static void ggml_compute_forward_concat(
static void ggml_compute_forward_abs_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9038,12 +9126,14 @@ static void ggml_compute_forward_abs_f32(
static void ggml_compute_forward_abs(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_abs_f32(params, src0, dst);
+ ggml_compute_forward_abs_f32(params, dst);
} break;
default:
{
@@ -9056,8 +9146,10 @@ static void ggml_compute_forward_abs(
static void ggml_compute_forward_sgn_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9080,12 +9172,14 @@ static void ggml_compute_forward_sgn_f32(
static void ggml_compute_forward_sgn(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_sgn_f32(params, src0, dst);
+ ggml_compute_forward_sgn_f32(params, dst);
} break;
default:
{
@@ -9098,8 +9192,10 @@ static void ggml_compute_forward_sgn(
static void ggml_compute_forward_neg_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9122,12 +9218,14 @@ static void ggml_compute_forward_neg_f32(
static void ggml_compute_forward_neg(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_neg_f32(params, src0, dst);
+ ggml_compute_forward_neg_f32(params, dst);
} break;
default:
{
@@ -9140,8 +9238,10 @@ static void ggml_compute_forward_neg(
static void ggml_compute_forward_step_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9164,12 +9264,14 @@ static void ggml_compute_forward_step_f32(
static void ggml_compute_forward_step(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_step_f32(params, src0, dst);
+ ggml_compute_forward_step_f32(params, dst);
} break;
default:
{
@@ -9182,8 +9284,10 @@ static void ggml_compute_forward_step(
static void ggml_compute_forward_tanh_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9206,12 +9310,14 @@ static void ggml_compute_forward_tanh_f32(
static void ggml_compute_forward_tanh(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_tanh_f32(params, src0, dst);
+ ggml_compute_forward_tanh_f32(params, dst);
} break;
default:
{
@@ -9224,8 +9330,10 @@ static void ggml_compute_forward_tanh(
static void ggml_compute_forward_elu_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9248,12 +9356,14 @@ static void ggml_compute_forward_elu_f32(
static void ggml_compute_forward_elu(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_elu_f32(params, src0, dst);
+ ggml_compute_forward_elu_f32(params, dst);
} break;
default:
{
@@ -9266,8 +9376,10 @@ static void ggml_compute_forward_elu(
static void ggml_compute_forward_relu_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9290,12 +9402,14 @@ static void ggml_compute_forward_relu_f32(
static void ggml_compute_forward_relu(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_relu_f32(params, src0, dst);
+ ggml_compute_forward_relu_f32(params, dst);
} break;
default:
{
@@ -9308,8 +9422,10 @@ static void ggml_compute_forward_relu(
static void ggml_compute_forward_gelu_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9349,12 +9465,14 @@ static void ggml_compute_forward_gelu_f32(
static void ggml_compute_forward_gelu(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_gelu_f32(params, src0, dst);
+ ggml_compute_forward_gelu_f32(params, dst);
} break;
default:
{
@@ -9367,8 +9485,10 @@ static void ggml_compute_forward_gelu(
static void ggml_compute_forward_gelu_quick_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9408,12 +9528,14 @@ static void ggml_compute_forward_gelu_quick_f32(
static void ggml_compute_forward_gelu_quick(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_gelu_quick_f32(params, src0, dst);
+ ggml_compute_forward_gelu_quick_f32(params, dst);
} break;
default:
{
@@ -9426,8 +9548,10 @@ static void ggml_compute_forward_gelu_quick(
static void ggml_compute_forward_silu_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -9467,12 +9591,14 @@ static void ggml_compute_forward_silu_f32(
static void ggml_compute_forward_silu(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_silu_f32(params, src0, dst);
+ ggml_compute_forward_silu_f32(params, dst);
} break;
default:
{
@@ -9484,8 +9610,10 @@ static void ggml_compute_forward_silu(
static void ggml_compute_forward_leaky_relu_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9511,12 +9639,14 @@ static void ggml_compute_forward_leaky_relu_f32(
static void ggml_compute_forward_leaky_relu(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_leaky_relu_f32(params, src0, dst);
+ ggml_compute_forward_leaky_relu_f32(params, dst);
} break;
default:
{
@@ -9529,9 +9659,11 @@ static void ggml_compute_forward_leaky_relu(
static void ggml_compute_forward_silu_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * grad,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * grad = dst->src[1];
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
@@ -9574,13 +9706,14 @@ static void ggml_compute_forward_silu_back_f32(
static void ggml_compute_forward_silu_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * grad,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_silu_back_f32(params, src0, grad, dst);
+ ggml_compute_forward_silu_back_f32(params, dst);
} break;
default:
{
@@ -9592,8 +9725,10 @@ static void ggml_compute_forward_silu_back(
static void ggml_compute_forward_hardswish_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9615,12 +9750,14 @@ static void ggml_compute_forward_hardswish_f32(
}
static void ggml_compute_forward_hardswish(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_hardswish_f32(params, src0, dst);
+ ggml_compute_forward_hardswish_f32(params, dst);
} break;
default:
{
@@ -9631,8 +9768,10 @@ static void ggml_compute_forward_hardswish(
static void ggml_compute_forward_hardsigmoid_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, dst));
@@ -9655,12 +9794,14 @@ static void ggml_compute_forward_hardsigmoid_f32(
static void ggml_compute_forward_hardsigmoid(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_hardsigmoid_f32(params, src0, dst);
+ ggml_compute_forward_hardsigmoid_f32(params, dst);
} break;
default:
{
@@ -9674,8 +9815,10 @@ static void ggml_compute_forward_hardsigmoid(
static void ggml_compute_forward_norm_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9727,12 +9870,14 @@ static void ggml_compute_forward_norm_f32(
static void ggml_compute_forward_norm(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_norm_f32(params, src0, dst);
+ ggml_compute_forward_norm_f32(params, dst);
} break;
default:
{
@@ -9745,8 +9890,10 @@ static void ggml_compute_forward_norm(
static void ggml_compute_forward_rms_norm_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9795,12 +9942,14 @@ static void ggml_compute_forward_rms_norm_f32(
static void ggml_compute_forward_rms_norm(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_rms_norm_f32(params, src0, dst);
+ ggml_compute_forward_rms_norm_f32(params, dst);
} break;
default:
{
@@ -9811,9 +9960,11 @@ static void ggml_compute_forward_rms_norm(
static void ggml_compute_forward_rms_norm_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -9968,13 +10119,14 @@ static void ggml_compute_forward_rms_norm_back_f32(
static void ggml_compute_forward_rms_norm_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_rms_norm_back_f32(params, src0, src1, dst);
+ ggml_compute_forward_rms_norm_back_f32(params, dst);
} break;
default:
{
@@ -9987,8 +10139,10 @@ static void ggml_compute_forward_rms_norm_back(
static void ggml_compute_forward_group_norm_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -10059,12 +10213,14 @@ static void ggml_compute_forward_group_norm_f32(
static void ggml_compute_forward_group_norm(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_group_norm_f32(params, src0, dst);
+ ggml_compute_forward_group_norm_f32(params, dst);
} break;
default:
{
@@ -10110,9 +10266,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {
static void ggml_compute_forward_mul_mat(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
@@ -10357,10 +10515,11 @@ static void ggml_compute_forward_mul_mat(
static void ggml_compute_forward_mul_mat_id(
const struct ggml_compute_params * params,
- const struct ggml_tensor * ids,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * ids = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
const struct ggml_tensor * src0 = dst->src[2]; // only for GGML_TENSOR_BINARY_OP_LOCALS
GGML_TENSOR_BINARY_OP_LOCALS
@@ -10551,9 +10710,11 @@ static void ggml_compute_forward_mul_mat_id(
static void ggml_compute_forward_out_prod_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
// int64_t t0 = ggml_perf_time_us();
// UNUSED(t0);
@@ -10743,9 +10904,11 @@ static void ggml_compute_forward_out_prod_f32(
static void ggml_compute_forward_out_prod_q_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
// int64_t t0 = ggml_perf_time_us();
// UNUSED(t0);
@@ -10856,9 +11019,10 @@ static void ggml_compute_forward_out_prod_q_f32(
static void ggml_compute_forward_out_prod(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1:
@@ -10876,16 +11040,16 @@ static void ggml_compute_forward_out_prod(
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ4_NL:
{
- ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
+ ggml_compute_forward_out_prod_q_f32(params, dst);
} break;
case GGML_TYPE_F16:
{
GGML_ASSERT(false); // todo
- // ggml_compute_forward_out_prod_f16_f32(params, src0, src1, dst);
+ // ggml_compute_forward_out_prod_f16_f32(params, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_out_prod_f32(params, src0, src1, dst);
+ ggml_compute_forward_out_prod_f32(params, dst);
} break;
default:
{
@@ -10898,8 +11062,10 @@ static void ggml_compute_forward_out_prod(
static void ggml_compute_forward_scale_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_is_contiguous(src0));
GGML_ASSERT(ggml_is_contiguous(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
@@ -10940,12 +11106,14 @@ static void ggml_compute_forward_scale_f32(
static void ggml_compute_forward_scale(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_scale_f32(params, src0, dst);
+ ggml_compute_forward_scale_f32(params, dst);
} break;
default:
{
@@ -10958,9 +11126,11 @@ static void ggml_compute_forward_scale(
static void ggml_compute_forward_set_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
@@ -11031,14 +11201,14 @@ static void ggml_compute_forward_set_f32(
static void ggml_compute_forward_set(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_set_f32(params, src0, src1, dst);
+ ggml_compute_forward_set_f32(params, dst);
} break;
case GGML_TYPE_F16:
case GGML_TYPE_Q4_0:
@@ -11068,29 +11238,25 @@ static void ggml_compute_forward_set(
static void ggml_compute_forward_cpy(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
- ggml_compute_forward_dup(params, src0, dst);
+ ggml_compute_forward_dup(params, dst);
}
// ggml_compute_forward_cont
static void ggml_compute_forward_cont(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
- ggml_compute_forward_dup(params, src0, dst);
+ ggml_compute_forward_dup(params, dst);
}
// ggml_compute_forward_reshape
static void ggml_compute_forward_reshape(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
// NOP
UNUSED(params);
- UNUSED(src0);
UNUSED(dst);
}
@@ -11098,39 +11264,41 @@ static void ggml_compute_forward_reshape(
static void ggml_compute_forward_view(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0) {
+ const struct ggml_tensor * dst) {
// NOP
UNUSED(params);
- UNUSED(src0);
+ UNUSED(dst);
}
// ggml_compute_forward_permute
static void ggml_compute_forward_permute(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0) {
+ const struct ggml_tensor * dst) {
// NOP
UNUSED(params);
- UNUSED(src0);
+ UNUSED(dst);
}
// ggml_compute_forward_transpose
static void ggml_compute_forward_transpose(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0) {
+ const struct ggml_tensor * dst) {
// NOP
UNUSED(params);
- UNUSED(src0);
+ UNUSED(dst);
}
// ggml_compute_forward_get_rows
static void ggml_compute_forward_get_rows_q(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11166,9 +11334,11 @@ static void ggml_compute_forward_get_rows_q(
static void ggml_compute_forward_get_rows_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11201,9 +11371,11 @@ static void ggml_compute_forward_get_rows_f16(
static void ggml_compute_forward_get_rows_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11236,9 +11408,10 @@ static void ggml_compute_forward_get_rows_f32(
static void ggml_compute_forward_get_rows(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1:
@@ -11257,16 +11430,16 @@ static void ggml_compute_forward_get_rows(
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ4_NL:
{
- ggml_compute_forward_get_rows_q(params, src0, src1, dst);
+ ggml_compute_forward_get_rows_q(params, dst);
} break;
case GGML_TYPE_F16:
{
- ggml_compute_forward_get_rows_f16(params, src0, src1, dst);
+ ggml_compute_forward_get_rows_f16(params, dst);
} break;
case GGML_TYPE_F32:
case GGML_TYPE_I32:
{
- ggml_compute_forward_get_rows_f32(params, src0, src1, dst);
+ ggml_compute_forward_get_rows_f32(params, dst);
} break;
default:
{
@@ -11297,9 +11470,11 @@ static void ggml_compute_forward_get_rows(
static void ggml_compute_forward_get_rows_back_f32_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_is_contiguous(dst));
@@ -11334,9 +11509,11 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
static void ggml_compute_forward_get_rows_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(params->ith == 0);
GGML_ASSERT(ggml_is_contiguous(dst));
@@ -11371,17 +11548,18 @@ static void ggml_compute_forward_get_rows_back_f32(
static void ggml_compute_forward_get_rows_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_get_rows_back_f32_f16(params, src0, src1, dst);
+ ggml_compute_forward_get_rows_back_f32_f16(params, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_get_rows_back_f32(params, src0, src1, dst);
+ ggml_compute_forward_get_rows_back_f32(params, dst);
} break;
default:
{
@@ -11412,8 +11590,10 @@ static void ggml_compute_forward_get_rows_back(
static void ggml_compute_forward_diag_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11452,12 +11632,14 @@ static void ggml_compute_forward_diag_f32(
static void ggml_compute_forward_diag(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_diag_f32(params, src0, dst);
+ ggml_compute_forward_diag_f32(params, dst);
} break;
default:
{
@@ -11470,10 +11652,11 @@ static void ggml_compute_forward_diag(
static void ggml_compute_forward_diag_mask_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst,
const float value) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
const int ith = params->ith;
const int nth = params->nth;
@@ -11523,12 +11706,14 @@ static void ggml_compute_forward_diag_mask_f32(
static void ggml_compute_forward_diag_mask_inf(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
+ ggml_compute_forward_diag_mask_f32(params, dst, -INFINITY);
} break;
default:
{
@@ -11539,12 +11724,14 @@ static void ggml_compute_forward_diag_mask_inf(
static void ggml_compute_forward_diag_mask_zero(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
+ ggml_compute_forward_diag_mask_f32(params, dst, 0);
} break;
default:
{
@@ -11557,10 +11744,12 @@ static void ggml_compute_forward_diag_mask_zero(
static void ggml_compute_forward_soft_max_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
- const struct ggml_tensor * src2,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+ const struct ggml_tensor * src2 = dst->src[2];
+
assert(ggml_is_contiguous(dst));
assert(ggml_are_same_shape(src0, dst));
@@ -11671,14 +11860,14 @@ static void ggml_compute_forward_soft_max_f32(
static void ggml_compute_forward_soft_max(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
- const struct ggml_tensor * src2,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_soft_max_f32(params, src0, src1, src2, dst);
+ ggml_compute_forward_soft_max_f32(params, dst);
} break;
default:
{
@@ -11691,9 +11880,11 @@ static void ggml_compute_forward_soft_max(
static void ggml_compute_forward_soft_max_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_is_contiguous(src0));
GGML_ASSERT(ggml_is_contiguous(src1));
GGML_ASSERT(ggml_is_contiguous(dst));
@@ -11768,13 +11959,14 @@ static void ggml_compute_forward_soft_max_back_f32(
static void ggml_compute_forward_soft_max_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_soft_max_back_f32(params, src0, src1, dst);
+ ggml_compute_forward_soft_max_back_f32(params, dst);
} break;
default:
{
@@ -11787,8 +11979,10 @@ static void ggml_compute_forward_soft_max_back(
static void ggml_compute_forward_alibi_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11844,8 +12038,10 @@ static void ggml_compute_forward_alibi_f32(
static void ggml_compute_forward_alibi_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11904,16 +12100,18 @@ static void ggml_compute_forward_alibi_f16(
static void ggml_compute_forward_alibi(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_alibi_f16(params, src0, dst);
+ ggml_compute_forward_alibi_f16(params, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_alibi_f32(params, src0, dst);
+ ggml_compute_forward_alibi_f32(params, dst);
} break;
case GGML_TYPE_Q4_0:
case GGML_TYPE_Q4_1:
@@ -11946,8 +12144,10 @@ static void ggml_compute_forward_alibi(
static void ggml_compute_forward_clamp_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -11986,12 +12186,14 @@ static void ggml_compute_forward_clamp_f32(
static void ggml_compute_forward_clamp(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_clamp_f32(params, src0, dst);
+ ggml_compute_forward_clamp_f32(params, dst);
} break;
case GGML_TYPE_F16:
case GGML_TYPE_Q4_0:
@@ -12081,10 +12283,12 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
static void ggml_compute_forward_rope_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst,
const bool forward) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -12257,10 +12461,12 @@ static void ggml_compute_forward_rope_f32(
static void ggml_compute_forward_rope_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst,
const bool forward) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -12422,17 +12628,18 @@ static void ggml_compute_forward_rope_f16(
static void ggml_compute_forward_rope(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
+ ggml_compute_forward_rope_f16(params, dst, true);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
+ ggml_compute_forward_rope_f32(params, dst, true);
} break;
default:
{
@@ -12445,17 +12652,18 @@ static void ggml_compute_forward_rope(
static void ggml_compute_forward_rope_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
+ ggml_compute_forward_rope_f16(params, dst, false);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
+ ggml_compute_forward_rope_f32(params, dst, false);
} break;
default:
{
@@ -12468,9 +12676,11 @@ static void ggml_compute_forward_rope_back(
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12565,9 +12775,11 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
static void ggml_compute_forward_conv_transpose_1d_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12662,17 +12874,18 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
static void ggml_compute_forward_conv_transpose_1d(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_conv_transpose_1d_f16_f32(params, src0, src1, dst);
+ ggml_compute_forward_conv_transpose_1d_f16_f32(params, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_conv_transpose_1d_f32(params, src0, src1, dst);
+ ggml_compute_forward_conv_transpose_1d_f32(params, dst);
} break;
default:
{
@@ -12686,9 +12899,11 @@ static void ggml_compute_forward_conv_transpose_1d(
// dst: result [N, OH, OW, IC*KH*KW]
static void ggml_compute_forward_im2col_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12772,9 +12987,11 @@ static void ggml_compute_forward_im2col_f32(
// dst: result [N, OH, OW, IC*KH*KW]
static void ggml_compute_forward_im2col_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F16);
@@ -12854,17 +13071,15 @@ static void ggml_compute_forward_im2col_f16(
static void ggml_compute_forward_im2col(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
switch (dst->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_im2col_f16(params, src0, src1, dst);
+ ggml_compute_forward_im2col_f16(params, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_im2col_f32(params, src0, src1, dst);
+ ggml_compute_forward_im2col_f32(params, dst);
} break;
default:
{
@@ -12878,9 +13093,11 @@ static void ggml_compute_forward_im2col(
static void ggml_compute_forward_conv_transpose_2d(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -12984,9 +13201,11 @@ static void ggml_compute_forward_conv_transpose_2d(
static void ggml_compute_forward_pool_1d_sk_p0(
const struct ggml_compute_params * params,
const enum ggml_op_pool op,
- const struct ggml_tensor * src,
const int k,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src = dst->src[0];
+
assert(src->type == GGML_TYPE_F32);
assert(params->ith == 0);
@@ -13035,7 +13254,6 @@ static void ggml_compute_forward_pool_1d_sk_p0(
static void ggml_compute_forward_pool_1d(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
const int32_t * opts = (const int32_t *)dst->op_params;
@@ -13046,15 +13264,17 @@ static void ggml_compute_forward_pool_1d(
GGML_ASSERT(p0 == 0); // padding not supported
GGML_ASSERT(k0 == s0); // only s = k supported
- ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
+ ggml_compute_forward_pool_1d_sk_p0(params, op, k0, dst);
}
// ggml_compute_forward_pool_2d
static void ggml_compute_forward_pool_2d(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src = dst->src[0];
+
GGML_ASSERT(src->type == GGML_TYPE_F32);
GGML_ASSERT(params->ith == 0);
@@ -13127,9 +13347,10 @@ static void ggml_compute_forward_pool_2d(
static void ggml_compute_forward_upscale_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -13166,12 +13387,14 @@ static void ggml_compute_forward_upscale_f32(
static void ggml_compute_forward_upscale(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_upscale_f32(params, src0, dst);
+ ggml_compute_forward_upscale_f32(params, dst);
} break;
default:
{
@@ -13184,9 +13407,10 @@ static void ggml_compute_forward_upscale(
static void ggml_compute_forward_pad_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -13224,12 +13448,14 @@ static void ggml_compute_forward_pad_f32(
static void ggml_compute_forward_pad(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_pad_f32(params, src0, dst);
+ ggml_compute_forward_pad_f32(params, dst);
} break;
default:
{
@@ -13242,9 +13468,10 @@ static void ggml_compute_forward_pad(
static void ggml_compute_forward_argsort_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -13284,13 +13511,14 @@ static void ggml_compute_forward_argsort_f32(
static void ggml_compute_forward_argsort(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_argsort_f32(params, src0, dst);
+ ggml_compute_forward_argsort_f32(params, dst);
} break;
default:
{
@@ -13303,11 +13531,13 @@ static void ggml_compute_forward_argsort(
static void ggml_compute_forward_flash_attn_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * q,
- const struct ggml_tensor * k,
- const struct ggml_tensor * v,
const bool masked,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * q = dst->src[0];
+ const struct ggml_tensor * k = dst->src[1];
+ const struct ggml_tensor * v = dst->src[2];
+
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
@@ -13493,11 +13723,13 @@ static void ggml_compute_forward_flash_attn_f32(
static void ggml_compute_forward_flash_attn_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * q,
- const struct ggml_tensor * k,
- const struct ggml_tensor * v,
const bool masked,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * q = dst->src[0];
+ const struct ggml_tensor * k = dst->src[1];
+ const struct ggml_tensor * v = dst->src[2];
+
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
@@ -13719,19 +13951,19 @@ static void ggml_compute_forward_flash_attn_f16(
static void ggml_compute_forward_flash_attn(
const struct ggml_compute_params * params,
- const struct ggml_tensor * q,
- const struct ggml_tensor * k,
- const struct ggml_tensor * v,
const bool masked,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * q = dst->src[0];
+
switch (q->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_flash_attn_f16(params, q, k, v, masked, dst);
+ ggml_compute_forward_flash_attn_f16(params, masked, dst);
} break;
case GGML_TYPE_F32:
{
- ggml_compute_forward_flash_attn_f32(params, q, k, v, masked, dst);
+ ggml_compute_forward_flash_attn_f32(params, masked, dst);
} break;
default:
{
@@ -13744,12 +13976,14 @@ static void ggml_compute_forward_flash_attn(
static void ggml_compute_forward_flash_ff_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a, // F16
- const struct ggml_tensor * b0, // F16 fc_w
- const struct ggml_tensor * b1, // F32 fc_b
- const struct ggml_tensor * c0, // F16 proj_w
- const struct ggml_tensor * c1, // F32 proj_b
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * a = dst->src[0]; // F16
+ const struct ggml_tensor * b0 = dst->src[1]; // F16 fc_w
+ const struct ggml_tensor * b1 = dst->src[2]; // F32 fc_b
+ const struct ggml_tensor * c0 = dst->src[3]; // F16 proj_w
+ const struct ggml_tensor * c1 = dst->src[4]; // F32 proj_b
+
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
@@ -13877,16 +14111,14 @@ static void ggml_compute_forward_flash_ff_f16(
static void ggml_compute_forward_flash_ff(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- const struct ggml_tensor * b0,
- const struct ggml_tensor * b1,
- const struct ggml_tensor * c0,
- const struct ggml_tensor * c1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * b0 = dst->src[1];
+
switch (b0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_flash_ff_f16(params, a, b0, b1, c0, c1, dst);
+ ggml_compute_forward_flash_ff_f16(params, dst);
} break;
case GGML_TYPE_F32:
{
@@ -13903,12 +14135,14 @@ static void ggml_compute_forward_flash_ff(
static void ggml_compute_forward_flash_attn_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * q,
- const struct ggml_tensor * k,
- const struct ggml_tensor * v,
- const struct ggml_tensor * d,
const bool masked,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * q = dst->src[0];
+ const struct ggml_tensor * k = dst->src[1];
+ const struct ggml_tensor * v = dst->src[2];
+ const struct ggml_tensor * d = dst->src[3];
+
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
@@ -14256,16 +14490,15 @@ static void ggml_compute_forward_flash_attn_back_f32(
static void ggml_compute_forward_flash_attn_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * q,
- const struct ggml_tensor * k,
- const struct ggml_tensor * v,
- const struct ggml_tensor * d,
const bool masked,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * q = dst->src[0];
+
switch (q->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_flash_attn_back_f32(params, q, k, v, d, masked, dst);
+ ggml_compute_forward_flash_attn_back_f32(params, masked, dst);
} break;
default:
{
@@ -14278,8 +14511,10 @@ static void ggml_compute_forward_flash_attn_back(
static void ggml_compute_forward_win_part_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -14322,12 +14557,14 @@ static void ggml_compute_forward_win_part_f32(
static void ggml_compute_forward_win_part(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_win_part_f32(params, src0, dst);
+ ggml_compute_forward_win_part_f32(params, dst);
} break;
default:
{
@@ -14340,8 +14577,10 @@ static void ggml_compute_forward_win_part(
static void ggml_compute_forward_win_unpart_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -14382,12 +14621,14 @@ static void ggml_compute_forward_win_unpart_f32(
static void ggml_compute_forward_win_unpart(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_win_unpart_f32(params, src0, dst);
+ ggml_compute_forward_win_unpart_f32(params, dst);
} break;
default:
{
@@ -14400,58 +14641,58 @@ static void ggml_compute_forward_win_unpart(
static void ggml_compute_forward_unary(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
switch (op) {
case GGML_UNARY_OP_ABS:
{
- ggml_compute_forward_abs(params, src0, dst);
+ ggml_compute_forward_abs(params, dst);
} break;
case GGML_UNARY_OP_SGN:
{
- ggml_compute_forward_sgn(params, src0, dst);
+ ggml_compute_forward_sgn(params, dst);
} break;
case GGML_UNARY_OP_NEG:
{
- ggml_compute_forward_neg(params, src0, dst);
+ ggml_compute_forward_neg(params, dst);
} break;
case GGML_UNARY_OP_STEP:
{
- ggml_compute_forward_step(params, src0, dst);
+ ggml_compute_forward_step(params, dst);
} break;
case GGML_UNARY_OP_TANH:
{
- ggml_compute_forward_tanh(params, src0, dst);
+ ggml_compute_forward_tanh(params, dst);
} break;
case GGML_UNARY_OP_ELU:
{
- ggml_compute_forward_elu(params, src0, dst);
+ ggml_compute_forward_elu(params, dst);
} break;
case GGML_UNARY_OP_RELU:
{
- ggml_compute_forward_relu(params, src0, dst);
+ ggml_compute_forward_relu(params, dst);
} break;
case GGML_UNARY_OP_GELU:
{
- ggml_compute_forward_gelu(params, src0, dst);
+ ggml_compute_forward_gelu(params, dst);
} break;
case GGML_UNARY_OP_GELU_QUICK:
{
- ggml_compute_forward_gelu_quick(params, src0, dst);
+ ggml_compute_forward_gelu_quick(params, dst);
} break;
case GGML_UNARY_OP_SILU:
{
- ggml_compute_forward_silu(params, src0, dst);
+ ggml_compute_forward_silu(params, dst);
} break;
case GGML_UNARY_OP_HARDSWISH:
{
- ggml_compute_forward_hardswish(params, src0, dst);
+ ggml_compute_forward_hardswish(params, dst);
} break;
case GGML_UNARY_OP_HARDSIGMOID:
{
- ggml_compute_forward_hardsigmoid(params, src0, dst);
+ ggml_compute_forward_hardsigmoid(params, dst);
} break;
default:
{
@@ -14464,8 +14705,10 @@ static void ggml_compute_forward_unary(
static void ggml_compute_forward_get_rel_pos_f16(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -14491,12 +14734,14 @@ static void ggml_compute_forward_get_rel_pos_f16(
static void ggml_compute_forward_get_rel_pos(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F16:
{
- ggml_compute_forward_get_rel_pos_f16(params, src0, dst);
+ ggml_compute_forward_get_rel_pos_f16(params, dst);
} break;
default:
{
@@ -14509,11 +14754,12 @@ static void ggml_compute_forward_get_rel_pos(
static void ggml_compute_forward_add_rel_pos_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
- const struct ggml_tensor * src2,
struct ggml_tensor * dst) {
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+ const struct ggml_tensor * src2 = dst->src[2];
+
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
if (!inplace && params->type == GGML_TASK_INIT) {
if (params->ith != 0) {
@@ -14577,14 +14823,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
static void ggml_compute_forward_add_rel_pos(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
- const struct ggml_tensor * src2,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_add_rel_pos_f32(params, src0, src1, src2, dst);
+ ggml_compute_forward_add_rel_pos_f32(params, dst);
} break;
default:
{
@@ -14597,9 +14843,11 @@ static void ggml_compute_forward_add_rel_pos(
static void ggml_compute_forward_map_unary_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst,
const ggml_unary_op_f32_t fun) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
GGML_ASSERT(ggml_are_same_shape(src0, dst));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14621,13 +14869,15 @@ static void ggml_compute_forward_map_unary_f32(
static void ggml_compute_forward_map_unary(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
struct ggml_tensor * dst,
const ggml_unary_op_f32_t fun) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_map_unary_f32(params, src0, dst, fun);
+ ggml_compute_forward_map_unary_f32(params, dst, fun);
} break;
default:
{
@@ -14640,10 +14890,12 @@ static void ggml_compute_forward_map_unary(
static void ggml_compute_forward_map_binary_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst,
const ggml_binary_op_f32_t fun) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
assert(params->ith == 0);
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
@@ -14668,14 +14920,15 @@ static void ggml_compute_forward_map_binary_f32(
static void ggml_compute_forward_map_binary(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst,
const ggml_binary_op_f32_t fun) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_map_binary_f32(params, src0, src1, dst, fun);
+ ggml_compute_forward_map_binary_f32(params, dst, fun);
} break;
default:
{
@@ -14688,9 +14941,11 @@ static void ggml_compute_forward_map_binary(
static void ggml_compute_forward_map_custom1_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
+
+ const struct ggml_tensor * a = dst->src[0];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14704,10 +14959,12 @@ static void ggml_compute_forward_map_custom1_f32(
static void ggml_compute_forward_map_custom2_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
+
+ const struct ggml_tensor * a = dst->src[0];
+ const struct ggml_tensor * b = dst->src[1];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14721,11 +14978,13 @@ static void ggml_compute_forward_map_custom2_f32(
static void ggml_compute_forward_map_custom3_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- const struct ggml_tensor * b,
- const struct ggml_tensor * c,
struct ggml_tensor * dst,
const ggml_custom3_op_f32_t fun) {
+
+ const struct ggml_tensor * a = dst->src[0];
+ const struct ggml_tensor * b = dst->src[1];
+ const struct ggml_tensor * c = dst->src[2];
+
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -14739,8 +14998,10 @@ static void ggml_compute_forward_map_custom3_f32(
static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * a = dst->src[0];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -14754,9 +15015,11 @@ static void ggml_compute_forward_map_custom1(
static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- const struct ggml_tensor * b,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * a = dst->src[0];
+ const struct ggml_tensor * b = dst->src[1];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -14770,10 +15033,12 @@ static void ggml_compute_forward_map_custom2(
static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params,
- const struct ggml_tensor * a,
- const struct ggml_tensor * b,
- const struct ggml_tensor * c,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * a = dst->src[0];
+ const struct ggml_tensor * b = dst->src[1];
+ const struct ggml_tensor * c = dst->src[2];
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
@@ -14787,9 +15052,11 @@ static void ggml_compute_forward_map_custom3(
static void ggml_compute_forward_cross_entropy_loss_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+
GGML_ASSERT(ggml_is_contiguous(src0));
GGML_ASSERT(ggml_is_contiguous(src1));
GGML_ASSERT(ggml_is_scalar(dst));
@@ -14893,13 +15160,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
static void ggml_compute_forward_cross_entropy_loss(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_cross_entropy_loss_f32(params, src0, src1, dst);
+ ggml_compute_forward_cross_entropy_loss_f32(params, dst);
} break;
default:
{
@@ -14912,10 +15180,12 @@ static void ggml_compute_forward_cross_entropy_loss(
static void ggml_compute_forward_cross_entropy_loss_back_f32(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
- const struct ggml_tensor * opt0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+ const struct ggml_tensor * src1 = dst->src[1];
+ const struct ggml_tensor * opt0 = dst->src[2];
+
GGML_ASSERT(ggml_is_contiguous(dst));
GGML_ASSERT(ggml_is_contiguous(src0));
GGML_ASSERT(ggml_is_contiguous(src1));
@@ -15002,14 +15272,14 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
static void ggml_compute_forward_cross_entropy_loss_back(
const struct ggml_compute_params * params,
- const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
- const struct ggml_tensor * opt0,
struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_cross_entropy_loss_back_f32(params, src0, src1, opt0, dst);
+ ggml_compute_forward_cross_entropy_loss_back_f32(params, dst);
} break;
default:
{
@@ -15057,312 +15327,312 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
switch (tensor->op) {
case GGML_OP_DUP:
{
- ggml_compute_forward_dup(params, tensor->src[0], tensor);
+ ggml_compute_forward_dup(params, tensor);
} break;
case GGML_OP_ADD:
{
- ggml_compute_forward_add(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_add(params, tensor);
} break;
case GGML_OP_ADD1:
{
- ggml_compute_forward_add1(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_add1(params, tensor);
} break;
case GGML_OP_ACC:
{
- ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_acc(params, tensor);
} break;
case GGML_OP_SUB:
{
- ggml_compute_forward_sub(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_sub(params, tensor);
} break;
case GGML_OP_MUL:
{
- ggml_compute_forward_mul(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_mul(params, tensor);
} break;
case GGML_OP_DIV:
{
- ggml_compute_forward_div(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_div(params, tensor);
} break;
case GGML_OP_SQR:
{
- ggml_compute_forward_sqr(params, tensor->src[0], tensor);
+ ggml_compute_forward_sqr(params, tensor);
} break;
case GGML_OP_SQRT:
{
- ggml_compute_forward_sqrt(params, tensor->src[0], tensor);
+ ggml_compute_forward_sqrt(params, tensor);
} break;
case GGML_OP_LOG:
{
- ggml_compute_forward_log(params, tensor->src[0], tensor);
+ ggml_compute_forward_log(params, tensor);
} break;
case GGML_OP_SUM:
{
- ggml_compute_forward_sum(params, tensor->src[0], tensor);
+ ggml_compute_forward_sum(params, tensor);
} break;
case GGML_OP_SUM_ROWS:
{
- ggml_compute_forward_sum_rows(params, tensor->src[0], tensor);
+ ggml_compute_forward_sum_rows(params, tensor);
} break;
case GGML_OP_MEAN:
{
- ggml_compute_forward_mean(params, tensor->src[0], tensor);
+ ggml_compute_forward_mean(params, tensor);
} break;
case GGML_OP_ARGMAX:
{
- ggml_compute_forward_argmax(params, tensor->src[0], tensor);
+ ggml_compute_forward_argmax(params, tensor);
} break;
case GGML_OP_REPEAT:
{
- ggml_compute_forward_repeat(params, tensor->src[0], tensor);
+ ggml_compute_forward_repeat(params, tensor);
} break;
case GGML_OP_REPEAT_BACK:
{
- ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
+ ggml_compute_forward_repeat_back(params, tensor);
} break;
case GGML_OP_CONCAT:
{
- ggml_compute_forward_concat(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_concat(params, tensor);
} break;
case GGML_OP_SILU_BACK:
{
- ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_silu_back(params, tensor);
} break;
case GGML_OP_NORM:
{
- ggml_compute_forward_norm(params, tensor->src[0], tensor);
+ ggml_compute_forward_norm(params, tensor);
} break;
case GGML_OP_RMS_NORM:
{
- ggml_compute_forward_rms_norm(params, tensor->src[0], tensor);
+ ggml_compute_forward_rms_norm(params, tensor);
} break;
case GGML_OP_RMS_NORM_BACK:
{
- ggml_compute_forward_rms_norm_back(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_rms_norm_back(params, tensor);
} break;
case GGML_OP_GROUP_NORM:
{
- ggml_compute_forward_group_norm(params, tensor->src[0], tensor);
+ ggml_compute_forward_group_norm(params, tensor);
} break;
case GGML_OP_MUL_MAT:
{
- ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_mul_mat(params, tensor);
} break;
case GGML_OP_MUL_MAT_ID:
{
- ggml_compute_forward_mul_mat_id(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_mul_mat_id(params, tensor);
} break;
case GGML_OP_OUT_PROD:
{
- ggml_compute_forward_out_prod(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_out_prod(params, tensor);
} break;
case GGML_OP_SCALE:
{
- ggml_compute_forward_scale(params, tensor->src[0], tensor);
+ ggml_compute_forward_scale(params, tensor);
} break;
case GGML_OP_SET:
{
- ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_set(params, tensor);
} break;
case GGML_OP_CPY:
{
- ggml_compute_forward_cpy(params, tensor->src[0], tensor);
+ ggml_compute_forward_cpy(params, tensor);
} break;
case GGML_OP_CONT:
{
- ggml_compute_forward_cont(params, tensor->src[0], tensor);
+ ggml_compute_forward_cont(params, tensor);
} break;
case GGML_OP_RESHAPE:
{
- ggml_compute_forward_reshape(params, tensor->src[0], tensor);
+ ggml_compute_forward_reshape(params, tensor);
} break;
case GGML_OP_VIEW:
{
- ggml_compute_forward_view(params, tensor->src[0]);
+ ggml_compute_forward_view(params, tensor);
} break;
case GGML_OP_PERMUTE:
{
- ggml_compute_forward_permute(params, tensor->src[0]);
+ ggml_compute_forward_permute(params, tensor);
} break;
case GGML_OP_TRANSPOSE:
{
- ggml_compute_forward_transpose(params, tensor->src[0]);
+ ggml_compute_forward_transpose(params, tensor);
} break;
case GGML_OP_GET_ROWS:
{
- ggml_compute_forward_get_rows(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_get_rows(params, tensor);
} break;
case GGML_OP_GET_ROWS_BACK:
{
- ggml_compute_forward_get_rows_back(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_get_rows_back(params, tensor);
} break;
case GGML_OP_DIAG:
{
- ggml_compute_forward_diag(params, tensor->src[0], tensor);
+ ggml_compute_forward_diag(params, tensor);
} break;
case GGML_OP_DIAG_MASK_INF:
{
- ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
+ ggml_compute_forward_diag_mask_inf(params, tensor);
} break;
case GGML_OP_DIAG_MASK_ZERO:
{
- ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
+ ggml_compute_forward_diag_mask_zero(params, tensor);
} break;
case GGML_OP_SOFT_MAX:
{
- ggml_compute_forward_soft_max(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+ ggml_compute_forward_soft_max(params, tensor);
} break;
case GGML_OP_SOFT_MAX_BACK:
{
- ggml_compute_forward_soft_max_back(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_soft_max_back(params, tensor);
} break;
case GGML_OP_ROPE:
{
- ggml_compute_forward_rope(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_rope(params, tensor);
} break;
case GGML_OP_ROPE_BACK:
{
- ggml_compute_forward_rope_back(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_rope_back(params, tensor);
} break;
case GGML_OP_ALIBI:
{
- ggml_compute_forward_alibi(params, tensor->src[0], tensor);
+ ggml_compute_forward_alibi(params, tensor);
} break;
case GGML_OP_CLAMP:
{
- ggml_compute_forward_clamp(params, tensor->src[0], tensor);
+ ggml_compute_forward_clamp(params, tensor);
} break;
case GGML_OP_CONV_TRANSPOSE_1D:
{
- ggml_compute_forward_conv_transpose_1d(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_conv_transpose_1d(params, tensor);
} break;
case GGML_OP_IM2COL:
{
- ggml_compute_forward_im2col(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_im2col(params, tensor);
} break;
case GGML_OP_CONV_TRANSPOSE_2D:
{
- ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_conv_transpose_2d(params, tensor);
} break;
case GGML_OP_POOL_1D:
{
- ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
+ ggml_compute_forward_pool_1d(params, tensor);
} break;
case GGML_OP_POOL_2D:
{
- ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
+ ggml_compute_forward_pool_2d(params, tensor);
} break;
case GGML_OP_UPSCALE:
{
- ggml_compute_forward_upscale(params, tensor->src[0], tensor);
+ ggml_compute_forward_upscale(params, tensor);
} break;
case GGML_OP_PAD:
{
- ggml_compute_forward_pad(params, tensor->src[0], tensor);
+ ggml_compute_forward_pad(params, tensor);
} break;
case GGML_OP_ARGSORT:
{
- ggml_compute_forward_argsort(params, tensor->src[0], tensor);
+ ggml_compute_forward_argsort(params, tensor);
} break;
case GGML_OP_LEAKY_RELU:
{
- ggml_compute_forward_leaky_relu(params, tensor->src[0], tensor);
+ ggml_compute_forward_leaky_relu(params, tensor);
} break;
case GGML_OP_FLASH_ATTN:
{
const int32_t t = ggml_get_op_params_i32(tensor, 0);
GGML_ASSERT(t == 0 || t == 1);
const bool masked = t != 0;
- ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
+ ggml_compute_forward_flash_attn(params, masked, tensor);
} break;
case GGML_OP_FLASH_FF:
{
- ggml_compute_forward_flash_ff(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], tensor->src[4], tensor);
+ ggml_compute_forward_flash_ff(params, tensor);
} break;
case GGML_OP_FLASH_ATTN_BACK:
{
int32_t t = ggml_get_op_params_i32(tensor, 0);
GGML_ASSERT(t == 0 || t == 1);
bool masked = t != 0;
- ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
+ ggml_compute_forward_flash_attn_back(params, masked, tensor);
} break;
case GGML_OP_WIN_PART:
{
- ggml_compute_forward_win_part(params, tensor->src[0], tensor);
+ ggml_compute_forward_win_part(params, tensor);
} break;
case GGML_OP_WIN_UNPART:
{
- ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
+ ggml_compute_forward_win_unpart(params, tensor);
} break;
case GGML_OP_UNARY:
{
- ggml_compute_forward_unary(params, tensor->src[0], tensor);
+ ggml_compute_forward_unary(params, tensor);
} break;
case GGML_OP_GET_REL_POS:
{
- ggml_compute_forward_get_rel_pos(params, tensor->src[0], tensor);
+ ggml_compute_forward_get_rel_pos(params, tensor);
} break;
case GGML_OP_ADD_REL_POS:
{
- ggml_compute_forward_add_rel_pos(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+ ggml_compute_forward_add_rel_pos(params, tensor);
} break;
case GGML_OP_MAP_UNARY:
{
ggml_unary_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
+ ggml_compute_forward_map_unary(params, tensor, fun);
}
break;
case GGML_OP_MAP_BINARY:
{
ggml_binary_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
+ ggml_compute_forward_map_binary(params, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1_F32:
{
ggml_custom1_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
+ ggml_compute_forward_map_custom1_f32(params, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM2_F32:
{
ggml_custom2_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
+ ggml_compute_forward_map_custom2_f32(params, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM3_F32:
{
ggml_custom3_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
- ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
+ ggml_compute_forward_map_custom3_f32(params, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
{
- ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
+ ggml_compute_forward_map_custom1(params, tensor);
}
break;
case GGML_OP_MAP_CUSTOM2:
{
- ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_map_custom2(params, tensor);
}
break;
case GGML_OP_MAP_CUSTOM3:
{
- ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+ ggml_compute_forward_map_custom3(params, tensor);
}
break;
case GGML_OP_CROSS_ENTROPY_LOSS:
{
- ggml_compute_forward_cross_entropy_loss(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_cross_entropy_loss(params, tensor);
}
break;
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
{
- ggml_compute_forward_cross_entropy_loss_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+ ggml_compute_forward_cross_entropy_loss_back(params, tensor);
}
break;
case GGML_OP_NONE: