author    Georgi Gerganov <ggerganov@gmail.com>    2023-12-21 23:20:49 +0200
committer GitHub <noreply@github.com>              2023-12-21 23:20:49 +0200
commit    afefa319f1f59b002dfa0d1ef407a2c74bd9770b (patch)
tree      a6923e0a6214293d88957cd11e25943f2c0fb80a /ggml.c
parent    769a7bc85eaa44e3d7eadf39abfeff7bb0b9cc2f (diff)
ggml : change ggml_scale to take a float instead of tensor (#4573)
* ggml : change ggml_scale to take a float instead of tensor

* ggml : fix CPU implementation

* tests : fix test-grad0

ggml-ci
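The user-visible change is in the ggml_scale / ggml_scale_inplace signatures: the scale factor is now a plain float recorded in the op's op_params rather than a separate scalar tensor in src[1]. A minimal caller-side sketch of the before/after (illustrative only, assuming an existing ggml_context * ctx and a tensor a; not code taken from this commit):

    // before: the factor had to be wrapped in a 1-element tensor
    struct ggml_tensor * y_old = ggml_scale(ctx, a, ggml_new_f32(ctx, 2.0f));

    // after: the factor is passed directly and stored via ggml_set_op_params()
    struct ggml_tensor * y_new = ggml_scale(ctx, a, 2.0f);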
Diffstat (limited to 'ggml.c')
-rw-r--r--  ggml.c  42
1 file changed, 17 insertions(+), 25 deletions(-)
diff --git a/ggml.c b/ggml.c
index 23614851..f27920a2 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4171,23 +4171,23 @@ struct ggml_tensor * ggml_out_prod(
static struct ggml_tensor * ggml_scale_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b,
+ float s,
bool inplace) {
- GGML_ASSERT(ggml_is_scalar(b));
GGML_ASSERT(ggml_is_padded_1d(a));
bool is_node = false;
- if (a->grad || b->grad) {
+ if (a->grad) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+ ggml_set_op_params(result, &s, sizeof(s));
+
result->op = GGML_OP_SCALE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
- result->src[1] = b;
return result;
}
@@ -4195,15 +4195,15 @@ static struct ggml_tensor * ggml_scale_impl(
struct ggml_tensor * ggml_scale(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b) {
- return ggml_scale_impl(ctx, a, b, false);
+ float s) {
+ return ggml_scale_impl(ctx, a, s, false);
}
struct ggml_tensor * ggml_scale_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b) {
- return ggml_scale_impl(ctx, a, b, true);
+ float s) {
+ return ggml_scale_impl(ctx, a, s, true);
}
// ggml_set
@@ -10325,19 +10325,17 @@ static void ggml_compute_forward_out_prod(
static void ggml_compute_forward_scale_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
GGML_ASSERT(ggml_is_contiguous(src0));
GGML_ASSERT(ggml_is_contiguous(dst));
GGML_ASSERT(ggml_are_same_shape(src0, dst));
- GGML_ASSERT(ggml_is_scalar(src1));
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
// scale factor
- const float v = *(float *) src1->data;
+ const float v = *(float *) dst->op_params;
const int ith = params->ith;
const int nth = params->nth;
@@ -10368,12 +10366,11 @@ static void ggml_compute_forward_scale_f32(
static void ggml_compute_forward_scale(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
- const struct ggml_tensor * src1,
struct ggml_tensor * dst) {
switch (src0->type) {
case GGML_TYPE_F32:
{
- ggml_compute_forward_scale_f32(params, src0, src1, dst);
+ ggml_compute_forward_scale_f32(params, src0, dst);
} break;
default:
{
@@ -14383,7 +14380,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
} break;
case GGML_OP_SCALE:
{
- ggml_compute_forward_scale(params, tensor->src[0], tensor->src[1], tensor);
+ ggml_compute_forward_scale(params, tensor->src[0], tensor);
} break;
case GGML_OP_SET:
{
@@ -14839,7 +14836,7 @@ static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct gg
static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
if (ggml_hash_contains(zero_table, a)) {
- struct ggml_tensor * a_zero = ggml_scale(ctx, a, ggml_new_f32(ctx, 0));
+ struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f);
return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
} else {
return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
@@ -14975,7 +14972,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
src0->grad,
ggml_scale(ctx,
ggml_mul(ctx, src0, tensor->grad),
- ggml_new_f32(ctx, 2.0f)),
+ 2.0f),
zero_table);
}
} break;
@@ -14989,7 +14986,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
ggml_div(ctx,
tensor->grad,
tensor),
- ggml_new_f32(ctx, 0.5f)),
+ 0.5f),
zero_table);
}
} break;
@@ -15155,17 +15152,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
{
// necessary for llama
if (src0->grad) {
+ const float s = ((float *) tensor->op_params)[0];
+
src0->grad =
ggml_add_or_set(ctx,
src0->grad,
- ggml_scale_impl(ctx, tensor->grad, src1, false),
- zero_table);
- }
- if (src1->grad) {
- src1->grad =
- ggml_add_or_set(ctx,
- src1->grad,
- ggml_sum(ctx, ggml_mul_impl(ctx, tensor->grad, src0, false)),
+ ggml_scale_impl(ctx, tensor->grad, s, false),
zero_table);
}
} break;
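With the factor carried in op_params, the backward pass above no longer needs a gradient path for a src[1] scalar. A hypothetical downstream call site using the new API (the names kq, k, q, n_embd_head and the use of ggml_scale_inplace here are illustrative assumptions, not part of this diff):

    // scale attention scores by 1/sqrt(head_dim) without allocating a scalar tensor
    struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
    kq = ggml_scale_inplace(ctx, kq, 1.0f / sqrtf((float) n_embd_head));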