summaryrefslogtreecommitdiff
path: root/ggml.c
diff options
context:
space:
mode:
Diffstat (limited to 'ggml.c')
-rw-r--r--ggml.c207
1 files changed, 199 insertions, 8 deletions
diff --git a/ggml.c b/ggml.c
index f29b9f13..870e4161 100644
--- a/ggml.c
+++ b/ggml.c
@@ -1822,6 +1822,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"POOL_2D",
"UPSCALE",
"PAD",
+ "ARANGE",
+ "TIMESTEP_EMBEDDING",
"ARGSORT",
"LEAKY_RELU",
@@ -1850,7 +1852,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
+static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -1908,6 +1910,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"pool_2d(x)",
"upscale(x)",
"pad(x)",
+ "arange(start, stop, step)",
+ "timestep_embedding(timesteps, dim, max_period)",
"argsort(x)",
"leaky_relu(x)",
@@ -1936,7 +1940,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
+static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@@ -2895,11 +2899,21 @@ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_
return ((const int32_t *)(tensor->op_params))[i];
}
+static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
+ assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+ return ((const float *)(tensor->op_params))[i];
+}
+
// Write `value` into the i-th int32 slot of the tensor's op_params scratch area.
static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
    int32_t * params = (int32_t *) tensor->op_params;
    params[i] = value;
}
+static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
+ assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+ ((float *)(tensor->op_params))[i] = value;
+}
+
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
memset(tensor->data, 0, ggml_nbytes(tensor));
return tensor;
@@ -5898,6 +5912,55 @@ struct ggml_tensor * ggml_upscale(
return ggml_upscale_impl(ctx, a, scale_factor);
}
+struct ggml_tensor * ggml_arange(
+ struct ggml_context * ctx,
+ float start,
+ float stop,
+ float step) {
+
+ GGML_ASSERT(stop > start);
+
+ const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+ struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
+
+ result->op = GGML_OP_ARANGE;
+ ggml_set_op_params_f32(result, 0, start);
+ ggml_set_op_params_f32(result, 1, stop);
+ ggml_set_op_params_f32(result, 2, step);
+
+ return result;
+}
+
+struct ggml_tensor * ggml_timestep_embedding(
+ struct ggml_context * ctx,
+ struct ggml_tensor * timesteps,
+ int dim,
+ int max_period) {
+ bool is_node = false;
+
+ if (timesteps->grad) {
+ GGML_ASSERT(false); // TODO: implement backward
+ is_node = true;
+ }
+
+ int actual_dim = dim;
+ if (dim % 2 != 0) {
+ actual_dim = dim + 1;
+ }
+
+ struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+
+ result->op = GGML_OP_TIMESTEP_EMBEDDING;
+ ggml_set_op_params_i32(result, 0, dim);
+ ggml_set_op_params_i32(result, 1, max_period);
+
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src[0] = timesteps;
+
+ return result;
+}
+
// ggml_argsort
struct ggml_tensor * ggml_argsort(
@@ -10231,7 +10294,7 @@ static void ggml_compute_forward_group_norm_f32(
int n_channels = src0->ne[2];
int n_groups = dst->op_params[0];
int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
- for (int i = ith; i < n_groups; i+=nth) {
+ for (int i = ith; i < n_groups; i += nth) {
int start = i * n_channels_per_group;
int end = start + n_channels_per_group;
if (end > n_channels) {
@@ -10245,28 +10308,32 @@ static void ggml_compute_forward_group_norm_f32(
for (int64_t i01 = 0; i01 < ne01; i01++) {
const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
+ ggml_float sumr = 0.0;
for (int64_t i00 = 0; i00 < ne00; i00++) {
- sum += (ggml_float)x[i00];
+ sumr += (ggml_float)x[i00];
}
+ sum += sumr;
}
}
- float mean = sum / (ne00 * ne01 * step);
- ggml_float sum2 = 0.0;
+ const float mean = sum / (ne00 * ne01 * step);
+ ggml_float sum2 = 0.0;
for (int64_t i02 = start; i02 < end; i02++) {
for (int64_t i01 = 0; i01 < ne01; i01++) {
const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
+ ggml_float sumr = 0.0;
for (int64_t i00 = 0; i00 < ne00; i00++) {
float v = x[i00] - mean;
y[i00] = v;
- sum2 += (ggml_float)(v * v);
+ sumr += (ggml_float)(v * v);
}
+ sum2 += sumr;
}
}
- float variance = sum2 / (ne00 * ne01 * step);
+ const float variance = sum2 / (ne00 * ne01 * step);
const float scale = 1.0f / sqrtf(variance + eps);
for (int64_t i02 = start; i02 < end; i02++) {
@@ -13547,6 +13614,106 @@ static void ggml_compute_forward_pad(
}
}
+
+// ggml_compute_forward_arange
+
+static void ggml_compute_forward_arange_f32(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
+ return;
+ }
+
+ GGML_ASSERT(dst->nb[0] == sizeof(float));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ const float start = ggml_get_op_params_f32(dst, 0);
+ const float stop = ggml_get_op_params_f32(dst, 1);
+ const float step = ggml_get_op_params_f32(dst, 2);
+
+ const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+ GGML_ASSERT(ggml_nelements(dst) == steps);
+
+ for (int64_t i = ith; i < steps; i+= nth) {
+ float value = start + step * i;
+ ((float *)dst->data)[i] = value;
+ }
+}
+
+static void ggml_compute_forward_arange(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+ switch (dst->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_arange_f32(params, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
+static void ggml_compute_forward_timestep_embedding_f32(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
+ return;
+ }
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
+ GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+ const int ith = params->ith;
+ const int nth = params->nth;
+
+ GGML_TENSOR_UNARY_OP_LOCALS
+
+ const int dim = ggml_get_op_params_i32(dst, 0);
+ const int max_period = ggml_get_op_params_i32(dst, 1);
+
+ int half = dim / 2;
+
+ for (int64_t i = 0; i < ne00; i++) {
+ float * embed_data = (float *)((char *) dst->data + i*nb1);
+ for (int64_t j = ith; j < half; j += nth) {
+ float timestep = ((float *)src0->data)[i];
+ float freq = (float)expf(-logf(max_period) * j / half);
+ float arg = timestep * freq;
+ embed_data[j] = cosf(arg);
+ embed_data[j + half] = sinf(arg);
+ }
+ if (dim % 2 != 0 && ith == 0) {
+ embed_data[dim] = 0.f;
+ }
+ }
+}
+
+static void ggml_compute_forward_timestep_embedding(
+ const struct ggml_compute_params * params,
+ struct ggml_tensor * dst) {
+
+ const struct ggml_tensor * src0 = dst->src[0];
+
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_timestep_embedding_f32(params, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
// ggml_compute_forward_argsort
static void ggml_compute_forward_argsort_f32(
@@ -15615,6 +15782,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_pad(params, tensor);
} break;
+ case GGML_OP_ARANGE:
+ {
+ ggml_compute_forward_arange(params, tensor);
+ } break;
+ case GGML_OP_TIMESTEP_EMBEDDING:
+ {
+ ggml_compute_forward_timestep_embedding(params, tensor);
+ } break;
case GGML_OP_ARGSORT:
{
ggml_compute_forward_argsort(params, tensor);
@@ -16617,6 +16792,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
{
GGML_ASSERT(false); // TODO: not implemented
} break;
+ case GGML_OP_ARANGE:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
+ case GGML_OP_TIMESTEP_EMBEDDING:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
case GGML_OP_ARGSORT:
{
GGML_ASSERT(false); // TODO: not implemented
@@ -17368,6 +17551,14 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
{
n_tasks = n_threads;
} break;
+ case GGML_OP_ARANGE:
+ {
+ n_tasks = n_threads;
+ } break;
+ case GGML_OP_TIMESTEP_EMBEDDING:
+ {
+ n_tasks = n_threads;
+ } break;
case GGML_OP_ARGSORT:
{
n_tasks = n_threads;