diff options
Diffstat (limited to 'ggml.c')
-rw-r--r-- | ggml.c | 64 |
1 files changed, 47 insertions, 17 deletions
@@ -6293,7 +6293,10 @@ struct ggml_tensor * ggml_pool_2d(
 static struct ggml_tensor * ggml_upscale_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
-        int scale_factor) {
+        int ne0,
+        int ne1,
+        int ne2,
+        int ne3) {
     bool is_node = false;

     if (a->grad) {
@@ -6301,19 +6304,45 @@ static struct ggml_tensor * ggml_upscale_impl(
         is_node = true;
     }

+    GGML_ASSERT(a->ne[0] <= ne0);
+    GGML_ASSERT(a->ne[1] <= ne1);
+    GGML_ASSERT(a->ne[2] <= ne2);
+    GGML_ASSERT(a->ne[3] <= ne3);
+
     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
-            a->ne[0] * scale_factor,
-            a->ne[1] * scale_factor,
-            a->ne[2], a->ne[3]);
+            ne0,
+            ne1,
+            ne2,
+            ne3
+            );

     result->op = GGML_OP_UPSCALE;
-    result->op_params[0] = scale_factor;
+
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;

     return result;
 }

+struct ggml_tensor * ggml_upscale(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int scale_factor) {
+    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3]);
+}
+
+struct ggml_tensor * ggml_upscale_ext(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        int ne0,
+        int ne1,
+        int ne2,
+        int ne3) {
+    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3);
+}
+
+// ggml_pad
+
 struct ggml_tensor * ggml_pad(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6338,12 +6367,7 @@ struct ggml_tensor * ggml_pad(
     return result;
 }

-struct ggml_tensor * ggml_upscale(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        int scale_factor) {
-    return ggml_upscale_impl(ctx, a, scale_factor);
-}
+// ggml_arange

 struct ggml_tensor * ggml_arange(
         struct ggml_context * ctx,
@@ -6365,6 +6389,8 @@ struct ggml_tensor * ggml_arange(
     return result;
 }

+// ggml_timestep_embedding
+
 struct ggml_tensor * ggml_timestep_embedding(
         struct ggml_context * ctx,
         struct ggml_tensor * timesteps,
@@ -14820,25 +14846,28 @@ static void ggml_compute_forward_upscale_f32(
         return;
     }

-    GGML_ASSERT(src0->nb[0] == sizeof(float));
+    GGML_ASSERT(src0->type == GGML_TYPE_F32);

     const int ith = params->ith;
     const int nth = params->nth;

     GGML_TENSOR_UNARY_OP_LOCALS

-    const int scale_factor = dst->op_params[0];
+    const float sf0 = (float)ne0/src0->ne[0];
+    const float sf1 = (float)ne1/src0->ne[1];
+    const float sf2 = (float)ne2/src0->ne[2];
+    const float sf3 = (float)ne3/src0->ne[3];

     // TODO: optimize

     for (int64_t i3 = 0; i3 < ne3; i3++) {
-        const int64_t i03 = i3;
+        const int64_t i03 = i3 / sf3;
         for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
-            const int64_t i02 = i2;
+            const int64_t i02 = i2 / sf2;
             for (int64_t i1 = 0; i1 < ne1; i1++) {
-                const int64_t i01 = i1 / scale_factor;
+                const int64_t i01 = i1 / sf1;
                 for (int64_t i0 = 0; i0 < ne0; i0++) {
-                    const int64_t i00 = i0 / scale_factor;
+                    const int64_t i00 = i0 / sf0;

                     const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
                     float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
@@ -14868,6 +14897,7 @@ static void ggml_compute_forward_upscale(
     }
 }

+
 // ggml_compute_forward_pad

 static void ggml_compute_forward_pad_f32(