ggml : change ggml_scale to take a float instead of tensor (#4573)

* ggml : change ggml_scale to take a float instead of tensor
* ggml : fix CPU implementation
* tests : fix test-grad0

ggml-ci
author: Georgi Gerganov <ggerganov@gmail.com> 2023-12-21 23:20:49 +0200
committer: GitHub <noreply@github.com> 2023-12-21 23:20:49 +0200
commit: afefa319f1f59b002dfa0d1ef407a2c74bd9770b (patch)
tree: a6923e0a6214293d88957cd11e25943f2c0fb80a /examples/llava
parent: 769a7bc85eaa44e3d7eadf39abfeff7bb0b9cc2f (diff)
1 file changed, 1 insertion, 7 deletions
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 11246596..f06ec400 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -330,12 +330,6 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
                               ggml_repeat(ctx0, model.pre_ln_b, embeddings));
     }
 
-    struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
-    ggml_allocr_alloc(ctx->alloc, KQ_scale);
-    if (!ggml_allocr_is_measure(ctx->alloc)) {
-        ggml_set_f32(KQ_scale, 1.0f / sqrt((float)d_head));
-    }
-
     // loop over layers
     for (int il = 0; il < n_layer - 1; il++) {
         struct ggml_tensor * cur = embeddings; // embeddings = residual, cur = hidden_states
@@ -356,7 +350,7 @@ static ggml_cgraph * clip_image_build_graph(const clip_ctx * ctx, const clip_ima
             struct ggml_tensor * Q =
                 ggml_add(ctx0, ggml_repeat(ctx0, model.layers[il].q_b, cur), ggml_mul_mat(ctx0, model.layers[il].q_w, cur));
 
-            Q = ggml_scale_inplace(ctx0, Q, KQ_scale);
+            Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
             Q = ggml_reshape_4d(ctx0, Q, d_head, n_head, num_positions, batch_size);
             Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
             Q = ggml_reshape_3d(ctx0, Q, d_head, num_positions, n_head * batch_size);
author	Georgi Gerganov <ggerganov@gmail.com>	2023-12-21 23:20:49 +0200
committer	GitHub <noreply@github.com>	2023-12-21 23:20:49 +0200
commit	afefa319f1f59b002dfa0d1ef407a2c74bd9770b (patch)
tree	a6923e0a6214293d88957cd11e25943f2c0fb80a /examples/llava
parent	769a7bc85eaa44e3d7eadf39abfeff7bb0b9cc2f (diff)