ggml : change ggml_scale to take a float instead of tensor (#4573)

* ggml : change ggml_scale to take a float instead of tensor * ggml : fix CPU implementation * tests : fix test-grad0 ggml-ci
author: Georgi Gerganov <ggerganov@gmail.com> 2023-12-21 23:20:49 +0200
committer: GitHub <noreply@github.com> 2023-12-21 23:20:49 +0200
commit: afefa319f1f59b002dfa0d1ef407a2c74bd9770b (patch)
tree: a6923e0a6214293d88957cd11e25943f2c0fb80a /examples/baby-llama/baby-llama.cpp
parent: 769a7bc85eaa44e3d7eadf39abfeff7bb0b9cc2f (diff)
1 files changed, 3 insertions, 12 deletions
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 2dc2988d..e7d2ad59 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -575,10 +575,7 @@ static struct ggml_tensor * forward(
 
             // KQ_scaled = KQ / sqrt(n_embd/n_head)
             // KQ_scaled shape [n_past + N, N, n_head, 1]
-            struct ggml_tensor * KQ_scaled =
-                ggml_scale(ctx0,
-                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
+            struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head));
 
             // KQ_masked = mask_past(KQ_scaled)
             // KQ_masked shape [n_past + N, N, n_head, 1]
@@ -844,10 +841,7 @@ static struct ggml_tensor * forward_batch(
 
             // KQ_scaled = KQ / sqrt(n_embd/n_head)
             // KQ_scaled shape [n_past + N, N, n_head, n_batch]
-            struct ggml_tensor * KQ_scaled =
-                ggml_scale(ctx0,
-                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
+            struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head));
             assert_shape_4d(KQ_scaled, n_past + N, N, n_head, n_batch);
 
             // KQ_masked = mask_past(KQ_scaled)
@@ -1131,10 +1125,7 @@ static struct ggml_tensor * forward_lora(
 
             // KQ_scaled = KQ / sqrt(n_embd/n_head)
             // KQ_scaled shape [n_past + N, N, n_head, 1]
-            struct ggml_tensor * KQ_scaled =
-                ggml_scale(ctx0,
-                        KQ,
-                        ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
+            struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head));
 
             // KQ_masked = mask_past(KQ_scaled)
             // KQ_masked shape [n_past + N, N, n_head, 1]
author	Georgi Gerganov <ggerganov@gmail.com>	2023-12-21 23:20:49 +0200
committer	GitHub <noreply@github.com>	2023-12-21 23:20:49 +0200
commit	afefa319f1f59b002dfa0d1ef407a2c74bd9770b (patch)
tree	a6923e0a6214293d88957cd11e25943f2c0fb80a /examples/baby-llama/baby-llama.cpp
parent	769a7bc85eaa44e3d7eadf39abfeff7bb0b9cc2f (diff)