From afefa319f1f59b002dfa0d1ef407a2c74bd9770b Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 21 Dec 2023 23:20:49 +0200 Subject: ggml : change ggml_scale to take a float instead of tensor (#4573) * ggml : change ggml_scale to take a float instead of tensor * ggml : fix CPU implementation * tests : fix test-grad0 ggml-ci --- examples/baby-llama/baby-llama.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'examples/baby-llama/baby-llama.cpp') diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 2dc2988d..e7d2ad59 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -575,10 +575,7 @@ static struct ggml_tensor * forward( // KQ_scaled = KQ / sqrt(n_embd/n_head) // KQ_scaled shape [n_past + N, N, n_head, 1] - struct ggml_tensor * KQ_scaled = - ggml_scale(ctx0, - KQ, - ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head))); + struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) // KQ_masked shape [n_past + N, N, n_head, 1] @@ -844,10 +841,7 @@ static struct ggml_tensor * forward_batch( // KQ_scaled = KQ / sqrt(n_embd/n_head) // KQ_scaled shape [n_past + N, N, n_head, n_batch] - struct ggml_tensor * KQ_scaled = - ggml_scale(ctx0, - KQ, - ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head))); + struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head)); assert_shape_4d(KQ_scaled, n_past + N, N, n_head, n_batch); // KQ_masked = mask_past(KQ_scaled) @@ -1131,10 +1125,7 @@ static struct ggml_tensor * forward_lora( // KQ_scaled = KQ / sqrt(n_embd/n_head) // KQ_scaled shape [n_past + N, N, n_head, 1] - struct ggml_tensor * KQ_scaled = - ggml_scale(ctx0, - KQ, - ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head))); + struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head)); // KQ_masked = mask_past(KQ_scaled) // KQ_masked shape [n_past + N, N, n_head, 1] -- cgit v1.2.3