summaryrefslogtreecommitdiff
path: root/examples/baby-llama/baby-llama.cpp
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2023-12-21 23:20:49 +0200
committerGitHub <noreply@github.com>2023-12-21 23:20:49 +0200
commitafefa319f1f59b002dfa0d1ef407a2c74bd9770b (patch)
treea6923e0a6214293d88957cd11e25943f2c0fb80a /examples/baby-llama/baby-llama.cpp
parent769a7bc85eaa44e3d7eadf39abfeff7bb0b9cc2f (diff)
ggml : change ggml_scale to take a float instead of tensor (#4573)
* ggml : change ggml_scale to take a float instead of tensor * ggml : fix CPU implementation * tests : fix test-grad0 ggml-ci
Diffstat (limited to 'examples/baby-llama/baby-llama.cpp')
-rw-r--r--examples/baby-llama/baby-llama.cpp15
1 files changed, 3 insertions, 12 deletions
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 2dc2988d..e7d2ad59 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -575,10 +575,7 @@ static struct ggml_tensor * forward(
// KQ_scaled = KQ / sqrt(n_embd/n_head)
// KQ_scaled shape [n_past + N, N, n_head, 1]
- struct ggml_tensor * KQ_scaled =
- ggml_scale(ctx0,
- KQ,
- ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
+ struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head));
// KQ_masked = mask_past(KQ_scaled)
// KQ_masked shape [n_past + N, N, n_head, 1]
@@ -844,10 +841,7 @@ static struct ggml_tensor * forward_batch(
// KQ_scaled = KQ / sqrt(n_embd/n_head)
// KQ_scaled shape [n_past + N, N, n_head, n_batch]
- struct ggml_tensor * KQ_scaled =
- ggml_scale(ctx0,
- KQ,
- ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
+ struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head));
assert_shape_4d(KQ_scaled, n_past + N, N, n_head, n_batch);
// KQ_masked = mask_past(KQ_scaled)
@@ -1131,10 +1125,7 @@ static struct ggml_tensor * forward_lora(
// KQ_scaled = KQ / sqrt(n_embd/n_head)
// KQ_scaled shape [n_past + N, N, n_head, 1]
- struct ggml_tensor * KQ_scaled =
- ggml_scale(ctx0,
- KQ,
- ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head)));
+ struct ggml_tensor * KQ_scaled = ggml_scale(ctx0, KQ, 1.0f/sqrtf(float(n_embd)/n_head));
// KQ_masked = mask_past(KQ_scaled)
// KQ_masked shape [n_past + N, N, n_head, 1]