summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2023-09-29 19:05:18 +0300
committer: GitHub <noreply@github.com> 2023-09-29 19:05:18 +0300
commit: bc34dd4f5b5a7c10ae3ed85a265ce6f2ed2fab79 (patch)
tree: 361b5fc14a3d49f7e4fb68efbbd28cc9dc1608e9 /examples
parent: 2777a84be429401a2b7d33c2b6a4ada1f0776f1b (diff)
train : fix KQ_pos allocation (#3392)
* train : fix KQ_pos allocation
* make sure KQ_pos is not reallocated in finetune

---------

Co-authored-by: xaedes <xaedes@gmail.com>
Diffstat (limited to 'examples')
-rw-r--r-- examples/finetune/finetune.cpp | 5
-rw-r--r-- examples/train-text-from-scratch/train-text-from-scratch.cpp | 3
2 files changed, 6 insertions, 2 deletions
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index b61165fb..8ca1874d 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -626,7 +626,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
- {
+ ggml_allocr_alloc(alloc, KQ_pos);
+ if (!ggml_allocr_is_measure(alloc)) {
int * data = (int *) KQ_pos->data;
for (int i = 0; i < N; ++i) {
data[i] = n_past + i;
@@ -786,6 +787,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL);
ggml_allocr_alloc(alloc, t36->grad);
+ // KQ_pos
+ ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, one));
// make sure base model tensors data cannot be used in viewable operations
ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->tok_embeddings, one));
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 5043f32d..be693b3a 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -334,7 +334,8 @@ static struct ggml_tensor * llama_build_train_graphs(
// KQ_pos - contains the positions
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
- {
+ ggml_allocr_alloc(alloc, KQ_pos);
+ if (!ggml_allocr_is_measure(alloc)) {
int * data = (int *) KQ_pos->data;
for (int i = 0; i < N; ++i) {
data[i] = n_past + i;