summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
author    LostRuins <39025047+LostRuins@users.noreply.github.com> 2023-12-14 20:13:33 +0800
committer GitHub <noreply@github.com> 2023-12-14 14:13:33 +0200
commit    20a68a7030ee06e8eb7eb8e24ae4ac52dc17803f (patch)
tree      3c84f1f362b064cdbbc2ec3044e47a38c9e44225 /llama.cpp
parent    55e87c3749cb4985c3b316984d40e00e4df4a5d0 (diff)
ggml : add ggml_row_size() (fixes llama out of space) (#4461)
* Fixes "Not enough space in the context's memory pool" encountered on certain models, which seems to be caused by some imprecision related to the automatic casting of floating point values

* do not cast to size_t, instead just use doubles

* ggml : add ggml_row_size(), deprecate ggml_type_sizef()

* ggml : fix row size compute to avoid overflows

* tests : fix sizey -> sizez

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp12
1 files changed, 6 insertions, 6 deletions
diff --git a/llama.cpp b/llama.cpp
index 0e5ab044..456807d9 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1555,7 +1555,7 @@ static bool llama_kv_cache_init(
cache.cells.clear();
cache.cells.resize(n_ctx);
- cache.buf.resize(n_elements*(ggml_type_sizef(ktype) + ggml_type_sizef(vtype)) + 2u*n_layer*ggml_tensor_overhead());
+ cache.buf.resize(ggml_row_size(ktype, n_elements) + ggml_row_size(vtype, n_elements) + 2u*n_layer*ggml_tensor_overhead());
memset(cache.buf.data, 0, cache.buf.size);
struct ggml_init_params params;
@@ -3822,8 +3822,8 @@ static void llm_build_k_shift(
ggml_rope_custom_inplace(ctx,
ggml_view_3d(ctx, kv.k_l[il],
n_embd_head, n_head_kv, n_ctx,
- ggml_type_sizef(kv.k_l[il]->type)*n_embd_head,
- ggml_type_sizef(kv.k_l[il]->type)*n_embd_gqa,
+ ggml_row_size(kv.k_l[il]->type, n_embd_head),
+ ggml_row_size(kv.k_l[il]->type, n_embd_gqa),
0),
K_shift, n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
ext_factor, attn_factor, beta_fast, beta_slow);
@@ -3852,7 +3852,7 @@ static void llm_build_kv_store(
cb(v_cur_t, "v_cur_t", il);
struct ggml_tensor * k_cache_view = ggml_view_1d(ctx, kv.k_l[il], n_tokens*n_embd_gqa,
- (ggml_type_sizef(kv.k_l[il]->type)*n_embd_gqa)*kv_head);
+ (ggml_row_size(kv.k_l[il]->type, n_embd_gqa))*kv_head);
cb(k_cache_view, "k_cache_view", il);
struct ggml_tensor * v_cache_view = ggml_view_2d(ctx, kv.v_l[il], n_tokens, n_embd_gqa,
@@ -4011,8 +4011,8 @@ static struct ggml_tensor * llm_build_kqv(
struct ggml_tensor * k =
ggml_view_3d(ctx, kv.k_l[il],
n_embd_head, n_kv, n_head_kv,
- ggml_type_sizef(kv.k_l[il]->type)*n_embd_gqa,
- ggml_type_sizef(kv.k_l[il]->type)*n_embd_head,
+ ggml_row_size(kv.k_l[il]->type, n_embd_gqa),
+ ggml_row_size(kv.k_l[il]->type, n_embd_head),
0);
cb(k, "k", il);