From 20a68a7030ee06e8eb7eb8e24ae4ac52dc17803f Mon Sep 17 00:00:00 2001
From: LostRuins <39025047+LostRuins@users.noreply.github.com>
Date: Thu, 14 Dec 2023 20:13:33 +0800
Subject: ggml : add ggml_row_size() (fixes llama out of space) (#4461)

* Fixes "Not enough space in the context's memory pool" encountered on certain models, which seems to be caused by some imprecision related to the automatic casting of floating point values

* do not cast to size_t, instead just use doubles

* ggml : add ggml_row_size(), deprecate ggml_type_sizef()

* ggml : fix row size compute to avoid overflows

* tests : fix sizey -> sizez

---------

Co-authored-by: Georgi Gerganov
---
 examples/benchmark/benchmark-matmult.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'examples/benchmark')

diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 284733b1..434e1d6b 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -129,13 +129,13 @@ int main(int argc, char ** argv) {
     const ggml_type qtype = GGML_TYPE_Q4_1;
 
     size_t ctx_size = 0;
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
-    ctx_size += sizex*sizey*ggml_type_sizef(qtype);
-    ctx_size += sizex*sizey*ggml_type_sizef(qtype);
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
-    ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
+    ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
+    ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
+    ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizez);
+    ctx_size += ggml_row_size(qtype, sizex*sizey);
+    ctx_size += ggml_row_size(qtype, sizex*sizey);
+    ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
+    ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
     ctx_size += 1024*1024*16;
 
     printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
-- 
cgit v1.2.3