summary | refs | log | tree | commit | diff
path: root/examples/benchmark
diff options
context:
space:
mode:
author: LostRuins <39025047+LostRuins@users.noreply.github.com> 2023-12-14 20:13:33 +0800
committer: GitHub <noreply@github.com> 2023-12-14 14:13:33 +0200
commit: 20a68a7030ee06e8eb7eb8e24ae4ac52dc17803f (patch)
tree: 3c84f1f362b064cdbbc2ec3044e47a38c9e44225 /examples/benchmark
parent: 55e87c3749cb4985c3b316984d40e00e4df4a5d0 (diff)
ggml : add ggml_row_size() (fixes llama out of space) (#4461)
* Fixes "Not enough space in the context's memory pool" encountered on certain models, which seems to be caused by some imprecision related to the automatic casting of floating point values
* do not cast to size_t, instead just use doubles
* ggml : add ggml_row_size(), deprecate ggml_type_sizef()
* ggml : fix row size compute to avoid overflows
* tests : fix sizey -> sizez

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/benchmark')
-rw-r--r-- examples/benchmark/benchmark-matmult.cpp 14
1 file changed, 7 insertions, 7 deletions
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 284733b1..434e1d6b 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -129,13 +129,13 @@ int main(int argc, char ** argv) {
const ggml_type qtype = GGML_TYPE_Q4_1;
size_t ctx_size = 0;
- ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
- ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
- ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
- ctx_size += sizex*sizey*ggml_type_sizef(qtype);
- ctx_size += sizex*sizey*ggml_type_sizef(qtype);
- ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
- ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
+ ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
+ ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
+ ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizez);
+ ctx_size += ggml_row_size(qtype, sizex*sizey);
+ ctx_size += ggml_row_size(qtype, sizex*sizey);
+ ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
+ ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
ctx_size += 1024*1024*16;
printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));