diff options
Diffstat (limited to 'examples/benchmark/benchmark-matmult.cpp')
-rw-r--r-- | examples/benchmark/benchmark-matmult.cpp | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp index 39d15cae..f7215f43 100644 --- a/examples/benchmark/benchmark-matmult.cpp +++ b/examples/benchmark/benchmark-matmult.cpp @@ -20,6 +20,17 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif +void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) { + struct ggml_cplan plan = ggml_graph_plan(graph, n_threads); + + if (plan.work_size > 0) { + buf.resize(plan.work_size); + plan.work_data = buf.data(); + } + + ggml_graph_compute(graph, &plan); +} + float tensor_sum_elements(const ggml_tensor * tensor) { float sum = 0; if (tensor->type==GGML_TYPE_F32) { @@ -159,13 +170,14 @@ int main(int argc, char ** argv) { // printf("Creating compute graph\n"); struct ggml_cgraph gf = ggml_build_forward(m11xm2); - gf.n_threads=benchmark_params.n_threads; - printf("cgraph->n_threads=%i\n",gf.n_threads); + printf("n_threads=%i\n", benchmark_params.n_threads); TENSOR_DUMP(m11); TENSOR_DUMP(m2); - ggml_graph_compute(ctx, &gf); + std::vector<uint8_t> work_buffer; + + ggml_graph_compute_helper(work_buffer, &gf, benchmark_params.n_threads); TENSOR_DUMP(gf.nodes[0]); @@ -187,7 +199,6 @@ int main(int argc, char ** argv) { // printf("Creating compute graph\n"); struct ggml_cgraph gf31 = ggml_build_forward(q31); - gf31.n_threads=benchmark_params.n_threads; // Set up a second graph computation to make sure we override the CPU cache lines // printf("Creating new tensor q12 & Running quantize\n"); @@ -199,8 +210,7 @@ int main(int argc, char ** argv) { //printf("Creating compute graph\n"); struct ggml_cgraph gf32 = ggml_build_forward(q32); - gf32.n_threads=benchmark_params.n_threads; - printf("cgraph->n_threads=%i\n",gf31.n_threads); + printf("n_threads=%i\n", benchmark_params.n_threads); const int dimx = sizex; const int dimy = sizey; @@ -221,14 +231,15 @@ int main(int argc, char ** argv) { long long int start = ggml_time_us(); //printf("Running ggml_graph_compute\n"); - ggml_graph_compute(ctx, &gf31); + ggml_graph_compute_helper(work_buffer, &gf31, benchmark_params.n_threads); + long long int stop = ggml_time_us(); long long int usec = stop-start; double gflops = (double)(flops_per_matrix)/usec/1000.0; gflops_sum += gflops; printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%10.2f\n", i, - gf31.n_threads, + benchmark_params.n_threads, sizex, sizey, sizez, flops_per_matrix, usec,gflops); @@ -253,7 +264,7 @@ int main(int argc, char ** argv) { } // Running a different graph computation to make sure we override the CPU cache lines - ggml_graph_compute(ctx, &gf32); + ggml_graph_compute_helper(work_buffer, &gf32, benchmark_params.n_threads); } printf("\n"); printf("Average%78.2f\n",gflops_sum/((double)benchmark_params.n_iterations)); |