Diffstat (limited to 'examples/benchmark/benchmark-matmult.cpp')
 -rw-r--r--  examples/benchmark/benchmark-matmult.cpp  29
 1 file changed, 20 insertions(+), 9 deletions(-)
diff --git a/examples/benchmark/benchmark-matmult.cpp b/examples/benchmark/benchmark-matmult.cpp
index 39d15cae..f7215f43 100644
--- a/examples/benchmark/benchmark-matmult.cpp
+++ b/examples/benchmark/benchmark-matmult.cpp
@@ -20,6 +20,17 @@
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
+void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph * graph, int n_threads) {
+ struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
+
+ if (plan.work_size > 0) {
+ buf.resize(plan.work_size);
+ plan.work_data = buf.data();
+ }
+
+ ggml_graph_compute(graph, &plan);
+}
+
float tensor_sum_elements(const ggml_tensor * tensor) {
float sum = 0;
if (tensor->type==GGML_TYPE_F32) {
@@ -159,13 +170,14 @@ int main(int argc, char ** argv) {
// printf("Creating compute graph\n");
struct ggml_cgraph gf = ggml_build_forward(m11xm2);
- gf.n_threads=benchmark_params.n_threads;
- printf("cgraph->n_threads=%i\n",gf.n_threads);
+ printf("n_threads=%i\n", benchmark_params.n_threads);
TENSOR_DUMP(m11);
TENSOR_DUMP(m2);
- ggml_graph_compute(ctx, &gf);
+ std::vector<uint8_t> work_buffer;
+
+ ggml_graph_compute_helper(work_buffer, &gf, benchmark_params.n_threads);
TENSOR_DUMP(gf.nodes[0]);
@@ -187,7 +199,6 @@ int main(int argc, char ** argv) {
// printf("Creating compute graph\n");
struct ggml_cgraph gf31 = ggml_build_forward(q31);
- gf31.n_threads=benchmark_params.n_threads;
// Set up a second graph computation to make sure we override the CPU cache lines
// printf("Creating new tensor q12 & Running quantize\n");
@@ -199,8 +210,7 @@ int main(int argc, char ** argv) {
//printf("Creating compute graph\n");
struct ggml_cgraph gf32 = ggml_build_forward(q32);
- gf32.n_threads=benchmark_params.n_threads;
- printf("cgraph->n_threads=%i\n",gf31.n_threads);
+ printf("n_threads=%i\n", benchmark_params.n_threads);
const int dimx = sizex;
const int dimy = sizey;
@@ -221,14 +231,15 @@ int main(int argc, char ** argv) {
long long int start = ggml_time_us();
//printf("Running ggml_graph_compute\n");
- ggml_graph_compute(ctx, &gf31);
+ ggml_graph_compute_helper(work_buffer, &gf31, benchmark_params.n_threads);
+
long long int stop = ggml_time_us();
long long int usec = stop-start;
double gflops = (double)(flops_per_matrix)/usec/1000.0;
gflops_sum += gflops;
printf("%9i;%8i;%6i;%6i;%6i;%15lli;%18lli;%10.2f\n",
i,
- gf31.n_threads,
+ benchmark_params.n_threads,
sizex, sizey, sizez, flops_per_matrix,
usec,gflops);
@@ -253,7 +264,7 @@ int main(int argc, char ** argv) {
}
// Running a different graph computation to make sure we override the CPU cache lines
- ggml_graph_compute(ctx, &gf32);
+ ggml_graph_compute_helper(work_buffer, &gf32, benchmark_params.n_threads);
}
printf("\n");
printf("Average%78.2f\n",gflops_sum/((double)benchmark_params.n_iterations));