summary | refs | log | tree | commit | diff
path: root/ggml-metal.m
diff options
context:
space:
mode:
author Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2023-09-01 11:15:57 +0300
committer GitHub <noreply@github.com> 2023-09-01 11:15:57 +0300
commit e8d91589258f9204397a7ac5f9b3c857835c98f8 (patch)
tree 5909f71a59fc0822fd4310c8208655b43022e575 /ggml-metal.m
parent bce1fef328941499dc0acb76cc7fd7ac90449c2f (diff)
metal: somewhat faster f16 x f32 matrix multiply kernel (#2951)
* Somewhat faster f16 x f32 matrix multiply kernel
* Better use 32 thread groups for f16 x f32

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml-metal.m')
-rw-r--r-- ggml-metal.m 2
1 file changed, 1 insertion, 1 deletion
diff --git a/ggml-metal.m b/ggml-metal.m
index e929c4b0..8c3c64f5 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -840,7 +840,7 @@ void ggml_metal_graph_compute(
switch (src0t) {
case GGML_TYPE_F16:
{
- nth0 = 64;
+ nth0 = 32;
nth1 = 1;
[encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32];
} break;