summary refs log tree commit diff
path: root/ggml-metal.metal
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-metal.metal')
-rw-r--r-- ggml-metal.metal 29
1 files changed, 14 insertions, 15 deletions
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 174086b5..b16f2b7e 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -3366,31 +3366,30 @@ kernel void kernel_concat(
constant uint64_t & nb1,
constant uint64_t & nb2,
constant uint64_t & nb3,
+ constant int32_t & dim,
uint3 tgpig[[threadgroup_position_in_grid]],
uint3 tpitg[[thread_position_in_threadgroup]],
uint3 ntg[[threads_per_threadgroup]]) {
- const int64_t i03 = tgpig.z;
- const int64_t i02 = tgpig.y;
- const int64_t i01 = tgpig.x;
+ const int64_t i3 = tgpig.z;
+ const int64_t i2 = tgpig.y;
+ const int64_t i1 = tgpig.x;
- const int64_t i13 = i03 % ne13;
- const int64_t i12 = i02 % ne12;
- const int64_t i11 = i01 % ne11;
+ int64_t o[4] = {0, 0, 0, 0};
+ o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03));
- device const char * src0_ptr = src0 + i03*nb03 + i02*nb02 + i01*nb01 + tpitg.x*nb00;
- device const char * src1_ptr = src1 + i13*nb13 + i12*nb12 + i11*nb11 + tpitg.x*nb10;
- device char * dst_ptr = dst + i03*nb3 + i02*nb2 + i01*nb1 + tpitg.x*nb0;
+ device const float * x;
for (int i0 = tpitg.x; i0 < ne0; i0 += ntg.x) {
- if (i02 < ne02) {
- ((device float *)dst_ptr)[0] = ((device float *)src0_ptr)[0];
- src0_ptr += ntg.x*nb00;
+ if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) {
+ x = (device const float *)(src0 + (i3 )*nb03 + (i2 )*nb02 + (i1 )*nb01 + (i0 )*nb00);
} else {
- ((device float *)dst_ptr)[0] = ((device float *)src1_ptr)[0];
- src1_ptr += ntg.x*nb10;
+ x = (device const float *)(src1 + (i3 - o[3])*nb13 + (i2 - o[2])*nb12 + (i1 - o[1])*nb11 + (i0 - o[0])*nb10);
}
- dst_ptr += ntg.x*nb0;
+
+ device float * y = (device float *)(dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
+
+ *y = *x;
}
}