Merge mainline llama.cpp (#3)

* Merging mainline - WIP * Merging mainline - WIP AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower as it is so often the case with llama.cpp/ggml after some "improvements" have been made. * Merging mainline - fix Metal * Remove check --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-07-27 07:55:01 +0200
committer: GitHub <noreply@github.com> 2024-07-27 07:55:01 +0200
commit: 154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch)
tree: 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /ggml/src/vulkan-shaders/generic_binary_head.comp
parent: 0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff)
1 files changed, 48 insertions, 0 deletions
diff --git a/ggml/src/vulkan-shaders/generic_binary_head.comp b/ggml/src/vulkan-shaders/generic_binary_head.comp
new file mode 100644
index 00000000..ab45d256
--- /dev/null
+++ b/ggml/src/vulkan-shaders/generic_binary_head.comp
@@ -0,0 +1,48 @@
+#extension GL_EXT_shader_16bit_storage : require
+
+layout (push_constant) uniform parameter
+{
+    uint ne;
+    uint ne00; uint ne01; uint ne02; uint ne03; uint nb00; uint nb01; uint nb02; uint nb03;
+    uint ne10; uint ne11; uint ne12; uint ne13; uint nb10; uint nb11; uint nb12; uint nb13;
+    uint ne20; uint ne21; uint ne22; uint ne23; uint nb20; uint nb21; uint nb22; uint nb23;
+    uint d_offset;
+    float param1; float param2;
+} p;
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0) readonly buffer A {A_TYPE data_a[];};
+layout (binding = 1) readonly buffer B {B_TYPE data_b[];};
+layout (binding = 2) writeonly buffer D {D_TYPE data_d[];};
+
+uint src0_idx(uint idx) {
+    const uint i03 = idx / (p.ne02*p.ne01*p.ne00);
+    const uint i03_offset = i03 * p.ne02*p.ne01*p.ne00;
+    const uint i02 = (idx - i03_offset) / (p.ne01*p.ne00);
+    const uint i02_offset = i02*p.ne01*p.ne00;
+    const uint i01 = (idx - i03_offset - i02_offset) / p.ne00;
+    const uint i00 = idx - i03_offset - i02_offset - i01*p.ne00;
+    return i03*p.nb03 + i02*p.nb02 + i01*p.nb01 + i00*p.nb00;
+}
+
+uint src1_idx(uint idx) {
+    const uint i03 = idx / (p.ne02*p.ne01*p.ne00);
+    const uint i03_offset = i03 * p.ne02*p.ne01*p.ne00;
+    const uint i02 = (idx - i03_offset) / (p.ne01*p.ne00);
+    const uint i02_offset = i02*p.ne01*p.ne00;
+    const uint i01 = (idx - i03_offset - i02_offset) / p.ne00;
+    const uint i00 = idx - i03_offset - i02_offset - i01*p.ne00;
+
+    return (i03 % p.ne13)*p.nb13 + (i02 % p.ne12)*p.nb12 + (i01 % p.ne11)*p.nb11 + (i00 % p.ne10)*p.nb10;
+}
+
+uint dst_idx(uint idx) {
+    const uint i23 = idx / (p.ne22*p.ne21*p.ne20);
+    const uint i23_offset = i23 * p.ne22*p.ne21*p.ne20;
+    const uint i22 = (idx - i23_offset) / (p.ne21*p.ne20);
+    const uint i22_offset = i22*p.ne21*p.ne20;
+    const uint i21 = (idx - i23_offset - i22_offset) / p.ne20;
+    const uint i20 = idx - i23_offset - i22_offset - i21*p.ne20;
+    return i23*p.nb23 + i22*p.nb22 + i21*p.nb21 + i20*p.nb20;
+}
author	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2024-07-27 07:55:01 +0200
committer	GitHub <noreply@github.com>	2024-07-27 07:55:01 +0200
commit	154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch)
tree	81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /ggml/src/vulkan-shaders/generic_binary_head.comp
parent	0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff)