summaryrefslogtreecommitdiff
path: root/ggml/src/vulkan-shaders
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/vulkan-shaders')
-rw-r--r-- ggml/src/vulkan-shaders/fused_mul_gelu.comp 27
-rw-r--r-- ggml/src/vulkan-shaders/fused_mul_relu.comp 22
-rw-r--r-- ggml/src/vulkan-shaders/fused_mul_silu.comp 24
-rw-r--r-- ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp 7
4 files changed, 80 insertions, 0 deletions
diff --git a/ggml/src/vulkan-shaders/fused_mul_gelu.comp b/ggml/src/vulkan-shaders/fused_mul_gelu.comp
new file mode 100644
index 00000000..65e2e662
--- /dev/null
+++ b/ggml/src/vulkan-shaders/fused_mul_gelu.comp
@@ -0,0 +1,27 @@
+#version 450
+
+#include "generic_head.comp"
+#include "types.comp"
+
+#extension GL_EXT_control_flow_attributes : enable
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; // 512 invocations per workgroup, all along x
+
+layout (binding = 0) readonly buffer X {A_TYPE data_a[];}; // input that goes through the GELU activation
+layout (binding = 1) readonly buffer Y {B_TYPE data_b[];}; // input multiplied with the activated value
+layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; // output, one element written per invocation
+
+void main() { // element-wise: data_d[i] = data_b[i] * gelu(data_a[i]), using the tanh form of GELU
+ const float GELU_COEF_A = 0.044715f; // coefficient of the cubic term inside the tanh argument
+ const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f;
+ const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; // flatten the 3D invocation ID into one element index; 262144 == 512 * 512
+
+ if (i >= p.KX) { // p is not declared in this file (presumably push constants from generic_head.comp); KX looks like the element count - TODO confirm
+ return;
+ }
+
+ const float xi = float(data_a[i]); // math is done in float whatever A_TYPE/B_TYPE are
+ const float yi = float(data_b[i]);
+ const float val = SQRT_2_OVER_PI*xi*(1.0f + GELU_COEF_A*xi*xi); // tanh argument: sqrt(2/pi) * (x + 0.044715 * x^3)
+ data_d[i] = D_TYPE(0.5f*xi*yi*(2.0f - 2.0f / (exp(2 * val) + 1))); // 2 - 2/(exp(2v) + 1) is algebraically 1 + tanh(v), so this is y * 0.5 * x * (1 + tanh(v))
+}
diff --git a/ggml/src/vulkan-shaders/fused_mul_relu.comp b/ggml/src/vulkan-shaders/fused_mul_relu.comp
new file mode 100644
index 00000000..01a3107f
--- /dev/null
+++ b/ggml/src/vulkan-shaders/fused_mul_relu.comp
@@ -0,0 +1,22 @@
+#version 450
+
+#include "generic_head.comp"
+#include "types.comp"
+
+#extension GL_EXT_control_flow_attributes : enable
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; // 512 invocations per workgroup, all along x
+
+layout (binding = 0) readonly buffer X {A_TYPE data_a[];}; // input that goes through the ReLU clamp
+layout (binding = 1) readonly buffer Y {B_TYPE data_b[];}; // input multiplied with the clamped value
+layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; // output, one element written per invocation
+
+void main() { // element-wise: data_d[i] = data_b[i] * max(data_a[i], 0)
+ const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; // flatten the 3D invocation ID into one element index; 262144 == 512 * 512
+
+ if (i >= p.KX) { // p is not declared in this file (presumably push constants from generic_head.comp); KX looks like the element count - TODO confirm
+ return;
+ }
+
+ data_d[i] = D_TYPE(float(data_b[i])*max(float(data_a[i]), 0)); // both operands are widened to float before the multiply; the result is narrowed back to D_TYPE
+}
diff --git a/ggml/src/vulkan-shaders/fused_mul_silu.comp b/ggml/src/vulkan-shaders/fused_mul_silu.comp
new file mode 100644
index 00000000..0d59b64e
--- /dev/null
+++ b/ggml/src/vulkan-shaders/fused_mul_silu.comp
@@ -0,0 +1,24 @@
+#version 450
+
+#include "generic_head.comp"
+#include "types.comp"
+
+#extension GL_EXT_control_flow_attributes : enable
+
+layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; // 512 invocations per workgroup, all along x
+
+layout (binding = 0) readonly buffer X {A_TYPE data_a[];}; // input that goes through the SiLU activation
+layout (binding = 1) readonly buffer Y {B_TYPE data_b[];}; // input multiplied with the activated value
+layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; // output, one element written per invocation
+
+void main() { // element-wise: data_d[i] = data_b[i] * silu(data_a[i]), with silu(x) = x / (1 + exp(-x))
+ const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; // flatten the 3D invocation ID into one element index; 262144 == 512 * 512
+
+ if (i >= p.KX) { // p is not declared in this file (presumably push constants from generic_head.comp); KX looks like the element count - TODO confirm
+ return;
+ }
+
+ const float xi = float(data_a[i]); // math is done in float whatever A_TYPE/B_TYPE are
+ const float yi = float(data_b[i]);
+ data_d[i] = D_TYPE(xi * yi / (1.0f + exp(-xi))); // x * y * sigmoid(x), written as a single division
+}
diff --git a/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
index d622f1bd..281d98c6 100644
--- a/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
+++ b/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
@@ -572,6 +572,13 @@ void process_shaders() {
string_to_spv("upscale_f32", "upscale.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
+ string_to_spv("fused_mul_gelu_f16", "fused_mul_gelu.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+ string_to_spv("fused_mul_gelu_f32", "fused_mul_gelu.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
+ string_to_spv("fused_mul_silu_f16", "fused_mul_silu.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+ string_to_spv("fused_mul_silu_f32", "fused_mul_silu.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
+ string_to_spv("fused_mul_relu_f16", "fused_mul_relu.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+ string_to_spv("fused_mul_relu_f32", "fused_mul_relu.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
+
string_to_spv("gelu_f16", "gelu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
string_to_spv("gelu_quick_f16", "gelu_quick.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});