Vulkan Phi Fix for AMD Proprietary Drivers (#5260)

* Replace tanh to avoid NaN in gelu shader on AMD proprietary driver
* Fix another Vulkan CPY buffer size bug
author: 0cc4m <picard12@live.de> 2024-02-01 19:25:24 +0100
committer: GitHub <noreply@github.com> 2024-02-01 19:25:24 +0100
commit: 4d0924a8902010d31bd737b6f1f594943d120d0f (patch)
tree: 091227c1265488e6a528f280304b6ad92d6e8e17 /ggml-vulkan.cpp
parent: 8ca511cadee2c67f0bd8c7034a2513778ee9a1b7 (diff)
1 file changed, 12 insertions, 5 deletions
diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index bccc40bf..b1e0006b 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -2876,6 +2876,9 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm
         x_sz = ggml_nbytes(src0);
         d_sz = ggml_nbytes(dst);
 
+        if (extra_src0->offset + x_sz >= d_X->size) {
+            x_sz = VK_WHOLE_SIZE;
+        }
         if (extra->offset + d_sz >= d_D->size) {
             d_sz = VK_WHOLE_SIZE;
         }
@@ -2911,12 +2914,16 @@ static void ggml_vk_op_f32(vk_context * ctx, const ggml_tensor * src0, const ggm
             break;
         }
 
-        x_sz *= ne02 * ne03;
-        if (y_sz != VK_WHOLE_SIZE) {
-            y_sz *= ne12 * ne13;
-        }
         if (op != GGML_OP_CPY) {
-            d_sz *= ne02 * ne03;
+            if (x_sz != VK_WHOLE_SIZE) {
+                x_sz *= ne02 * ne03;
+            }
+            if (y_sz != VK_WHOLE_SIZE) {
+                y_sz *= ne12 * ne13;
+            }
+            if (d_sz != VK_WHOLE_SIZE) {
+                d_sz *= ne02 * ne03;
+            }
         }
 
         if (!use_src1 && op == GGML_OP_SOFT_MAX) {
author	0cc4m <picard12@live.de>	2024-02-01 19:25:24 +0100
committer	GitHub <noreply@github.com>	2024-02-01 19:25:24 +0100
commit	4d0924a8902010d31bd737b6f1f594943d120d0f (patch)
tree	091227c1265488e6a528f280304b6ad92d6e8e17 /ggml-vulkan.cpp
parent	8ca511cadee2c67f0bd8c7034a2513778ee9a1b7 (diff)