summaryrefslogtreecommitdiff
path: root/ggml-cuda/template-instances
diff options
context:
space:
mode:
authorJohannes Gäßler <johannesg@5d6.de>2024-06-05 16:53:00 +0200
committerGitHub <noreply@github.com>2024-06-05 16:53:00 +0200
commit7d1a378b8fb266782d9248538a661405aad80768 (patch)
tree7ce459a4c5a85e75f75825772124aedc3bb54b7f /ggml-cuda/template-instances
parent2b3389677a833cee0880226533a1768b1a9508d2 (diff)
CUDA: refactor mmq, dmmv, mmvq (#7716)
* CUDA: refactor mmq, dmmv, mmvq * fix out-of-bounds write * struct for qk, qr, qi * fix cmake build * mmq_type_traits
Diffstat (limited to 'ggml-cuda/template-instances')
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu2
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu2
-rwxr-xr-xggml-cuda/template-instances/generate_cu_files.py16
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q2_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q3_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q4_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q5_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q6_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q8_0.cu5
102 files changed, 157 insertions, 91 deletions
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
index d7f10347..6696a238 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
index f3d8d2ed..dd070db2 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
index 9beb05ca..54dcde6f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
index 0c163dcb..4ec22f79 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
index 3980167b..3c15bf7f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
index fe099921..7e61b5fd 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
index d4d5e799..fdb15b58 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
index f08b10c4..0f7c417d 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
index e8c3f8ad..851f33c4 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
index c01416a1..763809cb 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
index 46615f28..f2a276e5 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
index 72dcc1a2..cb227f6f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
index 9fa8a377..97ac0520 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
index 20ea86c6..c772b426 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
index ed815957..5cb74308 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
index bbe9e6a1..98a709d1 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
index d12a6169..4f2f947a 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
index 1e901afc..11f96b6f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
index a3f98ce3..b39bdc06 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
index 1bae9724..bbd6a2c7 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
index 7258e977..9d84ff2b 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
index 08435c00..bc8a5bff 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
index 17864e8e..a679100c 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
index 9239138c..8f21bccf 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
index e387d9c1..858b00fd 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
index d69d3bbd..0fc8011f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
index 61a47881..261fdf62 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
index 89995080..0fb82473 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
index 9e6a58df..a9d9d089 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
index 153cbfd8..7d7b2792 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
index 09d57655..a092ee2d 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
index 3e3c91e6..db55927a 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
index 7b973058..c3c21cef 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
index a43a475d..35dd9f52 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
index 5b570c0a..050c22ac 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
index bf2cc684..de4866c5 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
index 7428e45e..57a10bc4 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
index 4aee830d..e0f08b46 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
index 36acb631..1c8e8a46 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
index a4090c39..cefed83f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
index 17b6b2d1..aede6e35 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
index 549e1cea..1a1a92c7 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
index 66bcd820..ad667473 100644
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
index 15933a29..c499f455 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
index 8aa78558..8286ebf3 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
index bde3924f..45878688 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
index 1708181c..d89103ce 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
index 30fa6fa4..bb75fd42 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
index 69673d50..b1629817 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
index d8b2b2e1..d8657604 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
index 01cce7ab..2e5bd2f1 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
index fd5563b3..be5f302d 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
index b13cc4a0..8dd91cd7 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
index 86f1fc63..4cb79150 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
index 26e7df4b..09dea426 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
index e4fda895..0fbb6076 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
index bd15117b..2aeab83b 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
index cb6c6a76..599415b4 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
index 201b6641..e4f8e308 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
index 6da57a44..34d16652 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
index 47623c9b..4bebef45 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
index 82c6861d..326468da 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
index 24a80c2b..511b58f4 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
index b95eaf7e..d9906d14 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
index 275f2efc..f61c183a 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
index 3673f7fd..c10450fd 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
index 2c4d5994..2d5cb195 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
index 2457cdf3..b384f34d 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
index b3b411ed..446e293b 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
index b7f308a4..6f430298 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
index 73968669..1cd8ba88 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
index 708d0311..1ee2eab6 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
index df891be6..2bc77816 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
index f49b6d1f..d55ced08 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
index 1de92148..8361e99c 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
index 7a1ba7f8..7507a67c 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
index 25493e4b..61f050b2 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
index 3cd650c7..d4a49d9c 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
index 88ffa43d..d1462789 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
index 8c7bac6c..e73f917a 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
index a28f62e7..d40825df 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
index d39838b9..b5c6869f 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
index 834d40f6..4e21b0cc 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
index f7d54668..2eac321b 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
index 59e00ad8..f7d2c3b4 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
index 6e63893d..a013f400 100644
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
+++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-vec-f32.cuh"
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
index ca356ad6..2d94e65c 100644
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
+++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-wmma-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
index 430ee64e..c3d9df3c 100644
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
+++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-wmma-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
index d421d17c..bb680e40 100644
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
+++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-wmma-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
index deacd5f5..073f71b1 100644
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
+++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-wmma-f16.cuh"
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
index 28289673..d30710c5 100644
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
+++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
@@ -1,4 +1,4 @@
-// This file has been autogenerated by generate-variants.py, do not edit manually.
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
#include "../fattn-wmma-f16.cuh"
diff --git a/ggml-cuda/template-instances/generate_cu_files.py b/ggml-cuda/template-instances/generate_cu_files.py
index ee5b460e..ea58d096 100755
--- a/ggml-cuda/template-instances/generate_cu_files.py
+++ b/ggml-cuda/template-instances/generate_cu_files.py
@@ -20,6 +20,18 @@ SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_
SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n"
+TYPES_MMQ = [
+ "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
+ "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K"
+]
+
+SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE({type});
+"""
+
def get_short_name(long_quant_name):
return long_quant_name.replace("GGML_TYPE_", "").lower()
@@ -57,3 +69,7 @@ for kq_acc_t in ["half", "float"]:
if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance
continue
f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size))
+
+for type in TYPES_MMQ:
+ with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f:
+ f.write(SOURCE_MMQ.format(type=type))
diff --git a/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/ggml-cuda/template-instances/mmq-instance-q2_k.cu
new file mode 100644
index 00000000..6415369d
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q2_k.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q2_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/ggml-cuda/template-instances/mmq-instance-q3_k.cu
new file mode 100644
index 00000000..ffb6213a
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q3_k.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q3_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ggml-cuda/template-instances/mmq-instance-q4_0.cu
new file mode 100644
index 00000000..0c0b0c8a
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q4_0.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/ggml-cuda/template-instances/mmq-instance-q4_1.cu
new file mode 100644
index 00000000..ee67f694
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q4_1.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/ggml-cuda/template-instances/mmq-instance-q4_k.cu
new file mode 100644
index 00000000..9eeb3cd7
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q4_k.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q4_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/ggml-cuda/template-instances/mmq-instance-q5_0.cu
new file mode 100644
index 00000000..cc57fb97
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q5_0.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/ggml-cuda/template-instances/mmq-instance-q5_1.cu
new file mode 100644
index 00000000..721ac790
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q5_1.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/ggml-cuda/template-instances/mmq-instance-q5_k.cu
new file mode 100644
index 00000000..a2e90ffd
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q5_k.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q5_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/ggml-cuda/template-instances/mmq-instance-q6_k.cu
new file mode 100644
index 00000000..470938fe
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q6_k.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q6_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/ggml-cuda/template-instances/mmq-instance-q8_0.cu
new file mode 100644
index 00000000..974477bb
--- /dev/null
+++ b/ggml-cuda/template-instances/mmq-instance-q8_0.cu
@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q8_0);