summaryrefslogtreecommitdiff
path: root/ggml-cuda/template-instances
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-cuda/template-instances')
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu5
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu10
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu9
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu10
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu10
-rw-r--r--ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu8
-rwxr-xr-xggml-cuda/template-instances/generate_cu_files.py75
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q2_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q3_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q4_0.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q4_1.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q4_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q5_0.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q5_1.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q5_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q6_k.cu5
-rw-r--r--ggml-cuda/template-instances/mmq-instance-q8_0.cu5
102 files changed, 0 insertions, 602 deletions
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
deleted file mode 100644
index 6696a238..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
deleted file mode 100644
index dd070db2..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
deleted file mode 100644
index 54dcde6f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
deleted file mode 100644
index 4ec22f79..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
deleted file mode 100644
index 3c15bf7f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
deleted file mode 100644
index 7e61b5fd..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
deleted file mode 100644
index fdb15b58..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
deleted file mode 100644
index 0f7c417d..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
deleted file mode 100644
index 851f33c4..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
deleted file mode 100644
index 763809cb..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
deleted file mode 100644
index f2a276e5..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
deleted file mode 100644
index cb227f6f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
deleted file mode 100644
index 97ac0520..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
deleted file mode 100644
index c772b426..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
deleted file mode 100644
index 5cb74308..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
deleted file mode 100644
index 98a709d1..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
deleted file mode 100644
index 4f2f947a..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
deleted file mode 100644
index 11f96b6f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
deleted file mode 100644
index b39bdc06..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
deleted file mode 100644
index bbd6a2c7..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
deleted file mode 100644
index 9d84ff2b..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
deleted file mode 100644
index bc8a5bff..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
deleted file mode 100644
index a679100c..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
deleted file mode 100644
index 8f21bccf..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
deleted file mode 100644
index 858b00fd..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
deleted file mode 100644
index 0fc8011f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
deleted file mode 100644
index 261fdf62..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
deleted file mode 100644
index 0fb82473..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
deleted file mode 100644
index a9d9d089..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
deleted file mode 100644
index 7d7b2792..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
deleted file mode 100644
index a092ee2d..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
deleted file mode 100644
index db55927a..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
deleted file mode 100644
index c3c21cef..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
deleted file mode 100644
index 35dd9f52..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
deleted file mode 100644
index 050c22ac..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
deleted file mode 100644
index de4866c5..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
deleted file mode 100644
index 57a10bc4..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
deleted file mode 100644
index e0f08b46..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
deleted file mode 100644
index 1c8e8a46..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
deleted file mode 100644
index cefed83f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
deleted file mode 100644
index aede6e35..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
deleted file mode 100644
index 1a1a92c7..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
deleted file mode 100644
index ad667473..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f16.cuh"
-
-DECL_FATTN_VEC_F16_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
deleted file mode 100644
index c499f455..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
deleted file mode 100644
index 8286ebf3..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
deleted file mode 100644
index 45878688..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
deleted file mode 100644
index d89103ce..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
deleted file mode 100644
index bb75fd42..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
deleted file mode 100644
index b1629817..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
deleted file mode 100644
index d8657604..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
deleted file mode 100644
index 2e5bd2f1..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
deleted file mode 100644
index be5f302d..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
deleted file mode 100644
index 8dd91cd7..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
deleted file mode 100644
index 4cb79150..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
deleted file mode 100644
index 09dea426..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
deleted file mode 100644
index 0fbb6076..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
deleted file mode 100644
index 2aeab83b..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
deleted file mode 100644
index 599415b4..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
deleted file mode 100644
index e4f8e308..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
deleted file mode 100644
index 34d16652..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
deleted file mode 100644
index 4bebef45..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
deleted file mode 100644
index 326468da..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
deleted file mode 100644
index 511b58f4..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
deleted file mode 100644
index d9906d14..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
deleted file mode 100644
index f61c183a..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
deleted file mode 100644
index c10450fd..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
deleted file mode 100644
index 2d5cb195..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
deleted file mode 100644
index b384f34d..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
deleted file mode 100644
index 446e293b..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
deleted file mode 100644
index 6f430298..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
deleted file mode 100644
index 1cd8ba88..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
deleted file mode 100644
index 1ee2eab6..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
deleted file mode 100644
index 2bc77816..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
deleted file mode 100644
index d55ced08..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
deleted file mode 100644
index 8361e99c..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
deleted file mode 100644
index 7507a67c..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
deleted file mode 100644
index 61f050b2..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
deleted file mode 100644
index d4a49d9c..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
deleted file mode 100644
index d1462789..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
deleted file mode 100644
index e73f917a..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
deleted file mode 100644
index d40825df..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_F16);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
deleted file mode 100644
index b5c6869f..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
deleted file mode 100644
index 4e21b0cc..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
deleted file mode 100644
index 2eac321b..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
deleted file mode 100644
index f7d2c3b4..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
deleted file mode 100644
index a013f400..00000000
--- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f32.cuh"
-
-DECL_FATTN_VEC_F32_CASE(64, GGML_TYPE_F16, GGML_TYPE_Q8_0);
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
deleted file mode 100644
index 2d94e65c..00000000
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu
+++ /dev/null
@@ -1,10 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-DECL_FATTN_WMMA_F16_CASE(64, 16, float);
-DECL_FATTN_WMMA_F16_CASE(80, 16, float);
-DECL_FATTN_WMMA_F16_CASE(96, 16, float);
-DECL_FATTN_WMMA_F16_CASE(112, 16, float);
-DECL_FATTN_WMMA_F16_CASE(128, 16, float);
-DECL_FATTN_WMMA_F16_CASE(256, 16, float);
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
deleted file mode 100644
index c3d9df3c..00000000
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu
+++ /dev/null
@@ -1,9 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-DECL_FATTN_WMMA_F16_CASE(64, 32, float);
-DECL_FATTN_WMMA_F16_CASE(80, 32, float);
-DECL_FATTN_WMMA_F16_CASE(96, 32, float);
-DECL_FATTN_WMMA_F16_CASE(112, 32, float);
-DECL_FATTN_WMMA_F16_CASE(128, 32, float);
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
deleted file mode 100644
index bb680e40..00000000
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu
+++ /dev/null
@@ -1,10 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-DECL_FATTN_WMMA_F16_CASE(64, 16, half);
-DECL_FATTN_WMMA_F16_CASE(80, 16, half);
-DECL_FATTN_WMMA_F16_CASE(96, 16, half);
-DECL_FATTN_WMMA_F16_CASE(112, 16, half);
-DECL_FATTN_WMMA_F16_CASE(128, 16, half);
-DECL_FATTN_WMMA_F16_CASE(256, 16, half);
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
deleted file mode 100644
index 073f71b1..00000000
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu
+++ /dev/null
@@ -1,10 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-DECL_FATTN_WMMA_F16_CASE(64, 32, half);
-DECL_FATTN_WMMA_F16_CASE(80, 32, half);
-DECL_FATTN_WMMA_F16_CASE(96, 32, half);
-DECL_FATTN_WMMA_F16_CASE(112, 32, half);
-DECL_FATTN_WMMA_F16_CASE(128, 32, half);
-DECL_FATTN_WMMA_F16_CASE(256, 32, half);
diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
deleted file mode 100644
index d30710c5..00000000
--- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu
+++ /dev/null
@@ -1,8 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-DECL_FATTN_WMMA_F16_CASE(64, 8, half);
-DECL_FATTN_WMMA_F16_CASE(96, 8, half);
-DECL_FATTN_WMMA_F16_CASE(128, 8, half);
-DECL_FATTN_WMMA_F16_CASE(256, 8, half);
diff --git a/ggml-cuda/template-instances/generate_cu_files.py b/ggml-cuda/template-instances/generate_cu_files.py
deleted file mode 100755
index ea58d096..00000000
--- a/ggml-cuda/template-instances/generate_cu_files.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python3
-
-from glob import glob
-import os
-
-TYPES_KV = ["GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", "GGML_TYPE_F16"]
-
-SOURCE_FATTN_VEC = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-vec-f{vkq_size}.cuh"
-
-DECL_FATTN_VEC_F{vkq_size}_CASE({head_size}, {type_k}, {type_v});
-"""
-
-SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../fattn-wmma-f16.cuh"
-
-"""
-
-SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n"
-
-TYPES_MMQ = [
- "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
- "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K"
-]
-
-SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE({type});
-"""
-
-
-def get_short_name(long_quant_name):
- return long_quant_name.replace("GGML_TYPE_", "").lower()
-
-
-def get_head_sizes(type_k, type_v):
- if type_k == "GGML_TYPE_F16" and type_v == "GGML_TYPE_F16":
- return [64, 128, 256]
- if type_k == "GGML_TYPE_F16":
- return [64, 128]
- return [128]
-
-
-for filename in glob("*.cu"):
- os.remove(filename)
-
-for vkq_size in [16, 32]:
- for type_k in TYPES_KV:
- for type_v in TYPES_KV:
- for head_size in get_head_sizes(type_k, type_v):
- with open(f"fattn-vec-f{vkq_size}-instance-hs{head_size}-{get_short_name(type_k)}-{get_short_name(type_v)}.cu", "w") as f:
- f.write(SOURCE_FATTN_VEC.format(vkq_size=vkq_size, head_size=head_size, type_k=type_k, type_v=type_v))
-
-for kq_acc_t in ["half", "float"]:
- for cols_per_block in [8, 16, 32]:
- if kq_acc_t == "float" and cols_per_block == 8:
- continue
-
- with open(f"fattn-wmma-f16-instance-kq{kq_acc_t}-cpb{cols_per_block}.cu", "w") as f:
- f.write(SOURCE_FATTN_WMMA_START)
-
- for head_size in [64, 80, 96, 112, 128, 256]:
- if cols_per_block == 8 and head_size % 32 != 0: # wmma fragment is 8x32
- continue
- if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance
- continue
- f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size))
-
-for type in TYPES_MMQ:
- with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f:
- f.write(SOURCE_MMQ.format(type=type))
diff --git a/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/ggml-cuda/template-instances/mmq-instance-q2_k.cu
deleted file mode 100644
index 6415369d..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q2_k.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q2_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/ggml-cuda/template-instances/mmq-instance-q3_k.cu
deleted file mode 100644
index ffb6213a..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q3_k.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q3_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ggml-cuda/template-instances/mmq-instance-q4_0.cu
deleted file mode 100644
index 0c0b0c8a..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q4_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q4_0);
diff --git a/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/ggml-cuda/template-instances/mmq-instance-q4_1.cu
deleted file mode 100644
index ee67f694..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q4_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q4_1);
diff --git a/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/ggml-cuda/template-instances/mmq-instance-q4_k.cu
deleted file mode 100644
index 9eeb3cd7..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q4_k.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q4_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/ggml-cuda/template-instances/mmq-instance-q5_0.cu
deleted file mode 100644
index cc57fb97..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q5_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q5_0);
diff --git a/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/ggml-cuda/template-instances/mmq-instance-q5_1.cu
deleted file mode 100644
index 721ac790..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q5_1.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q5_1);
diff --git a/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/ggml-cuda/template-instances/mmq-instance-q5_k.cu
deleted file mode 100644
index a2e90ffd..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q5_k.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q5_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/ggml-cuda/template-instances/mmq-instance-q6_k.cu
deleted file mode 100644
index 470938fe..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q6_k.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q6_K);
diff --git a/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/ggml-cuda/template-instances/mmq-instance-q8_0.cu
deleted file mode 100644
index 974477bb..00000000
--- a/ggml-cuda/template-instances/mmq-instance-q8_0.cu
+++ /dev/null
@@ -1,5 +0,0 @@
-// This file has been autogenerated by generate_cu_files.py, do not edit manually.
-
-#include "../mmq.cuh"
-
-DECL_MMQ_CASE(GGML_TYPE_Q8_0);