diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-06-05 16:53:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-05 16:53:00 +0200 |
commit | 7d1a378b8fb266782d9248538a661405aad80768 (patch) | |
tree | 7ce459a4c5a85e75f75825772124aedc3bb54b7f /ggml-cuda/template-instances | |
parent | 2b3389677a833cee0880226533a1768b1a9508d2 (diff) |
CUDA: refactor mmq, dmmv, mmvq (#7716)
* CUDA: refactor mmq, dmmv, mmvq
* fix out-of-bounds write
* struct for qk, qr, qi
* fix cmake build
* mmq_type_traits
Diffstat (limited to 'ggml-cuda/template-instances')
102 files changed, 157 insertions, 91 deletions
diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu index d7f10347..6696a238 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu index f3d8d2ed..dd070db2 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu index 9beb05ca..54dcde6f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu index 0c163dcb..4ec22f79 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu index 3980167b..3c15bf7f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu index fe099921..7e61b5fd 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu index d4d5e799..fdb15b58 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu index f08b10c4..0f7c417d 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu index e8c3f8ad..851f33c4 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu index c01416a1..763809cb 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu index 46615f28..f2a276e5 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu index 72dcc1a2..cb227f6f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu index 9fa8a377..97ac0520 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu index 20ea86c6..c772b426 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu index ed815957..5cb74308 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu index bbe9e6a1..98a709d1 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu index d12a6169..4f2f947a 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu index 1e901afc..11f96b6f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu index a3f98ce3..b39bdc06 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu index 1bae9724..bbd6a2c7 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu index 7258e977..9d84ff2b 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu index 08435c00..bc8a5bff 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu index 17864e8e..a679100c 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu index 9239138c..8f21bccf 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu index e387d9c1..858b00fd 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu index d69d3bbd..0fc8011f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu index 61a47881..261fdf62 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu index 89995080..0fb82473 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu index 9e6a58df..a9d9d089 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu index 153cbfd8..7d7b2792 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu index 09d57655..a092ee2d 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu index 3e3c91e6..db55927a 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu index 7b973058..c3c21cef 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu index a43a475d..35dd9f52 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu index 5b570c0a..050c22ac 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu index bf2cc684..de4866c5 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu index 7428e45e..57a10bc4 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu index 4aee830d..e0f08b46 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu index 36acb631..1c8e8a46 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu index a4090c39..cefed83f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu index 17b6b2d1..aede6e35 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu index 549e1cea..1a1a92c7 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu index 66bcd820..ad667473 100644 --- a/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu index 15933a29..c499f455 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu index 8aa78558..8286ebf3 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu index bde3924f..45878688 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu index 1708181c..d89103ce 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu index 30fa6fa4..bb75fd42 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu index 69673d50..b1629817 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu index d8b2b2e1..d8657604 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu index 01cce7ab..2e5bd2f1 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu index fd5563b3..be5f302d 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu index b13cc4a0..8dd91cd7 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu index 86f1fc63..4cb79150 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu index 26e7df4b..09dea426 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu index e4fda895..0fbb6076 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu index bd15117b..2aeab83b 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu index cb6c6a76..599415b4 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu index 201b6641..e4f8e308 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu index 6da57a44..34d16652 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu index 47623c9b..4bebef45 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu index 82c6861d..326468da 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu index 24a80c2b..511b58f4 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu index b95eaf7e..d9906d14 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu index 275f2efc..f61c183a 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu index 3673f7fd..c10450fd 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu index 2c4d5994..2d5cb195 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu index 2457cdf3..b384f34d 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu index b3b411ed..446e293b 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu index b7f308a4..6f430298 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu index 73968669..1cd8ba88 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu index 708d0311..1ee2eab6 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu index df891be6..2bc77816 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu index f49b6d1f..d55ced08 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu index 1de92148..8361e99c 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu index 7a1ba7f8..7507a67c 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu index 25493e4b..61f050b2 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu index 3cd650c7..d4a49d9c 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu index 88ffa43d..d1462789 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu index 8c7bac6c..e73f917a 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu index a28f62e7..d40825df 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu index d39838b9..b5c6869f 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu index 834d40f6..4e21b0cc 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu index f7d54668..2eac321b 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu index 59e00ad8..f7d2c3b4 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu index 6e63893d..a013f400 100644 --- a/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +++ b/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-vec-f32.cuh" diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu index ca356ad6..2d94e65c 100644 --- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-wmma-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu index 430ee64e..c3d9df3c 100644 --- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-wmma-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu index d421d17c..bb680e40 100644 --- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-wmma-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu index deacd5f5..073f71b1 100644 --- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-wmma-f16.cuh" diff --git a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu index 28289673..d30710c5 100644 --- a/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +++ b/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu @@ -1,4 +1,4 @@ -// This file has been autogenerated by generate-variants.py, do not edit manually. +// This file has been autogenerated by generate_cu_files.py, do not edit manually. #include "../fattn-wmma-f16.cuh" diff --git a/ggml-cuda/template-instances/generate_cu_files.py b/ggml-cuda/template-instances/generate_cu_files.py index ee5b460e..ea58d096 100755 --- a/ggml-cuda/template-instances/generate_cu_files.py +++ b/ggml-cuda/template-instances/generate_cu_files.py @@ -20,6 +20,18 @@ SOURCE_FATTN_WMMA_START = """// This file has been autogenerated by generate_cu_ SOURCE_FATTN_WMMA_CASE = "DECL_FATTN_WMMA_F16_CASE({head_size}, {cols_per_block}, {kq_acc_t});\n" +TYPES_MMQ = [ + "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0", + "GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K" +] + +SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE({type}); +""" + def get_short_name(long_quant_name): return long_quant_name.replace("GGML_TYPE_", "").lower() @@ -57,3 +69,7 @@ for kq_acc_t in ["half", "float"]: if kq_acc_t == "float" and cols_per_block == 32 and head_size == 256: # register spilling, bad performance continue f.write(SOURCE_FATTN_WMMA_CASE.format(kq_acc_t=kq_acc_t, cols_per_block=cols_per_block, head_size=head_size)) + +for type in TYPES_MMQ: + with open(f"mmq-instance-{get_short_name(type)}.cu", "w") as f: + f.write(SOURCE_MMQ.format(type=type)) diff --git a/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/ggml-cuda/template-instances/mmq-instance-q2_k.cu new file mode 100644 index 00000000..6415369d --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q2_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q2_K); diff --git a/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/ggml-cuda/template-instances/mmq-instance-q3_k.cu new file mode 100644 index 00000000..ffb6213a --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q3_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q3_K); diff --git a/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ggml-cuda/template-instances/mmq-instance-q4_0.cu new file mode 100644 index 00000000..0c0b0c8a --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q4_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_0); diff --git a/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/ggml-cuda/template-instances/mmq-instance-q4_1.cu new file mode 100644 index 00000000..ee67f694 --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q4_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_1); diff --git a/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/ggml-cuda/template-instances/mmq-instance-q4_k.cu new file mode 100644 index 00000000..9eeb3cd7 --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q4_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q4_K); diff --git a/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/ggml-cuda/template-instances/mmq-instance-q5_0.cu new file mode 100644 index 00000000..cc57fb97 --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q5_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_0); diff --git a/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/ggml-cuda/template-instances/mmq-instance-q5_1.cu new file mode 100644 index 00000000..721ac790 --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q5_1.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_1); diff --git a/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/ggml-cuda/template-instances/mmq-instance-q5_k.cu new file mode 100644 index 00000000..a2e90ffd --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q5_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q5_K); diff --git a/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/ggml-cuda/template-instances/mmq-instance-q6_k.cu new file mode 100644 index 00000000..470938fe --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q6_k.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q6_K); diff --git a/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/ggml-cuda/template-instances/mmq-instance-q8_0.cu new file mode 100644 index 00000000..974477bb --- /dev/null +++ b/ggml-cuda/template-instances/mmq-instance-q8_0.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../mmq.cuh" + +DECL_MMQ_CASE(GGML_TYPE_Q8_0); |