author     Kawrakow <iwankawrakow@gmail.com>          2025-07-15 08:03:13 +0200
committer  GitHub <noreply@github.com>                2025-07-15 08:03:13 +0200
commit     2081b3fccb9923699bf4d5e926d8719fc1d12c39 (patch)
tree       61b3665214941b4857466fdea8220159d81a609e /ggml/src/ggml.c
parent     45fae1a14444622478774f9a417e1d417af1ca46 (diff)
Vulkan: a fresh start (#608)
* It compiles
* Seems to be working with coopmat
* Vulkan needs f32 precision for flash attention
* Vulkan: fix u_batch > 4096/n_active_experts for coopmat1. Without this fix
  we get an assert; mainline hits the same assert too. (See the sketch after
  the commit message.)
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
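Two of the bullet points above have a direct code-level reading. The snippet below is a hedged sketch, not the patch itself: the flash-attention precision call is ggml's public per-op precision API, while `clamp_ubatch_for_coopmat1` is a hypothetical illustration of the u_batch constraint; the actual fix lives in the Vulkan backend, which this page's diff does not show.

```c
#include <assert.h>

/* 1) "Vulkan needs f32 precision for flash attention": ggml exposes a
 *    per-op precision request (declared in ggml.h); the backend decides
 *    how to honor it. Shown as a call site for reference, not new API:
 *
 *        ggml_flash_attn_ext_set_prec(attn, GGML_PREC_F32);
 *
 * 2) "fix u_batch > 4096/n_active_experts for coopmat1": a hypothetical
 *    clamp keeping u_batch * n_active_experts within the 4096-row budget
 *    that the coopmat1 path asserts on. Name and placement are illustrative,
 *    inferred from the commit message only. */
static int clamp_ubatch_for_coopmat1(int u_batch, int n_active_experts) {
    assert(n_active_experts > 0);
    const int limit = 4096 / n_active_experts;
    return u_batch < limit ? u_batch : limit;
}
```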
Diffstat (limited to 'ggml/src/ggml.c')
-rw-r--r--  ggml/src/ggml.c | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
```diff
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index dbb080f8..b3982538 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4682,6 +4682,24 @@ GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
 }
 
+GGML_CALL bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
+    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
+}
+
+GGML_CALL bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
+    return
+        tensor->nb[0] > tensor->nb[2] &&
+        tensor->nb[1] > tensor->nb[0] &&
+        tensor->nb[2] == ggml_type_size(tensor->type);
+}
+
+GGML_CALL bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
+    return
+        tensor->ne[0] == ggml_blck_size(tensor->type) ||
+        tensor->nb[0] == ggml_type_size(tensor->type);
+}
+
+
 static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
@@ -5195,16 +5213,6 @@ static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params
     memcpy(tensor->op_params, params, params_size);
 }
 
-static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    return ((const int32_t *)(tensor->op_params))[i];
-}
-
-static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
-    return ((const float *)(tensor->op_params))[i];
-}
-
 static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
     assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
     ((int32_t *)(tensor->op_params))[i] = value;
```
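For context, the three predicates added above classify a tensor's memory layout from its element counts (`ne`) and byte strides (`nb`): `ggml_is_contiguously_allocated` checks that the total byte footprint equals the dense size (no padding or holes), `ggml_is_contiguous_channels` detects a layout whose third dimension is the fastest-moving in memory, and `ggml_is_contiguous_rows` only requires each row to be densely packed. The standalone sketch below mimics that logic with a toy struct for F32 tensors (block size 1); the `toy_*` names and the hard-coded element size are illustrative, not part of ggml.

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Toy mimic of ggml's stride bookkeeping, for illustration only.
 * ne[i] = elements in dimension i, nb[i] = stride of dimension i in bytes.
 * F32 only: 4 bytes per element, block size 1. */
struct toy_tensor {
    long   ne[4];
    size_t nb[4];
};

enum { TYPE_SIZE = 4, BLCK_SIZE = 1 };  /* F32 */

/* Mirrors ggml_is_contiguous_rows(): each row is densely packed, even if
 * whole rows are permuted or padded relative to each other. */
static bool toy_is_contiguous_rows(const struct toy_tensor *t) {
    return t->ne[0] == BLCK_SIZE || t->nb[0] == TYPE_SIZE;
}

/* Mirrors ggml_is_contiguous_channels(): dimension 2 is the fastest-moving
 * one in memory, e.g. after permuting a conv weight channels-first. */
static bool toy_is_contiguous_channels(const struct toy_tensor *t) {
    return t->nb[0] > t->nb[2] &&
           t->nb[1] > t->nb[0] &&
           t->nb[2] == TYPE_SIZE;
}

int main(void) {
    /* 4x3 F32 matrix, densely packed: strides 4, 16, 48, 48 bytes. */
    struct toy_tensor packed = { {4, 3, 1, 1}, {4, 16, 48, 48} };
    /* The same buffer viewed through a permute that makes dimension 2
     * the fastest-moving one (a channels-contiguous view). */
    struct toy_tensor chan   = { {3, 1, 4, 1}, {16, 48, 4, 48} };

    printf("packed: rows=%d channels=%d\n",
           toy_is_contiguous_rows(&packed), toy_is_contiguous_channels(&packed));
    printf("chan:   rows=%d channels=%d\n",
           toy_is_contiguous_rows(&chan), toy_is_contiguous_channels(&chan));
    return 0;
}
```

Running the sketch prints `packed: rows=1 channels=0` and `chan: rows=0 channels=1`: the packed matrix has dense rows but its innermost dimension moves fastest, while the permuted view satisfies the channels-contiguous test instead.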