1 files changed, 4 insertions, 0 deletions
diff --git a/ggml/src/ggml-cuda/quantize.cuh b/ggml/src/ggml-cuda/quantize.cuh
index 0be5bf0e..e1106164 100644
--- a/ggml/src/ggml-cuda/quantize.cuh
+++ b/ggml/src/ggml-cuda/quantize.cuh
@@ -30,5 +30,9 @@ void quantize_mmq_q8_1_cuda(
     const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
     const ggml_type type_x, cudaStream_t stream);
 
+void quantize_mmq_q8_1_id_cuda( // Same parameter list as quantize_mmq_q8_1_cuda above, plus row_mapping and without channels.
+    const float * x, void * vy, const char * row_mapping, const int64_t kx0, const int64_t kx1, const int64_t kx0_padded,
+    const ggml_type type_x, cudaStream_t stream); // NOTE(review): layout/meaning of row_mapping is not visible in this header - confirm against the definition.
+
 // For now only applicable for tensors with ne[1] = 1, ne[3] = 1, and useful if ne[2] > 1
 void quantize_tensor_q8_1_cuda(const struct ggml_tensor * src, void * vy, const enum ggml_type type, cudaStream_t stream);