summary | refs | log | tree | commit | diff
path: root/ggml/src/ggml-cuda/quantize.cuh
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/ggml-cuda/quantize.cuh')
-rw-r--r-- ggml/src/ggml-cuda/quantize.cuh | 4
1 files changed, 4 insertions, 0 deletions
diff --git a/ggml/src/ggml-cuda/quantize.cuh b/ggml/src/ggml-cuda/quantize.cuh
index 0be5bf0e..e1106164 100644
--- a/ggml/src/ggml-cuda/quantize.cuh
+++ b/ggml/src/ggml-cuda/quantize.cuh
@@ -30,5 +30,9 @@ void quantize_mmq_q8_1_cuda(
const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
const ggml_type type_x, cudaStream_t stream);
+void quantize_mmq_q8_1_id_cuda(
+ const float * x, void * vy, const char * row_mapping, const int64_t kx0, const int64_t kx1, const int64_t kx0_padded,
+ const ggml_type type_x, cudaStream_t stream);
+
// For now only applicable for tensors with ne[1] = 1, ne[3] = 1, and useful if ne[2] > 1
void quantize_tensor_q8_1_cuda(const struct ggml_tensor * src, void * vy, const enum ggml_type type, cudaStream_t stream);