diff options
Diffstat (limited to 'ggml/src/ggml-cuda/quantize.cuh')
-rw-r--r-- | ggml/src/ggml-cuda/quantize.cuh | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/ggml/src/ggml-cuda/quantize.cuh b/ggml/src/ggml-cuda/quantize.cuh
index 0be5bf0e..e1106164 100644
--- a/ggml/src/ggml-cuda/quantize.cuh
+++ b/ggml/src/ggml-cuda/quantize.cuh
@@ -30,5 +30,9 @@ void quantize_mmq_q8_1_cuda(
     const float * x, void * vy, const int64_t kx0, const int64_t kx1, const int64_t channels, const int64_t kx0_padded,
     const ggml_type type_x, cudaStream_t stream);
 
+void quantize_mmq_q8_1_id_cuda(
+    const float * x, void * vy, const char * row_mapping, const int64_t kx0, const int64_t kx1, const int64_t kx0_padded,
+    const ggml_type type_x, cudaStream_t stream);
+
 // For now only applicable for tensors with ne[1] = 1, ne[3] = 1, and useful if ne[2] > 1
 void quantize_tensor_q8_1_cuda(const struct ggml_tensor * src, void * vy, const enum ggml_type type, cudaStream_t stream);