diff options
author | slaren <slarengh@gmail.com> | 2024-03-25 13:50:23 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-25 13:50:23 +0100 |
commit | ae1f211ce2138448b47ebb148e25c58406845278 (patch) | |
tree | a18f5712eaee64d7d0ad1a3b3a097591ec10277e /ggml-cuda/quantize.cuh | |
parent | ad3a0505e3b6cd777259ee35e61d428357ffc565 (diff) |
cuda : refactor into multiple files (#6269)
Diffstat (limited to 'ggml-cuda/quantize.cuh')
-rw-r--r-- | ggml-cuda/quantize.cuh | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/ggml-cuda/quantize.cuh b/ggml-cuda/quantize.cuh
new file mode 100644
index 00000000..adb89c83
--- /dev/null
+++ b/ggml-cuda/quantize.cuh
@@ -0,0 +1,5 @@
+#include "common.cuh"
+
+#define CUDA_QUANTIZE_BLOCK_SIZE 256
+
+void quantize_row_q8_1_cuda(const float * x, void * vy, const int kx, const int ky, const int kx_padded, cudaStream_t stream);