diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-01-17 18:54:56 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-17 18:54:56 +0200 |
commit | 38566680cdfe982a495562332c25b9227de9cf8d (patch) | |
tree | 3936732879d0a3146577745232feadb80e5917c9 /ggml-quants.h | |
parent | ba69bbc84ced580fe4fdb0713ca2d95634325b7a (diff) |
ggml : add IQ2 to test-backend-ops + refactoring (#4990)
* ggml : add IQ2 to test-backend-ops + refactoring
ggml-ci
* cuda : update supports_op for IQ2
ggml-ci
* ci : enable LLAMA_CUBLAS=1 for CUDA nodes
ggml-ci
* cuda : fix out-of-bounds-access in `mul_mat_vec_q`
ggml-ci
* tests : avoid creating RNGs for each Q tensor
ggml-ci
* tests : avoid creating RNGs for each tensor
ggml-ci
Diffstat (limited to 'ggml-quants.h')
-rw-r--r-- | ggml-quants.h | 3 |
1 files changed, 3 insertions, 0 deletions
```diff
diff --git a/ggml-quants.h b/ggml-quants.h
index d7fefdb5..7d7cf917 100644
--- a/ggml-quants.h
+++ b/ggml-quants.h
@@ -257,3 +257,6 @@ size_t quantize_q4_0 (const float * src, void * dst, int nrows, int n_per_row,
 size_t quantize_q4_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
 size_t quantize_q5_0 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
 size_t quantize_q5_1 (const float * src, void * dst, int nrows, int n_per_row, int64_t * hist, const float * imatrix);
+
+void iq2xs_init_impl(int grid_size);
+void iq2xs_free_impl(int grid_size);
```