diff options
Diffstat (limited to 'ggml.h')
-rw-r--r-- | ggml.h | 20 |
1 files changed, 16 insertions, 4 deletions
@@ -2065,6 +2065,18 @@ extern "C" { // quantization // + // - ggml_quantize_init can be called multiple times with the same type + // it will only initialize the quantization tables for the first call or after ggml_quantize_free + // automatically called by ggml_quantize_chunk for convenience + // + // - ggml_quantize_free will free any memory allocated by ggml_quantize_init + // call this at the end of the program to avoid memory leaks + // + // note: these are thread-safe + // + GGML_API void ggml_quantize_init(enum ggml_type type); + GGML_API void ggml_quantize_free(void); + // TODO: these would probably get removed in favor of the more general ggml_quantize_chunk GGML_API size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist); GGML_API size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist); @@ -2078,13 +2090,13 @@ extern "C" { GGML_API size_t ggml_quantize_q5_K(const float * src, void * dst, int n, int k, int64_t * hist); GGML_API size_t ggml_quantize_q6_K(const float * src, void * dst, int n, int k, int64_t * hist); + // some quantization type cannot be used without an importance matrix + GGML_API bool ggml_quantize_requires_imatrix(enum ggml_type type); + + // calls ggml_quantize_init internally (i.e. can allocate memory) GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix); - // These are needed for IQ2_XS and IQ2_XXS quantizations - GGML_API void ggml_init_iq2_quantization(enum ggml_type type); - GGML_API void ggml_deinit_iq2_quantization(enum ggml_type type); - // // gguf // |