From 1b107b8550dced48dc5f41184640061354226b96 Mon Sep 17 00:00:00 2001 From: Stephan Walter Date: Wed, 5 Jul 2023 16:13:06 +0000 Subject: ggml : generalize `quantize_fns` for simpler FP16 handling (#1237) * Generalize quantize_fns for simpler FP16 handling * Remove call to ggml_cuda_mul_mat_get_wsize * ci : disable FMA for mac os actions --------- Co-authored-by: Georgi Gerganov --- examples/quantize-stats/quantize-stats.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'examples/quantize-stats/quantize-stats.cpp') diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp index 9cea472d..6aa06ec8 100644 --- a/examples/quantize-stats/quantize-stats.cpp +++ b/examples/quantize-stats/quantize-stats.cpp @@ -147,7 +147,7 @@ void test_roundtrip_on_chunk( const ggml_tensor * layer, int64_t offset, int64_t chunk_size, - const quantize_fns_t & qfns, + const ggml_type_traits_t & qfns, bool use_reference, float * input_scratch, char * quantized_scratch, @@ -163,11 +163,11 @@ void test_roundtrip_on_chunk( } if (use_reference) { - qfns.quantize_row_q_reference(input_scratch, quantized_scratch, chunk_size); + qfns.from_float_reference(input_scratch, quantized_scratch, chunk_size); } else { - qfns.quantize_row_q(input_scratch, quantized_scratch, chunk_size); + qfns.from_float(input_scratch, quantized_scratch, chunk_size); } - qfns.dequantize_row_q(quantized_scratch, output_scratch, chunk_size); + qfns.to_float(quantized_scratch, output_scratch, chunk_size); update_error_stats(chunk_size, input_scratch, output_scratch, stats); } @@ -177,7 +177,7 @@ void test_roundtrip_on_chunk( void test_roundtrip_on_layer( std::string & name, bool print_layer_stats, - const quantize_fns_t & qfns, + const ggml_type_traits_t & qfns, bool use_reference, const ggml_tensor * layer, std::vector<float> & input_scratch, @@ -388,8 +388,8 @@ int main(int argc, char ** argv) { if (!params.include_types.empty() &&
std::find(params.include_types.begin(), params.include_types.end(), i) == params.include_types.end()) { continue; } - quantize_fns_t qfns = ggml_internal_get_quantize_fn(i); - if (qfns.quantize_row_q && qfns.dequantize_row_q) { + ggml_type_traits_t qfns = ggml_internal_get_type_traits(type); + if (qfns.from_float && qfns.to_float) { if (params.verbose) { printf("testing %s ...\n", ggml_type_name(type)); } -- cgit v1.2.3