Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 10
1 file changed, 5 insertions, 5 deletions
@@ -2257,10 +2257,10 @@ static void llama_convert_tensor_internal(const llama_load_tensor & tensor, llam
     }
     float * f32_output = (float *) output.addr;
 
-    quantize_fns_t qtype;
+    ggml_type_traits_t qtype;
     if (ggml_is_quantized(tensor.type)) {
-        qtype = ggml_internal_get_quantize_fn(tensor.type);
-        if (qtype.dequantize_row_q == NULL) {
+        qtype = ggml_internal_get_type_traits(tensor.type);
+        if (qtype.to_float == NULL) {
             throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available", ggml_type_name(tensor.type)));
         }
     } else if (tensor.type != GGML_TYPE_F16) {
@@ -2271,7 +2271,7 @@ static void llama_convert_tensor_internal(const llama_load_tensor & tensor, llam
         if (tensor.type == GGML_TYPE_F16) {
             ggml_fp16_to_fp32_row((ggml_fp16_t *)tensor.data, f32_output, nelements);
         } else if (ggml_is_quantized(tensor.type)) {
-            qtype.dequantize_row_q(tensor.data, f32_output, nelements);
+            qtype.to_float(tensor.data, f32_output, nelements);
         } else {
             LLAMA_ASSERT(false); // unreachable
         }
@@ -2296,7 +2296,7 @@ static void llama_convert_tensor_internal(const llama_load_tensor & tensor, llam
             if (typ == GGML_TYPE_F16) {
                 ggml_fp16_to_fp32_row((ggml_fp16_t *)inbuf, outbuf, nels);
            } else {
-                qtype.dequantize_row_q(inbuf, outbuf, nels);
+                qtype.to_float(inbuf, outbuf, nels);
             }
         };
         workers.push_back(std::thread(compute, tensor.type, tensor.data + in_buff_offs, f32_output + out_buff_offs, thr_elems));
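
For context, the commit swaps the old quantize_fns_t lookup for the generalized ggml_type_traits_t, whose to_float callback replaces dequantize_row_q. Below is a minimal standalone sketch of the new call pattern; the helper name dequantize_to_f32 and its error handling are illustrative and not part of this commit, and the sketch only relies on functions that appear in the diff (ggml_internal_get_type_traits, ggml_is_quantized, ggml_fp16_to_fp32_row, ggml_type_name).

// Sketch only: dequantize an arbitrary ggml buffer to float32 using the
// renamed type-traits API shown in the diff above. Helper name and error
// messages are illustrative, not from the commit.
#include <stdexcept>
#include <string>
#include "ggml.h"

static void dequantize_to_f32(ggml_type type, const void * data, float * f32_out, int nelements) {
    if (type == GGML_TYPE_F16) {
        // FP16 keeps its dedicated conversion routine, as in the diff.
        ggml_fp16_to_fp32_row((const ggml_fp16_t *) data, f32_out, nelements);
        return;
    }
    if (ggml_is_quantized(type)) {
        // ggml_internal_get_type_traits() replaces ggml_internal_get_quantize_fn();
        // the former dequantize_row_q member is now called to_float.
        ggml_type_traits_t qtype = ggml_internal_get_type_traits(type);
        if (qtype.to_float == NULL) {
            throw std::runtime_error(std::string("no dequantization available for type ") + ggml_type_name(type));
        }
        qtype.to_float(data, f32_out, nelements);
        return;
    }
    throw std::runtime_error(std::string("cannot convert tensor type ") + ggml_type_name(type));
}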