diff options
author | Stephan Walter <stephan@walter.name> | 2023-03-28 15:56:03 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-28 18:56:03 +0300 |
commit | c1f885067c61191a07a1aedf684168dda62f3f71 (patch) | |
tree | 2bcb3f068942e2f16a92d70fec4bd623ac17ce28 /llama.cpp | |
parent | e0670260fb50a882b37074112b1881fb0820cf77 (diff) |
ggml : introduce structs for the q4 data blocks (#356)
* Introduce structs for the q4 data blocks
* ggml : rename quant struct variables + fix ARM_NEON
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 11 |
1 files changed, 5 insertions, 6 deletions
@@ -1345,7 +1345,7 @@ static llama_vocab::id llama_sample_top_p_top_k( // // TODO: reuse code from the llama_model_load() somehow -bool llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, int itype, int qk) { +static bool llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, int itype) { ggml_type type = GGML_TYPE_Q4_1; switch (itype) { @@ -1568,11 +1568,11 @@ bool llama_model_quantize_internal(const std::string & fname_inp, const std::str switch (type) { case GGML_TYPE_Q4_0: { - cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], qk, hist_cur.data()); + cur_size = ggml_quantize_q4_0(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data()); } break; case GGML_TYPE_Q4_1: { - cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], qk, hist_cur.data()); + cur_size = ggml_quantize_q4_1(data_f32.data(), work.data(), nelements, ne[0], hist_cur.data()); } break; default: { @@ -1711,9 +1711,8 @@ void llama_free(struct llama_context * ctx) { int llama_model_quantize( const char * fname_inp, const char * fname_out, - int itype, - int qk) { - if (!llama_model_quantize_internal(fname_inp, fname_out, itype, qk)) { + int itype) { + if (!llama_model_quantize_internal(fname_inp, fname_out, itype)) { fprintf(stderr, "%s: failed to quantize\n", __func__); return 1; } |