diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-22 11:08:12 +0300 |
---|---|---|
committer | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-22 11:08:12 +0300 |
commit | 872c365a9176a011b13d31269bb3121fa89c37e1 (patch) | |
tree | 299f9d9eada580ee780fcad2ff117ceeee81d9b7 /llama.cpp | |
parent | 955ef9a5d53d8f911fe00580ac9bd0caa56430af (diff) |
ggml : fix AVX build + update to new Q8_0 format
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 6 |
1 file changed, 3 insertions, 3 deletions
@@ -68,7 +68,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
         { MODEL_65B, 512ull * MB },
     };
     return _MEM_REQ_SCRATCH1;
-};
+}
 
 // 2*n_embd*n_ctx*n_layer*sizeof(float16)
 static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
@@ -80,7 +80,7 @@ static const std::map<e_model, size_t> & MEM_REQ_KV_SELF()
         { MODEL_65B, 5120ull * MB },
     };
     return _MEM_REQ_KV_SELF;
-};
+}
 
 // this is mostly needed for temporary mul_mat buffers to dequantize the data
 // not actually needed if BLAS is disabled
@@ -93,7 +93,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
         { MODEL_65B, 1536ull * MB },
     };
     return _MEM_REQ_EVAL;
-};
+}
 
 // default hparams (LLaMA 7B)
 struct llama_hparams {