diff options
author | Jared Van Bortel <jared@nomic.ai> | 2024-02-22 17:05:23 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-22 17:05:23 -0500 |
commit | 15499eb94227401bdc8875da6eb85c15d37068f7 (patch) | |
tree | 304ceb65978864af454d9b5ad7ad08fc4f673326 /llama.cpp | |
parent | 96633eeca1265ed03e57230de54032041c58f9cd (diff) |
mpt : do not duplicate token_embd.weight on disk (#5670)
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 6 |
1 files changed, 4 insertions, 2 deletions
@@ -509,7 +509,6 @@ static std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES = {
         {
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
-            { LLM_TENSOR_OUTPUT, "output" },
             { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
             { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
             { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
@@ -4056,7 +4055,10 @@ static bool llm_load_tensors(
                         model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
                         model.output_norm_b = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, false);
 
-                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
+                        // same as tok_embd, duplicated to allow offloading
+                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+                        ml.n_created--; // artificial tensor
+                        ml.size_data += ggml_nbytes(model.output);
                     }
 
                     for (int i = 0; i < n_layer; ++i) {