mpt : do not duplicate token_embd.weight on disk (#5670)

author: Jared Van Bortel <jared@nomic.ai> 2024-02-22 17:05:23 -0500
committer: GitHub <noreply@github.com> 2024-02-22 17:05:23 -0500
commit: 15499eb94227401bdc8875da6eb85c15d37068f7 (patch)
tree: 304ceb65978864af454d9b5ad7ad08fc4f673326 /llama.cpp
parent: 96633eeca1265ed03e57230de54032041c58f9cd (diff)
1 file changed, 4 insertions, 2 deletions
diff --git a/llama.cpp b/llama.cpp
index 2ebd40df..37477e6e 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -509,7 +509,6 @@ static std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NAMES =
         {
             { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
             { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
-            { LLM_TENSOR_OUTPUT,          "output" },
             { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
             { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
             { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
@@ -4056,7 +4055,10 @@ static bool llm_load_tensors(
                         model.output_norm   = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
                         model.output_norm_b = ml.create_tensor(ctx_output,       tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, false);
 
-                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab});
+                        // same as tok_embd, duplicated to allow offloading
+                        model.output        = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_TOKEN_EMBD,  "weight"), {n_embd, n_vocab});
+                        ml.n_created--; // artificial tensor
+                        ml.size_data += ggml_nbytes(model.output);
                     }
 
                     for (int i = 0; i < n_layer; ++i) {
author	Jared Van Bortel <jared@nomic.ai>	2024-02-22 17:05:23 -0500
committer	GitHub <noreply@github.com>	2024-02-22 17:05:23 -0500
commit	15499eb94227401bdc8875da6eb85c15d37068f7 (patch)
tree	304ceb65978864af454d9b5ad7ad08fc4f673326 /llama.cpp
parent	96633eeca1265ed03e57230de54032041c58f9cd (diff)