diff options
author | compilade <113953597+compilade@users.noreply.github.com> | 2024-03-28 08:05:54 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-28 14:05:54 +0200 |
commit | 0308f5e3d7bf9879f818b1a4ae589ff36b242af5 (patch) | |
tree | c68ce4fbaa405c88179a4d7667a5dd0e38493fad | |
parent | 28cb9a09c4d10a489be1238abe7a858dcd4d65f2 (diff) |
llama : fix command-r inference when omitting outputs (#6367)
-rw-r--r-- | llama.cpp | 5 |
1 file changed, 3 insertions, 2 deletions
```diff
@@ -9152,8 +9152,9 @@ struct llm_build_context {
             if (il == n_layer - 1) {
                 // skip computing output for unused tokens
                 struct ggml_tensor * inp_out_ids = build_inp_out_ids();
-                cur  = ggml_get_rows(ctx0,  cur, inp_out_ids);
-                inpL = ggml_get_rows(ctx0, inpL, inp_out_ids);
+                cur     = ggml_get_rows(ctx0,     cur, inp_out_ids);
+                inpL    = ggml_get_rows(ctx0,    inpL, inp_out_ids);
+                ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids);
             }
 
             struct ggml_tensor * attn_out = cur;
```