From f5d7b268ec4bf8628aa6ccc9f6631d0230dde76f Mon Sep 17 00:00:00 2001
From: Joan Fontanals
Date: Thu, 6 Jun 2024 09:22:41 +0200
Subject: llama : add jina v2 base code (#7596)

* feat: add changes to handle jina v2 base code

* fix: do not complicate things

* fix: fix the usage of the code model

* fix: fix comments

* fix: fix linting issues

* fix: remove ollama patches

* style : minor

---------

Co-authored-by: Georgi Gerganov
---
 llama.cpp | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 414d390e..cefb4d1d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -704,6 +704,7 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
             { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
             { LLM_TENSOR_TOKEN_TYPES, "token_types" },
+            { LLM_TENSOR_ATTN_NORM_2, "blk.%d.attn_norm_2" },
             { LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
             { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
             { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
@@ -4653,8 +4654,7 @@ static void llm_load_vocab(
             LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
             LLAMA_LOG_WARN("%s: \n", __func__);
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
-        } else if (
-                tokenizer_pre == "default") {
+        } else if (tokenizer_pre == "default") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
         } else if (
                 tokenizer_pre == "llama3" ||
@@ -4681,7 +4681,8 @@ static void llm_load_vocab(
                 tokenizer_pre == "jina-es" ||
                 tokenizer_pre == "jina-de" ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de") {
+                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "jina-v2-code") {
             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
         } else if (
                 tokenizer_pre == "refact") {
@@ -5515,7 +5516,7 @@ static bool llm_load_tensors(
                         layer.ffn_down_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd});
                     } else {
-                        layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
+                        layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
                     }

                     layer.layer_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd});
@@ -5556,6 +5557,9 @@ static bool llm_load_tensors(
                     layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}); //output_norm
                     layer.attn_out_norm_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd});

+                    layer.attn_norm_2 = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "weight", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+                    layer.attn_norm_2_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM_2, "bias", i), {n_embd}, llama_model_loader::TENSOR_NOT_REQUIRED);
+
                     layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                     layer.ffn_gate = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff});
@@ -8519,6 +8523,11 @@ struct llm_build_context {
                 // attention layer norm
                 cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_out_norm, model.layers[il].attn_out_norm_b, LLM_NORM, cb, il);

+                if (model.layers[il].attn_norm_2 != nullptr) {
+                    cur = ggml_add(ctx0, cur, inpL); // re-add the layer input
+                    cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].attn_norm_2, model.layers[il].attn_norm_2_b, LLM_NORM, cb, il);
+                }
+
                 struct ggml_tensor * ffn_inp = cur;
                 cb(ffn_inp, "ffn_inp", il);