From 03bf161eb6dea6400ee49c6dc6b69bdcfa9fd3fc Mon Sep 17 00:00:00 2001
From: Douglas Hanley <thesecretaryofwar@gmail.com>
Date: Tue, 13 Feb 2024 06:06:58 -0600
Subject: llama : support batched embeddings (#5466)

* batched embedding: pool outputs by sequence id. updated embedding example

* bring back non-causal attention

* embd : minor improvements

* llama : minor

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 gguf-py/gguf/constants.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'gguf-py/gguf/constants.py')

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a9c13dd3..644e1589 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -40,6 +40,7 @@ class Keys:
         TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
         EXPERT_COUNT          = "{arch}.expert_count"
         EXPERT_USED_COUNT     = "{arch}.expert_used_count"
+        POOLING_LAYER         = "{arch}.pooling_layer"
 
     class Attention:
         HEAD_COUNT        = "{arch}.attention.head_count"
-- 
cgit v1.2.3