| author    | Douglas Hanley <thesecretaryofwar@gmail.com>      | 2024-02-13 06:06:58 -0600 |
|-----------|---------------------------------------------------|---------------------------|
| committer | GitHub <noreply@github.com>                       | 2024-02-13 14:06:58 +0200 |
| commit    | 03bf161eb6dea6400ee49c6dc6b69bdcfa9fd3fc (patch)  |                           |
| tree      | 49320ac8aca35d2ba8162c2a280924bacbd7e06b /gguf-py |                           |
| parent    | ad014bba97ef6ef6c3e2f78b2fc463e91ae94579 (diff)   |                           |
llama : support batched embeddings (#5466)
* batched embedding: pool outputs by sequence id. updated embedding example
* bring back non-causal attention
* embd : minor improvements
* llama : minor
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
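For context, "pool outputs by sequence id" means that when several sequences are packed into one batch, the per-token embeddings are grouped by their sequence id and reduced to a single vector per sequence. The sketch below is illustrative only and is not the llama.cpp code path; it just shows the mean-pooling idea in NumPy.

```python
# Illustrative sketch only: mean-pool per-token embeddings into one vector per
# sequence id. Not the llama.cpp implementation, just the idea behind the
# "pool outputs by sequence id" bullet above.
import numpy as np


def pool_by_seq_id(token_embd: np.ndarray, seq_ids: np.ndarray) -> dict[int, np.ndarray]:
    """token_embd: (n_tokens, n_embd) matrix; seq_ids: (n_tokens,) sequence id per token."""
    pooled: dict[int, np.ndarray] = {}
    for sid in np.unique(seq_ids):
        rows = token_embd[seq_ids == sid]      # all token embeddings for this sequence
        pooled[int(sid)] = rows.mean(axis=0)   # mean pooling -> one embedding per sequence
    return pooled


# Example: two sequences packed into a single batch of 5 tokens
embd = np.random.rand(5, 8).astype(np.float32)
seqs = np.array([0, 0, 0, 1, 1])
per_seq = pool_by_seq_id(embd, seqs)
print({k: v.shape for k, v in per_seq.items()})  # {0: (8,), 1: (8,)}
```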
Diffstat (limited to 'gguf-py')
-rw-r--r-- | gguf-py/gguf/constants.py   | 1 |
-rw-r--r-- | gguf-py/gguf/gguf_writer.py | 3 |
2 files changed, 4 insertions, 0 deletions
```diff
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a9c13dd3..644e1589 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -40,6 +40,7 @@ class Keys:
         TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
         EXPERT_COUNT          = "{arch}.expert_count"
         EXPERT_USED_COUNT     = "{arch}.expert_used_count"
+        POOLING_LAYER         = "{arch}.pooling_layer"
 
     class Attention:
         HEAD_COUNT        = "{arch}.attention.head_count"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 7af58a46..d87bd8e8 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -360,6 +360,9 @@ class GGUFWriter:
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
+    def add_pooling_layer(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.POOLING_LAYER.format(arch=self.arch), value)
+
     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
 
```
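The new `add_pooling_layer` method mirrors `add_causal_attention` directly above it and writes the `{arch}.pooling_layer` boolean key. A hedged usage sketch follows; it is not part of this commit, and the output path, architecture string, and surrounding write-out calls are assumptions about the usual gguf-py writer workflow.

```python
# Hypothetical usage sketch: how a conversion script for an embedding model
# (e.g. a BERT-style encoder) might record the new key. File name and arch
# string are placeholders, not values from this commit.
from gguf import GGUFWriter

writer = GGUFWriter("bert-embedding.gguf", arch="bert")
writer.add_causal_attention(False)  # embedding models use non-causal attention
writer.add_pooling_layer(True)      # new key from this commit: "{arch}.pooling_layer"

# ... add the remaining metadata and tensors, then write everything out:
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```

On the loader side, the flag lets llama.cpp know whether the model expects a pooling step over the final hidden states when producing sequence-level embeddings.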