diff options
author | Douglas Hanley <thesecretaryofwar@gmail.com> | 2024-02-13 06:06:58 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-13 14:06:58 +0200 |
commit | 03bf161eb6dea6400ee49c6dc6b69bdcfa9fd3fc (patch) | |
tree | 49320ac8aca35d2ba8162c2a280924bacbd7e06b /gguf-py/gguf/constants.py | |
parent | ad014bba97ef6ef6c3e2f78b2fc463e91ae94579 (diff) |
llama : support batched embeddings (#5466)
* batched embedding: pool outputs by sequence id. updated embedding example
* bring back non-causal attention
* embd : minor improvements
* llama : minor
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 1 |
1 file changed, 1 insertion, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a9c13dd3..644e1589 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -40,6 +40,7 @@ class Keys:
         TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
         EXPERT_COUNT = "{arch}.expert_count"
         EXPERT_USED_COUNT = "{arch}.expert_used_count"
+        POOLING_LAYER = "{arch}.pooling_layer"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"