From 9958c81b798a5872087b30b360e4674871f2479e Mon Sep 17 00:00:00 2001
From: nopperl <54780682+nopperl@users.noreply.github.com>
Date: Fri, 19 Apr 2024 09:35:54 +0000
Subject: Implement the OLMo architecture (#6741)

* implement olmo architecture

* remove unused variable

* remove unused moe branch

* remove check for weight

* remove superfluous moe, bias and rope tensors

* clarified comment

* fix clamp_kqv setting

* remove obsolete parameter name filter
---
 gguf-py/gguf/constants.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'gguf-py/gguf/constants.py')

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index feae03e1..ba24065a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -135,6 +135,7 @@ class MODEL_ARCH(IntEnum):
     XVERSE     = auto()
     COMMAND_R  = auto()
     DBRX       = auto()
+    OLMO       = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -210,6 +211,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.XVERSE:         "xverse",
     MODEL_ARCH.COMMAND_R:      "command-r",
     MODEL_ARCH.DBRX:           "dbrx",
+    MODEL_ARCH.OLMO:           "olmo",
 }
 
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -695,6 +697,17 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN_EXP,
         MODEL_TENSOR.FFN_UP_EXP,
     ],
+    MODEL_ARCH.OLMO: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     # TODO
 }
 
-- 
cgit v1.2.3