path: root/gguf-py/gguf/constants.py
author    liuwei-git <14815172+liuwei-git@users.noreply.github.com>  2024-05-22 04:28:32 +0800
committer GitHub <noreply@github.com>  2024-05-21 23:28:32 +0300
commit    201cc11afa0a1950e1f632390b2ac6c937a0d8f0 (patch)
tree      440fb7ecd80b48772a955a80855db29677d172a2 /gguf-py/gguf/constants.py
parent    6369bf04336ab60e5c892dd77a3246df91015147 (diff)
llama : add phi3 128K model support (#7225)
* add phi3 128k support in convert-hf-to-gguf
* add phi3 128k support in cuda
* address build warnings on llama.cpp
* adjust index value in cuda long rope freq factors
* add long rope support in ggml cpu backend
* make freq factors only depend on ctx size
* remove unused rope scaling type 'su' from gguf converter
* fix lint warnings on convert-hf-to-gguf.py
* set to the short freq factor when context size is smaller than the trained context size
* add one line of comments
* metal : support rope freq_factors
* ggml : update ggml_rope_ext API to support freq. factors
* backends : add dev messages to support rope freq. factors
* minor : style
* tests : update to use new rope API
* backends : fix pragma semicolons
* minor : cleanup
* llama : move rope factors from KV header to tensors
* llama : remove tmp assert
* cuda : fix compile warning
* convert : read/write n_head_kv
* llama : fix uninitialized tensors

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
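The core mechanic of this patch is choosing between two sets of RoPE frequency factors depending on the requested context size relative to the model's trained context size. A minimal sketch of that selection in Python, assuming illustrative names (select_rope_freq_factors, n_ctx, n_ctx_orig are not the actual llama.cpp symbols):

    # Hedged sketch of the long/short freq-factor selection described above;
    # names are illustrative, not llama.cpp's.
    def select_rope_freq_factors(n_ctx: int, n_ctx_orig: int,
                                 factors_long: list[float],
                                 factors_short: list[float]) -> list[float]:
        # Per the commit message: use the short factors when the requested
        # context size does not exceed the trained context size.
        if n_ctx > n_ctx_orig:
            return factors_long
        return factors_short

    # Example: a phi3-style model trained at 4096 tokens, run at a 128K context.
    factors = select_rope_freq_factors(131072, 4096, [1.5] * 48, [1.0] * 48)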
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r--  gguf-py/gguf/constants.py  17
1 file changed, 11 insertions, 6 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 692120f4..42df2e4d 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -57,12 +57,13 @@ class Keys:
         CAUSAL = "{arch}.attention.causal"

     class Rope:
-        DIMENSION_COUNT      = "{arch}.rope.dimension_count"
-        FREQ_BASE            = "{arch}.rope.freq_base"
-        SCALING_TYPE         = "{arch}.rope.scaling.type"
-        SCALING_FACTOR       = "{arch}.rope.scaling.factor"
-        SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
-        SCALING_FINETUNED    = "{arch}.rope.scaling.finetuned"
+        DIMENSION_COUNT      = "{arch}.rope.dimension_count"
+        FREQ_BASE            = "{arch}.rope.freq_base"
+        SCALING_TYPE         = "{arch}.rope.scaling.type"
+        SCALING_FACTOR       = "{arch}.rope.scaling.factor"
+        SCALING_ATTN_FACTOR  = "{arch}.rope.scaling.attn_factor"
+        SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
+        SCALING_FINETUNED    = "{arch}.rope.scaling.finetuned"

     class SSM:
         CONV_KERNEL = "{arch}.ssm.conv_kernel"
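The Keys.Rope entries above are plain Python format strings templated on the architecture name, so the new attn_factor key resolves to a per-model KV key. A small sketch of how such a key is formatted ("phi3" is just an example architecture value):

    # The KV key templates substitute the architecture name at read/write time.
    SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"

    key = SCALING_ATTN_FACTOR.format(arch="phi3")
    print(key)  # phi3.rope.scaling.attn_factor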
@@ -148,6 +149,8 @@ class MODEL_TENSOR(IntEnum):
     OUTPUT             = auto()
     OUTPUT_NORM        = auto()
     ROPE_FREQS         = auto()
+    ROPE_FACTORS_LONG  = auto()
+    ROPE_FACTORS_SHORT = auto()
     ATTN_Q             = auto()
     ATTN_K             = auto()
     ATTN_V             = auto()
@@ -225,6 +228,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.OUTPUT_NORM:        "output_norm",
     MODEL_TENSOR.OUTPUT:             "output",
     MODEL_TENSOR.ROPE_FREQS:         "rope_freqs",
+    MODEL_TENSOR.ROPE_FACTORS_LONG:  "rope_factors_long",
+    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
     MODEL_TENSOR.ATTN_NORM:          "blk.{bid}.attn_norm",
     MODEL_TENSOR.ATTN_NORM_2:        "blk.{bid}.attn_norm_2",
     MODEL_TENSOR.ATTN_QKV:           "blk.{bid}.attn_qkv",
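For context, a condensed sketch of how the enum and the TENSOR_NAMES mapping from the two hunks above work together; only a few members are reproduced, and the {bid} substitution for per-block tensors follows the same format-string convention as the Rope keys:

    from enum import IntEnum, auto

    class MODEL_TENSOR(IntEnum):
        ROPE_FACTORS_LONG  = auto()
        ROPE_FACTORS_SHORT = auto()
        ATTN_NORM          = auto()

    TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
        MODEL_TENSOR.ROPE_FACTORS_LONG:  "rope_factors_long",
        MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
        MODEL_TENSOR.ATTN_NORM:          "blk.{bid}.attn_norm",
    }

    # The new rope factor tensors are global (no block index), while
    # per-block tensors substitute {bid} with the layer number.
    print(TENSOR_NAMES[MODEL_TENSOR.ROPE_FACTORS_LONG])        # rope_factors_long
    print(TENSOR_NAMES[MODEL_TENSOR.ATTN_NORM].format(bid=0))  # blk.0.attn_norm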