summaryrefslogtreecommitdiff
path: root/gguf-py/gguf/gguf_reader.py
diff options
context:
space:
mode:
authorXuan Son Nguyen <thichthat@gmail.com>2024-04-28 17:36:18 +0200
committerGitHub <noreply@github.com>2024-04-28 17:36:18 +0200
commit7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac (patch)
treeab92b14895245a23730553dc06af68e75995c69c /gguf-py/gguf/gguf_reader.py
parentce023f6f2ff34fbe840e32e65d443d2fed7393de (diff)
gguf : enforce that tensor names are unique (#6905)
* not allow adding duplicated tensor name * no duplicated tensor while reading gguf * typo * throw exception inside llama_model_loader Co-authored-by: slaren <slarengh@gmail.com> --------- Co-authored-by: slaren <slarengh@gmail.com>
Diffstat (limited to 'gguf-py/gguf/gguf_reader.py')
-rw-r--r--gguf-py/gguf/gguf_reader.py8
1 files changed, 7 insertions, 1 deletions
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 33afac55..48ef6d4a 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -234,8 +234,14 @@ class GGUFReader:
def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
tensors = []
+ tensor_names = set() # keep track of name to prevent duplicated tensors
for field in fields:
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+ # check if there's any tensor having same name already in the list
+ tensor_name = str(bytes(name_data), encoding = 'utf-8')
+ if tensor_name in tensor_names:
+ raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+ tensor_names.add(tensor_name)
ggml_type = GGMLQuantizationType(raw_dtype[0])
n_elems = np.prod(dims)
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +273,7 @@ class GGUFReader:
item_count = n_bytes
item_type = np.uint8
tensors.append(ReaderTensor(
- name = str(bytes(name_data), encoding = 'utf-8'),
+ name = tensor_name,
tensor_type = ggml_type,
shape = dims,
n_elements = n_elems,