Diffstat (limited to 'convert.py')
-rwxr-xr-x  convert.py | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/convert.py b/convert.py
index a37aeb5e..e860ac89 100755
--- a/convert.py
+++ b/convert.py
@@ -33,7 +33,7 @@ if 'NO_LOCAL_GGUF' not in os.environ:
 import gguf
 
 if TYPE_CHECKING:
-    from typing import TypeAlias
+    from typing_extensions import Self, TypeAlias
 
 if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
     faulthandler.register(signal.SIGUSR1)
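Note on the pattern: `Self` is consumed only by the type checker, so importing it from typing_extensions inside the `TYPE_CHECKING` block adds no runtime dependency, and with postponed annotation evaluation the annotations are never resolved when the script runs. A minimal sketch of the same pattern (the `Builder` class is illustrative, not from convert.py):

    from __future__ import annotations  # annotations stay as strings at runtime
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # seen only by mypy/pyright; typing_extensions need not be installed to run
        from typing_extensions import Self

    class Builder:
        def reset(self) -> Self:  # a subclass's reset() is inferred to return that subclass
            return self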
@@ -517,7 +517,7 @@ class LlamaHfVocab(Vocab):
     tokenizer_model = "llama"
     name = "hfft"
 
-    def __init__(self, base_path: Path, ignore_nonllama: bool = False):
+    def __init__(self, base_path: Path):
         fname_tokenizer = base_path / FAST_TOKENIZER_FILE
         # if this fails, FileNotFoundError propagates to caller
         with open(fname_tokenizer, encoding='utf-8') as f:
@@ -525,9 +525,7 @@ class LlamaHfVocab(Vocab):
         # pre-check so we know if we need transformers
         tokenizer_model: dict[str, Any] = tokenizer_json['model']
-        if ignore_nonllama:
-            pass  # workaround incorrect use of this class for WordPiece
-        elif (
+        if (
             tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
             or tokenizer_json['decoder']['type'] != 'Sequence'
         ):
@@ -647,16 +645,17 @@ def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
 class Tensor(ABC):
+    ndarray: NDArray
     data_type: DataType
 
     @abstractmethod
-    def astype(self, data_type: DataType) -> Tensor: ...
+    def astype(self, data_type: DataType) -> Self: ...
     @abstractmethod
-    def permute(self, n_head: int, n_head_kv: int) -> Tensor: ...
+    def permute(self, n_head: int, n_head_kv: int) -> Self: ...
     @abstractmethod
-    def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor: ...
+    def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Self: ...
     @abstractmethod
-    def part(self, n_part: int) -> UnquantizedTensor: ...
+    def part(self, n_part: int) -> Self: ...
     @abstractmethod
     def to_ggml(self) -> GGMLCompatibleTensor: ...
@@ -673,13 +672,13 @@ class UnquantizedTensor(Tensor):
         self.ndarray = ndarray
         self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype]
 
-    def astype(self, data_type: DataType) -> Tensor:
+    def astype(self, data_type: DataType) -> UnquantizedTensor:
         dtype = data_type.dtype
         if self.data_type == DT_BF16:
             self.ndarray = bf16_to_fp32(self.ndarray)
         return UnquantizedTensor(self.ndarray.astype(dtype))
 
-    def to_ggml(self) -> UnquantizedTensor:
+    def to_ggml(self) -> Self:
         return self
 
     def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor:
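Replacing the concrete return annotations (`Tensor`, `UnquantizedTensor`) with `Self` lets a type checker carry the concrete subclass type through these methods instead of widening results to the base class. A rough sketch of the effect, using a hypothetical `LazyTensor` subclass that is not taken from convert.py:

    from __future__ import annotations
    from abc import ABC, abstractmethod
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from typing_extensions import Self

    class Tensor(ABC):
        @abstractmethod
        def permute(self, n_head: int, n_head_kv: int) -> Self: ...

    class LazyTensor(Tensor):  # hypothetical subclass, for illustration only
        def permute(self, n_head: int, n_head_kv: int) -> Self:
            return self

    # With `-> Self`, reveal_type(LazyTensor().permute(32, 8)) is LazyTensor;
    # with the old `-> Tensor` annotations it would be reported as Tensor.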