diff options
Diffstat (limited to 'convert.py')
-rwxr-xr-x | convert.py | 21 |
1 files changed, 10 insertions, 11 deletions
@@ -33,7 +33,7 @@ if 'NO_LOCAL_GGUF' not in os.environ: import gguf if TYPE_CHECKING: - from typing import TypeAlias + from typing_extensions import Self, TypeAlias if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'): faulthandler.register(signal.SIGUSR1) @@ -517,7 +517,7 @@ class LlamaHfVocab(Vocab): tokenizer_model = "llama" name = "hfft" - def __init__(self, base_path: Path, ignore_nonllama: bool = False): + def __init__(self, base_path: Path): fname_tokenizer = base_path / FAST_TOKENIZER_FILE # if this fails, FileNotFoundError propagates to caller with open(fname_tokenizer, encoding='utf-8') as f: @@ -525,9 +525,7 @@ class LlamaHfVocab(Vocab): # pre-check so we know if we need transformers tokenizer_model: dict[str, Any] = tokenizer_json['model'] - if ignore_nonllama: - pass # workaround incorrect use of this class for WordPiece - elif ( + if ( tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False) or tokenizer_json['decoder']['type'] != 'Sequence' ): @@ -647,16 +645,17 @@ def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray: class Tensor(ABC): + ndarray: NDArray data_type: DataType @abstractmethod - def astype(self, data_type: DataType) -> Tensor: ... + def astype(self, data_type: DataType) -> Self: ... @abstractmethod - def permute(self, n_head: int, n_head_kv: int) -> Tensor: ... + def permute(self, n_head: int, n_head_kv: int) -> Self: ... @abstractmethod - def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor: ... + def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> Self: ... @abstractmethod - def part(self, n_part: int) -> UnquantizedTensor: ... + def part(self, n_part: int) -> Self: ... @abstractmethod def to_ggml(self) -> GGMLCompatibleTensor: ... @@ -673,13 +672,13 @@ class UnquantizedTensor(Tensor): self.ndarray = ndarray self.data_type = NUMPY_TYPE_TO_DATA_TYPE[ndarray.dtype] - def astype(self, data_type: DataType) -> Tensor: + def astype(self, data_type: DataType) -> UnquantizedTensor: dtype = data_type.dtype if self.data_type == DT_BF16: self.ndarray = bf16_to_fp32(self.ndarray) return UnquantizedTensor(self.ndarray.astype(dtype)) - def to_ggml(self) -> UnquantizedTensor: + def to_ggml(self) -> Self: return self def permute_part(self, n_part: int, n_head: int, n_head_kv: int) -> UnquantizedTensor: |