diff options
Diffstat (limited to 'gguf-py/gguf')
-rw-r--r-- | gguf-py/gguf/constants.py | 4 | ||||
-rw-r--r-- | gguf-py/gguf/gguf_reader.py | 4 | ||||
-rw-r--r-- | gguf-py/gguf/gguf_writer.py | 5 | ||||
-rw-r--r-- | gguf-py/gguf/vocab.py | 42 |
4 files changed, 21 insertions, 34 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 6d597bfd..4f232e18 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from enum import Enum, IntEnum, auto from typing import Any @@ -854,8 +853,7 @@ class GGUFValueType(IntEnum): return GGUFValueType.INT32 # TODO: need help with 64-bit types in Python else: - print("Unknown type:", type(val)) - sys.exit() + raise ValueError(f"Unknown type: {type(val)}") # Note: Does not support GGML_QKK_64 diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py index 2bdb1552..db8525d8 100644 --- a/gguf-py/gguf/gguf_reader.py +++ b/gguf-py/gguf/gguf_reader.py @@ -4,6 +4,7 @@ # from __future__ import annotations +import logging import os from collections import OrderedDict from typing import Any, Literal, NamedTuple, TypeVar, Union @@ -27,6 +28,7 @@ from gguf.constants import ( GGUFValueType, ) +logger = logging.getLogger(__name__) READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION] @@ -142,7 +144,7 @@ class GGUFReader: # TODO: add option to generate error on duplicate keys # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}') - print(f'Warning: Duplicate key {field.name} at offset {field.offset}') + logger.warning(f'Duplicate key {field.name} at offset {field.offset}') self.fields[field.name + '_{}'.format(field.offset)] = field else: self.fields[field.name] = field diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index 089aece8..d9cfbf71 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import os import shutil import struct @@ -24,6 +25,8 @@ from .constants import ( TokenType, ) +logger = logging.getLogger(__name__) + class WriterState(Enum): EMPTY = auto() @@ -67,7 +70,7 @@ class GGUFWriter: self.use_temp_file = use_temp_file self.temp_file = None self.tensors = [] - print("gguf: This GGUF file is for {0} Endian only".format( + logger.info("gguf: This GGUF file is for {0} Endian only".format( "Big" if self.endianess == GGUFEndian.BIG else "Little", )) self.state = WriterState.EMPTY diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py index 378eaeca..c97a78f3 100644 --- a/gguf-py/gguf/vocab.py +++ b/gguf-py/gguf/vocab.py @@ -1,13 +1,15 @@ from __future__ import annotations +import logging import json import os -import sys from pathlib import Path from typing import Any, Callable from .gguf_writer import GGUFWriter +logger = logging.getLogger(__name__) + class SpecialVocab: merges: list[str] @@ -40,38 +42,29 @@ class SpecialVocab: def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None: if self.merges: if not quiet: - print(f'gguf: Adding {len(self.merges)} merge(s).') + logger.info(f'Adding {len(self.merges)} merge(s).') gw.add_token_merges(self.merges) elif self.load_merges: - print( - 'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.', - file = sys.stderr, - ) + logger.warning('Adding merges requested but no merges found, output may be non-functional.') for typ, tokid in self.special_token_ids.items(): id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None) if id_handler is None: - print( - f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping', - file = sys.stderr, - ) + logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping') continue if not quiet: - print(f'gguf: Setting special token type {typ} to {tokid}') + logger.info(f'Setting special token type {typ} to {tokid}') id_handler(tokid) for typ, value in self.add_special_token.items(): add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None) if add_handler is None: - print( - f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping', - file = sys.stderr, - ) + logger.warning(f'No handler for add_{typ}_token with value {value} - skipping') continue if not quiet: - print(f'gguf: Setting add_{typ}_token to {value}') + logger.info(f'Setting add_{typ}_token to {value}') add_handler(value) if self.chat_template is not None: if not quiet: - print(f'gguf: Setting chat_template to {self.chat_template}') + logger.info(f'Setting chat_template to {self.chat_template}') gw.add_chat_template(self.chat_template) def _load(self, path: Path) -> None: @@ -99,10 +92,7 @@ class SpecialVocab: continue parts = line.split(None, 3) if len(parts) != 2: - print( - f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring', - file = sys.stderr, - ) + logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring') continue merges.append(f'{parts[0]} {parts[1]}') self.merges = merges @@ -118,10 +108,7 @@ class SpecialVocab: return self.special_token_ids[typ] = tid return - print( - f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping', - file = sys.stderr, - ) + logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping') def _try_load_from_tokenizer_json(self, path: Path) -> bool: tokenizer_file = path / 'tokenizer.json' @@ -144,10 +131,7 @@ class SpecialVocab: if chat_template is None or isinstance(chat_template, (str, list)): self.chat_template = chat_template else: - print( - f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring', - file = sys.stderr - ) + logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring') for typ in self.special_token_types: add_entry = tokenizer_config.get(f'add_{typ}_token') if isinstance(add_entry, bool): |