diff options
author | goerch <jhr.walter@t-online.de> | 2023-08-22 23:10:42 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-23 00:10:42 +0300 |
commit | 46ef5b5fcf4c366e1fb27726b6394adbbf8fd0ea (patch) | |
tree | 96f771ef97596af6e59bdcfeea76d15a7c80153f /llama.cpp | |
parent | c63bb1d16a70c03440671b76954bb767513cead8 (diff) |
llama : fix whitespace escaping in tokenizer (#2724)
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 13 |
1 files changed, 3 insertions, 10 deletions
@@ -2253,18 +2253,11 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
 }
 
 static std::string llama_escape_whitespace(const std::string& text) {
-    std::string result;
-    bool escaping = false;
-    result += "\xe2\x96\x81";
+    std::string result = "\xe2\x96\x81";
     for (size_t offs = 0; offs < text.length(); ++offs) {
         if (text[offs] == ' ') {
-            if (!escaping) {
-                result += "\xe2\x96\x81";
-                escaping = true;
-            }
-        }
-        else {
-            escaping = false;
+            result += "\xe2\x96\x81";
+        } else {
             result += text[offs];
         }
     }