From 46ef5b5fcf4c366e1fb27726b6394adbbf8fd0ea Mon Sep 17 00:00:00 2001
From: goerch
Date: Tue, 22 Aug 2023 23:10:42 +0200
Subject: llama : fix whitespace escaping in tokenizer (#2724)

---
 llama.cpp | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 6abdc44f..6c5da130 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2253,18 +2253,11 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
 }
 
 static std::string llama_escape_whitespace(const std::string& text) {
-    std::string result;
-    bool escaping = false;
-    result += "\xe2\x96\x81";
+    std::string result = "\xe2\x96\x81";
     for (size_t offs = 0; offs < text.length(); ++offs) {
         if (text[offs] == ' ') {
-            if (!escaping) {
-                result += "\xe2\x96\x81";
-                escaping = true;
-            }
-        }
-        else {
-            escaping = false;
+            result += "\xe2\x96\x81";
+        } else {
             result += text[offs];
         }
     }
-- 
cgit v1.2.3