summary | refs | log | tree | commit | diff
path: root/llama.cpp
diff options
context:
space:
mode:
author goerch <jhr.walter@t-online.de> 2023-08-22 23:10:42 +0200
committer GitHub <noreply@github.com> 2023-08-23 00:10:42 +0300
commit 46ef5b5fcf4c366e1fb27726b6394adbbf8fd0ea (patch)
tree 96f771ef97596af6e59bdcfeea76d15a7c80153f /llama.cpp
parent c63bb1d16a70c03440671b76954bb767513cead8 (diff)
llama : fix whitespace escaping in tokenizer (#2724)
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 13
1 files changed, 3 insertions, 10 deletions
diff --git a/llama.cpp b/llama.cpp
index 6abdc44f..6c5da130 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2253,18 +2253,11 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
}
static std::string llama_escape_whitespace(const std::string& text) {
- std::string result;
- bool escaping = false;
- result += "\xe2\x96\x81";
+ std::string result = "\xe2\x96\x81";
for (size_t offs = 0; offs < text.length(); ++offs) {
if (text[offs] == ' ') {
- if (!escaping) {
- result += "\xe2\x96\x81";
- escaping = true;
- }
- }
- else {
- escaping = false;
+ result += "\xe2\x96\x81";
+ } else {
result += text[offs];
}
}