llama : speedup tokenization (#2831)

* Speedup tokenization

  On current master it takes ~3.2 seconds to tokenize Wikitext. With this change it becomes ~525 ms.

* Fixit: it was missing the piece after the last found occurrence

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2023-08-27 16:50:33 +0300
committer: GitHub <noreply@github.com> 2023-08-27 16:50:33 +0300
commit: 463173a6c0ff353055eb90665794884c888c790f (patch)
tree: 4868e5ed0a6924410c91b149a6a630ea75ea06de /llama.cpp
parent: eaa13a48ff4136f01c1cdb79cacd61b67ec53095 (diff)
1 files changed, 10 insertions, 5 deletions
diff --git a/llama.cpp b/llama.cpp
index 0d12d9cc..0bb8fcd6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -114,12 +114,17 @@ static size_t utf8_len(char src) {
 }
 
 void replace_all(std::string & s, const std::string & search, const std::string & replace) {
-    for (size_t pos = 0; ; pos += replace.length()) {
-        pos = s.find(search, pos);
-        if (pos == std::string::npos) break;
-        s.erase(pos, search.length());
-        s.insert(pos, replace);
+    std::string result;
+    for (size_t pos = 0; ; pos += search.length()) {
+        auto new_pos = s.find(search, pos);
+        if (new_pos == std::string::npos) {
+            result += s.substr(pos, s.size() - pos);
+            break;
+        }
+        result += s.substr(pos, new_pos - pos) + replace;
+        pos = new_pos;
     }
+    s = std::move(result);
 }
 
 static void zeros(std::ofstream & file, size_t n) {
author	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2023-08-27 16:50:33 +0300
committer	GitHub <noreply@github.com>	2023-08-27 16:50:33 +0300
commit	463173a6c0ff353055eb90665794884c888c790f (patch)
tree	4868e5ed0a6924410c91b149a6a630ea75ea06de /llama.cpp
parent	eaa13a48ff4136f01c1cdb79cacd61b67ec53095 (diff)