author    Bruce MacDonald <brucewmacdonald@gmail.com>    2023-08-26 16:11:45 -0700
committer GitHub <noreply@github.com>                    2023-08-27 07:11:45 +0800
commit    c1ac54b77aaba10d029084d152be786102010eb2 (patch)
tree      e323617bad1fd6fc8d4393d3b756d4d16698c3b9 /examples/server
parent    730d9c681e339b76407659344e5a2cd50af7d7d5 (diff)
server : add `/detokenize` endpoint (#2802)
* Add a /detokenize endpoint to the example server
* remove trailing white-space
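As context for the change below, a minimal client-side sketch of the new endpoint: it POSTs `{"tokens": [...]}` and reads back `{"content": "..."}`, the request/response shape this commit documents. The host/port (the example server's defaults, assumed to be 127.0.0.1:8080) and the token IDs are placeholders; the sketch uses cpp-httplib, the same header the server itself is built on, and is an illustration, not part of the commit:

```cpp
// Sketch: call POST /detokenize with {"tokens": [...]} and print the
// JSON reply, e.g. {"content":"..."}. Host, port, and token IDs are
// assumptions, not values from the commit.
#include <iostream>
#include "httplib.h"  // cpp-httplib, bundled with examples/server

int main() {
    httplib::Client cli("127.0.0.1", 8080);
    auto res = cli.Post("/detokenize", R"({"tokens": [1, 15043, 3186]})",
                        "application/json");
    if (res && res->status == 200) {
        std::cout << res->body << std::endl;  // the detokenized content as JSON
    }
    return 0;
}
```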
Diffstat (limited to 'examples/server')
-rw-r--r--    examples/server/README.md     6
-rw-r--r--    examples/server/server.cpp   21
2 files changed, 27 insertions, 0 deletions
diff --git a/examples/server/README.md b/examples/server/README.md
index 7105e902..51760804 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -164,6 +164,12 @@ node index.js
 
     Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
 
+- **POST** `/detokenize`: Convert tokens to text.
+
+    *Options:*
+
+    `tokens`: Set the tokens to detokenize.
+
 - **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
 
     *Options:*
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 3300553f..a4b4d641 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1104,6 +1104,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
         {"tokens", tokens}};
 }
 
+static json format_detokenized_response(std::string content)
+{
+    return json{
+        {"content", content}};
+}
+
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value)
 {
@@ -1501,6 +1507,21 @@ int main(int argc, char **argv)
                 const json data = format_tokenizer_response(tokens);
                 return res.set_content(data.dump(), "application/json"); });
 
+    svr.Post("/detokenize", [&llama](const Request &req, Response &res)
+            {
+                auto lock = llama.lock();
+
+                const json body = json::parse(req.body);
+                std::string content;
+                if (body.count("tokens") != 0)
+                {
+                    const std::vector<llama_token> tokens = body["tokens"];
+                    content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
+                }
+
+                const json data = format_detokenized_response(content);
+                return res.set_content(data.dump(), "application/json"); });
+
     svr.Post("/embedding", [&llama](const Request &req, Response &res)
             {
                 auto lock = llama.lock();
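Paired with the existing `/tokenize` route (visible in the hunk context above), the new handler makes a full round trip possible. A hedged sketch under the same assumptions as before: the server's default 127.0.0.1:8080 is assumed, `/tokenize` is assumed to take a `content` field per the server README, and, as the README note quoted in the diff says, no `BOS` token or leading space is added on the tokenize side:

```cpp
// Round-trip sketch: text -> /tokenize -> /detokenize -> text.
// Uses the httplib.h / json.hpp headers bundled with examples/server;
// host and port are assumptions.
#include <iostream>
#include "httplib.h"
#include "json.hpp"

using json = nlohmann::json;

int main() {
    httplib::Client cli("127.0.0.1", 8080);

    // /tokenize returns {"tokens": [...]} for the given content.
    auto tok = cli.Post("/tokenize", json{{"content", "Hello world"}}.dump(),
                        "application/json");
    if (!tok || tok->status != 200) return 1;
    const json tokens = json::parse(tok->body)["tokens"];

    // /detokenize converts the IDs back; per the handler above it answers
    // {"content": "..."} (an empty string when the "tokens" key is missing).
    auto detok = cli.Post("/detokenize", json{{"tokens", tokens}}.dump(),
                          "application/json");
    if (!detok || detok->status != 200) return 1;
    std::cout << json::parse(detok->body)["content"].get<std::string>() << "\n";
    return 0;
}
```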