author    Bruce MacDonald <brucewmacdonald@gmail.com>    2023-08-26 16:11:45 -0700
committer GitHub <noreply@github.com>                    2023-08-27 07:11:45 +0800
commit    c1ac54b77aaba10d029084d152be786102010eb2 (patch)
tree      e323617bad1fd6fc8d4393d3b756d4d16698c3b9 /examples/server
parent    730d9c681e339b76407659344e5a2cd50af7d7d5 (diff)
server : add `/detokenize` endpoint (#2802)
* Add a /detokenize endpoint to the example server
* remove trailing white-space
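As context for the change below, a minimal client-side sketch of the new endpoint: it POSTs `{"tokens": [...]}` and reads back `{"content": "..."}`, the request/response shape this commit documents. The host/port (the example server's defaults, assumed to be 127.0.0.1:8080) and the token IDs are placeholders; the sketch uses cpp-httplib, the same header the server itself is built on, and is an illustration, not part of the commit:

```cpp
// Sketch: call POST /detokenize with {"tokens": [...]} and print the
// JSON reply, e.g. {"content":"..."}. Host, port, and token IDs are
// assumptions, not values from the commit.
#include <iostream>
#include "httplib.h"  // cpp-httplib, bundled with examples/server

int main() {
    httplib::Client cli("127.0.0.1", 8080);
    auto res = cli.Post("/detokenize", R"({"tokens": [1, 15043, 3186]})",
                        "application/json");
    if (res && res->status == 200) {
        std::cout << res->body << std::endl;  // the detokenized content as JSON
    }
    return 0;
}
```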
Diffstat (limited to 'examples/server')
-rw-r--r--    examples/server/README.md     6
-rw-r--r--    examples/server/server.cpp   21
2 files changed, 27 insertions, 0 deletions
diff --git a/examples/server/README.md b/examples/server/README.md
index 7105e902..51760804 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -164,6 +164,12 @@ node index.js
 
     Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
 
+- **POST** `/detokenize`: Convert tokens to text.
+
+    *Options:*
+
+    `tokens`: Set the tokens to detokenize.
+
 - **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
 
     *Options:*
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 3300553f..a4b4d641 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1104,6 +1104,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
         {"tokens", tokens}};
 }
 
+static json format_detokenized_response(std::string content)
+{
+    return json{
+        {"content", content}};
+}
+
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value)
 {
@@ -1501,6 +1507,21 @@ int main(int argc, char **argv)
                 const json data = format_tokenizer_response(tokens);
                 return res.set_content(data.dump(), "application/json"); });
 
+    svr.Post("/detokenize", [&llama](const Request &req, Response &res)
+            {
+                auto lock = llama.lock();
+
+                const json body = json::parse(req.body);
+                std::string content;
+                if (body.count("tokens") != 0)
+                {
+                    const std::vector<llama_token> tokens = body["tokens"];
+                    content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
+                }
+
+                const json data = format_detokenized_response(content);
+                return res.set_content(data.dump(), "application/json"); });
+
     svr.Post("/embedding", [&llama](const Request &req, Response &res)
             {
                 auto lock = llama.lock();
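Paired with the existing `/tokenize` route (visible in the hunk context above), the new handler makes a full round trip possible. A hedged sketch under the same assumptions as before: the server's default 127.0.0.1:8080 is assumed, `/tokenize` is assumed to take a `content` field per the server README, and, as the README note quoted in the diff says, no `BOS` token or leading space is added on the tokenize side:

```cpp
// Round-trip sketch: text -> /tokenize -> /detokenize -> text.
// Uses the httplib.h / json.hpp headers bundled with examples/server;
// host and port are assumptions.
#include <iostream>
#include "httplib.h"
#include "json.hpp"

using json = nlohmann::json;

int main() {
    httplib::Client cli("127.0.0.1", 8080);

    // /tokenize returns {"tokens": [...]} for the given content.
    auto tok = cli.Post("/tokenize", json{{"content", "Hello world"}}.dump(),
                        "application/json");
    if (!tok || tok->status != 200) return 1;
    const json tokens = json::parse(tok->body)["tokens"];

    // /detokenize converts the IDs back; per the handler above it answers
    // {"content": "..."} (an empty string when the "tokens" key is missing).
    auto detok = cli.Post("/detokenize", json{{"tokens", tokens}}.dump(),
                          "application/json");
    if (!detok || detok->status != 200) return 1;
    std::cout << json::parse(detok->body)["content"].get<std::string>() << "\n";
    return 0;
}
```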