summary | refs | log | tree | commit | diff
path: root/examples/server
diff options
context:
space:
mode:
author: SeungWon Jeong <65549245+redlion0929@users.noreply.github.com> 2024-03-09 21:27:58 +0900
committer: GitHub <noreply@github.com> 2024-03-09 14:27:58 +0200
commit: fb215c3832236fec7380c4fb618bd7154cb196ef (patch)
tree: 1c2d0eb8fce7d2c1f70024b1f2c7b4b35baaa029 /examples/server
parent: 2c4f566c88322ebf2f9bd11b01b5ebdaa0130b89 (diff)
server : normalize embeddings (#5956)
* output normalize embedding in '/v1/embeddings'
* common : reuse llama_embd_normalize
* common : better normalize impl
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server')
-rw-r--r-- examples/server/server.cpp | 8
1 file changed, 7 insertions, 1 deletion
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 8cff514f..796f3499 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1327,6 +1327,8 @@ struct server_context {
const int n_embd = llama_n_embd(model);
+ std::vector<float> embd_res(n_embd, 0.0f);
+
for (int i = 0; i < batch.n_tokens; ++i) {
if (!batch.logits[i] || batch.seq_id[i][0] != slot.id + 1) {
continue;
@@ -1350,8 +1352,10 @@ struct server_context {
continue;
}
+ llama_embd_normalize(embd, embd_res.data(), n_embd);
+
res.data = json {
- {"embedding", std::vector<float>(embd, embd + n_embd)},
+ {"embedding", embd_res},
};
}
@@ -3354,6 +3358,8 @@ int main(int argc, char ** argv) {
// get the result
server_task_result result = ctx_server.queue_results.recv(id_task);
ctx_server.queue_results.remove_waiting_task_id(id_task);
+
+ // append to the responses
responses.push_back(result.data);
}