author     firecoperana <xuqiaowei1124@gmail.com>  2025-06-08 11:38:47 +0000
committer  GitHub <noreply@github.com>  2025-06-08 14:38:47 +0300
commit     df170c83a554df526e25a825389e692669644c85 (patch)
tree       962efa23b4a7f341f5578ddfc8e171ecdbf8f869  /examples/server/public_legacy/completion.js
parent     9e567e385adacbc4710e94ee7223c5f6b0404699 (diff)
Webui improvement (#481)
* update webui
* add token/s in webui
* add webui files
* fix webui first message disappearing in some browsers
* add missing html files

Co-authored-by: firecoperana <firecoperana>
Diffstat (limited to 'examples/server/public_legacy/completion.js')
-rw-r--r--  examples/server/public_legacy/completion.js  209
1 file changed, 209 insertions, 0 deletions
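
For orientation before the diff: the new file is an ES module of streaming helpers for the legacy webui. Below is a minimal sketch of how its exports might be wired together, assuming a llama.cpp server reachable at a placeholder api_url and serving the /props endpoint used by llamaModelInfo; everything outside the module's own exports is illustrative and not part of the commit.

import { llama, llamaModelInfo } from './completion.js';

(async () => {
  // Placeholder base URL; adjust to wherever the server is actually listening.
  const config = { api_url: 'http://localhost:8080' };

  // Fetch the server's default generation settings (context size, sampling defaults, ...).
  const settings = await llamaModelInfo(config);
  console.log('server defaults:', settings);

  // Stream a completion chunk by chunk; each chunk carries the parsed SSE `data` payload.
  let text = '';
  for await (const chunk of llama('Tell me a joke', { n_predict: 128 }, config)) {
    text += chunk.data.content;
  }
  console.log(text);
})();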
diff --git a/examples/server/public_legacy/completion.js b/examples/server/public_legacy/completion.js
new file mode 100644
index 00000000..30df7c2f
--- /dev/null
+++ b/examples/server/public_legacy/completion.js
@@ -0,0 +1,209 @@
+const paramDefaults = {
+ stream: true,
+ n_predict: 500,
+ temperature: 0.2,
+ stop: ["</s>"]
+};
+
+let generation_settings = null;
+
+
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+// import { llama } from '/completion.js'
+//
+// const request = llama("Tell me a joke", {n_predict: 800})
+// for await (const chunk of request) {
+// document.write(chunk.data.content)
+// }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+ let controller = config.controller;
+ const api_url = config.api_url?.replace(/\/+$/, '') || "";
+
+ if (!controller) {
+ controller = new AbortController();
+ }
+
+ const completionParams = { ...paramDefaults, ...params, prompt };
+
+ const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
+ method: 'POST',
+ body: JSON.stringify(completionParams),
+ headers: {
+ 'Connection': 'keep-alive',
+ 'Content-Type': 'application/json',
+ 'Accept': 'text/event-stream',
+ ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
+ },
+ signal: controller.signal,
+ });
+
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder();
+
+ let content = "";
+ let leftover = ""; // Buffer for partially read lines
+
+ try {
+ let cont = true;
+
+ while (cont) {
+ const result = await reader.read();
+ if (result.done) {
+ break;
+ }
+
+ // Add any leftover data to the current chunk of data
+ const text = leftover + decoder.decode(result.value);
+
+ // Check if the last character is a line break
+ const endsWithLineBreak = text.endsWith('\n');
+
+ // Split the text into lines
+ let lines = text.split('\n');
+
+ // If the text doesn't end with a line break, then the last line is incomplete
+ // Store it in leftover to be added to the next chunk of data
+ if (!endsWithLineBreak) {
+ leftover = lines.pop();
+ } else {
+ leftover = ""; // Reset leftover if we have a line break at the end
+ }
+
+ // Parse all sse events and add them to result
+ const regex = /^(\S+):\s(.*)$/gm;
+ for (const line of lines) {
+ const match = regex.exec(line);
+ if (match) {
+ result[match[1]] = match[2];
+ if (result.data === '[DONE]') {
+ cont = false;
+ break;
+ }
+
+ // since we know this is llama.cpp, let's just decode the json in data
+ if (result.data) {
+ result.data = JSON.parse(result.data);
+ content += result.data.content;
+
+ // yield
+ yield result;
+
+ // if we got a stop token from server, we will break here
+ if (result.data.stop) {
+ if (result.data.generation_settings) {
+ generation_settings = result.data.generation_settings;
+ }
+ cont = false;
+ break;
+ }
+ }
+ if (result.error) {
+ try {
+ result.error = JSON.parse(result.error);
+ if (result.error.message.includes('slot unavailable')) {
+ // Throw an error to be caught by upstream callers
+ throw new Error('slot unavailable');
+ } else {
+ console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
+ }
+ } catch(e) {
+ console.error(`llama.cpp error ${result.error}`)
+ }
+ }
+ }
+ }
+ }
+ } catch (e) {
+ if (e.name !== 'AbortError') {
+ console.error("llama error: ", e);
+ }
+ throw e;
+ }
+ finally {
+ controller.abort();
+ }
+
+ return content;
+}
+
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+// import { llamaEventTarget } from '/completion.js'
+//
+// const conn = llamaEventTarget(prompt)
+// conn.addEventListener("message", (chunk) => {
+// document.write(chunk.detail.content)
+// })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+ const eventTarget = new EventTarget();
+ (async () => {
+ let content = "";
+ for await (const chunk of llama(prompt, params, config)) {
+ if (chunk.data) {
+ content += chunk.data.content;
+ eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
+ }
+ if (chunk.data.generation_settings) {
+ eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
+ }
+ if (chunk.data.timings) {
+ eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
+ }
+ }
+ eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
+ })();
+ return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+// llamaPromise(prompt).then((content) => {
+// document.write(content)
+// })
+//
+// or
+//
+// const content = await llamaPromise(prompt)
+// document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+ return new Promise(async (resolve, reject) => {
+ let content = "";
+ try {
+ for await (const chunk of llama(prompt, params, config)) {
+ content += chunk.data.content;
+ }
+ resolve(content);
+ } catch (error) {
+ reject(error);
+ }
+ });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+ for await (const chunk of llama(params.prompt, params, { controller })) {
+ callback(chunk);
+ }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
+export const llamaModelInfo = async (config = {}) => {
+ if (!generation_settings) {
+ const api_url = config.api_url?.replace(/\/+$/, '') || "";
+ const props = await fetch(`${api_url}/props`).then(r => r.json());
+ generation_settings = props.default_generation_settings;
+ }
+ return generation_settings;
+}
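
The commit message adds a token/s readout to the webui; one way the module's "timings" event could feed such a readout is sketched below. This is an illustration rather than the webui's actual wiring: the DOM ids are placeholders, and the predicted_per_second field is assumed to be present in the server's timing payload (it arrives with the final chunk).

import { llamaEventTarget } from './completion.js';

const conn = llamaEventTarget('Tell me a joke', { n_predict: 128 });

conn.addEventListener('message', (e) => {
  // e.detail is the parsed `data` object from one streamed chunk.
  document.getElementById('output').textContent += e.detail.content;
});

conn.addEventListener('timings', (e) => {
  // predicted_per_second is assumed to exist in the timing payload sent on the last chunk.
  const tps = e.detail.predicted_per_second;
  document.getElementById('speed').textContent = `${tps.toFixed(1)} tokens/s`;
});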