author    | firecoperana <xuqiaowei1124@gmail.com> | 2025-06-08 11:38:47 +0000
committer | GitHub <noreply@github.com>            | 2025-06-08 14:38:47 +0300
commit    | df170c83a554df526e25a825389e692669644c85 (patch)
tree      | 962efa23b4a7f341f5578ddfc8e171ecdbf8f869 /examples/server/public_legacy/completion.js
parent    | 9e567e385adacbc4710e94ee7223c5f6b0404699 (diff)
Webui improvement (#481)
* update webui
* add token/s in webui
* add webui files
* fix webui first message disappearing in some browsers
* add missing html files
---------
Co-authored-by: firecoperana <firecoperana>
Diffstat (limited to 'examples/server/public_legacy/completion.js')
-rw-r--r-- | examples/server/public_legacy/completion.js | 209 |
1 file changed, 209 insertions, 0 deletions
diff --git a/examples/server/public_legacy/completion.js b/examples/server/public_legacy/completion.js
new file mode 100644
index 00000000..30df7c2f
--- /dev/null
+++ b/examples/server/public_legacy/completion.js
@@ -0,0 +1,209 @@
+const paramDefaults = {
+  stream: true,
+  n_predict: 500,
+  temperature: 0.2,
+  stop: ["</s>"]
+};
+
+let generation_settings = null;
+
+
+// Completes the prompt as a generator. Recommended for most use cases.
+//
+// Example:
+//
+//    import { llama } from '/completion.js'
+//
+//    const request = llama("Tell me a joke", {n_predict: 800})
+//    for await (const chunk of request) {
+//      document.write(chunk.data.content)
+//    }
+//
+export async function* llama(prompt, params = {}, config = {}) {
+  let controller = config.controller;
+  const api_url = config.api_url?.replace(/\/+$/, '') || "";
+
+  if (!controller) {
+    controller = new AbortController();
+  }
+
+  const completionParams = { ...paramDefaults, ...params, prompt };
+
+  const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
+    method: 'POST',
+    body: JSON.stringify(completionParams),
+    headers: {
+      'Connection': 'keep-alive',
+      'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      ...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
+    },
+    signal: controller.signal,
+  });
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+
+  let content = "";
+  let leftover = ""; // Buffer for partially read lines
+
+  try {
+    let cont = true;
+
+    while (cont) {
+      const result = await reader.read();
+      if (result.done) {
+        break;
+      }
+
+      // Add any leftover data to the current chunk of data
+      const text = leftover + decoder.decode(result.value);
+
+      // Check if the last character is a line break
+      const endsWithLineBreak = text.endsWith('\n');
+
+      // Split the text into lines
+      let lines = text.split('\n');
+
+      // If the text doesn't end with a line break, then the last line is incomplete
+      // Store it in leftover to be added to the next chunk of data
+      if (!endsWithLineBreak) {
+        leftover = lines.pop();
+      } else {
+        leftover = ""; // Reset leftover if we have a line break at the end
+      }
+
+      // Parse all sse events and add them to result
+      const regex = /^(\S+):\s(.*)$/gm;
+      for (const line of lines) {
+        const match = regex.exec(line);
+        if (match) {
+          result[match[1]] = match[2];
+          if (result.data === '[DONE]') {
+            cont = false;
+            break;
+          }
+
+          // since we know this is llama.cpp, let's just decode the json in data
+          if (result.data) {
+            result.data = JSON.parse(result.data);
+            content += result.data.content;
+
+            // yield
+            yield result;
+
+            // if we got a stop token from server, we will break here
+            if (result.data.stop) {
+              if (result.data.generation_settings) {
+                generation_settings = result.data.generation_settings;
+              }
+              cont = false;
+              break;
+            }
+          }
+          if (result.error) {
+            try {
+              result.error = JSON.parse(result.error);
+              if (result.error.message.includes('slot unavailable')) {
+                // Throw an error to be caught by upstream callers
+                throw new Error('slot unavailable');
+              } else {
+                console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
+              }
+            } catch(e) {
+              console.error(`llama.cpp error ${result.error}`)
+            }
+          }
+        }
+      }
+    }
+  } catch (e) {
+    if (e.name !== 'AbortError') {
+      console.error("llama error: ", e);
+    }
+    throw e;
+  }
+  finally {
+    controller.abort();
+  }
+
+  return content;
+}
+
+// Call llama, return an event target that you can subscribe to
+//
+// Example:
+//
+//    import { llamaEventTarget } from '/completion.js'
+//
+//    const conn = llamaEventTarget(prompt)
+//    conn.addEventListener("message", (chunk) => {
+//      document.write(chunk.detail.content)
+//    })
+//
+export const llamaEventTarget = (prompt, params = {}, config = {}) => {
+  const eventTarget = new EventTarget();
+  (async () => {
+    let content = "";
+    for await (const chunk of llama(prompt, params, config)) {
+      if (chunk.data) {
+        content += chunk.data.content;
+        eventTarget.dispatchEvent(new CustomEvent("message", { detail: chunk.data }));
+      }
+      if (chunk.data.generation_settings) {
+        eventTarget.dispatchEvent(new CustomEvent("generation_settings", { detail: chunk.data.generation_settings }));
+      }
+      if (chunk.data.timings) {
+        eventTarget.dispatchEvent(new CustomEvent("timings", { detail: chunk.data.timings }));
+      }
+    }
+    eventTarget.dispatchEvent(new CustomEvent("done", { detail: { content } }));
+  })();
+  return eventTarget;
+}
+
+// Call llama, return a promise that resolves to the completed text. This does not support streaming
+//
+// Example:
+//
+//    llamaPromise(prompt).then((content) => {
+//      document.write(content)
+//    })
+//
+//    or
+//
+//    const content = await llamaPromise(prompt)
+//    document.write(content)
+//
+export const llamaPromise = (prompt, params = {}, config = {}) => {
+  return new Promise(async (resolve, reject) => {
+    let content = "";
+    try {
+      for await (const chunk of llama(prompt, params, config)) {
+        content += chunk.data.content;
+      }
+      resolve(content);
+    } catch (error) {
+      reject(error);
+    }
+  });
+};
+
+/**
+ * (deprecated)
+ */
+export const llamaComplete = async (params, controller, callback) => {
+  for await (const chunk of llama(params.prompt, params, { controller })) {
+    callback(chunk);
+  }
+}
+
+// Get the model info from the server. This is useful for getting the context window and so on.
+export const llamaModelInfo = async (config = {}) => {
+  if (!generation_settings) {
+    const api_url = config.api_url?.replace(/\/+$/, '') || "";
+    const props = await fetch(`${api_url}/props`).then(r => r.json());
+    generation_settings = props.default_generation_settings;
+  }
+  return generation_settings;
+}
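For context, here is a minimal sketch of how a page served next to this file might consume the new helpers, including the token/s display mentioned in the commit message. This is an illustration only: the element ids (#output, #speed) and the predicted_n / predicted_ms fields on the timings payload are assumptions based on the upstream llama.cpp server response, not something this commit defines.

// Hypothetical usage sketch for the helpers added by this commit.
import { llamaEventTarget } from '/completion.js'

const conn = llamaEventTarget("Tell me a joke", { n_predict: 256 });

// Append streamed content to the page as it arrives.
conn.addEventListener("message", (chunk) => {
  document.querySelector('#output').textContent += chunk.detail.content;
});

// "add token/s in webui": derive tokens per second from the timings event,
// assuming the server reports predicted_n and predicted_ms as upstream llama.cpp does.
conn.addEventListener("timings", (e) => {
  const t = e.detail;
  const tps = (t.predicted_n / t.predicted_ms) * 1000;
  document.querySelector('#speed').textContent = `${tps.toFixed(1)} tokens/s`;
});

// The "done" event carries the full concatenated completion.
conn.addEventListener("done", (e) => {
  console.log("full completion:", e.detail.content);
});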