| author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-07-27 07:55:01 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-27 07:55:01 +0200 |
| commit | 154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch) | |
| tree | 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /examples/json-schema-pydantic-example.py | |
| parent | 0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff) | |
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP
AVX2 and CUDA appear to work.
CUDA performance seems slightly (~1-2%) lower, as is so often
the case with llama.cpp/ggml after some "improvements" have been made.
* Merging mainline - fix Metal
* Remove check
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'examples/json-schema-pydantic-example.py')
| -rw-r--r-- | examples/json-schema-pydantic-example.py | 74 |

1 file changed, 0 insertions, 74 deletions
```diff
diff --git a/examples/json-schema-pydantic-example.py b/examples/json-schema-pydantic-example.py
deleted file mode 100644
index cc64e572..00000000
--- a/examples/json-schema-pydantic-example.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# Usage:
-#! ./llama-server -m some-model.gguf &
-#! pip install pydantic
-#! python json-schema-pydantic-example.py
-
-from pydantic import BaseModel, TypeAdapter
-from annotated_types import MinLen
-from typing import Annotated, List, Optional
-import json, requests
-
-if True:
-
-    def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
-        '''
-        Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
-        (llama.cpp server, llama-cpp-python, Anyscale / Together...)
-
-        The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
-        '''
-        if response_model:
-            type_adapter = TypeAdapter(response_model)
-            schema = type_adapter.json_schema()
-            messages = [{
-                "role": "system",
-                "content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
-            }] + messages
-            response_format={"type": "json_object", "schema": schema}
-
-        data = requests.post(endpoint, headers={"Content-Type": "application/json"},
-                             json=dict(messages=messages, response_format=response_format, **kwargs)).json()
-        if 'error' in data:
-            raise Exception(data['error']['message'])
-
-        content = data["choices"][0]["message"]["content"]
-        return type_adapter.validate_json(content) if type_adapter else content
-
-else:
-
-    # This alternative branch uses Instructor + OpenAI client lib.
-    # Instructor support streamed iterable responses, retry & more.
-    # (see https://python.useinstructor.com/)
-    #! pip install instructor openai
-    import instructor, openai
-    client = instructor.patch(
-        openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
-        mode=instructor.Mode.JSON_SCHEMA)
-    create_completion = client.chat.completions.create
-
-
-if __name__ == '__main__':
-
-    class QAPair(BaseModel):
-        question: str
-        concise_answer: str
-        justification: str
-
-    class PyramidalSummary(BaseModel):
-        title: str
-        summary: str
-        question_answers: Annotated[List[QAPair], MinLen(2)]
-        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
-
-    print("# Summary\n", create_completion(
-        model="...",
-        response_model=PyramidalSummary,
-        messages=[{
-            "role": "user",
-            "content": f"""
-                You are a highly efficient corporate document summarizer.
-                Create a pyramidal summary of an imaginary internal document about our company processes
-                (starting high-level, going down to each sub sections).
-                Keep questions short, and answers even shorter (trivia / quizz style).
-            """
-        }]))
```
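For anyone who relied on this example before its removal, the underlying pattern is unchanged: post a `response_format` of `{"type": "json_object", "schema": ...}` to an OpenAI-compatible `/v1/chat/completions` endpoint, then validate the reply with a Pydantic `TypeAdapter`. Below is a minimal sketch of that flow; the `Answer` model, the prompt, and the local server URL are illustrative assumptions, not part of the removed file.

```python
# Minimal sketch of the pattern the removed example demonstrated.
# Assumptions (not taken from the deleted file): a llama.cpp server is
# already running locally (./llama-server -m some-model.gguf) and exposes
# the OpenAI-compatible /v1/chat/completions endpoint with JSON-schema
# support; the Answer model and prompt are illustrative only.
import json

import requests
from pydantic import BaseModel, TypeAdapter


class Answer(BaseModel):
    question: str
    answer: str


adapter = TypeAdapter(Answer)
schema = adapter.json_schema()

data = requests.post(
    "http://localhost:8080/v1/chat/completions",
    headers={"Content-Type": "application/json"},
    json={
        "messages": [
            {"role": "system",
             "content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"},
            {"role": "user", "content": "What is the capital of France?"},
        ],
        # Same response_format shape the deleted example used.
        "response_format": {"type": "json_object", "schema": schema},
    },
).json()

if "error" in data:
    raise RuntimeError(data["error"]["message"])

# Validate and parse the model's JSON output into the Pydantic model.
result = adapter.validate_json(data["choices"][0]["message"]["content"])
print(result)
```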