author     Jorge A <161275481+jorgealias@users.noreply.github.com>  2024-02-28 01:39:15 -0700
committer  GitHub <noreply@github.com>  2024-02-28 10:39:15 +0200
commit     efc72253f7987ed7bdc8bde9d9fa5c7cac2f6292 (patch)
tree       9f208051c3b76fa9817b748e9d2b805b439d75a5 /examples/server/tests/features/steps/steps.py
parent     7c4263d4261d6ee6f0539d53eb9e1b4d120ba8af (diff)
server : add "/chat/completions" alias for "/v1/chat/completions" (#5722)
* Add "/chat/completions" as alias for "/v1/chat/completions" * merge to upstream master * minor : fix trailing whitespace --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server/tests/features/steps/steps.py')
-rw-r--r--  examples/server/tests/features/steps/steps.py  28
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index ad87fcb8..381da105 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -231,6 +231,7 @@ async def step_oai_chat_completions(context, api_error):
completion = await oai_chat_completions(context.prompts.pop(),
context.system_prompt,
context.base_url,
+ '/v1/chat',
False,
model=context.model if hasattr(context, 'model') else None,
@@ -288,6 +289,28 @@ async def step_oai_chat_completions(context):
# user_prompt is inserted automatically
context.system_prompt,
context.base_url,
+ '/v1/chat/completions',
+ True, # async_client
+ model=context.model
+ if hasattr(context, 'model') else None,
+ n_predict=context.n_predict
+ if hasattr(context, 'n_predict') else None,
+ enable_streaming=context.enable_streaming
+ if hasattr(context, 'enable_streaming') else None,
+ server_seed=context.server_seed
+ if hasattr(context, 'server_seed') else None,
+ user_api_key=context.user_api_key
+ if hasattr(context, 'user_api_key') else None)
+
+
+@step(u'concurrent OAI completions requests no v1')
+@async_run_until_complete
+async def step_oai_chat_completions(context):
+ await concurrent_requests(context, oai_chat_completions,
+ # user_prompt is inserted automatically
+ context.system_prompt,
+ context.base_url,
+ '/chat/completions',
True, # async_client
model=context.model
if hasattr(context, 'model') else None,
@@ -497,6 +520,7 @@ async def request_completion(prompt,
async def oai_chat_completions(user_prompt,
system_prompt,
base_url,
+ base_path,
async_client,
debug=False,
model=None,
@@ -537,7 +561,7 @@ async def oai_chat_completions(user_prompt,
origin = 'llama.cpp'
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
async with aiohttp.ClientSession() as session:
- async with session.post(f'{base_url}/v1/chat/completions',
+ async with session.post(f'{base_url}{base_path}',
json=payload,
headers=headers) as response:
if enable_streaming:
@@ -579,7 +603,7 @@ async def oai_chat_completions(user_prompt,
else:
try:
openai.api_key = user_api_key
- openai.api_base = f'{base_url}/v1/chat'
+ openai.api_base = f'{base_url}{base_path}'
chat_completion = openai.Completion.create(
messages=payload['messages'],
model=model,
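With base_path now a positional parameter between base_url and async_client, every call site of oai_chat_completions must name the endpoint explicitly. A hedged sketch of an updated call; the import path, server address, and prompt are assumptions, not part of the commit:

import asyncio
from features.steps.steps import oai_chat_completions  # assumed import path

async def demo():
    completion = await oai_chat_completions(
        'Write a haiku',          # user_prompt
        '',                       # system_prompt
        'http://localhost:8080',  # base_url (assumed server address)
        '/chat/completions',      # base_path: the new alias, no /v1 prefix
        True,                     # async_client: take the aiohttp code path
        enable_streaming=False)
    print(completion)

asyncio.run(demo())

Note the asymmetry the commit preserves: the synchronous openai client is pointed at a base like '/v1/chat' (the 0.x library appends '/completions' itself), while the aiohttp path receives the full path such as '/chat/completions'.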