summaryrefslogtreecommitdiff
path: root/examples/server/tests/features
diff options
context:
space:
mode:
authorKawrakow <48489457+ikawrakow@users.noreply.github.com>2024-08-12 15:14:32 +0200
committerGitHub <noreply@github.com>2024-08-12 15:14:32 +0200
commit8f43e551038af2547b5c01d0e9edd641c0e4bd29 (patch)
tree07a4373620a9381d0b5c7189a475990a6feb48a5 /examples/server/tests/features
parentf5d1af61d79fb53ccfbac2e665e43208c07b083d (diff)
Merge mainline - Aug 12 2024 (#17)
* Merge mainline * Fix after merge * Remove CI check --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'examples/server/tests/features')
-rw-r--r--examples/server/tests/features/lora.feature36
-rw-r--r--examples/server/tests/features/steps/steps.py21
2 files changed, 57 insertions, 0 deletions
diff --git a/examples/server/tests/features/lora.feature b/examples/server/tests/features/lora.feature
new file mode 100644
index 00000000..7b85988a
--- /dev/null
+++ b/examples/server/tests/features/lora.feature
@@ -0,0 +1,36 @@
+@llama.cpp
+@lora
+Feature: llama.cpp server
+
+ Background: Server startup
+ Given a server listening on localhost:8080
+ And a model url https://huggingface.co/ggml-org/stories15M_MOE/resolve/main/stories15M_MOE-F16.gguf
+ And a model file stories15M_MOE-F16.gguf
+ And a model alias stories15M_MOE
+ And a lora adapter file from https://huggingface.co/ggml-org/stories15M_MOE/resolve/main/moe_shakespeare15M.gguf
+ And 42 as server seed
+ And 1024 as batch size
+ And 1024 as ubatch size
+ And 2048 KV cache size
+ And 64 max tokens to predict
+ And 0.0 temperature
+ Then the server is starting
+ Then the server is healthy
+
+ Scenario: Completion LoRA disabled
+ Given switch off lora adapter 0
+ Given a prompt:
+ """
+ Look in thy glass
+ """
+ And a completion request with no api error
+ Then 64 tokens are predicted matching little|girl|three|years|old
+
+ Scenario: Completion LoRA enabled
+ Given switch on lora adapter 0
+ Given a prompt:
+ """
+ Look in thy glass
+ """
+ And a completion request with no api error
+ Then 64 tokens are predicted matching eye|love|glass|sun
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index df0814cc..6705a34f 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -7,6 +7,7 @@ import subprocess
import sys
import threading
import time
+import requests
from collections.abc import Sequence
from contextlib import closing
from re import RegexFlag
@@ -70,6 +71,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
context.user_api_key = None
context.response_format = None
context.temperature = None
+ context.lora_file = None
context.tasks_result = []
context.concurrent_tasks = []
@@ -82,6 +84,12 @@ def step_download_hf_model(context, hf_file: str, hf_repo: str):
context.model_hf_file = hf_file
context.model_file = os.path.basename(hf_file)
@step('a lora adapter file from {lora_file_url}')
def step_download_lora_file(context, lora_file_url: str):
    """Download a LoRA adapter GGUF from *lora_file_url* and remember its path.

    The file is saved three directories up (tests run from
    examples/server/tests, so ../../../ lands in the repo root — TODO confirm
    against the server working directory) and its path is stored in
    context.lora_file so start_server_background can pass it via --lora.
    """
    file_name = lora_file_url.rsplit('/', 1)[-1]
    context.lora_file = f'../../../{file_name}'
    response = requests.get(lora_file_url)
    # Fail fast on 404/5xx: without this check an HTML error page would be
    # silently written to disk and later loaded as a (corrupt) adapter.
    response.raise_for_status()
    with open(context.lora_file, 'wb') as f:
        f.write(response.content)
@step('a model file {model_file}')
def step_model_file(context, model_file: str):
@@ -849,6 +857,17 @@ async def step_erase_slot(context, slot_id):
context.response = response
@step('switch {on_or_off} lora adapter {lora_id:d}')
@async_run_until_complete
async def toggle_lora_adapter(context, on_or_off: str, lora_id: int):
    """Enable ('on') or disable ('off') a server-side LoRA adapter.

    POSTs [{'id': lora_id, 'scale': 1|0}] to /lora-adapters and stores the
    raw response in context.response for later status-code assertions.
    """
    # Build the payload once: the original duplicated this expression for the
    # request and for the debug print, which risks the two drifting apart.
    payload = [{'id': lora_id, 'scale': 1 if on_or_off == 'on' else 0}]
    async with aiohttp.ClientSession() as session:
        async with session.post(f'{context.base_url}/lora-adapters',
                                json=payload,
                                headers={"Content-Type": "application/json"}) as response:
            context.response = response
    # Kept for test-log visibility, matching the original's debug output.
    print(payload)
+
@step('the server responds with status code {status_code:d}')
def step_server_responds_with_status_code(context, status_code):
assert context.response.status == status_code
@@ -1326,6 +1345,8 @@ def start_server_background(context):
server_args.extend(['--grp-attn-w', context.n_ga_w])
if context.debug:
server_args.append('--verbose')
+ if context.lora_file:
+ server_args.extend(['--lora', context.lora_file])
if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
server_args.extend(['--log-format', "text"])