Diffstat (limited to 'examples/server/tests')
-rw-r--r--  examples/server/tests/README.md                    |  2
-rw-r--r--  examples/server/tests/features/embeddings.feature  |  3
-rw-r--r--  examples/server/tests/features/environment.py      | 97
-rw-r--r--  examples/server/tests/features/server.feature      |  3
-rw-r--r--  examples/server/tests/features/steps/steps.py      | 37
-rw-r--r--  examples/server/tests/requirements.txt             |  1
6 files changed, 90 insertions, 53 deletions
diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 95a0353b..feb2b1d6 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -57,7 +57,7 @@ Feature or Scenario must be annotated with `@llama.cpp` to be included in the de
To run a scenario annotated with `@bug`, start:
```shell
-DEBUG=ON ./tests.sh --no-skipped --tags bug
+DEBUG=ON ./tests.sh --no-skipped --tags bug --stop
```
After changing logic in `steps.py`, ensure that the `@bug` and `@wrong_usage` scenarios are updated.
diff --git a/examples/server/tests/features/embeddings.feature b/examples/server/tests/features/embeddings.feature
index 57359b26..dcf1434f 100644
--- a/examples/server/tests/features/embeddings.feature
+++ b/examples/server/tests/features/embeddings.feature
@@ -4,7 +4,8 @@ Feature: llama.cpp server
Background: Server startup
Given a server listening on localhost:8080
- And a model file bert-bge-small/ggml-model-f16.gguf from HF repo ggml-org/models
+ And a model url https://huggingface.co/ggml-org/models/resolve/main/bert-bge-small/ggml-model-f16.gguf
+ And a model file ggml-model-f16.gguf
And a model alias bert-bge-small
And 42 as server seed
And 2 slots
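These two Background steps replace the single HF-download step, letting the server fetch the model itself via `--model-url` (see the `steps.py` hunk further down). For reference, a minimal sketch of the step being replaced, assuming it uses `huggingface_hub` (pinned in `requirements.txt`); the exact body in `steps.py` may differ:

```python
# Sketch (assumption): the old step fetched the GGUF up front via
# huggingface_hub; the new steps instead hand the URL to the server.
from behave import step
from huggingface_hub import hf_hub_download

@step('a model file {hf_file} from HF repo {hf_repo}')
def step_download_hf_model(context, hf_file, hf_repo):
    # hf_hub_download caches the file locally and returns its path
    context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file)
```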
diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py
index 8ad987e1..82104e92 100644
--- a/examples/server/tests/features/environment.py
+++ b/examples/server/tests/features/environment.py
@@ -1,10 +1,12 @@
-import errno
import os
+import signal
import socket
-import subprocess
+import sys
import time
+import traceback
from contextlib import closing
-import signal
+
+import psutil
def before_scenario(context, scenario):
@@ -20,33 +22,40 @@ def before_scenario(context, scenario):
def after_scenario(context, scenario):
- if context.server_process is None:
- return
- if scenario.status == "failed":
- if 'GITHUB_ACTIONS' in os.environ:
- print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n\n")
- if os.path.isfile('llama.log'):
- with closing(open('llama.log', 'r')) as f:
- for line in f:
- print(line)
- if not is_server_listening(context.server_fqdn, context.server_port):
- print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
-
- if not pid_exists(context.server_process.pid):
- assert False, f"Server not running pid={context.server_process.pid} ..."
-
- server_graceful_shutdown(context)
-
- # Wait few for socket to free up
- time.sleep(0.05)
-
- attempts = 0
- while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
- server_kill(context)
- time.sleep(0.1)
- attempts += 1
- if attempts > 5:
- server_kill_hard(context)
+ try:
+ if 'server_process' not in context or context.server_process is None:
+ return
+ if scenario.status == "failed":
+ if 'GITHUB_ACTIONS' in os.environ:
+ print(f"\x1b[33;101mSCENARIO FAILED: {scenario.name} server logs:\x1b[0m\n\n")
+ if os.path.isfile('llama.log'):
+ with closing(open('llama.log', 'r')) as f:
+ for line in f:
+ print(line)
+ if not is_server_listening(context.server_fqdn, context.server_port):
+ print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
+
+ if not pid_exists(context.server_process.pid):
+ assert False, f"Server not running pid={context.server_process.pid} ..."
+
+ server_graceful_shutdown(context)
+
+        # Wait a moment for the socket to free up
+ time.sleep(0.05)
+
+ attempts = 0
+ while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
+ server_kill(context)
+ time.sleep(0.1)
+ attempts += 1
+ if attempts > 5:
+ server_kill_hard(context)
+    except Exception:
+        print("error in after_scenario:\n")
+        traceback.print_exc(file=sys.stdout)
def server_graceful_shutdown(context):
@@ -67,11 +76,11 @@ def server_kill_hard(context):
path = context.server_path
print(f"Server dangling exits, hard killing force {pid}={path}...\n")
- if os.name == 'nt':
- process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode()
- print(process)
- else:
- os.kill(-pid, signal.SIGKILL)
+ try:
+ psutil.Process(pid).kill()
+ except psutil.NoSuchProcess:
+ return False
+ return True
def is_server_listening(server_fqdn, server_port):
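One behavioral note on this hunk: the removed `os.kill(-pid, signal.SIGKILL)` signalled the whole process group, whereas `psutil.Process(pid).kill()` terminates only that single process. If the server ever spawned children, a recursive kill would be needed; a sketch using psutil's `children()` API follows (`kill_proc_tree` is a hypothetical helper, not part of this change):

```python
import psutil

def kill_proc_tree(pid):
    """Hypothetical helper: approximate the old process-group kill by
    terminating all descendants before the parent."""
    try:
        parent = psutil.Process(pid)
    except psutil.NoSuchProcess:
        return False
    for child in parent.children(recursive=True):  # all descendants
        child.kill()
    parent.kill()
    return True
```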
@@ -84,17 +93,9 @@ def is_server_listening(server_fqdn, server_port):
def pid_exists(pid):
- """Check whether pid exists in the current process table."""
- if pid < 0:
+ try:
+ psutil.Process(pid)
+ except psutil.NoSuchProcess:
return False
- if os.name == 'nt':
- output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode()
- print(output)
- return "No tasks are running" not in output
- else:
- try:
- os.kill(pid, 0)
- except OSError as e:
- return e.errno == errno.EPERM
- else:
- return True
+ return True
+
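psutil also ships a built-in helper for exactly this check, so the new `pid_exists` could be reduced to a one-liner that behaves the same for the positive pids used here:

```python
import psutil

def pid_exists(pid):
    # psutil.pid_exists is part of psutil's public API
    return psutil.pid_exists(pid)
```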
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 5014f326..7448986e 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -4,7 +4,8 @@ Feature: llama.cpp server
Background: Server startup
Given a server listening on localhost:8080
- And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+ And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+ And a model file stories260K.gguf
And a model alias tinyllama-2
And 42 as server seed
# KV Cache corresponds to the total amount of tokens
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index a59a52d2..9e348d5f 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -5,6 +5,8 @@ import os
import re
import socket
import subprocess
+import sys
+import threading
import time
from contextlib import closing
from re import RegexFlag
@@ -32,6 +34,8 @@ def step_server_config(context, server_fqdn, server_port):
context.base_url = f'http://{context.server_fqdn}:{context.server_port}'
context.model_alias = None
+ context.model_file = None
+ context.model_url = None
context.n_batch = None
context.n_ubatch = None
context.n_ctx = None
@@ -65,6 +69,16 @@ def step_download_hf_model(context, hf_file, hf_repo):
print(f"model file: {context.model_file}\n")
+@step('a model file {model_file}')
+def step_model_file(context, model_file):
+ context.model_file = model_file
+
+
+@step('a model url {model_url}')
+def step_model_url(context, model_url):
+ context.model_url = model_url
+
+
@step('a model alias {model_alias}')
def step_model_alias(context, model_alias):
context.model_alias = model_alias
@@ -141,7 +155,8 @@ def step_start_server(context):
async def step_wait_for_the_server_to_be_started(context, expecting_status):
match expecting_status:
case 'healthy':
- await wait_for_health_status(context, context.base_url, 200, 'ok')
+ await wait_for_health_status(context, context.base_url, 200, 'ok',
+ timeout=30)
case 'ready' | 'idle':
await wait_for_health_status(context, context.base_url, 200, 'ok',
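`wait_for_health_status` itself is defined elsewhere in `steps.py` and is not part of this hunk. A rough sketch of what such a poll loop can look like, assuming aiohttp for the async request and a `/health` endpoint returning `{"status": ...}`; names and defaults here are illustrative:

```python
import asyncio
import time
import aiohttp

async def wait_for_health_status(context, base_url, expected_http_status,
                                 expected_health_status, timeout=3):
    # Poll /health until the expected HTTP code and health status appear,
    # or give up after `timeout` seconds.
    deadline = time.time() + timeout
    async with aiohttp.ClientSession() as session:
        while time.time() < deadline:
            try:
                async with session.get(f'{base_url}/health') as response:
                    if response.status == expected_http_status:
                        health = await response.json()
                        if health['status'] == expected_health_status:
                            return
            except aiohttp.ClientConnectorError:
                pass  # server socket not accepting connections yet
            await asyncio.sleep(0.1)
    assert False, f"server not '{expected_health_status}' after {timeout}s"
```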
@@ -1038,8 +1053,11 @@ def start_server_background(context):
server_args = [
'--host', server_listen_addr,
'--port', context.server_port,
- '--model', context.model_file
]
+ if context.model_file:
+ server_args.extend(['--model', context.model_file])
+ if context.model_url:
+ server_args.extend(['--model-url', context.model_url])
if context.n_batch:
server_args.extend(['--batch-size', context.n_batch])
if context.n_ubatch:
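With both context fields set by the new steps, the argv composed for the `server.feature` Background comes out roughly as below (illustrative only; binary path assumed, remaining flags elided):

```python
# Illustrative argv, not code from the change.
argv = [
    './server',
    '--host', '127.0.0.1',
    '--port', '8080',
    '--model', 'stories260K.gguf',
    '--model-url',
    'https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf',
]
```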
@@ -1079,8 +1097,23 @@ def start_server_background(context):
pkwargs = {
'creationflags': flags,
+ 'stdout': subprocess.PIPE,
+ 'stderr': subprocess.PIPE
}
context.server_process = subprocess.Popen(
[str(arg) for arg in [context.server_path, *server_args]],
**pkwargs)
+
+ def log_stdout(process):
+ for line in iter(process.stdout.readline, b''):
+ print(line.decode('utf-8'), end='')
+ thread_stdout = threading.Thread(target=log_stdout, args=(context.server_process,))
+ thread_stdout.start()
+
+ def log_stderr(process):
+ for line in iter(process.stderr.readline, b''):
+ print(line.decode('utf-8'), end='', file=sys.stderr)
+ thread_stderr = threading.Thread(target=log_stderr, args=(context.server_process,))
+ thread_stderr.start()
+
print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")
diff --git a/examples/server/tests/requirements.txt b/examples/server/tests/requirements.txt
index 2e4f42ad..c2c96010 100644
--- a/examples/server/tests/requirements.txt
+++ b/examples/server/tests/requirements.txt
@@ -3,4 +3,5 @@ behave~=1.2.6
huggingface_hub~=0.20.3
numpy~=1.24.4
openai~=0.25.0
+psutil~=5.9.8
prometheus-client~=0.20.0