summary | refs | log | tree | commit | diff
path: root/examples/server
diff options
context:
space:
mode:
Diffstat (limited to 'examples/server')
-rw-r--r-- examples/server/bench/bench.py 18
-rw-r--r-- examples/server/bench/script.js 1
2 files changed, 13 insertions, 6 deletions
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index ea5d3854..86eeeccf 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -16,6 +16,7 @@ import matplotlib
import matplotlib.dates
import matplotlib.pyplot as plt
import requests
+from statistics import mean
def main(args_in: list[str] | None = None) -> None:
@@ -109,6 +110,7 @@ def main(args_in: list[str] | None = None) -> None:
# Prometheus
end_time = time.time()
+ prometheus_metrics = {}
if is_server_listening("0.0.0.0", 9090):
metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
@@ -127,6 +129,7 @@ def main(args_in: list[str] | None = None) -> None:
values = metric_data['data']['result'][0]['values']
timestamps, metric_values = zip(*values)
metric_values = [float(value) for value in metric_values]
+ prometheus_metrics[metric] = metric_values
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
plt.figure(figsize=(16, 10), dpi=80)
plt.plot(timestamps_dt, metric_values, label=metric)
@@ -176,17 +179,20 @@ xychart-beta
# 140 chars max for commit status description
bench_results = {
+ "i": iterations,
"req": {
- "p90": data['metrics']["http_req_duration"]["p(90)"],
- "avg": data['metrics']["http_req_duration"]["avg"],
+ "p90": round(data['metrics']["http_req_duration"]["p(90)"], 2),
+ "avg": round(data['metrics']["http_req_duration"]["avg"], 2),
},
"pp": {
- "p90": data['metrics']["llamacpp_prompt_tokens"]["p(90)"],
- "avg": data['metrics']["llamacpp_prompt_tokens"]["avg"],
+ "p90": round(data['metrics']["llamacpp_prompt_tokens"]["p(90)"], 2),
+ "avg": round(data['metrics']["llamacpp_prompt_tokens"]["avg"], 2),
+ "0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
},
"tg": {
- "p90": data['metrics']["llamacpp_tokens_second"]["p(90)"],
- "avg": data['metrics']["llamacpp_tokens_second"]["avg"],
+ "p90": round(data['metrics']["llamacpp_tokens_second"]["p(90)"], 2),
+ "avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
+ "0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
},
}
with open("results.github.env", 'a') as github_env:
diff --git a/examples/server/bench/script.js b/examples/server/bench/script.js
index a4f5ac5a..dc41e8d9 100644
--- a/examples/server/bench/script.js
+++ b/examples/server/bench/script.js
@@ -87,6 +87,7 @@ export default function () {
],
"model": model,
"stream": false,
+ "seed": 42,
"max_tokens": max_tokens
}