diff options
author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-07-27 07:55:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-27 07:55:01 +0200 |
commit | 154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch) | |
tree | 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /examples/server/bench | |
parent | 0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff) |
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP
AVX2 and CUDA appear to work.
CUDA performance seems slightly (~1-2%) lower as it is so often
the case with llama.cpp/ggml after some "improvements" have been made.
* Merging mainline - fix Metal
* Remove check
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'examples/server/bench')
-rw-r--r-- | examples/server/bench/bench.py | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index 4fbbb203..2daac088 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import argparse import json import os @@ -59,10 +61,11 @@ def main(args_in: list[str] | None = None) -> None: sys.exit(1) # start the benchmark + iterations = 0 + data = {} try: start_benchmark(args) - iterations = 0 with open("results.github.env", 'w') as github_env: # parse output with open('k6-results.json', 'r') as bench_results: @@ -129,7 +132,7 @@ def main(args_in: list[str] | None = None) -> None: timestamps, metric_values = zip(*values) metric_values = [float(value) for value in metric_values] prometheus_metrics[metric] = metric_values - timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps] + timestamps_dt = [str(datetime.fromtimestamp(int(ts))) for ts in timestamps] plt.figure(figsize=(16, 10), dpi=80) plt.plot(timestamps_dt, metric_values, label=metric) plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7) @@ -156,7 +159,7 @@ def main(args_in: list[str] | None = None) -> None: plt.close() # Mermaid format in case images upload failed - with (open(f"{metric}.mermaid", 'w') as mermaid_f): + with open(f"{metric}.mermaid", 'w') as mermaid_f: mermaid = ( f"""--- config: @@ -278,7 +281,7 @@ def start_server_background(args): } server_process = subprocess.Popen( args, - **pkwargs) + **pkwargs) # pyright: ignore[reportArgumentType, reportCallIssue] def server_log(in_stream, out_stream): for line in iter(in_stream.readline, b''): |