diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2025-06-08 14:49:50 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2025-06-08 14:49:50 +0300 |
commit | 1eabdb420b3b7b8464bb2b44d9e797b141a580f6 (patch) | |
tree | bb82646712256a8f3155b19dd0d39e6b7d782bbe /examples | |
parent | 8a5f8573aefc23282200041abbfa12886083334a (diff) |
Revert "Rpc improvement (#480)"
This reverts commit 8a5f8573aefc23282200041abbfa12886083334a.
Diffstat (limited to 'examples')
-rw-r--r-- | examples/rpc/CMakeLists.txt | 6 | ||||
-rw-r--r-- | examples/rpc/rpc-server.cpp | 214 | ||||
-rw-r--r-- | examples/server/server.cpp | 2 | ||||
-rw-r--r-- | examples/server/utils.hpp | 1 |
4 files changed, 18 insertions, 205 deletions
diff --git a/examples/rpc/CMakeLists.txt b/examples/rpc/CMakeLists.txt index 41b22863..ae48fb98 100644 --- a/examples/rpc/CMakeLists.txt +++ b/examples/rpc/CMakeLists.txt @@ -1,4 +1,2 @@ -set(TARGET rpc-server) -add_executable(${TARGET} rpc-server.cpp) -target_link_libraries(${TARGET} PRIVATE ggml) -target_compile_features(${TARGET} PRIVATE cxx_std_17)
\ No newline at end of file +add_executable(rpc-server rpc-server.cpp) +target_link_libraries(rpc-server PRIVATE ggml llama) diff --git a/examples/rpc/rpc-server.cpp b/examples/rpc/rpc-server.cpp index 943c1b1c..6342e648 100644 --- a/examples/rpc/rpc-server.cpp +++ b/examples/rpc/rpc-server.cpp @@ -5,166 +5,33 @@ #ifdef GGML_USE_METAL #include "ggml-metal.h" #endif -#ifdef GGML_USE_VULKAN -#include "ggml-vulkan.h" -#endif -#ifdef GGML_USE_SYCL -#include "ggml-sycl.h" -#endif #include "ggml-rpc.h" #ifdef _WIN32 -# define DIRECTORY_SEPARATOR '\\' -# define NOMINMAX -# include <locale> # include <windows.h> -# include <fcntl.h> -# include <io.h> #else -# define DIRECTORY_SEPARATOR '/' # include <unistd.h> -# include <sys/stat.h> #endif #include <string> #include <stdio.h> -#include <algorithm> -#include <thread> -#include <fstream> -#include <filesystem> -#include <codecvt> - -namespace fs = std::filesystem; - -// NOTE: this is copied from common.cpp to avoid linking with libcommon -// returns true if successful, false otherwise -static bool fs_create_directory_with_parents(const std::string& path) { -#ifdef _WIN32 - std::wstring_convert<std::codecvt_utf8<wchar_t>> converter; - std::wstring wpath = converter.from_bytes(path); - - // if the path already exists, check whether it's a directory - const DWORD attributes = GetFileAttributesW(wpath.c_str()); - if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - return true; - } - - size_t pos_slash = 0; - - // process path from front to back, procedurally creating directories - while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) { - const std::wstring subpath = wpath.substr(0, pos_slash); - const wchar_t* test = subpath.c_str(); - - const bool success = CreateDirectoryW(test, NULL); - if (!success) { - const DWORD error = GetLastError(); - - // if the path already exists, ensure that it's a directory - if (error == ERROR_ALREADY_EXISTS) { - const DWORD attributes = GetFileAttributesW(subpath.c_str()); - if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) { - return false; - } - } - else { - return false; - } - } - - pos_slash += 1; - } - - return true; -#else - // if the path already exists, check whether it's a directory - struct stat info; - if (stat(path.c_str(), &info) == 0) { - return S_ISDIR(info.st_mode); - } - - size_t pos_slash = 1; // skip leading slashes for directory creation - - // process path from front to back, procedurally creating directories - while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) { - const std::string subpath = path.substr(0, pos_slash); - struct stat info; - - // if the path already exists, ensure that it's a directory - if (stat(subpath.c_str(), &info) == 0) { - if (!S_ISDIR(info.st_mode)) { - return false; - } - } - else { - // create parent directories - const int ret = mkdir(subpath.c_str(), 0755); - if (ret != 0) { - return false; - } - } - - pos_slash += 1; - } - - return true; -#endif // _WIN32 -} - -// NOTE: this is copied from common.cpp to avoid linking with libcommon -static std::string fs_get_cache_directory() { - std::string cache_directory = ""; - auto ensure_trailing_slash = [](std::string p) { - // Make sure to add trailing slash - if (p.back() != DIRECTORY_SEPARATOR) { - p += DIRECTORY_SEPARATOR; - } - return p; - }; - if (getenv("LLAMA_CACHE")) { - cache_directory = std::getenv("LLAMA_CACHE"); - } - else { -#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) - if (std::getenv("XDG_CACHE_HOME")) { - cache_directory = std::getenv("XDG_CACHE_HOME"); - } - else { - cache_directory = std::getenv("HOME") + std::string("/.cache/"); - } -#elif defined(__APPLE__) - cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); -#elif defined(_WIN32) - cache_directory = std::getenv("LOCALAPPDATA"); -#else -# error Unknown architecture -#endif - cache_directory = ensure_trailing_slash(cache_directory); - cache_directory += "llama.cpp"; - } - return ensure_trailing_slash(cache_directory); -} struct rpc_server_params { std::string host = "127.0.0.1"; int port = 50052; size_t backend_mem = 0; - bool use_cache = false; - int n_threads = std::max(1U, std::thread::hardware_concurrency() / 2); }; -static void print_usage(int /*argc*/, char** argv, rpc_server_params params) { +static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) { fprintf(stderr, "Usage: %s [options]\n\n", argv[0]); fprintf(stderr, "options:\n"); - fprintf(stderr, " -h, --help show this help message and exit\n"); - fprintf(stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n", params.n_threads); - fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); - fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); - fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); - fprintf(stderr, " -c, --cache enable local file cache\n"); + fprintf(stderr, " -h, --help show this help message and exit\n"); + fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); + fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); + fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); fprintf(stderr, "\n"); } -static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& params) { +static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & params) { std::string arg; for (int i = 1; i < argc; i++) { arg = argv[i]; @@ -173,18 +40,7 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa return false; } params.host = argv[i]; - } - else if (arg == "-t" || arg == "--threads") { - if (++i >= argc) { - return false; - } - params.n_threads = std::stoi(argv[i]); - if (params.n_threads <= 0) { - fprintf(stderr, "error: invalid number of threads: %d\n", params.n_threads); - return false; - } - } - else if (arg == "-p" || arg == "--port") { + } else if (arg == "-p" || arg == "--port") { if (++i >= argc) { return false; } @@ -192,21 +48,15 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa if (params.port <= 0 || params.port > 65535) { return false; } - } - else if (arg == "-c" || arg == "--cache") { - params.use_cache = true; - } - else if (arg == "-m" || arg == "--mem") { + } else if (arg == "-m" || arg == "--mem") { if (++i >= argc) { return false; } params.backend_mem = std::stoul(argv[i]) * 1024 * 1024; - } - else if (arg == "-h" || arg == "--help") { + } else if (arg == "-h" || arg == "--help") { print_usage(argc, argv, params); exit(0); - } - else { + } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); print_usage(argc, argv, params); exit(0); @@ -215,7 +65,7 @@ static bool rpc_server_params_parse(int argc, char** argv, rpc_server_params& pa return true; } -static ggml_backend_t create_backend(const rpc_server_params& params) { +static ggml_backend_t create_backend() { ggml_backend_t backend = NULL; #ifdef GGML_USE_CUDA fprintf(stderr, "%s: using CUDA backend\n", __func__); @@ -229,25 +79,12 @@ static ggml_backend_t create_backend(const rpc_server_params& params) { if (!backend) { fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); } -#elif GGML_USE_VULKAN - fprintf(stderr, "%s: using Vulkan backend\n", __func__); - backend = ggml_backend_vk_init(0); // init device 0 - if (!backend) { - fprintf(stderr, "%s: ggml_backend_vulkan_init() failed\n", __func__); - } -#elif GGML_USE_SYCL - fprintf(stderr, "%s: using SYCL backend\n", __func__); - backend = ggml_backend_sycl_init(0); // init device 0 - if (!backend) { - fprintf(stderr, "%s: ggml_backend_sycl_init() failed\n", __func__); - } #endif // if there aren't GPU Backends fallback to CPU backend if (!backend) { fprintf(stderr, "%s: using CPU backend\n", __func__); backend = ggml_backend_cpu_init(); - ggml_backend_cpu_set_n_threads(backend, params.n_threads); } return backend; } @@ -255,10 +92,6 @@ static ggml_backend_t create_backend(const rpc_server_params& params) { static void get_backend_memory(size_t * free_mem, size_t * total_mem) { #ifdef GGML_USE_CUDA ggml_backend_cuda_get_device_memory(0, free_mem, total_mem); -#elif GGML_USE_VULKAN - ggml_backend_vk_get_device_memory(0, free_mem, total_mem); -#elif GGML_USE_SYCL - ggml_backend_sycl_get_device_memory(0, free_mem, total_mem); #else #ifdef _WIN32 MEMORYSTATUSEX status; @@ -292,7 +125,7 @@ int main(int argc, char * argv[]) { fprintf(stderr, "\n"); } - ggml_backend_t backend = create_backend(params); + ggml_backend_t backend = create_backend(); if (!backend) { fprintf(stderr, "Failed to create backend\n"); return 1; @@ -302,28 +135,11 @@ int main(int argc, char * argv[]) { if (params.backend_mem > 0) { free_mem = params.backend_mem; total_mem = params.backend_mem; - } - else { + } else { get_backend_memory(&free_mem, &total_mem); } - const char * cache_dir = nullptr; - std::string cache_dir_str; - if (params.use_cache) { - cache_dir_str = fs_get_cache_directory() + "rpc/"; - if (!fs_create_directory_with_parents(cache_dir_str)) { - fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir_str.c_str()); - return 1; - } - cache_dir = cache_dir_str.c_str(); - } - printf("Starting RPC server v%d.%d.%d\n", - RPC_PROTO_MAJOR_VERSION, - RPC_PROTO_MINOR_VERSION, - RPC_PROTO_PATCH_VERSION); - printf(" endpoint : %s\n", endpoint.c_str()); - printf(" local cache : %s\n", cache_dir ? cache_dir : "n/a"); - printf(" backend memory : %zu MB\n", free_mem / (1024 * 1024)); - ggml_backend_rpc_start_server(backend, endpoint.c_str(), cache_dir, free_mem, total_mem); + printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024)); + start_rpc_server(backend, endpoint.c_str(), free_mem, total_mem); ggml_backend_free(backend); return 0; } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9e80bd7e..acd0581e 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -12,8 +12,6 @@ #endif // increase max payload length to allow use of larger context size #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576 -// disable Nagle's algorithm -#define CPPHTTPLIB_TCP_NODELAY true #include "httplib.h" // Change JSON_ASSERT from assert() to GGML_ASSERT: #define JSON_ASSERT GGML_ASSERT diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 1aaa445e..70be0748 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -6,6 +6,7 @@ // Change JSON_ASSERT from assert() to GGML_ASSERT: #define JSON_ASSERT GGML_ASSERT #include "json.hpp" + #include <string> #include <vector> #include <sstream> |