summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/server/server.cpp2
-rw-r--r--ggml-cuda.cu3
2 files changed, 4 insertions, 1 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 1f2c55f2..be23ad16 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1095,6 +1095,7 @@ struct llama_server_context
std::lock_guard<std::mutex> lock(mutex_results);
task_result res;
res.id = id;
+ res.stop = false;
res.error = true;
res.result_json = { { "content", error } };
queue_results.push_back(res);
@@ -1255,6 +1256,7 @@ struct llama_server_context
std::lock_guard<std::mutex> lock(mutex_tasks);
task_server task;
task.id = id_gen++;
+ task.target_id = 0;
task.data = data;
task.infill_mode = infill;
task.embedding_mode = embedding;
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 50e03de5..f0db7ae3 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -1,4 +1,5 @@
#include <algorithm>
+#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <limits>
@@ -8057,7 +8058,7 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
if (tensor->op == GGML_OP_MUL_MAT) {
if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
#ifndef NDEBUG
- fprintf(stderr, "%s: cannot compute %s: src0->ne[3] = %d, src1->ne[3] = %d - fallback to CPU\n", __func__, tensor->name, tensor->src[0]->ne[3], tensor->src[1]->ne[3]);
+ fprintf(stderr, "%s: cannot compute %s: src0->ne[3] = %" PRId64 ", src1->ne[3] = %" PRId64 " - fallback to CPU\n", __func__, tensor->name, tensor->src[0]->ne[3], tensor->src[1]->ne[3]);
#endif
return false;
}