summaryrefslogtreecommitdiff
path: root/examples/llava/clip.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'examples/llava/clip.cpp')
-rw-r--r--examples/llava/clip.cpp36
1 files changed, 18 insertions, 18 deletions
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 2035554e..a0ed82d7 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1235,16 +1235,16 @@ struct clip_image_f32 * clip_image_f32_init() {
void clip_image_u8_free(struct clip_image_u8 * img) { delete img; }
void clip_image_f32_free(struct clip_image_f32 * img) { delete img; }
-void clip_image_u8_batch_free(struct clip_image_u8_batch & batch) {
- if (batch.size > 0) {
- delete[] batch.data;
- batch.size = 0;
+void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) {
+ if (batch->size > 0) {
+ delete[] batch->data;
+ batch->size = 0;
}
}
-void clip_image_f32_batch_free(struct clip_image_f32_batch & batch) {
- if (batch.size > 0) {
- delete[] batch.data;
- batch.size = 0;
+void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) {
+ if (batch->size > 0) {
+ delete[] batch->data;
+ batch->size = 0;
}
}
@@ -1497,7 +1497,7 @@ static std::vector<clip_image_u8*> divide_to_patches_u8(const clip_image_u8 & im
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
// res_imgs memory is being allocated here, previous allocations will be freed if found
-bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
+bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
bool pad_to_square = true;
if (!ctx->has_vision_encoder) {
printf("This gguf file seems to have no vision encoder\n");
@@ -1509,11 +1509,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
pad_to_square = false;
}
// free the previous res_imgs if any set
- if (res_imgs.size > 0) {
+ if (res_imgs->size > 0) {
clip_image_f32_batch_free(res_imgs);
}
- res_imgs.data = nullptr;
- res_imgs.size = 0;
+ res_imgs->data = nullptr;
+ res_imgs->size = 0;
// the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
// see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
@@ -1568,11 +1568,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square
patches.insert(patches.begin(), image_original_resize);
// clip_image_f32_batch_init(patches.size());
- res_imgs.size = patches.size();
- res_imgs.data = new clip_image_f32[res_imgs.size];
+ res_imgs->size = patches.size();
+ res_imgs->data = new clip_image_f32[res_imgs->size];
int num=0;
for (auto& patch : patches) {
- normalize_image_u8_to_f32(patch, &res_imgs.data[num], ctx->image_mean, ctx->image_std);
+ normalize_image_u8_to_f32(patch, &res_imgs->data[num], ctx->image_mean, ctx->image_std);
num++;
}
@@ -1660,9 +1660,9 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
// }
// res_imgs.push_back(res);
- res_imgs.size = 1;
- res_imgs.data = new clip_image_f32[res_imgs.size];
- res_imgs.data[0] = *res;
+ res_imgs->size = 1;
+ res_imgs->data = new clip_image_f32[res_imgs->size];
+ res_imgs->data[0] = *res;
clip_image_f32_free(res);
return true;