diff options
Diffstat (limited to 'examples/llava/llava.cpp')
-rw-r--r-- | examples/llava/llava.cpp | 36 |
1 files changed, 18 insertions, 18 deletions
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index 29764757..9a990bb1 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -54,7 +54,7 @@ static std::pair<int, int> select_best_resolution(const std::pair<int, int>& ori int downscaled_height = static_cast<int>(original_height * scale); int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height); int wasted_resolution = (width * height) - effective_resolution; - // fprintf(stderr, "resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution); + // LOG_TEE("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution); if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) { max_effective_resolution = effective_resolution; min_wasted_resolution = wasted_resolution; @@ -154,13 +154,13 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *> model.newline = ggml_new_tensor_1d(model.ctx, GGML_TYPE_F32, newline_tmp->ne[0]); if (newline_tmp->backend != GGML_BACKEND_TYPE_CPU) { if (newline_tmp->buffer == NULL) { - printf("newline_tmp tensor buffer is NULL\n"); + LOG_TEE("newline_tmp tensor buffer is NULL\n"); } ggml_backend_tensor_get(newline_tmp, model.newline->data, 0, ggml_nbytes(newline_tmp)); } else { model.newline->data = newline_tmp->data; if (model.newline->data == NULL) { - printf("newline_tmp tensor data is NULL\n"); + LOG_TEE("newline_tmp tensor data is NULL\n"); } } @@ -224,7 +224,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli img_res_v.size = 0; img_res_v.data = nullptr; if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) { - fprintf(stderr, "%s: unable to preprocess image\n", __func__); + LOG_TEE("%s: unable to preprocess image\n", __func__); delete[] img_res_v.data; return false; } @@ -239,7 +239,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd); // image_embd shape is 576 x 4096 delete[] img_res_v.data; if (!encoded) { - fprintf(stderr, "Unable to encode image\n"); + LOG_TEE("Unable to encode image\n"); return false; } @@ -252,12 +252,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip)); // 576 patches * 4096 embeddings * 4 bytes = 9437184 const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside if (!encoded) { - fprintf(stderr, "Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); + LOG_TEE("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size); return false; } } const int64_t t_img_enc_batch_us = ggml_time_us(); - printf("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); + LOG_TEE("%s: %d segments encoded in %8.2f ms\n", __func__, (int)img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0); const int32_t * image_grid = clip_image_grid(ctx_clip); @@ -290,12 +290,12 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli // clip_image_save_to_bmp(*tmp, "image_feature.bmp"); } - printf("%s: image embedding created: %d tokens\n", __func__, *n_img_pos); + LOG_TEE("%s: image embedding created: %d tokens\n", __func__, *n_img_pos); const int64_t t_img_enc_end_us = ggml_time_us(); float t_img_enc_ms = (t_img_enc_end_us - t_img_enc_start_us) / 1000.0; - printf("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos); + LOG_TEE("\n%s: image encoded in %8.2f ms by CLIP (%8.2f ms per image patch)\n", __func__, t_img_enc_ms, t_img_enc_ms / *n_img_pos); return true; } @@ -305,7 +305,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * int n_llama_embd = llama_n_embd(llama_get_model(ctx_llama)); auto n_image_embd = clip_n_mmproj_embd(ctx_clip); if (n_image_embd != n_llama_embd) { - printf("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd); + LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_image_embd, n_llama_embd); return false; } return true; @@ -314,13 +314,13 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) { float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model if (!image_embd) { - fprintf(stderr, "Unable to allocate memory for image embeddings\n"); + LOG_TEE("Unable to allocate memory for image embeddings\n"); return false; } int n_img_pos; if (!encode_image_with_clip(ctx_clip, n_threads, img, image_embd, &n_img_pos)) { - fprintf(stderr, "%s: cannot encode image, aborting\n", __func__); + LOG_TEE("%s: cannot encode image, aborting\n", __func__); free(image_embd); return false; } @@ -340,7 +340,7 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_ } llama_batch batch = {int32_t(n_eval), nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, *n_past, 1, 0, }; if (llama_decode(ctx_llama, batch)) { - fprintf(stderr, "%s : failed to eval\n", __func__); + LOG_TEE("%s : failed to eval\n", __func__); return false; } *n_past += n_eval; @@ -352,7 +352,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c clip_image_u8 * img = clip_image_u8_init(); if (!clip_image_load_from_bytes(image_bytes, image_bytes_length, img)) { clip_image_u8_free(img); - fprintf(stderr, "%s: can't load image from bytes, is it a valid image?", __func__); + LOG_TEE("%s: can't load image from bytes, is it a valid image?", __func__); return NULL; } @@ -361,7 +361,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embed, &n_image_pos); if (!image_embed_result) { clip_image_u8_free(img); - fprintf(stderr, "%s: coulnd't embed the image\n", __func__); + LOG_TEE("%s: coulnd't embed the image\n", __func__); return NULL; } @@ -375,7 +375,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long *sizeOut) { auto file = fopen(path, "rb"); if (file == NULL) { - fprintf(stderr, "%s: can't read file %s\n", __func__, path); + LOG_TEE("%s: can't read file %s\n", __func__, path); return false; } @@ -385,7 +385,7 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long auto buffer = (unsigned char *)malloc(fileSize); // Allocate memory to hold the file data if (buffer == NULL) { - fprintf(stderr, "%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path); + LOG_TEE("%s: failed to alloc %ld bytes for file %s\n", __func__, fileSize, path); perror("Memory allocation error"); fclose(file); return false; @@ -410,7 +410,7 @@ struct llava_image_embed * llava_image_embed_make_with_filename(struct clip_ctx long image_bytes_length; auto loaded = load_file_to_bytes(image_path, &image_bytes, &image_bytes_length); if (!loaded) { - fprintf(stderr, "%s: failed to load %s\n", __func__, image_path); + LOG_TEE("%s: failed to load %s\n", __func__, image_path); return NULL; } |