llava-cli : multiple images (#6969)

Co-authored-by: root <root@nenya.lothlorien.ca>
author: cpumaxx <163466046+cpumaxx@users.noreply.github.com> 2024-04-29 07:34:24 -0700
committer: GitHub <noreply@github.com> 2024-04-29 17:34:24 +0300
commit: ffe666572f98a686b17a2cd1dbf4c0a982e5ac0a (patch)
tree: 062ed2b2706163cdb2006b0204c4589e7da4f75a /common
parent: 24affa7db3c9db148854b0ab4fd63de8bca7d898 (diff)
2 files changed, 4 insertions, 4 deletions
diff --git a/common/common.cpp b/common/common.cpp
index aa494291..fe84039f 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -893,7 +893,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-        params.image = argv[i];
+        params.image.emplace_back(argv[i]);
         return true;
     }
     if (arg == "-i" || arg == "--interactive") {
@@ -1495,7 +1495,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  -ps N, --p-split N    speculative decoding split probability (default: %.1f)\n", (double)params.p_split);
     printf("  -cb, --cont-batching  enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
     printf("  --mmproj MMPROJ_FILE  path to a multimodal projector file for LLaVA. see examples/llava/README.md\n");
-    printf("  --image IMAGE_FILE    path to an image file. use with multimodal models\n");
+    printf("  --image IMAGE_FILE    path to an image file. use with multimodal models. Specify multiple times for batching\n");
     if (llama_supports_mlock()) {
         printf("  --mlock               force system to keep model in RAM rather than swapping or compressing\n");
     }
diff --git a/common/common.h b/common/common.h
index eea63a11..3233d90e 100644
--- a/common/common.h
+++ b/common/common.h
@@ -167,8 +167,8 @@ struct gpt_params {
     std::string cache_type_v = "f16"; // KV cache data type for the V
 
     // multimodal models (see examples/llava)
-    std::string mmproj = ""; // path to multimodal projector
-    std::string image  = ""; // path to an image file
+    std::string mmproj = "";        // path to multimodal projector
+    std::vector<std::string> image; // path to image file(s)
 };
 
 bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
author	cpumaxx <163466046+cpumaxx@users.noreply.github.com>	2024-04-29 07:34:24 -0700
committer	GitHub <noreply@github.com>	2024-04-29 17:34:24 +0300
commit	ffe666572f98a686b17a2cd1dbf4c0a982e5ac0a (patch)
tree	062ed2b2706163cdb2006b0204c4589e7da4f75a /common
parent	24affa7db3c9db148854b0ab4fd63de8bca7d898 (diff)