summaryrefslogtreecommitdiff
path: root/examples/llava/clip.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'examples/llava/clip.cpp')
-rw-r--r--examples/llava/clip.cpp42
1 files changed, 1 insertions, 41 deletions
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 6d7b4950..690bca2e 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -119,7 +119,6 @@ static std::string format(const char * fmt, ...) {
#define TN_LLAVA_PROJ "mm.%d.%s"
#define TN_MVLM_PROJ_MLP "mm.model.mlp.%d.%s"
#define TN_MVLM_PROJ_BLOCK "mm.model.mb_block.%d.block.%d.%s"
-#define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
#define TN_IMAGE_NEWLINE "model.image_newline"
@@ -127,14 +126,12 @@ enum projector_type {
PROJECTOR_TYPE_MLP,
PROJECTOR_TYPE_MLP_NORM,
PROJECTOR_TYPE_LDP,
- PROJECTOR_TYPE_LDPV2,
PROJECTOR_TYPE_UNKNOWN,
};
static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
{ PROJECTOR_TYPE_MLP, "mlp" },
{ PROJECTOR_TYPE_LDP, "ldp" },
- { PROJECTOR_TYPE_LDPV2, "ldpv2"},
};
@@ -810,29 +807,6 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
}
embeddings = block_1;
}
- else if (ctx->proj_type == PROJECTOR_TYPE_LDPV2)
- {
- int n_patch = 24;
- struct ggml_tensor * mlp_0 = ggml_mul_mat(ctx0, model.mm_model_mlp_0_w, embeddings);
- mlp_0 = ggml_add(ctx0, mlp_0, model.mm_model_mlp_0_b);
- mlp_0 = ggml_gelu(ctx0, mlp_0);
- struct ggml_tensor * mlp_2 = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, mlp_0);
- mlp_2 = ggml_add(ctx0, mlp_2, model.mm_model_mlp_2_b);
- // mlp_2 ne = [2048, 576, 1, 1]
- // // AVG Pool Layer 2*2, strides = 2
- mlp_2 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_2, 1, 0, 2, 3));
- // mlp_2 ne = [576, 2048, 1, 1]
- mlp_2 = ggml_reshape_4d(ctx0, mlp_2, n_patch, n_patch, mlp_2->ne[1], mlp_2->ne[2]);
- // mlp_2 ne [24, 24, 2048, 1]
- mlp_2 = ggml_pool_2d(ctx0, mlp_2, GGML_OP_POOL_AVG, 2, 2, 2, 2, 0, 0);
- // weight ne = [3, 3, 2048, 1]
- struct ggml_tensor * peg_0 = ggml_conv_depthwise_2d(ctx0, model.mm_model_peg_0_w, mlp_2, 1, 1, 1, 1, 1, 1);
- peg_0 = ggml_add(ctx0, peg_0, mlp_2);
- peg_0 = ggml_cont(ctx0, ggml_permute(ctx0, peg_0, 1, 2, 0, 3));
- peg_0 = ggml_add(ctx0, peg_0, model.mm_model_peg_0_b);
- peg_0 = ggml_reshape_3d(ctx0, peg_0, peg_0->ne[0], peg_0->ne[1] * peg_0->ne[2], peg_0->ne[3]);
- embeddings = peg_0;
- }
else {
GGML_ASSERT(false);
}
@@ -1203,18 +1177,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
vision_model.mm_model_block_2_block_2_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "0.weight"));
vision_model.mm_model_block_2_block_2_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.weight"));
vision_model.mm_model_block_2_block_2_1_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_BLOCK, 2, 2, "1.bias"));
- }
- else if (new_clip->proj_type == PROJECTOR_TYPE_LDPV2)
- {
- // MobilVLM_V2 projection
- vision_model.mm_model_mlp_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "weight"));
- vision_model.mm_model_mlp_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 0, "bias"));
- vision_model.mm_model_mlp_2_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "weight"));
- vision_model.mm_model_mlp_2_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 2, "bias"));
- vision_model.mm_model_peg_0_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "weight"));
- vision_model.mm_model_peg_0_b = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_PEG, 0, "bias"));
- }
- else {
+ } else {
std::string proj_type = PROJECTOR_TYPE_NAMES[new_clip->proj_type];
throw std::runtime_error(format("%s: don't support projector with: %s currently\n", __func__, proj_type.c_str()));
}
@@ -2003,9 +1966,6 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
if (ctx->proj_type == PROJECTOR_TYPE_LDP) {
return ctx->vision_model.mm_model_block_1_block_2_1_b->ne[0];
}
- if (ctx->proj_type == PROJECTOR_TYPE_LDPV2) {
- return ctx->vision_model.mm_model_peg_0_b->ne[0];
- }
if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
return ctx->vision_model.mm_2_b->ne[0];
}