diff --git a/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py b/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py
deleted file mode 100644
index 6bd9c799f80..00000000000
--- a/backends/arm/test/models/Qwen3_VL/test_qwen3_vl_model.py
+++ /dev/null
@@ -1,268 +0,0 @@
-# Copyright 2026 Arm Limited and/or its affiliates.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Tuple
-
-import pytest
-import torch
-import torch.nn.functional as F
-from executorch.backends.arm.test import common
-from executorch.backends.arm.test.models.Qwen3_VL.qwen3_vl_test_config import (
-    get_qwen3_vl_2b_instruct_checkpoint_config,
-)
-from executorch.backends.arm.test.tester.test_pipeline import (
-    TosaPipelineFP,
-    VgfPipeline,
-)
-from transformers.models.qwen3_vl.modeling_qwen3_vl import (
-    Qwen3VLTextModel,
-    Qwen3VLVisionModel,
-)
-
-input_t = Tuple[torch.Tensor, ...]
-
-
-def _make_qwen3_vl_2b_instruct_layer_config():
-    config = get_qwen3_vl_2b_instruct_checkpoint_config()
-    config.text_config._attn_implementation = "sdpa"
-    config.vision_config._attn_implementation = "sdpa"
-    return config
-
-
-def _make_text_position_ids(
-    batch_size: int, seq_length: int, device: torch.device
-) -> torch.Tensor:
-    return torch.arange(seq_length, device=device).unsqueeze(0).repeat(batch_size, 1)
-
-
-def _make_image_grid_thw(device: torch.device) -> torch.Tensor:
-    return torch.tensor([[1, 4, 4]], dtype=torch.long, device=device)
-
-
-def _make_pixel_values(config, device: torch.device) -> torch.Tensor:
-    grid_thw = _make_image_grid_thw(device)
-    patch_volume = (
-        config.vision_config.in_channels
-        * config.vision_config.temporal_patch_size
-        * config.vision_config.patch_size
-        * config.vision_config.patch_size
-    )
-    num_patches = int(torch.prod(grid_thw[0]).item())
-    return torch.randn(num_patches, patch_volume, device=device)
-
-
-class Qwen3VLModelTestModule(torch.nn.Module):
-    @classmethod
-    def prepare_model_and_inputs(cls):
-        raise NotImplementedError
-
-
-def _to_bfloat16_model_and_floating_inputs(
-    model: torch.nn.Module, inputs: input_t
-) -> tuple[torch.nn.Module, input_t]:
-    """Convert model and floating inputs for BF16 backend coverage."""
-
-    return model.to(torch.bfloat16), tuple(
-        (
-            x.to(torch.bfloat16)
-            if isinstance(x, torch.Tensor) and x.is_floating_point()
-            else x
-        )
-        for x in inputs
-    )
-
-
-class TextModelWrapper(Qwen3VLModelTestModule):
-    def __init__(self, config) -> None:
-        super().__init__()
-        self.model = Qwen3VLTextModel(config.text_config)
-
-    def forward(
-        self,
-        input_ids: torch.Tensor,
-        attention_mask: torch.Tensor,
-        position_ids: torch.Tensor,
-    ) -> torch.Tensor:
-        outputs = self.model(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            position_ids=position_ids,
-        )
-        return outputs.last_hidden_state
-
-    @classmethod
-    def prepare_model_and_inputs(cls):
-        torch.manual_seed(0)
-        config = _make_qwen3_vl_2b_instruct_layer_config()
-        model = cls(config).eval()
-        input_ids = torch.randint(0, 128, (2, 8), dtype=torch.long)
-        attention_mask = torch.ones_like(input_ids)
-        position_ids = _make_text_position_ids(2, 8, input_ids.device)
-        return model, (input_ids, attention_mask, position_ids)
-
-
-class LowerableVisionModelWrapper(Qwen3VLModelTestModule):
-    def __init__(self, config) -> None:
-        super().__init__()
-        self.visual = Qwen3VLVisionModel(config.vision_config)
-
-        with torch.no_grad():
-            grid_thw = _make_image_grid_thw(self.visual.pos_embed.weight.device)
-            pos_embeds = self.visual.fast_pos_embed_interpolate(grid_thw)
-
-            rotary_pos_emb = self.visual.rot_pos_emb(grid_thw)
-            emb = torch.cat((rotary_pos_emb, rotary_pos_emb), dim=-1)
-            cos = emb.cos()
-            sin = emb.sin()
-
-            cu_seqlens = torch.repeat_interleave(
-                grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]
-            ).cumsum(dim=0, dtype=torch.int32)
-            cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0)
-
-        self.register_buffer("pos_embeds", pos_embeds)
-        self.register_buffer("cos", cos)
-        self.register_buffer("sin", sin)
-        self.register_buffer("cu_seqlens", cu_seqlens)
-
-    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
-        hidden_states = self.visual.patch_embed(pixel_values)
-        hidden_states = hidden_states + self.pos_embeds
-
-        position_embeddings = (self.cos, self.sin)
-        deepstack_feature_lists = []
-        for layer_num, blk in enumerate(self.visual.blocks):
-            hidden_states = blk(
-                hidden_states,
-                cu_seqlens=self.cu_seqlens,
-                position_embeddings=position_embeddings,
-            )
-            if layer_num in self.visual.deepstack_visual_indexes:
-                deepstack_feature = self.visual.deepstack_merger_list[
-                    self.visual.deepstack_visual_indexes.index(layer_num)
-                ](hidden_states)
-                deepstack_feature_lists.append(deepstack_feature)
-
-        hidden_states = self.visual.merger(hidden_states)
-
-        # Keep deepstack feature extraction in the exported graph without
-        # changing the model output.
-        deepstack_residual = hidden_states.new_zeros(())
-        for deepstack_feature in deepstack_feature_lists:
-            deepstack_residual = deepstack_residual + deepstack_feature.sum() * 0
-
-        return hidden_states + deepstack_residual
-
-    @classmethod
-    def prepare_model_and_inputs(cls):
-        torch.manual_seed(0)
-        config = _make_qwen3_vl_2b_instruct_layer_config()
-        model = cls(config).eval()
-        pixel_values = _make_pixel_values(config, torch.device("cpu"))
-        return model, (pixel_values,)
-
-
-@dataclass(frozen=True)
-class Qwen3VLModelTestCase:
-    model_cls: type[Qwen3VLModelTestModule]
-    run_on_vulkan_runtime: bool = True
-    atol: float = 1e-3
-    rtol: float = 1e-3
-
-
-TOSA_FP_TEST_CASES: dict[str, Qwen3VLModelTestCase] = {
-    "vision_model": Qwen3VLModelTestCase(
-        model_cls=LowerableVisionModelWrapper,
-    ),
-    "text_model": Qwen3VLModelTestCase(
-        model_cls=TextModelWrapper,
-        atol=3e-2,
-        rtol=1e-2,
-    ),
-}
-
-VGF_NO_QUANT_TEST_CASES: dict[str, Qwen3VLModelTestCase] = {
-    "vision_model": Qwen3VLModelTestCase(
-        model_cls=LowerableVisionModelWrapper,
-        run_on_vulkan_runtime=False,
-    ),
-    "text_model": Qwen3VLModelTestCase(
-        model_cls=TextModelWrapper,
-        run_on_vulkan_runtime=False,
-    ),
-}
-
-
-@pytest.mark.slow
-@common.parametrize("test_case", TOSA_FP_TEST_CASES)
-def test_qwen3_vl_full_models_tosa_FP(test_case: Qwen3VLModelTestCase):
-    model, inputs = test_case.model_cls.prepare_model_and_inputs()
-    with torch.no_grad():
-        pipeline = TosaPipelineFP[input_t](
-            model,
-            inputs,
-            aten_op=[],
-            exir_op=[],
-            atol=test_case.atol,
-            rtol=test_case.rtol,
-        )
-        pipeline.run()
-
-
-@pytest.mark.slow
-@common.parametrize("test_case", TOSA_FP_TEST_CASES)
-def test_qwen3_vl_full_models_tosa_FP_bf16(test_case: Qwen3VLModelTestCase):
-    model, inputs = test_case.model_cls.prepare_model_and_inputs()
-    model, inputs = _to_bfloat16_model_and_floating_inputs(model, inputs)
-    with torch.no_grad():
-        pipeline = TosaPipelineFP[input_t](
-            model,
-            inputs,
-            aten_op=[],
-            exir_op=[],
-            tosa_extensions=["bf16"],
-            atol=1e-1,
-            rtol=1e-1,
-        )
-        pipeline.run()
-
-
-@pytest.mark.slow
-@common.SkipIfNoModelConverter
-@common.parametrize("test_case", VGF_NO_QUANT_TEST_CASES)
-def test_qwen3_vl_full_models_vgf_no_quant(test_case: Qwen3VLModelTestCase):
-    model, inputs = test_case.model_cls.prepare_model_and_inputs()
-    with torch.no_grad():
-        pipeline = VgfPipeline[input_t](
-            model,
-            inputs,
-            aten_op=[],
-            exir_op=[],
-            quantize=False,
-            run_on_vulkan_runtime=test_case.run_on_vulkan_runtime,
-        )
-        pipeline.run()
-
-
-@pytest.mark.slow
-@common.SkipIfNoModelConverter
-@common.parametrize("test_case", VGF_NO_QUANT_TEST_CASES)
-def test_qwen3_vl_full_models_vgf_no_quant_bf16(test_case: Qwen3VLModelTestCase):
-    model, inputs = test_case.model_cls.prepare_model_and_inputs()
-    model, inputs = _to_bfloat16_model_and_floating_inputs(model, inputs)
-    with torch.no_grad():
-        pipeline = VgfPipeline[input_t](
-            model,
-            inputs,
-            aten_op=[],
-            exir_op=[],
-            quantize=False,
-            run_on_vulkan_runtime=test_case.run_on_vulkan_runtime,
-            tosa_spec="TOSA-1.0+FP+bf16",
-        )
-        pipeline.run()