diff --git a/litellm/llms/vertex_ai/common_utils.py b/litellm/llms/vertex_ai/common_utils.py index 3cfa55c06066..6b2b6359b73d 100644 --- a/litellm/llms/vertex_ai/common_utils.py +++ b/litellm/llms/vertex_ai/common_utils.py @@ -733,6 +733,16 @@ def get_vertex_location_from_url(url: str) -> Optional[str]: return match.group(1) if match else None +def get_vertex_model_id_from_url(url: str) -> Optional[str]: + """ + Get the vertex model id from the url + + `https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/publishers/google/models/${MODEL_ID}:streamGenerateContent` + """ + match = re.search(r"/models/([^/:]+)", url) + return match.group(1) if match else None + + def replace_project_and_location_in_route( requested_route: str, vertex_project: str, vertex_location: str ) -> str: @@ -782,6 +792,15 @@ def construct_target_url( if "cachedContent" in requested_route: vertex_version = "v1beta1" + # Check if the requested route starts with a version + # e.g. /v1beta1/publishers/google/models/gemini-3-pro-preview:streamGenerateContent + if requested_route.startswith("/v1/"): + vertex_version = "v1" + requested_route = requested_route.replace("/v1/", "/", 1) + elif requested_route.startswith("/v1beta1/"): + vertex_version = "v1beta1" + requested_route = requested_route.replace("/v1beta1/", "/", 1) + base_requested_route = "{}/projects/{}/locations/{}".format( vertex_version, vertex_project, vertex_location ) diff --git a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py index 84550092d2e0..bb78ac671e77 100644 --- a/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py +++ b/litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py @@ -1547,6 +1547,7 @@ async def _base_vertex_proxy_route( from litellm.llms.vertex_ai.common_utils import ( construct_target_url, get_vertex_location_from_url, + get_vertex_model_id_from_url, get_vertex_project_id_from_url, ) @@ -1576,6 +1577,25 @@ async def _base_vertex_proxy_route( vertex_location=vertex_location, ) + if vertex_project is None or vertex_location is None: + # Check if model is in router config + model_id = get_vertex_model_id_from_url(endpoint) + if model_id: + from litellm.proxy.proxy_server import llm_router + + if llm_router: + try: + # Use the dedicated pass-through deployment selection method to automatically filter use_in_pass_through=True + deployment = llm_router.get_available_deployment_for_pass_through(model=model_id) + if deployment: + litellm_params = deployment.get("litellm_params", {}) + vertex_project = litellm_params.get("vertex_project") + vertex_location = litellm_params.get("vertex_location") + except Exception as e: + verbose_proxy_logger.debug( + f"Error getting available deployment for model {model_id}: {e}" + ) + vertex_credentials = passthrough_endpoint_router.get_vertex_credentials( project_id=vertex_project, location=vertex_location, diff --git a/litellm/router.py b/litellm/router.py index 5e6027671b2b..30de457628ec 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -7779,6 +7779,154 @@ async def async_get_available_deployment( ) raise e + async def async_get_available_deployment_for_pass_through( + self, + model: str, + request_kwargs: Dict, + messages: Optional[List[Dict[str, str]]] = None, + input: Optional[Union[str, List]] = None, + specific_deployment: Optional[bool] = False, + ): + """ + Async version of get_available_deployment_for_pass_through + + Only returns deployments 
configured with use_in_pass_through=True + """ + try: + parent_otel_span = _get_parent_otel_span_from_kwargs(request_kwargs) + + # 1. Execute pre-routing hook + pre_routing_hook_response = await self.async_pre_routing_hook( + model=model, + request_kwargs=request_kwargs, + messages=messages, + input=input, + specific_deployment=specific_deployment, + ) + if pre_routing_hook_response is not None: + model = pre_routing_hook_response.model + messages = pre_routing_hook_response.messages + + # 2. Get healthy deployments + healthy_deployments = await self.async_get_healthy_deployments( + model=model, + request_kwargs=request_kwargs, + messages=messages, + input=input, + specific_deployment=specific_deployment, + parent_otel_span=parent_otel_span, + ) + + # 3. If specific deployment returned, verify if it supports pass-through + if isinstance(healthy_deployments, dict): + litellm_params = healthy_deployments.get("litellm_params", {}) + if litellm_params.get("use_in_pass_through"): + return healthy_deployments + else: + raise litellm.BadRequestError( + message=f"Deployment {healthy_deployments.get('model_info', {}).get('id')} does not support pass-through endpoint (use_in_pass_through=False)", + model=model, + llm_provider="", + ) + + # 4. Filter deployments that support pass-through + pass_through_deployments = self._filter_pass_through_deployments( + healthy_deployments=healthy_deployments + ) + + if len(pass_through_deployments) == 0: + raise litellm.BadRequestError( + message=f"Model {model} has no deployments configured with use_in_pass_through=True. Please add use_in_pass_through: true to the deployment configuration", + model=model, + llm_provider="", + ) + + # 5. Apply load balancing strategy + start_time = time.perf_counter() + if ( + self.routing_strategy == "usage-based-routing-v2" + and self.lowesttpm_logger_v2 is not None + ): + deployment = ( + await self.lowesttpm_logger_v2.async_get_available_deployments( + model_group=model, + healthy_deployments=pass_through_deployments, # type: ignore + messages=messages, + input=input, + ) + ) + elif ( + self.routing_strategy == "latency-based-routing" + and self.lowestlatency_logger is not None + ): + deployment = ( + await self.lowestlatency_logger.async_get_available_deployments( + model_group=model, + healthy_deployments=pass_through_deployments, # type: ignore + messages=messages, + input=input, + request_kwargs=request_kwargs, + ) + ) + elif self.routing_strategy == "simple-shuffle": + return simple_shuffle( + llm_router_instance=self, + healthy_deployments=pass_through_deployments, + model=model, + ) + elif ( + self.routing_strategy == "least-busy" + and self.leastbusy_logger is not None + ): + deployment = ( + await self.leastbusy_logger.async_get_available_deployments( + model_group=model, + healthy_deployments=pass_through_deployments, # type: ignore + ) + ) + else: + deployment = None + + if deployment is None: + exception = await async_raise_no_deployment_exception( + litellm_router_instance=self, + model=model, + parent_otel_span=parent_otel_span, + ) + raise exception + + verbose_router_logger.info( + f"async_get_available_deployment_for_pass_through model: {model}, selected deployment: {self.print_deployment(deployment)}" + ) + + end_time = time.perf_counter() + _duration = end_time - start_time + asyncio.create_task( + self.service_logger_obj.async_service_success_hook( + service=ServiceTypes.ROUTER, + duration=_duration, + call_type=".async_get_available_deployments", + parent_otel_span=parent_otel_span, + 
start_time=start_time, + end_time=end_time, + ) + ) + + return deployment + except Exception as e: + traceback_exception = traceback.format_exc() + if request_kwargs is not None: + logging_obj = request_kwargs.get("litellm_logging_obj", None) + if logging_obj is not None: + threading.Thread( + target=logging_obj.failure_handler, + args=(e, traceback_exception), + ).start() + asyncio.create_task( + logging_obj.async_failure_handler(e, traceback_exception) # type: ignore + ) + raise e + async def async_pre_routing_hook( self, model: str, @@ -7931,6 +8079,169 @@ def get_available_deployment( ) return deployment + def get_available_deployment_for_pass_through( + self, + model: str, + messages: Optional[List[Dict[str, str]]] = None, + input: Optional[Union[str, List]] = None, + specific_deployment: Optional[bool] = False, + request_kwargs: Optional[Dict] = None, + ): + """ + Returns deployments available for pass-through endpoints (based on load balancing strategy) + + Similar to get_available_deployment, but only returns deployments with use_in_pass_through=True + + Args: + model: Model name + messages: Optional list of messages + input: Optional input data + specific_deployment: Whether to find a specific deployment + request_kwargs: Optional request parameters + + Returns: + Dict: Selected deployment configuration + + Raises: + BadRequestError: If no deployment is configured with use_in_pass_through=True + RouterRateLimitError: If no pass-through deployments are available + """ + # 1. Perform common checks to get healthy deployments list + model, healthy_deployments = self._common_checks_available_deployment( + model=model, + messages=messages, + input=input, + specific_deployment=specific_deployment, + ) + + # 2. If the returned is a specific deployment (Dict), verify and return directly + if isinstance(healthy_deployments, dict): + litellm_params = healthy_deployments.get("litellm_params", {}) + if litellm_params.get("use_in_pass_through"): + return healthy_deployments + else: + # Specific deployment does not support pass-through + raise litellm.BadRequestError( + message=f"Deployment {healthy_deployments.get('model_info', {}).get('id')} does not support pass-through endpoint (use_in_pass_through=False)", + model=model, + llm_provider="", + ) + + # 3. Filter deployments that support pass-through + pass_through_deployments = self._filter_pass_through_deployments( + healthy_deployments=healthy_deployments + ) + + if len(pass_through_deployments) == 0: + # No deployments support pass-through + raise litellm.BadRequestError( + message=f"Model {model} has no deployment configured with use_in_pass_through=True. Please add use_in_pass_through: true in the deployment configuration", + model=model, + llm_provider="", + ) + + # 4. Apply cooldown filtering + parent_otel_span: Optional[Span] = _get_parent_otel_span_from_kwargs( + request_kwargs + ) + cooldown_deployments = _get_cooldown_deployments( + litellm_router_instance=self, parent_otel_span=parent_otel_span + ) + pass_through_deployments = self._filter_cooldown_deployments( + healthy_deployments=pass_through_deployments, + cooldown_deployments=cooldown_deployments, + ) + + # 5. 
Apply pre-call checks (if enabled) + if self.enable_pre_call_checks and messages is not None: + pass_through_deployments = self._pre_call_checks( + model=model, + healthy_deployments=pass_through_deployments, + messages=messages, + request_kwargs=request_kwargs, + ) + + if len(pass_through_deployments) == 0: + model_ids = self.get_model_ids(model_name=model) + _cooldown_time = self.cooldown_cache.get_min_cooldown( + model_ids=model_ids, parent_otel_span=parent_otel_span + ) + _cooldown_list = _get_cooldown_deployments( + litellm_router_instance=self, parent_otel_span=parent_otel_span + ) + raise RouterRateLimitError( + model=model, + cooldown_time=_cooldown_time, + enable_pre_call_checks=self.enable_pre_call_checks, + cooldown_list=_cooldown_list, + ) + + # 6. Apply load balancing strategy + if self.routing_strategy == "least-busy" and self.leastbusy_logger is not None: + deployment = self.leastbusy_logger.get_available_deployments( + model_group=model, healthy_deployments=pass_through_deployments # type: ignore + ) + elif self.routing_strategy == "simple-shuffle": + return simple_shuffle( + llm_router_instance=self, + healthy_deployments=pass_through_deployments, + model=model, + ) + elif ( + self.routing_strategy == "latency-based-routing" + and self.lowestlatency_logger is not None + ): + deployment = self.lowestlatency_logger.get_available_deployments( + model_group=model, + healthy_deployments=pass_through_deployments, # type: ignore + request_kwargs=request_kwargs, + ) + elif ( + self.routing_strategy == "usage-based-routing" + and self.lowesttpm_logger is not None + ): + deployment = self.lowesttpm_logger.get_available_deployments( + model_group=model, + healthy_deployments=pass_through_deployments, # type: ignore + messages=messages, + input=input, + ) + elif ( + self.routing_strategy == "usage-based-routing-v2" + and self.lowesttpm_logger_v2 is not None + ): + deployment = self.lowesttpm_logger_v2.get_available_deployments( + model_group=model, + healthy_deployments=pass_through_deployments, # type: ignore + messages=messages, + input=input, + ) + else: + deployment = None + + if deployment is None: + verbose_router_logger.info( + f"get_available_deployment_for_pass_through model: {model}, no available deployments" + ) + model_ids = self.get_model_ids(model_name=model) + _cooldown_time = self.cooldown_cache.get_min_cooldown( + model_ids=model_ids, parent_otel_span=parent_otel_span + ) + _cooldown_list = _get_cooldown_deployments( + litellm_router_instance=self, parent_otel_span=parent_otel_span + ) + raise RouterRateLimitError( + model=model, + cooldown_time=_cooldown_time, + enable_pre_call_checks=self.enable_pre_call_checks, + cooldown_list=_cooldown_list, + ) + + verbose_router_logger.info( + f"get_available_deployment_for_pass_through model: {model}, selected deployment: {self.print_deployment(deployment)}" + ) + return deployment + def _filter_cooldown_deployments( self, healthy_deployments: List[Dict], cooldown_deployments: List[str] ) -> List[Dict]: @@ -7953,6 +8264,34 @@ def _filter_cooldown_deployments( if deployment["model_info"]["id"] not in cooldown_set ] + def _filter_pass_through_deployments( + self, healthy_deployments: List[Dict] + ) -> List[Dict]: + """ + Filter out deployments configured with use_in_pass_through=True + + Args: + healthy_deployments: List of healthy deployments + + Returns: + List[Dict]: Only includes a list of deployments that support pass-through + """ + verbose_router_logger.debug( + f"Filter pass-through deployments from 
{len(healthy_deployments)} healthy deployments" + ) + + pass_through_deployments = [ + deployment + for deployment in healthy_deployments + if deployment.get("litellm_params", {}).get("use_in_pass_through", False) + ] + + verbose_router_logger.debug( + f"Found {len(pass_through_deployments)} deployments with pass-through enabled" + ) + + return pass_through_deployments + def _track_deployment_metrics( self, deployment, parent_otel_span: Optional[Span], response=None ): diff --git a/tests/test_litellm/llms/vertex_ai/test_vertex_ai_common_utils.py b/tests/test_litellm/llms/vertex_ai/test_vertex_ai_common_utils.py index a5eee9e37b1e..cc80e5032520 100644 --- a/tests/test_litellm/llms/vertex_ai/test_vertex_ai_common_utils.py +++ b/tests/test_litellm/llms/vertex_ai/test_vertex_ai_common_utils.py @@ -1,7 +1,6 @@ import os import sys -from typing import Any, Dict -from unittest.mock import MagicMock, call, patch +from unittest.mock import patch import pytest @@ -11,7 +10,6 @@ 0, os.path.abspath("../../..") ) # Adds the parent directory to the system path -import litellm from litellm.llms.vertex_ai.common_utils import ( _get_vertex_url, convert_anyof_null_to_nullable, @@ -798,9 +796,54 @@ def test_fix_enum_empty_strings(): assert "mobile" in enum_values assert "tablet" in enum_values - # 3. Other properties preserved - assert input_schema["properties"]["user_agent_type"]["type"] == "string" - assert input_schema["properties"]["user_agent_type"]["description"] == "Device type for user agent" + +def test_get_vertex_model_id_from_url(): + """Test get_vertex_model_id_from_url with various URLs""" + from litellm.llms.vertex_ai.common_utils import get_vertex_model_id_from_url + + # Test with valid URL + url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + model_id = get_vertex_model_id_from_url(url) + assert model_id == "gemini-pro" + + # Test with invalid URL + url = "https://invalid-url.com" + model_id = get_vertex_model_id_from_url(url) + assert model_id is None + + +def test_construct_target_url_with_version_prefix(): + """Test construct_target_url with version prefixes""" + from litellm.llms.vertex_ai.common_utils import construct_target_url + + # Test with /v1/ prefix + url = "/v1/publishers/google/models/gemini-pro:streamGenerateContent" + vertex_project = "test-project" + vertex_location = "us-central1" + base_url = "https://us-central1-aiplatform.googleapis.com" + + target_url = construct_target_url( + base_url=base_url, + requested_route=url, + vertex_project=vertex_project, + vertex_location=vertex_location, + ) + + expected_url = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + assert str(target_url) == expected_url + + # Test with /v1beta1/ prefix + url = "/v1beta1/publishers/google/models/gemini-pro:streamGenerateContent" + + target_url = construct_target_url( + base_url=base_url, + requested_route=url, + vertex_project=vertex_project, + vertex_location=vertex_location, + ) + + expected_url = "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/test-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent" + assert str(target_url) == expected_url def test_fix_enum_types(): @@ -862,7 +905,7 @@ def test_fix_enum_types(): "truncateMode": { "enum": ["auto", "none", "start", "end"], # Kept - string type "type": "string", - "description": "How 
to truncate content" + "description": "How to truncate content", }, "maxLength": { # enum removed "type": "integer", diff --git a/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py new file mode 100644 index 000000000000..ceb231eb4cb9 --- /dev/null +++ b/tests/test_litellm/proxy/pass_through_endpoints/test_vertex_passthrough_load_balancing.py @@ -0,0 +1,222 @@ + +import pytest +from unittest.mock import MagicMock, AsyncMock, patch +from litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints import _base_vertex_proxy_route +from litellm.types.router import DeploymentTypedDict + +@pytest.mark.asyncio +async def test_vertex_passthrough_load_balancing(): + """ + Test that _base_vertex_proxy_route uses llm_router.get_available_deployment_for_pass_through + instead of get_model_list to ensure load balancing works with pass-through filtering. + """ + # Setup mocks + mock_request = MagicMock() + mock_response = MagicMock() + mock_handler = MagicMock() + + # Mock the router + mock_router = MagicMock() + mock_deployment = { + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "test-project-lb", + "vertex_location": "us-central1-lb", + "use_in_pass_through": True + } + } + mock_router.get_available_deployment_for_pass_through.return_value = mock_deployment + + # Mock get_vertex_model_id_from_url to return a model ID + with patch("litellm.llms.vertex_ai.common_utils.get_vertex_model_id_from_url", return_value="gemini-pro"), \ + patch("litellm.proxy.proxy_server.llm_router", mock_router), \ + patch("litellm.llms.vertex_ai.common_utils.get_vertex_project_id_from_url", return_value=None), \ + patch("litellm.llms.vertex_ai.common_utils.get_vertex_location_from_url", return_value=None), \ + patch("litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.passthrough_endpoint_router") as mock_pt_router, \ + patch("litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints._prepare_vertex_auth_headers", new_callable=AsyncMock) as mock_prep_headers, \ + patch("litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.create_pass_through_route") as mock_create_route, \ + patch("litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints.user_api_key_auth", new_callable=AsyncMock) as mock_auth: + + # Setup additional mocks to avoid side effects + mock_pt_router.get_vertex_credentials.return_value = MagicMock() + mock_prep_headers.return_value = ({}, "https://test.url", False, "test-project-lb", "us-central1-lb") + + mock_endpoint_func = AsyncMock() + mock_create_route.return_value = mock_endpoint_func + mock_auth.return_value = {} + + # Execute + await _base_vertex_proxy_route( + endpoint="https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/google/models/gemini-pro:streamGenerateContent", + request=mock_request, + fastapi_response=mock_response, + get_vertex_pass_through_handler=mock_handler + ) + + # Verify + # 1. Check that get_available_deployment_for_pass_through was called with the correct model ID + mock_router.get_available_deployment_for_pass_through.assert_called_once_with(model="gemini-pro") + + # 2. Check that get_model_list was NOT called (this ensures we aren't doing the old logic) + mock_router.get_model_list.assert_not_called() + + # 3. 
Verify that the project and location from the deployment were used (passed to _prepare_vertex_auth_headers) + # The args are: request, vertex_credentials, router_credentials, vertex_project, vertex_location, ... + # We check the 4th and 5th args (index 3 and 4) + call_args = mock_prep_headers.call_args + assert call_args[1]['vertex_project'] == "test-project-lb" + assert call_args[1]['vertex_location'] == "us-central1-lb" + + +def test_get_available_deployment_for_pass_through_filters_correctly(): + """ + Test that get_available_deployment_for_pass_through filters deployments correctly + """ + from litellm.router import Router + + # Configure router with both pass-through and non-pass-through deployments + model_list = [ + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-1", + "vertex_location": "us-central1", + "use_in_pass_through": True, # Supports pass-through + } + }, + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-2", + "vertex_location": "us-west1", + "use_in_pass_through": False, # Does not support pass-through + } + }, + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-3", + "vertex_location": "us-east1", + # use_in_pass_through not set (defaults to False) + } + }, + ] + + router = Router(model_list=model_list, routing_strategy="simple-shuffle") + + # Test: Should only return project-1 (use_in_pass_through=True) + deployment = router.get_available_deployment_for_pass_through(model="gemini-pro") + + assert deployment is not None + assert deployment["litellm_params"]["vertex_project"] == "project-1" + assert deployment["litellm_params"]["use_in_pass_through"] is True + + +def test_get_available_deployment_for_pass_through_no_deployments(): + """ + Test that correct error is thrown when there are no pass-through deployments + """ + import litellm + from litellm.router import Router + + model_list = [ + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-1", + "vertex_location": "us-central1", + "use_in_pass_through": False, # Does not support pass-through + } + } + ] + + router = Router(model_list=model_list) + + # Should throw BadRequestError + with pytest.raises(litellm.BadRequestError) as exc_info: + router.get_available_deployment_for_pass_through(model="gemini-pro") + + assert "use_in_pass_through=True" in str(exc_info.value) + + +def test_get_available_deployment_for_pass_through_load_balancing(): + """ + Test load balancing for pass-through deployments + """ + from litellm.router import Router + + model_list = [ + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-1", + "vertex_location": "us-central1", + "use_in_pass_through": True, + "rpm": 100, + } + }, + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-2", + "vertex_location": "us-west1", + "use_in_pass_through": True, + "rpm": 200, # Higher RPM should be selected more frequently + } + }, + ] + + router = Router( + model_list=model_list, + routing_strategy="simple-shuffle" + ) + + # Call multiple times and track selected deployments + selections = {"project-1": 0, "project-2": 0} + for _ in range(100): + deployment = router.get_available_deployment_for_pass_through(model="gemini-pro") + project = 
deployment["litellm_params"]["vertex_project"] + selections[project] += 1 + + # Due to rpm weight, project-2 should be selected more times + assert selections["project-2"] > selections["project-1"] + + +@pytest.mark.asyncio +async def test_async_get_available_deployment_for_pass_through(): + """ + Test the async version of get_available_deployment_for_pass_through + """ + from litellm.router import Router + + model_list = [ + { + "model_name": "gemini-pro", + "litellm_params": { + "model": "vertex_ai/gemini-pro", + "vertex_project": "project-1", + "vertex_location": "us-central1", + "use_in_pass_through": True, + } + } + ] + + router = Router( + model_list=model_list, + routing_strategy="simple-shuffle" + ) + + deployment = await router.async_get_available_deployment_for_pass_through( + model="gemini-pro", + request_kwargs={} + ) + + assert deployment is not None + assert deployment["litellm_params"]["use_in_pass_through"] is True +