diff --git a/go1/tools/low_vram/__init__.py b/go1/tools/low_vram/__init__.py new file mode 100644 index 0000000..5fe7d2a --- /dev/null +++ b/go1/tools/low_vram/__init__.py @@ -0,0 +1,45 @@ +""" +Low-VRAM Training Framework for GO-1 +===================================== + +Clean Architecture Package Structure: + + low_vram/ + ├── core/ # Domain Layer (Pure business logic) + │ ├── interfaces/ # Abstract interfaces (Dependency Inversion) + │ ├── entities/ # Domain entities + │ └── use_cases/ # Application business rules + ├── adapters/ # Interface Adapters Layer + │ ├── memory/ # Memory management implementations + │ └── training/ # Training strategy implementations + └── infrastructure/ # Frameworks & Drivers Layer + ├── pytorch/ # PyTorch-specific implementations + └── config/ # Configuration handling + +Design Principles Applied: + - SOLID Principles (especially Dependency Inversion) + - Clean Architecture (layered separation) + - Single Responsibility (one class = one job) + - Interface Segregation (small, focused interfaces) + - DRY (shared utilities extracted) + +Security Considerations (CIA Triad): + - Integrity: Checksum validation for cached features + - Availability: Graceful degradation when memory is low + - Confidentiality: No sensitive data logged +""" + +__version__ = "0.1.0" +__author__ = "AgiBot-World Contributors" + +from go1.tools.low_vram.core.interfaces import ( + MemoryManager, + TrainingStrategy, + FeatureCache, +) + +__all__ = [ + "MemoryManager", + "TrainingStrategy", + "FeatureCache", +] diff --git a/go1/tools/low_vram/adapters/__init__.py b/go1/tools/low_vram/adapters/__init__.py new file mode 100644 index 0000000..fb32ef6 --- /dev/null +++ b/go1/tools/low_vram/adapters/__init__.py @@ -0,0 +1,23 @@ +""" +Adapters Module - Interface Adapter Layer +========================================== + +Implements concrete classes for the core interfaces. +This layer converts between the domain layer and infrastructure. +""" + +from go1.tools.low_vram.adapters.memory_manager import TorchMemoryManager +from go1.tools.low_vram.adapters.training_strategy import ( + LowVRAMTrainingStrategy, + GradientAccumulationMixin, +) +from go1.tools.low_vram.adapters.feature_cache import DiskFeatureCache +from go1.tools.low_vram.adapters.model_freezer import ComponentFreezer + +__all__ = [ + "TorchMemoryManager", + "LowVRAMTrainingStrategy", + "GradientAccumulationMixin", + "DiskFeatureCache", + "ComponentFreezer", +] diff --git a/go1/tools/low_vram/adapters/feature_cache.py b/go1/tools/low_vram/adapters/feature_cache.py new file mode 100644 index 0000000..21ea0bc --- /dev/null +++ b/go1/tools/low_vram/adapters/feature_cache.py @@ -0,0 +1,213 @@ +""" +Feature Cache Implementation +============================= + +Disk-based cache for pre-computed vision features. 
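+
+Typical usage (illustrative sketch; the cache directory, key and tensor names
+below are assumed, not defined by this module):
+
+    cache = DiskFeatureCache("/tmp/go1_feature_cache")
+    cache.store("episode0_frame0", vision_features)   # SHA256 checksum recorded alongside
+    cached = cache.retrieve("episode0_frame0")         # None if missing or corrupted
+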
+Security (CIA): + - Integrity: SHA256 checksum validation + - Availability: Graceful handling of corrupted files +""" + +import hashlib +import json +import logging +import os +import shutil +from pathlib import Path +from typing import Any, Dict, Optional + +import torch +from torch import Tensor + +from go1.tools.low_vram.core.interfaces import FeatureCache + +logger = logging.getLogger(__name__) + + +def compute_checksum(tensor: Tensor) -> str: + """Compute SHA256 checksum of tensor data for integrity verification.""" + # Convert to bytes and hash + data = tensor.detach().cpu().numpy().tobytes() + return hashlib.sha256(data).hexdigest()[:16] # First 16 chars + + +class DiskFeatureCache(FeatureCache): + """ + Disk-based feature cache with integrity verification. + + Single Responsibility: Feature storage/retrieval only. + + File Structure: + cache_dir/ + ├── features/ + │ ├── {key}.pt # Tensor files + │ └── ... + ├── metadata.json # Cache metadata + └── checksums.json # Integrity checksums + """ + + def __init__(self, cache_dir: str, verify_on_load: bool = True): + """ + Initialize disk cache. + + Args: + cache_dir: Directory for cache storage + verify_on_load: Whether to verify checksums on retrieval + """ + self._cache_dir = Path(cache_dir) + self._features_dir = self._cache_dir / "features" + self._verify_on_load = verify_on_load + + # Statistics + self._hits = 0 + self._misses = 0 + + # Create directories + self._features_dir.mkdir(parents=True, exist_ok=True) + + # Load or create checksums + self._checksums_path = self._cache_dir / "checksums.json" + self._checksums: Dict[str, str] = self._load_checksums() + + logger.info(f"Feature cache initialized at {cache_dir}") + + def _load_checksums(self) -> Dict[str, str]: + """Load checksums from disk.""" + if self._checksums_path.exists(): + try: + with open(self._checksums_path, "r") as f: + return json.load(f) + except (json.JSONDecodeError, IOError): + logger.warning("Corrupted checksums file, starting fresh") + return {} + return {} + + def _save_checksums(self) -> None: + """Save checksums to disk.""" + with open(self._checksums_path, "w") as f: + json.dump(self._checksums, f, indent=2) + + def _get_path(self, key: str) -> Path: + """Get file path for a key (sanitize key for filesystem).""" + # Sanitize key to be filesystem-safe + safe_key = key.replace("/", "_").replace("\\", "_").replace(":", "_") + return self._features_dir / f"{safe_key}.pt" + + def store( + self, + key: str, + features: Tensor, + checksum: Optional[str] = None + ) -> None: + """ + Store features to disk with optional integrity checksum. + + Args: + key: Unique identifier + features: Tensor to store + checksum: Optional pre-computed checksum (computes if None) + """ + path = self._get_path(key) + + # Compute checksum if not provided + if checksum is None: + checksum = compute_checksum(features) + + # Save tensor + torch.save(features.detach().cpu(), path) + + # Save checksum + self._checksums[key] = checksum + self._save_checksums() + + logger.debug(f"Cached '{key}': {features.shape}, checksum={checksum}") + + def retrieve( + self, + key: str, + verify_checksum: bool = True + ) -> Optional[Tensor]: + """ + Retrieve features from cache. 
+ + Args: + key: Identifier used during storage + verify_checksum: Whether to verify integrity + + Returns: + Cached tensor or None if not found/corrupted + """ + path = self._get_path(key) + + if not path.exists(): + self._misses += 1 + return None + + try: + features = torch.load(path, weights_only=True) + except Exception as e: + logger.error(f"Failed to load cached features '{key}': {e}") + self._misses += 1 + return None + + # Verify integrity + if verify_checksum and self._verify_on_load: + expected = self._checksums.get(key) + if expected is not None: + actual = compute_checksum(features) + if actual != expected: + logger.error( + f"Checksum mismatch for '{key}': " + f"expected {expected}, got {actual}. " + "Data may be corrupted!" + ) + self._misses += 1 + return None + + self._hits += 1 + return features + + def exists(self, key: str) -> bool: + """Check if features exist in cache without loading.""" + return self._get_path(key).exists() + + def clear(self) -> None: + """Clear all cached features.""" + if self._features_dir.exists(): + shutil.rmtree(self._features_dir) + self._features_dir.mkdir(parents=True, exist_ok=True) + + self._checksums.clear() + self._save_checksums() + + self._hits = 0 + self._misses = 0 + + logger.info("Feature cache cleared") + + def get_stats(self) -> Dict[str, Any]: + """Get cache statistics.""" + total_size = sum( + f.stat().st_size for f in self._features_dir.glob("*.pt") + ) if self._features_dir.exists() else 0 + + return { + "hits": self._hits, + "misses": self._misses, + "hit_rate": self._hits / max(1, self._hits + self._misses), + "cached_items": len(list(self._features_dir.glob("*.pt"))), + "total_size_mb": total_size / (1024 * 1024), + "cache_dir": str(self._cache_dir), + } + + def __repr__(self) -> str: + stats = self.get_stats() + return ( + f"DiskFeatureCache(" + f"items={stats['cached_items']}, " + f"size={stats['total_size_mb']:.1f}MB, " + f"hit_rate={stats['hit_rate']:.1%})" + ) + + +__all__ = ["DiskFeatureCache", "compute_checksum"] diff --git a/go1/tools/low_vram/adapters/memory_manager.py b/go1/tools/low_vram/adapters/memory_manager.py new file mode 100644 index 0000000..b1512ed --- /dev/null +++ b/go1/tools/low_vram/adapters/memory_manager.py @@ -0,0 +1,271 @@ +""" +Memory Manager Implementation +============================== + +Concrete implementation of MemoryManager interface. +Single Responsibility: GPU/CPU memory management only. + +Security (CIA): + - Availability: Prevents OOM by proactive monitoring + - Integrity: Validates tensor state after transfers +""" + +import gc +import logging +import time +from typing import Dict, Optional + +import torch +from torch import Tensor + +from go1.tools.low_vram.core.interfaces import ( + MemoryConfig, + MemoryManager, + MemorySnapshot, + MemoryTier, +) + +logger = logging.getLogger(__name__) + + +class TorchMemoryManager(MemoryManager): + """ + PyTorch-based memory manager with CPU offloading support. + + Follows Single Responsibility: Only manages memory, no training logic. + Follows Open/Closed: Extend by subclassing, don't modify. + + Attributes: + config: Immutable memory configuration + _offloaded: Dict of tensors offloaded to CPU + _pinned: Whether to use pinned memory for CPU tensors + """ + + def __init__(self, config: MemoryConfig): + """ + Initialize memory manager. 
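+
+        A minimal construction sketch (illustrative values):
+
+            config = MemoryConfig(max_gpu_memory_mb=3500, max_cpu_memory_mb=8000)
+            manager = TorchMemoryManager(config)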
+ + Args: + config: Frozen MemoryConfig instance + """ + self._config = config + self._offloaded: Dict[str, Tensor] = {} + self._offload_metadata: Dict[str, Dict] = {} + + # Set memory limits if GPU available + if torch.cuda.is_available() and config.max_gpu_memory_mb > 0: + # Reserve some memory for system operations + fraction = config.max_gpu_memory_mb / ( + torch.cuda.get_device_properties(0).total_memory / (1024 * 1024) + ) + fraction = min(0.95, fraction) # Never use more than 95% + torch.cuda.set_per_process_memory_fraction(fraction) + logger.info(f"GPU memory fraction set to {fraction:.2%}") + + @property + def config(self) -> MemoryConfig: + """Return immutable config (no setter - prevents tampering).""" + return self._config + + def get_snapshot(self) -> MemorySnapshot: + """ + Get current memory state without side effects. + + This is a pure query - no state modification. + """ + if torch.cuda.is_available(): + gpu_allocated = torch.cuda.memory_allocated() / (1024 * 1024) + gpu_reserved = torch.cuda.memory_reserved() / (1024 * 1024) + gpu_max = torch.cuda.get_device_properties(0).total_memory / (1024 * 1024) + else: + gpu_allocated = gpu_reserved = gpu_max = 0.0 + + # Estimate CPU memory used by offloaded tensors + cpu_used = sum( + t.numel() * t.element_size() / (1024 * 1024) + for t in self._offloaded.values() + ) + + return MemorySnapshot( + gpu_allocated_mb=gpu_allocated, + gpu_reserved_mb=gpu_reserved, + gpu_max_mb=gpu_max, + cpu_used_mb=cpu_used, + timestamp=time.time(), + ) + + def can_allocate(self, size_bytes: int) -> bool: + """ + Check if allocation is possible without actually allocating. + + Pure function - no side effects. + """ + if not torch.cuda.is_available(): + return True # CPU allocation always possible (simplified) + + snapshot = self.get_snapshot() + size_mb = size_bytes / (1024 * 1024) + available = self._config.max_gpu_memory_mb - snapshot.gpu_allocated_mb + + # Add safety margin + safety_margin = 50 # MB + return size_mb < (available - safety_margin) + + def offload_to_cpu(self, tensor: Tensor, name: str) -> Tensor: + """ + Offload tensor to CPU memory. + + Args: + tensor: GPU tensor to offload + name: Unique identifier + + Returns: + CPU tensor (pinned if configured) + + Raises: + ValueError: If name already exists (prevents silent overwrite) + """ + if name in self._offloaded: + raise ValueError( + f"Tensor '{name}' already offloaded. " + "Use unique names or restore first. (Integrity protection)" + ) + + # Store metadata for validation + self._offload_metadata[name] = { + "shape": tuple(tensor.shape), + "dtype": tensor.dtype, + "device": str(tensor.device), + "numel": tensor.numel(), + } + + # Transfer to CPU + if self._config.pin_cpu_memory and torch.cuda.is_available(): + cpu_tensor = tensor.detach().cpu().pin_memory() + else: + cpu_tensor = tensor.detach().cpu() + + self._offloaded[name] = cpu_tensor + + logger.debug( + f"Offloaded '{name}': {tensor.shape} " + f"({tensor.numel() * tensor.element_size() / 1024:.1f} KB)" + ) + + return cpu_tensor + + def restore_to_gpu(self, name: str, device: torch.device) -> Tensor: + """ + Restore tensor from CPU to GPU. 
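+
+        Offload/restore round trip (illustrative; "kv_cache" and the tensor
+        names are assumed):
+
+            cpu_copy = manager.offload_to_cpu(gpu_tensor, "kv_cache")
+            # ... GPU memory is free for other work here ...
+            gpu_tensor = manager.restore_to_gpu("kv_cache", torch.device("cuda:0"))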
+ + Args: + name: Identifier used during offload + device: Target GPU device + + Returns: + GPU tensor + + Raises: + KeyError: If name not found + RuntimeError: If tensor integrity check fails + """ + if name not in self._offloaded: + raise KeyError(f"No offloaded tensor found with name '{name}'") + + cpu_tensor = self._offloaded.pop(name) + metadata = self._offload_metadata.pop(name) + + # Integrity check + if cpu_tensor.numel() != metadata["numel"]: + raise RuntimeError( + f"Tensor integrity check failed for '{name}'. " + f"Expected {metadata['numel']} elements, got {cpu_tensor.numel()}" + ) + + # Transfer to GPU + gpu_tensor = cpu_tensor.to(device, non_blocking=True) + + logger.debug(f"Restored '{name}' to {device}") + + return gpu_tensor + + def clear_cache(self) -> None: + """Clear GPU cache and trigger garbage collection.""" + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + logger.debug("Cache cleared") + + def get_offloaded_count(self) -> int: + """Get number of currently offloaded tensors.""" + return len(self._offloaded) + + def get_offloaded_memory_mb(self) -> float: + """Get total memory used by offloaded tensors in MB.""" + return sum( + t.numel() * t.element_size() / (1024 * 1024) + for t in self._offloaded.values() + ) + + def __repr__(self) -> str: + snapshot = self.get_snapshot() + return ( + f"TorchMemoryManager(" + f"gpu={snapshot.gpu_allocated_mb:.1f}/{snapshot.gpu_max_mb:.1f}MB, " + f"offloaded={self.get_offloaded_count()} tensors)" + ) + + +class MemoryGuard: + """ + Context manager for memory-safe operations. + + Usage: + with MemoryGuard(manager, min_free_mb=500): + # Operations that need memory + pass + """ + + def __init__( + self, + manager: MemoryManager, + min_free_mb: float = 100, + auto_clear: bool = True + ): + self._manager = manager + self._min_free_mb = min_free_mb + self._auto_clear = auto_clear + self._initial_snapshot: Optional[MemorySnapshot] = None + + def __enter__(self) -> "MemoryGuard": + self._initial_snapshot = self._manager.get_snapshot() + + # Pre-emptively clear cache if low on memory + available = ( + self._initial_snapshot.gpu_max_mb - + self._initial_snapshot.gpu_allocated_mb + ) + if available < self._min_free_mb: + self._manager.clear_cache() + logger.warning(f"Pre-emptive cache clear: {available:.1f}MB available") + + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> bool: + if self._auto_clear: + self._manager.clear_cache() + + # Log memory change + final_snapshot = self._manager.get_snapshot() + delta = ( + final_snapshot.gpu_allocated_mb - + self._initial_snapshot.gpu_allocated_mb + ) + if abs(delta) > 10: # Only log significant changes + logger.debug(f"Memory delta: {delta:+.1f}MB") + + return False # Don't suppress exceptions + + +__all__ = ["TorchMemoryManager", "MemoryGuard"] diff --git a/go1/tools/low_vram/adapters/model_freezer.py b/go1/tools/low_vram/adapters/model_freezer.py new file mode 100644 index 0000000..e4a00c3 --- /dev/null +++ b/go1/tools/low_vram/adapters/model_freezer.py @@ -0,0 +1,177 @@ +""" +Model Freezer Implementation +============================= + +Handles selective freezing/unfreezing of model components. +Single Responsibility: Parameter freeze state management only. 
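+
+Typical usage (illustrative sketch; `model` is assumed to expose GO-1 style
+submodules such as `vision_model` and `language_model`):
+
+    freezer = ComponentFreezer()
+    freezer.freeze_component(model, "vision")      # returns the number of parameters frozen
+    summary = freezer.freeze_for_low_vram(model)   # freezes vision + language in one call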
+""" + +import logging +from typing import Dict, List, Optional, Set + +import torch +from torch import nn + +from go1.tools.low_vram.core.interfaces import ModelFreezer + +logger = logging.getLogger(__name__) + + +class ComponentFreezer(ModelFreezer): + """ + Smart model component freezer with named component support. + + Single Responsibility: Only handles freeze/unfreeze operations. + Open/Closed: Add new component patterns without modifying core logic. + """ + + # Default component patterns for GO-1 model + GO1_COMPONENTS = { + "vision": ["vision_model"], + "language": ["language_model"], + "action_expert": ["action_model"], + "latent_planner": ["latent_planner"], + "adapters": ["mlp1", "k_proj", "v_proj", "state_adaptor", "action_adaptor"], + "embedders": ["time_embedder", "freq_embedder"], + "final": ["final_layer"], + } + + def __init__(self, component_patterns: Optional[Dict[str, List[str]]] = None): + """ + Initialize freezer with component patterns. + + Args: + component_patterns: Mapping of component names to module name patterns. + Defaults to GO1_COMPONENTS. + """ + self._patterns = component_patterns or self.GO1_COMPONENTS.copy() + self._frozen_components: Set[str] = set() + + def _get_modules_for_component( + self, + model: nn.Module, + component_name: str + ) -> List[nn.Module]: + """Get all modules matching a component name.""" + if component_name not in self._patterns: + # Try direct attribute access + if hasattr(model, component_name): + return [getattr(model, component_name)] + raise ValueError(f"Unknown component: {component_name}") + + modules = [] + for pattern in self._patterns[component_name]: + if hasattr(model, pattern): + modules.append(getattr(model, pattern)) + + return modules + + def freeze_component(self, model: nn.Module, component_name: str) -> int: + """ + Freeze a model component by name. + + Args: + model: The model containing the component + component_name: Name of the component to freeze + + Returns: + Number of parameters frozen + """ + modules = self._get_modules_for_component(model, component_name) + + frozen_count = 0 + for module in modules: + for param in module.parameters(): + if param.requires_grad: + param.requires_grad = False + frozen_count += param.numel() + + if frozen_count > 0: + self._frozen_components.add(component_name) + logger.info( + f"Frozen '{component_name}': {frozen_count:,} parameters " + f"({frozen_count * 4 / 1e6:.1f}MB saved)" + ) + + return frozen_count + + def unfreeze_component(self, model: nn.Module, component_name: str) -> int: + """ + Unfreeze a model component by name. 
+ + Args: + model: The model containing the component + component_name: Name of the component to unfreeze + + Returns: + Number of parameters unfrozen + """ + modules = self._get_modules_for_component(model, component_name) + + unfrozen_count = 0 + for module in modules: + for param in module.parameters(): + if not param.requires_grad: + param.requires_grad = True + unfrozen_count += param.numel() + + if unfrozen_count > 0: + self._frozen_components.discard(component_name) + logger.info(f"Unfrozen '{component_name}': {unfrozen_count:,} parameters") + + return unfrozen_count + + def get_frozen_params(self, model: nn.Module) -> int: + """Get total number of frozen parameters.""" + return sum( + p.numel() for p in model.parameters() if not p.requires_grad + ) + + def get_trainable_params(self, model: nn.Module) -> int: + """Get total number of trainable parameters.""" + return sum( + p.numel() for p in model.parameters() if p.requires_grad + ) + + def freeze_for_low_vram(self, model: nn.Module) -> Dict[str, int]: + """ + Apply recommended freeze configuration for 4GB GPU. + + Freezes vision and language models, keeps action expert trainable. + + Returns: + Dict of component names to frozen param counts + """ + results = {} + + # Freeze heavy components + for component in ["vision", "language"]: + try: + results[component] = self.freeze_component(model, component) + except ValueError: + logger.warning(f"Component '{component}' not found in model") + + # Log summary + total_frozen = sum(results.values()) + trainable = self.get_trainable_params(model) + + logger.info( + f"Low-VRAM freeze complete: " + f"{total_frozen:,} frozen, {trainable:,} trainable " + f"(~{trainable * 4 / 1e6:.1f}MB for gradients)" + ) + + return results + + def get_frozen_components(self) -> Set[str]: + """Get set of currently frozen component names.""" + return self._frozen_components.copy() + + def __repr__(self) -> str: + return ( + f"ComponentFreezer(frozen={list(self._frozen_components)}, " + f"patterns={list(self._patterns.keys())})" + ) + + +__all__ = ["ComponentFreezer"] diff --git a/go1/tools/low_vram/adapters/training_strategy.py b/go1/tools/low_vram/adapters/training_strategy.py new file mode 100644 index 0000000..07f4170 --- /dev/null +++ b/go1/tools/low_vram/adapters/training_strategy.py @@ -0,0 +1,278 @@ +""" +Training Strategy Implementation +================================= + +Implements low-VRAM training with gradient accumulation and mixed precision. +Single Responsibility: Training step logic only. + +Design Patterns: + - Strategy Pattern: Swappable training strategies + - Template Method: Common training flow, customizable steps +""" + +import logging +from typing import Dict, Optional, Tuple + +import torch +from torch import Tensor +from torch.cuda.amp import GradScaler, autocast + +from go1.tools.low_vram.core.interfaces import ( + TrainingConfig, + TrainingStrategy, + MemoryManager, +) + +logger = logging.getLogger(__name__) + + +class GradientAccumulationMixin: + """ + Mixin for gradient accumulation logic. + + Follows DRY: Shared logic extracted into reusable mixin. + """ + + def _should_accumulate_impl( + self, + step: int, + accumulation_steps: int + ) -> bool: + """ + Determine if we should accumulate gradients (not step optimizer). + + Pure function - no side effects. 
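+
+        For accumulation_steps=4 this returns True for steps 0, 1 and 2, and
+        False for step 3 (and every 4th step thereafter), so the optimizer
+        steps once per four micro-batches.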
+ """ + return (step + 1) % accumulation_steps != 0 + + def _scale_loss(self, loss: Tensor, accumulation_steps: int) -> Tensor: + """Scale loss for gradient accumulation.""" + return loss / accumulation_steps + + +class LowVRAMTrainingStrategy(TrainingStrategy, GradientAccumulationMixin): + """ + Memory-efficient training strategy for 4-8GB GPUs. + + Features: + - Gradient accumulation (effective batch size with micro batches) + - Mixed precision (FP16 forward, FP32 gradients) + - Gradient checkpointing support + - Optional CPU offloading of optimizer states + + Single Responsibility: Only handles training step execution. + Does NOT manage memory directly (uses MemoryManager interface). + """ + + def __init__( + self, + config: TrainingConfig, + memory_manager: Optional[MemoryManager] = None, + device: Optional[torch.device] = None, + ): + """ + Initialize training strategy. + + Args: + config: Frozen training configuration + memory_manager: Optional memory manager for logging + device: Target device (defaults to cuda if available) + """ + self._config = config + self._memory_manager = memory_manager + self._device = device or ( + torch.device("cuda") if torch.cuda.is_available() + else torch.device("cpu") + ) + + # Mixed precision scaler + self._scaler: Optional[GradScaler] = None + self._use_amp = torch.cuda.is_available() + + # Metrics tracking + self._step_count = 0 + self._accumulated_loss = 0.0 + + def _initialize_scaler(self) -> None: + """Lazy initialization of GradScaler (allows serialization).""" + if self._scaler is None and self._use_amp: + self._scaler = GradScaler() + + def get_config(self) -> TrainingConfig: + """Return immutable training configuration.""" + return self._config + + def should_accumulate(self, step: int) -> bool: + """Check if gradients should be accumulated (no optimizer step).""" + return self._should_accumulate_impl( + step, + self._config.gradient_accumulation_steps + ) + + def training_step( + self, + model: torch.nn.Module, + batch: Dict[str, Tensor], + step: int, + ) -> Tuple[Tensor, Dict[str, float]]: + """ + Execute one training step with low-VRAM optimizations. 
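+
+        Example caller loop (illustrative; `optimizer` and `dataloader` are
+        assumed to be created by the caller):
+
+            for step, batch in enumerate(dataloader):
+                loss, metrics = strategy.training_step(model, batch, step)
+                if not strategy.should_accumulate(step):
+                    strategy.optimizer_step(optimizer)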
+ + Args: + model: The model to train (must be in train mode) + batch: Input batch with tensors on correct device + step: Current global step number + + Returns: + Tuple of (scaled loss tensor for backward, metrics dict) + + Note: + Caller is responsible for: + - Calling optimizer.step() when should_accumulate returns False + - Calling optimizer.zero_grad() after optimizer.step() + """ + self._initialize_scaler() + + model.train() + metrics: Dict[str, float] = {} + + # Move batch to device (no-op if already there) + batch = self._move_batch_to_device(batch) + + # Mixed precision forward pass + if self._use_amp: + with autocast(): + loss, model_metrics = self._forward_pass(model, batch) + else: + loss, model_metrics = self._forward_pass(model, batch) + + metrics.update(model_metrics) + + # Scale loss for gradient accumulation + scaled_loss = self._scale_loss( + loss, + self._config.gradient_accumulation_steps + ) + + # Backward pass with mixed precision + if self._use_amp and self._scaler is not None: + self._scaler.scale(scaled_loss).backward() + else: + scaled_loss.backward() + + # Track metrics + self._accumulated_loss += loss.detach().item() + self._step_count += 1 + + metrics["loss"] = loss.detach().item() + metrics["scaled_loss"] = scaled_loss.detach().item() + + # Add memory metrics if manager available + if self._memory_manager is not None: + snapshot = self._memory_manager.get_snapshot() + metrics["gpu_memory_mb"] = snapshot.gpu_allocated_mb + + return scaled_loss, metrics + + def _move_batch_to_device(self, batch: Dict[str, Tensor]) -> Dict[str, Tensor]: + """Move batch tensors to target device.""" + return { + key: value.to(self._device, non_blocking=True) + if isinstance(value, Tensor) else value + for key, value in batch.items() + } + + def _forward_pass( + self, + model: torch.nn.Module, + batch: Dict[str, Tensor], + ) -> Tuple[Tensor, Dict[str, float]]: + """ + Execute forward pass and compute loss. + + Override this method for custom forward logic. + Template Method Pattern: Default implementation, customizable. + """ + # Extract inputs from batch (customize based on model interface) + outputs = model(**batch) + + # Handle different output formats + if isinstance(outputs, tuple): + loss = outputs[0] + metrics = {"action_loss": loss.item()} if hasattr(loss, 'item') else {} + elif hasattr(outputs, "loss"): + loss = outputs.loss + metrics = {} + if hasattr(outputs, "action_loss") and outputs.action_loss is not None: + metrics["action_loss"] = outputs.action_loss.item() + else: + loss = outputs + metrics = {} + + return loss, metrics + + def optimizer_step( + self, + optimizer: torch.optim.Optimizer, + scheduler: Optional[torch.optim.lr_scheduler._LRScheduler] = None, + max_grad_norm: Optional[float] = 1.0, + ) -> Dict[str, float]: + """ + Execute optimizer step with mixed precision support. 
+ + Args: + optimizer: The optimizer + scheduler: Optional learning rate scheduler + max_grad_norm: Max gradient norm for clipping (None to disable) + + Returns: + Metrics dict with gradient info + """ + metrics: Dict[str, float] = {} + + # Gradient clipping + if max_grad_norm is not None: + if self._use_amp and self._scaler is not None: + self._scaler.unscale_(optimizer) + + total_norm = torch.nn.utils.clip_grad_norm_( + filter(lambda p: p.grad is not None, + filter(lambda p: p.requires_grad, + optimizer.param_groups[0]['params'])), + max_grad_norm + ) + metrics["grad_norm"] = total_norm.item() if hasattr(total_norm, 'item') else total_norm + + # Optimizer step + if self._use_amp and self._scaler is not None: + self._scaler.step(optimizer) + self._scaler.update() + else: + optimizer.step() + + # Scheduler step + if scheduler is not None: + scheduler.step() + metrics["learning_rate"] = scheduler.get_last_lr()[0] + + # Zero gradients + optimizer.zero_grad(set_to_none=True) # More memory efficient + + # Calculate average loss over accumulation steps + if self._step_count > 0: + metrics["avg_loss"] = self._accumulated_loss / self._step_count + self._accumulated_loss = 0.0 + self._step_count = 0 + + return metrics + + def __repr__(self) -> str: + return ( + f"LowVRAMTrainingStrategy(" + f"batch_size={self._config.batch_size}, " + f"accum_steps={self._config.gradient_accumulation_steps}, " + f"amp={self._use_amp})" + ) + + +__all__ = ["LowVRAMTrainingStrategy", "GradientAccumulationMixin"] diff --git a/go1/tools/low_vram/core/__init__.py b/go1/tools/low_vram/core/__init__.py new file mode 100644 index 0000000..5d78096 --- /dev/null +++ b/go1/tools/low_vram/core/__init__.py @@ -0,0 +1,29 @@ +"""Core module - Domain layer with pure business logic.""" + +from go1.tools.low_vram.core.interfaces import ( + MemoryTier, + TrainingPhase, + MemoryConfig, + MemorySnapshot, + TrainingConfig, + MemoryManager, + TrainingStrategy, + FeatureCache, + ModelFreezer, + ProgressReporter, + TrainerFactory, +) + +__all__ = [ + "MemoryTier", + "TrainingPhase", + "MemoryConfig", + "MemorySnapshot", + "TrainingConfig", + "MemoryManager", + "TrainingStrategy", + "FeatureCache", + "ModelFreezer", + "ProgressReporter", + "TrainerFactory", +] diff --git a/go1/tools/low_vram/core/interfaces.py b/go1/tools/low_vram/core/interfaces.py new file mode 100644 index 0000000..f083343 --- /dev/null +++ b/go1/tools/low_vram/core/interfaces.py @@ -0,0 +1,348 @@ +""" +Core Interfaces for Low-VRAM Training +====================================== + +Following Interface Segregation Principle (ISP): +Each interface is small and focused on a single responsibility. + +Following Dependency Inversion Principle (DIP): +High-level modules depend on these abstractions, not concrete implementations. 
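+
+Example (illustrative): high-level code can be written purely against these
+abstractions and receive any concrete implementation at runtime. The helper
+below is hypothetical and not part of this package:
+
+    def maybe_free_memory(manager: MemoryManager, size_bytes: int) -> None:
+        if not manager.can_allocate(size_bytes):
+            manager.clear_cache()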
+""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from enum import Enum, auto +from typing import Dict, Iterator, Optional, Tuple, Any +import torch +from torch import Tensor + + +# ============================================================================= +# ENUMS - Type-safe configuration options +# ============================================================================= + +class MemoryTier(Enum): + """Memory tier for offloading decisions.""" + GPU_HIGH_PRIORITY = auto() # Stay on GPU always + GPU_LOW_PRIORITY = auto() # On GPU, but can be offloaded + CPU_PINNED = auto() # On CPU with pinned memory + CPU_STANDARD = auto() # On CPU standard memory + DISK = auto() # Offloaded to disk (extreme low memory) + + +class TrainingPhase(Enum): + """Current phase of training for strategy selection.""" + WARMUP = auto() + TRAINING = auto() + VALIDATION = auto() + CHECKPOINTING = auto() + + +# ============================================================================= +# DATA CLASSES - Immutable configuration and state (no side effects) +# ============================================================================= + +@dataclass(frozen=True) +class MemoryConfig: + """ + Immutable configuration for memory management. + + Frozen dataclass ensures no tampering after creation (Integrity). + """ + max_gpu_memory_mb: int + max_cpu_memory_mb: int + enable_cpu_offload: bool = True + enable_disk_offload: bool = False + gradient_checkpointing: bool = True + mixed_precision: bool = True + pin_cpu_memory: bool = True + + +@dataclass(frozen=True) +class MemorySnapshot: + """Immutable snapshot of current memory state.""" + gpu_allocated_mb: float + gpu_reserved_mb: float + gpu_max_mb: float + cpu_used_mb: float + timestamp: float + + +@dataclass(frozen=True) +class TrainingConfig: + """Immutable training configuration.""" + batch_size: int = 1 + gradient_accumulation_steps: int = 16 + learning_rate: float = 1e-4 + max_epochs: int = 10 + warmup_steps: int = 100 + freeze_vision: bool = True + freeze_llm: bool = True + train_action_expert: bool = True + + +# ============================================================================= +# ABSTRACT INTERFACES - Dependency Inversion Principle +# ============================================================================= + +class MemoryManager(ABC): + """ + Interface for GPU/CPU memory management. + + Single Responsibility: Only manages memory allocation and offloading. + Does NOT handle training logic or model operations. + """ + + @abstractmethod + def get_snapshot(self) -> MemorySnapshot: + """Get current memory state without side effects.""" + pass + + @abstractmethod + def can_allocate(self, size_bytes: int) -> bool: + """Check if allocation is possible without allocating.""" + pass + + @abstractmethod + def offload_to_cpu(self, tensor: Tensor, name: str) -> Tensor: + """ + Offload tensor to CPU, return CPU tensor. + + Args: + tensor: GPU tensor to offload + name: Identifier for later retrieval + + Returns: + CPU tensor (pinned if configured) + """ + pass + + @abstractmethod + def restore_to_gpu(self, name: str, device: torch.device) -> Tensor: + """ + Restore tensor from CPU to GPU. + + Args: + name: Identifier used during offload + device: Target GPU device + + Returns: + GPU tensor + """ + pass + + @abstractmethod + def clear_cache(self) -> None: + """Clear GPU cache and garbage collect.""" + pass + + +class TrainingStrategy(ABC): + """ + Interface for training strategies (Strategy Pattern). 
+ + Single Responsibility: Only handles training step logic. + Does NOT handle memory management directly. + """ + + @abstractmethod + def training_step( + self, + model: torch.nn.Module, + batch: Dict[str, Tensor], + step: int, + ) -> Tuple[Tensor, Dict[str, float]]: + """ + Execute one training step. + + Args: + model: The model to train + batch: Input batch dictionary + step: Current global step number + + Returns: + Tuple of (loss tensor, metrics dict) + """ + pass + + @abstractmethod + def should_accumulate(self, step: int) -> bool: + """Check if gradients should be accumulated (no optimizer step).""" + pass + + @abstractmethod + def get_config(self) -> TrainingConfig: + """Return current training configuration.""" + pass + + +class FeatureCache(ABC): + """ + Interface for caching pre-computed features. + + Single Responsibility: Only handles feature storage and retrieval. + """ + + @abstractmethod + def store(self, key: str, features: Tensor, checksum: Optional[str] = None) -> None: + """ + Store features with optional integrity checksum. + + Args: + key: Unique identifier for the features + features: Tensor to store + checksum: Optional hash for integrity verification + """ + pass + + @abstractmethod + def retrieve(self, key: str, verify_checksum: bool = True) -> Optional[Tensor]: + """ + Retrieve features, optionally verifying integrity. + + Args: + key: Identifier used during storage + verify_checksum: Whether to verify data integrity + + Returns: + Cached tensor or None if not found/corrupted + """ + pass + + @abstractmethod + def exists(self, key: str) -> bool: + """Check if features exist in cache without loading.""" + pass + + @abstractmethod + def clear(self) -> None: + """Clear all cached features.""" + pass + + @abstractmethod + def get_stats(self) -> Dict[str, Any]: + """Get cache statistics (hits, misses, size).""" + pass + + +class ModelFreezer(ABC): + """ + Interface for model parameter freezing. + + Single Responsibility: Only handles freeze/unfreeze operations. + """ + + @abstractmethod + def freeze_component(self, model: torch.nn.Module, component_name: str) -> int: + """ + Freeze a model component by name. + + Args: + model: The model containing the component + component_name: Name of the component to freeze + + Returns: + Number of parameters frozen + """ + pass + + @abstractmethod + def unfreeze_component(self, model: torch.nn.Module, component_name: str) -> int: + """Unfreeze a model component by name.""" + pass + + @abstractmethod + def get_frozen_params(self, model: torch.nn.Module) -> int: + """Get total number of frozen parameters.""" + pass + + @abstractmethod + def get_trainable_params(self, model: torch.nn.Module) -> int: + """Get total number of trainable parameters.""" + pass + + +class ProgressReporter(ABC): + """ + Interface for progress reporting (Observer Pattern). + + Single Responsibility: Only handles progress updates. + Follows Open/Closed - extend by adding new reporters. 
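+
+    A new reporter (e.g. one writing to TensorBoard or CSV) only needs to
+    subclass this interface; trainer code stays unchanged. Minimal sketch
+    (illustrative, other hooks implemented analogously):
+
+        class CSVReporter(ProgressReporter):
+            def on_step(self, step, total_steps, loss, metrics):
+                ...  # append one row per logged step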
+ """ + + @abstractmethod + def on_epoch_start(self, epoch: int, total_epochs: int) -> None: + """Called when an epoch starts.""" + pass + + @abstractmethod + def on_step( + self, + step: int, + total_steps: int, + loss: float, + metrics: Dict[str, float], + ) -> None: + """Called after each training step.""" + pass + + @abstractmethod + def on_epoch_end(self, epoch: int, metrics: Dict[str, float]) -> None: + """Called when an epoch ends.""" + pass + + @abstractmethod + def on_checkpoint(self, path: str) -> None: + """Called when a checkpoint is saved.""" + pass + + +# ============================================================================= +# FACTORY INTERFACE - Abstract Factory Pattern +# ============================================================================= + +class TrainerFactory(ABC): + """ + Abstract Factory for creating training components. + + Allows dependency injection of different implementations. + """ + + @abstractmethod + def create_memory_manager(self, config: MemoryConfig) -> MemoryManager: + """Create a memory manager instance.""" + pass + + @abstractmethod + def create_training_strategy(self, config: TrainingConfig) -> TrainingStrategy: + """Create a training strategy instance.""" + pass + + @abstractmethod + def create_feature_cache(self, cache_dir: str) -> FeatureCache: + """Create a feature cache instance.""" + pass + + @abstractmethod + def create_model_freezer(self) -> ModelFreezer: + """Create a model freezer instance.""" + pass + + +# Export all public interfaces +__all__ = [ + # Enums + "MemoryTier", + "TrainingPhase", + # Data classes + "MemoryConfig", + "MemorySnapshot", + "TrainingConfig", + # Interfaces + "MemoryManager", + "TrainingStrategy", + "FeatureCache", + "ModelFreezer", + "ProgressReporter", + "TrainerFactory", +] diff --git a/go1/tools/low_vram/infrastructure/__init__.py b/go1/tools/low_vram/infrastructure/__init__.py new file mode 100644 index 0000000..ed3ed6c --- /dev/null +++ b/go1/tools/low_vram/infrastructure/__init__.py @@ -0,0 +1,18 @@ +""" +Infrastructure Module - Frameworks & Drivers Layer +=================================================== + +Contains PyTorch-specific implementations and configuration. +This is the outermost layer in Clean Architecture. +""" + +from go1.tools.low_vram.infrastructure.trainer import LowVRAMTrainer +from go1.tools.low_vram.infrastructure.factory import DefaultTrainerFactory +from go1.tools.low_vram.infrastructure.config import load_config, save_config + +__all__ = [ + "LowVRAMTrainer", + "DefaultTrainerFactory", + "load_config", + "save_config", +] diff --git a/go1/tools/low_vram/infrastructure/config.py b/go1/tools/low_vram/infrastructure/config.py new file mode 100644 index 0000000..3e5353a --- /dev/null +++ b/go1/tools/low_vram/infrastructure/config.py @@ -0,0 +1,82 @@ +""" +Configuration Loading/Saving +============================= + +YAML-based configuration for low-VRAM training. +""" + +import json +import logging +from dataclasses import asdict +from pathlib import Path +from typing import Any, Dict, Optional, Union + +from go1.tools.low_vram.core.interfaces import MemoryConfig, TrainingConfig + +logger = logging.getLogger(__name__) + + +def load_config( + config_path: Union[str, Path], +) -> tuple[MemoryConfig, TrainingConfig]: + """ + Load configuration from JSON file. 
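+
+    The file is expected to hold two top-level objects whose keys mirror the
+    MemoryConfig and TrainingConfig fields, e.g. (illustrative values):
+
+        {
+          "memory": {"max_gpu_memory_mb": 3500, "max_cpu_memory_mb": 8000},
+          "training": {"batch_size": 1, "gradient_accumulation_steps": 16}
+        }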
+ + Args: + config_path: Path to configuration file + + Returns: + Tuple of (MemoryConfig, TrainingConfig) + """ + config_path = Path(config_path) + + if not config_path.exists(): + raise FileNotFoundError(f"Config file not found: {config_path}") + + with open(config_path, "r") as f: + data = json.load(f) + + memory_config = MemoryConfig(**data.get("memory", {})) + training_config = TrainingConfig(**data.get("training", {})) + + logger.info(f"Loaded configuration from {config_path}") + + return memory_config, training_config + + +def save_config( + memory_config: MemoryConfig, + training_config: TrainingConfig, + config_path: Union[str, Path], +) -> None: + """ + Save configuration to JSON file. + + Args: + memory_config: Memory configuration + training_config: Training configuration + config_path: Output path + """ + config_path = Path(config_path) + config_path.parent.mkdir(parents=True, exist_ok=True) + + data = { + "memory": asdict(memory_config), + "training": asdict(training_config), + } + + with open(config_path, "w") as f: + json.dump(data, f, indent=2) + + logger.info(f"Saved configuration to {config_path}") + + +def create_default_config_file(output_path: Union[str, Path]) -> None: + """Create a default configuration file for reference.""" + from go1.tools.low_vram.infrastructure.factory import create_4gb_config + + memory_config, training_config = create_4gb_config() + save_config(memory_config, training_config, output_path) + + +__all__ = ["load_config", "save_config", "create_default_config_file"] diff --git a/go1/tools/low_vram/infrastructure/factory.py b/go1/tools/low_vram/infrastructure/factory.py new file mode 100644 index 0000000..28af259 --- /dev/null +++ b/go1/tools/low_vram/infrastructure/factory.py @@ -0,0 +1,189 @@ +""" +Trainer Factory Implementation +=============================== + +Factory for creating training components. +Follows Abstract Factory Pattern for dependency injection. +""" + +import logging +from typing import Optional + +import torch +from torch import nn +from torch.optim import AdamW + +from go1.tools.low_vram.core.interfaces import ( + FeatureCache, + MemoryConfig, + MemoryManager, + ModelFreezer, + TrainerFactory, + TrainingConfig, + TrainingStrategy, +) +from go1.tools.low_vram.adapters.memory_manager import TorchMemoryManager +from go1.tools.low_vram.adapters.training_strategy import LowVRAMTrainingStrategy +from go1.tools.low_vram.adapters.feature_cache import DiskFeatureCache +from go1.tools.low_vram.adapters.model_freezer import ComponentFreezer + +logger = logging.getLogger(__name__) + + +class DefaultTrainerFactory(TrainerFactory): + """ + Default factory for creating low-VRAM training components. + + Implements Abstract Factory Pattern. + Single Responsibility: Only creates components, doesn't configure them. + """ + + def __init__(self, device: Optional[torch.device] = None): + """ + Initialize factory. 
+ + Args: + device: Target device (defaults to cuda if available) + """ + self._device = device or ( + torch.device("cuda") if torch.cuda.is_available() + else torch.device("cpu") + ) + + # Cached instances for reuse + self._memory_manager: Optional[MemoryManager] = None + + def create_memory_manager(self, config: MemoryConfig) -> MemoryManager: + """Create and cache a memory manager instance.""" + if self._memory_manager is None: + self._memory_manager = TorchMemoryManager(config) + return self._memory_manager + + def create_training_strategy( + self, + config: TrainingConfig, + memory_manager: Optional[MemoryManager] = None, + ) -> TrainingStrategy: + """Create a training strategy instance.""" + return LowVRAMTrainingStrategy( + config=config, + memory_manager=memory_manager, + device=self._device, + ) + + def create_feature_cache(self, cache_dir: str) -> FeatureCache: + """Create a feature cache instance.""" + return DiskFeatureCache(cache_dir) + + def create_model_freezer(self) -> ModelFreezer: + """Create a model freezer instance.""" + return ComponentFreezer() + + def create_optimizer( + self, + model: nn.Module, + config: TrainingConfig, + use_8bit: bool = False, + ) -> torch.optim.Optimizer: + """ + Create optimizer with optional 8-bit quantization. + + Args: + model: Model to optimize + config: Training configuration + use_8bit: Whether to use 8-bit Adam (requires bitsandbytes) + + Returns: + Optimizer instance + """ + # Filter trainable parameters + params = [p for p in model.parameters() if p.requires_grad] + + if not params: + raise ValueError("No trainable parameters found!") + + if use_8bit: + try: + import bitsandbytes as bnb + optimizer = bnb.optim.Adam8bit( + params, + lr=config.learning_rate, + weight_decay=0.01, + ) + logger.info("Using 8-bit Adam optimizer (saves ~50% optimizer memory)") + except ImportError: + logger.warning( + "bitsandbytes not installed, falling back to standard AdamW. " + "Install with: pip install bitsandbytes" + ) + optimizer = AdamW(params, lr=config.learning_rate, weight_decay=0.01) + else: + optimizer = AdamW(params, lr=config.learning_rate, weight_decay=0.01) + + return optimizer + + def __repr__(self) -> str: + return f"DefaultTrainerFactory(device={self._device})" + + +def create_4gb_config() -> tuple[MemoryConfig, TrainingConfig]: + """ + Factory function for 4GB GPU configuration. + + Returns minimal memory footprint settings. 
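+
+    Example (illustrative):
+
+        memory_cfg, train_cfg = create_4gb_config()
+        manager = TorchMemoryManager(memory_cfg)   # or via DefaultTrainerFactory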
+ """ + memory_config = MemoryConfig( + max_gpu_memory_mb=3500, # Leave 500MB for system + max_cpu_memory_mb=8000, + enable_cpu_offload=True, + enable_disk_offload=False, + gradient_checkpointing=True, + mixed_precision=True, + pin_cpu_memory=True, + ) + + training_config = TrainingConfig( + batch_size=1, + gradient_accumulation_steps=16, + learning_rate=1e-4, + max_epochs=10, + warmup_steps=100, + freeze_vision=True, + freeze_llm=True, + train_action_expert=True, + ) + + return memory_config, training_config + + +def create_8gb_config() -> tuple[MemoryConfig, TrainingConfig]: + """Factory function for 8GB GPU configuration.""" + memory_config = MemoryConfig( + max_gpu_memory_mb=7000, + max_cpu_memory_mb=16000, + enable_cpu_offload=True, + enable_disk_offload=False, + gradient_checkpointing=True, + mixed_precision=True, + pin_cpu_memory=True, + ) + + training_config = TrainingConfig( + batch_size=2, + gradient_accumulation_steps=8, + learning_rate=1e-4, + max_epochs=10, + warmup_steps=100, + freeze_vision=True, + freeze_llm=False, # Can train last few LLM layers + train_action_expert=True, + ) + + return memory_config, training_config + + +__all__ = [ + "DefaultTrainerFactory", + "create_4gb_config", + "create_8gb_config", +] diff --git a/go1/tools/low_vram/infrastructure/trainer.py b/go1/tools/low_vram/infrastructure/trainer.py new file mode 100644 index 0000000..e6f8c98 --- /dev/null +++ b/go1/tools/low_vram/infrastructure/trainer.py @@ -0,0 +1,396 @@ +""" +Low-VRAM Trainer Orchestrator +============================== + +Main entry point for low-VRAM training. +Orchestrates all components following Clean Architecture. + +This class depends on abstractions (interfaces), not concrete implementations. +Concrete implementations are injected via the factory. 
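+
+Typical wiring (illustrative sketch; `model` and `train_loader` are assumed to
+be supplied by the caller and are not defined here):
+
+    from go1.tools.low_vram.infrastructure.factory import (
+        DefaultTrainerFactory,
+        create_4gb_config,
+    )
+
+    memory_cfg, train_cfg = create_4gb_config()
+    factory = DefaultTrainerFactory()
+
+    manager = factory.create_memory_manager(memory_cfg)
+    strategy = factory.create_training_strategy(train_cfg, memory_manager=manager)
+    optimizer = factory.create_optimizer(model, train_cfg)
+
+    trainer = LowVRAMTrainer(
+        model=model,
+        optimizer=optimizer,
+        memory_manager=manager,
+        training_strategy=strategy,
+        checkpoint_dir="./checkpoints",
+    )
+    history = trainer.train(train_loader, num_epochs=train_cfg.max_epochs)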
+""" + +import logging +import os +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Dict, Iterator, Optional, Tuple + +import torch +from torch import Tensor, nn +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler +from torch.utils.data import DataLoader + +from go1.tools.low_vram.core.interfaces import ( + FeatureCache, + MemoryConfig, + MemoryManager, + MemorySnapshot, + ModelFreezer, + ProgressReporter, + TrainingConfig, + TrainingStrategy, +) + +logger = logging.getLogger(__name__) + + +@dataclass +class TrainerState: + """Mutable state container for trainer (separates state from logic).""" + epoch: int = 0 + global_step: int = 0 + best_loss: float = float("inf") + total_tokens_trained: int = 0 + + +class ConsoleProgressReporter(ProgressReporter): + """Simple console-based progress reporter.""" + + def __init__(self, log_interval: int = 10): + self._log_interval = log_interval + self._epoch_start_time: float = 0 + + def on_epoch_start(self, epoch: int, total_epochs: int) -> None: + self._epoch_start_time = time.time() + logger.info(f"{'='*60}") + logger.info(f"Epoch {epoch + 1}/{total_epochs}") + logger.info(f"{'='*60}") + + def on_step( + self, + step: int, + total_steps: int, + loss: float, + metrics: Dict[str, float], + ) -> None: + if step % self._log_interval == 0: + gpu_mem = metrics.get("gpu_memory_mb", 0) + lr = metrics.get("learning_rate", 0) + logger.info( + f"Step {step}/{total_steps} | " + f"Loss: {loss:.4f} | " + f"GPU: {gpu_mem:.0f}MB | " + f"LR: {lr:.2e}" + ) + + def on_epoch_end(self, epoch: int, metrics: Dict[str, float]) -> None: + elapsed = time.time() - self._epoch_start_time + avg_loss = metrics.get("avg_loss", 0) + logger.info( + f"Epoch {epoch + 1} complete | " + f"Avg Loss: {avg_loss:.4f} | " + f"Time: {elapsed:.1f}s" + ) + + def on_checkpoint(self, path: str) -> None: + logger.info(f"Checkpoint saved: {path}") + + +class LowVRAMTrainer: + """ + Main orchestrator for low-VRAM training. + + Follows: + - Dependency Inversion: Depends on interfaces, not implementations + - Single Responsibility: Only orchestrates, doesn't implement components + - Open/Closed: Extend via new strategies, don't modify this class + + Security (STRIDE): + - Tampering: Checksums for cached features + - DoS: Memory limits prevent OOM + - Repudiation: Training logs for audit trail + """ + + def __init__( + self, + model: nn.Module, + optimizer: Optimizer, + memory_manager: MemoryManager, + training_strategy: TrainingStrategy, + feature_cache: Optional[FeatureCache] = None, + model_freezer: Optional[ModelFreezer] = None, + progress_reporter: Optional[ProgressReporter] = None, + scheduler: Optional[_LRScheduler] = None, + checkpoint_dir: Optional[str] = None, + max_grad_norm: float = 1.0, + ): + """ + Initialize trainer with injected dependencies. 
+ + Args: + model: The model to train + optimizer: Optimizer instance + memory_manager: Memory manager for GPU/CPU orchestration + training_strategy: Strategy for training steps + feature_cache: Optional cache for pre-computed features + model_freezer: Optional freezer for parameter management + progress_reporter: Optional progress reporter + scheduler: Optional learning rate scheduler + checkpoint_dir: Directory for saving checkpoints + max_grad_norm: Maximum gradient norm for clipping + """ + self._model = model + self._optimizer = optimizer + self._scheduler = scheduler + self._memory_manager = memory_manager + self._strategy = training_strategy + self._cache = feature_cache + self._freezer = model_freezer + self._reporter = progress_reporter or ConsoleProgressReporter() + self._checkpoint_dir = Path(checkpoint_dir) if checkpoint_dir else None + self._max_grad_norm = max_grad_norm + + # State + self._state = TrainerState() + + # Device + self._device = next(model.parameters()).device + + # Create checkpoint directory + if self._checkpoint_dir: + self._checkpoint_dir.mkdir(parents=True, exist_ok=True) + + @property + def state(self) -> TrainerState: + """Get current trainer state (read-only access advised).""" + return self._state + + def train( + self, + train_dataloader: DataLoader, + num_epochs: int, + eval_dataloader: Optional[DataLoader] = None, + eval_interval: int = 1, + checkpoint_interval: int = 1, + ) -> Dict[str, Any]: + """ + Main training loop. + + Args: + train_dataloader: Training data loader + num_epochs: Number of epochs to train + eval_dataloader: Optional evaluation data loader + eval_interval: Epochs between evaluations + checkpoint_interval: Epochs between checkpoints + + Returns: + Training history dictionary + """ + history: Dict[str, list] = { + "train_loss": [], + "eval_loss": [], + "memory_usage": [], + } + + total_steps = len(train_dataloader) * num_epochs + config = self._strategy.get_config() + + logger.info(f"Starting training for {num_epochs} epochs") + logger.info(f"Total steps: {total_steps}") + logger.info(f"Gradient accumulation: {config.gradient_accumulation_steps}") + + # Initial memory snapshot + initial_snapshot = self._memory_manager.get_snapshot() + logger.info( + f"Initial GPU memory: {initial_snapshot.gpu_allocated_mb:.1f}MB / " + f"{initial_snapshot.gpu_max_mb:.1f}MB" + ) + + for epoch in range(num_epochs): + self._state.epoch = epoch + self._reporter.on_epoch_start(epoch, num_epochs) + + # Training epoch + epoch_metrics = self._train_epoch(train_dataloader) + history["train_loss"].append(epoch_metrics["avg_loss"]) + + self._reporter.on_epoch_end(epoch, epoch_metrics) + + # Evaluation + if eval_dataloader and (epoch + 1) % eval_interval == 0: + eval_metrics = self._evaluate(eval_dataloader) + history["eval_loss"].append(eval_metrics["avg_loss"]) + + # Checkpointing + if self._checkpoint_dir and (epoch + 1) % checkpoint_interval == 0: + self._save_checkpoint(epoch, epoch_metrics) + + # Memory tracking + snapshot = self._memory_manager.get_snapshot() + history["memory_usage"].append(snapshot.gpu_allocated_mb) + + # Clear cache between epochs + self._memory_manager.clear_cache() + + logger.info("Training complete!") + return history + + def _train_epoch(self, dataloader: DataLoader) -> Dict[str, float]: + """Execute one training epoch.""" + self._model.train() + + epoch_loss = 0.0 + num_steps = len(dataloader) + + for step, batch in enumerate(dataloader): + self._state.global_step += 1 + + # Training step + loss, metrics = 
self._strategy.training_step( + self._model, + batch, + self._state.global_step, + ) + + epoch_loss += metrics["loss"] + + # Optimizer step (respects gradient accumulation) + if not self._strategy.should_accumulate(self._state.global_step): + opt_metrics = self._strategy.optimizer_step( + self._optimizer, + self._scheduler, + self._max_grad_norm, + ) + metrics.update(opt_metrics) + + # Report progress + self._reporter.on_step( + step, + num_steps, + metrics["loss"], + metrics, + ) + + return { + "avg_loss": epoch_loss / num_steps, + "total_steps": num_steps, + } + + @torch.no_grad() + def _evaluate(self, dataloader: DataLoader) -> Dict[str, float]: + """Evaluate model on validation data.""" + self._model.eval() + + total_loss = 0.0 + num_batches = 0 + + for batch in dataloader: + # Move to device + batch = { + k: v.to(self._device) if isinstance(v, Tensor) else v + for k, v in batch.items() + } + + outputs = self._model(**batch) + + if hasattr(outputs, "loss"): + loss = outputs.loss + elif isinstance(outputs, tuple): + loss = outputs[0] + else: + loss = outputs + + total_loss += loss.item() + num_batches += 1 + + self._model.train() + + return {"avg_loss": total_loss / max(1, num_batches)} + + def _save_checkpoint(self, epoch: int, metrics: Dict[str, float]) -> None: + """Save training checkpoint.""" + if self._checkpoint_dir is None: + return + + checkpoint_path = self._checkpoint_dir / f"checkpoint_epoch_{epoch + 1}.pt" + + checkpoint = { + "epoch": epoch, + "global_step": self._state.global_step, + "model_state_dict": self._model.state_dict(), + "optimizer_state_dict": self._optimizer.state_dict(), + "metrics": metrics, + "best_loss": self._state.best_loss, + } + + if self._scheduler: + checkpoint["scheduler_state_dict"] = self._scheduler.state_dict() + + torch.save(checkpoint, checkpoint_path) + self._reporter.on_checkpoint(str(checkpoint_path)) + + # Update best loss + if metrics.get("avg_loss", float("inf")) < self._state.best_loss: + self._state.best_loss = metrics["avg_loss"] + best_path = self._checkpoint_dir / "best_model.pt" + torch.save(checkpoint, best_path) + logger.info(f"New best model saved: loss={self._state.best_loss:.4f}") + + def load_checkpoint(self, checkpoint_path: str) -> None: + """Load training state from checkpoint.""" + checkpoint = torch.load(checkpoint_path, map_location=self._device) + + self._model.load_state_dict(checkpoint["model_state_dict"]) + self._optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) + + if self._scheduler and "scheduler_state_dict" in checkpoint: + self._scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) + + self._state.epoch = checkpoint["epoch"] + self._state.global_step = checkpoint["global_step"] + self._state.best_loss = checkpoint.get("best_loss", float("inf")) + + logger.info(f"Loaded checkpoint from epoch {self._state.epoch + 1}") + + def get_memory_report(self) -> str: + """Generate memory usage report.""" + snapshot = self._memory_manager.get_snapshot() + + report = [ + "=" * 50, + "MEMORY REPORT", + "=" * 50, + f"GPU Allocated: {snapshot.gpu_allocated_mb:.1f} MB", + f"GPU Reserved: {snapshot.gpu_reserved_mb:.1f} MB", + f"GPU Maximum: {snapshot.gpu_max_mb:.1f} MB", + f"GPU Usage: {snapshot.gpu_allocated_mb / snapshot.gpu_max_mb * 100:.1f}%", + "", + f"CPU (offloaded): {snapshot.cpu_used_mb:.1f} MB", + ] + + if self._cache: + stats = self._cache.get_stats() + report.extend([ + "", + "Feature Cache:", + f" Items: {stats['cached_items']}", + f" Size: {stats['total_size_mb']:.1f} MB", + f" Hit Rate: 
{stats['hit_rate']:.1%}", + ]) + + if self._freezer: + frozen = self._freezer.get_frozen_params(self._model) + trainable = self._freezer.get_trainable_params(self._model) + report.extend([ + "", + "Model Parameters:", + f" Frozen: {frozen:,} ({frozen * 4 / 1e6:.1f} MB)", + f" Trainable: {trainable:,} ({trainable * 4 / 1e6:.1f} MB)", + ]) + + report.append("=" * 50) + + return "\n".join(report) + + def __repr__(self) -> str: + return ( + f"LowVRAMTrainer(" + f"epoch={self._state.epoch}, " + f"step={self._state.global_step}, " + f"strategy={self._strategy})" + ) + + +__all__ = ["LowVRAMTrainer", "TrainerState", "ConsoleProgressReporter"] diff --git a/tests/low_vram/__init__.py b/tests/low_vram/__init__.py new file mode 100644 index 0000000..99c295b --- /dev/null +++ b/tests/low_vram/__init__.py @@ -0,0 +1 @@ +"""Tests package for low-VRAM training framework.""" diff --git a/tests/low_vram/test_components.py b/tests/low_vram/test_components.py new file mode 100644 index 0000000..a55626a --- /dev/null +++ b/tests/low_vram/test_components.py @@ -0,0 +1,267 @@ +""" +Unit Tests for Low-VRAM Training Framework +========================================== + +Testing following AAA pattern: Arrange, Act, Assert +Each test has a single responsibility. +""" + +import os +import tempfile +import unittest +from unittest.mock import MagicMock, patch + +import torch +from torch import nn, Tensor + +from go1.tools.low_vram.core.interfaces import ( + MemoryConfig, + MemorySnapshot, + TrainingConfig, +) +from go1.tools.low_vram.adapters.memory_manager import TorchMemoryManager +from go1.tools.low_vram.adapters.training_strategy import ( + LowVRAMTrainingStrategy, + GradientAccumulationMixin, +) +from go1.tools.low_vram.adapters.feature_cache import DiskFeatureCache, compute_checksum +from go1.tools.low_vram.adapters.model_freezer import ComponentFreezer + + +class TestMemoryConfig(unittest.TestCase): + """Tests for MemoryConfig dataclass.""" + + def test_config_is_frozen(self): + """Verify config cannot be modified after creation (Integrity).""" + config = MemoryConfig(max_gpu_memory_mb=4000, max_cpu_memory_mb=8000) + + with self.assertRaises(Exception): # FrozenInstanceError + config.max_gpu_memory_mb = 5000 + + def test_config_defaults(self): + """Verify default values are sensible.""" + config = MemoryConfig(max_gpu_memory_mb=4000, max_cpu_memory_mb=8000) + + self.assertTrue(config.enable_cpu_offload) + self.assertTrue(config.gradient_checkpointing) + self.assertTrue(config.mixed_precision) + + +class TestTorchMemoryManager(unittest.TestCase): + """Tests for TorchMemoryManager.""" + + def setUp(self): + self.config = MemoryConfig( + max_gpu_memory_mb=4000, + max_cpu_memory_mb=8000, + pin_cpu_memory=False, # Disable for tests without CUDA + ) + self.manager = TorchMemoryManager(self.config) + + def test_get_snapshot_returns_valid_data(self): + """Snapshot should return non-negative values.""" + snapshot = self.manager.get_snapshot() + + self.assertIsInstance(snapshot, MemorySnapshot) + self.assertGreaterEqual(snapshot.gpu_allocated_mb, 0) + self.assertGreaterEqual(snapshot.timestamp, 0) + + def test_offload_and_restore_cpu(self): + """Tensor should roundtrip through CPU offload.""" + # Arrange + tensor = torch.randn(100, 100) + original_sum = tensor.sum().item() + + # Act + cpu_tensor = self.manager.offload_to_cpu(tensor, "test_tensor") + restored = self.manager.restore_to_gpu("test_tensor", torch.device("cpu")) + + # Assert + self.assertAlmostEqual(restored.sum().item(), original_sum, places=5) + 
self.assertEqual(self.manager.get_offloaded_count(), 0) + + def test_offload_duplicate_name_raises(self): + """Offloading with same name should raise (Integrity protection).""" + tensor1 = torch.randn(10, 10) + tensor2 = torch.randn(10, 10) + + self.manager.offload_to_cpu(tensor1, "same_name") + + with self.assertRaises(ValueError): + self.manager.offload_to_cpu(tensor2, "same_name") + + def test_restore_nonexistent_raises(self): + """Restoring nonexistent tensor should raise.""" + with self.assertRaises(KeyError): + self.manager.restore_to_gpu("nonexistent", torch.device("cpu")) + + +class TestGradientAccumulation(unittest.TestCase): + """Tests for gradient accumulation logic.""" + + def test_should_accumulate_for_intermediate_steps(self): + """Accumulation steps 1-15 should accumulate for accum_steps=16.""" + mixin = GradientAccumulationMixin() + + # Steps 0-14 should accumulate + for step in range(15): + self.assertTrue( + mixin._should_accumulate_impl(step, 16), + f"Step {step} should accumulate" + ) + + def test_should_not_accumulate_on_final_step(self): + """Step 15 (16th step) should not accumulate.""" + mixin = GradientAccumulationMixin() + + self.assertFalse(mixin._should_accumulate_impl(15, 16)) + self.assertFalse(mixin._should_accumulate_impl(31, 16)) + + def test_loss_scaling(self): + """Loss should be scaled by accumulation steps.""" + mixin = GradientAccumulationMixin() + loss = torch.tensor(1.6) + + scaled = mixin._scale_loss(loss, 16) + + self.assertAlmostEqual(scaled.item(), 0.1, places=5) + + +class TestDiskFeatureCache(unittest.TestCase): + """Tests for DiskFeatureCache.""" + + def setUp(self): + self.temp_dir = tempfile.mkdtemp() + self.cache = DiskFeatureCache(self.temp_dir) + + def tearDown(self): + self.cache.clear() + import shutil + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def test_store_and_retrieve(self): + """Basic store/retrieve should work.""" + features = torch.randn(64, 256) + + self.cache.store("test_key", features) + retrieved = self.cache.retrieve("test_key") + + self.assertIsNotNone(retrieved) + self.assertTrue(torch.allclose(features, retrieved)) + + def test_exists_check(self): + """Exists should return correct boolean.""" + self.assertFalse(self.cache.exists("nonexistent")) + + self.cache.store("exists_key", torch.randn(10)) + + self.assertTrue(self.cache.exists("exists_key")) + + def test_checksum_validation(self): + """Checksum should be computed and stored.""" + features = torch.randn(10, 10) + expected_checksum = compute_checksum(features) + + self.cache.store("checksum_key", features) + + # Verify checksum was stored + self.assertIn("checksum_key", self.cache._checksums) + self.assertEqual(self.cache._checksums["checksum_key"], expected_checksum) + + def test_cache_stats(self): + """Stats should track hits and misses.""" + self.cache.store("hit_key", torch.randn(10)) + + self.cache.retrieve("hit_key") # Hit + self.cache.retrieve("miss_key") # Miss + + stats = self.cache.get_stats() + self.assertEqual(stats["hits"], 1) + self.assertEqual(stats["misses"], 1) + + +class TestComponentFreezer(unittest.TestCase): + """Tests for ComponentFreezer.""" + + def setUp(self): + # Create a simple model with named components + self.model = nn.Module() + self.model.vision = nn.Linear(10, 10) + self.model.language = nn.Linear(10, 10) + self.model.action = nn.Linear(10, 10) + + self.freezer = ComponentFreezer({ + "vision": ["vision"], + "language": ["language"], + "action": ["action"], + }) + + def test_freeze_component(self): + """Freezing should set 
requires_grad=False.""" + # Verify initially trainable + self.assertTrue(self.model.vision.weight.requires_grad) + + # Freeze + frozen_count = self.freezer.freeze_component(self.model, "vision") + + # Verify frozen + self.assertFalse(self.model.vision.weight.requires_grad) + self.assertGreater(frozen_count, 0) + + def test_unfreeze_component(self): + """Unfreezing should set requires_grad=True.""" + # Freeze first + self.freezer.freeze_component(self.model, "vision") + self.assertFalse(self.model.vision.weight.requires_grad) + + # Unfreeze + self.freezer.unfreeze_component(self.model, "vision") + + # Verify unfrozen + self.assertTrue(self.model.vision.weight.requires_grad) + + def test_get_trainable_params(self): + """Should count trainable parameters correctly.""" + total = self.freezer.get_trainable_params(self.model) + + self.freezer.freeze_component(self.model, "vision") + + after_freeze = self.freezer.get_trainable_params(self.model) + + self.assertLess(after_freeze, total) + + +class TestLowVRAMTrainingStrategy(unittest.TestCase): + """Tests for LowVRAMTrainingStrategy.""" + + def setUp(self): + self.config = TrainingConfig( + batch_size=1, + gradient_accumulation_steps=4, + learning_rate=1e-4, + ) + self.strategy = LowVRAMTrainingStrategy( + config=self.config, + device=torch.device("cpu"), + ) + + def test_get_config_returns_immutable(self): + """get_config should expose the configuration supplied at construction.""" + config = self.strategy.get_config() + + self.assertEqual(config.batch_size, 1) + self.assertEqual(config.gradient_accumulation_steps, 4) + + def test_should_accumulate_logic(self): + """should_accumulate should respect the 4-step accumulation window.""" + # Steps 0, 1, 2 should accumulate + self.assertTrue(self.strategy.should_accumulate(0)) + self.assertTrue(self.strategy.should_accumulate(1)) + self.assertTrue(self.strategy.should_accumulate(2)) + + # Step 3 should not (4th step, time to update) + self.assertFalse(self.strategy.should_accumulate(3)) + + +if __name__ == "__main__": + unittest.main(verbosity=2)
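To resume from a saved checkpoint, LowVRAMTrainer.load_checkpoint restores the model, optimizer and (if configured) scheduler state along with the epoch, global-step and best-loss counters. A minimal sketch, assuming a trainer instance has already been constructed as in this diff and using a hypothetical checkpoint path:

    # `trainer` is an already-constructed LowVRAMTrainer; the path is hypothetical.
    trainer.load_checkpoint("checkpoints/checkpoint_epoch_3.pt")
    # Formatted report of GPU/CPU usage, cache stats, frozen vs. trainable params.
    print(trainer.get_memory_report())

Alongside the per-epoch checkpoint_epoch_<N>.pt files, best_model.pt is rewritten whenever an epoch's avg_loss improves on the best loss seen so far.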
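The gradient-accumulation tests above pin down the following semantics. The snippet is a restatement of the expected behaviour for illustration, not the implementation inside GradientAccumulationMixin:

    def should_accumulate(step: int, accum_steps: int) -> bool:
        # Accumulate on every micro-step except the last one of the window:
        # with accum_steps=16, steps 0-14 accumulate and step 15 triggers the
        # optimizer update; with accum_steps=4, step 3 triggers it.
        return (step + 1) % accum_steps != 0

    def scale_loss(loss, accum_steps: int):
        # Each micro-batch loss is divided by the window size so the summed
        # gradients match a single full-batch update (1.6 / 16 == 0.1).
        return loss / accum_steps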
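For orientation, the calls exercised by TestDiskFeatureCache and TestComponentFreezer compose into the following usage sketch; the cache directory, cache key and toy model here are assumptions for illustration only:

    import torch
    from torch import nn

    from go1.tools.low_vram.adapters.feature_cache import DiskFeatureCache
    from go1.tools.low_vram.adapters.model_freezer import ComponentFreezer

    cache = DiskFeatureCache("/tmp/go1_feature_cache")   # hypothetical directory
    cache.store("sample_000", torch.randn(64, 256))      # checksum recorded on store
    features = cache.retrieve("sample_000")              # counted as a cache hit
    print(cache.get_stats())                             # includes hits, misses, cached_items, total_size_mb

    model = nn.Module()
    model.vision = nn.Linear(10, 10)
    model.action = nn.Linear(10, 10)

    freezer = ComponentFreezer({"vision": ["vision"], "action": ["action"]})
    freezer.freeze_component(model, "vision")            # sets requires_grad=False on the vision params
    print(freezer.get_trainable_params(model), freezer.get_frozen_params(model))

The test file also runs standalone via python tests/low_vram/test_components.py thanks to the unittest.main(verbosity=2) entry point.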