9 changes: 8 additions & 1 deletion Dockerfile
@@ -41,8 +41,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
python3-dev \
libjpeg-dev \
lsof \
redis-server \
supervisor \
tini \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

@@ -199,7 +201,12 @@ EXPOSE 6379
USER appuser

# Set environment variables to ptoduction
ENV PYTHON_ENV=production
ENV PYTHON_ENV=production

# Use tini as init system to properly reap zombie processes
# This is required for Playwright/Chromium which spawns many child processes
# See: https://github.com/unclecode/crawl4ai/issues/1666
ENTRYPOINT ["/usr/bin/tini", "--"]

# Start the application using supervisord
CMD ["supervisord", "-c", "supervisord.conf"]
1 change: 1 addition & 0 deletions README.md
@@ -1093,6 +1093,7 @@ Our enterprise sponsors and technology partners help scale Crawl4AI to power pro

| Company | About | Sponsorship Tier |
|------|------|----------------------------|
| <a href="https://www.thordata.com/?ls=github&lk=crawl4ai" target="_blank"><img src="https://gist.github.com/aravindkarnam/dfc598a67be5036494475acece7e54cf/raw/thor_data.svg" alt="Thor Data" width="120"/></a> | Leveraging Thordata ensures seamless compatibility with any AI/ML workflows and data infrastructure, massively accessing web data with 99.9% uptime, backed by one-on-one customer support. | 🥈 Silver |
| <a href="https://app.nstproxy.com/register?i=ecOqW9" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.github.com/aravindkarnam/62f82bd4818d3079d9dd3c31df432cf8/raw/nst-light.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://www.nstproxy.com/logo.svg"><img alt="nstproxy" src="ttps://www.nstproxy.com/logo.svg"></picture></a> | NstProxy is a trusted proxy provider with over 110M+ real residential IPs, city-level targeting, 99.99% uptime, and low pricing at $0.1/GB, it delivers unmatched stability, scale, and cost-efficiency. | 🥈 Silver |
| <a href="https://app.scrapeless.com/passport/register?utm_source=official&utm_term=crawl4ai" target="_blank"><picture><source width="250" media="(prefers-color-scheme: dark)" srcset="https://gist.githubusercontent.com/aravindkarnam/0d275b942705604263e5c32d2db27bc1/raw/Scrapeless-light-logo.svg"><source width="250" media="(prefers-color-scheme: light)" srcset="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"><img alt="Scrapeless" src="https://gist.githubusercontent.com/aravindkarnam/22d0525cc0f3021bf19ebf6e11a69ccd/raw/Scrapeless-dark-logo.svg"></picture></a> | Scrapeless provides production-grade infrastructure for Crawling, Automation, and AI Agents, offering Scraping Browser, 4 Proxy Types and Universal Scraping API. | 🥈 Silver |
| <a href="https://dashboard.capsolver.com/passport/register?inviteCode=ESVSECTX5Q23" target="_blank"><picture><source width="120" media="(prefers-color-scheme: dark)" srcset="https://docs.crawl4ai.com/uploads/sponsors/20251013045338_72a71fa4ee4d2f40.png"><source width="120" media="(prefers-color-scheme: light)" srcset="https://www.capsolver.com/assets/images/logo-text.png"><img alt="Capsolver" src="https://www.capsolver.com/assets/images/logo-text.png"></picture></a> | AI-powered Captcha solving service. Supports all major Captcha types, including reCAPTCHA, Cloudflare, and more | 🥉 Bronze |
10 changes: 9 additions & 1 deletion crawl4ai/async_crawler_strategy.py
@@ -1053,9 +1053,17 @@ async def get_delayed_content(delay: float = 5.0) -> str:
raise e

finally:
# Release the context reference so cleanup can work
if not self.browser_config.use_managed_browser:
try:
config_signature = self.browser_manager._make_config_signature(config)
await self.browser_manager.release_context(config_signature)
except Exception:
pass # Don't fail on cleanup
Comment on lines +1056 to +1062

Copilot AI Jan 1, 2026

The release_context call here creates a reference counting imbalance when using session_id. Looking at browser_manager.py get_page(), when a session_id is provided and already exists, the function returns early (line 1063-1066) without incrementing the refcount. However, this release_context will still be called, decrementing a counter that was never incremented. This will cause the refcount to go negative (though clamped to 0 by the max() call in release_context), potentially allowing contexts to be cleaned up while still in use by sessions. The condition should also check that no session_id is being used, similar to: if not self.browser_config.use_managed_browser and not config.session_id:

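A hedged sketch of the guard this review comment proposes (not what the diff currently does): release the reference only when the request actually took one in get_page, i.e. when neither a managed browser nor a session_id is involved. The helper name `_maybe_release_context` is illustrative; `strategy` stands for the crawler strategy instance (`self` in the diff above) and `config` for the current CrawlerRunConfig.

```python
# Sketch of the reviewer's proposed condition, extracted into a helper.
async def _maybe_release_context(strategy, config) -> None:
    # Session-bound and managed-browser contexts are not ref-counted per
    # request, so releasing them here would drive the counter negative.
    if strategy.browser_config.use_managed_browser or config.session_id:
        return
    try:
        signature = strategy.browser_manager._make_config_signature(config)
        await strategy.browser_manager.release_context(signature)
    except Exception:
        pass  # cleanup failures must never mask the crawl result
```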

# If no session_id is given we should close the page
all_contexts = page.context.browser.contexts
total_pages = sum(len(context.pages) for context in all_contexts)
total_pages = sum(len(context.pages) for context in all_contexts)
if config.session_id:
pass
elif total_pages <= 1 and (self.browser_config.use_managed_browser or self.browser_config.headless):
129 changes: 126 additions & 3 deletions crawl4ai/browser_manager.py
@@ -611,12 +611,16 @@ def __init__(self, browser_config: BrowserConfig, logger=None, use_undetected: b
# Keep track of contexts by a "config signature," so each unique config reuses a single context
self.contexts_by_config = {}
self._contexts_lock = asyncio.Lock()


# Reference counting for contexts - tracks how many requests are using each context
# Key: config_signature, Value: count of active requests using this context
self._context_refcounts = {}

# Serialize context.new_page() across concurrent tasks to avoid races
# when using a shared persistent context (context.pages may be empty
# for all racers). Prevents 'Target page/context closed' errors.
self._page_lock = asyncio.Lock()

# Stealth adapter for stealth mode
self._stealth_adapter = None
if self.config.enable_stealth and not self.use_undetected:
@@ -1102,6 +1106,9 @@ async def get_page(self, crawlerRunConfig: CrawlerRunConfig):
await self.setup_context(context, crawlerRunConfig)
self.contexts_by_config[config_signature] = context

# Increment reference count - this context is now in use
self._context_refcounts[config_signature] = self._context_refcounts.get(config_signature, 0) + 1

# Create a new page from the chosen context
page = await context.new_page()
await self._apply_stealth_to_page(page)
@@ -1137,11 +1144,127 @@ def _cleanup_expired_sessions(self):
for sid in expired_sessions:
asyncio.create_task(self.kill_session(sid))

async def cleanup_contexts(self, max_contexts: int = 5, force: bool = False):
"""
Clean up contexts to prevent memory growth.
Only closes contexts that have no active references AND no open pages (safe cleanup).

Args:
max_contexts: Maximum number of contexts to keep. Excess idle contexts
will be closed, starting with the oldest ones.
force: If True, close contexts even if they have pages (but never if refcount > 0).
Use with caution.
"""
async with self._contexts_lock:
# First, identify contexts that are safe to close:
# - No active references (refcount == 0)
# - No open pages (or force=True)
idle_contexts = []
active_contexts = []

for sig, ctx in list(self.contexts_by_config.items()):
try:
refcount = self._context_refcounts.get(sig, 0)
has_pages = hasattr(ctx, 'pages') and len(ctx.pages) > 0

# Context is safe to close only if refcount is 0
if refcount > 0:
# Context is actively being used by a request - never close
active_contexts.append((sig, ctx))
elif has_pages and not force:
# Has pages but no refs - might be finishing up, skip unless forced
active_contexts.append((sig, ctx))
else:
# refcount == 0 and (no pages or force=True) - safe to close
idle_contexts.append((sig, ctx))
except Exception:
# Context may be in bad state, only cleanup if no refs
if self._context_refcounts.get(sig, 0) == 0:
idle_contexts.append((sig, ctx))
else:
active_contexts.append((sig, ctx))

# Log context status for debugging
self.logger.debug(
message="Context cleanup check: {total} total, {idle} idle (refcount=0), {active} active",
tag="CLEANUP",
params={
"total": len(self.contexts_by_config),
"idle": len(idle_contexts),
"active": len(active_contexts)
}
)

# Close idle contexts if we exceed max_contexts total
contexts_to_close = []
if len(self.contexts_by_config) > max_contexts:
# Calculate how many we need to close
excess = len(self.contexts_by_config) - max_contexts
# Only close from idle contexts (safe)
contexts_to_close = idle_contexts[:excess]

# If force=True and we still have too many, close active ones too
if force and len(self.contexts_by_config) - len(contexts_to_close) > max_contexts:
remaining_excess = len(self.contexts_by_config) - len(contexts_to_close) - max_contexts
contexts_to_close.extend(active_contexts[:remaining_excess])
Comment on lines +1206 to +1209

Copilot AI Jan 1, 2026

The documentation states that force will "close contexts even if they have pages (but never if refcount > 0)", but the implementation at lines 1207-1209 will actually close active contexts when force=True, and active_contexts includes contexts with refcount > 0 (added at line 1171-1173). This means force=True can close contexts that are actively being used by requests, contradicting the docstring and potentially causing "Target closed" errors during active crawls. The condition should filter out contexts with refcount > 0 from active_contexts before extending contexts_to_close.

Suggested change
# If force=True and we still have too many, close active ones too
if force and len(self.contexts_by_config) - len(contexts_to_close) > max_contexts:
remaining_excess = len(self.contexts_by_config) - len(contexts_to_close) - max_contexts
contexts_to_close.extend(active_contexts[:remaining_excess])
# If force=True and we still have too many, close additional contexts
# but never close contexts with refcount > 0 (they may be in active use).
if force and len(self.contexts_by_config) - len(contexts_to_close) > max_contexts:
remaining_excess = len(self.contexts_by_config) - len(contexts_to_close) - max_contexts
# From active_contexts, only consider those whose refcount is 0 for forced closure
force_closable_active = [
(sig, ctx)
for sig, ctx in active_contexts
if self._context_refcounts.get(sig, 0) == 0
]
contexts_to_close.extend(force_closable_active[:remaining_excess])


# Perform cleanup
for sig, ctx in contexts_to_close:
try:
# If forcing and context has pages, close them first
if force and hasattr(ctx, 'pages'):
for page in list(ctx.pages):
try:
await page.close()
except Exception:
pass
Comment on lines +1219 to +1220

Copilot AI Jan 1, 2026

'except' clause does nothing but pass and there is no explanatory comment.

Suggested change
except Exception:
pass
except Exception as e:
# Ignore individual page close failures but record them for diagnostics
self.logger.warning(
message="Error closing page during context cleanup: {error}",
tag="WARNING",
params={"error": str(e)}
)


# Remove from our tracking dicts
self.contexts_by_config.pop(sig, None)
self._context_refcounts.pop(sig, None)

# Close the context
await ctx.close()

self.logger.info(
message="Cleaned up context: {sig}",
tag="CLEANUP",
params={"sig": sig[:8]}
)
except Exception as e:
# Still remove from tracking even if close fails
self.contexts_by_config.pop(sig, None)
self._context_refcounts.pop(sig, None)
self.logger.warning(
message="Error closing context during cleanup: {error}",
tag="WARNING",
params={"error": str(e)}
)

return len(contexts_to_close) # Return count of cleaned contexts
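A hedged sketch of the contract this method documents: once the total exceeds max_contexts, idle contexts (refcount 0, no pages) are eligible for closure while referenced ones are kept. It assumes `bm` is an already-started BrowserManager with no other tracked contexts and that `bm.browser` is the underlying Playwright browser, as used elsewhere in this class; the signatures are arbitrary strings.

```python
# Sketch: exercise cleanup_contexts() on a started BrowserManager `bm`.
async def check_cleanup_respects_refcounts(bm) -> int:
    bm.contexts_by_config["busy-sig"] = await bm.browser.new_context()
    bm.contexts_by_config["idle-sig"] = await bm.browser.new_context()
    bm._context_refcounts["busy-sig"] = 1   # simulate an in-flight crawl
    bm._context_refcounts["idle-sig"] = 0   # nothing is using this one

    closed = await bm.cleanup_contexts(max_contexts=1)

    assert "busy-sig" in bm.contexts_by_config       # referenced -> kept
    assert "idle-sig" not in bm.contexts_by_config   # idle -> closed
    return closed  # expected: 1
```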

async def release_context(self, config_signature: str):
"""
Decrement the reference count for a context after a crawl completes.
Call this when a crawl operation finishes (success or failure).

Args:
config_signature: The config signature of the context to release
"""
async with self._contexts_lock:
if config_signature in self._context_refcounts:
self._context_refcounts[config_signature] = max(0, self._context_refcounts[config_signature] - 1)
self.logger.debug(
message="Released context ref: {sig}, remaining refs: {refs}",
tag="CLEANUP",
params={"sig": config_signature[:8], "refs": self._context_refcounts[config_signature]}
)

async def close(self):
"""Close all browser resources and clean up."""
if self.config.cdp_url:
return

if self.config.sleep_on_close:
await asyncio.sleep(0.5)

24 changes: 18 additions & 6 deletions deploy/docker/api.py
@@ -579,21 +579,33 @@ async def handle_crawl_request(

results = []
func = getattr(crawler, "arun" if len(urls) == 1 else "arun_many")
partial_func = partial(func,
urls[0] if len(urls) == 1 else urls,
config=crawler_config,
partial_func = partial(func,
urls[0] if len(urls) == 1 else urls,
config=crawler_config,
dispatcher=dispatcher)
results = await partial_func()

# Ensure results is always a list
if not isinstance(results, list):
results = [results]

# Clean up idle browser contexts to prevent memory leaks
# Only closes contexts with no open pages (safe cleanup)
try:
if hasattr(crawler, 'crawler_strategy') and hasattr(crawler.crawler_strategy, 'browser_manager'):
bm = crawler.crawler_strategy.browser_manager
# Clean up idle contexts (keep at most 3 to allow some reuse)
cleaned_count = await bm.cleanup_contexts(max_contexts=3)
if cleaned_count > 0:
logger.info(f"Browser cleanup: closed {cleaned_count} idle context(s)")
except Exception as e:
logger.warning(f"Browser context cleanup warning: {e}")

# await crawler.close()

end_mem_mb = _get_memory_mb() # <--- Get memory after
end_time = time.time()

if start_mem_mb is not None and end_mem_mb is not None:
mem_delta_mb = end_mem_mb - start_mem_mb # <--- Calculate delta
peak_mem_mb = max(peak_mem_mb if peak_mem_mb else 0, end_mem_mb) # <--- Get peak memory
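The same cleanup could also run on a timer rather than per request. A minimal sketch (not part of this PR), assuming the crawler exposes crawler_strategy.browser_manager exactly as the block above does and reusing the module-level `logger`; all other names are illustrative:

```python
import asyncio

# Sketch: periodically close idle browser contexts instead of (or in addition
# to) the per-request cleanup above, as a background task alongside the app.
async def periodic_context_cleanup(crawler, interval_s: float = 60.0, max_contexts: int = 3):
    while True:
        await asyncio.sleep(interval_s)
        try:
            bm = getattr(getattr(crawler, "crawler_strategy", None), "browser_manager", None)
            if bm is None:
                continue
            cleaned = await bm.cleanup_contexts(max_contexts=max_contexts)
            if cleaned:
                logger.info(f"Periodic browser cleanup: closed {cleaned} idle context(s)")
        except Exception as e:
            logger.warning(f"Periodic browser cleanup skipped: {e}")

# e.g. at startup: asyncio.create_task(periodic_context_cleanup(crawler))
```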