diff --git a/docs/user/builds.rst b/docs/user/builds.rst index 0b98ee7b2eb..318067f1a50 100644 --- a/docs/user/builds.rst +++ b/docs/user/builds.rst @@ -112,6 +112,25 @@ Read the Docs supports three different mechanisms to cancel a running build: Take a look at :ref:`build-customization:cancel build based on a condition` section for some examples. +Automatic disabling of builds +----------------------------- + +To reduce resource consumption and improve build queue times for all users, +Read the Docs will automatically disable builds for projects that have too many consecutive failed builds on their default version. + +When a project has **25 consecutive failed builds** on its default version, +we will disable builds for the project. + +This helps ensure that projects with persistent build issues don't consume resources that could be used by active projects. + +.. note:: + + This only applies to the default version of a project. + Builds on other versions (branches, tags, pull requests) are not counted towards this limit. + +If builds for your project have been disabled due to consecutive build failures, you'll need to re-enable them from your project settings. +Make sure to fix the underlying issue first, to avoid builds being disabled again. + Build resources --------------- diff --git a/readthedocs/builds/signals_receivers.py b/readthedocs/builds/signals_receivers.py index 0f9da2073c7..ddb9236c657 100644 --- a/readthedocs/builds/signals_receivers.py +++ b/readthedocs/builds/signals_receivers.py @@ -4,6 +4,7 @@ NOTE: Done in a separate file to avoid circular imports. 
""" +import structlog from django.db.models.signals import post_save from django.dispatch import receiver @@ -11,6 +12,9 @@ from readthedocs.projects.models import Project +log = structlog.get_logger(__name__) + + @receiver(post_save, sender=Build) def update_latest_build_for_project(sender, instance, created, **kwargs): """When a build is created, update the latest build for the project.""" diff --git a/readthedocs/builds/tasks.py b/readthedocs/builds/tasks.py index f23c8f2aa58..961ed050f68 100644 --- a/readthedocs/builds/tasks.py +++ b/readthedocs/builds/tasks.py @@ -667,3 +667,64 @@ def send_webhook(self, webhook): webhook_id=webhook.id, webhook_url=webhook.url, ) + + +@app.task(queue="web") +def check_and_disable_project_for_consecutive_failed_builds(project_slug, version_slug): + """ + Check if a project has too many consecutive failed builds and disable it. + + When a project has RTD_BUILDS_MAX_CONSECUTIVE_FAILURES or more consecutive failed builds + on the default version, we attach a notification to the project and disable builds (n_consecutive_failed_builds=True). + This helps reduce resource consumption from projects that are not being monitored. 
+ """ + from readthedocs.builds.constants import BUILD_STATE_FINISHED + from readthedocs.projects.notifications import ( + MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, + ) + + try: + project = Project.objects.get(slug=project_slug) + except Project.DoesNotExist: + return + + # Only check for the default version + if version_slug != project.get_default_version(): + return + + # Skip if builds are already disabled, either via ``skip`` or by a previous run of this check + if project.skip or project.n_consecutive_failed_builds: + return + + # Fetch the success flag of the latest finished builds on this version (newest first), up to the limit + builds = list( + Build.objects.filter( + project=project, + version_slug=version_slug, + state=BUILD_STATE_FINISHED, + ) + .order_by("-date") + .values_list("success", flat=True)[: settings.RTD_BUILDS_MAX_CONSECUTIVE_FAILURES] + ) + if not any(builds) and len(builds) >= settings.RTD_BUILDS_MAX_CONSECUTIVE_FAILURES: + consecutive_failed_builds = builds.count(False) + log.info( + "Disabling project due to consecutive failed builds.", + project_slug=project.slug, + version_slug=version_slug, + consecutive_failed_builds=consecutive_failed_builds, + ) + + # Flag the project as disabled due to consecutive failures + project.n_consecutive_failed_builds = True + project.save() + + # Attach notification to the project + Notification.objects.add( + message_id=MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, + attached_to=project, + dismissable=False, + format_values={ + "consecutive_failed_builds": consecutive_failed_builds, + }, + ) diff --git a/readthedocs/builds/tests/test_tasks.py b/readthedocs/builds/tests/test_tasks.py index 2f09c8afb8a..ee3f08995ec 100644 --- a/readthedocs/builds/tests/test_tasks.py +++ b/readthedocs/builds/tests/test_tasks.py @@ -2,6 +2,7 @@ from textwrap import dedent from unittest import mock +from django.conf import settings from django.contrib.auth.models import User from django.test import TestCase, override_settings from django.utils import timezone @@ -19,10 +20,12 @@ from readthedocs.builds.models import Build, 
BuildCommandResult, Version from readthedocs.builds.tasks import ( archive_builds_task, + check_and_disable_project_for_consecutive_failed_builds, delete_closed_external_versions, post_build_overview, ) from readthedocs.filetreediff.dataclasses import FileTreeDiff, FileTreeDiffFileStatus +from readthedocs.notifications.models import Notification from readthedocs.oauth.constants import GITHUB_APP from readthedocs.oauth.models import ( GitHubAccountType, @@ -31,6 +34,9 @@ ) from readthedocs.oauth.services import GitHubAppService from readthedocs.projects.models import Project +from readthedocs.projects.notifications import ( + MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, +) class TestTasks(TestCase): @@ -124,6 +130,76 @@ def test_archive_builds(self, build_commands_storage): self.assertEqual(Build.objects.filter(cold_storage=True).count(), 5) self.assertEqual(BuildCommandResult.objects.count(), 50) + def _create_builds(self, project, version, count, success=False): + """Create ``count`` finished builds for ``version`` with the given ``success`` value and return them.""" + builds = [] + for _ in range(count): + build = get( + Build, + project=project, + version=version, + success=success, + state=BUILD_STATE_FINISHED, + ) + builds.append(build) + return builds + + @override_settings(RTD_BUILDS_MAX_CONSECUTIVE_FAILURES=50) + def test_task_disables_project_at_max_consecutive_failed_builds(self): + """Test that the project is disabled when the failure threshold is reached.""" + project = get(Project, slug="test-project", n_consecutive_failed_builds=False) + version = project.versions.get(slug=LATEST) + version.active = True + version.save() + + # Create one more consecutive failure than the threshold + self._create_builds(project, version, settings.RTD_BUILDS_MAX_CONSECUTIVE_FAILURES + 1, success=False) + + # Call the Celery task directly + check_and_disable_project_for_consecutive_failed_builds( + project_slug=project.slug, + version_slug=version.slug, + ) + + project.refresh_from_db() + self.assertTrue(project.n_consecutive_failed_builds) + + # 
Verify notification was added + notification = Notification.objects.filter( + message_id=MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES + ).first() + self.assertIsNotNone(notification) + self.assertEqual(notification.attached_to, project) + + @override_settings(RTD_BUILDS_MAX_CONSECUTIVE_FAILURES=50) + def test_task_does_not_disable_project_with_successful_build(self): + """Test that the project is NOT disabled when a successful build breaks the streak of failures.""" + project = get(Project, slug="test-project-success", n_consecutive_failed_builds=False) + version = project.versions.get(slug=LATEST) + version.active = True + version.save() + + # Create failures just below the threshold, then one successful build and one more failure + self._create_builds(project, version, settings.RTD_BUILDS_MAX_CONSECUTIVE_FAILURES - 1, success=False) + self._create_builds(project, version, 1, success=True) # One successful build + self._create_builds(project, version, 1, success=False) # One more failure + + # Call the Celery task directly + check_and_disable_project_for_consecutive_failed_builds( + project_slug=project.slug, + version_slug=version.slug, + ) + + project.refresh_from_db() + self.assertFalse(project.n_consecutive_failed_builds) + + # Verify notification was NOT added + self.assertFalse( + Notification.objects.filter( + message_id=MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, + ).exists() + ) + @override_settings( PRODUCTION_DOMAIN="readthedocs.org", diff --git a/readthedocs/notifications/signals.py b/readthedocs/notifications/signals.py index d6e1c6e4078..50461760f0c 100644 --- a/readthedocs/notifications/signals.py +++ b/readthedocs/notifications/signals.py @@ -9,6 +9,9 @@ from readthedocs.notifications.models import Notification from readthedocs.organizations.models import Organization from readthedocs.projects.models import Project +from readthedocs.projects.notifications import ( + MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, +) from 
readthedocs.projects.notifications import MESSAGE_PROJECT_SKIP_BUILDS from readthedocs.subscriptions.notifications import MESSAGE_ORGANIZATION_DISABLED @@ -32,6 +35,16 @@ def project_skip_builds(instance, *args, **kwargs): ) +@receiver(post_save, sender=Project) +def project_n_consecutive_failed_builds(instance, *args, **kwargs): + """Cancel the consecutive-failures notification when the project is no longer disabled due to N+ consecutive failed builds.""" + if not instance.n_consecutive_failed_builds: + Notification.objects.cancel( + message_id=MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, + attached_to=instance, + ) + + @receiver(post_save, sender=Organization) def organization_disabled(instance, *args, **kwargs): """Check if the organization is ``disabled`` and add/cancel the notification.""" diff --git a/readthedocs/projects/forms.py b/readthedocs/projects/forms.py index 2637e56007f..336ef803690 100644 --- a/readthedocs/projects/forms.py +++ b/readthedocs/projects/forms.py @@ -435,6 +435,7 @@ class Meta: "default_branch", "readthedocs_yaml_path", "search_indexing_enabled", + "n_consecutive_failed_builds", # Meta data "programming_language", "project_url", @@ -478,6 +479,11 @@ def __init__(self, *args, **kwargs): if self.instance.search_indexing_enabled: self.fields.pop("search_indexing_enabled") + # Only show this field if building for this project is disabled due to N+ consecutive builds failing. + # Users can uncheck it from the form (re-enabling builds), but cannot check it themselves. + if not self.instance.n_consecutive_failed_builds: + self.fields.pop("n_consecutive_failed_builds") + # NOTE: we are deprecating this feature. # However, we will keep it available for projects that already using it. # Old projects not using it already or new projects won't be able to enable. 
diff --git a/readthedocs/projects/migrations/0157_disable_builds_after_n_failed.py b/readthedocs/projects/migrations/0157_disable_builds_after_n_failed.py new file mode 100644 index 00000000000..01e85661694 --- /dev/null +++ b/readthedocs/projects/migrations/0157_disable_builds_after_n_failed.py @@ -0,0 +1,36 @@ +# Generated by Django 5.2.7 on 2025-12-02 09:19 + +from django.db import migrations +from django.db import models +from django_safemigrate import Safe + + +class Migration(migrations.Migration): + safe = Safe.before_deploy() + + dependencies = [ + ("projects", "0156_project_search_indexing_enabled"), + ] + + operations = [ + migrations.AddField( + model_name="historicalproject", + name="n_consecutive_failed_builds", + field=models.BooleanField( + db_default=False, + default=False, + help_text="Builds on this project were automatically disabled due to many consecutive failures. Uncheck this field to re-enable building.", + verbose_name="Disable builds for this project", + ), + ), + migrations.AddField( + model_name="project", + name="n_consecutive_failed_builds", + field=models.BooleanField( + db_default=False, + default=False, + help_text="Builds on this project were automatically disabled due to many consecutive failures. Uncheck this field to re-enable building.", + verbose_name="Disable builds for this project", + ), + ), + ] diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 219048aeaa3..57e21f9d782 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -524,6 +524,14 @@ class Project(models.Model): featured = models.BooleanField(_("Featured"), default=False) skip = models.BooleanField(_("Skip (disable) building this project"), default=False) + n_consecutive_failed_builds = models.BooleanField( + _("Disable builds for this project"), + default=False, + db_default=False, + help_text=_( + "Builds on this project were automatically disabled due to many consecutive failures. 
Uncheck this field to re-enable building." + ), + ) # null=True can be removed in a later migration # be careful if adding new queries on this, .filter(delisted=False) does not work diff --git a/readthedocs/projects/notifications.py b/readthedocs/projects/notifications.py index d4e8950bad9..0f0d972bead 100644 --- a/readthedocs/projects/notifications.py +++ b/readthedocs/projects/notifications.py @@ -20,6 +20,9 @@ MESSAGE_PROJECT_SSH_KEY_WITH_WRITE_ACCESS = "project:ssh-key-with-write-access" MESSAGE_PROJECT_DEPRECATED_WEBHOOK = "project:webhooks:deprecated" MESSAGE_PROJECT_SEARCH_INDEXING_DISABLED = "project:search:indexing-disabled" +MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES = ( + "project:builds:disabled-due-to-consecutive-failures" +) messages = [ Message( @@ -223,5 +226,18 @@ ), type=INFO, ), + Message( + id=MESSAGE_PROJECT_BUILDS_DISABLED_DUE_TO_CONSECUTIVE_FAILURES, + header=_("Builds disabled due to consecutive failures"), + body=_( + textwrap.dedent( + """ + Your project has been automatically disabled because the default version has failed to build {{consecutive_failed_builds}} times in a row. + Please fix the build issues and re-enable builds by unchecking "Disable builds for this project" option from the project settings. 
+ """ + ).strip(), + ), + type=WARNING, + ), ] registry.add(messages) diff --git a/readthedocs/projects/querysets.py b/readthedocs/projects/querysets.py index 8b9a2883dd1..f54595774c6 100644 --- a/readthedocs/projects/querysets.py +++ b/readthedocs/projects/querysets.py @@ -95,6 +95,7 @@ def is_active(self, project): if ( project.skip + or project.n_consecutive_failed_builds or any_owner_banned or (organization and organization.disabled) or spam_project diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 0735f018c6e..216c815a4d5 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -42,6 +42,7 @@ from readthedocs.builds.models import APIVersion from readthedocs.builds.models import Build from readthedocs.builds.signals import build_complete +from readthedocs.builds.tasks import check_and_disable_project_for_consecutive_failed_builds from readthedocs.builds.utils import memcache_lock from readthedocs.config.config import BuildConfigV2 from readthedocs.config.exceptions import ConfigError @@ -567,6 +568,13 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): status=status, ) + # Trigger task to check number of failed builds and disable the project if needed (only for community) + if not settings.ALLOW_PRIVATE_REPOS: + check_and_disable_project_for_consecutive_failed_builds.delay( + project_slug=self.data.project.slug, + version_slug=self.data.version.slug, + ) + # Update build object self.data.build["success"] = False diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 2c8eba8f030..8197ae22fc6 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -176,6 +176,7 @@ def RTD_RESTRICTED_DOMAINS(self): RTD_MAX_CONCURRENT_BUILDS = 4 RTD_BUILDS_MAX_RETRIES = 25 RTD_BUILDS_RETRY_DELAY = 5 * 60 # seconds + RTD_BUILDS_MAX_CONSECUTIVE_FAILURES = 25 # The project is disabled when hitting this limit on the default version 
RTD_BUILD_STATUS_API_NAME = "docs/readthedocs" RTD_ANALYTICS_DEFAULT_RETENTION_DAYS = 30 * 3 RTD_AUDITLOGS_DEFAULT_RETENTION_DAYS = 30 * 3