class Migration(migrations.Migration):
    """
    Create the ``AdvisoryDetectionRule`` table.

    Each row stores the text of one detection rule together with its rule
    type, an optional source URL and a required link to an ``AdvisoryV2`` row.
    """

    # Must run after the latest pre-existing schema migration of the app.
    dependencies = [
        ("vulnerabilities", "0103_codecommit_impactedpackage_affecting_commits_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="AdvisoryDetectionRule",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
                    ),
                ),
                (
                    "rule_text",
                    models.TextField(
                        help_text="Full text of the detection rule, script, or signature."
                    ),
                ),
                (
                    # Free-form CharField restricted (at validation level) to three rule formats.
                    "rule_type",
                    models.CharField(
                        blank=True,
                        choices=[
                            ("yara", "YARA"),
                            ("sigma", "Sigma Detection Rule"),
                            ("clamav", "ClamAV Signature"),
                        ],
                        max_length=100,
                    ),
                ),
                (
                    "source_url",
                    models.URLField(
                        blank=True,
                        help_text="URL or reference to the source of the rule (vendor feed, GitHub repo, etc.).",
                        null=True,
                    ),
                ),
                (
                    # CASCADE: deleting the advisory deletes its detection rules.
                    # Reverse accessor on AdvisoryV2 is ``detection_rules``.
                    "advisory",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="detection_rules",
                        to="vulnerabilities.advisoryv2",
                    ),
                ),
            ],
        ),
    ]
class Migration(migrations.Migration):
    """
    Bring the detection rule table in line with ``models.DetectionRule``.

    Migration 0104 created the table under the name ``AdvisoryDetectionRule``
    with an older field layout, while the model declared in ``models.py`` (and
    used by the ClamAV pipeline) is ``DetectionRule``. Without the operations
    below no table backs ``DetectionRule`` and ``makemigrations --check``
    reports pending changes.

    Operations, in order:

    1. make the ``advisory`` foreign key optional (``SET_NULL``) so that rules
       that match no advisory can be stored;
    2. rename the model to ``DetectionRule``;
    3. add the ``rule_metadata`` JSON field;
    4. align ``rule_text``, ``rule_type`` and ``source_url`` with the model.
    """

    dependencies = [
        ("vulnerabilities", "0104_advisorydetectionrule"),
    ]

    operations = [
        migrations.AlterField(
            model_name="advisorydetectionrule",
            name="advisory",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="detection_rules",
                to="vulnerabilities.advisoryv2",
            ),
        ),
        migrations.RenameModel(
            old_name="AdvisoryDetectionRule",
            new_name="DetectionRule",
        ),
        migrations.AddField(
            model_name="detectionrule",
            name="rule_metadata",
            field=models.JSONField(
                blank=True,
                help_text="Additional structured data such as tags, or author information.",
                null=True,
            ),
        ),
        migrations.AlterField(
            model_name="detectionrule",
            name="rule_text",
            field=models.TextField(help_text="The content of the detection signature."),
        ),
        # NOTE: the two alterations below tighten constraints (shorter
        # ``rule_type``, non-null ``source_url``). This is safe because the table
        # was introduced by 0104 in the same change set and nothing could have
        # populated it yet.
        migrations.AlterField(
            model_name="detectionrule",
            name="rule_type",
            field=models.CharField(
                choices=[
                    ("yara", "Yara"),
                    ("yara-x", "Yara-X"),
                    ("sigma", "Sigma"),
                    ("clamav", "CLAMAV"),
                    ("suricata", "Suricata"),
                ],
                help_text="The type of the detection rule content (e.g., YARA, Sigma).",
                max_length=50,
            ),
        ),
        migrations.AlterField(
            model_name="detectionrule",
            name="source_url",
            field=models.URLField(
                help_text="URL to the original source or reference for this rule.",
                max_length=1024,
            ),
        ),
    ]
class DetectionRuleTypes(models.TextChoices):
    """
    Supported formats for security detection rules.

    Each member is a ``(stored value, human-readable label)`` pair; the stored
    value is what ends up in ``DetectionRule.rule_type``.
    """

    YARA = "yara", "Yara"
    YARA_X = "yara-x", "Yara-X"
    SIGMA = "sigma", "Sigma"
    CLAMAV = "clamav", "CLAMAV"
    SURICATA = "suricata", "Suricata"


class DetectionRule(models.Model):
    """
    A Detection Rule is code used to identify malicious activity or security threats.

    A rule may be linked to one ``AdvisoryV2`` (reverse accessor
    ``advisory.detection_rules``) or stand alone when ``advisory`` is NULL.
    """

    # Format of ``rule_text``; restricted to the DetectionRuleTypes values.
    rule_type = models.CharField(
        max_length=50,
        choices=DetectionRuleTypes.choices,
        help_text="The type of the detection rule content (e.g., YARA, Sigma).",
    )

    # Required (no null/blank): where the rule was obtained from.
    source_url = models.URLField(
        max_length=1024, help_text="URL to the original source or reference for this rule."
    )

    # Optional free-form JSON attached to the rule.
    rule_metadata = models.JSONField(
        null=True,
        blank=True,
        help_text="Additional structured data such as tags, or author information.",
    )

    # The rule/signature body itself.
    rule_text = models.TextField(help_text="The content of the detection signature.")

    # Optional link to an advisory. SET_NULL keeps the rule when the advisory
    # is deleted instead of cascading the delete.
    advisory = models.ForeignKey(
        AdvisoryV2,
        related_name="detection_rules",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
def _is_within_directory(base_dir: Path, target: Path) -> bool:
    """Return True if ``target`` resolves to ``base_dir`` or to a path below it."""
    base_dir = base_dir.resolve()
    target = target.resolve()
    return target == base_dir or base_dir in target.parents


def _safe_extract_all(tar, output_path: Path) -> None:
    """
    Extract every member of the open ``tar`` archive below ``output_path``.

    The archive comes from the network, so members are never trusted: absolute
    paths, ``..`` escapes, links and special files must not be written.
    Raise ``tarfile.TarError`` (or a subclass) on such a member.
    """
    if hasattr(tarfile, "data_filter"):
        # Extraction filters (PEP 706) reject unsafe members for us.
        tar.extractall(path=output_path, filter="data")
        return

    # Older interpreters without extraction filters: validate by hand. Members
    # are extracted one at a time, right after their header is read, because
    # the archive is opened as a non-seekable stream.
    for member in tar:
        if not (member.isfile() or member.isdir()):
            raise tarfile.TarError(f"Refusing to extract link or special file: {member.name!r}")
        if not _is_within_directory(output_path, output_path / member.name):
            raise tarfile.TarError(f"Refusing to extract outside of destination: {member.name!r}")
        tar.extract(member, path=output_path)


def extract_cvd(cvd_path, output_dir):
    """
    Extract the ClamAV CVD file at ``cvd_path`` into ``output_dir`` and return
    ``output_dir`` as a ``Path``.

    CVD format: a 512-byte header followed by a gzipped tar archive.

    The archive is decompressed as a stream straight from the file, so neither
    the compressed nor the decompressed database is held in memory as a whole.
    Raise ``tarfile.TarError`` if the payload is not a readable gzipped tar
    archive or contains a member that would be written outside ``output_dir``.
    """
    header_size = 512

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with open(cvd_path, "rb") as cvd_file:
        cvd_file.seek(header_size)  # Skip header
        # "r|gz" reads sequentially from the current offset and never seeks
        # back, which a seekable gzip reader would do relative to offset 0 of
        # the CVD file (i.e. into the header).
        with tarfile.open(fileobj=cvd_file, mode="r|gz") as tar:
            _safe_extract_all(tar, output_path)

    for extracted in output_path.rglob("*"):
        if extracted.is_file():
            extracted.chmod(0o644)  # rw-r--r--
    return output_path


def _iter_signature_fields(db_path: Path, min_fields: int):
    """
    Yield ``(line_num, fields)`` for each signature line of ``db_path``.

    Blank lines and lines starting with ``#`` are skipped, as are lines with
    fewer than ``min_fields`` colon-separated fields. ``line_num`` is 1-based
    and counts every line of the file, including the skipped ones.
    """
    with db_path.open("r", encoding="utf-8", errors="ignore") as db_file:
        for line_num, raw_line in enumerate(db_file, 1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue

            fields = line.split(":")
            if len(fields) >= min_fields:
                yield line_num, fields


def parse_ndb_file(ndb_path: Path) -> List[dict]:
    """
    Parse a .ndb file (extended signatures). Return list of dicts.

    Each dict has the keys ``name``, ``target_type``, ``offset``,
    ``hex_signature`` (first four colon-separated fields, as strings) and
    ``line_num``. Any further fields on a line are ignored.
    """
    return [
        {
            "name": fields[0],
            "target_type": fields[1],
            "offset": fields[2],
            "hex_signature": fields[3],
            "line_num": line_num,
        }
        for line_num, fields in _iter_signature_fields(ndb_path, min_fields=4)
    ]


def parse_hdb_file(hdb_path: Path) -> List[dict]:
    """
    Parse a .hdb file (MD5 hash signatures). Return list of dicts.

    Each dict has the keys ``hash``, ``file_size``, ``name`` (first three
    colon-separated fields, as strings) and ``line_num``. Any further fields
    on a line are ignored.
    """
    return [
        {
            "hash": fields[0],
            "file_size": fields[1],
            "name": fields[2],
            "line_num": line_num,
        }
        for line_num, fields in _iter_signature_fields(hdb_path, min_fields=3)
    ]
def extract_cve_id(name: str):
    """
    Return the first CVE ID found in ``name`` in upper case, or None.

    Underscores are replaced with dashes before searching, so that a ClamAV
    signature name such as ``Win.Exploit.CVE_2019_1199-7116649-2`` is searched
    as ``Win.Exploit.CVE-2019-1199-7116649-2``.
    """
    normalized = name.replace("_", "-")
    # NOTE(review): presumably find_all_cve returns matches in order of
    # appearance - confirm in vulnerabilities.utils.
    cves = [cve.upper() for cve in find_all_cve(normalized)]
    return cves[0] if cves else None


class ClamVRulesImproverPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline that downloads the ClamAV database (main.cvd), extracts it,
    parses the .ndb and .hdb signature files and saves the signatures as
    detection rules, linked to the advisories aliased by the CVE named in the
    signature when there is one.
    """

    pipeline_id = "clamv_rules"
    MAIN_DATABASE_URL = "https://database.clamav.net/main.cvd"
    license_url = "https://github.com/Cisco-Talos/clamav/blob/c73755d3fc130b0c60ccf4e8f8d28c62fc58c95b/README.md#licensing"
    # NOTE(review): this is a license name rather than an SPDX expression -
    # confirm what the base pipeline expects here.
    license_expression = "GNU GENERAL PUBLIC LICENSE"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.download_database,
            cls.extract_database,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def download_database(self):
        """
        Download main.cvd into a fresh temporary directory stored as ``self.db_dir``.

        Raise ``requests.HTTPError`` on a non-success HTTP status.
        """
        self.log("Downloading ClamAV database…")
        # The directory returned by mkdtemp() is itself the download directory,
        # so that clean_downloads() removes everything that was created.
        self.db_dir = Path(tempfile.mkdtemp(prefix="clamav_db_"))

        database_url = f"{self.MAIN_DATABASE_URL}?api-version=1"
        headers = {
            "User-Agent": "ClamAV-Client/1.0 (https://github.com/aboutcode-org/vulnerablecode)",
            "Accept": "*/*",
        }

        filename = self.db_dir / "main.cvd"
        self.log(f"Downloading {database_url} → {filename}")

        resp = requests.get(database_url, headers=headers, stream=True, timeout=30)
        try:
            resp.raise_for_status()

            with filename.open("wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        finally:
            # A streamed response holds its connection until closed.
            resp.close()

        self.log("ClamAV DB file downloaded successfully.")

    def extract_database(self):
        """Extract the downloaded CVD into ``self.db_dir / "extracted"``."""
        out_dir = self.db_dir / "extracted"
        self.extract_cvd_dir = extract_cvd(self.db_dir / "main.cvd", out_dir)
        self.log(f"Extracted CVD to {self.extract_cvd_dir}")

    def collect_and_store_advisories(self):
        """
        Parse the main.hdb and main.ndb files and store their rules in the DB.

        A rule whose name contains a CVE ID known as an advisory alias is stored
        once per advisory of that alias; any other rule is stored once without
        an advisory.
        """
        rule_entries = parse_hdb_file(self.extract_cvd_dir / "main.hdb") + parse_ndb_file(
            self.extract_cvd_dir / "main.ndb"
        )

        for rule_entry in rule_entries:
            cve_id = extract_cve_id(rule_entry.get("name", ""))
            advisories = self._get_advisories_for_cve(cve_id) if cve_id else []

            # ``[None]`` stores the rule a single time, unlinked.
            for advisory in advisories or [None]:
                DetectionRule.objects.update_or_create(
                    # The rule text is the repr of the parsed signature dict.
                    rule_text=str(rule_entry),
                    rule_type=DetectionRuleTypes.CLAMAV,
                    advisory=advisory,
                    defaults={
                        "source_url": self.MAIN_DATABASE_URL,
                    },
                )

    def _get_advisories_for_cve(self, cve_id):
        """Return the list of advisories aliased by ``cve_id``; empty if the alias is unknown."""
        try:
            alias = AdvisoryAlias.objects.get(alias=cve_id)
        except AdvisoryAlias.DoesNotExist:
            self.log(f"Advisory {cve_id} not found.")
            return []
        return list(alias.advisories.all())

    def clean_downloads(self):
        """Clean up downloaded files."""
        # getattr: this also runs from on_failure(), possibly before any download.
        if getattr(self, "db_dir", None) and os.path.exists(self.db_dir):
            shutil.rmtree(self.db_dir, ignore_errors=True)
            self.log("Cleaned up downloaded files.")

    def on_failure(self):
        """Ensure cleanup on failure."""
        self.clean_downloads()
@pytest.mark.django_db
@mock.patch("vulnerabilities.pipelines.v2_improvers.clamav_rules.extract_cvd")
@mock.patch("vulnerabilities.pipelines.v2_improvers.clamav_rules.requests.get")
def test_clamav_rules_db_improver(mock_requests_get, mock_extract_cvd):
    """
    Run the ClamAV pipeline against the test main.hdb/main.ndb files, with the
    download and the CVD extraction mocked, and check the stored rules.
    """
    mock_resp = MagicMock()
    mock_resp.iter_content.return_value = [b"fake data"]
    mock_resp.raise_for_status.return_value = None
    mock_requests_get.return_value = mock_resp

    # Extraction is skipped: the pipeline reads the test data directory instead.
    mock_extract_cvd.return_value = TEST_REPO_DIR

    advisory_by_cve = {}
    for advisory_id, unique_content_id, cve_id in (
        ("VCIO-123-0001", "sgsdg45", "CVE-2019-1199"),
        ("VCIO-123-1002", "6hd4d6f", "CVE-2020-0720"),
        ("VCIO-123-1003", "sd6h4sh", "CVE-2020-0722"),
    ):
        advisory = AdvisoryV2.objects.create(
            advisory_id=advisory_id,
            datasource_id="ds",
            avid=f"ds/{advisory_id}",
            unique_content_id=unique_content_id,
            url="https://test.com",
            date_collected=datetime.now(),
        )
        advisory.aliases.add(AdvisoryAlias.objects.create(alias=cve_id))
        advisory_by_cve[cve_id] = advisory

    improver = ClamVRulesImproverPipeline()
    improver.execute()

    assert DetectionRule.objects.count() == 14

    # Each advisory is linked to exactly one rule: the one naming its CVE.
    for cve_id, advisory in advisory_by_cve.items():
        linked_rule = DetectionRule.objects.get(advisory=advisory)
        assert cve_id.replace("-", "_") in linked_rule.rule_text

    expected_rule_texts = [
        "{'hash': 'af9a2ce339b3a314cd8ce31f4e2489a5', 'file_size': '149420', 'name': 'Archive.Malware.Agent-7116646-0', 'line_num': 1}",
        "{'hash': 'ab51de8588946f1332d53dd53bac8056', 'file_size': '48580', 'name': 'Html.Malware.Agent-7116647-0', 'line_num': 2}",
        "{'hash': '3f70569ac131833698c3d1c20e0123ca', 'file_size': '676', 'name': 'Html.Malware.Agent-7116648-0', 'line_num': 3}",
        "{'hash': 'df6634d021a6df4d17f005e507beac88', 'file_size': '6268', 'name': 'Win.Exploit.CVE_2019_1199-7116649-2', 'line_num': 4}",
        "{'hash': '27ebcd8c72e6e3c7f4a64dc68b95dd8a', 'file_size': '173248', 'name': 'Html.Malware.Agent-7116650-0', 'line_num': 5}",
        "{'hash': '8745d432f7027e65178e92b2239bef25', 'file_size': '384634', 'name': 'Archive.Malware.Agent-7116651-0', 'line_num': 6}",
        "{'hash': '63d1a25066c121253febc907850b1852', 'file_size': '50185', 'name': 'Html.Malware.Agent-7116652-0', 'line_num': 7}",
        "{'hash': '92233ed6889cd0ba7bf632e3f45fc950', 'file_size': '97134', 'name': 'Html.Malware.Agent-7116653-0', 'line_num': 8}",
        "{'name': 'Win.Exploit.CVE_2020_0720-7578647-1', 'target_type': '1', 'offset': '*', 'hex_signature': '240C1400000068E8214000660F1344241C897C2414C744241805000000E80EFEFFFF83C4048D44240C50FF1544204000', 'line_num': 1}",
        "{'name': 'Win.Exploit.CVE_2020_0731-7583553-0', 'target_type': '1', 'offset': '*', 'hex_signature': '83C4088B55F0526AF48B45FC50FF15D4C146008945E46A006A006A108B4D', 'line_num': 2}",
        "{'name': 'Win.Exploit.CVE_2020_0722-7583689-1', 'target_type': '1', 'offset': '*', 'hex_signature': '488B555033C9FF15A1F100004889057AB10000488B0D73B10000FF1575F10000EB86', 'line_num': 3}",
        "{'name': 'Win.Ransomware.MailTo-7586723-0', 'target_type': '1', 'offset': '*', 'hex_signature': '496e746572666163345c7b62313936623238372d626162342d313031612d623639632d3030616130303334316430377d*4c616d616e74696e652e537469636b7950617373776f7264', 'line_num': 4}",
        "{'name': 'Win.Trojan.Emotet-7587729-1', 'target_type': '1', 'offset': '*', 'hex_signature': '565053e801000000cc5889c3402d00e016002dacb00b1005a3b00b10803bcc7519c60300bb00100000682ece177a680f9067565350e80a00000083c000894424085b58c35589e5505351568b75088b4d0cc1e9028b45108b5d1485c9740a3106011e83c60449ebf25e595b58c9c21000', 'line_num': 5}",
        "{'name': 'Win.Trojan.Hoplight-7587747-0', 'target_type': '1', 'offset': '*', 'hex_signature': '4e6574776f726b20554450205472616365204d616e6167656d656e742053657276696365*6d646e6574757365*554450547263537663', 'line_num': 6}",
    ]

    # Order explicitly by primary key (creation order): the model declares no
    # default ordering, so an unordered queryset has no guaranteed row order.
    assert [
        (detection_rule.rule_type, detection_rule.rule_text, detection_rule.source_url)
        for detection_rule in DetectionRule.objects.order_by("id")
    ] == [
        ("clamav", rule_text, "https://database.clamav.net/main.cvd")
        for rule_text in expected_rule_texts
    ]
+Win.Trojan.Hoplight-7587747-0:1:*:4e6574776f726b20554450205472616365204d616e6167656d656e742053657276696365*6d646e6574757365*554450547263537663 \ No newline at end of file