From c7395947d6362aa99d89c5c047d12c5f5a3e3378 Mon Sep 17 00:00:00 2001 From: Chai Tadmor Date: Tue, 25 Nov 2025 16:00:22 +0200 Subject: [PATCH 1/5] Root data source --- docs/data.md | 2 ++ source.yaml | 15 +++++++++++++++ source_test.yaml | 15 +++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/docs/data.md b/docs/data.md index e74d810cee9..a294e4da28f 100644 --- a/docs/data.md +++ b/docs/data.md @@ -56,6 +56,8 @@ The following ecosystems have vulnerabilities encoded in this format: ([Apache 2.0](https://github.com/bitnami/vulndb/blob/main/LICENSE.md)) - [Haskell Security Advisory DB](https://github.com/haskell/security-advisories) ([CC0 1.0](https://github.com/haskell/security-advisories/blob/main/LICENSE.txt)) +- [Root](https://api.root.io/external/osv/all.json) + (License TBD) - [Ubuntu](https://github.com/canonical/ubuntu-security-notices) ([CC-BY-SA 4.0](https://github.com/canonical/ubuntu-security-notices/blob/main/LICENSE)) diff --git a/source.yaml b/source.yaml index 9131af96fe3..86412039dab 100644 --- a/source.yaml +++ b/source.yaml @@ -101,6 +101,21 @@ editable: False strict_validation: False +- name: 'root' + versions_from_repo: False + type: 2 + rest_api_url: 'https://api.root.io/external/osv/all.json' + ignore_patterns: ['^(?!ROOT-).*$'] + directory_path: 'osv' + detect_cherrypicks: False + extension: '.json' + db_prefix: ['ROOT-'] + ignore_git: True + human_link: 'https://root.io/security/{{ BUG_ID }}' + link: 'https://api.root.io/external/osv/' + editable: False + strict_validation: True + - name: 'chainguard' versions_from_repo: False rest_api_url: 'https://packages.cgr.dev/chainguard/osv/all.json' diff --git a/source_test.yaml b/source_test.yaml index dfc162ef2f5..e3070a3596e 100644 --- a/source_test.yaml +++ b/source_test.yaml @@ -101,6 +101,21 @@ editable: False strict_validation: True +- name: 'root' + versions_from_repo: False + type: 2 + rest_api_url: 'https://api.root.io/external/osv/all.json' + ignore_patterns: ['^(?!ROOT-).*$'] + directory_path: 'osv' + detect_cherrypicks: False + extension: '.json' + db_prefix: ['ROOT-'] + ignore_git: True + human_link: 'https://root.io/security/{{ BUG_ID }}' + link: 'https://api.root.io/external/osv/' + editable: False + strict_validation: True + - name: 'chainguard' versions_from_repo: False rest_api_url: 'https://packages.cgr.dev/chainguard/osv/all.json' From fcfb1c2ef60f3700f32c38a0e21da5a3d999944a Mon Sep 17 00:00:00 2001 From: Chai Tadmor Date: Tue, 23 Dec 2025 17:55:55 +0200 Subject: [PATCH 2/5] Revert source.yaml and source_test.yaml changes These will be added in a separate PR after the code changes are merged. --- docs/data.md | 2 -- source.yaml | 15 --------------- source_test.yaml | 15 --------------- 3 files changed, 32 deletions(-) diff --git a/docs/data.md b/docs/data.md index a294e4da28f..e74d810cee9 100644 --- a/docs/data.md +++ b/docs/data.md @@ -56,8 +56,6 @@ The following ecosystems have vulnerabilities encoded in this format: ([Apache 2.0](https://github.com/bitnami/vulndb/blob/main/LICENSE.md)) - [Haskell Security Advisory DB](https://github.com/haskell/security-advisories) ([CC0 1.0](https://github.com/haskell/security-advisories/blob/main/LICENSE.txt)) -- [Root](https://api.root.io/external/osv/all.json) - (License TBD) - [Ubuntu](https://github.com/canonical/ubuntu-security-notices) ([CC-BY-SA 4.0](https://github.com/canonical/ubuntu-security-notices/blob/main/LICENSE)) diff --git a/source.yaml b/source.yaml index 86412039dab..9131af96fe3 100644 --- a/source.yaml +++ b/source.yaml @@ -101,21 +101,6 @@ editable: False strict_validation: False -- name: 'root' - versions_from_repo: False - type: 2 - rest_api_url: 'https://api.root.io/external/osv/all.json' - ignore_patterns: ['^(?!ROOT-).*$'] - directory_path: 'osv' - detect_cherrypicks: False - extension: '.json' - db_prefix: ['ROOT-'] - ignore_git: True - human_link: 'https://root.io/security/{{ BUG_ID }}' - link: 'https://api.root.io/external/osv/' - editable: False - strict_validation: True - - name: 'chainguard' versions_from_repo: False rest_api_url: 'https://packages.cgr.dev/chainguard/osv/all.json' diff --git a/source_test.yaml b/source_test.yaml index e3070a3596e..dfc162ef2f5 100644 --- a/source_test.yaml +++ b/source_test.yaml @@ -101,21 +101,6 @@ editable: False strict_validation: True -- name: 'root' - versions_from_repo: False - type: 2 - rest_api_url: 'https://api.root.io/external/osv/all.json' - ignore_patterns: ['^(?!ROOT-).*$'] - directory_path: 'osv' - detect_cherrypicks: False - extension: '.json' - db_prefix: ['ROOT-'] - ignore_git: True - human_link: 'https://root.io/security/{{ BUG_ID }}' - link: 'https://api.root.io/external/osv/' - editable: False - strict_validation: True - - name: 'chainguard' versions_from_repo: False rest_api_url: 'https://packages.cgr.dev/chainguard/osv/all.json' From 9ef7d6ec617d31b0315dc07ad96978d10003f0f9 Mon Sep 17 00:00:00 2001 From: Chai Tadmor Date: Wed, 7 Jan 2026 23:35:52 +0200 Subject: [PATCH 3/5] fix(root): resolve version comparison crashes and PURL collisions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes three critical issues with Root.io data source integration: 1. **Version Comparison Crashes (TypeError)** - Root versions mixed int and str types in comparison tuples - Example: '22.12.0-2.root.io.1' vs '22.12.0-2+deb12u1.root.io.5' - Solution: Reuse ecosystem-specific parsers (AlpineLinuxVersion, DebianVersion, packaging_legacy, MavenVersion) - Use suffix-based ecosystem detection (:Alpine:3.18 → Alpine) - Explicit error handling with ValueError for invalid versions - No fallback try/catch to let errors propagate properly 2. **PURL Collisions** - All Root packages mapped to pkg:generic/root/ causing collisions - Example: Both Root:Alpine:3.18/curl and Root:Debian:12/curl generated identical pkg:generic/root/curl - Solution: Add sub-ecosystem-specific PURL mappings: * Root:Alpine → pkg:apk/root-alpine/ * Root:Debian → pkg:deb/root-debian/ * Root:Ubuntu → pkg:deb/root-ubuntu/ * Root:PyPI → pkg:pypi/root/ * Root:npm → pkg:npm/root/ * Root:Maven → pkg:maven/root/ - Parse hierarchical ecosystems (Root:Alpine:3.18 → Root:Alpine) - Add arch=source suffix for distro packages 3. **Maven PURL Encoding** - Fixed encoding to preserve '/' in Maven group/artifact separation - Example: com.example:mylib → pkg:maven/root/com.example/mylib Changes: - osv/ecosystems/root.py: Complete redesign with ecosystem-based version parser selection using match/case pattern - osv/purl_helpers.py: Added Root sub-ecosystem mappings and hierarchical ecosystem parsing - osv/purl_helpers_test.py: Added collision prevention tests - osv/ecosystems/root_test.py: Created 11 comprehensive unit tests covering Alpine, Debian, PyPI, Maven, and npm ecosystems Testing: - All unit tests pass (11/11 in root_test.py) - Validated against real Root.io API with 8144 vulnerabilities - No PURL collisions detected across sub-ecosystems - Version comparison works correctly for all ecosystems Fixes #4396 --- osv/ecosystems/root.py | 111 +++++++++++++---------- osv/ecosystems/root_test.py | 175 ++++++++++++++++++++++++++++++++++++ osv/purl_helpers.py | 42 ++++++++- osv/purl_helpers_test.py | 40 +++++++++ 4 files changed, 317 insertions(+), 51 deletions(-) create mode 100644 osv/ecosystems/root_test.py diff --git a/osv/ecosystems/root.py b/osv/ecosystems/root.py index 653d08be08d..cb3b5a263de 100644 --- a/osv/ecosystems/root.py +++ b/osv/ecosystems/root.py @@ -14,7 +14,11 @@ """Root ecosystem helper.""" import re +import packaging_legacy.version from .ecosystems_base import OrderedEcosystem +from .maven import Version as MavenVersion +from ..third_party.univers.alpine import AlpineLinuxVersion +from ..third_party.univers.debian import Version as DebianVersion class Root(OrderedEcosystem): @@ -37,79 +41,90 @@ class Root(OrderedEcosystem): def _sort_key(self, version: str): """Generate sort key for Root version strings. - Handles multiple version formats: - - Alpine: 1.0.0-r10071 - - Python: 1.0.0+root.io.1 - - Others: 1.0.0.root.io.1 + Delegates to the appropriate ecosystem version parser based on the + ecosystem suffix (e.g., :Alpine:3.18, :Debian:12, :npm). Args: version: Version string to parse Returns: - Tuple suitable for sorting + Tuple with (version_object, root_patch) for sorting """ - # Try Alpine format: -r - alpine_match = re.match(r'^(.+?)-r(\d+)$', version) - if alpine_match: - upstream = alpine_match.group(1) - root_patch = int(alpine_match.group(2)) - return self._parse_upstream_version(upstream) + (root_patch,) + upstream_version = version + root_patch = 0 - # Try Python format: +root.io. + # Extract Root-specific suffixes + # Python format: +root.io. python_match = re.match(r'^(.+?)\+root\.io\.(\d+)$', version) if python_match: - upstream = python_match.group(1) + upstream_version = python_match.group(1) root_patch = int(python_match.group(2)) - return self._parse_upstream_version(upstream) + (root_patch,) - # Try other format: .root.io. + # Generic format: .root.io. other_match = re.match(r'^(.+?)\.root\.io\.(\d+)$', version) if other_match: - upstream = other_match.group(1) + upstream_version = other_match.group(1) root_patch = int(other_match.group(2)) - return self._parse_upstream_version(upstream) + (root_patch,) - # Fallback: treat as generic version - return self._parse_upstream_version(version) + # Alpine format with Root suffix: -r + # Note: Alpine naturally uses -r + alpine_match = re.match(r'^(.+?)-r(\d+)$', upstream_version) + if alpine_match: + root_patch = int(alpine_match.group(2)) - def _parse_upstream_version(self, version: str): - """Parse upstream version component. + # Determine the sub-ecosystem from the suffix + sub_ecosystem = self._get_sub_ecosystem() - Attempts to extract numeric and string components for sorting. + # Parse the upstream version using the appropriate version class + return self._parse_upstream_version(upstream_version, sub_ecosystem) + (root_patch,) - Args: - version: Upstream version string + def _get_sub_ecosystem(self) -> str: + """Extract the sub-ecosystem from the suffix. Returns: - Tuple of parsed components + Sub-ecosystem name (e.g., 'Alpine', 'Debian', 'npm', 'PyPI') """ - parts = [] - - # Split on common delimiters - components = re.split(r'[.-]', version) - - for component in components: - # Try to parse as integer - try: - parts.append(int(component)) - except ValueError: - # If not numeric, use string comparison - # Convert to tuple of character codes for consistent sorting - parts.append(component) + if not self.suffix: + return 'unknown' - return tuple(parts) + # Parse suffix like ":Alpine:3.18" -> "Alpine" + # or ":npm" -> "npm" + parts = self.suffix.strip(':').split(':') + if parts: + return parts[0] + return 'unknown' - def sort_key(self, version: str): - """Public sort key method. + def _parse_upstream_version(self, version: str, sub_ecosystem: str): + """Parse upstream version using ecosystem-specific parser. Args: - version: Version string + version: Upstream version string + sub_ecosystem: Sub-ecosystem name (e.g., 'Alpine', 'Debian', 'npm') Returns: - Tuple for sorting + Tuple with version object for comparison + + Raises: + ValueError: If the version cannot be parsed by the appropriate parser """ - try: - return self._sort_key(version) - except Exception: - # Fallback to string comparison if parsing fails - return (version,) + match sub_ecosystem.lower(): + case 'alpine': + if not AlpineLinuxVersion.is_valid(version): + raise ValueError(f'Invalid Alpine version: {version}') + return (AlpineLinuxVersion(version),) + + case 'debian' | 'ubuntu': + if not DebianVersion.is_valid(version): + raise ValueError(f'Invalid Debian/Ubuntu version: {version}') + return (DebianVersion.from_string(version),) + + case 'pypi' | 'python': + # packaging_legacy.version.parse handles invalid versions gracefully + # by returning LegacyVersion, so we don't need explicit validation + return (packaging_legacy.version.parse(version),) + + case 'maven': + return (MavenVersion.from_string(version),) + + case _: + return (packaging_legacy.version.parse(version),) diff --git a/osv/ecosystems/root_test.py b/osv/ecosystems/root_test.py new file mode 100644 index 00000000000..1831c169e9f --- /dev/null +++ b/osv/ecosystems/root_test.py @@ -0,0 +1,175 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Root ecosystem helper tests.""" + +import unittest + +from . import root + + +class RootEcosystemTest(unittest.TestCase): + """Root ecosystem helper tests.""" + + def test_alpine_versions(self): + """Test Root:Alpine version comparison.""" + ecosystem = root.Root(suffix=':Alpine:3.18') + + # Basic Alpine version ordering + self.assertGreater( + ecosystem.sort_key('1.51.0-r20072'), ecosystem.sort_key('1.51.0-r20071')) + self.assertGreater( + ecosystem.sort_key('1.0.0-r2'), ecosystem.sort_key('1.0.0-r1')) + + # Check the 0 sentinel value + self.assertLess( + ecosystem.sort_key('0'), ecosystem.sort_key('1.0.0-r1')) + + # Check equality + self.assertEqual( + ecosystem.sort_key('1.51.0-r20071'), ecosystem.sort_key('1.51.0-r20071')) + + def test_debian_versions(self): + """Test Root:Debian version comparison.""" + ecosystem = root.Root(suffix=':Debian:12') + + # Basic Debian version ordering with Root suffix + self.assertGreater( + ecosystem.sort_key('22.12.0-2+deb12u1.root.io.5'), + ecosystem.sort_key('22.12.0-2.root.io.1')) + + self.assertGreater( + ecosystem.sort_key('1.18.0-6+deb11u3-r20072'), + ecosystem.sort_key('1.18.0-6+deb11u3-r20071')) + + # Check equality + self.assertEqual( + ecosystem.sort_key('1.18.0-6+deb11u3-r20071'), + ecosystem.sort_key('1.18.0-6+deb11u3-r20071')) + + def test_ubuntu_versions(self): + """Test Root:Ubuntu version comparison.""" + ecosystem = root.Root(suffix=':Ubuntu:22.04') + + # Ubuntu version ordering + self.assertGreater( + ecosystem.sort_key('1.2.3-4ubuntu2'), + ecosystem.sort_key('1.2.3-4ubuntu1')) + + def test_pypi_versions(self): + """Test Root:PyPI version comparison.""" + ecosystem = root.Root(suffix=':PyPI') + + # Python version ordering with Root suffix + self.assertGreater( + ecosystem.sort_key('1.0.0+root.io.5'), + ecosystem.sort_key('1.0.0+root.io.1')) + + # PEP440 version ordering + self.assertGreater( + ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.9.9')) + self.assertGreater( + ecosystem.sort_key('1.0.0'), ecosystem.sort_key('1.0.0rc1')) + + def test_npm_versions(self): + """Test Root:npm version comparison.""" + ecosystem = root.Root(suffix=':npm') + + # npm semver ordering with Root suffix + self.assertGreater( + ecosystem.sort_key('1.0.0.root.io.5'), + ecosystem.sort_key('1.0.0.root.io.1')) + + # Basic semver ordering + self.assertGreater( + ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.9.9')) + self.assertGreater( + ecosystem.sort_key('1.0.1'), ecosystem.sort_key('1.0.0')) + + def test_maven_versions(self): + """Test Root:Maven version comparison.""" + ecosystem = root.Root(suffix=':Maven') + + # Maven version ordering + self.assertGreater( + ecosystem.sort_key('2.0'), ecosystem.sort_key('1.0')) + self.assertGreater( + ecosystem.sort_key('1.0'), ecosystem.sort_key('1.0-SNAPSHOT')) + + def test_unknown_ecosystem_fallback(self): + """Test fallback behavior for unknown ecosystems.""" + ecosystem = root.Root(suffix=None) + + # Should still work with Alpine-like versions + self.assertGreater( + ecosystem.sort_key('1.0.0-r2'), ecosystem.sort_key('1.0.0-r1')) + + # Should work with generic versions + self.assertGreater( + ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.0.0')) + + def test_github_issue_4396(self): + """Test the specific versions from GitHub issue #4396.""" + ecosystem = root.Root(suffix=':Debian:12') + + # The problematic comparison that used to crash + key1 = ecosystem.sort_key('22.12.0-2.root.io.1') + key2 = ecosystem.sort_key('22.12.0-2+deb12u1.root.io.5') + + # Should not crash and should compare correctly + self.assertLess(key1, key2) + + def test_root_suffix_extraction(self): + """Test extraction of Root-specific version suffixes.""" + ecosystem = root.Root(suffix=':PyPI') + + # Python format: +root.io. + key = ecosystem.sort_key('1.0.0+root.io.5') + self.assertIsNotNone(key) + + # Generic format: .root.io. + key = ecosystem.sort_key('1.0.0.root.io.5') + self.assertIsNotNone(key) + + def test_invalid_versions(self): + """Test that invalid versions raise appropriate errors.""" + # Alpine ecosystem with invalid version + ecosystem_alpine = root.Root(suffix=':Alpine:3.18') + with self.assertRaises(ValueError) as context: + ecosystem_alpine.sort_key('invalid-version!@#') + self.assertIn('Invalid Alpine version', str(context.exception)) + + # Debian ecosystem with empty version + ecosystem_debian = root.Root(suffix=':Debian:12') + with self.assertRaises(ValueError) as context: + ecosystem_debian.sort_key('') + self.assertIn('Invalid Debian/Ubuntu version', str(context.exception)) + + def test_sub_ecosystem_extraction(self): + """Test _get_sub_ecosystem method.""" + # Test various suffix formats + ecosystem = root.Root(suffix=':Alpine:3.18') + self.assertEqual(ecosystem._get_sub_ecosystem(), 'Alpine') + + ecosystem = root.Root(suffix=':Debian:12') + self.assertEqual(ecosystem._get_sub_ecosystem(), 'Debian') + + ecosystem = root.Root(suffix=':npm') + self.assertEqual(ecosystem._get_sub_ecosystem(), 'npm') + + ecosystem = root.Root(suffix=None) + self.assertEqual(ecosystem._get_sub_ecosystem(), 'unknown') + + +if __name__ == '__main__': + unittest.main() diff --git a/osv/purl_helpers.py b/osv/purl_helpers.py index a44be1b8bb0..0761424ae74 100644 --- a/osv/purl_helpers.py +++ b/osv/purl_helpers.py @@ -87,6 +87,20 @@ EcosystemPURL('rpm', 'redhat'), 'Rocky Linux': EcosystemPURL('rpm', 'rocky-linux'), + # Root sub-ecosystems - map to appropriate PURL types + 'Root:Alpine': + EcosystemPURL('apk', 'root-alpine'), + 'Root:Debian': + EcosystemPURL('deb', 'root-debian'), + 'Root:Ubuntu': + EcosystemPURL('deb', 'root-ubuntu'), + 'Root:PyPI': + EcosystemPURL('pypi', 'root'), + 'Root:npm': + EcosystemPURL('npm', 'root'), + 'Root:Maven': + EcosystemPURL('maven', 'root'), + # Fallback for Root (unknown sub-ecosystem) 'Root': EcosystemPURL('generic', 'root'), 'RubyGems': @@ -119,7 +133,19 @@ def _url_encode(package_name): def package_to_purl(ecosystem: str, package_name: str) -> str | None: """Convert a ecosystem and package name to PURL.""" - purl_data = ECOSYSTEM_PURL_DATA.get(ecosystem) + # Handle Root's hierarchical ecosystems (e.g., "Root:Alpine:3.18") + # Extract the sub-ecosystem for PURL mapping + lookup_ecosystem = ecosystem + if ecosystem.startswith('Root:'): + # Extract first two parts: "Root:Alpine:3.18" -> "Root:Alpine" + parts = ecosystem.split(':', 2) + if len(parts) >= 2: + lookup_ecosystem = ':'.join(parts[:2]) + # If exact match not found, try just "Root" + if lookup_ecosystem not in ECOSYSTEM_PURL_DATA: + lookup_ecosystem = 'Root' + + purl_data = ECOSYSTEM_PURL_DATA.get(lookup_ecosystem) if not purl_data: return None @@ -138,12 +164,22 @@ def package_to_purl(ecosystem: str, package_name: str) -> str | None: if purl_type == 'deb' and ecosystem == 'Debian': suffix = '?arch=source' + # Add arch=source for Root Debian/Ubuntu packages + if purl_type == 'deb' and ecosystem.startswith('Root:'): + suffix = '?arch=source' + if purl_type == 'apk' and ecosystem in ('Alpine', 'Alpaquita', 'BellSoft Hardened Containers'): suffix = '?arch=source' - # Encode package name: preserve '/' only when no namespace is defined - safe_chars = '' if purl_namespace else '/' + # Add arch=source for Root Alpine packages + if purl_type == 'apk' and ecosystem.startswith('Root:'): + suffix = '?arch=source' + + # Encode package name: preserve '/' in specific cases + # - When no namespace is defined + # - For maven type (uses / to separate group ID and artifact ID) + safe_chars = '' if (purl_namespace and purl_type != 'maven') else '/' encoded_name = quote(package_name, safe=safe_chars) return f'pkg:{purl_ecosystem}/{encoded_name}{suffix}' diff --git a/osv/purl_helpers_test.py b/osv/purl_helpers_test.py index 10130d39da3..4c788a77636 100644 --- a/osv/purl_helpers_test.py +++ b/osv/purl_helpers_test.py @@ -98,6 +98,46 @@ def tests_package_to_purl(self): self.assertEqual('pkg:hex/acme/foo', purl_helpers.package_to_purl('Hex', 'acme/foo')) + # Root ecosystem tests - verify no collisions + self.assertEqual('pkg:apk/root-alpine/curl?arch=source', + purl_helpers.package_to_purl('Root:Alpine:3.18', 'curl')) + + self.assertEqual('pkg:deb/root-debian/curl?arch=source', + purl_helpers.package_to_purl('Root:Debian:12', 'curl')) + + self.assertEqual('pkg:deb/root-ubuntu/curl?arch=source', + purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'curl')) + + self.assertEqual('pkg:pypi/root/requests', + purl_helpers.package_to_purl('Root:PyPI', 'requests')) + + self.assertEqual('pkg:npm/root/%40root%2Flodash', + purl_helpers.package_to_purl('Root:npm', '@root/lodash')) + + self.assertEqual('pkg:maven/root/com.example/mylib', + purl_helpers.package_to_purl('Root:Maven', 'com.example:mylib')) + + # Test fallback for unknown Root sub-ecosystem + self.assertEqual('pkg:generic/root/unknown-package', + purl_helpers.package_to_purl('Root', 'unknown-package')) + + def test_root_purl_no_collisions(self): + """Test that Root PURLs don't collide across sub-ecosystems.""" + # Generate PURLs for the same package name across different Root ecosystems + purl_alpine = purl_helpers.package_to_purl('Root:Alpine:3.18', 'curl') + purl_debian = purl_helpers.package_to_purl('Root:Debian:12', 'curl') + purl_ubuntu = purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'curl') + + # Verify they are all different (no collisions) + self.assertNotEqual(purl_alpine, purl_debian) + self.assertNotEqual(purl_alpine, purl_ubuntu) + self.assertNotEqual(purl_debian, purl_ubuntu) + + # Verify they have the correct structure + self.assertTrue(purl_alpine.startswith('pkg:apk/root-alpine/')) + self.assertTrue(purl_debian.startswith('pkg:deb/root-debian/')) + self.assertTrue(purl_ubuntu.startswith('pkg:deb/root-ubuntu/')) + self.assertEqual('pkg:julia/Example', purl_helpers.package_to_purl('Julia', 'Example')) From 0c8aa62963d1e2f5ab412fb8c2c5d59698955475 Mon Sep 17 00:00:00 2001 From: Chai Tadmor Date: Fri, 9 Jan 2026 20:37:02 +0200 Subject: [PATCH 4/5] fix(root): remove PURL generation for Root ecosystem --- osv/purl_helpers.py | 41 +++--------------------- osv/purl_helpers_test.py | 69 ++++++++++------------------------------ 2 files changed, 20 insertions(+), 90 deletions(-) diff --git a/osv/purl_helpers.py b/osv/purl_helpers.py index 0761424ae74..7e4c8288565 100644 --- a/osv/purl_helpers.py +++ b/osv/purl_helpers.py @@ -87,22 +87,9 @@ EcosystemPURL('rpm', 'redhat'), 'Rocky Linux': EcosystemPURL('rpm', 'rocky-linux'), - # Root sub-ecosystems - map to appropriate PURL types - 'Root:Alpine': - EcosystemPURL('apk', 'root-alpine'), - 'Root:Debian': - EcosystemPURL('deb', 'root-debian'), - 'Root:Ubuntu': - EcosystemPURL('deb', 'root-ubuntu'), - 'Root:PyPI': - EcosystemPURL('pypi', 'root'), - 'Root:npm': - EcosystemPURL('npm', 'root'), - 'Root:Maven': - EcosystemPURL('maven', 'root'), - # Fallback for Root (unknown sub-ecosystem) - 'Root': - EcosystemPURL('generic', 'root'), + # Note: Root ecosystem does not generate PURLs as Root packages are not + # published to public registries (npm, PyPI, Maven Central, etc.). + # Users can query Root vulnerabilities using ecosystem and package name. 'RubyGems': EcosystemPURL('gem', None), 'SUSE': @@ -133,19 +120,7 @@ def _url_encode(package_name): def package_to_purl(ecosystem: str, package_name: str) -> str | None: """Convert a ecosystem and package name to PURL.""" - # Handle Root's hierarchical ecosystems (e.g., "Root:Alpine:3.18") - # Extract the sub-ecosystem for PURL mapping - lookup_ecosystem = ecosystem - if ecosystem.startswith('Root:'): - # Extract first two parts: "Root:Alpine:3.18" -> "Root:Alpine" - parts = ecosystem.split(':', 2) - if len(parts) >= 2: - lookup_ecosystem = ':'.join(parts[:2]) - # If exact match not found, try just "Root" - if lookup_ecosystem not in ECOSYSTEM_PURL_DATA: - lookup_ecosystem = 'Root' - - purl_data = ECOSYSTEM_PURL_DATA.get(lookup_ecosystem) + purl_data = ECOSYSTEM_PURL_DATA.get(ecosystem) if not purl_data: return None @@ -164,18 +139,10 @@ def package_to_purl(ecosystem: str, package_name: str) -> str | None: if purl_type == 'deb' and ecosystem == 'Debian': suffix = '?arch=source' - # Add arch=source for Root Debian/Ubuntu packages - if purl_type == 'deb' and ecosystem.startswith('Root:'): - suffix = '?arch=source' - if purl_type == 'apk' and ecosystem in ('Alpine', 'Alpaquita', 'BellSoft Hardened Containers'): suffix = '?arch=source' - # Add arch=source for Root Alpine packages - if purl_type == 'apk' and ecosystem.startswith('Root:'): - suffix = '?arch=source' - # Encode package name: preserve '/' in specific cases # - When no namespace is defined # - For maven type (uses / to separate group ID and artifact ID) diff --git a/osv/purl_helpers_test.py b/osv/purl_helpers_test.py index 4c788a77636..bdfa8d07558 100644 --- a/osv/purl_helpers_test.py +++ b/osv/purl_helpers_test.py @@ -98,45 +98,22 @@ def tests_package_to_purl(self): self.assertEqual('pkg:hex/acme/foo', purl_helpers.package_to_purl('Hex', 'acme/foo')) - # Root ecosystem tests - verify no collisions - self.assertEqual('pkg:apk/root-alpine/curl?arch=source', - purl_helpers.package_to_purl('Root:Alpine:3.18', 'curl')) - - self.assertEqual('pkg:deb/root-debian/curl?arch=source', - purl_helpers.package_to_purl('Root:Debian:12', 'curl')) - - self.assertEqual('pkg:deb/root-ubuntu/curl?arch=source', - purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'curl')) - - self.assertEqual('pkg:pypi/root/requests', - purl_helpers.package_to_purl('Root:PyPI', 'requests')) - - self.assertEqual('pkg:npm/root/%40root%2Flodash', - purl_helpers.package_to_purl('Root:npm', '@root/lodash')) - - self.assertEqual('pkg:maven/root/com.example/mylib', - purl_helpers.package_to_purl('Root:Maven', 'com.example:mylib')) - - # Test fallback for unknown Root sub-ecosystem - self.assertEqual('pkg:generic/root/unknown-package', - purl_helpers.package_to_purl('Root', 'unknown-package')) - - def test_root_purl_no_collisions(self): - """Test that Root PURLs don't collide across sub-ecosystems.""" - # Generate PURLs for the same package name across different Root ecosystems - purl_alpine = purl_helpers.package_to_purl('Root:Alpine:3.18', 'curl') - purl_debian = purl_helpers.package_to_purl('Root:Debian:12', 'curl') - purl_ubuntu = purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'curl') - - # Verify they are all different (no collisions) - self.assertNotEqual(purl_alpine, purl_debian) - self.assertNotEqual(purl_alpine, purl_ubuntu) - self.assertNotEqual(purl_debian, purl_ubuntu) - - # Verify they have the correct structure - self.assertTrue(purl_alpine.startswith('pkg:apk/root-alpine/')) - self.assertTrue(purl_debian.startswith('pkg:deb/root-debian/')) - self.assertTrue(purl_ubuntu.startswith('pkg:deb/root-ubuntu/')) + # Root ecosystem does not generate PURLs + # Root packages are not published to public registries + self.assertIsNone(purl_helpers.package_to_purl('Root:Alpine:3.18', 'rootio-curl')) + self.assertIsNone(purl_helpers.package_to_purl('Root:Debian:12', 'rootio-curl')) + self.assertIsNone(purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'rootio-curl')) + self.assertIsNone(purl_helpers.package_to_purl('Root:PyPI', 'rootio-requests')) + self.assertIsNone(purl_helpers.package_to_purl('Root:npm', '@rootio/lodash')) + self.assertIsNone(purl_helpers.package_to_purl('Root:Maven', 'io.root.example:mylib')) + self.assertIsNone(purl_helpers.package_to_purl('Root', 'root-nginx')) + + def test_root_purl_no_generation(self): + """Test that Root ecosystem does not generate PURLs.""" + # Root packages should return None as they're not in public registries + self.assertIsNone(purl_helpers.package_to_purl('Root:Alpine:3.18', 'rootio-curl')) + self.assertIsNone(purl_helpers.package_to_purl('Root:Debian:12', 'rootio-curl')) + self.assertIsNone(purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'rootio-curl')) self.assertEqual('pkg:julia/Example', purl_helpers.package_to_purl('Julia', 'Example')) @@ -174,12 +151,6 @@ def test_root_purl_no_collisions(self): 'pkg:rpm/rocky-linux/test-package', purl_helpers.package_to_purl('Rocky Linux', 'test-package')) - self.assertEqual('pkg:generic/root/root-nginx', - purl_helpers.package_to_purl('Root', 'root-nginx')) - - self.assertEqual('pkg:generic/root/%40root%2Flodash', - purl_helpers.package_to_purl('Root', '@root/lodash')) - self.assertEqual('pkg:gem/test-package', purl_helpers.package_to_purl('RubyGems', 'test-package')) @@ -331,14 +302,6 @@ def test_parse_purl(self): ('Rocky Linux', 'test-package', '1.2.3'), purl_helpers.parse_purl('pkg:rpm/rocky-linux/test-package@1.2.3')) - self.assertEqual( - ('Root', 'root-nginx', '1.0.0-r10071'), - purl_helpers.parse_purl('pkg:generic/root/root-nginx@1.0.0-r10071')) - - self.assertEqual( - ('Root', '@root/lodash', '4.17.21'), - purl_helpers.parse_purl('pkg:generic/root/%40root%2Flodash@4.17.21')) - self.assertEqual(('RubyGems', 'test-package', '1.2.3'), purl_helpers.parse_purl('pkg:gem/test-package@1.2.3')) From 6657519f3601febcb6343f6f5e3b2fe5d12dc686 Mon Sep 17 00:00:00 2001 From: Chai Tadmor Date: Mon, 12 Jan 2026 15:30:13 +0200 Subject: [PATCH 5/5] lint --- osv/ecosystems/root.py | 5 +++-- osv/ecosystems/root_test.py | 25 +++++++++++-------------- osv/purl_helpers_test.py | 27 ++++++++++++++++++--------- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/osv/ecosystems/root.py b/osv/ecosystems/root.py index cb3b5a263de..83857328e64 100644 --- a/osv/ecosystems/root.py +++ b/osv/ecosystems/root.py @@ -76,7 +76,8 @@ def _sort_key(self, version: str): sub_ecosystem = self._get_sub_ecosystem() # Parse the upstream version using the appropriate version class - return self._parse_upstream_version(upstream_version, sub_ecosystem) + (root_patch,) + return self._parse_upstream_version(upstream_version, + sub_ecosystem) + (root_patch,) def _get_sub_ecosystem(self) -> str: """Extract the sub-ecosystem from the suffix. @@ -124,7 +125,7 @@ def _parse_upstream_version(self, version: str, sub_ecosystem: str): return (packaging_legacy.version.parse(version),) case 'maven': - return (MavenVersion.from_string(version),) + return (MavenVersion.from_string(version),) case _: return (packaging_legacy.version.parse(version),) diff --git a/osv/ecosystems/root_test.py b/osv/ecosystems/root_test.py index 1831c169e9f..6a0f49aae77 100644 --- a/osv/ecosystems/root_test.py +++ b/osv/ecosystems/root_test.py @@ -27,17 +27,18 @@ def test_alpine_versions(self): # Basic Alpine version ordering self.assertGreater( - ecosystem.sort_key('1.51.0-r20072'), ecosystem.sort_key('1.51.0-r20071')) + ecosystem.sort_key('1.51.0-r20072'), + ecosystem.sort_key('1.51.0-r20071')) self.assertGreater( ecosystem.sort_key('1.0.0-r2'), ecosystem.sort_key('1.0.0-r1')) # Check the 0 sentinel value - self.assertLess( - ecosystem.sort_key('0'), ecosystem.sort_key('1.0.0-r1')) + self.assertLess(ecosystem.sort_key('0'), ecosystem.sort_key('1.0.0-r1')) # Check equality self.assertEqual( - ecosystem.sort_key('1.51.0-r20071'), ecosystem.sort_key('1.51.0-r20071')) + ecosystem.sort_key('1.51.0-r20071'), + ecosystem.sort_key('1.51.0-r20071')) def test_debian_versions(self): """Test Root:Debian version comparison.""" @@ -76,8 +77,7 @@ def test_pypi_versions(self): ecosystem.sort_key('1.0.0+root.io.1')) # PEP440 version ordering - self.assertGreater( - ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.9.9')) + self.assertGreater(ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.9.9')) self.assertGreater( ecosystem.sort_key('1.0.0'), ecosystem.sort_key('1.0.0rc1')) @@ -91,18 +91,15 @@ def test_npm_versions(self): ecosystem.sort_key('1.0.0.root.io.1')) # Basic semver ordering - self.assertGreater( - ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.9.9')) - self.assertGreater( - ecosystem.sort_key('1.0.1'), ecosystem.sort_key('1.0.0')) + self.assertGreater(ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.9.9')) + self.assertGreater(ecosystem.sort_key('1.0.1'), ecosystem.sort_key('1.0.0')) def test_maven_versions(self): """Test Root:Maven version comparison.""" ecosystem = root.Root(suffix=':Maven') # Maven version ordering - self.assertGreater( - ecosystem.sort_key('2.0'), ecosystem.sort_key('1.0')) + self.assertGreater(ecosystem.sort_key('2.0'), ecosystem.sort_key('1.0')) self.assertGreater( ecosystem.sort_key('1.0'), ecosystem.sort_key('1.0-SNAPSHOT')) @@ -115,8 +112,7 @@ def test_unknown_ecosystem_fallback(self): ecosystem.sort_key('1.0.0-r2'), ecosystem.sort_key('1.0.0-r1')) # Should work with generic versions - self.assertGreater( - ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.0.0')) + self.assertGreater(ecosystem.sort_key('2.0.0'), ecosystem.sort_key('1.0.0')) def test_github_issue_4396(self): """Test the specific versions from GitHub issue #4396.""" @@ -158,6 +154,7 @@ def test_invalid_versions(self): def test_sub_ecosystem_extraction(self): """Test _get_sub_ecosystem method.""" # Test various suffix formats + # pylint: disable=protected-access ecosystem = root.Root(suffix=':Alpine:3.18') self.assertEqual(ecosystem._get_sub_ecosystem(), 'Alpine') diff --git a/osv/purl_helpers_test.py b/osv/purl_helpers_test.py index bdfa8d07558..f0abe5c681e 100644 --- a/osv/purl_helpers_test.py +++ b/osv/purl_helpers_test.py @@ -100,20 +100,29 @@ def tests_package_to_purl(self): # Root ecosystem does not generate PURLs # Root packages are not published to public registries - self.assertIsNone(purl_helpers.package_to_purl('Root:Alpine:3.18', 'rootio-curl')) - self.assertIsNone(purl_helpers.package_to_purl('Root:Debian:12', 'rootio-curl')) - self.assertIsNone(purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'rootio-curl')) - self.assertIsNone(purl_helpers.package_to_purl('Root:PyPI', 'rootio-requests')) - self.assertIsNone(purl_helpers.package_to_purl('Root:npm', '@rootio/lodash')) - self.assertIsNone(purl_helpers.package_to_purl('Root:Maven', 'io.root.example:mylib')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Alpine:3.18', 'rootio-curl')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Debian:12', 'rootio-curl')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'rootio-curl')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:PyPI', 'rootio-requests')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:npm', '@rootio/lodash')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Maven', 'io.root.example:mylib')) self.assertIsNone(purl_helpers.package_to_purl('Root', 'root-nginx')) def test_root_purl_no_generation(self): """Test that Root ecosystem does not generate PURLs.""" # Root packages should return None as they're not in public registries - self.assertIsNone(purl_helpers.package_to_purl('Root:Alpine:3.18', 'rootio-curl')) - self.assertIsNone(purl_helpers.package_to_purl('Root:Debian:12', 'rootio-curl')) - self.assertIsNone(purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'rootio-curl')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Alpine:3.18', 'rootio-curl')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Debian:12', 'rootio-curl')) + self.assertIsNone( + purl_helpers.package_to_purl('Root:Ubuntu:22.04', 'rootio-curl')) self.assertEqual('pkg:julia/Example', purl_helpers.package_to_purl('Julia', 'Example'))