diff --git a/.gitignore b/.gitignore index 1b44107bde5..2d896a85840 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ hurl-scripts/ temp/* **/tmp/** gcp/api/v1/osv/** +.hypothesis \ No newline at end of file diff --git a/osv/ecosystems/alpine.py b/osv/ecosystems/alpine.py index 3862b344e05..12b9d4640b3 100644 --- a/osv/ecosystems/alpine.py +++ b/osv/ecosystems/alpine.py @@ -22,8 +22,12 @@ from ..third_party.univers.alpine import AlpineLinuxVersion from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError -from .ecosystems_base import OrderedEcosystem +from .ecosystems_base import ( + coarse_version_generic, + EnumerableEcosystem, + EnumerateError, + OrderedEcosystem, +) from .. import repos from ..cache import cached @@ -33,11 +37,31 @@ class APK(OrderedEcosystem): def _sort_key(self, version): if not AlpineLinuxVersion.is_valid(version): - # If version is not valid, it is most likely an invalid input - # version then sort it to the last/largest element - return AlpineLinuxVersion('9999999999') + raise ValueError(f'Invalid version: {version}') return AlpineLinuxVersion(version) + def coarse_version(self, version): + """Coarse version. + + Treats version as dot-separated integers. + Trims suffixes (_rc, _p, -r) to ensure monotonicity (e.g. 1.2_rc1 < 1.2). + """ + if not AlpineLinuxVersion.is_valid(version): + raise ValueError(f'Invalid version: {version}') + # is_valid uses a $ regex anchor (which can match a newline), + # so we need to remove the newline if one exists. + if version[-1] == '\n': + version = version[:-1] + return coarse_version_generic( + version, + separators_regex=r'[.]', + # in APK, 1.0.2 < 1.02.1 < 1.1.1 + # We must treat everything after .0x as 0 + # Also split off the _rc, _p, or -r suffixes + truncate_regex=r'(?:\.0|[_-])', + implicit_split=False, + empty_as='') + class Alpine(APK, EnumerableEcosystem): """ @@ -99,7 +123,7 @@ def clean_versions(ver: str) -> str: current_ver = clean_versions(current_ver) current_rel = clean_versions(current_rel) # Ignore occasional version that is still not valid. - if AlpineLinuxVersion.is_valid(current_ver) and current_rel.isdigit(): + if AlpineLinuxVersion.is_valid(current_ver) and current_rel.isdecimal(): all_versions.add(current_ver + '-r' + current_rel) else: logging.warning('Alpine version "%s" - "%s" is not valid', diff --git a/osv/ecosystems/alpine_test.py b/osv/ecosystems/alpine_test.py index 9d24382f9c2..9a5711c1d86 100644 --- a/osv/ecosystems/alpine_test.py +++ b/osv/ecosystems/alpine_test.py @@ -85,6 +85,24 @@ def test_apk(self): self.assertLessEqual( ecosystem.sort_key('1.2.0-r0'), ecosystem.sort_key('1.10.0-r0')) + def test_coarse_version(self): + """Test coarse version.""" + ecosystem = alpine.APK() + self.assertEqual('00:00000010.00000002.00000033', + ecosystem.coarse_version('10.2.33')) + self.assertEqual('00:00000004.00000005.00000000', + ecosystem.coarse_version('4.5_alpha')) + self.assertEqual('00:20200712.00000000.00000000', + ecosystem.coarse_version('20200712-r0')) + self.assertEqual('00:00000011.00000003.00000020', + ecosystem.coarse_version('11.3.20.1_p1-r0')) + self.assertEqual('00:00000002.00000003.00000000', + ecosystem.coarse_version('02.3')) + self.assertEqual('00:00000005.00000000.00000000', + ecosystem.coarse_version('5.06.7')) + self.assertEqual('00:00000005.00000000.00000000', + ecosystem.coarse_version('5.0.9')) + def test_apk_ecosystems(self): """Test apk-based ecosystems return an APK ecosystem.""" ecos = [ diff --git a/osv/ecosystems/coarse_version_monotonicity_test.py b/osv/ecosystems/coarse_version_monotonicity_test.py new file mode 100644 index 00000000000..000ac91f6b3 --- /dev/null +++ b/osv/ecosystems/coarse_version_monotonicity_test.py @@ -0,0 +1,168 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Coarse version monotonicity tests.""" + +import re +import unittest +from hypothesis import given, example, strategies as st +import packaging.version + +from .. import ecosystems +from ..third_party.univers.gem import GemVersion + +from . import alpine +from . import cran +from . import debian +from . import haskell +from . import maven +from . import nuget +from . import packagist +from . import pub +from . import pypi +from . import redhat +from . import rubygems +from . import semver_ecosystem_helper + +# Strategies + +# Matches standard SemVer: major.minor.patch, optional 'v', prerelease/build. +# Note: OSV's SemVer implementation coerces partial versions +# (e.g. '1.0' -> '1.0.0'). +semver_strategy = st.from_regex( + r'^v?[0-9]+(\.[0-9]+){0,2}(-[0-9a-zA-z.-]*)?\+?[0-9a-zA-z.-]*$') + +# Matches standard Alpine versions like 1.2.3, optionally with suffixes +# like _rc1, _p2, and revision -r3. +apk_version_strategy = st.from_regex( + r'^[0-9]+(\.[0-9]+)*(_rc[0-9]*|_p[0-9]*)*(-r[0-9]+)?$') + +# Matches R versions: sequence of numbers separated by dots or dashes +# (e.g. 1.2-3). +cran_version_strategy = st.from_regex(r'^[0-9]+([.-][0-9]+)+$') + +# Matches Debian versions: optional epoch, upstream version +# (alphanumerics/separators), optional debian revision. +dpkg_version_strategy = st.from_regex( + r'^(\d+:)?\d([A-Za-z0-9\.\+\~\-]+|[A-Za-z0-9\.\+\~]+-[A-Za-z0-9\+\.\~]+)?$') + +# Matches Haskell versions: dot-separated integers (e.g. 1.2.3). +hackage_version_strategy = st.from_regex(r'^[0-9]+(\.[0-9]+)*$') + +# Matches Maven versions: flexible sequence of numbers or identifiers +# separated by dots or dashes. +maven_version_strategy = st.from_regex(r'^(([0-9]*|[A-Za-z+]*)[.-]?)*$') + +# Matches NuGet versions: SemVer-like, optional 'v' prefix, 4th component, +# prerelease/build metadata. +nuget_version_strategy = st.from_regex( + r'^v?[0-9]+(\.[0-9]+){0,3}(-[0-9a-zA-z.-]*)?\+?[0-9a-zA-z.-]*$') + +# Matches Packagist versions: 'v' prefix, flexible components separated by +# ., +, _, -. +packagist_version_strategy = st.from_regex(r'^v?(([0-9]*|[A-Za-z+]*)[.+_-]?)*$') + +# Pub versions are the same format as SemVer. +pub_version_strategy = semver_strategy + +# Uses standard packaging.version pattern. +pypi_strategy = st.one_of( + st.text(), # legacy version can be any string + st.from_regex( + re.compile(r'^' + packaging.version.VERSION_PATTERN + r'$', + re.IGNORECASE | re.VERBOSE | re.ASCII))) + +# Matches RPM versions: optional epoch, alternating alphanumeric segments. +rpm_version_strategy = st.from_regex( + re.compile(r'^([0-9]+:)?(([0-9]+|[A-Za-z]+)((?![0-9A-Za-z])[ -~])*)+$', + re.ASCII)) + +# Uses standard GemVersion pattern. +rubygems_version_strategy = st.from_regex(r'^' + GemVersion.VERSION_PATTERN + + r'$') + + +def check_coarse_version_monotonic(test_case: unittest.TestCase, + ecosystem: ecosystems.OrderedEcosystem, + v1_str: str, v2_str: str): + """Test coarse_version monotonicity.""" + v1 = ecosystem.sort_key(v1_str) + v2 = ecosystem.sort_key(v2_str) + if v2 < v1: + v1, v2 = v2, v1 + v1_str, v2_str = v2_str, v1_str + + if v1.is_invalid: + test_case.assertRaises(ValueError, ecosystem.coarse_version, v1_str) + if v2.is_invalid: + test_case.assertRaises(ValueError, ecosystem.coarse_version, v2_str) + + if not v1.is_invalid and not v2.is_invalid: + v1_coarse = ecosystem.coarse_version(v1_str) + v2_coarse = ecosystem.coarse_version(v2_str) + test_case.assertLessEqual(v1_coarse, v2_coarse) + + +class CoarseVersionMonotonicityTest(unittest.TestCase): + """Coarse version monotonicity tests.""" + + @given(apk_version_strategy, apk_version_strategy) + @example('1.02', '1.1') + @example('5.0.9', '5.06.7') + def test_apk(self, v1_str, v2_str): + check_coarse_version_monotonic(self, alpine.APK(), v1_str, v2_str) + + @given(cran_version_strategy, cran_version_strategy) + def test_cran(self, v1_str, v2_str): + check_coarse_version_monotonic(self, cran.CRAN(), v1_str, v2_str) + + @given(dpkg_version_strategy, dpkg_version_strategy) + def test_dpkg(self, v1_str, v2_str): + check_coarse_version_monotonic(self, debian.DPKG(), v1_str, v2_str) + + @given(hackage_version_strategy, hackage_version_strategy) + def test_hackage(self, v1_str, v2_str): + check_coarse_version_monotonic(self, haskell.Hackage(), v1_str, v2_str) + + @given(maven_version_strategy, maven_version_strategy) + def test_maven(self, v1_str, v2_str): + check_coarse_version_monotonic(self, maven.Maven(), v1_str, v2_str) + + @given(nuget_version_strategy, nuget_version_strategy) + def test_nuget(self, v1_str, v2_str): + check_coarse_version_monotonic(self, nuget.NuGet(), v1_str, v2_str) + + @given(packagist_version_strategy, packagist_version_strategy) + def test_packagist(self, v1_str, v2_str): + check_coarse_version_monotonic(self, packagist.Packagist(), v1_str, v2_str) + + @given(pub_version_strategy, pub_version_strategy) + def test_pub(self, v1_str, v2_str): + check_coarse_version_monotonic(self, pub.Pub(), v1_str, v2_str) + + @given(pypi_strategy, pypi_strategy) + def test_pypi(self, v1_str, v2_str): + check_coarse_version_monotonic(self, pypi.PyPI(), v1_str, v2_str) + + @given(rpm_version_strategy, rpm_version_strategy) + def test_rpm(self, v1_str, v2_str): + check_coarse_version_monotonic(self, redhat.RPM(), v1_str, v2_str) + + @given(rubygems_version_strategy, rubygems_version_strategy) + def test_rubygems(self, v1_str, v2_str): + check_coarse_version_monotonic(self, rubygems.RubyGems(), v1_str, v2_str) + + @given(semver_strategy, semver_strategy) + def test_semver(self, v1_str, v2_str): + check_coarse_version_monotonic(self, semver_ecosystem_helper.SemverLike(), + v1_str, v2_str) diff --git a/osv/ecosystems/cran.py b/osv/ecosystems/cran.py index c5991283800..6dd1b6ed24b 100644 --- a/osv/ecosystems/cran.py +++ b/osv/ecosystems/cran.py @@ -14,10 +14,13 @@ """CRAN helpers.""" import requests -import packaging_legacy.version from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_from_ints, + EnumerableEcosystem, + EnumerateError, +) class CRAN(EnumerableEcosystem): @@ -34,11 +37,19 @@ def _sort_key(self, version): # Some documentation on CRAN versioning and the R numeric_version method: # https://cran.r-project.org/doc/manuals/R-exts.html#The-DESCRIPTION-file # https://stat.ethz.ch/R-manual/R-devel/library/base/html/numeric_version.html - # The packaging.version appears to work for the typical X.Y.Z and - # X.Y-Z cases version = version.replace("-", ".") - # version.parse() handles invalid versions by returning LegacyVersion() - return packaging_legacy.version.parse(version) + try: + return tuple(int(part) for part in version.split('.')) + except ValueError as exc: + raise ValueError(f'Invalid version: {version}') from exc + + def coarse_version(self, version): + """Coarse version. + + Treats version as integers separated by dots or dashes. + """ + # Use _sort_key to validate or raise ValueError + return coarse_version_from_ints(self._sort_key(version)) def _enumerate_versions(self, url, diff --git a/osv/ecosystems/cran_test.py b/osv/ecosystems/cran_test.py index c61f04e6ce7..efd67594acb 100644 --- a/osv/ecosystems/cran_test.py +++ b/osv/ecosystems/cran_test.py @@ -58,3 +58,11 @@ def test_sort_key(self): ecosystem.sort_key('1.10-0'), ecosystem.sort_key('1.2-0')) self.assertLessEqual( ecosystem.sort_key('1.2-0'), ecosystem.sort_key('1.10-0')) + + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = ecosystems.get('CRAN') + self.assertEqual('00:00009001.00000010.00000033', + ecosystem.coarse_version('9001.10-33.4')) + self.assertEqual('00:00000000.00000001.00000000', + ecosystem.coarse_version('0-1')) diff --git a/osv/ecosystems/debian.py b/osv/ecosystems/debian.py index de1fad8bd69..bb75aaffd42 100644 --- a/osv/ecosystems/debian.py +++ b/osv/ecosystems/debian.py @@ -13,14 +13,21 @@ # limitations under the License. """Debian ecosystem helper.""" +from itertools import batched import json import logging +import re import requests from ..third_party.univers.debian import Version as DebianVersion from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_from_ints, + EnumerableEcosystem, + EnumerateError, + MAX_COARSE_PART, +) from .ecosystems_base import OrderedEcosystem from .. import cache from ..request_helper import RequestError, RequestHelper @@ -31,11 +38,54 @@ class DPKG(OrderedEcosystem): def _sort_key(self, version): if not DebianVersion.is_valid(version): - # If debian version is not valid, it is most likely an invalid fixed - # version then sort it to the last/largest element - return DebianVersion(9999999999, '9999999999') + raise ValueError(f'Invalid version: {version}') return DebianVersion.from_string(version) + def coarse_version(self, version: str) -> str: + """Coarse version. + + Treats version as alternating digit/non-digit strings. + Truncates at non-dot separators (like ~) to ensure monotonicity + (e.g. 1.0~rc1 < 1.0). + Epochs are preserved. + """ + if not DebianVersion.is_valid(version): + raise ValueError(f'Invalid version: {version}') + + # Try extract epoch. + e, p, v = version.partition(':') + if not p: + v = e + e = '0' + try: + epoch = int(e) + except ValueError as e: + raise ValueError(f'Invalid version: {version}') from e + + # Versions are treated as alternating digit/non-digit strings + # We treat the exact string '.' as a digit separator. + # Any strings starting with '.' (that are not exactly '.') + # are greater than any number. + # Any strings starting with anything else are less than any number. + parts = re.findall(r'^$|\d+|\D+', v) + int_parts = [] + for couple in batched(parts, 2): + if not couple[0].isdecimal(): + # This is probably handled by is_valid + break + int_parts.append(int(couple[0])) + if len(couple) == 1: + break + sep = couple[1] + if sep == '.': + continue + if sep[0] == '.': + # Bigger than the max int, so we overflow + int_parts.append(MAX_COARSE_PART + 1) + break + + return coarse_version_from_ints(int_parts, epoch=epoch) + # TODO(another-rex): Update this to use dynamically # change depending on the project diff --git a/osv/ecosystems/debian_test.py b/osv/ecosystems/debian_test.py index 9656529e461..9a797fbb40f 100644 --- a/osv/ecosystems/debian_test.py +++ b/osv/ecosystems/debian_test.py @@ -14,6 +14,7 @@ """dpkg / Debian ecosystem helper tests.""" import requests + import vcr.unittest import unittest from unittest import mock @@ -81,16 +82,55 @@ def test_dpkg(self): self.assertLessEqual( ecosystem.sort_key('1.2.0-1'), ecosystem.sort_key('1.10.0-1')) + def test_ubuntu(self): + """Test sort_key from Ubuntu""" + ecosystem = debian.DPKG() + self.assertGreater( + ecosystem.sort_key('2.42.8+dfsg-1ubuntu0.3'), + ecosystem.sort_key('2.40.0+dfsg-3ubuntu0.5')) + self.assertGreater( + ecosystem.sort_key('2.42.8+dfsg-1ubuntu0.3'), + ecosystem.sort_key('2.42.8+dfsg-1ubuntu0.2')) + self.assertGreater(ecosystem.sort_key('5.4.13-1'), ecosystem.sort_key('0')) + + # Check the 0 sentinel value. + self.assertLess(ecosystem.sort_key('0'), ecosystem.sort_key('0:0~0-0')) + + self.assertGreater( + ecosystem.sort_key('5.4.13-1'), ecosystem.sort_key('3.2.30-1')) + self.assertGreater( + ecosystem.sort_key('invalid'), ecosystem.sort_key('3.2.30-1')) + # Check >= / <= methods + self.assertGreaterEqual( + ecosystem.sort_key('2.10.8+dfsg-1ubuntu0.3'), + ecosystem.sort_key('2.2.8+dfsg-1ubuntu0.3')) + self.assertLessEqual( + ecosystem.sort_key('2.2.8+dfsg-1ubuntu0.3'), + ecosystem.sort_key('2.10.8+dfsg-1ubuntu0.3')) + def test_dpkg_ecosystems(self): """Test dpkg-based ecosystems return a DPKG ecosystem.""" ecos = [ 'Debian', 'Echo', + 'Ubuntu', ] for ecosystem_name in ecos: ecosystem = ecosystems.get(ecosystem_name) self.assertIsInstance(ecosystem, debian.DPKG) + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = debian.DPKG() + self.assertEqual('00:00000001.00000002.00000000', + ecosystem.coarse_version('1.2+1')) + self.assertEqual('01:20230101.00000000.00000000', + ecosystem.coarse_version('1:20230101~dfsg-1.1~deb12u1')) + self.assertEqual('00:00000010.00000020.00000030', + ecosystem.coarse_version('10.20.30')) + self.assertEqual('00:00000010.00000020.99999999', + ecosystem.coarse_version('10.20.a30')) + class DebianEcosystemTest(vcr.unittest.VCRTestCase): """Debian ecosystem helper tests.""" diff --git a/osv/ecosystems/ecosystems_base.py b/osv/ecosystems/ecosystems_base.py index 7366351148b..653fe030254 100644 --- a/osv/ecosystems/ecosystems_base.py +++ b/osv/ecosystems/ecosystems_base.py @@ -13,10 +13,11 @@ # limitations under the License. """Ecosystems base classes.""" from abc import ABC, abstractmethod -from typing import Any +from typing import Any, Iterable from warnings import deprecated import bisect import functools +import re import requests from urllib.parse import quote @@ -29,15 +30,34 @@ class VersionKey: _key: Any _is_zero: bool - - def __init__(self, key: Any = None, is_zero: bool = False): + _is_invalid: bool + _error: Exception | None + + def __init__(self, + key: Any = None, + is_zero: bool = False, + is_invalid: bool = False, + error: Exception | None = None): self._key = key self._is_zero = is_zero + self._is_invalid = is_invalid + self._error = error + + @property + def is_invalid(self): + return self._is_invalid def __lt__(self, other): if not isinstance(other, VersionKey): return NotImplemented + # Invalid versions are greater than everything else + if self._is_invalid: + # If both are invalid, they are equal (not less than) + return False + if other._is_invalid: + return True + if self._is_zero: return not other._is_zero @@ -50,6 +70,12 @@ def __eq__(self, other): if not isinstance(other, VersionKey): return NotImplemented + if self._is_invalid: + return other._is_invalid + + if other._is_invalid: + return False + if self._is_zero: return other._is_zero @@ -59,13 +85,15 @@ def __eq__(self, other): return self._key == other._key def __repr__(self): + if self._is_invalid: + return 'VersionKey(is_invalid=True)' if self._is_zero: return 'VersionKey(is_zero=True)' - - return f'VersionKey(key={self._key!r})' + return f'VersionKey({self._key})' _VERSION_ZERO = VersionKey(is_zero=True) +_VERSION_INVALID = VersionKey(is_invalid=True) class OrderedEcosystem(ABC): @@ -83,7 +111,7 @@ def __init__(self, suffix: str | None = None): def _sort_key(self, version: str) -> Any: """Comparable key for a version. - If the version string is invalid, return a very large version. + If the version string is invalid, raise a ValueError. """ def sort_key(self, version: str) -> VersionKey: @@ -91,12 +119,41 @@ def sort_key(self, version: str) -> VersionKey: if version == '0': return _VERSION_ZERO - return VersionKey(self._sort_key(version)) + try: + return VersionKey(self._sort_key(version)) + except ValueError as e: + # Store the exception for potential logging/debugging. + return VersionKey(is_invalid=True, error=e) def sort_versions(self, versions: list[str]): """Sort versions.""" versions.sort(key=self.sort_key) + def coarse_version(self, version: str) -> str: + """Convert a version string for this ecosystem to a lexicographically + sortable string in the form: + + EE:XXXXXXXX.YYYYYYYY.ZZZZZZZZ + where: + EE is the 0-padded 2-digit epoch number (or equivalent), + XXXXXXXX is the 0-padded 8-digit major version (or equivalent), + YYYYYYYY is the 0-padded 8-digit minor version (or equivalent), + ZZZZZZZZ is the 0-padded 8-digit patch version (or equivalent). + + The returned string is used for database range queries + (e.g. coarse_min <= v <= coarse_max). + It does not need to be a perfect representation of the version, but it + MUST be monotonically non-decreasing with respect to the ecosystem's sort + order. + i.e. if v1 < v2, then coarse_version(v1) <= coarse_version(v2). + + Version string '0' should map to 00:0000000.00000000.00000000 + + Should raise a ValueError if the version string is invalid. + """ + raise NotImplementedError( + f'coarse_version not implemented for {self.__class__.__name__}') + class EnumerateError(Exception): """Non-retryable version enumeration error.""" @@ -210,3 +267,105 @@ def _deps_dev_enumerate(self, self.sort_versions(versions) return self._get_affected_versions(versions, introduced, fixed, last_affected, limits) + + +MAX_COARSE_EPOCH = 99 +MAX_COARSE_PART = 99999999 + + +def coarse_version_generic(version: str, + separators_regex=r'[.]', + truncate_regex=r'[-+]', + implicit_split=False, + empty_as: str | None = None, + epoch: int = 0) -> str: + """ + Convert a version string into a coarse, lexicographically comparable string. + + Format: 00:00000000.00000000.00000000 + (Epoch:Major.Minor.Patch) + + Only the first 3 integer components (Major, Minor, Patch) are used. + + Args: + version: The version string to convert. + separators_regex: Regex for separators (default: r'[.]'). + truncate_regex: Regex for characters to truncate after (default: r'[-+]'). + If None, no truncation is performed. + implicit_split: If True, splits on transitions between digits and non-digits + (in addition to separators_regex). + empty_as: If not None, treats empty parts as the given string instead of + removing them. + epoch: The epoch to use. + + Returns: + A string in the format 00:00000000.00000000.00000000 + """ + if version == '0': + return coarse_version_from_ints((0, 0, 0), epoch=epoch) + + main = version + if truncate_regex: + # Truncate off trailing components (e.g. prerelease/build) + main = re.split(truncate_regex, version, maxsplit=1)[0] + parts = re.split(separators_regex, main) + if implicit_split: + # Also split on transitions between digits and non-digits + parts = [p for part in parts for p in re.findall(r'^$|\d+|\D+', part)] + + # Filter empty parts or treat as zero + if empty_as is not None: + parts = [p if p else empty_as for p in parts] + else: + parts = [p for p in parts if p] + + # Extract up to 3 integer components + components = [] + for p in parts[:3]: + if not p.isdecimal(): + break + components.append(int(p)) + + return coarse_version_from_ints(components, epoch=epoch) + + +def coarse_version_from_ints(parts: Iterable[int], epoch: int = 0) -> str: + """ + Convert a list of integers into a coarse version string. + + Format: 00:00000000.00000000.00000000 + (Epoch:Major.Minor.Patch) + + Only the first 3 integer components (Major, Minor, Patch) are used. + + Args: + parts: The list of integers to convert. + epoch: The epoch to use. + """ + if epoch < 0: + # A negative epoch doesn't really make sense + return '00:00000000.00000000.00000000' + if epoch > MAX_COARSE_EPOCH: + return '99:99999999.99999999.99999999' + ints = [] + overflow = False + for p in parts: + if p < 0: + # A negative part doesn't really make sense + # but let's just treat it and all following parts as 0 + ints.append(0) + break + if p > MAX_COARSE_PART: + p = MAX_COARSE_PART + overflow = True + ints.append(p) + if overflow or len(ints) == 3: + break + + # Pad with zeros to ensure 3 components + # If we overflowed, we should pad with MAX instead of 0 + pad_value = MAX_COARSE_PART if overflow else 0 + while len(ints) < 3: + ints.append(pad_value) + + return f'{epoch:02d}:{ints[0]:08d}.{ints[1]:08d}.{ints[2]:08d}' diff --git a/osv/ecosystems/haskell.py b/osv/ecosystems/haskell.py index 3895472390e..a72cdb9a8a7 100644 --- a/osv/ecosystems/haskell.py +++ b/osv/ecosystems/haskell.py @@ -23,7 +23,11 @@ import requests from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_from_ints, + EnumerableEcosystem, + EnumerateError, +) from .semver_ecosystem_helper import SemverLike @@ -39,12 +43,17 @@ def _sort_key(self, version): https://hackage.haskell.org/package/Cabal-syntax/docs/Distribution-Types-Version.html """ - # If version is not valid, it is most likely an invalid input version - # then sort it to the last/largest element try: return [int(x) for x in version.split('.')] - except ValueError: - return [999999] + except ValueError as exc: + raise ValueError(f'Invalid version: {version}') from exc + + def coarse_version(self, version): + """Coarse version. + + Hackage versions are treated as dot-separated integers. + """ + return coarse_version_from_ints(self._sort_key(version)) def enumerate_versions(self, package, @@ -123,7 +132,7 @@ def tag_to_version(cls, tag: str) -> str | None: def is_major_minor_patch(s: str) -> bool: """Check that string matches ``..``.""" parts = s.split('.') - return len(parts) == 3 and all(x.isdigit() for x in parts) + return len(parts) == 3 and all(x.isdecimal() for x in parts) def enumerate_versions(self, package, diff --git a/osv/ecosystems/haskell_test.py b/osv/ecosystems/haskell_test.py index 6d9b20868f5..b7b5e15a5c7 100644 --- a/osv/ecosystems/haskell_test.py +++ b/osv/ecosystems/haskell_test.py @@ -51,6 +51,16 @@ def test_sort_key(self): self.assertLessEqual( ecosystem.sort_key('1.20.0'), ecosystem.sort_key('1-20-0')) + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = ecosystems.get('Hackage') + self.assertEqual('00:00009001.00000010.00000033', + ecosystem.coarse_version('9001.10.33.4')) + self.assertEqual('00:00000000.00000001.00000000', + ecosystem.coarse_version('0.1')) + self.assertEqual('00:00000067.00000000.00000000', + ecosystem.coarse_version('67')) + class GHCEcosystemTest(vcr.unittest.VCRTestCase): """GHC ecosystem helper tests.""" diff --git a/osv/ecosystems/maven.py b/osv/ecosystems/maven.py index f2dc4851682..e14de1398a0 100644 --- a/osv/ecosystems/maven.py +++ b/osv/ecosystems/maven.py @@ -17,7 +17,11 @@ import functools import re -from .ecosystems_base import EnumerableEcosystem, DepsDevMixin +from .ecosystems_base import ( + coarse_version_generic, + EnumerableEcosystem, + DepsDevMixin, +) # pylint: disable=line-too-long @@ -31,7 +35,7 @@ def qualifier_order(token): """Returns an integer representing a token's order.""" # ".qualifier" < "-qualifier" < "-number" < ".number" - if token.value.isdigit(): + if token.value.isdecimal(): if token.prefix == '-': return 2 @@ -58,16 +62,16 @@ def __eq__(self, other): def __lt__(self, other): if self.prefix == other.prefix: # if the prefix is the same, then compare the token: - if self.value.isdigit() and other.value.isdigit(): + if self.value.isdecimal() and other.value.isdecimal(): # Numeric tokens have the natural order. return int(self.value) < int(other.value) # The spec is unclear, but according to Maven's implementation, numerics # sort after non-numerics, **unless it's a null value**. # https://github.com/apache/maven/blob/965aaa53da5c2d814e94a41d37142d0d6830375d/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java#L443 - if self.value.isdigit() and not self.is_null: + if self.value.isdecimal() and not self.is_null: return False - if other.value.isdigit() and not other.is_null: + if other.value.isdecimal() and not other.is_null: return True # Non-numeric tokens ("qualifiers") have the alphabetical order, except @@ -208,7 +212,7 @@ def from_string(cls, str_version): if current == 'm': current = 'milestone' - if current.isdigit(): + if current.isdecimal(): # Remove any leading zeros. current = str(int(current)) @@ -244,6 +248,21 @@ def _sort_key(self, version): """Sort key.""" return Version.from_string(version) + def coarse_version(self, version): + """Coarse version. + + Treats version as dot-separated integers. + Trims at hyphens to ensure monotonicity for qualifiers (1.2-beta < 1.2) + and numeric suffixes (1.2-3 < 1.2.1). + """ + return coarse_version_generic( + version, + separators_regex=r'[.]', + truncate_regex=r'-', + implicit_split=True, + empty_as='0', + ) + def enumerate_versions(self, package, introduced, diff --git a/osv/ecosystems/maven_test.py b/osv/ecosystems/maven_test.py index e4a06dbe541..292ac121b1e 100644 --- a/osv/ecosystems/maven_test.py +++ b/osv/ecosystems/maven_test.py @@ -16,6 +16,7 @@ # Many tests are ported from # https://github.com/apache/maven/blob/c3cf29438e3d65d6ee5c5726f8611af99d9a649a/maven-artifact/src/test/java/org/apache/maven/artifact/versioning/ComparableVersionTest.java. """Maven ecosystem helper tests.""" + import unittest import vcr.unittest import warnings @@ -259,6 +260,19 @@ def test_version_ge_le(self): self.assertLessEqual( self.ecosystem.sort_key('1.2.0'), self.ecosystem.sort_key('1.10.0')) + def test_coarse_version(self): + """Test coarse_version.""" + self.assertEqual('00:00000001.00000002.00000003', + self.ecosystem.coarse_version('1.2.3')) + self.assertEqual('00:00000002.00000003.00000000', + self.ecosystem.coarse_version('2.3-5.4')) + self.assertEqual('00:00000000.00000000.00000000', + self.ecosystem.coarse_version('alpha-alpha')) + self.assertEqual('00:00000005.00000010.00000000', + self.ecosystem.coarse_version('5.10.foo-6')) + self.assertEqual('00:00000001.00000000.00000009', + self.ecosystem.coarse_version('1..9foo')) + class MavenEcosystemTest(vcr.unittest.VCRTestCase): """Maven ecosystem helper tests.""" diff --git a/osv/ecosystems/nuget.py b/osv/ecosystems/nuget.py index 5da2aee3634..4a92473eced 100644 --- a/osv/ecosystems/nuget.py +++ b/osv/ecosystems/nuget.py @@ -18,7 +18,11 @@ import requests from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_generic, + EnumerableEcosystem, + EnumerateError, +) from .. import semver_index # This relies on a strict SemVer implementation. @@ -74,10 +78,8 @@ def from_string(cls, str_version): str_version, revision = _extract_revision(str_version) try: return Version(semver_index.parse(str_version), revision) - except ValueError: - # If a user gives us an unparsable semver version, - # treat it as a very large version so as to not match anything. - return Version(semver_index.parse('999999'), 999999) + except ValueError as exc: + raise ValueError(f'Invalid version: {str_version}') from exc class NuGet(EnumerableEcosystem): @@ -90,6 +92,24 @@ def _sort_key(self, version): """Sort key.""" return Version.from_string(version) + def coarse_version(self, version): + """Coarse version. + + Treats version as dot-separated integers. + Trims prerelease/build suffixes to ensure monotonicity + (e.g. 1.0.0-beta < 1.0.0). + """ + # Call _sort_key to force a ValueError if the version is invalid. + self._sort_key(version) + if version[0] == 'v': + version = version[1:] + return coarse_version_generic( + version, + separators_regex=r'[.]', + truncate_regex=r'[-+]', + implicit_split=True, + empty_as=None) + def enumerate_versions(self, package, introduced, diff --git a/osv/ecosystems/nuget_test.py b/osv/ecosystems/nuget_test.py index 9052004f2a5..1a7732f1a12 100644 --- a/osv/ecosystems/nuget_test.py +++ b/osv/ecosystems/nuget_test.py @@ -18,6 +18,7 @@ """NuGet ecosystem helper tests.""" import unittest + import vcr.unittest import warnings @@ -92,6 +93,22 @@ def test_ge_le(self): self.check_order(self.assertGreaterEqual, '1.10.0', '1.2.0') self.check_order(self.assertLessEqual, '1.2.0', '1.10.0') + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = nuget.NuGet() + self.assertEqual('00:00000000.00000000.00000000', + ecosystem.coarse_version('0')) + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1.2.3.5')) + self.assertEqual('00:00000010.00000020.00000030', + ecosystem.coarse_version('10.20.30-alpha.1')) + self.assertEqual('00:00000000.00000002.00000000', + ecosystem.coarse_version('0.2.0.1+a')) + self.assertEqual('00:00000000.00000000.00000099', + ecosystem.coarse_version('0.0.99.10-pre+b')) + self.assertEqual('00:00000002.99999999.99999999', + ecosystem.coarse_version('2.100000000.1.1')) + class NuGetEcosystemTest(vcr.unittest.VCRTestCase): """NuGet ecosystem helper tests.""" diff --git a/osv/ecosystems/packagist.py b/osv/ecosystems/packagist.py index 83f36550d84..cc14170d02c 100644 --- a/osv/ecosystems/packagist.py +++ b/osv/ecosystems/packagist.py @@ -18,7 +18,12 @@ from typing import List from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_from_ints, + EnumerableEcosystem, + EnumerateError, + MAX_COARSE_PART, +) from ..request_helper import RequestError, RequestHelper @@ -92,11 +97,11 @@ def php_slices_compare(a_split: List[str], b_split: List[str]): Compare php versions after being split by '.' """ for a, b in zip(a_split, b_split): - if a.isdigit() and b.isdigit(): + if a.isdecimal() and b.isdecimal(): compare = int(a) - int(b) - elif not a.isdigit() and not b.isdigit(): + elif not a.isdecimal() and not b.isdecimal(): compare = PackagistVersion.compare_special_versions(a, b) - elif a.isdigit(): + elif a.isdecimal(): compare = PackagistVersion.compare_special_versions('#', b) else: compare = PackagistVersion.compare_special_versions(a, '#') @@ -108,13 +113,13 @@ def php_slices_compare(a_split: List[str], b_split: List[str]): if len(a_split) > len(b_split): next_char = a_split[len(b_split)] - if next_char.isdigit(): + if next_char.isdecimal(): return 1 return PackagistVersion.php_slices_compare(a_split[len(b_split):], ['#']) if len(a_split) < len(b_split): next_char = b_split[len(a_split)] - if next_char.isdigit(): + if next_char.isdecimal(): return -1 return PackagistVersion.php_slices_compare(['#'], b_split[len(a_split):]) @@ -202,6 +207,39 @@ class Packagist(EnumerableEcosystem): def _sort_key(self, version): return PackagistVersion(version) + def coarse_version(self, version): + """Coarse version. + + Treats version as integers separated by ., -, _, or +. + Treats 'p'/'pl' suffixes as MAX_INT to ensure they sort after base versions + (e.g. 1.0 < 1.0-p1). + """ + if version.startswith('v'): + version = version[1:] + # Cannot use coarse_version_generic because 'p' and 'pl' are considered + # greater than numbers + # 0 > .1 (but 0.1 == 0..1) + if not version or version[0] in '-_+.': + return coarse_version_from_ints([0]) + # Split on separators. + parts = re.split(r'[-_+.]', version) + # Split on transitions between digits and non-digits + parts = [p for part in parts for p in re.findall(r'^$|\d+|\D+', part)] + # Filter empty parts + parts = [p for p in parts if p] + # Extract up to 3 integer components + components = [] + for p in parts[:3]: + if p in ('p', 'pl'): + val = MAX_COARSE_PART + 1 # trigger overflow + elif not p.isdecimal(): + break + else: + val = int(p) + components.append(val) + + return coarse_version_from_ints(components) + def enumerate_versions(self, package, introduced, diff --git a/osv/ecosystems/packagist_test.py b/osv/ecosystems/packagist_test.py index 52c6cc7a7c9..d7f1d3c467f 100644 --- a/osv/ecosystems/packagist_test.py +++ b/osv/ecosystems/packagist_test.py @@ -24,6 +24,24 @@ class PackagistEcosystemTest(vcr.unittest.VCRTestCase): _TEST_DATA_DIR = os.path.join( os.path.dirname(os.path.abspath(__file__)), 'testdata') + def test_coarse_version(self): + """Test coarse_version.""" + ecosystem = ecosystems.get('Packagist') + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1.2.3')) + self.assertEqual('00:00000001.00000000.00000000', + ecosystem.coarse_version('1.0.0-alpha')) + self.assertEqual('00:00000001.00000000.00000000', + ecosystem.coarse_version('1.0.0-p1')) + self.assertEqual('00:00000004.00000003.00000002', + ecosystem.coarse_version('4.3-2RC1')) + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1+.+2-_-3')) + self.assertEqual('00:00000008.99999999.99999999', + ecosystem.coarse_version('8.p2')) + self.assertEqual('00:99999999.99999999.99999999', + ecosystem.coarse_version('pl')) + def test_packagist(self): """Test Packagist.""" ecosystem = ecosystems.get('Packagist') diff --git a/osv/ecosystems/pub.py b/osv/ecosystems/pub.py index b7657bbbcba..61b2fb4eacb 100644 --- a/osv/ecosystems/pub.py +++ b/osv/ecosystems/pub.py @@ -17,7 +17,11 @@ import json from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_generic, + EnumerableEcosystem, + EnumerateError, +) from .. import semver_index from ..request_helper import RequestError, RequestHelper @@ -58,12 +62,10 @@ def __eq__(self, other): @classmethod def from_string(cls, str_version): - # If version is not valid, it is most likely an invalid input - # version then sort it to the last/largest element try: return Version(semver_index.parse(str_version)) - except ValueError: - return Version(semver_index.parse('999999')) + except ValueError as exc: + raise ValueError(f'Invalid version: {str_version}') from exc class Pub(EnumerableEcosystem): @@ -75,6 +77,27 @@ def _sort_key(self, version): """Sort key.""" return Version.from_string(version) + def coarse_version(self, version): + """Coarse version. + + Treats version as dot-separated integers. + Trims prerelease/build suffixes to ensure monotonicity + (e.g. 1.0.0-beta < 1.0.0). + """ + # Make sure the version is valid before trying to make it coarse. + try: + semver_index.parse(version) + except ValueError as e: + raise ValueError(f'Invalid version: {version}') from e + if version[0] == 'v': + version = version[1:] + return coarse_version_generic( + version, + separators_regex=r'[.]', + truncate_regex=r'[-+]', + implicit_split=True, + empty_as=None) + def enumerate_versions(self, package, introduced, diff --git a/osv/ecosystems/pub_test.py b/osv/ecosystems/pub_test.py index d7cd7a782c4..a21951926e2 100644 --- a/osv/ecosystems/pub_test.py +++ b/osv/ecosystems/pub_test.py @@ -113,6 +113,22 @@ def test_empty_identifier(self): # The implementation incorrectly assumes "1.0.0-a..b" == "1.0.0-a.-.b" # I have decided that this extreme edge case is not worth fixing. + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = pub.Pub() + self.assertEqual('00:00000000.00000000.00000000', + ecosystem.coarse_version('0')) + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1.2.3')) + self.assertEqual('00:00000010.00000020.00000030', + ecosystem.coarse_version('10.20.30-alpha.1')) + self.assertEqual('00:00000000.00000002.00000000', + ecosystem.coarse_version('0.2.0+a')) + self.assertEqual('00:00000000.00000000.00000099', + ecosystem.coarse_version('0.0.99-pre+b')) + self.assertEqual('00:00000002.99999999.99999999', + ecosystem.coarse_version('2.100000000.1')) + class PubEcosystemTest(vcr.unittest.VCRTestCase): """Pub ecosystem helper tests.""" diff --git a/osv/ecosystems/pypi.py b/osv/ecosystems/pypi.py index 23add43e138..56f2866e0e4 100644 --- a/osv/ecosystems/pypi.py +++ b/osv/ecosystems/pypi.py @@ -17,7 +17,12 @@ import requests from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_generic, + coarse_version_from_ints, + EnumerableEcosystem, + EnumerateError, +) class PyPI(EnumerableEcosystem): @@ -30,6 +35,33 @@ def _sort_key(self, version): # version.parse() handles invalid versions by returning LegacyVersion() return packaging_legacy.version.parse(version) + def coarse_version(self, version: str): + """Coarse version. + + Treats version as dot-separated integers. + Maps legacy versions to 0 (sort before valid versions). + Epochs are preserved. + """ + # legacy versions are less than non-legacy versions, thus mapped to 0 + ver = packaging_legacy.version.parse(version) + if isinstance(ver, packaging_legacy.version.LegacyVersion): + return coarse_version_from_ints([0]) + + epoch = ver.epoch + + # parse the epoch-less string + if version[0].lower() == 'v': + version = version[1:] + epochless = version.split('!', 1)[-1] + + return coarse_version_generic( + epochless, + separators_regex=r'[.]', + truncate_regex=r'[+_-]', + implicit_split=True, + empty_as=None, + epoch=epoch) + def enumerate_versions(self, package, introduced, diff --git a/osv/ecosystems/pypi_test.py b/osv/ecosystems/pypi_test.py index 8b61f2fce94..f8f7eb23cb9 100644 --- a/osv/ecosystems/pypi_test.py +++ b/osv/ecosystems/pypi_test.py @@ -48,3 +48,17 @@ def test_sort_key(self): ecosystem.sort_key('1.10.0'), ecosystem.sort_key('1.2.0')) self.assertLessEqual( ecosystem.sort_key('1.2.0'), ecosystem.sort_key('1.10.0')) + + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = ecosystems.get('PyPI') + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1.2.3')) + self.assertEqual('00:00002020.00000000.00000000', + ecosystem.coarse_version('0!2020.post1')) + self.assertEqual('02:00000001.00000002.00000003', + ecosystem.coarse_version('2!1.2.3')) + self.assertEqual('99:99999999.99999999.99999999', + ecosystem.coarse_version('100!1.0.0')) + self.assertEqual('00:00000000.00000000.00000000', + ecosystem.coarse_version('1.foobar')) diff --git a/osv/ecosystems/redhat.py b/osv/ecosystems/redhat.py index 03679669d84..490464c931e 100644 --- a/osv/ecosystems/redhat.py +++ b/osv/ecosystems/redhat.py @@ -14,11 +14,52 @@ """Red Hat Linux ecosystem helper.""" from ..third_party.univers.rpm import RpmVersion -from .ecosystems_base import OrderedEcosystem +from .ecosystems_base import coarse_version_generic, OrderedEcosystem + +# A real, valid Rpm Version to check against +_rpm_test_version = RpmVersion.from_string('0') class RPM(OrderedEcosystem): """Red Hat Package Manager ecosystem helper.""" def _sort_key(self, version): - return RpmVersion.from_string(version) + ver = RpmVersion.from_string(version) + # Invalid RPM versions only reveal themselves when doing a comparison. + try: + _rpm_test_version < ver + except Exception as e: + raise ValueError(f'Invalid version: {version}') from e + return ver + + def coarse_version(self, version: str) -> str: + """Coarse version. + + Treats version as alternating digit/non-digit strings. + Treats ~, ^, - as separators that sort before regular separators + (e.g. 1.0~rc1 < 1.0). + Epochs are preserved. + """ + # Call sort key to validate the version + self._sort_key(version) + # Extract epoch, if it exists + e, p, v = version.partition(':') + if not p: + v = e + e = '0' + try: + epoch = int(e) + except ValueError as e: + raise ValueError(f'Invalid version: {version}') from e + + return coarse_version_generic( + v, + # any non-alphanumeric character is considered a separator + separators_regex=r'[^0-9A-Za-z~^-]', + # ~, ^, - separators are special and treated as less than a regular + # separator + truncate_regex=r'[~^-]', + implicit_split=True, + empty_as=None, + epoch=epoch, + ) diff --git a/osv/ecosystems/redhat_test.py b/osv/ecosystems/redhat_test.py index b440edf7a1d..0f785d27b11 100644 --- a/osv/ecosystems/redhat_test.py +++ b/osv/ecosystems/redhat_test.py @@ -135,3 +135,15 @@ def test_rpm_ecosystems(self): for ecosystem_name in ecos: ecosystem = ecosystems.get(ecosystem_name) self.assertIsInstance(ecosystem, redhat.RPM) + + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = redhat.RPM() + self.assertEqual('00:00000003.00000004.00000001', + ecosystem.coarse_version('0:3.4.1-6.el8_10')) + self.assertEqual('02:00000001.00000035.00000000', + ecosystem.coarse_version('2:1.35-9.el10_1')) + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1+2__3')) + self.assertEqual('00:00000010.00000020.00000000', + ecosystem.coarse_version('10.20^2')) diff --git a/osv/ecosystems/rubygems.py b/osv/ecosystems/rubygems.py index dc4abf8e9b9..868f8786bdf 100644 --- a/osv/ecosystems/rubygems.py +++ b/osv/ecosystems/rubygems.py @@ -18,7 +18,11 @@ from ..third_party.univers.gem import GemVersion, InvalidVersionError from . import config -from .ecosystems_base import EnumerableEcosystem, EnumerateError +from .ecosystems_base import ( + coarse_version_generic, + EnumerableEcosystem, + EnumerateError, +) class RubyGems(EnumerableEcosystem): @@ -32,8 +36,24 @@ def _sort_key(self, version): # version then sort it to the last/largest element try: return GemVersion(version) - except InvalidVersionError: - return GemVersion('9999999999') + except InvalidVersionError as exc: + raise ValueError(f'Invalid version: {version}') from exc + + def coarse_version(self, version: str) -> str: + """Coarse version. + + Treats version as dot-separated integers. + Trims at hyphens to ensure monotonicity (e.g. 1.2-3 < 1.2). + """ + # Call sort key to validate the version + self._sort_key(version) + + return coarse_version_generic( + version.strip(), + separators_regex=r'[.]', + truncate_regex=r'[-]', + implicit_split=True, + empty_as='') def enumerate_versions(self, package, diff --git a/osv/ecosystems/rubygems_test.py b/osv/ecosystems/rubygems_test.py index 172836a5f00..c99dac36da8 100644 --- a/osv/ecosystems/rubygems_test.py +++ b/osv/ecosystems/rubygems_test.py @@ -52,3 +52,11 @@ def test_sort_key(self): ecosystem.sort_key('1.2.0.rc1'), ecosystem.sort_key('1.10.0.rc1')) # Check the 0 sentinel value self.assertLess(ecosystem.sort_key('0'), ecosystem.sort_key('0.0.0.rc0')) + + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = ecosystems.get('RubyGems') + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1.2.3.4')) + self.assertEqual('00:00000010.00000002.00000000', + ecosystem.coarse_version('10.2a.3')) diff --git a/osv/ecosystems/semver_ecosystem_helper.py b/osv/ecosystems/semver_ecosystem_helper.py index 974dc9e4c64..00db6c9687e 100644 --- a/osv/ecosystems/semver_ecosystem_helper.py +++ b/osv/ecosystems/semver_ecosystem_helper.py @@ -14,7 +14,7 @@ """Ecosystem helper for ecosystems using SemVer.""" from warnings import deprecated -from .ecosystems_base import OrderedEcosystem +from .ecosystems_base import coarse_version_generic, OrderedEcosystem from .. import semver_index @@ -24,12 +24,28 @@ class SemverLike(OrderedEcosystem): def _sort_key(self, version): """Sort key.""" + return semver_index.parse(version) + + def coarse_version(self, version): + """Coarse version. + + Treats version as dot-separated integers. + Trims prerelease/build suffixes to ensure monotonicity + (e.g. 1.0.0-rc1 < 1.0.0). + """ + # Make sure the version is valid before trying to make it coarse. try: - return semver_index.parse(version) - except ValueError: - # If a user gives us an unparsable semver version, - # treat it as a very large version so as to not match anything. - return semver_index.parse('9999999999') + semver_index.parse(version) + except ValueError as e: + raise ValueError(f'Invalid version: {version}') from e + if version[0] == 'v': + version = version[1:] + return coarse_version_generic( + version, + separators_regex=r'[.]', + truncate_regex=r'[-+]', + implicit_split=True, + empty_as=None) class SemverEcosystem(SemverLike): diff --git a/osv/ecosystems/semver_ecosystem_helper_test.py b/osv/ecosystems/semver_ecosystem_helper_test.py index 0f1cd85e35e..a40df45e7a0 100644 --- a/osv/ecosystems/semver_ecosystem_helper_test.py +++ b/osv/ecosystems/semver_ecosystem_helper_test.py @@ -38,3 +38,28 @@ def test_sort_key(self): ecosystem = semver_ecosystem_helper.SemverLike('') # Check the 0 sentinel value self.assertLess(ecosystem.sort_key('0'), ecosystem.sort_key('0.0.0-0.0')) + + # Check invalid version + invalid_key = ecosystem.sort_key('invalid') + valid_key = ecosystem.sort_key('1.0.0') + # Invalid versions should be greater than valid versions + self.assertLess(valid_key, invalid_key) + self.assertGreater(invalid_key, valid_key) + # Invalid versions should be equal to other invalid versions (for stability) + self.assertEqual(invalid_key, ecosystem.sort_key('also-invalid')) + + def test_coarse_version(self): + """Test coarse_version""" + ecosystem = semver_ecosystem_helper.SemverLike('') + self.assertEqual('00:00000000.00000000.00000000', + ecosystem.coarse_version('0')) + self.assertEqual('00:00000001.00000002.00000003', + ecosystem.coarse_version('1.2.3')) + self.assertEqual('00:00000010.00000020.00000030', + ecosystem.coarse_version('10.20.30-alpha.1')) + self.assertEqual('00:00000000.00000002.00000000', + ecosystem.coarse_version('0.2.0+a')) + self.assertEqual('00:00000000.00000000.00000099', + ecosystem.coarse_version('0.0.99-pre+b')) + self.assertEqual('00:00000002.99999999.99999999', + ecosystem.coarse_version('2.100000000.1')) diff --git a/osv/ecosystems/ubuntu.py b/osv/ecosystems/ubuntu.py index 5dffbbbe9fa..66fd42f0ac9 100644 --- a/osv/ecosystems/ubuntu.py +++ b/osv/ecosystems/ubuntu.py @@ -11,17 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Ubuntu ecosystem helper.""" +"""Ubuntu ecosystem helper. (DEPRECATED)""" -from ..third_party.univers.debian import Version as UbuntuVersion +from .debian import DPKG -from .ecosystems_base import OrderedEcosystem - - -class Ubuntu(OrderedEcosystem): - """Ubuntu ecosystem""" - - def _sort_key(self, version): - if not UbuntuVersion.is_valid(version): - return UbuntuVersion(9999999999, '9999999999') - return UbuntuVersion.from_string(version) +# DEPRECATED +Ubuntu = DPKG diff --git a/osv/ecosystems/ubuntu_test.py b/osv/ecosystems/ubuntu_test.py deleted file mode 100644 index 9ac4685e27d..00000000000 --- a/osv/ecosystems/ubuntu_test.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Ubuntu ecosystem helper tests.""" - -import unittest -from .. import ecosystems - - -class UbuntuEcosystemTest(unittest.TestCase): - """Ubuntu ecosystem helper tests.""" - - def test_ubuntu(self): - """Test sort_key""" - ecosystem = ecosystems.get('Ubuntu') - self.assertGreater( - ecosystem.sort_key('2.42.8+dfsg-1ubuntu0.3'), - ecosystem.sort_key('2.40.0+dfsg-3ubuntu0.5')) - self.assertGreater( - ecosystem.sort_key('2.42.8+dfsg-1ubuntu0.3'), - ecosystem.sort_key('2.42.8+dfsg-1ubuntu0.2')) - self.assertGreater(ecosystem.sort_key('5.4.13-1'), ecosystem.sort_key('0')) - - # Check the 0 sentinel value. - self.assertLess(ecosystem.sort_key('0'), ecosystem.sort_key('0:0~0-0')) - - self.assertGreater( - ecosystem.sort_key('5.4.13-1'), ecosystem.sort_key('3.2.30-1')) - self.assertGreater( - ecosystem.sort_key('invalid'), ecosystem.sort_key('3.2.30-1')) - # Check >= / <= methods - self.assertGreaterEqual( - ecosystem.sort_key('2.10.8+dfsg-1ubuntu0.3'), - ecosystem.sort_key('2.2.8+dfsg-1ubuntu0.3')) - self.assertLessEqual( - ecosystem.sort_key('2.2.8+dfsg-1ubuntu0.3'), - ecosystem.sort_key('2.10.8+dfsg-1ubuntu0.3')) diff --git a/osv/models.py b/osv/models.py index 3af516ea086..45f7204dd3c 100644 --- a/osv/models.py +++ b/osv/models.py @@ -29,6 +29,11 @@ # pylint: disable=relative-beyond-top-level from . import bug from . import ecosystems +from .ecosystems.ecosystems_base import ( + coarse_version_from_ints, + MAX_COARSE_PART, + MAX_COARSE_EPOCH, +) from . import gcs from . import pubsub from . import purl_helpers @@ -40,6 +45,10 @@ _MAX_GIT_VERSIONS_TO_INDEX = 5000 +MIN_COARSE_VERSION = coarse_version_from_ints((0,), 0) +MAX_COARSE_VERSION = coarse_version_from_ints((MAX_COARSE_PART + 1,), + MAX_COARSE_EPOCH + 1) + _EVENT_ORDER = { 'introduced': 0, 'last_affected': 1, @@ -999,6 +1008,13 @@ class AffectedVersions(ndb.Model): events: list[AffectedEvent] = ndb.LocalStructuredProperty( AffectedEvent, repeated=True) + # Coarse, string-comparable version bounds + # for pre-filtering affected versions. + # minimum: 00:00000000.00000000.00000000 + # maximum: 99:99999999.99999999.99999999 + coarse_min: str = ndb.StringProperty() + coarse_max: str = ndb.StringProperty() + def sort_key(self): """Key function for comparison and deduplication.""" return (self.vuln_id, self.ecosystem, self.name, tuple(self.versions), @@ -1165,91 +1181,150 @@ def transaction(): pubsub.publish_failure(data, type='gcs_retry') -def affected_from_bug(entity: Bug) -> list[AffectedVersions]: - """Compute the AffectedVersions from a Bug entity.""" +def _get_coarse_min_max(events: list[AffectedEvent], + e_helper: ecosystems.OrderedEcosystem, + db_id: str) -> tuple[str, str]: + """Get coarse min and max from sorted events.""" + coarse_min = MIN_COARSE_VERSION + coarse_max = MAX_COARSE_VERSION + try: + # Find the lowest introduced event for coarse min + # (in case, for some reason, the first event is not introduced) + for e in events: + if e.type == 'introduced': + coarse_min = e_helper.coarse_version(e.value) + # Only if we found an introduced version, update coarse_max + # And only if the range is bounded. + last = events[-1] + if last.type != 'introduced': + coarse_max = e_helper.coarse_version(last.value) + break + except NotImplementedError: + # Coarse versioning not yet implemented for this ecosystem. + pass + except ValueError: + logging.warning('Invalid version in %s', db_id) + coarse_min = MIN_COARSE_VERSION + coarse_max = MAX_COARSE_VERSION + + return coarse_min, coarse_max + + +def _affected_versions_from_package(affected: AffectedPackage, + db_id: str) -> list[AffectedVersions]: + """Compute AffectedVersions for a single affected package.""" affected_versions = [] - for affected in entity.affected_packages: - pkg_ecosystem = affected.package.ecosystem - # Make sure we capture all possible ecosystem variants for matching. - # e.g. {'Ubuntu:22.04:LTS', 'Ubuntu:22.04', 'Ubuntu'} - all_pkg_ecosystems = {pkg_ecosystem, ecosystems.normalize(pkg_ecosystem)} - if (e := ecosystems.remove_variants(pkg_ecosystem)) is not None: - all_pkg_ecosystems.add(e) - - pkg_name = ecosystems.maybe_normalize_package_names(affected.package.name, - pkg_ecosystem) - - # Ecosystem helper for sorting the events. - e_helper = ecosystems.get(pkg_ecosystem) - # TODO(michaelkedar): I am matching the current behaviour of the API, - # where GIT tags match to the first git repo in the ranges list, even if - # there are non-git ranges or multiple git repos in a range. - repo_url = '' - pkg_has_affected = False - for r in affected.ranges: - if r.type == 'GIT': - if not repo_url: - repo_url = r.repo_url - continue - if r.type not in ('SEMVER', 'ECOSYSTEM'): - logging.warning('Unknown range type "%s" in %s', r.type, entity.db_id) - continue - events = r.events - if not events: - continue - pkg_has_affected = True - if e_helper is not None: - # If we have an ecosystem helper sort the events to help with querying. - events.sort(key=lambda e, sort_key=e_helper.sort_key: - (sort_key(e.value), _EVENT_ORDER.get(e.type, -1))) - # If we don't have an ecosystem helper, assume the events are in order. - for e in all_pkg_ecosystems: - affected_versions.append( - AffectedVersions( - vuln_id=entity.db_id, - ecosystem=e, - name=pkg_name, - events=events, - )) - - # Add the enumerated versions - # We need at least a package name to perform matching. - if pkg_name and affected.versions: - pkg_has_affected = True - for e in all_pkg_ecosystems: - affected_versions.append( - AffectedVersions( - vuln_id=entity.db_id, - ecosystem=e, - name=pkg_name, - versions=affected.versions, - )) - if pkg_name and not pkg_has_affected: - # We have a package that does not have any affected ranges or versions, - # which doesn't really make sense. - # Add an empty AffectedVersions entry so that this vuln is returned when - # querying the API with no version specified. - logging.warning('Vuln has empty affected ranges and versions: %s, %s/%s', - entity.db_id, pkg_ecosystem, pkg_name) - for e in all_pkg_ecosystems: - affected_versions.append( - AffectedVersions( - vuln_id=entity.db_id, - ecosystem=e, - name=pkg_name, - )) - - if repo_url: - # If we have a repository, always add a GIT entry. - # Even if affected.versions is empty, we still want to return this vuln - # for the API queries with no versions specified. + pkg_ecosystem = affected.package.ecosystem + # Make sure we capture all possible ecosystem variants for matching. + # e.g. {'Ubuntu:22.04:LTS', 'Ubuntu:22.04', 'Ubuntu'} + all_pkg_ecosystems = {pkg_ecosystem, ecosystems.normalize(pkg_ecosystem)} + if (e := ecosystems.remove_variants(pkg_ecosystem)) is not None: + all_pkg_ecosystems.add(e) + + pkg_name = ecosystems.maybe_normalize_package_names(affected.package.name, + pkg_ecosystem) + + # Ecosystem helper for sorting the events. + e_helper = ecosystems.get(pkg_ecosystem) + # TODO(michaelkedar): I am matching the current behaviour of the API, + # where GIT tags match to the first git repo in the ranges list, even if + # there are non-git ranges or multiple git repos in a range. + repo_url = '' + pkg_has_affected = False + for r in affected.ranges: + if r.type == 'GIT': + if not repo_url: + repo_url = r.repo_url + continue + if r.type not in ('SEMVER', 'ECOSYSTEM'): + logging.warning('Unknown range type "%s" in %s', r.type, db_id) + continue + events = r.events + if not events: + continue + pkg_has_affected = True + coarse_min = MIN_COARSE_VERSION + coarse_max = MAX_COARSE_VERSION + if e_helper is not None: + # If we have an ecosystem helper sort the events to help with querying. + events.sort(key=lambda e, sort_key=e_helper.sort_key: + (sort_key(e.value), _EVENT_ORDER.get(e.type, -1))) + coarse_min, coarse_max = _get_coarse_min_max(events, e_helper, db_id) + + # If we don't have an ecosystem helper, assume the events are in order. + for e in all_pkg_ecosystems: affected_versions.append( AffectedVersions( - vuln_id=entity.db_id, - ecosystem='GIT', - name=normalize_repo_package(repo_url), + vuln_id=db_id, + ecosystem=e, + name=pkg_name, + coarse_min=coarse_min, + coarse_max=coarse_max, + events=events, + )) + + # Add the enumerated versions + # We need at least a package name to perform matching. + if pkg_name and affected.versions: + pkg_has_affected = True + coarse_min = MIN_COARSE_VERSION + coarse_max = MAX_COARSE_VERSION + if e_helper is not None: + try: + all_coarse = [e_helper.coarse_version(v) for v in affected.versions] + coarse_min = min(all_coarse) + coarse_max = max(all_coarse) + except NotImplementedError: + # Coarse versioning not yet implemented for this ecosystem. + pass + except ValueError: + logging.warning('Invalid version in %s', db_id) + for e in all_pkg_ecosystems: + affected_versions.append( + AffectedVersions( + vuln_id=db_id, + ecosystem=e, + name=pkg_name, versions=affected.versions, + coarse_min=coarse_min, + coarse_max=coarse_max, )) + if pkg_name and not pkg_has_affected: + # We have a package that does not have any affected ranges or versions, + # which doesn't really make sense. + # Add an empty AffectedVersions entry so that this vuln is returned when + # querying the API with no version specified. + logging.warning('Vuln has empty affected ranges and versions: %s, %s/%s', + db_id, pkg_ecosystem, pkg_name) + for e in all_pkg_ecosystems: + affected_versions.append( + AffectedVersions( + vuln_id=db_id, + ecosystem=e, + name=pkg_name, + )) + + if repo_url: + # If we have a repository, always add a GIT entry. + # Even if affected.versions is empty, we still want to return this vuln + # for the API queries with no versions specified. + affected_versions.append( + AffectedVersions( + vuln_id=db_id, + ecosystem='GIT', + name=normalize_repo_package(repo_url), + versions=affected.versions, + )) + + return affected_versions + + +def affected_from_bug(entity: Bug) -> list[AffectedVersions]: + """Compute the AffectedVersions from a Bug entity.""" + affected_versions = [] + for affected in entity.affected_packages: + affected_versions.extend( + _affected_versions_from_package(affected, entity.db_id)) # Deduplicate and sort the affected_versions unique_affected_dict = {av.sort_key(): av for av in affected_versions} diff --git a/osv/models_test.py b/osv/models_test.py index 87c54a59b5a..371a0006bc5 100644 --- a/osv/models_test.py +++ b/osv/models_test.py @@ -192,12 +192,16 @@ def test_bug_post_put(self): events=[ models.AffectedEvent(type='introduced', value='0'), models.AffectedEvent(type='fixed', value='1.0.0-3') - ]), + ], + coarse_min=models.MIN_COARSE_VERSION, + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu', name='test', - versions=['1.0.0-1', '1.0.0-2']), + versions=['1.0.0-1', '1.0.0-2'], + coarse_min='00:00000001.00000000.00000000', + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu:24.04', @@ -205,12 +209,16 @@ def test_bug_post_put(self): events=[ models.AffectedEvent(type='introduced', value='0'), models.AffectedEvent(type='fixed', value='1.0.0-3') - ]), + ], + coarse_min=models.MIN_COARSE_VERSION, + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu:24.04', name='test', - versions=['1.0.0-1', '1.0.0-2']), + versions=['1.0.0-1', '1.0.0-2'], + coarse_min='00:00000001.00000000.00000000', + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu:24.04:LTS', @@ -218,12 +226,16 @@ def test_bug_post_put(self): events=[ models.AffectedEvent(type='introduced', value='0'), models.AffectedEvent(type='fixed', value='1.0.0-3') - ]), + ], + coarse_min=models.MIN_COARSE_VERSION, + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu:24.04:LTS', name='test', - versions=['1.0.0-1', '1.0.0-2']), + versions=['1.0.0-1', '1.0.0-2'], + coarse_min='00:00000001.00000000.00000000', + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu:25.04', @@ -231,12 +243,16 @@ def test_bug_post_put(self): events=[ models.AffectedEvent(type='introduced', value='0'), models.AffectedEvent(type='fixed', value='1.0.0-3') - ]), + ], + coarse_min=models.MIN_COARSE_VERSION, + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='Ubuntu:25.04', name='test', - versions=['1.0.0-1', '1.0.0-2']), + versions=['1.0.0-1', '1.0.0-2'], + coarse_min='00:00000001.00000000.00000000', + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='npm', @@ -244,7 +260,9 @@ def test_bug_post_put(self): events=[ models.AffectedEvent(type='introduced', value='0'), models.AffectedEvent(type='fixed', value='1.0.0') - ]), + ], + coarse_min=models.MIN_COARSE_VERSION, + coarse_max='00:00000001.00000000.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='npm', @@ -252,12 +270,16 @@ def test_bug_post_put(self): events=[ models.AffectedEvent(type='introduced', value='2.0.0'), models.AffectedEvent(type='last_affected', value='2.2.0') - ]), + ], + coarse_min='00:00000002.00000000.00000000', + coarse_max='00:00000002.00000002.00000000'), models.AffectedVersions( vuln_id=vuln_id, ecosystem='npm', name='testjs', - versions=['0.1.0', '0.2.0', '0.3.0', '2.0.0', '2.1.0', '2.2.0']), + versions=['0.1.0', '0.2.0', '0.3.0', '2.0.0', '2.1.0', '2.2.0'], + coarse_min='00:00000000.00000001.00000000', + coarse_max='00:00000002.00000002.00000000'), ] self.assertListEqual([a.to_dict() for a in want], [a.to_dict() for a in affected]) diff --git a/osv/semver_index.py b/osv/semver_index.py index b8a4f5040ba..83c10e584ab 100644 --- a/osv/semver_index.py +++ b/osv/semver_index.py @@ -61,7 +61,7 @@ def _coerce_suffix(suffix): for component in match.group(1)[1:].split('.'): if not component: pre_components.append('-') - elif component.isdigit(): + elif component.isdecimal(): pre_components.append(_remove_leading_zero(component)) else: pre_components.append(component) @@ -156,7 +156,7 @@ def normalize_prerelease(prerelease): # # Normalization: Pad numeric components with '0', and prefix alphanumeric # with a single '1' (to ensure they always come after). - if component.isdigit(): + if component.isdecimal(): # 1. Identifiers consisting of only digits are compared numerically. pre_components.append(component.rjust(_PAD_WIDTH, '0')) else: diff --git a/osv/third_party/univers/alpine.py b/osv/third_party/univers/alpine.py index 16ae54def5f..8491399cf70 100644 --- a/osv/third_party/univers/alpine.py +++ b/osv/third_party/univers/alpine.py @@ -20,28 +20,29 @@ class InvalidVersion(ValueError): def is_valid_alpine_version(s: str): """ Return True is the string `s` is a valid Alpine version. - We do not support yet version strings that start with + We do support version strings that start with non-significant zeros. For example: >>> is_valid_alpine_version("006") - False + True >>> is_valid_alpine_version("1.2.3") True >>> is_valid_alpine_version("02-r1") True """ search = AlpineLinuxVersion.version_extractor.search(s) - if not search: - return False - - s = search.group(1) - left, _, _ = s.partition(".") - # handle the suffix case - left, _, _ = left.partition("-") - if not left.isdigit(): - return True - i = int(left) - return str(i) == left + return bool(search) + # if not search: + # return False + + # s = search.group(1) + # left, _, _ = s.partition(".") + # # handle the suffix case + # left, _, _ = left.partition("-") + # if not left.isdecimal(): + # return True + # i = int(left) + # return str(i) == left @attr.s(frozen=True, order=False, hash=True) diff --git a/osv/third_party/univers/debian.py b/osv/third_party/univers/debian.py index ae90c596d0e..209a1af90c0 100644 --- a/osv/third_party/univers/debian.py +++ b/osv/third_party/univers/debian.py @@ -357,7 +357,7 @@ def get_digit_prefix(characters): Return the digit prefix from a list of characters. """ value = 0 - while characters and characters[0].isdigit(): + while characters and characters[0].isdecimal(): value = value * 10 + int(characters.pop(0)) return value @@ -367,7 +367,7 @@ def get_non_digit_prefix(characters): Return the non-digit prefix from a list of characters. """ prefix = [] - while characters and not characters[0].isdigit(): + while characters and not characters[0].isdecimal(): prefix.append(characters.pop(0)) return prefix diff --git a/osv/third_party/univers/gem.py b/osv/third_party/univers/gem.py index b59d32aafb1..b0d5d0d4b27 100644 --- a/osv/third_party/univers/gem.py +++ b/osv/third_party/univers/gem.py @@ -292,7 +292,7 @@ def prerelease(self): Return True if this is considered as a prerelease version. A version is considered a prerelease if it contains a letter. """ - return any(not str(s).isdigit() for s in self.segments) + return any(not str(s).isdecimal() for s in self.segments) @property def segments(self): @@ -312,7 +312,7 @@ def get_segments(self): find_segments = re.compile(r"[0-9]+|[a-z]+", re.IGNORECASE).findall segments = [] for seg in find_segments(self.version): - if seg.isdigit(): + if seg.isdecimal(): seg = int(seg) segments.append(seg) return tuple(segments) diff --git a/osv/third_party/univers/gentoo.py b/osv/third_party/univers/gentoo.py index 1d875a98df3..32752687b9d 100644 --- a/osv/third_party/univers/gentoo.py +++ b/osv/third_party/univers/gentoo.py @@ -39,6 +39,9 @@ def parse_version_and_revision(version_string): revision = int(match.group(1)[2:]) version = version_string[:match.span(1)[0]] + # strip leading 0's from the first component only + version = re.sub(r"^0+(\d)", r"\1", version) + return version, revision diff --git a/poetry.lock b/poetry.lock index d2a77cbe0d8..e6a6ad0a8b0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. [[package]] name = "astroid" @@ -313,12 +313,12 @@ files = [ google-auth = ">=2.14.1,<3.0.0" googleapis-common-protos = ">=1.56.2,<2.0.0" grpcio = [ - {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, {version = ">=1.75.1,<2.0.0", optional = true, markers = "python_version >= \"3.14\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\" and python_version < \"3.14\""}, ] grpcio-status = [ - {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, {version = ">=1.75.1,<2.0.0", optional = true, markers = "python_version >= \"3.14\" and extra == \"grpc\""}, + {version = ">=1.49.1,<2.0.0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] proto-plus = {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""} protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0" @@ -374,8 +374,8 @@ files = [ google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras = ["grpc"]} google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0" grpcio = [ - {version = ">=1.33.2,<2.0.0"}, {version = ">=1.75.1,<2.0.0", markers = "python_version >= \"3.14\""}, + {version = ">=1.33.2,<2.0.0"}, ] proto-plus = {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""} protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0" @@ -432,8 +432,8 @@ google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0" google-cloud-core = ">=1.4.0,<3.0.0" grpcio = [ - {version = ">=1.38.0,<2.0.0"}, {version = ">=1.75.1,<2.0.0", markers = "python_version >= \"3.14\""}, + {version = ">=1.38.0,<2.0.0"}, ] proto-plus = {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""} protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0" @@ -501,8 +501,8 @@ google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0", extras google-auth = ">=2.14.1,<3.0.0" grpc-google-iam-v1 = ">=0.12.4,<1.0.0" grpcio = [ - {version = ">=1.51.3,<2.0.0", markers = "python_version < \"3.14\""}, {version = ">=1.75.1,<2.0.0", markers = "python_version >= \"3.14\""}, + {version = ">=1.51.3,<2.0.0", markers = "python_version < \"3.14\""}, ] grpcio-status = ">=1.33.2" opentelemetry-api = {version = ">=1.27.0", markers = "python_version >= \"3.8\""} @@ -806,6 +806,39 @@ grpcio = ">=1.76.0" protobuf = ">=6.31.1,<7.0.0" setuptools = "*" +[[package]] +name = "hypothesis" +version = "6.150.0" +description = "The property-based testing library for Python" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "hypothesis-6.150.0-py3-none-any.whl", hash = "sha256:caf1f752418c49ac805f11d909c5831aaceb96762aa3895e0c702468dedbe3fe"}, + {file = "hypothesis-6.150.0.tar.gz", hash = "sha256:ac263bdaf338f4899a9a56e8224304e29b3ad91799e0274783c49abd91ea35ac"}, +] + +[package.dependencies] +sortedcontainers = ">=2.1.0,<3.0.0" + +[package.extras] +all = ["black (>=20.8b0)", "click (>=7.0)", "crosshair-tool (>=0.0.101)", "django (>=4.2)", "dpcontracts (>=0.4)", "hypothesis-crosshair (>=0.0.27)", "lark (>=0.10.1)", "libcst (>=0.3.16)", "numpy (>=1.21.6)", "pandas (>=1.1)", "pytest (>=4.6)", "python-dateutil (>=1.4)", "pytz (>=2014.1)", "redis (>=3.0.0)", "rich (>=9.0.0)", "tzdata (>=2025.3) ; sys_platform == \"win32\" or sys_platform == \"emscripten\"", "watchdog (>=4.0.0)"] +cli = ["black (>=20.8b0)", "click (>=7.0)", "rich (>=9.0.0)"] +codemods = ["libcst (>=0.3.16)"] +crosshair = ["crosshair-tool (>=0.0.101)", "hypothesis-crosshair (>=0.0.27)"] +dateutil = ["python-dateutil (>=1.4)"] +django = ["django (>=4.2)"] +dpcontracts = ["dpcontracts (>=0.4)"] +ghostwriter = ["black (>=20.8b0)"] +lark = ["lark (>=0.10.1)"] +numpy = ["numpy (>=1.21.6)"] +pandas = ["pandas (>=1.1)"] +pytest = ["pytest (>=4.6)"] +pytz = ["pytz (>=2014.1)"] +redis = ["redis (>=3.0.0)"] +watchdog = ["watchdog (>=4.0.0)"] +zoneinfo = ["tzdata (>=2025.3) ; sys_platform == \"win32\" or sys_platform == \"emscripten\""] + [[package]] name = "idna" version = "3.11" @@ -1544,6 +1577,18 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, + {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, +] + [[package]] name = "tomlkit" version = "0.13.3" @@ -1775,4 +1820,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.13,<4.0" -content-hash = "c09baabd568a3e965aac12a39b2e95eb2dfbca5ed1bdfec61a5f47dec30313d9" +content-hash = "7177316ec76fed91bf3ff2d1f6334a91ea7f9730bd8f1c54f1aff18ff304ecb6" diff --git a/pyproject.toml b/pyproject.toml index 97b7947dd5d..4a9e4f78c68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ pylint = "*" grpcio-tools = "*" mypy-protobuf = "^3.6.0" vcrpy = "*" +hypothesis = "*" [tool.pyright] executionEnvironments = [