Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ hurl-scripts/
temp/*
**/tmp/**
gcp/api/v1/osv/**
.hypothesis
36 changes: 30 additions & 6 deletions osv/ecosystems/alpine.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@
from ..third_party.univers.alpine import AlpineLinuxVersion

from . import config
from .ecosystems_base import EnumerableEcosystem, EnumerateError
from .ecosystems_base import OrderedEcosystem
from .ecosystems_base import (
coarse_version_generic,
EnumerableEcosystem,
EnumerateError,
OrderedEcosystem,
)
from .. import repos
from ..cache import cached

Expand All @@ -33,11 +37,31 @@ class APK(OrderedEcosystem):

def _sort_key(self, version):
if not AlpineLinuxVersion.is_valid(version):
# If version is not valid, it is most likely an invalid input
# version then sort it to the last/largest element
return AlpineLinuxVersion('9999999999')
raise ValueError(f'Invalid version: {version}')
return AlpineLinuxVersion(version)

def coarse_version(self, version):
"""Coarse version.

Treats version as dot-separated integers.
Trims suffixes (_rc, _p, -r) to ensure monotonicity (e.g. 1.2_rc1 < 1.2).
"""
if not AlpineLinuxVersion.is_valid(version):
raise ValueError(f'Invalid version: {version}')
# is_valid uses a $ regex anchor (which can match a newline),
# so we need to remove the newline if one exists.
if version[-1] == '\n':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does $ also match \r\n? It might be worth calling strip(), since I don't believe any versioning scheme has whitespace as part of its spec.

Copy link
Member Author

@michaelkedar michaelkedar Jan 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does $ also match \r\n?

No, at least not on Linux.
I would rather not call strip, just in case

version = version[:-1]
return coarse_version_generic(
version,
separators_regex=r'[.]',
# in APK, 1.0.2 < 1.02.1 < 1.1.1
# We must treat everything after .0x as 0
# Also split off the _rc, _p, or -r suffixes
truncate_regex=r'(?:\.0|[_-])',
implicit_split=False,
empty_as='')


class Alpine(APK, EnumerableEcosystem):
"""
Expand Down Expand Up @@ -99,7 +123,7 @@ def clean_versions(ver: str) -> str:
current_ver = clean_versions(current_ver)
current_rel = clean_versions(current_rel)
# Ignore occasional version that is still not valid.
if AlpineLinuxVersion.is_valid(current_ver) and current_rel.isdigit():
if AlpineLinuxVersion.is_valid(current_ver) and current_rel.isdecimal():
all_versions.add(current_ver + '-r' + current_rel)
else:
logging.warning('Alpine version "%s" - "%s" is not valid',
Expand Down
18 changes: 18 additions & 0 deletions osv/ecosystems/alpine_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,24 @@ def test_apk(self):
self.assertLessEqual(
ecosystem.sort_key('1.2.0-r0'), ecosystem.sort_key('1.10.0-r0'))

def test_coarse_version(self):
"""Test coarse version."""
ecosystem = alpine.APK()
self.assertEqual('00:00000010.00000002.00000033',
ecosystem.coarse_version('10.2.33'))
self.assertEqual('00:00000004.00000005.00000000',
ecosystem.coarse_version('4.5_alpha'))
self.assertEqual('00:20200712.00000000.00000000',
ecosystem.coarse_version('20200712-r0'))
self.assertEqual('00:00000011.00000003.00000020',
ecosystem.coarse_version('11.3.20.1_p1-r0'))
self.assertEqual('00:00000002.00000003.00000000',
ecosystem.coarse_version('02.3'))
self.assertEqual('00:00000005.00000000.00000000',
ecosystem.coarse_version('5.06.7'))
self.assertEqual('00:00000005.00000000.00000000',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like it does trim 5.0.9, should this be happening?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah - 5.0.9 < 5.01.0 so everything after the first 0 must be truncated to 0

ecosystem.coarse_version('5.0.9'))

def test_apk_ecosystems(self):
"""Test apk-based ecosystems return an APK ecosystem."""
ecos = [
Expand Down
168 changes: 168 additions & 0 deletions osv/ecosystems/coarse_version_monotonicity_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Coarse version monotonicity tests."""

import re
import unittest
from hypothesis import given, example, strategies as st
import packaging.version

from .. import ecosystems
from ..third_party.univers.gem import GemVersion

from . import alpine
from . import cran
from . import debian
from . import haskell
from . import maven
from . import nuget
from . import packagist
from . import pub
from . import pypi
from . import redhat
from . import rubygems
from . import semver_ecosystem_helper

# Strategies

# Matches standard SemVer: major.minor.patch, optional 'v', prerelease/build.
# Note: OSV's SemVer implementation coerces partial versions
# (e.g. '1.0' -> '1.0.0').
# The identifier character classes use A-Z (not A-z): the A-z range would
# also admit the ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which
# is not part of a standard SemVer identifier.
semver_strategy = st.from_regex(
    r'^v?[0-9]+(\.[0-9]+){0,2}(-[0-9a-zA-Z.-]*)?\+?[0-9a-zA-Z.-]*$')

# Matches standard Alpine versions like 1.2.3, optionally with suffixes
# like _rc1, _p2, and revision -r3.
# The digits after _rc/_p are optional and the suffixes may repeat
# (e.g. 1.2_rc_p1).
apk_version_strategy = st.from_regex(
    r'^[0-9]+(\.[0-9]+)*(_rc[0-9]*|_p[0-9]*)*(-r[0-9]+)?$')

# Matches R versions: sequence of numbers separated by dots or dashes
# (e.g. 1.2-3). The pattern requires at least two numeric components.
cran_version_strategy = st.from_regex(r'^[0-9]+([.-][0-9]+)+$')

# Matches Debian versions: optional epoch, upstream version
# (alphanumerics/separators), optional debian revision.
# The part after the optional epoch always starts with a digit.
dpkg_version_strategy = st.from_regex(
    r'^(\d+:)?\d([A-Za-z0-9\.\+\~\-]+|[A-Za-z0-9\.\+\~]+-[A-Za-z0-9\+\.\~]+)?$')

# Matches Haskell versions: dot-separated integers (e.g. 1.2.3).
hackage_version_strategy = st.from_regex(r'^[0-9]+(\.[0-9]+)*$')

# Matches Maven versions: flexible sequence of numbers or identifiers
# separated by dots or dashes.
# Every piece of the pattern is optional, so it also matches the empty string.
maven_version_strategy = st.from_regex(r'^(([0-9]*|[A-Za-z+]*)[.-]?)*$')

# Matches NuGet versions: SemVer-like, optional 'v' prefix, 4th component,
# prerelease/build metadata.
# The identifier character classes use A-Z (not A-z): the A-z range would
# also admit the ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
nuget_version_strategy = st.from_regex(
    r'^v?[0-9]+(\.[0-9]+){0,3}(-[0-9a-zA-Z.-]*)?\+?[0-9a-zA-Z.-]*$')

# Matches Packagist versions: optional 'v' prefix, flexible components
# separated by ., +, _, -.
packagist_version_strategy = st.from_regex(r'^v?(([0-9]*|[A-Za-z+]*)[.+_-]?)*$')

# Pub versions are the same format as SemVer, so the SemVer strategy is
# reused unchanged.
pub_version_strategy = semver_strategy

# Draws either arbitrary text or a string matching the standard
# packaging.version pattern (anchored at both ends).
pypi_strategy = st.one_of(
    st.text(),  # legacy version can be any string
    st.from_regex(
        re.compile(r'^' + packaging.version.VERSION_PATTERN + r'$',
                   re.IGNORECASE | re.VERBOSE | re.ASCII)))
# Matches RPM versions: optional epoch, alternating alphanumeric segments.
rpm_version_strategy = st.from_regex(
re.compile(r'^([0-9]+:)?(([0-9]+|[A-Za-z]+)((?![0-9A-Za-z])[ -~])*)+$',

Check failure

Code scanning / CodeQL

Inefficient regular expression High

This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hypothesis uses the regex to generate patterns. It doesn't match with them afaik.

Check failure

Code scanning / CodeQL

Inefficient regular expression High

This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'A'.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hypothesis uses the regex to generate patterns. It doesn't match with them afaik.

re.ASCII))

# Uses the standard GemVersion pattern, anchored at both ends.
rubygems_version_strategy = st.from_regex(r'^' + GemVersion.VERSION_PATTERN +
                                          r'$')


def check_coarse_version_monotonic(test_case: unittest.TestCase,
                                   ecosystem: ecosystems.OrderedEcosystem,
                                   v1_str: str, v2_str: str):
    """Assert that coarse_version never inverts the sort_key order of a pair.

    The two version strings are first ordered by their sort keys. For every
    version whose sort key is flagged `is_invalid`, coarse_version must raise
    ValueError. Only when both versions are valid are their coarse versions
    compared: the coarse version of the smaller must be <= that of the larger.

    Args:
      test_case: TestCase used to report assertion failures.
      ecosystem: Ecosystem providing sort_key and coarse_version.
      v1_str: First version string.
      v2_str: Second version string.
    """
    ordered = [
        (ecosystem.sort_key(v1_str), v1_str),
        (ecosystem.sort_key(v2_str), v2_str),
    ]
    # Put the smaller version (by sort key) first.
    if ordered[1][0] < ordered[0][0]:
        ordered.reverse()

    # Invalid versions must be rejected by coarse_version.
    any_invalid = False
    for key, version_str in ordered:
        if key.is_invalid:
            any_invalid = True
            with test_case.assertRaises(ValueError):
                ecosystem.coarse_version(version_str)
    if any_invalid:
        return

    (_, low_str), (_, high_str) = ordered
    low_coarse = ecosystem.coarse_version(low_str)
    high_coarse = ecosystem.coarse_version(high_str)
    test_case.assertLessEqual(low_coarse, high_coarse)


class CoarseVersionMonotonicityTest(unittest.TestCase):
    """Property-based tests that coarse_version preserves sort_key ordering.

    Every test draws two version strings from the ecosystem's strategy and
    passes them to check_coarse_version_monotonic.
    """

    def _assert_coarse_monotonic(self, ecosystem, v1_str, v2_str):
        """Run the shared monotonicity check for one ecosystem and pair."""
        check_coarse_version_monotonic(self, ecosystem, v1_str, v2_str)

    @given(apk_version_strategy, apk_version_strategy)
    @example('1.02', '1.1')
    @example('5.0.9', '5.06.7')
    def test_apk(self, v1_str, v2_str):
        """APK, including explicit leading-zero component examples."""
        self._assert_coarse_monotonic(alpine.APK(), v1_str, v2_str)

    @given(cran_version_strategy, cran_version_strategy)
    def test_cran(self, v1_str, v2_str):
        """CRAN."""
        self._assert_coarse_monotonic(cran.CRAN(), v1_str, v2_str)

    @given(dpkg_version_strategy, dpkg_version_strategy)
    def test_dpkg(self, v1_str, v2_str):
        """DPKG."""
        self._assert_coarse_monotonic(debian.DPKG(), v1_str, v2_str)

    @given(hackage_version_strategy, hackage_version_strategy)
    def test_hackage(self, v1_str, v2_str):
        """Hackage."""
        self._assert_coarse_monotonic(haskell.Hackage(), v1_str, v2_str)

    @given(maven_version_strategy, maven_version_strategy)
    def test_maven(self, v1_str, v2_str):
        """Maven."""
        self._assert_coarse_monotonic(maven.Maven(), v1_str, v2_str)

    @given(nuget_version_strategy, nuget_version_strategy)
    def test_nuget(self, v1_str, v2_str):
        """NuGet."""
        self._assert_coarse_monotonic(nuget.NuGet(), v1_str, v2_str)

    @given(packagist_version_strategy, packagist_version_strategy)
    def test_packagist(self, v1_str, v2_str):
        """Packagist."""
        self._assert_coarse_monotonic(packagist.Packagist(), v1_str, v2_str)

    @given(pub_version_strategy, pub_version_strategy)
    def test_pub(self, v1_str, v2_str):
        """Pub."""
        self._assert_coarse_monotonic(pub.Pub(), v1_str, v2_str)

    @given(pypi_strategy, pypi_strategy)
    def test_pypi(self, v1_str, v2_str):
        """PyPI."""
        self._assert_coarse_monotonic(pypi.PyPI(), v1_str, v2_str)

    @given(rpm_version_strategy, rpm_version_strategy)
    def test_rpm(self, v1_str, v2_str):
        """RPM."""
        self._assert_coarse_monotonic(redhat.RPM(), v1_str, v2_str)

    @given(rubygems_version_strategy, rubygems_version_strategy)
    def test_rubygems(self, v1_str, v2_str):
        """RubyGems."""
        self._assert_coarse_monotonic(rubygems.RubyGems(), v1_str, v2_str)

    @given(semver_strategy, semver_strategy)
    def test_semver(self, v1_str, v2_str):
        """SemVer-like ecosystems."""
        self._assert_coarse_monotonic(semver_ecosystem_helper.SemverLike(),
                                      v1_str, v2_str)
23 changes: 17 additions & 6 deletions osv/ecosystems/cran.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@
"""CRAN helpers."""

import requests
import packaging_legacy.version

from . import config
from .ecosystems_base import EnumerableEcosystem, EnumerateError
from .ecosystems_base import (
coarse_version_from_ints,
EnumerableEcosystem,
EnumerateError,
)


class CRAN(EnumerableEcosystem):
Expand All @@ -34,11 +37,19 @@ def _sort_key(self, version):
# Some documentation on CRAN versioning and the R numeric_version method:
# https://cran.r-project.org/doc/manuals/R-exts.html#The-DESCRIPTION-file
# https://stat.ethz.ch/R-manual/R-devel/library/base/html/numeric_version.html
# The packaging.version appears to work for the typical X.Y.Z and
# X.Y-Z cases
version = version.replace("-", ".")
# version.parse() handles invalid versions by returning LegacyVersion()
return packaging_legacy.version.parse(version)
try:
return tuple(int(part) for part in version.split('.'))
except ValueError as exc:
raise ValueError(f'Invalid version: {version}') from exc

def coarse_version(self, version):
    """Return the coarse version for a CRAN version string.

    Treats version as integers separated by dots or dashes.

    Raises:
      ValueError: propagated from _sort_key when the version is not valid.
    """
    # _sort_key doubles as the validator: it raises ValueError on bad input.
    int_parts = self._sort_key(version)
    return coarse_version_from_ints(int_parts)

def _enumerate_versions(self,
url,
Expand Down
8 changes: 8 additions & 0 deletions osv/ecosystems/cran_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,11 @@ def test_sort_key(self):
ecosystem.sort_key('1.10-0'), ecosystem.sort_key('1.2-0'))
self.assertLessEqual(
ecosystem.sort_key('1.2-0'), ecosystem.sort_key('1.10-0'))

def test_coarse_version(self):
    """Check coarse_version output for representative CRAN versions."""
    cran_ecosystem = ecosystems.get('CRAN')
    # (input version, expected coarse version)
    expectations = (
        ('9001.10-33.4', '00:00009001.00000010.00000033'),
        ('0-1', '00:00000000.00000001.00000000'),
    )
    for version, expected in expectations:
        self.assertEqual(expected, cran_ecosystem.coarse_version(version))
58 changes: 54 additions & 4 deletions osv/ecosystems/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,21 @@
# limitations under the License.
"""Debian ecosystem helper."""

from itertools import batched
import json
import logging
import re
import requests

from ..third_party.univers.debian import Version as DebianVersion

from . import config
from .ecosystems_base import EnumerableEcosystem, EnumerateError
from .ecosystems_base import (
coarse_version_from_ints,
EnumerableEcosystem,
EnumerateError,
MAX_COARSE_PART,
)
from .ecosystems_base import OrderedEcosystem
from .. import cache
from ..request_helper import RequestError, RequestHelper
Expand All @@ -31,11 +38,54 @@ class DPKG(OrderedEcosystem):

def _sort_key(self, version):
if not DebianVersion.is_valid(version):
# If debian version is not valid, it is most likely an invalid fixed
# version then sort it to the last/largest element
return DebianVersion(9999999999, '9999999999')
raise ValueError(f'Invalid version: {version}')
return DebianVersion.from_string(version)

def coarse_version(self, version: str) -> str:
    """Return the coarse version for a Debian version string.

    The version (after any epoch) is read as alternating digit/non-digit
    runs. Integer components are collected until the first separator that
    is not exactly '.', so that pre-release style suffixes cannot make the
    coarse version larger than the release (e.g. 1.0~rc1 < 1.0).
    The epoch is preserved.

    Raises:
      ValueError: if the version is rejected by DebianVersion.is_valid or
        its epoch is not an integer.
    """
    if not DebianVersion.is_valid(version):
        raise ValueError(f'Invalid version: {version}')

    # Split off the epoch; without a ':' the epoch defaults to 0.
    epoch_str, colon, remainder = version.partition(':')
    if not colon:
        epoch_str, remainder = '0', version
    try:
        epoch = int(epoch_str)
    except ValueError as exc:
        raise ValueError(f'Invalid version: {version}') from exc

    # Versions are treated as alternating digit/non-digit strings.
    # The exact string '.' is a plain component separator.
    # Any other string starting with '.' is greater than any number.
    # Strings starting with anything else are less than any number.
    tokens = re.findall(r'^$|\d+|\D+', remainder)
    int_parts = []
    for number, *trailing in batched(tokens, 2):
        if not number.isdecimal():
            # This is probably handled by is_valid
            break
        int_parts.append(int(number))
        if not trailing:
            break
        separator = trailing[0]
        if separator == '.':
            continue
        if separator.startswith('.'):
            # Bigger than the max int, so we overflow
            int_parts.append(MAX_COARSE_PART + 1)
        # Any separator other than a bare '.' ends the coarse prefix.
        break

    return coarse_version_from_ints(int_parts, epoch=epoch)


# TODO(another-rex): Update this to use dynamically
# change depending on the project
Expand Down
Loading
Loading