Skip to content

Commit 9a4580c

Browse files
committed
Add support for OSV code fix commit collection
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 461d0d4 commit 9a4580c

File tree

3 files changed

+176
-6
lines changed

3 files changed

+176
-6
lines changed

vulnerabilities/importers/osv.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from vulnerabilities.importer import AdvisoryData
2626
from vulnerabilities.importer import AffectedPackage
2727
from vulnerabilities.importer import AffectedPackageV2
28+
from vulnerabilities.importer import CodeCommitData
2829
from vulnerabilities.importer import Reference
2930
from vulnerabilities.importer import ReferenceV2
3031
from vulnerabilities.importer import VulnerabilitySeverity
@@ -132,7 +133,8 @@ def parse_advisory_data_v2(
132133
references = get_references_v2(raw_data=raw_data)
133134

134135
affected_packages = []
135-
136+
fixed_by_commits = []
137+
affected_by_commits = []
136138
for affected_pkg in raw_data.get("affected") or []:
137139
purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id)
138140

@@ -154,6 +156,10 @@ def parse_advisory_data_v2(
154156
)
155157
fixed_versions.extend([v.string for v in fixed_version])
156158

159+
introduced_commits, fixed_commits = get_code_commit(fixed_range, raw_id=advisory_id)
160+
fixed_by_commits.extend(fixed_commits)
161+
affected_by_commits.extend(introduced_commits)
162+
157163
fixed_version_range = (
158164
get_fixed_version_range(fixed_versions, purl.type) if fixed_versions else None
159165
)
@@ -183,6 +189,8 @@ def parse_advisory_data_v2(
183189
affected_packages=affected_packages,
184190
date_published=date_published,
185191
weaknesses=weaknesses,
192+
fixed_by_commits=fixed_by_commits,
193+
affected_by_commits=affected_by_commits,
186194
url=advisory_url,
187195
original_advisory_text=advisory_text or json.dumps(raw_data, indent=2, ensure_ascii=False),
188196
)
@@ -208,6 +216,17 @@ def extract_fixed_versions(fixed_range) -> Iterable[str]:
208216
yield fixed
209217

210218

219+
def extract_commits(introduced_range) -> Iterable[tuple]:
    """
    Yield an ``(introduced, fixed)`` tuple for each event of the
    ``introduced_range`` mapping of OSV data.

    Each member of the tuple is the value stored under the event's
    "introduced" or "fixed" key, or None when the event has no such key.
    Nothing is yielded when the "events" entry is missing or empty.
    """
    for event in introduced_range.get("events") or []:
        yield event.get("introduced"), event.get("fixed")
228+
229+
211230
def get_published_date(raw_data):
212231
published = raw_data.get("published")
213232
return published and dateparser.parse(date_string=published)
@@ -398,11 +417,49 @@ def get_fixed_versions(fixed_range, raw_id, supported_ecosystem) -> List[Version
398417
fixed_versions.append(SemverVersion(version))
399418
except InvalidVersion:
400419
logger.error(f"Invalid SemverVersion: {version!r} for OSV id: {raw_id!r}")
420+
421+
if fixed_range_type == "GIT":
422+
# We process this in the get_code_commit function.
423+
continue
401424
else:
402425
logger.error(f"Unsupported fixed version type: {version!r} for OSV id: {raw_id!r}")
403426

404-
# if fixed_range_type == "GIT":
405-
# TODO add GitHubVersion univers fix_version
406-
# logger.error(f"NotImplementedError GIT Version - {raw_id !r} - {i !r}")
407-
408427
return dedupe(fixed_versions)
428+
429+
430+
def get_code_commit(ranges, raw_id):
    """
    Return a tuple of two lists of ``CodeCommitData``:
    ``(introduced_commits, fixed_commits)``, built from the events of a single
    OSV ``ranges`` mapping.

    Only a range whose "type" is "GIT" and that has a non-empty "repo" is
    processed; any other range returns two empty lists. Commits are returned in
    event order and are not de-duplicated. ``raw_id`` is the OSV advisory id
    and is used in log messages only.
    """
    if ranges.get("type") != "GIT":
        logger.debug(f"Skipping non-GIT range for OSV id: {raw_id!r}")
        return [], []

    repo = ranges.get("repo")
    if not repo:
        logger.error(f"Missing 'repo' field in range: {ranges} (OSV id: {raw_id!r})")
        return [], []

    introduced_commits, fixed_commits = [], []
    for introduced, fixed in extract_commits(ranges):
        # An "introduced" value of "0" is not a commit hash, so it is replaced
        # with the magic hash Git uses for the empty tree.
        if introduced == "0":
            introduced = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

        # Each side is handled in its own try block so that a ValueError on one
        # commit (presumably raised by CodeCommitData validation) does not
        # prevent the other one from being collected.
        try:
            if introduced:
                introduced_commits.append(CodeCommitData(commit_hash=introduced, vcs_url=repo))
        except ValueError as e:
            logger.error(f"Failed to extract introduced commits: {e!r}")

        try:
            if fixed:
                fixed_commits.append(CodeCommitData(commit_hash=fixed, vcs_url=repo))
        except ValueError as e:
            logger.error(f"Failed to extract fixed commits: {e!r}")

    return introduced_commits, fixed_commits

vulnerabilities/tests/pipelines/v2_importers/test_github_osv_importer_v2.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import pytest
1414

1515
from vulnerabilities.importer import AdvisoryData
16+
from vulnerabilities.importer import CodeCommitData
1617
from vulnerabilities.pipelines.v2_importers.github_osv_importer import GithubOSVImporterPipeline
1718

1819

@@ -27,7 +28,23 @@ def sample_osv_advisory(tmp_path: Path):
2728
{
2829
"package": {"name": "sample", "ecosystem": "pypi"},
2930
"ranges": [
30-
{"type": "ECOSYSTEM", "events": [{"introduced": "1.0.0"}, {"fixed": "1.2.0"}]}
31+
{"type": "ECOSYSTEM", "events": [{"introduced": "1.0.0"}, {"fixed": "1.2.0"}]},
32+
{
33+
"type": "GIT",
34+
"repo": "https://github.com/aboutcode-org/vulnerablecode",
35+
"events": [
36+
{"introduced": "0"},
37+
{"fixed": "10081dd502dcfc0953de333fe8afb399db5f2a88"},
38+
],
39+
},
40+
{
41+
"type": "GIT",
42+
"repo": "https://github.com/aboutcode-org/vulnerablecode",
43+
"events": [
44+
{"introduced": "b58c68c38a9de451818bac6c96d08d61e7f348a2"},
45+
{"fixed": "61621982593152c47b520ce893eb90c332427483"},
46+
],
47+
},
3148
],
3249
"versions": ["1.0.0", "1.1.0"],
3350
}
@@ -67,3 +84,36 @@ def delete(self):
6784
assert advisory.original_advisory_text.strip().startswith("{")
6885
assert advisory.affected_packages
6986
assert advisory.affected_packages[0].package.type == "pypi"
87+
assert advisory.affected_by_commits == [
88+
CodeCommitData(
89+
commit_hash="4b825dc642cb6eb9a060e54bf8d69288fbee4904",
90+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
91+
commit_author=None,
92+
commit_message=None,
93+
commit_date=None,
94+
),
95+
CodeCommitData(
96+
commit_hash="b58c68c38a9de451818bac6c96d08d61e7f348a2",
97+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
98+
commit_author=None,
99+
commit_message=None,
100+
commit_date=None,
101+
),
102+
]
103+
104+
assert advisory.fixed_by_commits == [
105+
CodeCommitData(
106+
commit_hash="10081dd502dcfc0953de333fe8afb399db5f2a88",
107+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
108+
commit_author=None,
109+
commit_message=None,
110+
commit_date=None,
111+
),
112+
CodeCommitData(
113+
commit_hash="61621982593152c47b520ce893eb90c332427483",
114+
vcs_url="https://github.com/aboutcode-org/vulnerablecode",
115+
commit_author=None,
116+
commit_message=None,
117+
commit_date=None,
118+
),
119+
]

vulnerabilities/tests/test_osv.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
from univers.versions import PypiVersion
1616
from univers.versions import SemverVersion
1717

18+
from vulnerabilities.importer import CodeCommitData
1819
from vulnerabilities.importer import Reference
1920
from vulnerabilities.importer import VulnerabilitySeverity
2021
from vulnerabilities.importers.osv import extract_fixed_versions as fixed_filter
2122
from vulnerabilities.importers.osv import get_affected_purl
2223
from vulnerabilities.importers.osv import get_affected_version_range
24+
from vulnerabilities.importers.osv import get_code_commit
2325
from vulnerabilities.importers.osv import get_fixed_versions
2426
from vulnerabilities.importers.osv import get_published_date
2527
from vulnerabilities.importers.osv import get_references
@@ -70,6 +72,67 @@ def test_fixed_filter3(self):
7072
)
7173
assert results == ["1.5.0", "9.0g0", "10.8"]
7274

75+
def test_code_commit_filter(self):
    """
    A GIT range with a repo returns one introduced and one fixed commit, with
    the "0" introduced marker reported as the Git empty-tree hash.
    """
    vcs_url = "https://github.com/aboutcode-org/vulnerablecode"
    git_range = {
        "type": "GIT",
        "repo": vcs_url,
        "events": [
            {"introduced": "0"},
            {"fixed": "a8ec9f1f300dc87f24e0f0a426a5c67c0b1b32d7"},
        ],
    }
    expected_introduced = [
        CodeCommitData(
            commit_hash="4b825dc642cb6eb9a060e54bf8d69288fbee4904",
            vcs_url=vcs_url,
            commit_author=None,
            commit_message=None,
            commit_date=None,
        )
    ]
    expected_fixed = [
        CodeCommitData(
            commit_hash="a8ec9f1f300dc87f24e0f0a426a5c67c0b1b32d7",
            vcs_url=vcs_url,
            commit_author=None,
            commit_message=None,
            commit_date=None,
        )
    ]

    results = get_code_commit(ranges=git_range, raw_id="GHSA-j3f7-7rmc-6wqj")

    assert results == (expected_introduced, expected_fixed)
108+
109+
def test_code_commit_invalid_filter(self):
    """
    A range without a "repo" key, or without a "type" key, returns no commits.
    """
    advisory_id = "GHSA-j3f7-7rmc-6wqj"
    events = [
        {"introduced": "0"},
        {"fixed": "a8ec9f1f300dc87f24e0f0a426a5c67c0b1b32d7"},
    ]
    no_commits = ([], [])

    range_without_repo = {"type": "GIT", "events": events}
    results = get_code_commit(ranges=range_without_repo, raw_id=advisory_id)
    assert results == no_commits  # no vcs_url

    range_without_type = {
        "repo": "https://github.com/aboutcode-org/vulnerablecode",
        "events": events,
    }
    results = get_code_commit(ranges=range_without_type, raw_id=advisory_id)
    assert results == no_commits  # no type
135+
73136
def test_get_published_date1(self):
74137
results = get_published_date(
75138
raw_data={"id": "GHSA-j3f7-7rmc-6wqj", "published": "2022-01-10T14:12:00Z"}

0 commit comments

Comments
 (0)