Skip to content

Commit e4c15b0

Browse files
committed
Add support for collecting GitHub vulnerability-related issues and pull requests
Add tests for this functionality Signed-off-by: ziad hany <[email protected]>
1 parent 8c001a1 commit e4c15b0

File tree

5 files changed

+262
-0
lines changed

5 files changed

+262
-0
lines changed

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from vulnerabilities.pipelines.v2_importers import (
4848
elixir_security_importer as elixir_security_importer_v2,
4949
)
50+
from vulnerabilities.pipelines.v2_importers import github_issue_pr as github_issue_pr_v2
5051
from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2
5152
from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
5253
from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2
@@ -115,5 +116,6 @@
115116
ubuntu_usn.UbuntuUSNImporter,
116117
fireeye.FireyeImporter,
117118
oss_fuzz.OSSFuzzImporter,
119+
github_issue_pr_v2.GithubPipelineIssuePR,
118120
]
119121
)
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import re
11+
from collections import defaultdict
12+
13+
from github import Github
14+
15+
from vulnerabilities.importer import AdvisoryData
16+
from vulnerabilities.importer import ReferenceV2
17+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
18+
from vulnerablecode.settings import env
19+
20+
# The token is optional: without a default, a missing GITHUB_TOKEN variable
# makes this lookup raise at import time. With ``None`` the GitHub client
# below is built without credentials instead.
GITHUB_TOKEN = env.str("GITHUB_TOKEN", default=None)
21+
22+
23+
class GithubPipelineIssuePR(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline to collect GitHub issues and PRs related to vulnerabilities.

    Issues and pull requests of ``repo_name`` that mention a CVE, PYSEC or GHSA
    identifier are searched through the GitHub API, grouped by the identifier
    they mention, and turned into one ``AdvisoryData`` per identifier.
    """

    pipeline_id = "collect_issues_pr"

    # Repository whose issues and pull requests are searched ("owner/name").
    repo_name = "django/django"

    # Identifiers recognised in the title and body of an issue or PR.
    _vulnerability_id_pattern = re.compile(
        r"(CVE-\d{4}-\d+|PYSEC-\d{4}-\d+|GHSA-[\w-]+)", re.IGNORECASE
    )

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.fetch_entries,
            cls.collect_and_store_advisories,
        )

    def fetch_entries(self):
        """Search GitHub for issues and PRs of ``repo_name`` mentioning a vulnerability id."""
        # The advisory URL must point at the repository that is actually searched.
        self.repo_url = f"https://github.com/{self.repo_name}"

        g = Github(login_or_token=GITHUB_TOKEN)

        base_query = f"repo:{self.repo_name} (CVE OR PYSEC OR GHSA)"
        self.issues = g.search_issues(f"{base_query} is:issue")
        # NOTE: the attribute spelling is kept because it is set by name elsewhere
        # (for example in the tests of this pipeline).
        self.pull_requestes = g.search_issues(f"{base_query} is:pr")

    def advisories_count(self) -> int:
        """
        Return the number of issues and PRs returned by the searches.

        This counts search results, not distinct vulnerability identifiers: one
        item may mention several identifiers and several items may mention the
        same one.
        """
        return self.issues.totalCount + self.pull_requestes.totalCount

    @staticmethod
    def _normalize_vulnerability_id(vuln_id):
        """Return ``vuln_id`` in a single canonical case so case variants share one key."""
        prefix, separator, rest = vuln_id.partition("-")
        if prefix.upper() == "GHSA":
            # GHSA identifiers use an upper-case prefix and a lower-case body.
            return f"GHSA{separator}{rest.lower()}"
        return vuln_id.upper()

    def _group_items(self, items, item_type, grouped_items):
        """Append ``(item_type, html_url)`` to ``grouped_items`` for each id found in ``items``."""
        for item in items:
            matches = self._vulnerability_id_pattern.findall(
                item.title + " " + (item.body or "")
            )
            # dict.fromkeys drops repeated mentions within one item while keeping
            # first-seen order, so an item is referenced once per identifier.
            vuln_ids = dict.fromkeys(self._normalize_vulnerability_id(m) for m in matches)
            for vuln_id in vuln_ids:
                grouped_items[vuln_id].append((item_type, item.html_url))

    def collect_issues_and_prs(self):
        """
        Group issues and PRs by vulnerability identifiers (like CVE-xxxx-yyyy).
        Returns a dict mapping vuln_id -> [(type, html_url)], where type is
        "Issue" or "PR". Identifiers are case-normalized before grouping.
        """
        self.log("Grouping GitHub issues and PRs by vulnerability identifiers.")

        grouped_items = defaultdict(list)
        self._group_items(self.issues, "Issue", grouped_items)
        self._group_items(self.pull_requestes, "PR", grouped_items)

        self.log(f"Grouped {len(grouped_items)} unique vulnerability identifiers.")
        return grouped_items

    def collect_advisories(self):
        """
        Yield one AdvisoryData per vulnerability ID, referencing its related
        GitHub issues and PRs.
        """
        self.log("Generating AdvisoryData objects from GitHub issues and PRs.")
        grouped_data = self.collect_issues_and_prs()

        for vuln_id, refs in grouped_data.items():
            # Each entry is an (item type, URL) pair; the item type ("Issue" or
            # "PR") is stored as the reference id.
            references = [ReferenceV2(reference_id=ref_id, url=url) for ref_id, url in refs]

            yield AdvisoryData(
                advisory_id=vuln_id,
                aliases=[vuln_id],
                references_v2=references,
                url=self.repo_url,
            )
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
from pathlib import Path
12+
from types import SimpleNamespace
13+
from unittest.mock import MagicMock
14+
15+
import pytest
16+
17+
from vulnerabilities.pipelines.v2_importers.github_issue_pr import GithubPipelineIssuePR
18+
from vulnerabilities.tests import util_tests
19+
20+
21+
@pytest.fixture
def pipeline():
    """Provide a pipeline instance with a fixed repository URL and a mocked logger."""
    instance = GithubPipelineIssuePR()
    instance.log = MagicMock()
    instance.repo_url = "https://github.com/test/repo"
    return instance
27+
28+
29+
@pytest.mark.django_db
def test_collect_issues_and_prs(pipeline):
    """Issues and PRs are grouped under each vulnerability id found in their title or body."""
    issue_with_cve = SimpleNamespace(
        title="Fix for CVE-2023-1234 found",
        body="This resolves a security issue",
        html_url="http://example.com/issue1",
    )
    unrelated_issue = SimpleNamespace(
        title="No vulnerability mentioned",
        body="This is unrelated",
        html_url="http://example.com/issue2",
    )
    pr_with_ghsa_and_pysec = SimpleNamespace(
        title="Patch addressing GHSA-zzz-111",
        body="Also fixes PYSEC-2024-5678",
        html_url="http://example.com/pr1",
    )
    pipeline.issues = [issue_with_cve, unrelated_issue]
    pipeline.pull_requestes = [pr_with_ghsa_and_pysec]

    grouped = pipeline.collect_issues_and_prs()

    # The unrelated issue mentions no identifier, so it must not appear at all.
    assert grouped == {
        "CVE-2023-1234": [("Issue", "http://example.com/issue1")],
        "GHSA-zzz-111": [("PR", "http://example.com/pr1")],
        "PYSEC-2024-5678": [("PR", "http://example.com/pr1")],
    }
60+
61+
62+
# Directory holding the JSON fixtures for these tests: "test_data/github_issue_pr",
# resolved three directory levels above this file.
TEST_DATA = Path(__file__).parent.parent.parent.joinpath("test_data", "github_issue_pr")
63+
64+
65+
@pytest.mark.django_db
def test_collect_advisories_from_json(pipeline):
    """Advisories built from pre-grouped issues and PRs match the expected JSON output."""
    input_file = TEST_DATA / "issues_and_pr.json"
    expected_file = TEST_DATA / "expected_advisory_output.json"

    issues_and_prs = json.loads(input_file.read_text(encoding="utf-8"))

    # Reuse the shared ``pipeline`` fixture instead of repeating its setup here.
    # The grouping step is replaced so only the advisory generation is exercised.
    pipeline.collect_issues_and_prs = MagicMock(return_value=issues_and_prs)

    result = [adv.to_dict() for adv in pipeline.collect_advisories()]

    util_tests.check_results_against_json(result, expected_file)
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
[
2+
{
3+
"advisory_id": "CVE-2023-1234",
4+
"aliases": [
5+
"CVE-2023-1234"
6+
],
7+
"summary": "",
8+
"affected_packages": [],
9+
"references_v2": [
10+
{
11+
"reference_id": "Issue",
12+
"reference_type": "",
13+
"url": "https://example.com/issue1"
14+
},
15+
{
16+
"reference_id": "PR",
17+
"reference_type": "",
18+
"url": "https://example.com/pr1"
19+
}
20+
],
21+
"severities": [],
22+
"date_published": null,
23+
"weaknesses": [],
24+
"url": "https://github.com/test/repo"
25+
},
26+
{
27+
"advisory_id": "GHSA-zzz-111",
28+
"aliases": [
29+
"GHSA-zzz-111"
30+
],
31+
"summary": "",
32+
"affected_packages": [],
33+
"references_v2": [
34+
{
35+
"reference_id": "PR",
36+
"reference_type": "",
37+
"url": "https://example.com/pr1"
38+
}
39+
],
40+
"severities": [],
41+
"date_published": null,
42+
"weaknesses": [],
43+
"url": "https://github.com/test/repo"
44+
},
45+
{
46+
"advisory_id": "PYSEC-2024-5678",
47+
"aliases": [
48+
"PYSEC-2024-5678"
49+
],
50+
"summary": "",
51+
"affected_packages": [],
52+
"references_v2": [
53+
{
54+
"reference_id": "PR",
55+
"reference_type": "",
56+
"url": "https://example.com/pr1"
57+
}
58+
],
59+
"severities": [],
60+
"date_published": null,
61+
"weaknesses": [],
62+
"url": "https://github.com/test/repo"
63+
}
64+
]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"CVE-2023-1234": [
3+
[
4+
"Issue",
5+
"https://example.com/issue1"
6+
],
7+
[
8+
"PR",
9+
"https://example.com/pr1"
10+
]
11+
],
12+
"GHSA-zzz-111": [
13+
[
14+
"PR",
15+
"https://example.com/pr1"
16+
]
17+
],
18+
"PYSEC-2024-5678": [
19+
[
20+
"PR",
21+
"https://example.com/pr1"
22+
]
23+
]
24+
}

0 commit comments

Comments
 (0)