From 91fbbe8ced746e94b9db5b4f5e4e2b306519e888 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Feb 2026 19:01:24 +0530 Subject: [PATCH 1/2] Add pipeline to federate package vulnerabilities Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 4 + .../federate_package_vulnerabilities.py | 257 ++++++++++++++++++ vulnerabilities/pipes/federatedcode.py | 175 ++++++++++++ vulnerablecode/settings.py | 10 + 4 files changed, 446 insertions(+) create mode 100644 vulnerabilities/pipelines/exporters/federate_package_vulnerabilities.py create mode 100644 vulnerabilities/pipes/federatedcode.py diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index fc784e019..521ba1e5c 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -141,6 +141,10 @@ def log(self, message, level=logging.INFO): class VulnerableCodePipeline(PipelineDefinition, BasePipelineRun): pipeline_id = None # Unique Pipeline ID + # When set to true pipeline is run only once. + # To rerun onetime pipeline reset is_active field to True via migration. + run_once = False + def on_failure(self): """ Tasks to run in the event that pipeline execution fails. diff --git a/vulnerabilities/pipelines/exporters/federate_package_vulnerabilities.py b/vulnerabilities/pipelines/exporters/federate_package_vulnerabilities.py new file mode 100644 index 000000000..3bc3dcbaf --- /dev/null +++ b/vulnerabilities/pipelines/exporters/federate_package_vulnerabilities.py @@ -0,0 +1,257 @@ +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
#


import itertools
import shutil
from operator import attrgetter
from pathlib import Path

import saneyaml
from aboutcode.pipeline import LoopProgress
from django.conf import settings

from aboutcode.federated import DataFederation
from vulnerabilities.models import PackageV2
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.pipes import federatedcode


class FederatePackageVulnerabilities(VulnerableCodePipeline):
    """
    Export package vulnerabilities and advisory to FederatedCode.

    The steps clone the Git repository configured in
    ``settings.FEDERATEDCODE_VULNERABILITIES_REPO`` in a temporary directory,
    write one YAML file per advisory and two YAML files per package
    (its purls and its vulnerabilities), then commit and push them in batches.
    """

    pipeline_id = "federate_package_vulnerabilities_v2"

    @classmethod
    def steps(cls):
        """Return the pipeline steps, in execution order."""
        return (
            cls.check_federatedcode_eligibility,
            cls.create_federatedcode_working_dir,
            cls.fetch_federation_config,
            cls.clone_vulnerabilities_repo,
            cls.publish_vulnerabilities,
            cls.delete_working_dir,
        )

    def check_federatedcode_eligibility(self):
        """Check if FederatedCode is configured."""
        federatedcode.check_federatedcode_configured_and_available(self.log)

    def create_federatedcode_working_dir(self):
        """Create temporary working dir."""
        self.working_path = federatedcode.create_federatedcode_working_dir()

    def fetch_federation_config(self):
        """Fetch config for PackageURL Federation."""
        data_federation = DataFederation.from_url(
            name="aboutcode-data",
            remote_root_url="https://github.com/aboutcode-data",
        )
        self.data_cluster = data_federation.get_cluster("purls")

    def clone_vulnerabilities_repo(self):
        """Clone the configured vulnerabilities repository in the working dir."""
        self.repo = federatedcode.clone_repository(
            repo_url=settings.FEDERATEDCODE_VULNERABILITIES_REPO,
            clone_path=self.working_path / "vulnerabilities-data",
            logger=self.log,
        )

    def publish_vulnerabilities(self):
        """Publish package vulnerabilities and advisory to FederatedCode."""
        repo_path = Path(self.repo.working_dir)
        commit_count = 1
        batch_size = 2000
        files_to_commit = set()
        exported_avids = set()

        distinct_packages_count = (
            PackageV2.objects.values("type", "namespace", "name")
            .distinct("type", "namespace", "name")
            .count()
        )
        # groupby() only merges adjacent items: this relies on the queryset
        # being ordered by the same (type, namespace, name) key.
        grouped_packages = itertools.groupby(
            package_prefetched_qs().iterator(chunk_size=2000),
            key=attrgetter("type", "namespace", "name"),
        )

        self.log(f"Exporting vulnerabilities for {distinct_packages_count} packages.")
        progress = LoopProgress(
            total_iterations=distinct_packages_count,
            progress_step=1,
            logger=self.log,
        )
        for _, packages in progress.iter(grouped_packages):
            package_urls = []
            package_vulnerabilities = []
            for package in packages:
                package_urls.append(package.package_url)
                package_vulnerabilities.append(serialize_package_vulnerability(package))
                files_to_commit.update(
                    _export_package_advisories(package, repo_path, exported_avids)
                )

            # The datafile location is resolved from the last purl of the group
            # (a group is never empty). Presumably every version of a package
            # maps to the same datafile -- TODO confirm against DataFederation.
            package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(
                purl=package_urls[-1]
            )
            vulnerabilities_path = datafile_path.replace("/purls.yml", "/vulnerabilities.yml")
            package_files = (
                (f"packages/{package_repo}/{datafile_path}", package_urls),
                (f"packages/{package_repo}/{vulnerabilities_path}", package_vulnerabilities),
            )
            for file_path, data in package_files:
                write_file(repo_path=repo_path, file_path=file_path, data=data)
                files_to_commit.add(file_path)

            if len(files_to_commit) > batch_size:
                if federatedcode.commit_and_push_changes(
                    commit_message=self.commit_message(commit_count),
                    repo=self.repo,
                    files_to_commit=files_to_commit,
                    logger=self.log,
                ):
                    commit_count += 1
                # Start a new batch even when the push failed, so that a broken
                # remote does not trigger a commit attempt on every following
                # package. NOTE(review): files of a failed batch are expected to
                # stay staged or committed locally and go out with a later push.
                files_to_commit.clear()

        if files_to_commit:
            federatedcode.commit_and_push_changes(
                commit_message=self.commit_message(commit_count, commit_count),
                repo=self.repo,
                files_to_commit=files_to_commit,
                logger=self.log,
            )

        self.log(
            f"Federated {distinct_packages_count} package and {len(exported_avids)} vulnerabilities."
        )

    def delete_working_dir(self):
        """
        Remove temporary working dir.

        This is both the last step and the failure handler, so it must be safe
        to call when the directory was never created or is already gone.
        """
        working_path = getattr(self, "working_path", None)
        if not working_path:
            return
        # Best-effort cleanup: an error here must not mask the failure that
        # triggered ``on_failure``.
        shutil.rmtree(working_path, ignore_errors=True)
        self.working_path = None

    def on_failure(self):
        """Clean up the working dir when the pipeline execution fails."""
        self.delete_working_dir()

    def commit_message(self, commit_count, total_commit_count="many"):
        """Commit message for pushing Package vulnerability."""
        return federatedcode.commit_message(
            commit_count=commit_count,
            total_commit_count=total_commit_count,
        )


def package_prefetched_qs():
    """
    Return a ``PackageV2`` queryset ordered by type, namespace, name and version
    with the affected/fixed impacts and the related advisory data prefetched.
    """
    advisory_relations = (
        "impacted_packages",
        "aliases",
        "references",
        "severities",
        "weaknesses",
    )
    lookups = []
    for impacts in ("affected_in_impacts", "fixed_in_impacts"):
        lookups.append(impacts)
        lookups.append(f"{impacts}__advisory")
        lookups.extend(f"{impacts}__advisory__{relation}" for relation in advisory_relations)

    return PackageV2.objects.order_by("type", "namespace", "name", "version").prefetch_related(
        *lookups
    )


def _export_package_advisories(package, repo_path, exported_avids):
    """
    Write a YAML file for each advisory impacting ``package`` whose avid is not
    yet in ``exported_avids`` and return the list of repo-relative file paths
    written. ``exported_avids`` is updated in place.
    """
    written_files = []
    impacts = itertools.chain(
        package.affected_in_impacts.all(),
        package.fixed_in_impacts.all(),
    )
    for impact in impacts:
        advisory = impact.advisory
        avid = advisory.avid
        if avid in exported_avids:
            continue

        exported_avids.add(avid)
        adv_file = f"vulnerabilities/{avid}.yml"
        write_file(
            repo_path=repo_path,
            file_path=adv_file,
            data=serialize_advisory(advisory),
        )
        written_files.append(adv_file)
    return written_files


def serialize_package_vulnerability(package):
    """
    Return a mapping with the purl of ``package`` and the avids of the
    advisories affecting it and fixed by it.
    """
    affected_by_vulnerabilities = [
        impact.advisory.avid for impact in package.affected_in_impacts.all()
    ]
    fixing_vulnerabilities = [impact.advisory.avid for impact in package.fixed_in_impacts.all()]

    return {
        "purl": package.package_url,
        "affected_by_vulnerabilities": affected_by_vulnerabilities,
        "fixing_vulnerabilities": fixing_vulnerabilities,
    }


def serialize_severity(sev):
    """
    Return a plain data mapping serialized from a severity object.

    ``published_at`` is exported as a string, or as None when it is not set
    (rather than the literal string "None").
    """
    published_at = sev.published_at
    return {
        "score": sev.value,
        "scoring_system": sev.scoring_system,
        "scoring_elements": sev.scoring_elements,
        "published_at": str(published_at) if published_at else None,
        "url": sev.url,
    }


def serialize_references(reference):
    """Return a plain data mapping serialized from a reference object."""
    return {
        "url": reference.url,
        "reference_type": reference.reference_type,
        "reference_id": reference.reference_id,
    }


def serialize_advisory(advisory):
    """Return a plain data mapping serialized from advisory object."""
    aliases = [a.alias for a in advisory.aliases.all()]
    severities = [serialize_severity(sev) for sev in advisory.severities.all()]
    weaknesses = [wkns.cwe for wkns in advisory.weaknesses.all()]
    references = [serialize_references(ref) for ref in advisory.references.all()]
    impacts = [
        {
            "purl": impact.base_purl,
            "affected_versions": impact.affecting_vers,
            "fixed_versions": impact.fixed_vers,
        }
        for impact in advisory.impacted_packages.all()
    ]

    return {
        "advisory_id": advisory.advisory_id,
        # NOTE(review): "datasource_id" is filled with the avid, the same value
        # used for the advisory file name -- confirm this is the intended field.
        "datasource_id": advisory.avid,
        "datasource_url": advisory.url,
        "aliases": aliases,
        "summary": advisory.summary,
        "impacted_packages": impacts,
        "severities": severities,
        "weaknesses": weaknesses,
        "references": references,
    }


def write_file(repo_path, file_path, data):
    """
    Write ``data`` as YAML to ``file_path`` under ``repo_path``, creating the
    parent directories as needed.
    """
    write_to = repo_path / file_path
    write_to.parent.mkdir(parents=True, exist_ok=True)
    with open(write_to, encoding="utf-8", mode="w") as f:
        f.write(saneyaml.dump(data))
# See https://aboutcode.org for more information about nexB OSS projects.
#


import logging
import tempfile
import textwrap
from pathlib import Path
from urllib.parse import urlparse

import requests
from django.conf import settings
from git import GitCommandError
from git import Repo

logger = logging.getLogger(__name__)


def url_exists(url, timeout=5):
    """
    Check if the given `url` is reachable by doing head request.
    Return True if the final response status is 200, else False.

    Redirects are followed: ``requests`` does not follow them for HEAD requests
    by default, and a 3xx answer would otherwise be reported as unreachable.
    """
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        response.raise_for_status()
    except requests.exceptions.RequestException as request_exception:
        logger.debug(f"Error while checking {url}: {request_exception}")
        return False

    return response.status_code == requests.codes.ok


def is_configured():
    """
    Return True if the required FederatedCode settings have been set.

    The four settings are declared in ``vulnerablecode/settings.py``, each read
    from the environment variable of the same name with an empty string as
    default (the repository URL has its trailing "/" stripped).
    """
    return all(
        (
            settings.FEDERATEDCODE_VULNERABILITIES_REPO,
            settings.FEDERATEDCODE_GIT_SERVICE_TOKEN,
            settings.FEDERATEDCODE_GIT_SERVICE_EMAIL,
            settings.FEDERATEDCODE_GIT_SERVICE_NAME,
        )
    )


def create_federatedcode_working_dir():
    """Create temporary working dir for cloning federatedcode repositories."""
    return Path(tempfile.mkdtemp())


def is_available():
    """Return True if the configured Git repo is available."""
    if not is_configured():
        return False

    return url_exists(settings.FEDERATEDCODE_VULNERABILITIES_REPO)


def check_federatedcode_configured_and_available(logger):
    """
    Check if the criteria for pushing the results to FederatedCode
    is satisfied.

    Criteria:
        - FederatedCode is configured and available.

    ``logger`` is a callable taking a message. Raise an Exception when one of
    the criteria is not met.
    """
    if not is_configured():
        raise Exception("FederatedCode is not configured.")

    if not is_available():
        raise Exception("FederatedCode Git account is not available.")

    logger("Federatedcode repositories are configured and available.")


def clone_repository(repo_url, clone_path, logger, shallow_clone=True):
    """
    Clone repository to clone_path and return the ``Repo``.

    The clone is authenticated with ``settings.FEDERATEDCODE_GIT_SERVICE_TOKEN``
    and the commit author of the clone is set from the
    ``FEDERATEDCODE_GIT_SERVICE_NAME`` and ``FEDERATEDCODE_GIT_SERVICE_EMAIL``
    settings. With ``shallow_clone`` only the latest revision is fetched.
    """
    # Log the plain URL only: the authenticated one embeds the token.
    logger(f"Cloning repository {repo_url}")

    authenticated_repo_url = repo_url.replace(
        "https://",
        f"https://{settings.FEDERATEDCODE_GIT_SERVICE_TOKEN}@",
        1,  # only the scheme prefix, not a later occurrence in the URL
    )
    clone_args = {
        "url": authenticated_repo_url,
        "to_path": clone_path,
    }
    if shallow_clone:
        clone_args["depth"] = 1

    repo = Repo.clone_from(**clone_args)
    # The writer is released (and the config written) when the block exits.
    with repo.config_writer(config_level="repository") as config:
        config.set_value("user", "name", settings.FEDERATEDCODE_GIT_SERVICE_NAME)
        config.set_value("user", "email", settings.FEDERATEDCODE_GIT_SERVICE_EMAIL)

    return repo


def get_github_org(url):
    """Return org username from GitHub account URL."""
    github_account_url = urlparse(url)
    path_after_domain = github_account_url.path.lstrip("/")
    org_name = path_after_domain.split("/")[0]
    return org_name


def push_changes(repo, remote_name="origin", branch_name=""):
    """
    Push changes to remote repository.

    The active branch of ``repo`` is pushed when ``branch_name`` is empty.
    """
    if not branch_name:
        branch_name = repo.active_branch.name
    repo.git.push(remote_name, branch_name, "--no-verify")


def commit_and_push_changes(
    repo,
    files_to_commit,
    commit_message,
    logger,
    remote_name="origin",
):
    """
    Commit and push changes to remote repository.
    Returns True if changes are successfully pushed, False otherwise.

    A "nothing to commit" answer from Git is logged and not treated as a
    failure. ``logger`` is a callable taking a message.
    """
    try:
        commit_changes(repo, files_to_commit, commit_message)
        push_changes(repo, remote_name)
    except GitCommandError as e:
        if "nothing to commit" in (e.stdout or "").lower():
            logger("Nothing to commit, working tree clean.")
        else:
            logger(f"Error while committing change: {e}")
            return False
    return True


def commit_changes(repo, files_to_commit, commit_message):
    """
    Stage ``files_to_commit`` and commit them in the local ``repo`` clone.

    Do nothing when ``files_to_commit`` is empty. ``commit_message`` is
    dedented before use.
    """
    if not files_to_commit:
        return

    # Callers may pass a set; hand a list to the index.
    repo.index.add(list(files_to_commit))
    repo.git.commit(
        m=textwrap.dedent(commit_message),
        allow_empty=False,
        no_verify=True,
    )


def commit_message(commit_count, total_commit_count):
    """
    Commit message for pushing Package vulnerability.

    The message carries the ``commit_count``/``total_commit_count`` progress,
    the tool version and a sign-off built from the Git service settings. It is
    returned indented; ``commit_changes`` dedents it.
    """
    from vulnerablecode import __version__ as VERSION

    author_name = settings.FEDERATEDCODE_GIT_SERVICE_NAME
    author_email = settings.FEDERATEDCODE_GIT_SERVICE_EMAIL

    tool_name = "pkg:github/aboutcode-org/vulnerablecode"

    return f"""\
        Add new Package vulnerability ({commit_count}/{total_commit_count})

        Tool: {tool_name}@v{VERSION}

        Signed-off-by: {author_name} <{author_email}>
        """
url Signed-off-by: Keshav Priyadarshi --- docs/source/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5d6099eeb..650a7b0c0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,6 +40,7 @@ "https://nvd.nist.gov/products/cpe", "https://ftp.suse.com/pub/projects/security/yaml/suse-cvss-scores.yaml", "http://ftp.suse.com/pub/projects/security/yaml/", + "https://nixos.wiki/wiki/Flakes", # Cloudflare protection ] # Add any Sphinx extension module names here, as strings. They can be