From eced14fded47a78c4adad83ae3061939d174e9db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karolis=20Vy=C4=8Dius?= Date: Sun, 18 Jan 2026 17:16:36 +0200 Subject: [PATCH] Add network configuration support for synchronous file uploads Introduce RemoteNetworkConfigSerializer to centralize shared network settings (timeouts, proxies, and certificates) and ensure consistent validation across persistent Remotes and ad-hoc upload downloads. - Exclude sync-specific fields (policy, rate_limit) from shared config - Pass validated network kwargs to the downloader in UploadSerializerFieldsMixin - Reuse common proxy and certificate validation logic closes: #7201 --- CHANGES/7201.feature | 1 + pulp_file/app/serializers.py | 2 + pulpcore/app/serializers/base.py | 202 +++++++++++++++++++++++++ pulpcore/app/serializers/repository.py | 194 +++--------------------- pulpcore/plugin/serializers/content.py | 37 +++-- 5 files changed, 252 insertions(+), 184 deletions(-) create mode 100644 CHANGES/7201.feature diff --git a/CHANGES/7201.feature b/CHANGES/7201.feature new file mode 100644 index 00000000000..78a446cf004 --- /dev/null +++ b/CHANGES/7201.feature @@ -0,0 +1 @@ +Add network configuration support for synchronous file uploads \ No newline at end of file diff --git a/pulp_file/app/serializers.py b/pulp_file/app/serializers.py index de0d17071b9..45bfbe88c63 100644 --- a/pulp_file/app/serializers.py +++ b/pulp_file/app/serializers.py @@ -65,6 +65,8 @@ class FileContentUploadSerializer(FileContentSerializer): """ def validate(self, data): + data = super().validate(data) + """Validate the FileContent data.""" if upload := data.pop("upload", None): # Handle chunked upload diff --git a/pulpcore/app/serializers/base.py b/pulpcore/app/serializers/base.py index 2076068705f..56710da8e29 100644 --- a/pulpcore/app/serializers/base.py +++ b/pulpcore/app/serializers/base.py @@ -7,6 +7,7 @@ from typing import List, TypedDict from urllib.parse import urljoin +from cryptography.x509 import load_pem_x509_certificate from django.conf import settings from django.core.validators import URLValidator from django.core.exceptions import ObjectDoesNotExist @@ -582,3 +583,204 @@ def validate(self, data): data = super().validate(data) data["value"] = self.context["content_object"].pulp_labels[data["key"]] return data + + +class RemoteNetworkConfigSerializer(serializers.Serializer): + """ + Shared network configuration fields and validation logic used by both + RemoteSerializer and UploadSerializerFieldsMixin. + """ + + ca_cert = serializers.CharField( + help_text="A PEM encoded CA certificate used to validate the server " + "certificate presented by the remote server.", + required=False, + allow_null=True, + ) + client_cert = serializers.CharField( + help_text="A PEM encoded client certificate used for authentication.", + required=False, + allow_null=True, + ) + client_key = serializers.CharField( + help_text="A PEM encoded private key used for authentication.", + required=False, + allow_null=True, + write_only=True, + ) + tls_validation = serializers.BooleanField( + help_text="If True, TLS peer validation must be performed.", required=False + ) + proxy_url = serializers.CharField( + help_text="The proxy URL. Format: scheme://host:port", + required=False, + allow_null=True, + ) + proxy_username = serializers.CharField( + help_text="The username to authenticte to the proxy.", + required=False, + allow_null=True, + write_only=True, + ) + proxy_password = serializers.CharField( + help_text=_( + "The password to authenticate to the proxy. Extra leading and trailing whitespace " + "characters are not trimmed." + ), + required=False, + allow_null=True, + write_only=True, + trim_whitespace=False, + style={"input_type": "password"}, + ) + username = serializers.CharField( + help_text="The username to be used for authentication when syncing.", + required=False, + allow_null=True, + write_only=True, + ) + password = serializers.CharField( + help_text=_( + "The password to be used for authentication when syncing. Extra leading and trailing " + "whitespace characters are not trimmed." + ), + required=False, + allow_null=True, + write_only=True, + trim_whitespace=False, + style={"input_type": "password"}, + ) + max_retries = serializers.IntegerField( + help_text=( + "Maximum number of retry attempts after a download failure. If not set then the " + "default value (3) will be used." + ), + required=False, + allow_null=True, + ) + total_timeout = serializers.FloatField( + allow_null=True, + required=False, + help_text=( + "aiohttp.ClientTimeout.total (q.v.) for download-connections. The default is null, " + "which will cause the default from the aiohttp library to be used." + ), + min_value=0.0, + ) + connect_timeout = serializers.FloatField( + allow_null=True, + required=False, + help_text=( + "aiohttp.ClientTimeout.connect (q.v.) for download-connections. The default is null, " + "which will cause the default from the aiohttp library to be used." + ), + min_value=0.0, + ) + sock_connect_timeout = serializers.FloatField( + allow_null=True, + required=False, + help_text=( + "aiohttp.ClientTimeout.sock_connect (q.v.) for download-connections. The default is " + "null, which will cause the default from the aiohttp library to be used." + ), + min_value=0.0, + ) + sock_read_timeout = serializers.FloatField( + allow_null=True, + required=False, + help_text=( + "aiohttp.ClientTimeout.sock_read (q.v.) for download-connections. The default is " + "null, which will cause the default from the aiohttp library to be used." + ), + min_value=0.0, + ) + headers = serializers.ListField( + child=serializers.DictField(), + help_text=_("Headers for aiohttp.Clientsession"), + required=False, + ) + + def validate_proxy_url(self, value): + """ + Check, that the proxy_url does not contain credentials. + """ + if value and "@" in value: + raise serializers.ValidationError(_("proxy_url must not contain credentials")) + return value + + def validate_ca_cert(self, value): + return self._validate_certificate("ca_cert", value) + + def validate_client_cert(self, value): + return self._validate_certificate("client_cert", value) + + @staticmethod + def _validate_certificate(which_cert, value): + """ + Validate and return *just* the certs and not any commentary that came along with them. + + Args: + which_cert: The attribute-name whose cert we're validating + (only used for error-message). + value: The string being proposed as a certificate-containing PEM. + + Raises: + ValidationError: When the provided value has no or an invalid certificate. + + Returns: + The pem-string with *just* the validated BEGIN/END CERTIFICATE segments. + """ + if value: + try: + # Find any/all CERTIFICATE entries in the proposed PEM and let crypto validate them. + # NOTE: crypto/39 includes load_certificates(), which will let us remove this whole + # loop. But we want to fix the current problem on older supported branches that + # allow 38, so we do it ourselves for now + certs = list() + a_cert = "" + for line in value.split("\n"): + if "-----BEGIN CERTIFICATE-----" in line or a_cert: + a_cert += line + "\n" + if "-----END CERTIFICATE-----" in line: + load_pem_x509_certificate(bytes(a_cert, "ASCII")) + certs.append(a_cert.strip()) + a_cert = "" + if not certs: + raise serializers.ValidationError( + "No {} specified in string {}".format(which_cert, value) + ) + return "\n".join(certs) + "\n" + except ValueError as e: + raise serializers.ValidationError( + "Invalid {} specified, error '{}'".format(which_cert, e.args) + ) + + def validate(self, data): + """ + Check that proxy credentials are only provided completely and if a proxy is configured. + Adapted to work for both ModelSerializers (Remotes) and standard Serializers (Uploads). + """ + # Handle cases where we don't have an instance (e.g. Uploads) + instance = getattr(self, "instance", None) + partial = getattr(self, "partial", False) + + proxy_url = instance.proxy_url if instance and partial else None + proxy_url = data.get("proxy_url", proxy_url) + + proxy_username = instance.proxy_username if instance and partial else None + proxy_username = data.get("proxy_username", proxy_username) + + proxy_password = instance.proxy_password if instance and partial else None + proxy_password = data.get("proxy_password", proxy_password) + + if (proxy_username or proxy_password) and not proxy_url: + raise serializers.ValidationError( + _("proxy credentials cannot be specified without a proxy") + ) + + if bool(proxy_username) is not bool(proxy_password): + raise serializers.ValidationError( + _("proxy username and password can only be specified together") + ) + + return data diff --git a/pulpcore/app/serializers/repository.py b/pulpcore/app/serializers/repository.py index f4e02627600..a6abc870f9c 100644 --- a/pulpcore/app/serializers/repository.py +++ b/pulpcore/app/serializers/repository.py @@ -1,5 +1,4 @@ import os -from cryptography.x509 import load_pem_x509_certificate from gettext import gettext as _ from urllib.parse import urlparse @@ -7,7 +6,6 @@ from rest_framework_nested.serializers import NestedHyperlinkedModelSerializer from pulpcore.app import models, settings -from pulpcore.app.util import get_prn, reverse from pulpcore.app.serializers import ( DetailIdentityField, DetailRelatedField, @@ -21,7 +19,9 @@ HiddenFieldsMixin, pulp_labels_validator, ) +from pulpcore.app.serializers.base import RemoteNetworkConfigSerializer from pulpcore.app.util import extract_pk, raise_for_unknown_content_units +from pulpcore.app.util import get_prn, reverse class RepositorySerializer(ModelSerializer): @@ -73,47 +73,7 @@ class Meta: ) -def validate_certificate(which_cert, value): - """ - Validate and return *just* the certs and not any commentary that came along with them. - - Args: - which_cert: The attribute-name whose cert we're validating (only used for error-message). - value: The string being proposed as a certificate-containing PEM. - - Raises: - ValidationError: When the provided value has no or an invalid certificate. - - Returns: - The pem-string with *just* the validated BEGIN/END CERTIFICATE segments. - """ - if value: - try: - # Find any/all CERTIFICATE entries in the proposed PEM and let crypto validate them. - # NOTE: crypto/39 includes load_certificates(), which will let us remove this whole - # loop. But we want to fix the current problem on older supported branches that - # allow 38, so we do it ourselves for now - certs = list() - a_cert = "" - for line in value.split("\n"): - if "-----BEGIN CERTIFICATE-----" in line or a_cert: - a_cert += line + "\n" - if "-----END CERTIFICATE-----" in line: - load_pem_x509_certificate(bytes(a_cert, "ASCII")) - certs.append(a_cert.strip()) - a_cert = "" - if not certs: - raise serializers.ValidationError( - "No {} specified in string {}".format(which_cert, value) - ) - return "\n".join(certs) + "\n" - except ValueError as e: - raise serializers.ValidationError( - "Invalid {} specified, error '{}'".format(which_cert, e.args) - ) - - -class RemoteSerializer(ModelSerializer, HiddenFieldsMixin): +class RemoteSerializer(RemoteNetworkConfigSerializer, ModelSerializer, HiddenFieldsMixin): """ Every remote defined by a plugin should have a Remote serializer that inherits from this class. Please import from `pulpcore.plugin.serializers` rather than from this module directly. @@ -126,65 +86,6 @@ class RemoteSerializer(ModelSerializer, HiddenFieldsMixin): validators=[DomainUniqueValidator(queryset=models.Remote.objects.all())], ) url = serializers.CharField(help_text="The URL of an external content source.") - ca_cert = serializers.CharField( - help_text="A PEM encoded CA certificate used to validate the server " - "certificate presented by the remote server.", - required=False, - allow_null=True, - ) - client_cert = serializers.CharField( - help_text="A PEM encoded client certificate used for authentication.", - required=False, - allow_null=True, - ) - client_key = serializers.CharField( - help_text="A PEM encoded private key used for authentication.", - required=False, - allow_null=True, - write_only=True, - ) - tls_validation = serializers.BooleanField( - help_text="If True, TLS peer validation must be performed.", required=False - ) - proxy_url = serializers.CharField( - help_text="The proxy URL. Format: scheme://host:port", - required=False, - allow_null=True, - ) - proxy_username = serializers.CharField( - help_text="The username to authenticte to the proxy.", - required=False, - allow_null=True, - write_only=True, - ) - proxy_password = serializers.CharField( - help_text=_( - "The password to authenticate to the proxy. Extra leading and trailing whitespace " - "characters are not trimmed." - ), - required=False, - allow_null=True, - write_only=True, - trim_whitespace=False, - style={"input_type": "password"}, - ) - username = serializers.CharField( - help_text="The username to be used for authentication when syncing.", - required=False, - allow_null=True, - write_only=True, - ) - password = serializers.CharField( - help_text=_( - "The password to be used for authentication when syncing. Extra leading and trailing " - "whitespace characters are not trimmed." - ), - required=False, - allow_null=True, - write_only=True, - trim_whitespace=False, - style={"input_type": "password"}, - ) pulp_last_updated = serializers.DateTimeField( help_text="Timestamp of the most recent update of the remote.", read_only=True ) @@ -197,18 +98,13 @@ class RemoteSerializer(ModelSerializer, HiddenFieldsMixin): required=False, min_value=1, ) - max_retries = serializers.IntegerField( - help_text=( - "Maximum number of retry attempts after a download failure. If not set then the " - "default value (3) will be used." - ), - required=False, - allow_null=True, - ) policy = serializers.ChoiceField( help_text="The policy to use when downloading content.", choices=( - (models.Remote.IMMEDIATE, "When syncing, download all metadata and content now."), + ( + models.Remote.IMMEDIATE, + "When syncing, download all metadata and content now.", + ), ), default=models.Remote.IMMEDIATE, ) @@ -304,72 +200,22 @@ def validate_url(self, url): _("The path '{}' does not start with any of the allowed import paths").format(user_path) ) - def validate_proxy_url(self, value): - """ - Check, that the proxy_url does not contain credentials. - """ - if value and "@" in value: - raise serializers.ValidationError(_("proxy_url must not contain credentials")) - return value - - def validate_ca_cert(self, value): - return validate_certificate("ca_cert", value) - - def validate_client_cert(self, value): - return validate_certificate("client_cert", value) - - def validate(self, data): - """ - Check, that proxy credentials are only provided completely and if a proxy is configured. - """ - data = super().validate(data) - - proxy_url = self.instance.proxy_url if self.partial else None - proxy_url = data.get("proxy_url", proxy_url) - proxy_username = self.instance.proxy_username if self.partial else None - proxy_username = data.get("proxy_username", proxy_username) - proxy_password = self.instance.proxy_password if self.partial else None - proxy_password = data.get("proxy_password", proxy_password) - - if (proxy_username or proxy_password) and not proxy_url: - raise serializers.ValidationError( - _("proxy credentials cannot be specified without a proxy") - ) - - if bool(proxy_username) is not bool(proxy_password): - raise serializers.ValidationError( - _("proxy username and password can only be specified together") - ) - - return data - class Meta: abstract = True model = models.Remote - fields = ModelSerializer.Meta.fields + ( - "name", - "url", - "ca_cert", - "client_cert", - "client_key", - "tls_validation", - "proxy_url", - "proxy_username", - "proxy_password", - "username", - "password", - "pulp_labels", - "pulp_last_updated", - "download_concurrency", - "max_retries", - "policy", - "total_timeout", - "connect_timeout", - "sock_connect_timeout", - "sock_read_timeout", - "headers", - "rate_limit", - "hidden_fields", + fields = ( + ModelSerializer.Meta.fields + + ( + "name", + "url", + "pulp_labels", + "pulp_last_updated", + "download_concurrency", + "policy", + "rate_limit", + "hidden_fields", + ) + + tuple(RemoteNetworkConfigSerializer().get_fields().keys()) ) diff --git a/pulpcore/plugin/serializers/content.py b/pulpcore/plugin/serializers/content.py index bb89d830115..9be1102c76b 100644 --- a/pulpcore/plugin/serializers/content.py +++ b/pulpcore/plugin/serializers/content.py @@ -1,17 +1,15 @@ import json - from gettext import gettext as _ - from tempfile import NamedTemporaryFile +from urllib.parse import urlparse from django.db import DatabaseError from rest_framework.serializers import ( CharField, FileField, - Serializer, ValidationError, ) -from urllib.parse import urlparse + from pulpcore.app.files import PulpTemporaryUploadedFile from pulpcore.app.models import Artifact, PulpTemporaryFile, Remote, Upload, UploadChunk from pulpcore.app.serializers import ( @@ -20,10 +18,11 @@ NoArtifactContentSerializer, SingleArtifactContentSerializer, ) +from pulpcore.app.serializers.base import RemoteNetworkConfigSerializer from pulpcore.app.util import get_domain_pk -class UploadSerializerFieldsMixin(Serializer): +class UploadSerializerFieldsMixin(RemoteNetworkConfigSerializer): """A mixin class that contains fields and methods common to content upload serializers.""" REMOTE_CLASS = Remote @@ -46,6 +45,14 @@ class UploadSerializerFieldsMixin(Serializer): write_only=True, ) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + network_fields = RemoteNetworkConfigSerializer().get_fields() + for field_name, field_obj in network_fields.items(): + field_obj.write_only = True + self.fields[field_name] = field_obj + def validate_file_url(self, value): """Parse out the auth if provided.""" url_parse = urlparse(value) @@ -74,9 +81,11 @@ def download(self, url, expected_digests=None, expected_size=None): PulpTemporaryUploadedFile: the downloaded file """ remote = self.REMOTE_CLASS(url=url, **self.context.get("remote_kwargs", {})) + downloader = remote.get_downloader( url=url, expected_digests=expected_digests, expected_size=expected_size ) + result = downloader.fetch() return PulpTemporaryUploadedFile.from_file(open(result.path, "rb")) @@ -85,6 +94,15 @@ def validate(self, data): data = super().validate(data) + network_keys = set(RemoteNetworkConfigSerializer().get_fields().keys()) + remote_kwargs = self.context.get("remote_kwargs", {}) + + for key in network_keys: + if key in data: + remote_kwargs[key] = data.pop(key) + + self.context["remote_kwargs"] = remote_kwargs + if self.context.get("request") is not None: upload_fields = { field @@ -146,10 +164,8 @@ def create(self, validated_data): return result class Meta: - fields = ( - "file", - "upload", - "file_url", + fields = ("file", "upload", "file_url") + tuple( + RemoteNetworkConfigSerializer().get_fields().keys() ) @@ -251,7 +267,8 @@ def deferred_validate(self, data): # if artifact already exists, let's use it try: artifact = Artifact.objects.get( - sha256=file.hashers["sha256"].hexdigest(), pulp_domain=get_domain_pk() + sha256=file.hashers["sha256"].hexdigest(), + pulp_domain=get_domain_pk(), ) if not artifact.pulp_domain.get_storage().exists(artifact.file.name): artifact.file = file