diff --git a/cmd/catalogd/main.go b/cmd/catalogd/main.go index af2463e2c6..e96d9a619f 100644 --- a/cmd/catalogd/main.go +++ b/cmd/catalogd/main.go @@ -149,6 +149,13 @@ func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(ocv1.AddToScheme(scheme)) ctrl.SetLogger(klog.NewKlogr()) + + // Configure global HTTP transport to use custom dialer for all HTTP clients + // including the containers/image library used for pulling from registries. + // The custom dialer tries addresses in DNS order without Happy Eyeballs' 300ms delay. + if err := httputil.ConfigureDefaultTransport(); err != nil { + setupLog.Error(err, "Failed to configure custom dialer") + } } func main() { @@ -274,7 +281,10 @@ func run(ctx context.Context) error { } // Create manager - mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + restConfig := ctrl.GetConfigOrDie() + // Configure REST client to use custom dialer without Happy Eyeballs delay + restConfig.Dial = httputil.ImmediateFallbackDialContext + mgr, err := ctrl.NewManager(restConfig, ctrl.Options{ Scheme: scheme, Metrics: metricsServerOptions, PprofBindAddress: cfg.pprofAddr, diff --git a/cmd/operator-controller/main.go b/cmd/operator-controller/main.go index dd0bc98f6d..9d55883e62 100644 --- a/cmd/operator-controller/main.go +++ b/cmd/operator-controller/main.go @@ -198,6 +198,13 @@ func init() { tlsprofiles.AddFlags(flags) ctrl.SetLogger(klog.NewKlogr()) + + // Configure global HTTP transport to use custom dialer for all HTTP clients + // including the containers/image library used for pulling from registries. + // The custom dialer tries addresses in DNS order without Happy Eyeballs' 300ms delay. + if err := httputil.ConfigureDefaultTransport(); err != nil { + setupLog.Error(err, "Failed to configure custom dialer") + } } func validateMetricsFlags() error { if (cfg.certFile != "" && cfg.keyFile == "") || (cfg.certFile == "" && cfg.keyFile != "") { @@ -325,6 +332,8 @@ func run() error { } restConfig := ctrl.GetConfigOrDie() + // Configure REST client to use custom dialer without Happy Eyeballs delay + restConfig.Dial = httputil.ImmediateFallbackDialContext mgr, err := ctrl.NewManager(restConfig, ctrl.Options{ Scheme: scheme.Scheme, Metrics: metricsServerOptions, diff --git a/internal/shared/util/http/httputil.go b/internal/shared/util/http/httputil.go index f5a982d2de..39288ed5c4 100644 --- a/internal/shared/util/http/httputil.go +++ b/internal/shared/util/http/httputil.go @@ -1,11 +1,107 @@ package http import ( + "context" "crypto/tls" + "fmt" + "net" "net/http" "time" + + "k8s.io/klog/v2" ) +// ImmediateFallbackDialContext creates a DialContext function that tries connection +// attempts sequentially in the order returned by DNS, without the 300ms Happy Eyeballs +// delay. This respects DNS server ordering while eliminating the racing delay. +// +// Go's standard Happy Eyeballs implementation (RFC 6555/8305) is in the net package: +// https://cs.opensource.google/go/go/+/refs/tags/go1.25.3:src/net/dial.go;l=525 (DialContext) +// https://cs.opensource.google/go/go/+/refs/tags/go1.25.3:src/net/dial.go;l=585 (dialParallel) +func ImmediateFallbackDialContext(ctx context.Context, network, address string) (net.Conn, error) { + // Split the address into host and port + host, port, err := net.SplitHostPort(address) + if err != nil { + return nil, err + } + + klog.InfoS("Resolving DNS for connection", "host", host, "port", port, "network", network) + + // Resolve all IP addresses for the host + ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host) + if err != nil { + klog.ErrorS(err, "DNS resolution failed", "host", host) + return nil, err + } + + if len(ips) == 0 { + err := fmt.Errorf("no IP addresses found for host %s", host) + klog.ErrorS(err, "DNS resolution returned no addresses", "host", host) + return nil, err + } + + // Convert IPs to strings for logging + ipStrings := make([]string, 0, len(ips)) + for _, ip := range ips { + ipStrings = append(ipStrings, ip.String()) + } + klog.InfoS("DNS resolution complete", "host", host, "addressCount", len(ips), "addresses", ipStrings) + + dialer := &net.Dialer{ + Timeout: 30 * time.Second, + KeepAlive: 30 * time.Second, + } + + // Try each address sequentially in the order DNS returned them + var lastErr error + for i, ip := range ips { + // Determine address type and dial network + var addrType, dialNetwork string + if ip.To4() != nil { + addrType = "IPv4" + dialNetwork = network + if network == "tcp" { + dialNetwork = "tcp4" + } + } else { + addrType = "IPv6" + dialNetwork = network + if network == "tcp" { + dialNetwork = "tcp6" + } + } + + target := net.JoinHostPort(ip.String(), port) + klog.InfoS("Attempting connection", "host", host, "type", addrType, + "address", ip.String(), "port", port, "attempt", i+1, "of", len(ips)) + + conn, err := dialer.DialContext(ctx, dialNetwork, target) + if err == nil { + klog.InfoS("Successfully connected", "host", host, "type", addrType, + "address", ip.String(), "port", port) + return conn, nil + } + klog.ErrorS(err, "Connection failed", "host", host, "type", addrType, + "address", ip.String(), "port", port, "attempt", i+1, "of", len(ips)) + lastErr = err + } + + klog.ErrorS(lastErr, "All connection attempts failed", "host", host, "totalAttempts", len(ips)) + return nil, lastErr +} + +// ConfigureDefaultTransport configures http.DefaultTransport to use ImmediateFallbackDialContext. +// This affects all HTTP clients that use the default transport, including the containers/image +// library used for pulling from registries. Returns an error if DefaultTransport is not *http.Transport. +func ConfigureDefaultTransport() error { + transport, ok := http.DefaultTransport.(*http.Transport) + if !ok { + return fmt.Errorf("http.DefaultTransport is not *http.Transport, cannot configure custom dialer") + } + transport.DialContext = ImmediateFallbackDialContext + return nil +} + func BuildHTTPClient(cpw *CertPoolWatcher) (*http.Client, error) { httpClient := &http.Client{Timeout: 10 * time.Second} @@ -14,13 +110,16 @@ func BuildHTTPClient(cpw *CertPoolWatcher) (*http.Client, error) { return nil, err } - tlsConfig := &tls.Config{ + // Clone the default transport to inherit custom dialer and other defaults + transport, ok := http.DefaultTransport.(*http.Transport) + if !ok { + return nil, fmt.Errorf("http.DefaultTransport is not *http.Transport, cannot build HTTP client") + } + tlsTransport := transport.Clone() + tlsTransport.TLSClientConfig = &tls.Config{ RootCAs: pool, MinVersion: tls.VersionTLS12, } - tlsTransport := &http.Transport{ - TLSClientConfig: tlsConfig, - } httpClient.Transport = tlsTransport return httpClient, nil diff --git a/internal/shared/util/http/httputil_test.go b/internal/shared/util/http/httputil_test.go new file mode 100644 index 0000000000..4d3bc8b41b --- /dev/null +++ b/internal/shared/util/http/httputil_test.go @@ -0,0 +1,83 @@ +package http + +import ( + "context" + "net" + "testing" +) + +func TestImmediateFallbackDialContext(t *testing.T) { + tests := []struct { + name string + address string + wantFail bool + minExpectedAddrs int // minimum addresses we expect to find + }{ + { + name: "dual-stack hostname tries addresses in DNS order", + address: "localhost:80", + wantFail: true, // nothing listening on port 80 + minExpectedAddrs: 1, // should have at least one address + }, + { + name: "IPv4-only hostname", + address: "127.0.0.1:80", + wantFail: true, + minExpectedAddrs: 1, + }, + { + name: "IPv6-only hostname", + address: "[::1]:80", + wantFail: true, + minExpectedAddrs: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + + // Parse the address to extract host for DNS lookup + host, _, err := net.SplitHostPort(tt.address) + if err != nil { + t.Fatalf("Failed to split host:port: %v", err) + } + + // Look up IPs to verify DNS resolution works + ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host) + if err != nil { + t.Skipf("DNS resolution failed for %s: %v (this is OK for test environments)", host, err) + } + + if len(ips) < tt.minExpectedAddrs { + t.Skip("Not enough IP addresses found for hostname") + } + + t.Logf("DNS returned %d address(es) - will try each in order:", len(ips)) + + // Log all addresses for debugging + for i, ip := range ips { + ipType := "IPv6" + if ip.To4() != nil { + ipType = "IPv4" + } + t.Logf(" [%d] %s (%s)", i, ip.String(), ipType) + } + + // Actually call the dialer function + _, err = ImmediateFallbackDialContext(ctx, "tcp", tt.address) + + if tt.wantFail { + if err == nil { + t.Errorf("Expected connection to fail, but it succeeded") + } else { + t.Logf("Connection failed as expected: %v", err) + } + } else { + if err != nil { + t.Errorf("Expected connection to succeed, but got error: %v", err) + } + } + }) + } +}