Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion cmd/catalogd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,13 @@ func init() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(ocv1.AddToScheme(scheme))
ctrl.SetLogger(klog.NewKlogr())

// Configure global HTTP transport to use custom dialer for all HTTP clients
// including the containers/image library used for pulling from registries.
// The custom dialer tries addresses in DNS order without Happy Eyeballs' 300ms delay.
if err := httputil.ConfigureDefaultTransport(); err != nil {
setupLog.Error(err, "Failed to configure custom dialer")
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about we wrapper this one to avoid pacnic?
Something like

// dialerRoundTripper injects DialContext into any *http.Transport it sees.
// If the underlying RT isn't a *http.Transport, it just passes through.
type dialerRoundTripper struct {
	base http.RoundTripper
}

func (d dialerRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) {
	if tr, ok := d.base.(*http.Transport); ok && tr != nil {
		cl := tr.Clone()
		cl.DialContext = httputil.IPv4PreferringDialContext
		return cl.RoundTrip(r)
	}
	return d.base.RoundTrip(r)
}

func init() {
	if http.DefaultTransport == nil {
		http.DefaultTransport = http.Transport{}
	}
	http.DefaultTransport = dialerRoundTripper{base: http.DefaultTransport}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, a panic indicates that we did something unexpected (wrong) that caused this to happen, so we want the panic to occur. See my other comment:

That's a lot more code, for not much benefit, and I'm concerned with us setting all those http.Transport{} values to something fixed. Better to indicate an error, IMHO, since it would be a coding error on our part if the original code does not work.
There should be NO reason why the casting fails without a golang code change

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved it to a common function.

}

func main() {
Expand Down Expand Up @@ -274,7 +281,10 @@ func run(ctx context.Context) error {
}

// Create manager
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a feature gate for this one ?
OR falg option to be default ipv6 and then with flag ipv4 something like?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. It's not feature-gate "worthy", since it's attempting to fix a bug. The behavior is mimicking existing behavior without any explicit default of IPv4 or IPv6. So, there's really no choice between the two.

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
restConfig := ctrl.GetConfigOrDie()
// Configure REST client to use custom dialer without Happy Eyeballs delay
restConfig.Dial = httputil.ImmediateFallbackDialContext
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
Metrics: metricsServerOptions,
PprofBindAddress: cfg.pprofAddr,
Expand Down
9 changes: 9 additions & 0 deletions cmd/operator-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,13 @@ func init() {
tlsprofiles.AddFlags(flags)

ctrl.SetLogger(klog.NewKlogr())

// Configure global HTTP transport to use custom dialer for all HTTP clients
// including the containers/image library used for pulling from registries.
// The custom dialer tries addresses in DNS order without Happy Eyeballs' 300ms delay.
if err := httputil.ConfigureDefaultTransport(); err != nil {
setupLog.Error(err, "Failed to configure custom dialer")
}
}
func validateMetricsFlags() error {
if (cfg.certFile != "" && cfg.keyFile == "") || (cfg.certFile == "" && cfg.keyFile != "") {
Expand Down Expand Up @@ -325,6 +332,8 @@ func run() error {
}

restConfig := ctrl.GetConfigOrDie()
// Configure REST client to use custom dialer without Happy Eyeballs delay
restConfig.Dial = httputil.ImmediateFallbackDialContext
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme.Scheme,
Metrics: metricsServerOptions,
Expand Down
107 changes: 103 additions & 4 deletions internal/shared/util/http/httputil.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,107 @@
package http

import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"time"

"k8s.io/klog/v2"
)

// ImmediateFallbackDialContext creates a DialContext function that tries connection
// attempts sequentially in the order returned by DNS, without the 300ms Happy Eyeballs
// delay. This respects DNS server ordering while eliminating the racing delay.
//
// Go's standard Happy Eyeballs implementation (RFC 6555/8305) is in the net package:
// https://cs.opensource.google/go/go/+/refs/tags/go1.25.3:src/net/dial.go;l=525 (DialContext)
// https://cs.opensource.google/go/go/+/refs/tags/go1.25.3:src/net/dial.go;l=585 (dialParallel)
func ImmediateFallbackDialContext(ctx context.Context, network, address string) (net.Conn, error) {
// Split the address into host and port
host, port, err := net.SplitHostPort(address)
if err != nil {
return nil, err
}

klog.InfoS("Resolving DNS for connection", "host", host, "port", port, "network", network)

// Resolve all IP addresses for the host
ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host)
if err != nil {
klog.ErrorS(err, "DNS resolution failed", "host", host)
return nil, err
}

if len(ips) == 0 {
err := fmt.Errorf("no IP addresses found for host %s", host)
klog.ErrorS(err, "DNS resolution returned no addresses", "host", host)
return nil, err
}

// Convert IPs to strings for logging
ipStrings := make([]string, 0, len(ips))
for _, ip := range ips {
ipStrings = append(ipStrings, ip.String())
}
klog.InfoS("DNS resolution complete", "host", host, "addressCount", len(ips), "addresses", ipStrings)

dialer := &net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}

// Try each address sequentially in the order DNS returned them
var lastErr error
for i, ip := range ips {
// Determine address type and dial network
var addrType, dialNetwork string
if ip.To4() != nil {
addrType = "IPv4"
dialNetwork = network
if network == "tcp" {
dialNetwork = "tcp4"
}
} else {
addrType = "IPv6"
dialNetwork = network
if network == "tcp" {
dialNetwork = "tcp6"
}
}

target := net.JoinHostPort(ip.String(), port)
klog.InfoS("Attempting connection", "host", host, "type", addrType,
"address", ip.String(), "port", port, "attempt", i+1, "of", len(ips))

conn, err := dialer.DialContext(ctx, dialNetwork, target)
if err == nil {
klog.InfoS("Successfully connected", "host", host, "type", addrType,
"address", ip.String(), "port", port)
return conn, nil
}
klog.ErrorS(err, "Connection failed", "host", host, "type", addrType,
"address", ip.String(), "port", port, "attempt", i+1, "of", len(ips))
lastErr = err
}

klog.ErrorS(lastErr, "All connection attempts failed", "host", host, "totalAttempts", len(ips))
return nil, lastErr
}

// ConfigureDefaultTransport configures http.DefaultTransport to use ImmediateFallbackDialContext.
// This affects all HTTP clients that use the default transport, including the containers/image
// library used for pulling from registries. Returns an error if DefaultTransport is not *http.Transport.
func ConfigureDefaultTransport() error {
transport, ok := http.DefaultTransport.(*http.Transport)
if !ok {
return fmt.Errorf("http.DefaultTransport is not *http.Transport, cannot configure custom dialer")
}
transport.DialContext = ImmediateFallbackDialContext
return nil
}

func BuildHTTPClient(cpw *CertPoolWatcher) (*http.Client, error) {
httpClient := &http.Client{Timeout: 10 * time.Second}

Expand All @@ -14,13 +110,16 @@ func BuildHTTPClient(cpw *CertPoolWatcher) (*http.Client, error) {
return nil, err
}

tlsConfig := &tls.Config{
// Clone the default transport to inherit custom dialer and other defaults
Copy link
Contributor

@camilamacedo86 camilamacedo86 Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about something like bellow. By updating BuildHTTPClient to avoid assuming http.DefaultTransport is always *http.Transport. Right now it can panic or fail if something replaces the default transport.

A simple fix is: try to clone http.DefaultTransport when it’s a *http.Transport, otherwise fall back to a new http.Transport with basic defaults. Then set TLSClientConfig on that single transport.

This also seems to removes the duplicate tlsTransport := &http.Transport{...} block, so we don’t overwrite the cloned transport.

base, ok := http.DefaultTransport.(*http.Transport)

var tlsTransport *http.Transport
if ok && base != nil {
    tlsTransport = base.Clone()
} else {
    // Fallback: avoid panic if someone replaced http.DefaultTransport.
    tlsTransport = &http.Transport{
        Proxy: http.ProxyFromEnvironment,
        // DialContext left as default unless your code sets it elsewhere
        ForceAttemptHTTP2:     true,
        MaxIdleConns:          100,
        IdleConnTimeout:       90 * time.Second,
        TLSHandshakeTimeout:   10 * time.Second,
        ExpectContinueTimeout: 1 * time.Second,
    }
}

tlsTransport.TLSClientConfig = &tls.Config{
    RootCAs:    pool,
    MinVersion: tls.VersionTLS12,
}

httpClient.Transport = tlsTransport

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a lot more code, for not much benefit, and I'm concerned with us setting all those http.Transport{} values to something fixed. Better to indicate an error, IMHO, since it would be a coding error on our part if the original code does not work.
There should be NO reason why the casting fails without a golang code change

transport, ok := http.DefaultTransport.(*http.Transport)
if !ok {
return nil, fmt.Errorf("http.DefaultTransport is not *http.Transport, cannot build HTTP client")
}
tlsTransport := transport.Clone()
tlsTransport.TLSClientConfig = &tls.Config{
RootCAs: pool,
MinVersion: tls.VersionTLS12,
}
tlsTransport := &http.Transport{
TLSClientConfig: tlsConfig,
}
httpClient.Transport = tlsTransport

return httpClient, nil
Expand Down
83 changes: 83 additions & 0 deletions internal/shared/util/http/httputil_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package http

import (
"context"
"net"
"testing"
)

func TestImmediateFallbackDialContext(t *testing.T) {
tests := []struct {
name string
address string
wantFail bool
minExpectedAddrs int // minimum addresses we expect to find
Comment on lines +13 to +14
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lets group expectations from the test input, so that we can read them better:

want struct {
  fail bool
  minExpectedAddrs int
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure adding a new structure inside another structure is of much benefit, TBH.

}{
{
name: "dual-stack hostname tries addresses in DNS order",
address: "localhost:80",
wantFail: true, // nothing listening on port 80
minExpectedAddrs: 1, // should have at least one address
},
{
name: "IPv4-only hostname",
address: "127.0.0.1:80",
wantFail: true,
minExpectedAddrs: 1,
},
{
name: "IPv6-only hostname",
address: "[::1]:80",
wantFail: true,
minExpectedAddrs: 1,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx := context.Background()

// Parse the address to extract host for DNS lookup
host, _, err := net.SplitHostPort(tt.address)
if err != nil {
t.Fatalf("Failed to split host:port: %v", err)
}

// Look up IPs to verify DNS resolution works
ips, err := net.DefaultResolver.LookupIP(ctx, "ip", host)
if err != nil {
t.Skipf("DNS resolution failed for %s: %v (this is OK for test environments)", host, err)
}

if len(ips) < tt.minExpectedAddrs {
t.Skip("Not enough IP addresses found for hostname")
}

t.Logf("DNS returned %d address(es) - will try each in order:", len(ips))

// Log all addresses for debugging
for i, ip := range ips {
ipType := "IPv6"
if ip.To4() != nil {
ipType = "IPv4"
}
t.Logf(" [%d] %s (%s)", i, ip.String(), ipType)
}

// Actually call the dialer function
_, err = ImmediateFallbackDialContext(ctx, "tcp", tt.address)

if tt.wantFail {
if err == nil {
t.Errorf("Expected connection to fail, but it succeeded")
} else {
t.Logf("Connection failed as expected: %v", err)
}
} else {
if err != nil {
t.Errorf("Expected connection to succeed, but got error: %v", err)
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in what cases we are failing the tests?

})
}
}
Loading