From c5aec55b42f214dbf518ddd9a099d66b4dc27224 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 21 Oct 2019 13:32:22 -0700 Subject: [PATCH 1/2] resolver: less debug Observed lots of debug messages from resolver (and associated high CPU usage because of lots of logging) when debug logging is turned on. Here is some data from a real system: > $ journalctl -u docker --since="2019-10-21 21:00:00" --until="2019-10-21 22:00:00" | wc -l > 188621 > $ journalctl -u docker --since="2019-10-21 21:00:00" --until="2019-10-21 22:00:00" | grep -E 'Name To resolve: |\[resolver\] ' | wc -l > 186319 So, it was about 200000 lines logger for just one hour, and about 99% of that are from resolver. While this might be the peculiarity of a particular setup, the number of such messages still seem way too excessive. Remove the ones that are not errors. In case those are needed, one have to recompile this package with debug_log build tag. Signed-off-by: Kir Kolyshkin --- debug_log.go | 8 ++++++++ no_debug_log.go | 5 +++++ resolver.go | 12 ++++++------ sandbox.go | 6 +++--- 4 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 debug_log.go create mode 100644 no_debug_log.go diff --git a/debug_log.go b/debug_log.go new file mode 100644 index 0000000000..504ba62d54 --- /dev/null +++ b/debug_log.go @@ -0,0 +1,8 @@ +// +build debug_log +package libnetwork + +import "github.com/sirupsen/logrus" + +func debugf(format string, args ...interface{}) { + logrus.Debugf(format, args...) +} diff --git a/no_debug_log.go b/no_debug_log.go new file mode 100644 index 0000000000..fc9603c963 --- /dev/null +++ b/no_debug_log.go @@ -0,0 +1,5 @@ +// +build !debug_log +package libnetwork + +func debugf(format string, args ...interface{}) { +} diff --git a/resolver.go b/resolver.go index 8e3158e559..b8a4840057 100644 --- a/resolver.go +++ b/resolver.go @@ -255,7 +255,7 @@ func (r *resolver) handleIPQuery(name string, query *dns.Msg, ipType int) (*dns. return nil, nil } - logrus.Debugf("[resolver] lookup for %s: IP %v", name, addr) + debugf("[resolver] lookup for %s: IP %v", name, addr) resp := createRespMsg(query) if len(addr) > 1 { @@ -296,7 +296,7 @@ func (r *resolver) handlePTRQuery(ptr string, query *dns.Msg) (*dns.Msg, error) return nil, nil } - logrus.Debugf("[resolver] lookup for IP %s: name %s", parts[0], host) + debugf("[resolver] lookup for IP %s: name %s", parts[0], host) fqdn := dns.Fqdn(host) resp := new(dns.Msg) @@ -453,7 +453,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { continue } queryType := dns.TypeToString[query.Question[0].Qtype] - logrus.Debugf("[resolver] query %s (%s) from %s, forwarding to %s:%s", name, queryType, + debugf("[resolver] query %s (%s) from %s, forwarding to %s:%s", name, queryType, extConn.LocalAddr().String(), proto, extDNS.IPStr) // Timeout has to be set for every IO operation. @@ -522,17 +522,17 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { case dns.TypeA: answers++ ip := rr.(*dns.A).A - logrus.Debugf("[resolver] received A record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr) + debugf("[resolver] received A record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr) r.backend.HandleQueryResp(h.Name, ip) case dns.TypeAAAA: answers++ ip := rr.(*dns.AAAA).AAAA - logrus.Debugf("[resolver] received AAAA record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr) + debugf("[resolver] received AAAA record %q for %q from %s:%s", ip, h.Name, proto, extDNS.IPStr) r.backend.HandleQueryResp(h.Name, ip) } } if resp.Answer == nil || answers == 0 { - logrus.Debugf("[resolver] external DNS %s:%s did not return any %s records for %q", proto, extDNS.IPStr, queryType, name) + debugf("[resolver] external DNS %s:%s did not return any %s records for %q", proto, extDNS.IPStr, queryType, name) } resp.Compress = true break diff --git a/sandbox.go b/sandbox.go index 03c9215786..28c53f83da 100644 --- a/sandbox.go +++ b/sandbox.go @@ -439,7 +439,7 @@ func (sb *sandbox) HandleQueryResp(name string, ip net.IP) { func (sb *sandbox) ResolveIP(ip string) string { var svc string - logrus.Debugf("IP To resolve %v", ip) + debugf("IP to resolve %v", ip) for _, ep := range sb.getConnectedEndpoints() { n := ep.getNetwork() @@ -466,7 +466,7 @@ func (sb *sandbox) ResolveService(name string) ([]*net.SRV, []net.IP) { srv := []*net.SRV{} ip := []net.IP{} - logrus.Debugf("Service name To resolve: %v", name) + debugf("Service name to resolve: %v", name) // There are DNS implementations that allow SRV queries for names not in // the format defined by RFC 2782. Hence specific validations checks are @@ -529,7 +529,7 @@ func (sb *sandbox) ResolveName(name string, ipType int) ([]net.IP, bool) { // {a.b in network c.d}, // {a in network b.c.d}, - logrus.Debugf("Name To resolve: %v", name) + debugf("Name to resolve: %v", name) name = strings.TrimSuffix(name, ".") reqName := []string{name} networkName := []string{""} From 0f20b88efc813ff7340ae345ea01d27f0dcd241e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Sun, 17 Nov 2019 19:06:55 -0800 Subject: [PATCH 2/2] More logs cleanup Use more `debugf` in places that look definitely like developer's debug which is most probably useless to an end user. Promote some debug errors that looks more like warnings to use Warnf rather than Debugf. Signed-off-by: Kir Kolyshkin --- resolver.go | 14 +++++++------- sandbox.go | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/resolver.go b/resolver.go index b8a4840057..d9b34a3a07 100644 --- a/resolver.go +++ b/resolver.go @@ -247,7 +247,7 @@ func (r *resolver) handleIPQuery(name string, query *dns.Msg, ipType int) (*dns. if addr == nil && ipv6Miss { // Send a reply without any Answer sections - logrus.Debugf("[resolver] lookup name %s present without IPv6 address", name) + logrus.Warnf("[resolver] lookup name %s present without IPv6 address", name) resp := createRespMsg(query) return resp, nil } @@ -477,7 +477,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { err = co.WriteMsg(query) if err != nil { r.forwardQueryEnd() - logrus.Debugf("[resolver] send to DNS server failed, %s", err) + logrus.Warnf("[resolver] send to DNS server failed, %s", err) continue } @@ -486,24 +486,24 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { // client can retry over TCP if err != nil && err != dns.ErrTruncated { r.forwardQueryEnd() - logrus.Debugf("[resolver] read from DNS server failed, %s", err) + logrus.Warnf("[resolver] read from DNS server failed, %s", err) continue } r.forwardQueryEnd() if resp == nil { - logrus.Debugf("[resolver] external DNS %s:%s returned empty response for %q", proto, extDNS.IPStr, name) + logrus.Warnf("[resolver] external DNS %s:%s returned empty response for %q", proto, extDNS.IPStr, name) break } switch resp.Rcode { case dns.RcodeServerFailure, dns.RcodeRefused: // Server returned FAILURE: continue with the next external DNS server // Server returned REFUSED: this can be a transitional status, so continue with the next external DNS server - logrus.Debugf("[resolver] external DNS %s:%s responded with %s for %q", proto, extDNS.IPStr, statusString(resp.Rcode), name) + logrus.Warnf("[resolver] external DNS %s:%s responded with %s for %q", proto, extDNS.IPStr, statusString(resp.Rcode), name) continue case dns.RcodeNameError: // Server returned NXDOMAIN. Stop resolution if it's an authoritative answer (see RFC 8020: https://tools.ietf.org/html/rfc8020#section-2) - logrus.Debugf("[resolver] external DNS %s:%s responded with %s for %q", proto, extDNS.IPStr, statusString(resp.Rcode), name) + logrus.Warnf("[resolver] external DNS %s:%s responded with %s for %q", proto, extDNS.IPStr, statusString(resp.Rcode), name) if resp.Authoritative { break } @@ -512,7 +512,7 @@ func (r *resolver) ServeDNS(w dns.ResponseWriter, query *dns.Msg) { // All is well default: // Server gave some error. Log the error, and continue with the next external DNS server - logrus.Debugf("[resolver] external DNS %s:%s responded with %s (code %d) for %q", proto, extDNS.IPStr, statusString(resp.Rcode), resp.Rcode, name) + logrus.Warnf("[resolver] external DNS %s:%s responded with %s (code %d) for %q", proto, extDNS.IPStr, statusString(resp.Rcode), resp.Rcode, name) continue } answers := 0 diff --git a/sandbox.go b/sandbox.go index 28c53f83da..01286e4475 100644 --- a/sandbox.go +++ b/sandbox.go @@ -637,7 +637,7 @@ func (sb *sandbox) resolveName(req string, networkName string, epList []*endpoin func (sb *sandbox) SetKey(basePath string) error { start := time.Now() defer func() { - logrus.Debugf("sandbox set key processing took %s for container %s", time.Since(start), sb.ContainerID()) + debugf("sandbox set key processing took %s for container %s", time.Since(start), sb.ContainerID()) }() if basePath == "" { @@ -691,7 +691,7 @@ func (sb *sandbox) SetKey(basePath string) error { } func (sb *sandbox) EnableService() (err error) { - logrus.Debugf("EnableService %s START", sb.containerID) + debugf("EnableService %s START", sb.containerID) defer func() { if err != nil { sb.DisableService() @@ -705,12 +705,12 @@ func (sb *sandbox) EnableService() (err error) { ep.enableService() } } - logrus.Debugf("EnableService %s DONE", sb.containerID) + debugf("EnableService %s DONE", sb.containerID) return nil } func (sb *sandbox) DisableService() (err error) { - logrus.Debugf("DisableService %s START", sb.containerID) + debugf("DisableService %s START", sb.containerID) failedEps := []string{} defer func() { if len(failedEps) > 0 { @@ -726,7 +726,7 @@ func (sb *sandbox) DisableService() (err error) { ep.disableService() } } - logrus.Debugf("DisableService %s DONE", sb.containerID) + debugf("DisableService %s DONE", sb.containerID) return nil } @@ -735,7 +735,7 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) { // Only remove the interfaces owned by this endpoint from the sandbox. if ep.hasInterface(i.SrcName()) { if err := i.Remove(); err != nil { - logrus.Debugf("Remove interface %s failed: %v", i.SrcName(), err) + logrus.WithError(err).Warnf("Remove interface %s failed", i.SrcName()) } } } @@ -749,7 +749,7 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) { if len(vip) > 0 && lbModeIsDSR { ipNet := &net.IPNet{IP: vip, Mask: net.CIDRMask(32, 32)} if err := osSbox.RemoveAliasIP(osSbox.GetLoopbackIfaceName(), ipNet); err != nil { - logrus.WithError(err).Debugf("failed to remove virtual ip %v to loopback", ipNet) + logrus.WithError(err).Warnf("Failed to remove virtual ip %v to loopback", ipNet) } } @@ -760,7 +760,7 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) { // Remove non-interface routes. for _, r := range joinInfo.StaticRoutes { if err := osSbox.RemoveStaticRoute(r); err != nil { - logrus.Debugf("Remove route failed: %v", err) + logrus.WithError(err).Warnf("Remove route failed") } } }