diff --git a/fe/fe-common/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginManager.java b/fe/fe-common/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginManager.java index 86ec56874713d6..ca0056297117e8 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginManager.java +++ b/fe/fe-common/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginManager.java @@ -54,6 +54,7 @@ public class TrinoConnectorPluginManager implements PluginInstaller { .add("org.openjdk.jol.") .add("io.opentelemetry.api.") .add("io.opentelemetry.context.") + .add("org.apache.hadoop.metrics2.") .build(); private final ConcurrentMap connectorFactories = new ConcurrentHashMap(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/DorisFE.java b/fe/fe-core/src/main/java/org/apache/doris/DorisFE.java index ace11547572498..89d8bea48236c5 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/DorisFE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/DorisFE.java @@ -188,6 +188,12 @@ public static void start(String dorisHomeDir, String pidDir, String[] args, Star fuzzyConfigs(); + // Replace Hadoop's DefaultMetricsSystem with a no-op to prevent memory leak. + // Each FileSystem.get() registers metrics (MetricCounterLong, MBeanAttributeInfo, etc.) + // that are never unregistered, causing unbounded growth. Doris FE does not use Hadoop metrics. + org.apache.hadoop.metrics2.lib.DefaultMetricsSystem + .setInstance(new org.apache.doris.common.NopMetricsSystem()); + LOG.info("Doris FE starting..."); FrontendOptions.init(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogConfigCache.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogConfigCache.java index 2cf7f065770ee6..7c056679901fec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogConfigCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogConfigCache.java @@ -183,6 +183,7 @@ public void remove(long id) { lock.writeLock().lock(); try { dbTableBinlogEnableMap.remove(id); + tableTypeMap.remove(id); } finally { lock.writeLock().unlock(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java index 0d17efdb6de861..d7857bde307a63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/binlog/BinlogManager.java @@ -709,6 +709,7 @@ public void removeDB(long dbId) { } finally { lock.writeLock().unlock(); } + binlogConfigCache.remove(dbId); } public void removeTable(long dbId, long tableId) { @@ -721,6 +722,7 @@ public void removeTable(long dbId, long tableId) { } finally { lock.writeLock().unlock(); } + binlogConfigCache.remove(tableId); } private static void writeTBinlogToStream(DataOutputStream dos, TBinlog binlog) throws TException, IOException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 07fcbe27c0c62c..6988d45ee4715f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -711,7 +711,9 @@ public Env(boolean isCheckpointCatalog) { this.lock = new MonitoredReentrantLock(true); this.backupHandler = new BackupHandler(this); this.metaDir = Config.meta_dir; - this.publishVersionDaemon = new PublishVersionDaemon(); + if (!isCheckpointCatalog) { + this.publishVersionDaemon = new PublishVersionDaemon(); + } this.deleteHandler = new DeleteHandler(); this.dbUsedDataQuotaInfoCollector = new DbUsedDataQuotaInfoCollector(); this.partitionInfoCollector = new PartitionInfoCollector(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/NopMetricsSystem.java b/fe/fe-core/src/main/java/org/apache/doris/common/NopMetricsSystem.java new file mode 100644 index 00000000000000..b04c4acc3b1450 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/common/NopMetricsSystem.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common; + +import org.apache.hadoop.metrics2.MetricsSink; +import org.apache.hadoop.metrics2.MetricsSource; +import org.apache.hadoop.metrics2.MetricsSystem; + +/** + * A no-op MetricsSystem implementation to prevent Hadoop metrics2 memory leak. + * + * Each Hadoop FileSystem instance registers metrics with the global DefaultMetricsSystem, + * creating MetricsSourceAdapter and JMX MBeans that are never unregistered on close(). + * Since Doris FE does not consume Hadoop metrics, we replace the default with this no-op + * to prevent unbounded accumulation of MetricCounterLong, MBeanAttributeInfo, etc. + */ +public class NopMetricsSystem extends MetricsSystem { + + @Override + public MetricsSystem init(String prefix) { + return this; + } + + @Override + public T register(String name, String desc, T source) { + return source; + } + + @Override + public void unregisterSource(String name) {} + + @Override + public MetricsSource getSource(String name) { + return null; + } + + @Override + public T register(String name, String desc, T sink) { + return sink; + } + + @Override + public void register(Callback callback) {} + + @Override + public void publishMetricsNow() {} + + @Override + public boolean shutdown() { + return true; + } + + @Override + public void start() {} + + @Override + public void stop() {} + + @Override + public void startMetricsMBeans() {} + + @Override + public void stopMetricsMBeans() {} + + @Override + public String currentConfig() { + return ""; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/ChildFirstClassLoader.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/ChildFirstClassLoader.java index ad3b0dfe77dd67..21aa339df447ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/ChildFirstClassLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/ChildFirstClassLoader.java @@ -45,6 +45,10 @@ */ public class ChildFirstClassLoader extends URLClassLoader { + // Hadoop metrics2 classes must use parent-first delegation so that all ClassLoaders + // share the main ClassLoader's NopMetricsSystem, preventing per-ClassLoader metrics leak. + private static final String HADOOP_METRICS_PREFIX = "org.apache.hadoop.metrics2."; + // A list of URLs pointing to JAR files private final List jarURLs; @@ -79,6 +83,10 @@ public ChildFirstClassLoader(URL[] urls, ClassLoader parent) throws IOException, */ @Override protected Class loadClass(String name, boolean resolve) throws ClassNotFoundException { + // Parent-first for Hadoop metrics2 — share NopMetricsSystem from main ClassLoader + if (name.startsWith(HADOOP_METRICS_PREFIX)) { + return super.loadClass(name, resolve); + } // Child-First mechanism: try to find the class locally first try { return findClass(name); diff --git a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java index 20dea19bb29c7f..6b7903797c5fd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/fs/FileSystemCache.java @@ -23,12 +23,16 @@ import org.apache.doris.fs.remote.RemoteFileSystem; import com.github.benmanes.caffeine.cache.LoadingCache; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.Objects; import java.util.OptionalLong; public class FileSystemCache { + private static final Logger LOG = LogManager.getLogger(FileSystemCache.class); + private final LoadingCache fileSystemCache; public FileSystemCache() { @@ -39,7 +43,17 @@ public FileSystemCache() { Config.max_remote_file_system_cache_num, false, null); - fileSystemCache = fsCacheFactory.buildCache(this::loadFileSystem); + // Use sync RemovalListener to close evicted RemoteFileSystem and release underlying resources + // (e.g., Hadoop FileSystem handles). Without this, evicted entries leak native resources. + fileSystemCache = fsCacheFactory.buildCacheWithSyncRemovalListener(this::loadFileSystem, (key, fs, cause) -> { + if (fs != null) { + try { + fs.close(); + } catch (Exception e) { + LOG.warn("Failed to close RemoteFileSystem on cache eviction", e); + } + } + }); } private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) {