Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ public class TrinoConnectorPluginManager implements PluginInstaller {
.add("org.openjdk.jol.")
.add("io.opentelemetry.api.")
.add("io.opentelemetry.context.")
.add("org.apache.hadoop.metrics2.")
.build();

private final ConcurrentMap<ConnectorName, ConnectorFactory> connectorFactories = new ConcurrentHashMap();
Expand Down
6 changes: 6 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/DorisFE.java
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ public static void start(String dorisHomeDir, String pidDir, String[] args, Star

fuzzyConfigs();

// Replace Hadoop's DefaultMetricsSystem with a no-op implementation to prevent a memory leak.
// Each FileSystem.get() call registers metrics (MetricCounterLong, MBeanAttributeInfo, etc.)
// that are never unregistered, causing unbounded growth. Doris FE does not use Hadoop metrics.
org.apache.hadoop.metrics2.lib.DefaultMetricsSystem
.setInstance(new org.apache.doris.common.NopMetricsSystem());

LOG.info("Doris FE starting...");

FrontendOptions.init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ public void remove(long id) {
lock.writeLock().lock();
try {
dbTableBinlogEnableMap.remove(id);
tableTypeMap.remove(id);
} finally {
lock.writeLock().unlock();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,7 @@ public void removeDB(long dbId) {
} finally {
lock.writeLock().unlock();
}
binlogConfigCache.remove(dbId);
}

public void removeTable(long dbId, long tableId) {
Expand All @@ -721,6 +722,7 @@ public void removeTable(long dbId, long tableId) {
} finally {
lock.writeLock().unlock();
}
binlogConfigCache.remove(tableId);
}

private static void writeTBinlogToStream(DataOutputStream dos, TBinlog binlog) throws TException, IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,9 @@ public Env(boolean isCheckpointCatalog) {
this.lock = new MonitoredReentrantLock(true);
this.backupHandler = new BackupHandler(this);
this.metaDir = Config.meta_dir;
this.publishVersionDaemon = new PublishVersionDaemon();
if (!isCheckpointCatalog) {
this.publishVersionDaemon = new PublishVersionDaemon();
}
this.deleteHandler = new DeleteHandler();
this.dbUsedDataQuotaInfoCollector = new DbUsedDataQuotaInfoCollector();
this.partitionInfoCollector = new PartitionInfoCollector();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.common;

import org.apache.hadoop.metrics2.MetricsSink;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsSystem;

/**
 * A {@link MetricsSystem} whose every operation is a no-op.
 *
 * <p>It exists to avoid the Hadoop metrics2 memory leak: each Hadoop FileSystem instance
 * registers metrics with the global DefaultMetricsSystem, which creates MetricsSourceAdapter
 * objects and JMX MBeans that are never unregistered on close(). Doris FE does not consume
 * Hadoop metrics, so installing this implementation in place of the default keeps objects
 * such as MetricCounterLong and MBeanAttributeInfo from accumulating without bound.
 *
 * <p>Nothing passed to this class is stored; sources and sinks are handed straight back
 * to the caller.
 */
public class NopMetricsSystem extends MetricsSystem {

    // ---------------------------------------------------------------------
    // Initialization and lifecycle
    // ---------------------------------------------------------------------

    /**
     * Performs no initialization.
     *
     * @param prefix ignored
     * @return this instance
     */
    @Override
    public MetricsSystem init(String prefix) {
        return this;
    }

    /** Does nothing; there is nothing to start. */
    @Override
    public void start() {
        // Intentionally empty.
    }

    /** Does nothing; there is nothing to stop. */
    @Override
    public void stop() {
        // Intentionally empty.
    }

    /**
     * Does nothing and reports success.
     *
     * @return always {@code true}
     */
    @Override
    public boolean shutdown() {
        return true;
    }

    // ---------------------------------------------------------------------
    // Registration of sources, sinks and callbacks
    // ---------------------------------------------------------------------

    /**
     * Accepts a metrics source without recording it.
     *
     * @param name ignored
     * @param desc ignored
     * @param source the source object offered for registration
     * @return {@code source}, unchanged
     */
    @Override
    public <T> T register(String name, String desc, T source) {
        return source;
    }

    /**
     * Accepts a metrics sink without recording it.
     *
     * @param name ignored
     * @param desc ignored
     * @param sink the sink offered for registration
     * @return {@code sink}, unchanged
     */
    @Override
    public <T extends MetricsSink> T register(String name, String desc, T sink) {
        return sink;
    }

    /**
     * Ignores the callback; it is never stored or invoked by this class.
     *
     * @param callback ignored
     */
    @Override
    public void register(Callback callback) {
        // Intentionally empty.
    }

    /**
     * Does nothing, because no source is ever retained.
     *
     * @param name ignored
     */
    @Override
    public void unregisterSource(String name) {
        // Intentionally empty.
    }

    /**
     * Looks up a source by name.
     *
     * @param name ignored
     * @return always {@code null}, because no source is ever retained
     */
    @Override
    public MetricsSource getSource(String name) {
        return null;
    }

    // ---------------------------------------------------------------------
    // Publishing and management-bean hooks
    // ---------------------------------------------------------------------

    /** Does nothing; there are no metrics to publish. */
    @Override
    public void publishMetricsNow() {
        // Intentionally empty.
    }

    /** Does nothing; no MBeans are created by this class. */
    @Override
    public void startMetricsMBeans() {
        // Intentionally empty.
    }

    /** Does nothing; no MBeans are created by this class. */
    @Override
    public void stopMetricsMBeans() {
        // Intentionally empty.
    }

    /**
     * Reports the current configuration.
     *
     * @return always the empty string
     */
    @Override
    public String currentConfig() {
        return "";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
*/
public class ChildFirstClassLoader extends URLClassLoader {

// Hadoop metrics2 classes must use parent-first delegation so that all ClassLoaders
// share the main ClassLoader's NopMetricsSystem, preventing a per-ClassLoader metrics leak.
private static final String HADOOP_METRICS_PREFIX = "org.apache.hadoop.metrics2.";

// A list of URLs pointing to JAR files
private final List<URL> jarURLs;

Expand Down Expand Up @@ -79,6 +83,10 @@ public ChildFirstClassLoader(URL[] urls, ClassLoader parent) throws IOException,
*/
@Override
protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
// Parent-first for Hadoop metrics2 — share NopMetricsSystem from main ClassLoader
if (name.startsWith(HADOOP_METRICS_PREFIX)) {
return super.loadClass(name, resolve);
}
// Child-First mechanism: try to find the class locally first
try {
return findClass(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,16 @@
import org.apache.doris.fs.remote.RemoteFileSystem;

import com.github.benmanes.caffeine.cache.LoadingCache;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Objects;
import java.util.OptionalLong;

public class FileSystemCache {

private static final Logger LOG = LogManager.getLogger(FileSystemCache.class);

private final LoadingCache<FileSystemCacheKey, RemoteFileSystem> fileSystemCache;

public FileSystemCache() {
Expand All @@ -39,7 +43,17 @@ public FileSystemCache() {
Config.max_remote_file_system_cache_num,
false,
null);
fileSystemCache = fsCacheFactory.buildCache(this::loadFileSystem);
// Use sync RemovalListener to close evicted RemoteFileSystem and release underlying resources
// (e.g., Hadoop FileSystem handles). Without this, evicted entries leak native resources.
fileSystemCache = fsCacheFactory.buildCacheWithSyncRemovalListener(this::loadFileSystem, (key, fs, cause) -> {
if (fs != null) {
try {
fs.close();
} catch (Exception e) {
LOG.warn("Failed to close RemoteFileSystem on cache eviction", e);
}
}
});
}

private RemoteFileSystem loadFileSystem(FileSystemCacheKey key) {
Expand Down
Loading