From cbcb804ac266c0fe648a5c7ca47e944966dd68ae Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Tue, 30 Dec 2025 18:55:15 -0500 Subject: [PATCH 01/13] add a recovery jmh benchmark --- solr/benchmark/jmh.ps1 | 116 +++++++++ .../apache/solr/bench/MiniClusterState.java | 10 +- .../bench/lifecycle/ReplicationRecovery.java | 242 ++++++++++++++++++ 3 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 solr/benchmark/jmh.ps1 create mode 100644 solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java diff --git a/solr/benchmark/jmh.ps1 b/solr/benchmark/jmh.ps1 new file mode 100644 index 000000000000..7a8cae366e93 --- /dev/null +++ b/solr/benchmark/jmh.ps1 @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +param( + [string]$SysProps = "" +) + +# Get all arguments passed after the script name (remaining args go to JMH) +$JmhArgs = $args + +$ErrorActionPreference = "Stop" + +$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$gradlewDir = Join-Path $scriptDir "..\..\" +$gradlew = Join-Path $gradlewDir "gradlew.bat" + +# Check if lib directory exists +$libDir = Join-Path $scriptDir "lib" +if (Test-Path $libDir) { + Write-Host "Using lib directory for classpath..." + $classpath = "$libDir\*;$scriptDir\build\classes\java\main" +} else { + Write-Host "Getting classpath from gradle..." 
+ + # Build the jars first + Push-Location $gradlewDir + try { + & $gradlew -q jar + if ($LASTEXITCODE -ne 0) { + Write-Error "Gradle build failed" + exit 1 + } + Write-Host "Gradle build done" + } finally { + Pop-Location + } + + # Get classpath from gradle + Push-Location $scriptDir + try { + $classpath = & $gradlew -q echoCp + if ($LASTEXITCODE -ne 0) { + Write-Error "Failed to get classpath from gradle" + exit 1 + } + } finally { + Pop-Location + } +} + +Write-Host "Running JMH with args: $JmhArgs" + +# JVM Arguments +$jvmArgs = @( + "-jvmArgs", "-Djmh.shutdownTimeout=5", + "-jvmArgs", "-Djmh.shutdownTimeout.step=3", + "-jvmArgs", "-Djava.security.egd=file:/dev/./urandom", + "-jvmArgs", "-XX:+UnlockDiagnosticVMOptions", + "-jvmArgs", "-XX:+DebugNonSafepoints", + "-jvmArgs", "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED" +) + +# GC Arguments +$gcArgs = @( + "-jvmArgs", "-XX:+UseG1GC", + "-jvmArgs", "-XX:+ParallelRefProcEnabled" +) + +# Logging Arguments +$loggingArgs = @( + "-jvmArgs", "-Dlog4jConfigurationFile=./log4j2-bench.xml", + "-jvmArgs", "-Dlog4j2.is.webapp=false", + "-jvmArgs", "-Dlog4j2.garbagefreeThreadContextMap=true", + "-jvmArgs", "-Dlog4j2.enableDirectEncoders=true", + "-jvmArgs", "-Dlog4j2.enable.threadlocals=true" +) + +# User-provided system properties +$userSysPropsArgs = @() +if ($SysProps -ne "") { + # Split on whitespace, handling -D properties + $props = $SysProps -split '\s+(?=-D)' | Where-Object { $_ -ne "" } + foreach ($prop in $props) { + $userSysPropsArgs += "-jvmArgs", $prop.Trim() + } + Write-Host "User system properties: $($props -join ', ')" +} + +# Build the full argument list +$allArgs = @( + "-cp", $classpath, + "--add-opens=java.base/java.io=ALL-UNNAMED", + "-Djdk.module.illegalAccess.silent=true", + "org.openjdk.jmh.Main" +) + $jvmArgs + $loggingArgs + $gcArgs + $userSysPropsArgs + $JmhArgs + +# Run JMH +Write-Host "Executing: java $($allArgs -join ' ')" +& java $allArgs + +$exitCode = $LASTEXITCODE +Write-Host "JMH benchmarks done (exit code: $exitCode)" +exit $exitCode + diff --git a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java index 4c4946a3ae86..e5a475a9c120 100755 --- a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java @@ -36,6 +36,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.jetty.HttpJettySolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; @@ -79,7 +80,7 @@ public static class MiniClusterBenchState { public String zkHost; /** The Cluster. */ - MiniSolrCloudCluster cluster; + public MiniSolrCloudCluster cluster; /** The Client. 
*/ public HttpJettySolrClient client; @@ -393,6 +394,7 @@ public void index(String collection, Docs docs, int docCount, boolean parallel) private void indexParallel(String collection, Docs docs, int docCount) throws InterruptedException { Meter meter = new Meter(); + AtomicReference indexingException = new AtomicReference<>(); ExecutorService executorService = Executors.newFixedThreadPool( Runtime.getRuntime().availableProcessors(), @@ -429,6 +431,7 @@ public void run() { try { client.requestWithBaseUrl(url, updateRequest, collection); } catch (Exception e) { + indexingException.compareAndSet(null, e); throw new RuntimeException(e); } } @@ -444,6 +447,11 @@ public void run() { } scheduledExecutor.shutdown(); + + Exception ex = indexingException.get(); + if (ex != null) { + throw new RuntimeException("Indexing failed", ex); + } } private void indexBatch(String collection, Docs docs, int docCount, int batchSize) diff --git a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java new file mode 100644 index 000000000000..b95f01c5d55d --- /dev/null +++ b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java @@ -0,0 +1,242 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one or more + * * contributor license agreements. See the NOTICE file distributed with + * * this work for additional information regarding copyright ownership. + * * The ASF licenses this file to You under the Apache License, Version 2.0 + * * (the "License"); you may not use this file except in compliance with + * * the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, software + * * distributed under the License is distributed on an "AS IS" BASIS, + * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * * See the License for the specific language governing permissions and + * * limitations under the License. + * + */ +package org.apache.solr.bench.lifecycle; + +import static org.apache.solr.bench.BaseBenchState.log; +import static org.apache.solr.bench.Docs.docs; +import static org.apache.solr.bench.generators.SourceDSL.integers; +import static org.apache.solr.bench.generators.SourceDSL.strings; + +import java.util.concurrent.TimeUnit; +import org.apache.solr.bench.Docs; +import org.apache.solr.bench.MiniClusterState; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * A benchmark to measure shard replication/recovery performance. + * + *
<p>
This benchmark creates a collection with 12 shards on a single node, indexes approximately 1GB + * of data, then adds replicas on a second node to trigger recovery. It measures the time taken for + * all replicas to become active. + */ +@BenchmarkMode(Mode.SingleShotTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@Threads(1) +@Warmup(iterations = 0) +@Measurement(iterations = 1) +@Fork(value = 1) +public class ReplicationRecovery { + + @State(Scope.Benchmark) + public static class BenchState { + + static final String COLLECTION = "replicationTestCollection"; + + @Param({"false", "true"}) + boolean useHttp1; + + @Param("12") + int numShards; + + @Param("100") + int pollIntervalMs; + + // Target ~1GB of data. With docs averaging ~10KB each, we need ~100,000 docs + // Adjust this based on actual doc size to achieve ~1GB + @Param("100000") + int docCount; + + // Auto commit interval in milliseconds + @Param("10000") + int autoCommitMaxTime; + + private final Docs largeDocs; + private String secondNodeUrl; + + public BenchState() { + // Create docs with substantial content to generate ~10KB per doc + // This will help us reach ~1GB with 100k docs + largeDocs = + docs() + .field("id", integers().incrementing()) + // Multiple large text fields to bulk up document size + .field("text1_t", strings().basicLatinAlphabet().multi(50).ofLengthBetween(100, 200)) + .field("text2_t", strings().basicLatinAlphabet().multi(50).ofLengthBetween(100, 200)) + .field("text3_t", strings().basicLatinAlphabet().multi(30).ofLengthBetween(80, 150)) + .field("text4_t", strings().basicLatinAlphabet().multi(30).ofLengthBetween(80, 150)) + .field("content_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(50, 100)); + } + + @Setup(Level.Trial) + public void doSetup(MiniClusterState.MiniClusterBenchState miniClusterState) throws Exception { + log("Setting up ReplicationRecovery benchmark..."); + + // Set autoCommit.maxTime before starting the cluster + System.setProperty("autoCommit.maxTime", String.valueOf(autoCommitMaxTime)); + log("Set autoCommit.maxTime to " + autoCommitMaxTime + "ms"); + + // Configure HTTP version + miniClusterState.setUseHttp1(useHttp1); + log("Using HTTP/1.1: " + useHttp1); + + // Start cluster with 2 nodes + miniClusterState.startMiniCluster(2); + + // Store the second node URL for later use + secondNodeUrl = miniClusterState.nodes.get(1); + log("First node URL: " + miniClusterState.nodes.get(0)); + log("Second node URL: " + secondNodeUrl); + + // Create collection with all shards on the first node only (1 replica each) + log("Creating collection with " + numShards + " shards on first node..."); + CollectionAdminRequest.Create createRequest = + CollectionAdminRequest.createCollection(COLLECTION, "conf", numShards, 1); + // Force all replicas to be created on the first node + // Node name format is host:port_solr (with underscore, not slash) + String firstNode = + miniClusterState.nodes.get(0).replace("http://", "").replace("https://", "").replace("/", "_"); + log("First node name for createNodeSet: " + firstNode); + createRequest.setCreateNodeSet(firstNode); + miniClusterState.client.requestWithBaseUrl( + miniClusterState.nodes.get(0), createRequest, null); + + miniClusterState.cluster.waitForActiveCollection( + COLLECTION, 30, TimeUnit.SECONDS, numShards, numShards); + + log("Collection created. 
Indexing " + docCount + " documents (~1GB of data)..."); + + // Index documents + miniClusterState.index(COLLECTION, largeDocs, docCount, true); + + // Wait for autoCommit to ensure all data is committed + log("Waiting for autoCommit (" + autoCommitMaxTime + "ms + buffer)..."); + Thread.sleep(autoCommitMaxTime + 2000); + + log("Setup complete. Ready to benchmark replication recovery."); + } + } + + /** + * Benchmark that measures the time to replicate all shards to a second node. + * + *
<p>
This adds a replica for each of the 12 shards to the second node and polls the cluster state + * every 100ms until all replicas are active. + */ + @Benchmark + public long replicateShards( + MiniClusterState.MiniClusterBenchState miniClusterState, + BenchState state, + Blackhole blackhole) + throws Exception { + + long startTime = System.currentTimeMillis(); + int totalReplicas = state.numShards * 2; // Original + new replicas + + log("Starting replication of " + state.numShards + " shards to second node..."); + + // Get the second node name (without http prefix, with underscore) for the replica placement + String secondNode = state.secondNodeUrl.replace("http://", "").replace("https://", "").replace("/", "_"); + + // Add a replica for each shard to the second node + for (int i = 1; i <= state.numShards; i++) { + String shardName = "shard" + i; + CollectionAdminRequest.AddReplica addReplica = + CollectionAdminRequest.addReplicaToShard(BenchState.COLLECTION, shardName); + addReplica.setNode(secondNode); + // Send request asynchronously to allow parallel recovery + addReplica.setAsyncId("add-replica-" + shardName); + miniClusterState.client.requestWithBaseUrl(miniClusterState.nodes.get(0), addReplica, null); + } + + log("All add-replica requests submitted. Polling for recovery completion..."); + + // Poll cluster state until all replicas are active + int pollCount = 0; + boolean allActive = false; + long lastLogTime = startTime; + + while (!allActive) { + Thread.sleep(state.pollIntervalMs); + pollCount++; + + // Refresh and check cluster state + miniClusterState.cluster.getZkStateReader().forceUpdateCollection(BenchState.COLLECTION); + DocCollection collection = + miniClusterState.cluster.getZkStateReader().getCollection(BenchState.COLLECTION); + + int activeCount = 0; + int recoveringCount = 0; + int downCount = 0; + + for (Slice slice : collection.getSlices()) { + for (Replica replica : slice.getReplicas()) { + Replica.State replicaState = replica.getState(); + if (replicaState == Replica.State.ACTIVE) { + activeCount++; + } else if (replicaState == Replica.State.RECOVERING) { + recoveringCount++; + } else { + downCount++; + } + } + } + + // Log progress every 5 seconds + long now = System.currentTimeMillis(); + if (now - lastLogTime >= 5000) { + log( + String.format( + "Recovery progress: %d active, %d recovering, %d down (total needed: %d)", + activeCount, recoveringCount, downCount, totalReplicas)); + lastLogTime = now; + } + + allActive = (activeCount == totalReplicas); + blackhole.consume(collection); + } + + long endTime = System.currentTimeMillis(); + long duration = endTime - startTime; + + log( + String.format( + "Replication complete! All %d replicas active. Duration: %d ms, Poll count: %d", + totalReplicas, duration, pollCount)); + + return duration; + } +} + From cf7d2e651efec6724e052bb397f6cb7f0e1e15fa Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 12 Jan 2026 15:21:04 -0500 Subject: [PATCH 02/13] parallelized indexing + docker build --- solr/benchmark/jmh.sh | 38 ++++---- .../apache/solr/bench/MiniClusterState.java | 93 +++++++++++++++++++ .../bench/lifecycle/ReplicationRecovery.java | 65 ++++++++----- 3 files changed, 157 insertions(+), 39 deletions(-) diff --git a/solr/benchmark/jmh.sh b/solr/benchmark/jmh.sh index 18f9875da192..88a3fa2c8f34 100755 --- a/solr/benchmark/jmh.sh +++ b/solr/benchmark/jmh.sh @@ -20,23 +20,27 @@ base_dir=$(dirname "$0") if [ "${base_dir}" == "." ]; then gradlew_dir="../.." 
-else - echo "Benchmarks need to be run from the 'solr/benchmark' directory" - exit -fi + log4j_config="./log4j2-bench.xml" - -if [ -d "lib" ] -then - echo "Using lib directory for classpath..." - classpath="lib/*:build/classes/java/main" + if [ -d "lib" ]; then + echo "Using lib directory for classpath..." + classpath="lib/*" + else + echo "Getting classpath from gradle..." + # --no-daemon + gradleCmd="${gradlew_dir}/gradlew" + $gradleCmd -q -p ../../ jar + echo "gradle build done" + classpath=$($gradleCmd -q echoCp) + fi +elif [ "${base_dir}" == "/opt/benchmark" ]; then + # Docker container mode + log4j_config="/opt/benchmark/log4j2-bench.xml" + classpath="/opt/benchmark/lib/*" + echo "Running in Docker container mode..." else - echo "Getting classpath from gradle..." - # --no-daemon - gradleCmd="${gradlew_dir}/gradlew" - $gradleCmd -q -p ../../ jar - echo "gradle build done" - classpath=$($gradleCmd -q echoCp) + echo "Benchmarks need to be run from the 'solr/benchmark' directory" + exit 1 fi # shellcheck disable=SC2145 @@ -54,8 +58,8 @@ echo "running JMH with args: $@" jvmArgs="-jvmArgs -Djmh.shutdownTimeout=5 -jvmArgs -Djmh.shutdownTimeout.step=3 -jvmArgs -Djava.security.egd=file:/dev/./urandom -jvmArgs -XX:+UnlockDiagnosticVMOptions -jvmArgs -XX:+DebugNonSafepoints -jvmArgs --add-opens=java.base/java.lang.reflect=ALL-UNNAMED" gcArgs="-jvmArgs -XX:+UseG1GC -jvmArgs -XX:+ParallelRefProcEnabled" -# -jvmArgs -Dlog4j2.debug -loggingArgs="-jvmArgs -Dlog4jConfigurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true" +# -jvmArgs -Dlog4j2.debug +loggingArgs="-jvmArgs -Dlog4jConfigurationFile=${log4j_config} -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true" #set -x diff --git a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java index e5a475a9c120..6aee2e0e907d 100755 --- a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java @@ -479,6 +479,99 @@ private void indexBatch(String collection, Docs docs, int docCount, int batchSiz log(meter.getCount() + " docs at " + (long) meter.getMeanRate() + " doc/s"); } + /** + * Index documents using multiple threads, each sending batches. 
+ * + * @param collection the collection + * @param docs the docs generator + * @param docCount total number of docs to index + * @param numThreads number of parallel threads + * @param batchSize docs per batch/request + */ + @SuppressForbidden(reason = "This module does not need to deal with logging context") + public void indexParallelBatched( + String collection, Docs docs, int docCount, int numThreads, int batchSize) + throws InterruptedException { + Meter meter = new Meter(); + AtomicReference indexingException = new AtomicReference<>(); + + ExecutorService executorService = + Executors.newFixedThreadPool(numThreads, new SolrNamedThreadFactory("SolrJMH Indexer")); + + // Progress logging + ScheduledExecutorService scheduledExecutor = + Executors.newSingleThreadScheduledExecutor( + new SolrNamedThreadFactory("SolrJMH Indexer Progress")); + scheduledExecutor.scheduleAtFixedRate( + () -> { + if (meter.getCount() >= docCount) { + scheduledExecutor.shutdown(); + } else { + log(meter.getCount() + "/" + docCount + " docs at " + (long) meter.getMeanRate() + " doc/s"); + } + }, + 5, + 5, + TimeUnit.SECONDS); + + // Split work across threads + int docsPerThread = docCount / numThreads; + int remainder = docCount % numThreads; + + for (int t = 0; t < numThreads; t++) { + final int threadDocsCount = docsPerThread + (t < remainder ? 1 : 0); + + executorService.execute(() -> { + List batch = new ArrayList<>(batchSize); + + for (int i = 0; i < threadDocsCount; i++) { + batch.add(docs.inputDocument()); + + if (batch.size() >= batchSize) { + sendBatch(collection, batch, indexingException); + meter.mark(batch.size()); + batch.clear(); + } + } + + // Send remaining docs + if (!batch.isEmpty()) { + sendBatch(collection, batch, indexingException); + meter.mark(batch.size()); + } + }); + } + + executorService.shutdown(); + boolean terminated = false; + while (!terminated) { + terminated = executorService.awaitTermination(10, TimeUnit.MINUTES); + } + scheduledExecutor.shutdown(); + + Exception ex = indexingException.get(); + if (ex != null) { + throw new RuntimeException("Indexing failed", ex); + } + + log(meter.getCount() + " docs indexed at " + (long) meter.getMeanRate() + " doc/s"); + } + + private void sendBatch( + String collection, + List batch, + AtomicReference indexingException) { + try { + UpdateRequest updateRequest = new UpdateRequest(); + updateRequest.add(batch); + // Use first node - simpler and avoids thread-safety issues with random node selection + client.requestWithBaseUrl(nodes.get(0), updateRequest, collection); + } catch (Exception e) { + indexingException.compareAndSet(null, e); + throw new RuntimeException(e); + } + } + /** * Wait for merges. * diff --git a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java index b95f01c5d55d..8063e23234cc 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java @@ -65,39 +65,55 @@ public static class BenchState { static final String COLLECTION = "replicationTestCollection"; - @Param({"false", "true"}) - boolean useHttp1; - @Param("12") int numShards; @Param("100") int pollIntervalMs; - // Target ~1GB of data. With docs averaging ~10KB each, we need ~100,000 docs - // Adjust this based on actual doc size to achieve ~1GB - @Param("100000") + // Number of docs to index. Each doc is ~10KB. 
+ // Use -p docCount=100000 for ~1GB of data. + @Param("1000") int docCount; // Auto commit interval in milliseconds @Param("10000") int autoCommitMaxTime; + // Number of threads for parallel indexing (0 = sequential) + @Param("4") + int indexThreads; + + // Batch size for indexing (docs per request) + @Param("1000") + int batchSize; + + // Replica type for new replicas: NRT, TLOG, or PULL + // PULL replicas just copy segments (fastest for replication) + // TLOG replicas replay transaction log + // NRT replicas do full local indexing + @Param("NRT") + String replicaType; + private final Docs largeDocs; private String secondNodeUrl; public BenchState() { - // Create docs with substantial content to generate ~10KB per doc - // This will help us reach ~1GB with 100k docs + // Create docs with substantial content to generate ~100KB per doc + // This will help us reach ~1GB with 10k docs largeDocs = docs() .field("id", integers().incrementing()) - // Multiple large text fields to bulk up document size - .field("text1_t", strings().basicLatinAlphabet().multi(50).ofLengthBetween(100, 200)) - .field("text2_t", strings().basicLatinAlphabet().multi(50).ofLengthBetween(100, 200)) - .field("text3_t", strings().basicLatinAlphabet().multi(30).ofLengthBetween(80, 150)) - .field("text4_t", strings().basicLatinAlphabet().multi(30).ofLengthBetween(80, 150)) - .field("content_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(50, 100)); + // Multiple large text fields to bulk up document size to ~100KB + .field("text1_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text2_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text3_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text4_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text5_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text6_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text7_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text8_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("content_t", strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); } @Setup(Level.Trial) @@ -108,10 +124,6 @@ public void doSetup(MiniClusterState.MiniClusterBenchState miniClusterState) thr System.setProperty("autoCommit.maxTime", String.valueOf(autoCommitMaxTime)); log("Set autoCommit.maxTime to " + autoCommitMaxTime + "ms"); - // Configure HTTP version - miniClusterState.setUseHttp1(useHttp1); - log("Using HTTP/1.1: " + useHttp1); - // Start cluster with 2 nodes miniClusterState.startMiniCluster(2); @@ -136,10 +148,17 @@ public void doSetup(MiniClusterState.MiniClusterBenchState miniClusterState) thr miniClusterState.cluster.waitForActiveCollection( COLLECTION, 30, TimeUnit.SECONDS, numShards, numShards); - log("Collection created. Indexing " + docCount + " documents (~1GB of data)..."); + log("Collection created. 
Indexing " + docCount + " documents with " + indexThreads + " threads, batch size " + batchSize + "..."); // Index documents - miniClusterState.index(COLLECTION, largeDocs, docCount, true); + long indexStart = System.currentTimeMillis(); + if (indexThreads > 0) { + miniClusterState.indexParallelBatched(COLLECTION, largeDocs, docCount, indexThreads, batchSize); + } else { + miniClusterState.index(COLLECTION, largeDocs, docCount, false); + } + long indexTime = System.currentTimeMillis() - indexStart; + log("Indexing completed in " + indexTime + "ms"); // Wait for autoCommit to ensure all data is committed log("Waiting for autoCommit (" + autoCommitMaxTime + "ms + buffer)..."); @@ -165,7 +184,9 @@ public long replicateShards( long startTime = System.currentTimeMillis(); int totalReplicas = state.numShards * 2; // Original + new replicas - log("Starting replication of " + state.numShards + " shards to second node..."); + // Parse replica type + Replica.Type type = Replica.Type.valueOf(state.replicaType.toUpperCase()); + log("Starting replication of " + state.numShards + " shards to second node (replica type: " + type + ")..."); // Get the second node name (without http prefix, with underscore) for the replica placement String secondNode = state.secondNodeUrl.replace("http://", "").replace("https://", "").replace("/", "_"); @@ -174,7 +195,7 @@ public long replicateShards( for (int i = 1; i <= state.numShards; i++) { String shardName = "shard" + i; CollectionAdminRequest.AddReplica addReplica = - CollectionAdminRequest.addReplicaToShard(BenchState.COLLECTION, shardName); + CollectionAdminRequest.addReplicaToShard(BenchState.COLLECTION, shardName, type); addReplica.setNode(secondNode); // Send request asynchronously to allow parallel recovery addReplica.setAsyncId("add-replica-" + shardName); From 67b9ae7aa0076789a355cc592fd77b5d10a8479f Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 12 Jan 2026 16:12:25 -0500 Subject: [PATCH 03/13] text to string fields --- .../bench/lifecycle/ReplicationRecovery.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java index 8063e23234cc..40b98b440cc3 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java @@ -101,19 +101,20 @@ public static class BenchState { public BenchState() { // Create docs with substantial content to generate ~100KB per doc // This will help us reach ~1GB with 10k docs + // Using _s (string) fields instead of _t (text) to avoid analysis overhead largeDocs = docs() .field("id", integers().incrementing()) - // Multiple large text fields to bulk up document size to ~100KB - .field("text1_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text2_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text3_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text4_t", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text5_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text6_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text7_t", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text8_t", 
strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("content_t", strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); + // Multiple large string fields to bulk up document size to ~100KB (no analysis) + .field("text1_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text2_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text3_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text4_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text5_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text6_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text7_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text8_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("content_s", strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); } @Setup(Level.Trial) From 6ecc35dcbd22dadc0dc94d6f1b856818efdbef05 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 16 Jan 2026 22:55:20 -0500 Subject: [PATCH 04/13] text to string fields --- .../bench/lifecycle/ReplicationRecovery.java | 18 +++++++++--------- .../configs/cloud-minimal/conf/schema.xml | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java index 40b98b440cc3..2fc492432ffe 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java @@ -106,15 +106,15 @@ public BenchState() { docs() .field("id", integers().incrementing()) // Multiple large string fields to bulk up document size to ~100KB (no analysis) - .field("text1_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text2_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text3_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text4_s", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text5_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text6_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text7_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text8_s", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("content_s", strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); + .field("text1_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text2_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text3_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text4_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field("text5_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text6_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text7_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("text8_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) + .field("content_ss", 
strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); } @Setup(Level.Trial) diff --git a/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml b/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml index e517aea59307..09ae5fa43997 100644 --- a/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml +++ b/solr/benchmark/src/resources/configs/cloud-minimal/conf/schema.xml @@ -43,6 +43,7 @@ + From 517a96c846e530314ec230775446987ed2c4f821 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 16 Jan 2026 23:07:23 -0500 Subject: [PATCH 05/13] revert docker specific changes --- solr/benchmark/jmh.sh | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/solr/benchmark/jmh.sh b/solr/benchmark/jmh.sh index 88a3fa2c8f34..ab4ee52551aa 100755 --- a/solr/benchmark/jmh.sh +++ b/solr/benchmark/jmh.sh @@ -20,7 +20,6 @@ base_dir=$(dirname "$0") if [ "${base_dir}" == "." ]; then gradlew_dir="../.." - log4j_config="./log4j2-bench.xml" if [ -d "lib" ]; then echo "Using lib directory for classpath..." @@ -33,11 +32,6 @@ if [ "${base_dir}" == "." ]; then echo "gradle build done" classpath=$($gradleCmd -q echoCp) fi -elif [ "${base_dir}" == "/opt/benchmark" ]; then - # Docker container mode - log4j_config="/opt/benchmark/log4j2-bench.xml" - classpath="/opt/benchmark/lib/*" - echo "Running in Docker container mode..." else echo "Benchmarks need to be run from the 'solr/benchmark' directory" exit 1 @@ -59,7 +53,7 @@ jvmArgs="-jvmArgs -Djmh.shutdownTimeout=5 -jvmArgs -Djmh.shutdownTimeout.step=3 gcArgs="-jvmArgs -XX:+UseG1GC -jvmArgs -XX:+ParallelRefProcEnabled" # -jvmArgs -Dlog4j2.debug -loggingArgs="-jvmArgs -Dlog4jConfigurationFile=${log4j_config} -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true" +loggingArgs="-jvmArgs -Dlog4jConfigurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true" #set -x From d730bfb48764e9f847e9e3f6337a19a026c33946 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 23 Jan 2026 20:52:06 -0500 Subject: [PATCH 06/13] add flow control parameters --- .../solr/bench/search/StreamingSearch.java | 89 ++++++++++++++++--- .../solrj/jetty/HttpJettySolrClient.java | 54 +++++++++-- .../HttpJettySolrClientCompatibilityTest.java | 72 +++++++++++++++ 3 files changed, 197 insertions(+), 18 deletions(-) diff --git a/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java b/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java index a9860763dbe7..d31d682a4830 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java @@ -64,7 +64,30 @@ public static class BenchState { @Param({"false", "true"}) boolean useHttp1; - private int docs = 1000; + @Param("3") + int nodeCount; + + @Param("3") + int numShards; + + @Param("1") + int numReplicas; + + @Param("1000") + int docCount; + + @Param("4") + int indexThreads; // 0 = sequential indexing + + @Param("500") + int batchSize; + + @Param("1024") + int docSizeBytes; // Target document size in bytes (approximate) + + @Param("3") + int numTextFields; // Number of textN_ts fields to generate + private String zkHost; private ModifiableSolrParams params; private StreamContext 
streamContext; @@ -73,24 +96,66 @@ public static class BenchState { @Setup(Level.Trial) public void setup(MiniClusterBenchState miniClusterState) throws Exception { - miniClusterState.startMiniCluster(3); - miniClusterState.createCollection(collection, 3, 1); - Docs docGen = - docs() - .field("id", integers().incrementing()) - .field("text2_ts", strings().basicLatinAlphabet().multi(312).ofLengthBetween(30, 64)) - .field("text3_ts", strings().basicLatinAlphabet().multi(312).ofLengthBetween(30, 64)) - .field("int1_i_dv", integers().all()); - miniClusterState.index(collection, docGen, docs); + miniClusterState.startMiniCluster(nodeCount); + miniClusterState.createCollection(collection, numShards, numReplicas); + + Docs docGen = createDocsWithTargetSize(docSizeBytes, numTextFields); + + if (indexThreads > 0) { + miniClusterState.indexParallelBatched(collection, docGen, docCount, indexThreads, batchSize); + } else { + miniClusterState.index(collection, docGen, docCount, false); + } miniClusterState.waitForMerges(collection); zkHost = miniClusterState.zkHost; + // Build field list dynamically based on numTextFields + StringBuilder flBuilder = new StringBuilder("id"); + for (int i = 1; i <= numTextFields; i++) { + flBuilder.append(",text").append(i).append("_ts"); + } + flBuilder.append(",int1_i_dv"); + params = new ModifiableSolrParams(); params.set(CommonParams.Q, "*:*"); - params.set(CommonParams.FL, "id,text2_ts,text3_ts,int1_i_dv"); + params.set(CommonParams.FL, flBuilder.toString()); params.set(CommonParams.SORT, "id asc,int1_i_dv asc"); - params.set(CommonParams.ROWS, docs); + params.set(CommonParams.ROWS, docCount); + } + + /** + * Creates a Docs generator that produces documents with approximately the target size. + * + * @param targetSizeBytes target document size in bytes (approximate) + * @param numFields number of textN_ts fields to generate + * @return Docs generator configured for the target size + */ + private Docs createDocsWithTargetSize(int targetSizeBytes, int numFields) { + // Calculate how many characters per field to approximate target size + // Each character is ~1 byte in basic Latin alphabet + // Account for field overhead, id field, and int field + int baseOverhead = 100; // Approximate overhead for id, int field, and field names + int availableBytes = Math.max(100, targetSizeBytes - baseOverhead); + int bytesPerField = availableBytes / Math.max(1, numFields); + + // Use multi-value strings: multi(N) creates N strings joined by spaces + // Calculate words and word length to hit target + int wordsPerField = Math.max(1, bytesPerField / 50); // ~50 chars per word avg + int wordLength = Math.min(64, Math.max(10, bytesPerField / Math.max(1, wordsPerField))); + int minWordLength = Math.max(5, wordLength - 10); + int maxWordLength = wordLength + 10; + + Docs docGen = docs().field("id", integers().incrementing()); + + for (int i = 1; i <= numFields; i++) { + docGen.field( + "text" + i + "_ts", + strings().basicLatinAlphabet().multi(wordsPerField).ofLengthBetween(minWordLength, maxWordLength)); + } + + docGen.field("int1_i_dv", integers().all()); + return docGen; } @Setup(Level.Iteration) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java index 072a2add953c..36e55b453956 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java @@ -81,6 
+81,7 @@ import org.eclipse.jetty.http.HttpMethod; import org.eclipse.jetty.http.MimeTypes; import org.eclipse.jetty.http.MultiPart; +import org.eclipse.jetty.http2.SimpleFlowControlStrategy; import org.eclipse.jetty.http2.client.HTTP2Client; import org.eclipse.jetty.http2.client.transport.HttpClientTransportOverHTTP2; import org.eclipse.jetty.io.ClientConnector; @@ -110,6 +111,25 @@ public class HttpJettySolrClient extends HttpSolrClientBase { */ public static final String CLIENT_CUSTOMIZER_SYSPROP = "solr.solrj.http.jetty.customizer"; + /** + * A Java system property to set the initial HTTP/2 session receive window size (in bytes). Only + * applies when using HTTP/2 transport. + */ + public static final String HTTP2_SESSION_RECV_WINDOW_SYSPROP = "solr.http2.initialSessionRecvWindow"; + + /** + * A Java system property to set the initial HTTP/2 stream receive window size (in bytes). Only + * applies when using HTTP/2 transport. + */ + public static final String HTTP2_STREAM_RECV_WINDOW_SYSPROP = "solr.http2.initialStreamRecvWindow"; + + /** + * A Java system property to enable the simple flow control strategy for HTTP/2. When set to + * "true", uses {@link SimpleFlowControlStrategy} instead of the default buffering + * strategy. Only applies when using HTTP/2 transport. + */ + public static final String HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP = "solr.http2.useSimpleFlowControl"; + public static final String REQ_PRINCIPAL_KEY = "solr-req-principal"; private static final String USER_AGENT = "Solr[" + MethodHandles.lookup().lookupClass().getName() + "] " + SolrVersion.LATEST_STRING; @@ -275,9 +295,7 @@ private HttpClient createHttpClient(Builder builder) { HttpClient httpClient; HttpClientTransport transport; if (builder.shouldUseHttp1_1()) { - if (log.isDebugEnabled()) { - log.debug("Create HttpJettySolrClient with HTTP/1.1 transport"); - } + log.info("Create HttpJettySolrClient with HTTP/1.1 transport (solr.http1={})", System.getProperty("solr.http1")); transport = new HttpClientTransportOverHTTP(clientConnector); httpClient = new HttpClient(transport); @@ -285,11 +303,10 @@ private HttpClient createHttpClient(Builder builder) { httpClient.setMaxConnectionsPerDestination(builder.getMaxConnectionsPerHost()); } } else { - if (log.isDebugEnabled()) { - log.debug("Create HttpJettySolrClient with HTTP/2 transport"); - } + log.info("Create HttpJettySolrClient with HTTP/2 transport (solr.http1={})", System.getProperty("solr.http1")); HTTP2Client http2client = new HTTP2Client(clientConnector); + configureHttp2FlowControl(http2client); transport = new HttpClientTransportOverHTTP2(http2client); httpClient = new HttpClient(transport); httpClient.setMaxConnectionsPerDestination(4); @@ -326,6 +343,31 @@ private HttpClient createHttpClient(Builder builder) { return httpClient; } + /** + * Configures HTTP/2 flow control settings on the HTTP2Client based on system properties. Only + * applies settings when the corresponding system property is explicitly set. 
+ */ + private void configureHttp2FlowControl(HTTP2Client http2client) { + Integer sessionRecvWindow = + EnvUtils.getPropertyAsInteger(HTTP2_SESSION_RECV_WINDOW_SYSPROP, null); + if (sessionRecvWindow != null) { + http2client.setInitialSessionRecvWindow(sessionRecvWindow); + log.info("LKZ Set HTTP/2 initial session recv window to {} bytes", sessionRecvWindow); + } + + Integer streamRecvWindow = + EnvUtils.getPropertyAsInteger(HTTP2_STREAM_RECV_WINDOW_SYSPROP, null); + if (streamRecvWindow != null) { + http2client.setInitialStreamRecvWindow(streamRecvWindow); + log.info("Set HTTP/2 initial stream recv window to {} bytes", streamRecvWindow); + } + + if (EnvUtils.getPropertyAsBool(HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP, false)) { + http2client.setFlowControlStrategyFactory(SimpleFlowControlStrategy::new); + log.error("Using simple HTTP/2 flow control strategy"); + } + } + private void setupProxy(Builder builder, HttpClient httpClient) { if (builder.getProxyHost() == null) { return; diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java index c213b3d31303..06d3bb7b9d18 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java @@ -28,6 +28,9 @@ import org.apache.solr.util.ServletFixtures.DebugServlet; import org.eclipse.jetty.client.transport.HttpClientTransportOverHTTP; import org.eclipse.jetty.ee10.servlet.ServletHolder; +import org.eclipse.jetty.http2.FlowControlStrategy; +import org.eclipse.jetty.http2.SimpleFlowControlStrategy; +import org.eclipse.jetty.http2.client.HTTP2Client; import org.eclipse.jetty.http2.client.transport.HttpClientTransportOverHTTP2; @LogLevel("org.eclipse.jetty.client=DEBUG;org.eclipse.jetty.util=DEBUG") @@ -45,6 +48,75 @@ public void testSystemPropertyFlag() { } } + public void testHttp2FlowControlSystemProperties() { + // Test with custom session and stream recv window sizes + System.setProperty( + HttpJettySolrClient.HTTP2_SESSION_RECV_WINDOW_SYSPROP, String.valueOf(4 * 1024 * 1024)); + System.setProperty( + HttpJettySolrClient.HTTP2_STREAM_RECV_WINDOW_SYSPROP, String.valueOf(2 * 1024 * 1024)); + System.setProperty(HttpJettySolrClient.HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP, "true"); + + try (var client = new HttpJettySolrClient.Builder().build()) { + var transport = client.getHttpClient().getTransport(); + assertTrue( + "Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); + + HttpClientTransportOverHTTP2 http2Transport = (HttpClientTransportOverHTTP2) transport; + HTTP2Client http2Client = http2Transport.getHTTP2Client(); + + assertEquals( + "Session recv window should be set", + 4 * 1024 * 1024, + http2Client.getInitialSessionRecvWindow()); + assertEquals( + "Stream recv window should be set", + 2 * 1024 * 1024, + http2Client.getInitialStreamRecvWindow()); + + // Verify simple flow control strategy is used + FlowControlStrategy.Factory factory = http2Client.getFlowControlStrategyFactory(); + FlowControlStrategy strategy = factory.newFlowControlStrategy(); + assertTrue( + "Expected SimpleFlowControlStrategy", strategy instanceof SimpleFlowControlStrategy); + } finally { + System.clearProperty(HttpJettySolrClient.HTTP2_SESSION_RECV_WINDOW_SYSPROP); + System.clearProperty(HttpJettySolrClient.HTTP2_STREAM_RECV_WINDOW_SYSPROP); + 
System.clearProperty(HttpJettySolrClient.HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP); + } + } + + @SuppressWarnings("try") // HTTP2Client is AutoCloseable but doesn't need closing when not started + public void testHttp2FlowControlDefaultsUnchangedWhenPropertiesNotSet() { + // Ensure no flow control properties are set + System.clearProperty(HttpJettySolrClient.HTTP2_SESSION_RECV_WINDOW_SYSPROP); + System.clearProperty(HttpJettySolrClient.HTTP2_STREAM_RECV_WINDOW_SYSPROP); + System.clearProperty(HttpJettySolrClient.HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP); + + // Get default values from a fresh HTTP2Client for comparison + // Note: HTTP2Client doesn't need to be closed if never started + HTTP2Client defaultHttp2Client = new HTTP2Client(); + int defaultSessionWindow = defaultHttp2Client.getInitialSessionRecvWindow(); + int defaultStreamWindow = defaultHttp2Client.getInitialStreamRecvWindow(); + + try (var client = new HttpJettySolrClient.Builder().build()) { + var transport = client.getHttpClient().getTransport(); + assertTrue( + "Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); + + HttpClientTransportOverHTTP2 http2Transport = (HttpClientTransportOverHTTP2) transport; + HTTP2Client http2Client = http2Transport.getHTTP2Client(); + + assertEquals( + "Session recv window should remain at default", + defaultSessionWindow, + http2Client.getInitialSessionRecvWindow()); + assertEquals( + "Stream recv window should remain at default", + defaultStreamWindow, + http2Client.getInitialStreamRecvWindow()); + } + } + public void testConnectToOldNodesUsingHttp1() throws Exception { JettyConfig jettyConfig = From 9ac94b6bc206c015fc04e06b48118915c6f37ef5 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 23 Jan 2026 20:53:08 -0500 Subject: [PATCH 07/13] tidy --- .../apache/solr/bench/MiniClusterState.java | 41 ++++++++++------- .../bench/lifecycle/ReplicationRecovery.java | 46 ++++++++++++++----- .../solr/bench/search/StreamingSearch.java | 8 +++- .../solrj/jetty/HttpJettySolrClient.java | 18 +++++--- .../HttpJettySolrClientCompatibilityTest.java | 6 +-- 5 files changed, 79 insertions(+), 40 deletions(-) diff --git a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java index 6aee2e0e907d..9e5e714bb001 100755 --- a/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/MiniClusterState.java @@ -507,7 +507,13 @@ public void indexParallelBatched( if (meter.getCount() >= docCount) { scheduledExecutor.shutdown(); } else { - log(meter.getCount() + "/" + docCount + " docs at " + (long) meter.getMeanRate() + " doc/s"); + log( + meter.getCount() + + "/" + + docCount + + " docs at " + + (long) meter.getMeanRate() + + " doc/s"); } }, 5, @@ -521,25 +527,26 @@ public void indexParallelBatched( for (int t = 0; t < numThreads; t++) { final int threadDocsCount = docsPerThread + (t < remainder ? 
1 : 0); - executorService.execute(() -> { - List batch = new ArrayList<>(batchSize); + executorService.execute( + () -> { + List batch = new ArrayList<>(batchSize); - for (int i = 0; i < threadDocsCount; i++) { - batch.add(docs.inputDocument()); + for (int i = 0; i < threadDocsCount; i++) { + batch.add(docs.inputDocument()); - if (batch.size() >= batchSize) { - sendBatch(collection, batch, indexingException); - meter.mark(batch.size()); - batch.clear(); - } - } + if (batch.size() >= batchSize) { + sendBatch(collection, batch, indexingException); + meter.mark(batch.size()); + batch.clear(); + } + } - // Send remaining docs - if (!batch.isEmpty()) { - sendBatch(collection, batch, indexingException); - meter.mark(batch.size()); - } - }); + // Send remaining docs + if (!batch.isEmpty()) { + sendBatch(collection, batch, indexingException); + meter.mark(batch.size()); + } + }); } executorService.shutdown(); diff --git a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java index 2fc492432ffe..3459165018c7 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java @@ -106,15 +106,21 @@ public BenchState() { docs() .field("id", integers().incrementing()) // Multiple large string fields to bulk up document size to ~100KB (no analysis) - .field("text1_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text2_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text3_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text4_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field( + "text1_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field( + "text2_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field( + "text3_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) + .field( + "text4_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) .field("text5_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) .field("text6_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) .field("text7_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) .field("text8_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("content_ss", strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); + .field( + "content_ss", + strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); } @Setup(Level.Trial) @@ -140,7 +146,12 @@ public void doSetup(MiniClusterState.MiniClusterBenchState miniClusterState) thr // Force all replicas to be created on the first node // Node name format is host:port_solr (with underscore, not slash) String firstNode = - miniClusterState.nodes.get(0).replace("http://", "").replace("https://", "").replace("/", "_"); + miniClusterState + .nodes + .get(0) + .replace("http://", "") + .replace("https://", "") + .replace("/", "_"); log("First node name for createNodeSet: " + firstNode); createRequest.setCreateNodeSet(firstNode); miniClusterState.client.requestWithBaseUrl( @@ -149,12 +160,20 @@ public void doSetup(MiniClusterState.MiniClusterBenchState miniClusterState) thr miniClusterState.cluster.waitForActiveCollection( COLLECTION, 30, TimeUnit.SECONDS, 
numShards, numShards); - log("Collection created. Indexing " + docCount + " documents with " + indexThreads + " threads, batch size " + batchSize + "..."); + log( + "Collection created. Indexing " + + docCount + + " documents with " + + indexThreads + + " threads, batch size " + + batchSize + + "..."); // Index documents long indexStart = System.currentTimeMillis(); if (indexThreads > 0) { - miniClusterState.indexParallelBatched(COLLECTION, largeDocs, docCount, indexThreads, batchSize); + miniClusterState.indexParallelBatched( + COLLECTION, largeDocs, docCount, indexThreads, batchSize); } else { miniClusterState.index(COLLECTION, largeDocs, docCount, false); } @@ -187,10 +206,16 @@ public long replicateShards( // Parse replica type Replica.Type type = Replica.Type.valueOf(state.replicaType.toUpperCase()); - log("Starting replication of " + state.numShards + " shards to second node (replica type: " + type + ")..."); + log( + "Starting replication of " + + state.numShards + + " shards to second node (replica type: " + + type + + ")..."); // Get the second node name (without http prefix, with underscore) for the replica placement - String secondNode = state.secondNodeUrl.replace("http://", "").replace("https://", "").replace("/", "_"); + String secondNode = + state.secondNodeUrl.replace("http://", "").replace("https://", "").replace("/", "_"); // Add a replica for each shard to the second node for (int i = 1; i <= state.numShards; i++) { @@ -261,4 +286,3 @@ public long replicateShards( return duration; } } - diff --git a/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java b/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java index d31d682a4830..ead834232d7d 100644 --- a/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java +++ b/solr/benchmark/src/java/org/apache/solr/bench/search/StreamingSearch.java @@ -102,7 +102,8 @@ public void setup(MiniClusterBenchState miniClusterState) throws Exception { Docs docGen = createDocsWithTargetSize(docSizeBytes, numTextFields); if (indexThreads > 0) { - miniClusterState.indexParallelBatched(collection, docGen, docCount, indexThreads, batchSize); + miniClusterState.indexParallelBatched( + collection, docGen, docCount, indexThreads, batchSize); } else { miniClusterState.index(collection, docGen, docCount, false); } @@ -151,7 +152,10 @@ private Docs createDocsWithTargetSize(int targetSizeBytes, int numFields) { for (int i = 1; i <= numFields; i++) { docGen.field( "text" + i + "_ts", - strings().basicLatinAlphabet().multi(wordsPerField).ofLengthBetween(minWordLength, maxWordLength)); + strings() + .basicLatinAlphabet() + .multi(wordsPerField) + .ofLengthBetween(minWordLength, maxWordLength)); } docGen.field("int1_i_dv", integers().all()); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java index 36e55b453956..9ccafca1a7c3 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/jetty/HttpJettySolrClient.java @@ -115,18 +115,20 @@ public class HttpJettySolrClient extends HttpSolrClientBase { * A Java system property to set the initial HTTP/2 session receive window size (in bytes). Only * applies when using HTTP/2 transport. 
*/ - public static final String HTTP2_SESSION_RECV_WINDOW_SYSPROP = "solr.http2.initialSessionRecvWindow"; + public static final String HTTP2_SESSION_RECV_WINDOW_SYSPROP = + "solr.http2.initialSessionRecvWindow"; /** * A Java system property to set the initial HTTP/2 stream receive window size (in bytes). Only * applies when using HTTP/2 transport. */ - public static final String HTTP2_STREAM_RECV_WINDOW_SYSPROP = "solr.http2.initialStreamRecvWindow"; + public static final String HTTP2_STREAM_RECV_WINDOW_SYSPROP = + "solr.http2.initialStreamRecvWindow"; /** * A Java system property to enable the simple flow control strategy for HTTP/2. When set to - * "true", uses {@link SimpleFlowControlStrategy} instead of the default buffering - * strategy. Only applies when using HTTP/2 transport. + * "true", uses {@link SimpleFlowControlStrategy} instead of the default buffering strategy. Only + * applies when using HTTP/2 transport. */ public static final String HTTP2_SIMPLE_FLOW_CONTROL_SYSPROP = "solr.http2.useSimpleFlowControl"; @@ -295,7 +297,9 @@ private HttpClient createHttpClient(Builder builder) { HttpClient httpClient; HttpClientTransport transport; if (builder.shouldUseHttp1_1()) { - log.info("Create HttpJettySolrClient with HTTP/1.1 transport (solr.http1={})", System.getProperty("solr.http1")); + log.info( + "Create HttpJettySolrClient with HTTP/1.1 transport (solr.http1={})", + System.getProperty("solr.http1")); transport = new HttpClientTransportOverHTTP(clientConnector); httpClient = new HttpClient(transport); @@ -303,7 +307,9 @@ private HttpClient createHttpClient(Builder builder) { httpClient.setMaxConnectionsPerDestination(builder.getMaxConnectionsPerHost()); } } else { - log.info("Create HttpJettySolrClient with HTTP/2 transport (solr.http1={})", System.getProperty("solr.http1")); + log.info( + "Create HttpJettySolrClient with HTTP/2 transport (solr.http1={})", + System.getProperty("solr.http1")); HTTP2Client http2client = new HTTP2Client(clientConnector); configureHttp2FlowControl(http2client); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java index 06d3bb7b9d18..cec63c2f3393 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/jetty/HttpJettySolrClientCompatibilityTest.java @@ -58,8 +58,7 @@ public void testHttp2FlowControlSystemProperties() { try (var client = new HttpJettySolrClient.Builder().build()) { var transport = client.getHttpClient().getTransport(); - assertTrue( - "Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); + assertTrue("Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); HttpClientTransportOverHTTP2 http2Transport = (HttpClientTransportOverHTTP2) transport; HTTP2Client http2Client = http2Transport.getHTTP2Client(); @@ -100,8 +99,7 @@ public void testHttp2FlowControlDefaultsUnchangedWhenPropertiesNotSet() { try (var client = new HttpJettySolrClient.Builder().build()) { var transport = client.getHttpClient().getTransport(); - assertTrue( - "Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); + assertTrue("Expected HTTP/2 transport", transport instanceof HttpClientTransportOverHTTP2); HttpClientTransportOverHTTP2 http2Transport = (HttpClientTransportOverHTTP2) transport; HTTP2Client http2Client = 
http2Transport.getHTTP2Client(); From 620ddfaf3b4e6f5049dfd89c71e893e3979e3771 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 26 Jan 2026 16:26:51 -0500 Subject: [PATCH 08/13] log flow control --- solr/benchmark/build-lib.sh | 50 +++++++++++++++++++++++++++++++++ solr/benchmark/jmh.sh | 2 +- solr/benchmark/log4j2-bench.xml | 10 ++++++- 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 solr/benchmark/build-lib.sh diff --git a/solr/benchmark/build-lib.sh b/solr/benchmark/build-lib.sh new file mode 100644 index 000000000000..a6d4c27b219f --- /dev/null +++ b/solr/benchmark/build-lib.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Builds all JARs and copies them to lib/ so jmh.sh can run without invoking gradle. + +set -e + +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +REPO_ROOT="$SCRIPT_DIR/../.." +LIB_DIR="$SCRIPT_DIR/lib" + +echo "Building jars..." +"$REPO_ROOT/gradlew" -p "$REPO_ROOT" jar + +echo "Getting classpath from gradle..." +CLASSPATH=$("$REPO_ROOT/gradlew" -p "$SCRIPT_DIR" -q echoCp) + +echo "Copying JARs to lib/..." +rm -rf "$LIB_DIR" +mkdir -p "$LIB_DIR" + +# Copy all JAR dependencies +echo "$CLASSPATH" | tr ':' '\n' | while read -r jar; do + if [ -f "$jar" ] && [[ "$jar" == *.jar ]]; then + cp "$jar" "$LIB_DIR/" + fi +done + +# Copy the benchmark module's own JAR (echoCp outputs classes dir, not the JAR) +BENCHMARK_JAR=$(ls "$SCRIPT_DIR/build/libs"/solr-benchmark-*.jar 2>/dev/null | head -1) +if [ -n "$BENCHMARK_JAR" ]; then + cp "$BENCHMARK_JAR" "$LIB_DIR/" +fi + +JAR_COUNT=$(ls -1 "$LIB_DIR"/*.jar 2>/dev/null | wc -l) +echo "Done. Copied $JAR_COUNT JARs to lib/" +echo "You can now run jmh.sh without gradle being invoked." 
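+
+# Illustrative usage (an example, not invoked by this script): after running build-lib.sh once,
+# jmh.sh can be run repeatedly without gradle. The benchmark name and -p values below are only
+# sample arguments used elsewhere in this change.
+#
+#   cd solr/benchmark
+#   ./build-lib.sh
+#   ./jmh.sh -p numShards=12 -p useHttp1=false StreamingSearch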
diff --git a/solr/benchmark/jmh.sh b/solr/benchmark/jmh.sh index ab4ee52551aa..0900ed18012a 100755 --- a/solr/benchmark/jmh.sh +++ b/solr/benchmark/jmh.sh @@ -53,7 +53,7 @@ jvmArgs="-jvmArgs -Djmh.shutdownTimeout=5 -jvmArgs -Djmh.shutdownTimeout.step=3 gcArgs="-jvmArgs -XX:+UseG1GC -jvmArgs -XX:+ParallelRefProcEnabled" # -jvmArgs -Dlog4j2.debug -loggingArgs="-jvmArgs -Dlog4jConfigurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true" +loggingArgs="-jvmArgs -Dlog4j2.configurationFile=./log4j2-bench.xml -jvmArgs -Dlog4j2.is.webapp=false -jvmArgs -Dlog4j2.garbagefreeThreadContextMap=true -jvmArgs -Dlog4j2.enableDirectEncoders=true -jvmArgs -Dlog4j2.enable.threadlocals=true -jvmArgs -Djava.util.logging.config.file=./logging.properties" #set -x diff --git a/solr/benchmark/log4j2-bench.xml b/solr/benchmark/log4j2-bench.xml index c3b81a84ca5e..ce7d42c23435 100644 --- a/solr/benchmark/log4j2-bench.xml +++ b/solr/benchmark/log4j2-bench.xml @@ -96,10 +96,18 @@ + + + + + + + + + - From 9310129b65ab2450379861d586af10e5bcfca9ba Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 26 Jan 2026 17:23:59 -0500 Subject: [PATCH 09/13] simplify --- solr/benchmark/jmh.ps1 | 116 ------- solr/benchmark/jmh.sh | 28 +- .../bench/lifecycle/ReplicationRecovery.java | 288 ------------------ 3 files changed, 15 insertions(+), 417 deletions(-) delete mode 100644 solr/benchmark/jmh.ps1 delete mode 100644 solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java diff --git a/solr/benchmark/jmh.ps1 b/solr/benchmark/jmh.ps1 deleted file mode 100644 index 7a8cae366e93..000000000000 --- a/solr/benchmark/jmh.ps1 +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -param( - [string]$SysProps = "" -) - -# Get all arguments passed after the script name (remaining args go to JMH) -$JmhArgs = $args - -$ErrorActionPreference = "Stop" - -$scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path -$gradlewDir = Join-Path $scriptDir "..\..\" -$gradlew = Join-Path $gradlewDir "gradlew.bat" - -# Check if lib directory exists -$libDir = Join-Path $scriptDir "lib" -if (Test-Path $libDir) { - Write-Host "Using lib directory for classpath..." - $classpath = "$libDir\*;$scriptDir\build\classes\java\main" -} else { - Write-Host "Getting classpath from gradle..." 
- - # Build the jars first - Push-Location $gradlewDir - try { - & $gradlew -q jar - if ($LASTEXITCODE -ne 0) { - Write-Error "Gradle build failed" - exit 1 - } - Write-Host "Gradle build done" - } finally { - Pop-Location - } - - # Get classpath from gradle - Push-Location $scriptDir - try { - $classpath = & $gradlew -q echoCp - if ($LASTEXITCODE -ne 0) { - Write-Error "Failed to get classpath from gradle" - exit 1 - } - } finally { - Pop-Location - } -} - -Write-Host "Running JMH with args: $JmhArgs" - -# JVM Arguments -$jvmArgs = @( - "-jvmArgs", "-Djmh.shutdownTimeout=5", - "-jvmArgs", "-Djmh.shutdownTimeout.step=3", - "-jvmArgs", "-Djava.security.egd=file:/dev/./urandom", - "-jvmArgs", "-XX:+UnlockDiagnosticVMOptions", - "-jvmArgs", "-XX:+DebugNonSafepoints", - "-jvmArgs", "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED" -) - -# GC Arguments -$gcArgs = @( - "-jvmArgs", "-XX:+UseG1GC", - "-jvmArgs", "-XX:+ParallelRefProcEnabled" -) - -# Logging Arguments -$loggingArgs = @( - "-jvmArgs", "-Dlog4jConfigurationFile=./log4j2-bench.xml", - "-jvmArgs", "-Dlog4j2.is.webapp=false", - "-jvmArgs", "-Dlog4j2.garbagefreeThreadContextMap=true", - "-jvmArgs", "-Dlog4j2.enableDirectEncoders=true", - "-jvmArgs", "-Dlog4j2.enable.threadlocals=true" -) - -# User-provided system properties -$userSysPropsArgs = @() -if ($SysProps -ne "") { - # Split on whitespace, handling -D properties - $props = $SysProps -split '\s+(?=-D)' | Where-Object { $_ -ne "" } - foreach ($prop in $props) { - $userSysPropsArgs += "-jvmArgs", $prop.Trim() - } - Write-Host "User system properties: $($props -join ', ')" -} - -# Build the full argument list -$allArgs = @( - "-cp", $classpath, - "--add-opens=java.base/java.io=ALL-UNNAMED", - "-Djdk.module.illegalAccess.silent=true", - "org.openjdk.jmh.Main" -) + $jvmArgs + $loggingArgs + $gcArgs + $userSysPropsArgs + $JmhArgs - -# Run JMH -Write-Host "Executing: java $($allArgs -join ' ')" -& java $allArgs - -$exitCode = $LASTEXITCODE -Write-Host "JMH benchmarks done (exit code: $exitCode)" -exit $exitCode - diff --git a/solr/benchmark/jmh.sh b/solr/benchmark/jmh.sh index 0900ed18012a..6ef517f0da0b 100755 --- a/solr/benchmark/jmh.sh +++ b/solr/benchmark/jmh.sh @@ -20,21 +20,23 @@ base_dir=$(dirname "$0") if [ "${base_dir}" == "." ]; then gradlew_dir="../.." - - if [ -d "lib" ]; then - echo "Using lib directory for classpath..." - classpath="lib/*" - else - echo "Getting classpath from gradle..." - # --no-daemon - gradleCmd="${gradlew_dir}/gradlew" - $gradleCmd -q -p ../../ jar - echo "gradle build done" - classpath=$($gradleCmd -q echoCp) - fi else echo "Benchmarks need to be run from the 'solr/benchmark' directory" - exit 1 + exit +fi + + +if [ -d "lib" ] +then + echo "Using lib directory for classpath..." + classpath="lib/*:build/classes/java/main" +else + echo "Getting classpath from gradle..." + # --no-daemon + gradleCmd="${gradlew_dir}/gradlew" + $gradleCmd -q -p ../../ jar + echo "gradle build done" + classpath=$($gradleCmd -q echoCp) fi # shellcheck disable=SC2145 diff --git a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java b/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java deleted file mode 100644 index 3459165018c7..000000000000 --- a/solr/benchmark/src/java/org/apache/solr/bench/lifecycle/ReplicationRecovery.java +++ /dev/null @@ -1,288 +0,0 @@ -/* - * - * * Licensed to the Apache Software Foundation (ASF) under one or more - * * contributor license agreements. 
See the NOTICE file distributed with - * * this work for additional information regarding copyright ownership. - * * The ASF licenses this file to You under the Apache License, Version 2.0 - * * (the "License"); you may not use this file except in compliance with - * * the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, software - * * distributed under the License is distributed on an "AS IS" BASIS, - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * * See the License for the specific language governing permissions and - * * limitations under the License. - * - */ -package org.apache.solr.bench.lifecycle; - -import static org.apache.solr.bench.BaseBenchState.log; -import static org.apache.solr.bench.Docs.docs; -import static org.apache.solr.bench.generators.SourceDSL.integers; -import static org.apache.solr.bench.generators.SourceDSL.strings; - -import java.util.concurrent.TimeUnit; -import org.apache.solr.bench.Docs; -import org.apache.solr.bench.MiniClusterState; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.Replica; -import org.apache.solr.common.cloud.Slice; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Level; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Threads; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; - -/** - * A benchmark to measure shard replication/recovery performance. - * - *

This benchmark creates a collection with 12 shards on a single node, indexes approximately 1GB - * of data, then adds replicas on a second node to trigger recovery. It measures the time taken for - * all replicas to become active. - */ -@BenchmarkMode(Mode.SingleShotTime) -@OutputTimeUnit(TimeUnit.MILLISECONDS) -@Threads(1) -@Warmup(iterations = 0) -@Measurement(iterations = 1) -@Fork(value = 1) -public class ReplicationRecovery { - - @State(Scope.Benchmark) - public static class BenchState { - - static final String COLLECTION = "replicationTestCollection"; - - @Param("12") - int numShards; - - @Param("100") - int pollIntervalMs; - - // Number of docs to index. Each doc is ~10KB. - // Use -p docCount=100000 for ~1GB of data. - @Param("1000") - int docCount; - - // Auto commit interval in milliseconds - @Param("10000") - int autoCommitMaxTime; - - // Number of threads for parallel indexing (0 = sequential) - @Param("4") - int indexThreads; - - // Batch size for indexing (docs per request) - @Param("1000") - int batchSize; - - // Replica type for new replicas: NRT, TLOG, or PULL - // PULL replicas just copy segments (fastest for replication) - // TLOG replicas replay transaction log - // NRT replicas do full local indexing - @Param("NRT") - String replicaType; - - private final Docs largeDocs; - private String secondNodeUrl; - - public BenchState() { - // Create docs with substantial content to generate ~100KB per doc - // This will help us reach ~1GB with 10k docs - // Using _s (string) fields instead of _t (text) to avoid analysis overhead - largeDocs = - docs() - .field("id", integers().incrementing()) - // Multiple large string fields to bulk up document size to ~100KB (no analysis) - .field( - "text1_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field( - "text2_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field( - "text3_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field( - "text4_ss", strings().basicLatinAlphabet().multi(100).ofLengthBetween(200, 400)) - .field("text5_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text6_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text7_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field("text8_ss", strings().basicLatinAlphabet().multi(80).ofLengthBetween(150, 300)) - .field( - "content_ss", - strings().basicLatinAlphabet().multi(200).ofLengthBetween(100, 200)); - } - - @Setup(Level.Trial) - public void doSetup(MiniClusterState.MiniClusterBenchState miniClusterState) throws Exception { - log("Setting up ReplicationRecovery benchmark..."); - - // Set autoCommit.maxTime before starting the cluster - System.setProperty("autoCommit.maxTime", String.valueOf(autoCommitMaxTime)); - log("Set autoCommit.maxTime to " + autoCommitMaxTime + "ms"); - - // Start cluster with 2 nodes - miniClusterState.startMiniCluster(2); - - // Store the second node URL for later use - secondNodeUrl = miniClusterState.nodes.get(1); - log("First node URL: " + miniClusterState.nodes.get(0)); - log("Second node URL: " + secondNodeUrl); - - // Create collection with all shards on the first node only (1 replica each) - log("Creating collection with " + numShards + " shards on first node..."); - CollectionAdminRequest.Create createRequest = - CollectionAdminRequest.createCollection(COLLECTION, "conf", numShards, 1); - // Force all replicas to be created on the first node - // Node name format 
is host:port_solr (with underscore, not slash) - String firstNode = - miniClusterState - .nodes - .get(0) - .replace("http://", "") - .replace("https://", "") - .replace("/", "_"); - log("First node name for createNodeSet: " + firstNode); - createRequest.setCreateNodeSet(firstNode); - miniClusterState.client.requestWithBaseUrl( - miniClusterState.nodes.get(0), createRequest, null); - - miniClusterState.cluster.waitForActiveCollection( - COLLECTION, 30, TimeUnit.SECONDS, numShards, numShards); - - log( - "Collection created. Indexing " - + docCount - + " documents with " - + indexThreads - + " threads, batch size " - + batchSize - + "..."); - - // Index documents - long indexStart = System.currentTimeMillis(); - if (indexThreads > 0) { - miniClusterState.indexParallelBatched( - COLLECTION, largeDocs, docCount, indexThreads, batchSize); - } else { - miniClusterState.index(COLLECTION, largeDocs, docCount, false); - } - long indexTime = System.currentTimeMillis() - indexStart; - log("Indexing completed in " + indexTime + "ms"); - - // Wait for autoCommit to ensure all data is committed - log("Waiting for autoCommit (" + autoCommitMaxTime + "ms + buffer)..."); - Thread.sleep(autoCommitMaxTime + 2000); - - log("Setup complete. Ready to benchmark replication recovery."); - } - } - - /** - * Benchmark that measures the time to replicate all shards to a second node. - * - *

This adds a replica for each of the 12 shards to the second node and polls the cluster state - * every 100ms until all replicas are active. - */ - @Benchmark - public long replicateShards( - MiniClusterState.MiniClusterBenchState miniClusterState, - BenchState state, - Blackhole blackhole) - throws Exception { - - long startTime = System.currentTimeMillis(); - int totalReplicas = state.numShards * 2; // Original + new replicas - - // Parse replica type - Replica.Type type = Replica.Type.valueOf(state.replicaType.toUpperCase()); - log( - "Starting replication of " - + state.numShards - + " shards to second node (replica type: " - + type - + ")..."); - - // Get the second node name (without http prefix, with underscore) for the replica placement - String secondNode = - state.secondNodeUrl.replace("http://", "").replace("https://", "").replace("/", "_"); - - // Add a replica for each shard to the second node - for (int i = 1; i <= state.numShards; i++) { - String shardName = "shard" + i; - CollectionAdminRequest.AddReplica addReplica = - CollectionAdminRequest.addReplicaToShard(BenchState.COLLECTION, shardName, type); - addReplica.setNode(secondNode); - // Send request asynchronously to allow parallel recovery - addReplica.setAsyncId("add-replica-" + shardName); - miniClusterState.client.requestWithBaseUrl(miniClusterState.nodes.get(0), addReplica, null); - } - - log("All add-replica requests submitted. Polling for recovery completion..."); - - // Poll cluster state until all replicas are active - int pollCount = 0; - boolean allActive = false; - long lastLogTime = startTime; - - while (!allActive) { - Thread.sleep(state.pollIntervalMs); - pollCount++; - - // Refresh and check cluster state - miniClusterState.cluster.getZkStateReader().forceUpdateCollection(BenchState.COLLECTION); - DocCollection collection = - miniClusterState.cluster.getZkStateReader().getCollection(BenchState.COLLECTION); - - int activeCount = 0; - int recoveringCount = 0; - int downCount = 0; - - for (Slice slice : collection.getSlices()) { - for (Replica replica : slice.getReplicas()) { - Replica.State replicaState = replica.getState(); - if (replicaState == Replica.State.ACTIVE) { - activeCount++; - } else if (replicaState == Replica.State.RECOVERING) { - recoveringCount++; - } else { - downCount++; - } - } - } - - // Log progress every 5 seconds - long now = System.currentTimeMillis(); - if (now - lastLogTime >= 5000) { - log( - String.format( - "Recovery progress: %d active, %d recovering, %d down (total needed: %d)", - activeCount, recoveringCount, downCount, totalReplicas)); - lastLogTime = now; - } - - allActive = (activeCount == totalReplicas); - blackhole.consume(collection); - } - - long endTime = System.currentTimeMillis(); - long duration = endTime - startTime; - - log( - String.format( - "Replication complete! All %d replicas active. 
Duration: %d ms, Poll count: %d", - totalReplicas, duration, pollCount)); - - return duration; - } -} From 48e23e99b9022d263c90b3a5d83868dd1bd5c921 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Mon, 26 Jan 2026 20:44:33 -0500 Subject: [PATCH 10/13] add server-side properties and results --- index-recovery-tests.md | 13 ++++++ solr/server/etc/jetty-http.xml | 9 +++++ solr/server/etc/jetty-https.xml | 9 +++++ stream-benchmark-results.md | 71 +++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+) create mode 100644 index-recovery-tests.md create mode 100644 stream-benchmark-results.md diff --git a/index-recovery-tests.md b/index-recovery-tests.md new file mode 100644 index 000000000000..39fc87dfe607 --- /dev/null +++ b/index-recovery-tests.md @@ -0,0 +1,13 @@ +# Harder-to-Reproduce Index Recovery Performance Results + +I tested recovery of 1 and 12 shards of ~20 Gigs each. The size makes it a bit challenging to package nicely in a reproducible benchmark although I am sure it can be done. I am confident you can reproduce this behavior with a comparable amount of data and cloud structure. I can share the scripts I used to achieve these results if it is helpful. + +## Results Summary + +| Scenario | Shards | Configuration | Result | Time | +|----------|--------|---------------|--------|------| +| HTTP/2 | 1 | default | Fast | ~40s | +| HTTP/1 | 1 | default | Fast | ~50s | +| HTTP/1 | 12 | default | Fast | ~90s | +| HTTP/2 | 12 | default | Slowest | ~320s | +| HTTP/2 | 12 | `maxConcurrentStreams=1`| Slower | ~180s | diff --git a/solr/server/etc/jetty-http.xml b/solr/server/etc/jetty-http.xml index 02f53991c5c1..cf43b2836c85 100644 --- a/solr/server/etc/jetty-http.xml +++ b/solr/server/etc/jetty-http.xml @@ -34,6 +34,15 @@ + + + + + + + + + diff --git a/solr/server/etc/jetty-https.xml b/solr/server/etc/jetty-https.xml index 4a74eb125063..d5bcae7f14f9 100644 --- a/solr/server/etc/jetty-https.xml +++ b/solr/server/etc/jetty-https.xml @@ -54,6 +54,15 @@ + + + + + + + + + diff --git a/stream-benchmark-results.md b/stream-benchmark-results.md new file mode 100644 index 000000000000..959150bd4ca4 --- /dev/null +++ b/stream-benchmark-results.md @@ -0,0 +1,71 @@ +# Reproducible /Stream Performance Results + +## Scenario: HTTP/2 with 1 shard + +**Result: Slow** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=1 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 1 | 25 | false | thrpt | 4 | 0.257 | ± 0.308 | ops/s | + +--- + +## Scenario: HTTP/2 with 12 shards + +**Result: Hangs indefinitely** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 StreamingSearch +``` + +It appears server's flow control forcibly sets recvWindow to 0 on the offending client. There appear to be too many concurrent streams fighting for the same piece of "session window". I'm attaching flow-control-stall.log which gives a more detailed view of this response stall. 
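+
+As a rough orientation for the scenarios below, here is a minimal sketch (assuming Jetty's
+`HTTP2Client` window setters and the property names introduced earlier in this patch) of how the
+two client-side window properties could be applied. It is not the actual
+`HttpJettySolrClient.configureHttp2FlowControl` implementation, just an illustration of which knobs
+the later runs are turning.
+
+```java
+import org.eclipse.jetty.http2.client.HTTP2Client;
+
+class Http2WindowSketch {
+  static void applyWindows(HTTP2Client http2Client) {
+    // Per-stream receive window: how much unacknowledged data a single shard response may occupy.
+    Integer stream = Integer.getInteger("solr.http2.initialStreamRecvWindow");
+    if (stream != null) {
+      http2Client.setInitialStreamRecvWindow(stream);
+    }
+    // Per-session receive window: shared by all concurrent streams on the connection; with 12
+    // shard responses competing for it, any one stream can be starved down to a window of 0.
+    Integer session = Integer.getInteger("solr.http2.initialSessionRecvWindow");
+    if (session != null) {
+      http2Client.setInitialSessionRecvWindow(session);
+    }
+  }
+}
+```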
+ +--- + +## Scenario: HTTP/2 with 12 shards and LOWER initialStreamRecvWindow + +**Result: Slow** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 -jvmArgs -Dsolr.http2.initialStreamRecvWindow=500000 StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 12 | 25 | false | thrpt | 4 | 0.344 | ± 0.027 | ops/s | + +The reason setting initialStreamRecvWindow lower helps avoid the stall is because each response gets sequestered to a smaller portion of the total session window pool and so each shard progresses consistently, albeit slowly. This result is still poor. + +--- + +## Scenario: HTTP/2 with 12 shards and HIGHER initialSessionRecvWindow + +**Result: Slow** + +```bash +./jmh.sh -p useHttp1=false -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 -jvmArgs -Dsolr.http2.initialStreamRecvWindow=8000000 -jvmArgs -Dsolr.http2.initialSessionRecvWindow=96000000 StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 12 | 25 | false | thrpt | 4 | 0.332 | ± 0.050 | ops/s | + +In other words, increasing the client-side response window doesn't help. + +--- + +## Scenario: HTTP/1 with 12 shards + +**Result: Fast** + +```bash +./jmh.sh -p useHttp1=true -p nodeCount=2 -p numShards=12 -p numReplicas=2 -p docCount=10000 -p indexThreads=14 -p batchSize=500 -p docSizeBytes=10024 -p numTextFields=25 -jvmArgs -Dsolr.http1=true StreamingSearch +``` + +| Benchmark | batchSize | docCount | docSizeBytes | indexThreads | nodeCount | numReplicas | numShards | numTextFields | useHttp1 | Mode | Cnt | Score | Error | Units | +|-----------|-----------|----------|--------------|--------------|-----------|-------------|-----------|---------------|----------|------|-----|-------|-------|-------| +| StreamingSearch.stream | 500 | 10000 | 10024 | 14 | 2 | 2 | 12 | 25 | true | thrpt | 4 | 2.301 | ± 0.676 | ops/s | From 26b1da949a6d5fd6a4977e78ce803c5e6549af81 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 28 Jan 2026 14:21:04 -0500 Subject: [PATCH 11/13] explanatory note --- index-recovery-tests.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index-recovery-tests.md b/index-recovery-tests.md index 39fc87dfe607..8c7b481612d2 100644 --- a/index-recovery-tests.md +++ b/index-recovery-tests.md @@ -1,6 +1,7 @@ # Harder-to-Reproduce Index Recovery Performance Results -I tested recovery of 1 and 12 shards of ~20 Gigs each. The size makes it a bit challenging to package nicely in a reproducible benchmark although I am sure it can be done. I am confident you can reproduce this behavior with a comparable amount of data and cloud structure. 
I can share the scripts I used to achieve these results if it is helpful. +I tested recovery of 1 and 12 shards of ~20 Gigs each. The size makes it a bit challenging to package nicely in a reproducible benchmark, although I am sure it can be done. +I am confident you can reproduce this behavior with a comparable amount of data and cloud structure. I can share the scripts I used to achieve these results if it is helpful. ## Results Summary @@ -11,3 +12,6 @@ I tested recovery of 1 and 12 shards of ~20 Gigs each. The size makes it a bit c | HTTP/1 | 12 | default | Fast | ~90s | | HTTP/2 | 12 | default | Slowest | ~320s | | HTTP/2 | 12 | `maxConcurrentStreams=1`| Slower | ~180s | + +A minor note, a theory explaining why `maxConcurrentStreams=1` is still slower than HTTP/1 is that our network set-up may not be capable of fully utilizing the network capacity +with a single connection. Still, the improvement suggests that even if we were able to fully consume bandwidth with HTTP/2's multiplexing, the overhead of managing multiple streams is still detrimental to recovery performance. From 7c61d7c1849014085b2d051fb6ff859fc85536b6 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Wed, 28 Jan 2026 14:28:59 -0500 Subject: [PATCH 12/13] revert comment --- index-recovery-tests.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/index-recovery-tests.md b/index-recovery-tests.md index 8c7b481612d2..3e6fbcab19ce 100644 --- a/index-recovery-tests.md +++ b/index-recovery-tests.md @@ -13,5 +13,3 @@ I am confident you can reproduce this behavior with a comparable amount of data | HTTP/2 | 12 | default | Slowest | ~320s | | HTTP/2 | 12 | `maxConcurrentStreams=1`| Slower | ~180s | -A minor note, a theory explaining why `maxConcurrentStreams=1` is still slower than HTTP/1 is that our network set-up may not be capable of fully utilizing the network capacity -with a single connection. Still, the improvement suggests that even if we were able to fully consume bandwidth with HTTP/2's multiplexing, the overhead of managing multiple streams is still detrimental to recovery performance. From ac38bcad4e76509c980aff575a2745a65cad8cd5 Mon Sep 17 00:00:00 2001 From: Luke Kot-Zaniewski Date: Fri, 13 Feb 2026 14:26:42 -0500 Subject: [PATCH 13/13] add scripts for index recovery test --- scripts/add-replicas.sh | 154 ++++++++++++ scripts/cycle-replicas.sh | 150 +++++++++++ scripts/delete-replicas.sh | 111 +++++++++ scripts/ingest-docs-text-variants2.sh | 341 ++++++++++++++++++++++++++ 4 files changed, 756 insertions(+) create mode 100644 scripts/add-replicas.sh create mode 100644 scripts/cycle-replicas.sh create mode 100644 scripts/delete-replicas.sh create mode 100644 scripts/ingest-docs-text-variants2.sh diff --git a/scripts/add-replicas.sh b/scripts/add-replicas.sh new file mode 100644 index 000000000000..f2ee859fd18d --- /dev/null +++ b/scripts/add-replicas.sh @@ -0,0 +1,154 @@ +#!/bin/bash +# +# /* +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. 
You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +# ============================================================================= +# Script to add replicas to a target node +# +# Usage: +# ./add-replicas.sh [SOLR_URL] [COLLECTION] [TARGET_NODE] [COUNT] [TYPE] +# +# Example: +# ./add-replicas.sh http://localhost:8983/solr test solr2:8983_solr 12 TLOG +# ./add-replicas.sh http://localhost:8983/solr test solr2:8983_solr 1 NRT +# ============================================================================= + +set -e + +SOLR_URL="${1:-http://localhost:8983/solr}" +COLLECTION="${2:-test}" +TARGET_NODE="${3:-solr2:8983_solr}" +NUM_SHARDS="${4:-12}" +TYPE="${5:-TLOG}" + +echo "Ensuring $NUM_SHARDS shards with 1 replica of type $TYPE on $TARGET_NODE for collection $COLLECTION" + +# Fetch cluster status +echo "Fetching cluster status from $SOLR_URL..." +cluster_status=$(curl -s "$SOLR_URL/admin/collections?action=CLUSTERSTATUS") + +# Validate JSON response +if ! echo "$cluster_status" | jq -e . >/dev/null 2>&1; then + echo "Error: Invalid JSON response from Solr." + echo "Response: $cluster_status" + exit 1 +fi + +# Check if collection exists +if echo "$cluster_status" | jq -e ".cluster.collections[\"$COLLECTION\"] == null" >/dev/null; then + echo "Collection '$COLLECTION' not found." + echo "Creating collection '$COLLECTION' with $NUM_SHARDS shards..." + + # Determine replica types for CREATE + # prioritizing TLOG if requested + CREATE_PARAMS="action=CREATE&name=$COLLECTION&numShards=$NUM_SHARDS" + + if [ "$TYPE" == "TLOG" ]; then + CREATE_PARAMS="${CREATE_PARAMS}&nrtReplicas=0&tlogReplicas=1" + elif [ "$TYPE" == "PULL" ]; then + CREATE_PARAMS="${CREATE_PARAMS}&nrtReplicas=0&pullReplicas=1" + else + CREATE_PARAMS="${CREATE_PARAMS}&replicationFactor=1" + fi + + # Create collection targeted at the node to ensure initial replicas are there + create_response=$(curl -s -w "\n%{http_code}" \ + "$SOLR_URL/admin/collections?${CREATE_PARAMS}&createNodeSet=$TARGET_NODE") + + create_http_code=$(echo "$create_response" | tail -n1) + + if [ "$create_http_code" != "200" ]; then + echo "Error creating collection: HTTP $create_http_code" + echo "$create_response" | head -n -1 + exit 1 + fi + + echo "Collection created successfully." + + # We are done since CREATE with createNodeSet puts them there + exit 0 +fi + +echo "Collection '$COLLECTION' exists. Checking shards..." + +# Refresh cluster status +cluster_status=$(curl -s "$SOLR_URL/admin/collections?action=CLUSTERSTATUS") + +# Iterate through expected shards 1..NUM_SHARDS +for ((i=1; i<=NUM_SHARDS; i++)); do + shard_name="shard${i}" + + # Check if shard exists + shard_exists=$(echo "$cluster_status" | jq -r ".cluster.collections[\"$COLLECTION\"].shards[\"$shard_name\"] // empty") + + if [ -z "$shard_exists" ]; then + echo " $shard_name does not exist. Creating..." + + # Create shard + response=$(curl -s -w "\n%{http_code}" \ + "$SOLR_URL/admin/collections?action=CREATESHARD&collection=$COLLECTION&shard=$shard_name&createNodeSet=$TARGET_NODE") + + # CREATESHARD doesn't take type params easily for the new replica, it usually uses collection defaults. 
+ # But if we use createNodeSet it creates a replica there. + # However, checking if it created the right TYPE is hard atomically. + # Typically CREATESHARD adds replicas based on collection settings. + + http_code=$(echo "$response" | tail -n1) + if [ "$http_code" != "200" ]; then + echo " Error creating shard: HTTP $http_code" + echo "$response" | head -n -1 + exit 1 + fi + echo " $shard_name created." + + # We might need to ensure the type is correct if default isn't TLOG. + # But for now assuming collection settings or manual add later if needed. + # Ideally we'd check and delete/re-add if wrong type, but that's complex. + else + # Shard exists, check for replica on TARGET_NODE + # We look for a replica on this node + replicas_on_node=$(echo "$cluster_status" | jq -r ".cluster.collections[\"$COLLECTION\"].shards[\"$shard_name\"].replicas | to_entries[] | select(.value.node_name == \"$TARGET_NODE\") | .key") + + if [ -z "$replicas_on_node" ]; then + echo " $shard_name exists but has no replica on $TARGET_NODE. Adding $TYPE replica..." + + response=$(curl -s -w "\n%{http_code}" \ + "$SOLR_URL/admin/collections?action=ADDREPLICA&collection=$COLLECTION&shard=$shard_name&node=$TARGET_NODE&type=$TYPE") + + http_code=$(echo "$response" | tail -n1) + if [ "$http_code" != "200" ]; then + echo " Error adding replica: HTTP $http_code" + echo "$response" | head -n -1 + exit 1 + fi + echo " Replica added." + else + echo " $shard_name already has replica on $TARGET_NODE. Skipping." + fi + fi +done + +echo "" +echo "=========================================" +echo "Configuration complete!" +echo "Collection: $COLLECTION" +echo "Target node: $TARGET_NODE" +echo "Shards checked: $NUM_SHARDS" +echo "=========================================" + diff --git a/scripts/cycle-replicas.sh b/scripts/cycle-replicas.sh new file mode 100644 index 000000000000..f58ec12baafa --- /dev/null +++ b/scripts/cycle-replicas.sh @@ -0,0 +1,150 @@ +#!/bin/bash +# +# /* +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# + +# ============================================================================= +# Script to remove all replicas from a node and then add them back +# +# Usage: +# ./cycle-replicas.sh [SOLR_URL] [COLLECTION] [TARGET_NODE] +# +# Example: +# ./cycle-replicas.sh http://localhost:8983/solr test solr2:8983_solr +# ============================================================================= + +set -e + +SOLR_URL="${1:-http://localhost:8983/solr}" +COLLECTION="${2:-test}" +TARGET_NODE="${3:-solr2:8983_solr}" + +echo "Cycling replicas on $TARGET_NODE for collection $COLLECTION" +echo "" + +# Get cluster status +cluster_status=$(curl -s "$SOLR_URL/admin/collections?action=CLUSTERSTATUS") + +# Find all replicas on the target node +# Format: shard_name:replica_name +replicas_on_node=$(echo "$cluster_status" | jq -r " + .cluster.collections[\"$COLLECTION\"].shards | to_entries[] | + .key as \$shard | + .value.replicas | to_entries[] | + select(.value.node_name == \"$TARGET_NODE\") | + \"\(\$shard):\(.key)\" +") + +if [ -z "$replicas_on_node" ]; then + echo "No replicas found on $TARGET_NODE for collection $COLLECTION" + exit 0 +fi + +# Get list of shards that have replicas on target node +shards_on_node=$(echo "$replicas_on_node" | cut -d: -f1 | sort -u) + +echo "Found replicas on $TARGET_NODE:" +echo "$replicas_on_node" +echo "" + +# ========================================= +# PHASE 1: Remove all replicas from node +# ========================================= +echo "=========================================" +echo "PHASE 1: Removing replicas from $TARGET_NODE" +echo "=========================================" + +# Get the directory of the current script +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Call the delete-replicas subscript +"$SCRIPT_DIR/delete-replicas.sh" "$SOLR_URL" "$COLLECTION" "$TARGET_NODE" 2 + +echo "" +echo "All replicas removed from $TARGET_NODE" +echo "" + +# ========================================= +# PHASE 2: Add replicas back to node +# ========================================= +echo "=========================================" +echo "PHASE 2: Adding replicas back to $TARGET_NODE (async)" +echo "=========================================" + +async_ids=() +timestamp=$(date +%s) + +for shard in $shards_on_node; do + echo "Adding TLOG replica for $shard on $TARGET_NODE..." + + async_id="${COLLECTION}_${shard}_add_${timestamp}" + + # Delete any existing async status with this ID (ignore errors) + curl -s "$SOLR_URL/admin/collections?action=DELETESTATUS&requestid=$async_id" > /dev/null 2>&1 || true + + response=$(curl -s -w "\n%{http_code}" \ + "$SOLR_URL/admin/collections?action=ADDREPLICA&collection=$COLLECTION&shard=$shard&node=$TARGET_NODE&type=TLOG&async=$async_id") + + http_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | head -n -1) + + if [ "$http_code" != "200" ]; then + echo "Error: HTTP $http_code" + echo "$body" + exit 1 + fi + + async_ids+=("$async_id") + echo " Submitted (async id: $async_id)" +done + +echo "" +echo "Waiting for async operations to complete..." + +# Wait for all async operations to complete +for async_id in "${async_ids[@]}"; do + echo "Checking status of $async_id..." 
+ + while true; do + status_response=$(curl -s "$SOLR_URL/admin/collections?action=REQUESTSTATUS&requestid=$async_id") + state=$(echo "$status_response" | jq -r '.status.state') + + if [ "$state" == "completed" ]; then + echo " $async_id: completed" + # Clean up the async request + curl -s "$SOLR_URL/admin/collections?action=DELETESTATUS&requestid=$async_id" > /dev/null + break + elif [ "$state" == "failed" ]; then + echo " $async_id: FAILED" + echo "$status_response" | jq '.status' + exit 1 + else + echo " $async_id: $state (waiting...)" + sleep 2 + fi + done +done + +echo "" +echo "=========================================" +echo "Replica cycling complete!" +echo "Collection: $COLLECTION" +echo "Node: $TARGET_NODE" +echo "Shards cycled: $(echo "$shards_on_node" | wc -w)" +echo "=========================================" + diff --git a/scripts/delete-replicas.sh b/scripts/delete-replicas.sh new file mode 100644 index 000000000000..965a002ca489 --- /dev/null +++ b/scripts/delete-replicas.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# +# /* +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# + +# ============================================================================= +# Script to remove all replicas from a node +# +# Usage: +# ./delete-replicas.sh [SOLR_URL] [COLLECTION] [TARGET_NODE] [TIMEOUT] +# +# Example: +# ./delete-replicas.sh http://localhost:8983/solr test solr2:8983_solr +# ./delete-replicas.sh http://localhost:8983/solr test solr2:8983_solr 10 +# ============================================================================= + +set -e + +SOLR_URL="${1:-http://localhost:8983/solr}" +COLLECTION="${2:-test}" +TARGET_NODE="${3:-solr2:8983_solr}" +TIMEOUT="${4:-10}" + +echo "Deleting replicas from $TARGET_NODE for collection $COLLECTION" +echo "Timeout: ${TIMEOUT}s" +echo "" + +# Get cluster status +cluster_status=$(curl -s "$SOLR_URL/admin/collections?action=CLUSTERSTATUS") + +# Find all replicas on the target node +# Format: shard_name:replica_name +replicas_on_node=$(echo "$cluster_status" | jq -r " + .cluster.collections[\"$COLLECTION\"].shards | to_entries[] | + .key as \$shard | + .value.replicas | to_entries[] | + select(.value.node_name == \"$TARGET_NODE\") | + \"\(\$shard):\(.key)\" +") + +if [ -z "$replicas_on_node" ]; then + echo "No replicas found on $TARGET_NODE for collection $COLLECTION" + exit 0 +fi + +echo "Found replicas on $TARGET_NODE:" +echo "$replicas_on_node" +echo "" + +echo "=========================================" +echo "Removing replicas from $TARGET_NODE" +echo "=========================================" + +deleted_count=0 +skipped_count=0 + +for replica_info in $replicas_on_node; do + shard=$(echo "$replica_info" | cut -d: -f1) + replica=$(echo "$replica_info" | cut -d: -f2) + + echo "Removing $replica from $shard..." + + response=$(curl -s -w "\n%{http_code}" --max-time "$TIMEOUT" \ + "$SOLR_URL/admin/collections?action=DELETEREPLICA&collection=$COLLECTION&shard=$shard&replica=$replica" 2>&1) || true + + http_code=$(echo "$response" | tail -n1) + body=$(echo "$response" | head -n -1) + + # Check for timeout or empty response + if [[ -z "$http_code" ]] || [[ ! "$http_code" =~ ^[0-9]+$ ]]; then + echo " Warning: Request timed out for $replica, continuing..." + skipped_count=$((skipped_count + 1)) + continue + fi + + if [ "$http_code" != "200" ]; then + echo " Warning: HTTP $http_code for $replica" + echo " $body" + echo " Continuing with next replica..." + skipped_count=$((skipped_count + 1)) + continue + fi + + echo " Done" + deleted_count=$((deleted_count + 1)) +done + +echo "" +echo "=========================================" +echo "Replica deletion complete!" +echo "Collection: $COLLECTION" +echo "Node: $TARGET_NODE" +echo "Deleted: $deleted_count" +echo "Skipped: $skipped_count" +echo "=========================================" + diff --git a/scripts/ingest-docs-text-variants2.sh b/scripts/ingest-docs-text-variants2.sh new file mode 100644 index 000000000000..6b44315f27da --- /dev/null +++ b/scripts/ingest-docs-text-variants2.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# +# /* +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. 
You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. +# */ +# + +# ============================================================================= +# Script to ingest documents with text field variants into a Solr collection +# +# Each document contains: +# - 5 text values shared across 3 field types: text1_s, text1_txt_en, text1_txt_sort (through text5_*) +# - 10 additional large filler _txt_en fields: filler1_txt_en through filler10_txt_en +# - Total: 25 text fields per document (15 shared + 10 filler) +# - Shared text sizes: uniform random between MIN-MAX (under 32KB DV limit) +# - Filler text sizes: uniform random between MIN_TXT_EN-MAX_TXT_EN (no DV limit, can be large) +# +# Usage: +# ./ingest-docs-text-variants.sh [SOLR_URL] [COLLECTION] [START_ID] [NUM_THREADS] \ +# [TOTAL_DOCS] [BATCH_SIZE] [MIN_TEXT_SIZE] [MAX_TEXT_SIZE] [MIN_TXT_EN_SIZE] [MAX_TXT_EN_SIZE] +# +# Parameters: +# SOLR_URL - Solr base URL (default: http://localhost:8983/solr) +# COLLECTION - Collection name (default: test) +# START_ID - Starting document ID (default: 0) +# NUM_THREADS - Number of parallel threads (default: 10) +# TOTAL_DOCS - Total documents to ingest (default: 10000000) +# BATCH_SIZE - Documents per batch (default: 100) +# MIN_TEXT_SIZE - Min text size for shared fields (default: 1024) +# MAX_TEXT_SIZE - Max text size for shared fields (default: 30720, under 32KB DV limit) +# MIN_TXT_EN_SIZE - Min text size for filler _txt_en fields (default: 1024) +# MAX_TXT_EN_SIZE - Max text size for filler _txt_en fields (default: 102400, ~100KB, no DV limit) +# ============================================================================= + +set -e + +# Ensure output is unbuffered and locale handles binary input safely +export BASH_XTRACEFD=2 +export LC_ALL=C + +SOLR_URL="${1:-http://localhost:8983/solr}" +COLLECTION="${2:-test}" +START_ID="${3:-0}" +NUM_THREADS="${4:-10}" +TOTAL_DOCS="${5:-10000000}" +# Reduced batch size due to larger document sizes (up to ~450KB per doc) +BATCH_SIZE="${6:-100}" + +# Text size range for _s and _txt_sort fields: 1KB to 30KB (under 32KB docValue limit) +MIN_TEXT_SIZE="${7:-1024}" +MAX_TEXT_SIZE="${8:-30720}" + +# Text size range for _txt_en fields: 1KB to 100KB (no docValue limit, stored only) +MIN_TXT_EN_SIZE="${9:-1024}" +MAX_TXT_EN_SIZE="${10:-102400}" + +# Pre-generated block size (max of both maximums) +FIELD_SIZE=$((MAX_TEXT_SIZE > MAX_TXT_EN_SIZE ? 
MAX_TEXT_SIZE : MAX_TXT_EN_SIZE)) + +END_ID=$((START_ID + TOTAL_DOCS)) +DOCS_PER_THREAD=$((TOTAL_DOCS / NUM_THREADS)) + +# Pre-calculate common values to avoid spawning 'date' per document +TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ") + +echo "Ingesting $TOTAL_DOCS documents into $SOLR_URL/$COLLECTION" +echo "Each document has 25 text fields:" +echo " - 15 shared fields (5 values × 3 types: _s, _txt_en, _txt_sort)" +echo " - 10 filler _txt_en fields (filler1_txt_en through filler10_txt_en)" +echo "Shared text sizes: ${MIN_TEXT_SIZE}-${MAX_TEXT_SIZE} bytes (under 32KB DV limit)" +echo "Filler text sizes: ${MIN_TXT_EN_SIZE}-${MAX_TXT_EN_SIZE} bytes (no DV limit)" +echo "Parallel threads: $NUM_THREADS" + +generate_text() { + local size=$1 + # Use redirection and /dev/urandom safely + tr -dc 'a-zA-Z0-9 ' < /dev/urandom | head -c "$size" +} +export -f generate_text + +# Generate random size between MIN and MAX (uniform distribution) +random_text_size() { + echo $((MIN_TEXT_SIZE + RANDOM % (MAX_TEXT_SIZE - MIN_TEXT_SIZE + 1))) +} +export -f random_text_size + +echo "Pre-generating random text blocks (30KB each for maximum coverage)..." +TEXT_BLOCK_1=$(generate_text $FIELD_SIZE) +echo " Generated block 1" +TEXT_BLOCK_2=$(generate_text $FIELD_SIZE) +echo " Generated block 2" +TEXT_BLOCK_3=$(generate_text $FIELD_SIZE) +echo " Generated block 3" +TEXT_BLOCK_4=$(generate_text $FIELD_SIZE) +echo " Generated block 4" +TEXT_BLOCK_5=$(generate_text $FIELD_SIZE) +echo " Generated block 5" +echo "Text blocks ready." + +# Function to create a single document JSON +# Generates 5 text values shared across all 3 field types (_s, _txt_en, _txt_sort) +# Plus 10 additional large filler _txt_en fields (no DV limit) +# Plus non-textual fields from original script +create_document() { + local id=$1 + + # Non-textual fields (from original script) + local r_int=$((RANDOM % 5)) + local r_dec=$((RANDOM % 100)) + local views=$((RANDOM * RANDOM % 1000000)) + + # Generate 5 text values shared across _s, _txt_en, and _txt_sort fields (under 32KB DV limit) + local size1=$((MIN_TEXT_SIZE + RANDOM % (MAX_TEXT_SIZE - MIN_TEXT_SIZE))) + local start1=$((RANDOM % (FIELD_SIZE - size1 + 1))) + local text1="${TEXT_BLOCK_1:$start1:$size1}" + + local size2=$((MIN_TEXT_SIZE + RANDOM % (MAX_TEXT_SIZE - MIN_TEXT_SIZE))) + local start2=$((RANDOM % (FIELD_SIZE - size2 + 1))) + local text2="${TEXT_BLOCK_2:$start2:$size2}" + + local size3=$((MIN_TEXT_SIZE + RANDOM % (MAX_TEXT_SIZE - MIN_TEXT_SIZE))) + local start3=$((RANDOM % (FIELD_SIZE - size3 + 1))) + local text3="${TEXT_BLOCK_3:$start3:$size3}" + + local size4=$((MIN_TEXT_SIZE + RANDOM % (MAX_TEXT_SIZE - MIN_TEXT_SIZE))) + local start4=$((RANDOM % (FIELD_SIZE - size4 + 1))) + local text4="${TEXT_BLOCK_4:$start4:$size4}" + + local size5=$((MIN_TEXT_SIZE + RANDOM % (MAX_TEXT_SIZE - MIN_TEXT_SIZE))) + local start5=$((RANDOM % (FIELD_SIZE - size5 + 1))) + local text5="${TEXT_BLOCK_5:$start5:$size5}" + + # Generate 10 large filler _txt_en fields (no docValues, can be much larger) + local filler_size1=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start1=$((RANDOM % (FIELD_SIZE - filler_size1 + 1))) + local filler1="${TEXT_BLOCK_1:$filler_start1:$filler_size1}" + + local filler_size2=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start2=$((RANDOM % (FIELD_SIZE - filler_size2 + 1))) + local filler2="${TEXT_BLOCK_2:$filler_start2:$filler_size2}" + + local filler_size3=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - 
MIN_TXT_EN_SIZE))) + local filler_start3=$((RANDOM % (FIELD_SIZE - filler_size3 + 1))) + local filler3="${TEXT_BLOCK_3:$filler_start3:$filler_size3}" + + local filler_size4=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start4=$((RANDOM % (FIELD_SIZE - filler_size4 + 1))) + local filler4="${TEXT_BLOCK_4:$filler_start4:$filler_size4}" + + local filler_size5=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start5=$((RANDOM % (FIELD_SIZE - filler_size5 + 1))) + local filler5="${TEXT_BLOCK_5:$filler_start5:$filler_size5}" + + local filler_size6=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start6=$((RANDOM % (FIELD_SIZE - filler_size6 + 1))) + local filler6="${TEXT_BLOCK_1:$filler_start6:$filler_size6}" + + local filler_size7=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start7=$((RANDOM % (FIELD_SIZE - filler_size7 + 1))) + local filler7="${TEXT_BLOCK_2:$filler_start7:$filler_size7}" + + local filler_size8=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start8=$((RANDOM % (FIELD_SIZE - filler_size8 + 1))) + local filler8="${TEXT_BLOCK_3:$filler_start8:$filler_size8}" + + local filler_size9=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start9=$((RANDOM % (FIELD_SIZE - filler_size9 + 1))) + local filler9="${TEXT_BLOCK_4:$filler_start9:$filler_size9}" + + local filler_size10=$((MIN_TXT_EN_SIZE + RANDOM % (MAX_TXT_EN_SIZE - MIN_TXT_EN_SIZE))) + local filler_start10=$((RANDOM % (FIELD_SIZE - filler_size10 + 1))) + local filler10="${TEXT_BLOCK_5:$filler_start10:$filler_size10}" + + # Output document JSON + # All 3 field types (_s, _txt_en, _txt_sort) share the same text values + # Plus 10 large filler _txt_en fields + echo "{ + \"id\": \"doc_$id\", + \"text1_s\": \"$text1\", + \"text1_txt_en\": \"$text1\", + \"text1_txt_sort\": \"$text1\", + \"text2_s\": \"$text2\", + \"text2_txt_en\": \"$text2\", + \"text2_txt_sort\": \"$text2\", + \"text3_s\": \"$text3\", + \"text3_txt_en\": \"$text3\", + \"text3_txt_sort\": \"$text3\", + \"text4_s\": \"$text4\", + \"text4_txt_en\": \"$text4\", + \"text4_txt_sort\": \"$text4\", + \"text5_s\": \"$text5\", + \"text5_txt_en\": \"$text5\", + \"text5_txt_sort\": \"$text5\", + \"filler1_txt_en\": \"$filler1\", + \"filler2_txt_en\": \"$filler2\", + \"filler3_txt_en\": \"$filler3\", + \"filler4_txt_en\": \"$filler4\", + \"filler5_txt_en\": \"$filler5\", + \"filler6_txt_en\": \"$filler6\", + \"filler7_txt_en\": \"$filler7\", + \"filler8_txt_en\": \"$filler8\", + \"filler9_txt_en\": \"$filler9\", + \"filler10_txt_en\": \"$filler10\", + \"category_s\": \"category_$((id % 100))\", + \"author_s\": \"author_$((id % 500))\", + \"tags_ss\": [\"tag_$((id % 50))\", \"tag_$((id % 30))\", \"tag_$((id % 20))\"], + \"created_dt\": \"$TIMESTAMP\", + \"views_i\": $views, + \"rating_f\": $r_int.$r_dec +}" +} + +export -f create_document + +ingest_range() { + local thread_id=$1 + local range_start=$2 + local range_end=$3 + local thread_docs=$((range_end - range_start)) + local current_id=$range_start + local batch_num=0 + local docs_ingested=0 + local thread_start_time=$(date +%s) + + echo "[Thread $thread_id] Starting: $range_start to $((range_end - 1))" + + while [ $current_id -lt $range_end ]; do + # Generate fresh random blocks for this batch to ensure higher entropy + local TEXT_BLOCK_1=$(generate_text $FIELD_SIZE) + local TEXT_BLOCK_2=$(generate_text $FIELD_SIZE) + local 
TEXT_BLOCK_3=$(generate_text $FIELD_SIZE)
+        local TEXT_BLOCK_4=$(generate_text $FIELD_SIZE)
+        local TEXT_BLOCK_5=$(generate_text $FIELD_SIZE)
+
+        batch_num=$((batch_num + 1))
+        batch_start=$current_id
+        batch_end=$((current_id + BATCH_SIZE))
+
+        if [ $batch_end -gt $range_end ]; then
+            batch_end=$range_end
+        fi
+
+        # Write batch JSON directly to temp file
+        temp_file=$(mktemp)
+        printf '[' > "$temp_file"
+
+        # Loop optimization: minimize checks inside loop
+        create_document $batch_start >> "$temp_file"
+        for ((i=batch_start+1; i<batch_end; i++)); do
+            printf ',' >> "$temp_file"
+            create_document $i >> "$temp_file"
+        done
+
+        printf ']' >> "$temp_file"
+
+        # Send batch to Solr (added timeouts, increased max-time for larger payloads)
+        response=$(curl -s -S --connect-timeout 10 --max-time 600 -w "\n%{http_code}" -X POST \
+            "$SOLR_URL/$COLLECTION/update?commit=false" \
+            -H "Content-Type: application/json" \
+            -d @"$temp_file")
+
+        rm -f "$temp_file"
+
+        http_code=$(echo "$response" | tail -n1)
+
+        if [ "$http_code" != "200" ]; then
+            echo "[Thread $thread_id] Error: HTTP $http_code on batch $batch_num"
+            # Print error response safely
+            echo "$response" | head -n -1
+        fi
+
+        current_id=$batch_end
+        docs_ingested=$((current_id - range_start))
+
+        # Progress update every 10 batches (reduced from 20 due to larger docs)
+        if [ $((batch_num % 10)) -eq 0 ]; then
+            elapsed=$(($(date +%s) - thread_start_time))
+            if [ $elapsed -gt 0 ]; then
+                rate=$((docs_ingested / elapsed))
+                pct=$((docs_ingested * 100 / thread_docs))
+                echo "[Thread $thread_id] Progress: $docs_ingested / $thread_docs ($pct%) - $rate docs/sec"
+            fi
+        fi
+    done
+
+    echo "[Thread $thread_id] COMPLETED."
+}
+
+export -f ingest_range
+export SOLR_URL COLLECTION BATCH_SIZE TIMESTAMP TEXT_BLOCK_1 TEXT_BLOCK_2 TEXT_BLOCK_3 TEXT_BLOCK_4 TEXT_BLOCK_5
+export MIN_TEXT_SIZE MAX_TEXT_SIZE MIN_TXT_EN_SIZE MAX_TXT_EN_SIZE FIELD_SIZE
+
+start_time=$(date +%s)
+pids=()
+
+echo "Starting $NUM_THREADS parallel threads..."
+
+for ((t=0; t<NUM_THREADS; t++)); do
+    range_start=$((START_ID + t * DOCS_PER_THREAD))
+    range_end=$((range_start + DOCS_PER_THREAD))
+    # Last thread picks up any remainder so the full range up to END_ID is covered
+    if [ $t -eq $((NUM_THREADS - 1)) ]; then
+        range_end=$END_ID
+    fi
+    ingest_range $t $range_start $range_end 2>&1 &
+    pids+=($!)
+done
+
+echo "Waiting for threads..."
+failed=0
+for pid in "${pids[@]}"; do
+    if ! wait $pid; then
+        failed=1
+    fi
+done
+
+if [ $failed -eq 1 ]; then
+    echo "One or more threads failed!"
+    exit 1
+fi
+
+echo "Committing..."
+curl -s --connect-timeout 10 -X POST "$SOLR_URL/$COLLECTION/update?commit=true" -H "Content-Type: application/json" -d '{}' > /dev/null
+
+total_time=$(($(date +%s) - start_time))
+echo "Ingestion complete in ${total_time}s"
+
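
For orientation, one possible way to chain the new scripts into the recovery experiment described in index-recovery-tests.md is sketched below; every URL, node name, and count is a placeholder rather than the exact setup behind the reported numbers.

```bash
# Illustrative sequence only; URLs, node names, and counts are placeholders.
# 1. Create the collection with all shards on the first node, then build a large index.
./scripts/add-replicas.sh http://localhost:8983/solr test solr1:8983_solr 12 TLOG
./scripts/ingest-docs-text-variants2.sh http://localhost:8983/solr test 0 10 1000000 100

# 2. Add TLOG replicas on the second node to trigger replication/recovery there.
./scripts/add-replicas.sh http://localhost:8983/solr test solr2:8983_solr 12 TLOG

# 3. For repeat runs, drop and re-add the second node's replicas; the script waits for the
#    async ADDREPLICA requests to complete before reporting.
./scripts/cycle-replicas.sh http://localhost:8983/solr test solr2:8983_solr
```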