apache · kotman12 · Dec 30, 2025 · Jan 12, 2026 · Jan 12, 2026 · Jan 17, 2026
diff --git a/index-recovery-tests.md b/index-recovery-tests.md
@@ -0,0 +1,15 @@
+# Harder-to-Reproduce Index Recovery Performance Results
+
+I tested recovery of 1 and 12 shards of ~20 GB each. The size makes it a bit challenging to package nicely in a reproducible benchmark, although I am sure it can be done.
+I am confident you can reproduce this behavior with a comparable amount of data and cloud structure. I can share the scripts I used to achieve these results if it is helpful.
+
+## Results Summary
+
+| Scenario | Shards | Configuration | Result | Time |
+|----------|--------|---------------|--------|------|
+| HTTP/2 | 1 | default | Fast | ~40s |
+| HTTP/1 | 1 | default | Fast | ~50s |
+| HTTP/1 | 12 | default | Fast | ~90s |
+| HTTP/2 | 12 | default | Slowest | ~320s |
+| HTTP/2 | 12 | `maxConcurrentStreams=1` | Slower | ~180s |
+
diff --git a/scripts/add-replicas.sh b/scripts/add-replicas.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+#
+# /*
+#  * Licensed to the Apache Software Foundation (ASF) under one or more
+#  * contributor license agreements.  See the NOTICE file distributed with
+#  * this work for additional information regarding copyright ownership.
+#  * The ASF licenses this file to You under the Apache License, Version 2.0
+#  * (the "License"); you may not use this file except in compliance with
+#  * the License.  You may obtain a copy of the License at
+#  *
+#  *     http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+
+# =============================================================================
+# Script to ensure each of COUNT shards has a replica on a target node (creates the collection if missing)
+#
+# Usage:
+#   ./add-replicas.sh [SOLR_URL] [COLLECTION] [TARGET_NODE] [COUNT] [TYPE]
+#
+# Example:
+#   ./add-replicas.sh http://localhost:8983/solr test solr2:8983_solr 12 TLOG
+#   ./add-replicas.sh http://localhost:8983/solr test solr2:8983_solr 1 NRT
+# =============================================================================
+
+set -e
+
+SOLR_URL="${1:-http://localhost:8983/solr}"
+COLLECTION="${2:-test}"
+TARGET_NODE="${3:-solr2:8983_solr}"
+NUM_SHARDS="${4:-12}"
+TYPE="${5:-TLOG}"
+
+echo "Ensuring $NUM_SHARDS shards with 1 replica of type $TYPE on $TARGET_NODE for collection $COLLECTION"
+
+# Fetch cluster status
+echo "Fetching cluster status from $SOLR_URL..."
+cluster_status=$(curl -s "$SOLR_URL/admin/collections?action=CLUSTERSTATUS")
+
+# Validate JSON response
+if ! echo "$cluster_status" | jq -e . >/dev/null 2>&1; then
+    echo "Error: Invalid JSON response from Solr."
+    echo "Response: $cluster_status"
+    exit 1
+fi
+
+# Check if collection exists
+if echo "$cluster_status" | jq -e ".cluster.collections[\"$COLLECTION\"] == null" >/dev/null; then
+    echo "Collection '$COLLECTION' not found."
+    echo "Creating collection '$COLLECTION' with $NUM_SHARDS shards..."
+
+    # Determine replica types for CREATE
+    # prioritizing TLOG if requested
+    CREATE_PARAMS="action=CREATE&name=$COLLECTION&numShards=$NUM_SHARDS"
+
+    if [ "$TYPE" == "TLOG" ]; then
+         CREATE_PARAMS="${CREATE_PARAMS}&nrtReplicas=0&tlogReplicas=1"
+    elif [ "$TYPE" == "PULL" ]; then
+         CREATE_PARAMS="${CREATE_PARAMS}&nrtReplicas=0&pullReplicas=1"
+    else
+         CREATE_PARAMS="${CREATE_PARAMS}&replicationFactor=1"
+    fi
+
+    # Create collection targeted at the node to ensure initial replicas are there
+    create_response=$(curl -s -w "\n%{http_code}" \
+        "$SOLR_URL/admin/collections?${CREATE_PARAMS}&createNodeSet=$TARGET_NODE")
+
+    create_http_code=$(echo "$create_response" | tail -n1)
+
+    if [ "$create_http_code" != "200" ]; then
+        echo "Error creating collection: HTTP $create_http_code"
+        echo "$create_response" | head -n -1
+        exit 1
+    fi
+
+    echo "Collection created successfully."
+
+    # We are done since CREATE with createNodeSet puts them there
+    exit 0
+fi
+
+echo "Collection '$COLLECTION' exists. Checking shards..."
+
+# Refresh cluster status (-S surfaces curl errors that -s alone would hide)
+cluster_status=$(curl -sS "$SOLR_URL/admin/collections?action=CLUSTERSTATUS")
+
+# Replica-count parameters so that a shard created below gets exactly one $TYPE replica
+case "$TYPE" in
+    TLOG) shard_type_params="nrtReplicas=0&tlogReplicas=1&pullReplicas=0" ;;
+    PULL) shard_type_params="nrtReplicas=0&tlogReplicas=0&pullReplicas=1" ;;
+    *)    shard_type_params="nrtReplicas=1&tlogReplicas=0&pullReplicas=0" ;;
+esac
+
+# Iterate through expected shards 1..NUM_SHARDS
+for ((i=1; i<=NUM_SHARDS; i++)); do
+    shard_name="shard${i}"
+
+    # Check if shard exists (names go in via --arg so they are data, not jq program text)
+    shard_exists=$(echo "$cluster_status" | jq -r --arg c "$COLLECTION" --arg s "$shard_name" \
+        '.cluster.collections[$c].shards[$s] // empty')
+
+    if [ -z "$shard_exists" ]; then
+        echo "  $shard_name does not exist. Creating..."
+
+        # NOTE(review): Solr documents CREATESHARD as available only for collections that use
+        # the implicit router; on other collections expect an HTTP error here - confirm.
+        response=$(curl -s -w "\n%{http_code}" \
+            "$SOLR_URL/admin/collections?action=CREATESHARD&collection=$COLLECTION&shard=$shard_name&createNodeSet=$TARGET_NODE&$shard_type_params")
+
+        http_code=$(echo "$response" | tail -n1)
+        if [ "$http_code" != "200" ]; then
+            echo "  Error creating shard: HTTP $http_code"
+            echo "$response" | sed '$d'
+            exit 1
+        fi
+        echo "  $shard_name created."
+    else
+        # Shard exists: list the names of the replicas it already has on TARGET_NODE
+        replicas_on_node=$(echo "$cluster_status" | jq -r --arg c "$COLLECTION" --arg s "$shard_name" --arg n "$TARGET_NODE" \
+            '.cluster.collections[$c].shards[$s].replicas | to_entries[] | select(.value.node_name == $n) | .key')
+
+        if [ -z "$replicas_on_node" ]; then
+            echo "  $shard_name exists but has no replica on $TARGET_NODE. Adding $TYPE replica..."
+
+            response=$(curl -s -w "\n%{http_code}" \
+                "$SOLR_URL/admin/collections?action=ADDREPLICA&collection=$COLLECTION&shard=$shard_name&node=$TARGET_NODE&type=$TYPE")
+
+            http_code=$(echo "$response" | tail -n1)
+            if [ "$http_code" != "200" ]; then
+                echo "  Error adding replica: HTTP $http_code"
+                echo "$response" | sed '$d'
+                exit 1
+            fi
+            echo "  Replica added."
+        else
+            echo "  $shard_name already has replica on $TARGET_NODE. Skipping."
+        fi
+    fi
+done
+
+printf '%s\n' "" \
+    "=========================================" \
+    "Configuration complete!" \
+    "Collection: $COLLECTION" \
+    "Target node: $TARGET_NODE" \
+    "Shards checked: $NUM_SHARDS" \
+    "========================================="
+
diff --git a/scripts/cycle-replicas.sh b/scripts/cycle-replicas.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+#
+# /*
+#  * Licensed to the Apache Software Foundation (ASF) under one or more
+#  * contributor license agreements.  See the NOTICE file distributed with
+#  * this work for additional information regarding copyright ownership.
+#  * The ASF licenses this file to You under the Apache License, Version 2.0
+#  * (the "License"); you may not use this file except in compliance with
+#  * the License.  You may obtain a copy of the License at
+#  *
+#  *     http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+
+# =============================================================================
+# Script to remove all replicas from a node and then add them back
+#
+# Usage:
+#   ./cycle-replicas.sh [SOLR_URL] [COLLECTION] [TARGET_NODE]
+#
+# Example:
+#   ./cycle-replicas.sh http://localhost:8983/solr test solr2:8983_solr
+# =============================================================================
+
+set -e
+
+SOLR_URL="${1:-http://localhost:8983/solr}"
+COLLECTION="${2:-test}"
+TARGET_NODE="${3:-solr2:8983_solr}"
+
+echo "Cycling replicas on $TARGET_NODE for collection $COLLECTION"
+echo ""
+
+# Get cluster status
+cluster_status=$(curl -s "$SOLR_URL/admin/collections?action=CLUSTERSTATUS")
+
+# Find all replicas on the target node
+# Format: shard_name:replica_name
+replicas_on_node=$(echo "$cluster_status" | jq -r "
+    .cluster.collections[\"$COLLECTION\"].shards | to_entries[] |
+    .key as \$shard |
+    .value.replicas | to_entries[] |
+    select(.value.node_name == \"$TARGET_NODE\") |
+    \"\(\$shard):\(.key)\"
+")
+
+if [ -z "$replicas_on_node" ]; then
+    echo "No replicas found on $TARGET_NODE for collection $COLLECTION"
+    exit 0
+fi
+
+# Get list of shards that have replicas on target node
+shards_on_node=$(echo "$replicas_on_node" | cut -d: -f1 | sort -u)
+
+echo "Found replicas on $TARGET_NODE:"
+echo "$replicas_on_node"
+echo ""
+
+# =========================================
+# PHASE 1: Remove all replicas from node
+# =========================================
+echo "========================================="
+echo "PHASE 1: Removing replicas from $TARGET_NODE"
+echo "========================================="
+
+# Get the directory of the current script
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Call the delete-replicas subscript
+"$SCRIPT_DIR/delete-replicas.sh" "$SOLR_URL" "$COLLECTION" "$TARGET_NODE" 2
+
+echo ""
+echo "All replicas removed from $TARGET_NODE"
+echo ""
+
+# =========================================
+# PHASE 2: Add replicas back to node
+# =========================================
+echo "========================================="
+echo "PHASE 2: Adding replicas back to $TARGET_NODE (async)"
+echo "========================================="
+
+async_ids=()
+timestamp=$(date +%s)
+
+for shard in $shards_on_node; do
+    # Re-add with the type the replica had in the CLUSTERSTATUS snapshot taken before
+    # Phase 1 rather than forcing TLOG; TLOG remains the fallback if no type is reported.
+    replica_type=$(echo "$cluster_status" | jq -r --arg c "$COLLECTION" --arg s "$shard" --arg n "$TARGET_NODE" \
+        '[.cluster.collections[$c].shards[$s].replicas[] | select(.node_name == $n) | .type // empty] | first // "TLOG"')
+    echo "Adding $replica_type replica for $shard on $TARGET_NODE..."
+    async_id="${COLLECTION}_${shard}_add_${timestamp}"
+
+    # Delete any existing async status with this ID (ignore errors)
+    curl -s "$SOLR_URL/admin/collections?action=DELETESTATUS&requestid=$async_id" > /dev/null 2>&1 || true
+
+    response=$(curl -s -w "\n%{http_code}" \
+        "$SOLR_URL/admin/collections?action=ADDREPLICA&collection=$COLLECTION&shard=$shard&node=$TARGET_NODE&type=$replica_type&async=$async_id")
+    http_code=$(echo "$response" | tail -n1)
+    if [ "$http_code" != "200" ]; then
+        echo "Error: HTTP $http_code"
+        echo "$response" | sed '$d'   # body without the status line; portable, unlike GNU-only `head -n -1`
+        exit 1
+    fi
+
+    async_ids+=("$async_id")
+    echo "  Submitted (async id: $async_id)"
+done
+
+echo ""
+echo "Waiting for async operations to complete..."
+
+# Wait for all async operations to complete
+for async_id in "${async_ids[@]}"; do
+    echo "Checking status of $async_id..."
+
+    while true; do
+        status_response=$(curl -s "$SOLR_URL/admin/collections?action=REQUESTSTATUS&requestid=$async_id")
+        state=$(echo "$status_response" | jq -r '.status.state')
+
+        if [ "$state" == "completed" ]; then
+            echo "  $async_id: completed"
+            # Clean up the async request
+            curl -s "$SOLR_URL/admin/collections?action=DELETESTATUS&requestid=$async_id" > /dev/null
+            break
+        elif [ "$state" == "failed" ]; then
+            echo "  $async_id: FAILED"
+            echo "$status_response" | jq '.status'
+            exit 1
+        else
+            echo "  $async_id: $state (waiting...)"
+            sleep 2
+        fi
+    done
+done
+
+# $(( ... )) strips the leading blanks BSD/macOS `wc` puts in front of the count
+shard_count=$(( $(echo "$shards_on_node" | wc -w) ))
+printf '%s\n' "" "=========================================" "Replica cycling complete!" \
+    "Collection: $COLLECTION" \
+    "Node: $TARGET_NODE" \
+    "Shards cycled: $shard_count" \
+    "========================================="
+