From 2b39d26cba97e4b671713dda91fa32932ac4dd73 Mon Sep 17 00:00:00 2001 From: Pierre Salagnac Date: Mon, 5 Feb 2024 20:42:28 +0100 Subject: [PATCH 1/3] SOLR-17149: Fix backup/restore for large collections. --- .../src/java/org/apache/solr/core/CoreContainer.java | 3 ++- .../org/apache/solr/handler/admin/CoreAdminHandler.java | 4 +++- .../java/org/apache/solr/common/util/ExecutorUtil.java | 9 +++++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index 1da0802cf08..8af5ecc272e 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -412,7 +412,8 @@ public CoreContainer(NodeConfig config, CoresLocator locator, boolean asyncSolrC new OrderedExecutor( cfg.getReplayUpdatesThreads(), ExecutorUtil.newMDCAwareCachedThreadPool( - cfg.getReplayUpdatesThreads(), + cfg.getReplayUpdatesThreads(), // thread count + cfg.getReplayUpdatesThreads(), // queue size new SolrNamedThreadFactory("replayUpdatesExecutor"))); this.appHandlersByConfigSetId = new JerseyAppHandlerCache(); diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java index ca67a355b44..d2d823eb190 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java @@ -443,7 +443,9 @@ public static class CoreAdminAsyncTracker { // We keep the number number of max threads very low to have throttling for expensive tasks private ExecutorService expensiveExecutor = ExecutorUtil.newMDCAwareCachedThreadPool( - 5, new SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor")); + 5, + Integer.MAX_VALUE, + new SolrNamedThreadFactory("parallelCoreAdminAPIExpensiveExecutor")); public CoreAdminAsyncTracker() { HashMap> map = new HashMap<>(3, 1.0f); diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java index f0d1f8bc698..c97658efebd 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java @@ -181,9 +181,14 @@ public static ExecutorService newMDCAwareCachedThreadPool(ThreadFactory threadFa } public static ExecutorService newMDCAwareCachedThreadPool( - int maxThreads, ThreadFactory threadFactory) { + int maxThreads, int queueCapacity, ThreadFactory threadFactory) { return new MDCAwareThreadPoolExecutor( - 0, maxThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(maxThreads), threadFactory); + 0, + maxThreads, + 60L, + TimeUnit.SECONDS, + new LinkedBlockingQueue<>(queueCapacity), + threadFactory); } @SuppressForbidden(reason = "class customizes ThreadPoolExecutor so it can be used instead") From 7dc05c8f6f070acb9385b30720abd0f290d79b09 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Tue, 6 Feb 2024 09:13:07 -0500 Subject: [PATCH 2/3] Modify tests to cover large-collection case --- .../api/collections/LocalFSCloudIncrementalBackupTest.java | 2 +- .../test/org/apache/solr/gcs/GCSIncrementalBackupTest.java | 2 +- .../api/collections/HdfsCloudIncrementalBackupTest.java | 2 +- .../test/org/apache/solr/s3/S3IncrementalBackupTest.java | 2 +- .../api/collections/AbstractIncrementalBackupTest.java | 7 +++++-- 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java index 869c90fd67d..06a664aef5a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java @@ -79,7 +79,7 @@ public static void setupClass() throws Exception { backupLocation = createTempDir("mybackup").toAbsolutePath().toString(); } - configureCluster(NUM_SHARDS) // nodes + configureCluster(NUM_NODES) // nodes .addConfig( "conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) .withSolrXml(SOLR_XML.replace("ALLOWPATHS_TEMPLATE_VAL", backupLocation)) diff --git a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java index 83b93b2ce58..4b145c601b4 100644 --- a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java +++ b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java @@ -72,7 +72,7 @@ public class GCSIncrementalBackupTest extends AbstractIncrementalBackupTest { @BeforeClass public static void setupClass() throws Exception { - configureCluster(NUM_SHARDS) // nodes + configureCluster(NUM_NODES) // nodes .addConfig("conf1", getFile("conf/solrconfig.xml").getParentFile().toPath()) .withSolrXml(SOLR_XML) .configure(); diff --git a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java index aed608ef37b..508e3c42ee4 100644 --- a/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java +++ b/solr/modules/hdfs/src/test/org/apache/solr/hdfs/cloud/api/collections/HdfsCloudIncrementalBackupTest.java @@ -121,7 +121,7 @@ public static void setupClass() throws Exception { System.setProperty("solr.hdfs.home", hdfsUri + "/solr"); useFactory("solr.StandardDirectoryFactory"); - configureCluster(NUM_SHARDS) // nodes + configureCluster(NUM_NODES) // nodes .addConfig( "conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) .withSolrXml(SOLR_XML) diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java index 7214fe6418b..d10322253cd 100644 --- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java +++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java @@ -94,7 +94,7 @@ public static void setupClass() throws Exception { AbstractS3ClientTest.setS3ConfFile(); - configureCluster(NUM_SHARDS) // nodes + configureCluster(NUM_NODES) // nodes .addConfig("conf1", getFile("conf/solrconfig.xml").getParentFile().toPath()) .withSolrXml( SOLR_XML diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java index 03acadab7ff..79a5f59d12f 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java @@ -88,7 +88,9 @@ public abstract class AbstractIncrementalBackupTest extends SolrCloudTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static long docsSeed; // see indexDocs() + protected static final int NUM_NODES = 2; protected static final int NUM_SHARDS = 2; // granted we sometimes shard split to get more + protected static final int LARGE_NUM_SHARDS = 11; // Periodically chosen via randomization protected static final int REPL_FACTOR = 2; protected static final String BACKUPNAME_PREFIX = "mytestbackup"; protected static final String BACKUP_REPO_NAME = "trackingBackupRepository"; @@ -129,10 +131,11 @@ public void testSimple() throws Exception { setTestSuffix("testbackupincsimple"); final String backupCollectionName = getCollectionName(); final String restoreCollectionName = backupCollectionName + "_restore"; + final int randomizedNumShards = rarely() ? LARGE_NUM_SHARDS : NUM_SHARDS; CloudSolrClient solrClient = cluster.getSolrClient(); - CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, 1) + CollectionAdminRequest.createCollection(backupCollectionName, "conf1", randomizedNumShards, 1) .process(solrClient); int totalIndexedDocs = indexDocs(backupCollectionName, true); String backupName = BACKUPNAME_PREFIX + testSuffix; @@ -166,7 +169,7 @@ public void testSimple() throws Exception { log.info("Created backup with {} docs, took {}ms", numFound, timeTaken); t = System.nanoTime(); - randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", NUM_SHARDS, 1); + randomlyPrecreateRestoreCollection(restoreCollectionName, "conf1", randomizedNumShards, 1); CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName) .setBackupId(0) .setLocation(backupLocation) From d1a06a2eadd91dbf970f9536ad2589342cf09f7f Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Tue, 6 Feb 2024 10:00:51 -0500 Subject: [PATCH 3/3] Add CHANGES.txt entry --- solr/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index db542ef7db8..536291b0928 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -220,6 +220,8 @@ Bug Fixes * SOLR-17112: bin/solr script doesn't do ps properly on some systems. (Vincenzo D'Amore via Eric Pugh) +* SOLR-17149: Backups on collections with too many shards fail due to restrictive Executor queue size (Pierre Salagnac via Jason Gerlowski) + Dependency Upgrades --------------------- * SOLR-17012: Update Apache Hadoop to 3.3.6 and Apache Curator to 5.5.0 (Kevin Risden)