From c2bde1f1cc1ec8f5d8731d5245c3f0f1a909b4b9 Mon Sep 17 00:00:00 2001 From: Matthew Biscocho Date: Fri, 3 Jan 2025 11:34:12 -0500 Subject: [PATCH 1/5] Add Known nodes to BaseHttpClusterStateProvider --- .../impl/BaseHttpClusterStateProvider.java | 54 +++++--- .../solrj/impl/ClusterStateProviderTest.java | 127 +++++++++++++++++- .../solr/cloud/MiniSolrCloudCluster.java | 4 +- 3 files changed, 162 insertions(+), 23 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java index 40183109178..1f2a441c8a3 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java @@ -23,6 +23,7 @@ import java.lang.invoke.MethodHandles; import java.time.Instant; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -51,7 +52,9 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private String urlScheme; + private Set initialNodes; volatile Set liveNodes; + volatile Set knownNodes; // Live nodes + initial nodes long liveNodesTimestamp = 0; volatile Map> aliases; volatile Map> aliasProperties; @@ -65,6 +68,8 @@ public void init(List solrUrls) throws Exception { urlScheme = solrUrl.startsWith("https") ? "https" : "http"; try (SolrClient initialClient = getSolrClient(solrUrl)) { this.liveNodes = fetchLiveNodes(initialClient); + this.initialNodes = Set.copyOf(liveNodes); + updateKnownNodes(); liveNodesTimestamp = System.nanoTime(); break; } catch (SolrServerException | IOException e) { @@ -96,7 +101,7 @@ public DocCollection getCollection(String collection) { @Override public ClusterState.CollectionRef getState(String collection) { - for (String nodeName : liveNodes) { + for (String nodeName : knownNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { DocCollection docCollection = fetchCollectionState(client, collection); @@ -120,7 +125,7 @@ public ClusterState.CollectionRef getState(String collection) { } throw new RuntimeException( "Tried fetching cluster state using the node names we knew of, i.e. " - + liveNodes + + knownNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -137,6 +142,7 @@ private ClusterState fetchClusterState(SolrClient client) List liveNodesList = (List) cluster.get("live_nodes"); if (liveNodesList != null) { this.liveNodes = Set.copyOf(liveNodesList); + updateKnownNodes(); liveNodesTimestamp = System.nanoTime(); } @@ -216,7 +222,7 @@ private SimpleOrderedMap submitClusterStateRequest( @Override public Set getLiveNodes() { - if (liveNodes == null) { + if (knownNodes == null) { throw new RuntimeException( "We don't know of any live_nodes to fetch the" + " latest live_nodes information from. " @@ -226,27 +232,27 @@ public Set getLiveNodes() { } if (TimeUnit.SECONDS.convert((System.nanoTime() - liveNodesTimestamp), TimeUnit.NANOSECONDS) > getCacheTimeout()) { - for (String nodeName : liveNodes) { + for (String nodeName : knownNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { - Set liveNodes = fetchLiveNodes(client); - this.liveNodes = (liveNodes); + this.liveNodes = fetchLiveNodes(client); + updateKnownNodes(); liveNodesTimestamp = System.nanoTime(); - return liveNodes; + return this.liveNodes; } catch (Exception e) { log.warn("Attempt to fetch cluster state from {} failed.", baseUrl, e); } } throw new RuntimeException( "Tried fetching live_nodes using all the node names we knew of, i.e. " - + liveNodes + + knownNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" + " solrUrl(s) or zkHost(s)."); } else { - return liveNodes; // cached copy is fresh enough + return this.liveNodes; // cached copy is fresh enough } } @@ -272,7 +278,7 @@ public String resolveSimpleAlias(String aliasName) throws IllegalArgumentExcepti } private Map> getAliases(boolean forceFetch) { - if (this.liveNodes == null) { + if (this.knownNodes == null) { throw new RuntimeException( "We don't know of any live_nodes to fetch the" + " latest aliases information from. " @@ -285,7 +291,7 @@ private Map> getAliases(boolean forceFetch) { || this.aliases == null || TimeUnit.SECONDS.convert((System.nanoTime() - aliasesTimestamp), TimeUnit.NANOSECONDS) > getCacheTimeout()) { - for (String nodeName : liveNodes) { + for (String nodeName : knownNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { @@ -313,7 +319,7 @@ > getCacheTimeout()) { throw new RuntimeException( "Tried fetching aliases using all the node names we knew of, i.e. " - + liveNodes + + knownNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -332,7 +338,7 @@ public Map getAliasProperties(String alias) { @Override public ClusterState getClusterState() { - for (String nodeName : liveNodes) { + for (String nodeName : knownNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { return fetchClusterState(client); @@ -347,7 +353,7 @@ public ClusterState getClusterState() { } throw new RuntimeException( "Tried fetching cluster state using the node names we knew of, i.e. " - + liveNodes + + knownNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -358,8 +364,7 @@ public ClusterState getClusterState() { @SuppressWarnings("unchecked") @Override public Map getClusterProperties() { - // Map clusterPropertiesMap = new HashMap<>(); - for (String nodeName : liveNodes) { + for (String nodeName : knownNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { SimpleOrderedMap cluster = @@ -371,7 +376,7 @@ public Map getClusterProperties() { } throw new RuntimeException( "Tried fetching cluster state using the node names we knew of, i.e. " - + liveNodes + + knownNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -413,6 +418,21 @@ public String getQuorumHosts() { return String.join(",", this.liveNodes); } + public Set getKnownNodes() { + getLiveNodes(); + return this.knownNodes; + } + + /** + * Known nodes should always have the latest set of live nodes but never remove initial set of + * live nodes + */ + private void updateKnownNodes() { + Set knownNodes = new HashSet<>(this.liveNodes); + knownNodes.addAll(this.initialNodes); + this.knownNodes = Set.copyOf(knownNodes); + } + private enum ClusterStateRequestType { FETCH_LIVE_NODES, FETCH_CLUSTER_PROP, diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java index 707313efec4..1fde6da3acc 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java @@ -26,6 +26,7 @@ import java.time.Instant; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Supplier; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.CollectionAdminRequest; @@ -34,15 +35,20 @@ import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.util.NamedList; +import org.apache.solr.embedded.JettySolrRunner; import org.hamcrest.Matchers; +import org.junit.After; import org.junit.BeforeClass; import org.junit.Test; public class ClusterStateProviderTest extends SolrCloudTestCase { + private static JettySolrRunner jettyNode1; + private static JettySolrRunner jettyNode2; + @BeforeClass public static void setupCluster() throws Exception { - configureCluster(1) + configureCluster(2) .addConfig( "conf", getFile("solrj") @@ -51,6 +57,19 @@ public static void setupCluster() throws Exception { .resolve("streaming") .resolve("conf")) .configure(); + jettyNode1 = cluster.getJettySolrRunner(0); + jettyNode2 = cluster.getJettySolrRunner(1); + } + + @After + public void cleanup() throws Exception { + if (!jettyNode1.isRunning()) { + cluster.startJettySolrRunner(jettyNode1); + } + if (!jettyNode2.isRunning()) { + cluster.startJettySolrRunner(jettyNode2); + } + waitForCSPCacheTimeout(); } @ParametersFactory @@ -59,10 +78,13 @@ public static Iterable parameters() throws NoSuchMethodException { new String[] {"http2ClusterStateProvider"}, new String[] {"zkClientClusterStateProvider"}); } - private static ClusterStateProvider http2ClusterStateProvider() { + private static Http2ClusterStateProvider http2ClusterStateProvider() { try { return new Http2ClusterStateProvider( - List.of(cluster.getJettySolrRunner(0).getBaseUrl().toString()), null); + List.of( + cluster.getJettySolrRunner(0).getBaseUrl().toString(), + cluster.getJettySolrRunner(1).getBaseUrl().toString()), + null); } catch (Exception e) { throw new RuntimeException(e); } @@ -160,7 +182,6 @@ public void testClusterStateProvider() throws SolrServerException, IOException { try (var cspZk = zkClientClusterStateProvider(); var cspHttp = http2ClusterStateProvider()) { - assertThat(cspZk.getClusterProperties(), Matchers.hasEntry("ext.foo", "bar")); assertThat( cspZk.getClusterProperties().entrySet(), @@ -181,6 +202,104 @@ public void testClusterStateProvider() throws SolrServerException, IOException { assertThat( clusterStateZk.getCollection("col2"), equalTo(clusterStateHttp.getCollection("col2"))); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testClusterStateProviderDownedLiveNodes() { + try (var cspZk = zkClientClusterStateProvider(); + var cspHttp = http2ClusterStateProvider()) { + Set expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + Set actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(2, actualLiveNodes.size()); + assertEquals(expectedLiveNodes, actualLiveNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualLiveNodes.size()); + assertEquals(expectedLiveNodes, actualLiveNodes); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Test + public void testClusterStateProviderDownedKnownHosts() { + + try (var cspHttp = http2ClusterStateProvider()) { + + String jettyNode1Url = normalizeJettyUrl(jettyNode1.getBaseUrl().toString()); + String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); + Set expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url); + Set actualKnownNodes = cspHttp.getKnownNodes(); + + assertEquals(2, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + // Known hosts should never remove the initial set of live nodes + actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(2, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + } catch (Exception e) { + throw new RuntimeException(e); } } + + @Test + public void testClusterStateProviderKnownHostsWithNewHost() { + + try (var cspHttp = http2ClusterStateProvider()) { + + var jettyNode3 = cluster.startJettySolrRunner(); + String jettyNode1Url = normalizeJettyUrl(jettyNode1.getBaseUrl().toString()); + String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); + String jettyNode3Url = normalizeJettyUrl(jettyNode3.getBaseUrl().toString()); + Set expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url, jettyNode3Url); + + waitForCSPCacheTimeout(); + + Set actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(3, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + // Known hosts should never remove the initial set of live nodes + actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(3, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + cluster.stopJettySolrRunner(jettyNode3); + expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url); + waitForCSPCacheTimeout(); + + // New nodes are removable from known hosts + actualKnownNodes = cspHttp.getKnownNodes(); + assertEquals(2, actualKnownNodes.size()); + assertEquals(expectedKnownNodes, actualKnownNodes); + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void waitForCSPCacheTimeout() throws InterruptedException { + Thread.sleep(6000); + } + + /** Jetty URL to Cluster State Node String http://127.0.0.1:12345/solr to 127.0.0.1:12345_solr */ + private String normalizeJettyUrl(String jettyUrl) { + return jettyUrl.substring(jettyUrl.lastIndexOf("//") + 2).replace("/", "_"); + } } diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java index c600c7e4ffa..a1d368bdb11 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java @@ -530,14 +530,14 @@ public JettySolrRunner stopJettySolrRunner(int index) throws Exception { } /** - * Add a previously stopped node back to the cluster + * Add a previously stopped node back to the cluster and reuse its port * * @param jetty a {@link JettySolrRunner} previously returned by {@link #stopJettySolrRunner(int)} * @return the started node * @throws Exception on error */ public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception { - jetty.start(false); + jetty.start(); if (!jettys.contains(jetty)) jettys.add(jetty); return jetty; } From 00368b51f97bd3700c0e039a116c71f8b645c855 Mon Sep 17 00:00:00 2001 From: Matthew Biscocho Date: Fri, 3 Jan 2025 11:50:48 -0500 Subject: [PATCH 2/5] Fix tests --- .../solrj/impl/ClusterStateProviderTest.java | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java index 1fde6da3acc..ee9848af64c 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java @@ -202,13 +202,11 @@ public void testClusterStateProvider() throws SolrServerException, IOException { assertThat( clusterStateZk.getCollection("col2"), equalTo(clusterStateHttp.getCollection("col2"))); - } catch (Exception e) { - throw new RuntimeException(e); } } @Test - public void testClusterStateProviderDownedLiveNodes() { + public void testClusterStateProviderDownedLiveNodes() throws Exception { try (var cspZk = zkClientClusterStateProvider(); var cspHttp = http2ClusterStateProvider()) { Set expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); @@ -224,13 +222,20 @@ public void testClusterStateProviderDownedLiveNodes() { assertEquals(1, actualLiveNodes.size()); assertEquals(expectedLiveNodes, actualLiveNodes); - } catch (Exception e) { - throw new RuntimeException(e); + cluster.startJettySolrRunner(jettyNode1); + cluster.stopJettySolrRunner(jettyNode2); + waitForCSPCacheTimeout(); + + // Should still be reachable because known hosts doesn't remove initial nodes + expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualLiveNodes.size()); + assertEquals(expectedLiveNodes, actualLiveNodes); } } @Test - public void testClusterStateProviderDownedKnownHosts() { + public void testClusterStateProviderDownedKnownHosts() throws Exception { try (var cspHttp = http2ClusterStateProvider()) { @@ -249,14 +254,11 @@ public void testClusterStateProviderDownedKnownHosts() { actualKnownNodes = cspHttp.getKnownNodes(); assertEquals(2, actualKnownNodes.size()); assertEquals(expectedKnownNodes, actualKnownNodes); - - } catch (Exception e) { - throw new RuntimeException(e); } } @Test - public void testClusterStateProviderKnownHostsWithNewHost() { + public void testClusterStateProviderKnownHostsWithNewHost() throws Exception { try (var cspHttp = http2ClusterStateProvider()) { @@ -265,7 +267,6 @@ public void testClusterStateProviderKnownHostsWithNewHost() { String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); String jettyNode3Url = normalizeJettyUrl(jettyNode3.getBaseUrl().toString()); Set expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url, jettyNode3Url); - waitForCSPCacheTimeout(); Set actualKnownNodes = cspHttp.getKnownNodes(); @@ -275,7 +276,6 @@ public void testClusterStateProviderKnownHostsWithNewHost() { cluster.stopJettySolrRunner(jettyNode1); waitForCSPCacheTimeout(); - // Known hosts should never remove the initial set of live nodes actualKnownNodes = cspHttp.getKnownNodes(); assertEquals(3, actualKnownNodes.size()); assertEquals(expectedKnownNodes, actualKnownNodes); @@ -288,9 +288,6 @@ public void testClusterStateProviderKnownHostsWithNewHost() { actualKnownNodes = cspHttp.getKnownNodes(); assertEquals(2, actualKnownNodes.size()); assertEquals(expectedKnownNodes, actualKnownNodes); - - } catch (Exception e) { - throw new RuntimeException(e); } } From 52ea46bbafe1f5da506154776bd5c883007b4221 Mon Sep 17 00:00:00 2001 From: Matthew Biscocho Date: Fri, 3 Jan 2025 12:14:50 -0500 Subject: [PATCH 3/5] Rename setKnownHosts --- .../solrj/impl/BaseHttpClusterStateProvider.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java index 1f2a441c8a3..0f3fa31a7f9 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java @@ -54,7 +54,7 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private String urlScheme; private Set initialNodes; volatile Set liveNodes; - volatile Set knownNodes; // Live nodes + initial nodes + volatile Set knownNodes; long liveNodesTimestamp = 0; volatile Map> aliases; volatile Map> aliasProperties; @@ -69,7 +69,7 @@ public void init(List solrUrls) throws Exception { try (SolrClient initialClient = getSolrClient(solrUrl)) { this.liveNodes = fetchLiveNodes(initialClient); this.initialNodes = Set.copyOf(liveNodes); - updateKnownNodes(); + setKnownNodes(); liveNodesTimestamp = System.nanoTime(); break; } catch (SolrServerException | IOException e) { @@ -142,7 +142,7 @@ private ClusterState fetchClusterState(SolrClient client) List liveNodesList = (List) cluster.get("live_nodes"); if (liveNodesList != null) { this.liveNodes = Set.copyOf(liveNodesList); - updateKnownNodes(); + setKnownNodes(); liveNodesTimestamp = System.nanoTime(); } @@ -236,7 +236,7 @@ > getCacheTimeout()) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { this.liveNodes = fetchLiveNodes(client); - updateKnownNodes(); + setKnownNodes(); liveNodesTimestamp = System.nanoTime(); return this.liveNodes; } catch (Exception e) { @@ -427,7 +427,7 @@ public Set getKnownNodes() { * Known nodes should always have the latest set of live nodes but never remove initial set of * live nodes */ - private void updateKnownNodes() { + private void setKnownNodes() { Set knownNodes = new HashSet<>(this.liveNodes); knownNodes.addAll(this.initialNodes); this.knownNodes = Set.copyOf(knownNodes); From 9d0e720b4687897c5f4f4d77163bf0d1e0fe3efe Mon Sep 17 00:00:00 2001 From: Matthew Biscocho Date: Fri, 10 Jan 2025 12:15:04 -0500 Subject: [PATCH 4/5] Remove known nodes and only use liveNodes --- .../impl/BaseHttpClusterStateProvider.java | 69 ++++++++------ .../solrj/impl/ClusterStateProviderTest.java | 93 +++++-------------- 2 files changed, 63 insertions(+), 99 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java index 0f3fa31a7f9..fa81814e876 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java @@ -21,6 +21,10 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; import java.time.Instant; import java.util.Collections; import java.util.HashSet; @@ -54,7 +58,6 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private String urlScheme; private Set initialNodes; volatile Set liveNodes; - volatile Set knownNodes; long liveNodesTimestamp = 0; volatile Map> aliases; volatile Map> aliasProperties; @@ -64,12 +67,11 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private int cacheTimeout = EnvUtils.getPropertyAsInteger("solr.solrj.cache.timeout.sec", 5); public void init(List solrUrls) throws Exception { + this.initialNodes = getNodeNamesFromSolrUrls(solrUrls); for (String solrUrl : solrUrls) { urlScheme = solrUrl.startsWith("https") ? "https" : "http"; try (SolrClient initialClient = getSolrClient(solrUrl)) { this.liveNodes = fetchLiveNodes(initialClient); - this.initialNodes = Set.copyOf(liveNodes); - setKnownNodes(); liveNodesTimestamp = System.nanoTime(); break; } catch (SolrServerException | IOException e) { @@ -101,7 +103,7 @@ public DocCollection getCollection(String collection) { @Override public ClusterState.CollectionRef getState(String collection) { - for (String nodeName : knownNodes) { + for (String nodeName : liveNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { DocCollection docCollection = fetchCollectionState(client, collection); @@ -125,7 +127,7 @@ public ClusterState.CollectionRef getState(String collection) { } throw new RuntimeException( "Tried fetching cluster state using the node names we knew of, i.e. " - + knownNodes + + liveNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -141,8 +143,7 @@ private ClusterState fetchClusterState(SolrClient client) List liveNodesList = (List) cluster.get("live_nodes"); if (liveNodesList != null) { - this.liveNodes = Set.copyOf(liveNodesList); - setKnownNodes(); + setLiveNodes(Set.copyOf(liveNodesList)); liveNodesTimestamp = System.nanoTime(); } @@ -222,7 +223,7 @@ private SimpleOrderedMap submitClusterStateRequest( @Override public Set getLiveNodes() { - if (knownNodes == null) { + if (liveNodes == null) { throw new RuntimeException( "We don't know of any live_nodes to fetch the" + " latest live_nodes information from. " @@ -232,11 +233,10 @@ public Set getLiveNodes() { } if (TimeUnit.SECONDS.convert((System.nanoTime() - liveNodesTimestamp), TimeUnit.NANOSECONDS) > getCacheTimeout()) { - for (String nodeName : knownNodes) { + for (String nodeName : liveNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { - this.liveNodes = fetchLiveNodes(client); - setKnownNodes(); + setLiveNodes(fetchLiveNodes(client)); liveNodesTimestamp = System.nanoTime(); return this.liveNodes; } catch (Exception e) { @@ -245,7 +245,7 @@ > getCacheTimeout()) { } throw new RuntimeException( "Tried fetching live_nodes using all the node names we knew of, i.e. " - + knownNodes + + liveNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -278,7 +278,7 @@ public String resolveSimpleAlias(String aliasName) throws IllegalArgumentExcepti } private Map> getAliases(boolean forceFetch) { - if (this.knownNodes == null) { + if (this.liveNodes == null) { throw new RuntimeException( "We don't know of any live_nodes to fetch the" + " latest aliases information from. " @@ -291,7 +291,7 @@ private Map> getAliases(boolean forceFetch) { || this.aliases == null || TimeUnit.SECONDS.convert((System.nanoTime() - aliasesTimestamp), TimeUnit.NANOSECONDS) > getCacheTimeout()) { - for (String nodeName : knownNodes) { + for (String nodeName : liveNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { @@ -319,7 +319,7 @@ > getCacheTimeout()) { throw new RuntimeException( "Tried fetching aliases using all the node names we knew of, i.e. " - + knownNodes + + liveNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -338,7 +338,7 @@ public Map getAliasProperties(String alias) { @Override public ClusterState getClusterState() { - for (String nodeName : knownNodes) { + for (String nodeName : liveNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { return fetchClusterState(client); @@ -353,7 +353,7 @@ public ClusterState getClusterState() { } throw new RuntimeException( "Tried fetching cluster state using the node names we knew of, i.e. " - + knownNodes + + liveNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -364,7 +364,7 @@ public ClusterState getClusterState() { @SuppressWarnings("unchecked") @Override public Map getClusterProperties() { - for (String nodeName : knownNodes) { + for (String nodeName : liveNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { SimpleOrderedMap cluster = @@ -376,7 +376,7 @@ public Map getClusterProperties() { } throw new RuntimeException( "Tried fetching cluster state using the node names we knew of, i.e. " - + knownNodes + + liveNodes + ". However, " + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," @@ -418,19 +418,28 @@ public String getQuorumHosts() { return String.join(",", this.liveNodes); } - public Set getKnownNodes() { - getLiveNodes(); - return this.knownNodes; + /** Live nodes should always have the latest set of live nodes but never remove initial set */ + private void setLiveNodes(Set nodes) { + Set liveNodes = new HashSet<>(nodes); + liveNodes.addAll(this.initialNodes); + this.liveNodes = Set.copyOf(liveNodes); + } + + public Set getNodeNamesFromSolrUrls(List urls) + throws URISyntaxException, MalformedURLException { + Set set = new HashSet<>(); + for (String url : urls) { + String nodeNameFromSolrUrl = getNodeNameFromSolrUrl(url); + set.add(nodeNameFromSolrUrl); + } + return Collections.unmodifiableSet(set); } - /** - * Known nodes should always have the latest set of live nodes but never remove initial set of - * live nodes - */ - private void setKnownNodes() { - Set knownNodes = new HashSet<>(this.liveNodes); - knownNodes.addAll(this.initialNodes); - this.knownNodes = Set.copyOf(knownNodes); + /** URL to cluster state node name (http://127.0.0.1:12345/solr to 127.0.0.1:12345_solr) */ + public String getNodeNameFromSolrUrl(String solrUrl) + throws MalformedURLException, URISyntaxException { + URL url = new URI(solrUrl).toURL(); + return url.getAuthority() + url.getPath().replace('/', '_'); } private enum ClusterStateRequestType { diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java index ee9848af64c..ee8938ec563 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java @@ -35,7 +35,6 @@ import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.util.NamedList; -import org.apache.solr.embedded.JettySolrRunner; import org.hamcrest.Matchers; import org.junit.After; import org.junit.BeforeClass; @@ -43,9 +42,6 @@ public class ClusterStateProviderTest extends SolrCloudTestCase { - private static JettySolrRunner jettyNode1; - private static JettySolrRunner jettyNode2; - @BeforeClass public static void setupCluster() throws Exception { configureCluster(2) @@ -57,19 +53,15 @@ public static void setupCluster() throws Exception { .resolve("streaming") .resolve("conf")) .configure(); - jettyNode1 = cluster.getJettySolrRunner(0); - jettyNode2 = cluster.getJettySolrRunner(1); + cluster.waitForAllNodes(30); + System.setProperty("solr.solrj.cache.timeout.sec", "1"); } @After public void cleanup() throws Exception { - if (!jettyNode1.isRunning()) { - cluster.startJettySolrRunner(jettyNode1); - } - if (!jettyNode2.isRunning()) { - cluster.startJettySolrRunner(jettyNode2); + while (cluster.getJettySolrRunners().size() < 2) { + cluster.startJettySolrRunner(); } - waitForCSPCacheTimeout(); } @ParametersFactory @@ -206,97 +198,60 @@ public void testClusterStateProvider() throws SolrServerException, IOException { } @Test - public void testClusterStateProviderDownedLiveNodes() throws Exception { - try (var cspZk = zkClientClusterStateProvider(); - var cspHttp = http2ClusterStateProvider()) { - Set expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + public void testClusterStateProviderDownedInitialLiveNodes() throws Exception { + try (var cspHttp = http2ClusterStateProvider()) { + var jettyNode1 = cluster.getJettySolrRunner(0); + var jettyNode2 = cluster.getJettySolrRunner(1); + Set actualLiveNodes = cspHttp.getLiveNodes(); assertEquals(2, actualLiveNodes.size()); - assertEquals(expectedLiveNodes, actualLiveNodes); cluster.stopJettySolrRunner(jettyNode1); waitForCSPCacheTimeout(); - expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); actualLiveNodes = cspHttp.getLiveNodes(); - assertEquals(1, actualLiveNodes.size()); - assertEquals(expectedLiveNodes, actualLiveNodes); + assertEquals(2, actualLiveNodes.size()); cluster.startJettySolrRunner(jettyNode1); cluster.stopJettySolrRunner(jettyNode2); waitForCSPCacheTimeout(); - // Should still be reachable because known hosts doesn't remove initial nodes - expectedLiveNodes = cspZk.getClusterState().getLiveNodes(); + // Should still be reachable because live nodes doesn't remove initial nodes actualLiveNodes = cspHttp.getLiveNodes(); - assertEquals(1, actualLiveNodes.size()); - assertEquals(expectedLiveNodes, actualLiveNodes); - } - } - - @Test - public void testClusterStateProviderDownedKnownHosts() throws Exception { - - try (var cspHttp = http2ClusterStateProvider()) { - - String jettyNode1Url = normalizeJettyUrl(jettyNode1.getBaseUrl().toString()); - String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); - Set expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url); - Set actualKnownNodes = cspHttp.getKnownNodes(); - - assertEquals(2, actualKnownNodes.size()); - assertEquals(expectedKnownNodes, actualKnownNodes); - - cluster.stopJettySolrRunner(jettyNode1); - waitForCSPCacheTimeout(); - - // Known hosts should never remove the initial set of live nodes - actualKnownNodes = cspHttp.getKnownNodes(); - assertEquals(2, actualKnownNodes.size()); - assertEquals(expectedKnownNodes, actualKnownNodes); + assertEquals(2, actualLiveNodes.size()); } } @Test - public void testClusterStateProviderKnownHostsWithNewHost() throws Exception { - + public void testClusterStateProviderLiveNodesWithNewHost() throws Exception { try (var cspHttp = http2ClusterStateProvider()) { - + var jettyNode1 = cluster.getJettySolrRunner(0); + var jettyNode2 = cluster.getJettySolrRunner(1); var jettyNode3 = cluster.startJettySolrRunner(); - String jettyNode1Url = normalizeJettyUrl(jettyNode1.getBaseUrl().toString()); - String jettyNode2Url = normalizeJettyUrl(jettyNode2.getBaseUrl().toString()); - String jettyNode3Url = normalizeJettyUrl(jettyNode3.getBaseUrl().toString()); - Set expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url, jettyNode3Url); - waitForCSPCacheTimeout(); - - Set actualKnownNodes = cspHttp.getKnownNodes(); - assertEquals(3, actualKnownNodes.size()); - assertEquals(expectedKnownNodes, actualKnownNodes); - cluster.stopJettySolrRunner(jettyNode1); + String nodeName1 = cspHttp.getNodeNameFromSolrUrl(jettyNode1.getBaseUrl().toString()); + String nodeName2 = cspHttp.getNodeNameFromSolrUrl(jettyNode2.getBaseUrl().toString()); + String nodeName3 = cspHttp.getNodeNameFromSolrUrl(jettyNode3.getBaseUrl().toString()); + Set expectedKnownNodes = Set.of(nodeName1, nodeName2, nodeName3); waitForCSPCacheTimeout(); - actualKnownNodes = cspHttp.getKnownNodes(); + Set actualKnownNodes = cspHttp.getLiveNodes(); assertEquals(3, actualKnownNodes.size()); assertEquals(expectedKnownNodes, actualKnownNodes); + // Stop non initially passed node from the cluster cluster.stopJettySolrRunner(jettyNode3); - expectedKnownNodes = Set.of(jettyNode1Url, jettyNode2Url); + expectedKnownNodes = Set.of(nodeName1, nodeName2); waitForCSPCacheTimeout(); // New nodes are removable from known hosts - actualKnownNodes = cspHttp.getKnownNodes(); + actualKnownNodes = cspHttp.getLiveNodes(); assertEquals(2, actualKnownNodes.size()); assertEquals(expectedKnownNodes, actualKnownNodes); } } private void waitForCSPCacheTimeout() throws InterruptedException { - Thread.sleep(6000); - } - - /** Jetty URL to Cluster State Node String http://127.0.0.1:12345/solr to 127.0.0.1:12345_solr */ - private String normalizeJettyUrl(String jettyUrl) { - return jettyUrl.substring(jettyUrl.lastIndexOf("//") + 2).replace("/", "_"); + Thread.sleep(2000); } } From 04ba07d8a847583a1318b9fb83745283834d40b9 Mon Sep 17 00:00:00 2001 From: Matthew Biscocho Date: Fri, 10 Jan 2025 14:14:51 -0500 Subject: [PATCH 5/5] inline urlSet --- .../client/solrj/impl/BaseHttpClusterStateProvider.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java index fa81814e876..08605122d24 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java @@ -427,12 +427,11 @@ private void setLiveNodes(Set nodes) { public Set getNodeNamesFromSolrUrls(List urls) throws URISyntaxException, MalformedURLException { - Set set = new HashSet<>(); + Set urlSet = new HashSet<>(); for (String url : urls) { - String nodeNameFromSolrUrl = getNodeNameFromSolrUrl(url); - set.add(nodeNameFromSolrUrl); + urlSet.add(getNodeNameFromSolrUrl(url)); } - return Collections.unmodifiableSet(set); + return Collections.unmodifiableSet(urlSet); } /** URL to cluster state node name (http://127.0.0.1:12345/solr to 127.0.0.1:12345_solr) */