diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index dfb35da9bf6..052e3cd42fc 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -176,6 +176,10 @@ Bug Fixes * SOLR-17629: If SQLHandler failed to open the underlying stream (e.g. Solr returns an error; could be user/syntax problem), it needs to close the stream to cleanup resources but wasn't. (David Smiley) +* SOLR-17519: SolrJ CloudSolrClient configured with Solr URLs can fail to request cluster state if its + current live nodes list is stale. CloudSolrClient now retains the initial configured list of passed URLs as backup + used for fetching cluster state when all live nodes have failed. (Matthew Biscocho via David Smiley, Houston Putman) + Dependency Upgrades --------------------- * SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java index 40183109178..a09d936582e 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BaseHttpClusterStateProvider.java @@ -21,6 +21,10 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; import java.time.Instant; import java.util.Collections; import java.util.List; @@ -28,6 +32,7 @@ import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest.METHOD; import org.apache.solr.client.solrj.SolrServerException; @@ -43,6 +48,7 @@ import org.apache.solr.common.util.EnvUtils; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.URLUtil; import org.apache.solr.common.util.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -51,6 +57,7 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private String urlScheme; + private List configuredNodes; volatile Set liveNodes; long liveNodesTimestamp = 0; volatile Map> aliases; @@ -61,6 +68,19 @@ public abstract class BaseHttpClusterStateProvider implements ClusterStateProvid private int cacheTimeout = EnvUtils.getPropertyAsInteger("solr.solrj.cache.timeout.sec", 5); public void init(List solrUrls) throws Exception { + this.configuredNodes = + solrUrls.stream() + .map( + (solrUrl) -> { + try { + return new URI(solrUrl).toURL(); + } catch (MalformedURLException | URISyntaxException e) { + throw new IllegalArgumentException( + "Failed to parse base Solr URL " + solrUrl, e); + } + }) + .collect(Collectors.toList()); + for (String solrUrl : solrUrls) { urlScheme = solrUrl.startsWith("https") ? "https" : "http"; try (SolrClient initialClient = getSolrClient(solrUrl)) { @@ -80,7 +100,7 @@ public void init(List solrUrls) throws Exception { + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); + + " solrUrl(s)."); } } @@ -125,7 +145,7 @@ public ClusterState.CollectionRef getState(String collection) { + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); + + " solrUrl(s)."); } @SuppressWarnings("unchecked") @@ -216,27 +236,20 @@ private SimpleOrderedMap submitClusterStateRequest( @Override public Set getLiveNodes() { - if (liveNodes == null) { - throw new RuntimeException( - "We don't know of any live_nodes to fetch the" - + " latest live_nodes information from. " - + "If you think your Solr cluster is up and is accessible," - + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); - } if (TimeUnit.SECONDS.convert((System.nanoTime() - liveNodesTimestamp), TimeUnit.NANOSECONDS) > getCacheTimeout()) { - for (String nodeName : liveNodes) { - String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); - try (SolrClient client = getSolrClient(baseUrl)) { - Set liveNodes = fetchLiveNodes(client); - this.liveNodes = (liveNodes); - liveNodesTimestamp = System.nanoTime(); - return liveNodes; - } catch (Exception e) { - log.warn("Attempt to fetch cluster state from {} failed.", baseUrl, e); - } - } + + if (liveNodes.stream() + .anyMatch((node) -> updateLiveNodes(URLUtil.getBaseUrlForNodeName(node, urlScheme)))) + return this.liveNodes; + + log.warn( + "Attempt to fetch cluster state from all known live nodes {} failed. Trying backup nodes", + liveNodes); + + if (configuredNodes.stream().anyMatch((node) -> updateLiveNodes(node.toString()))) + return this.liveNodes; + throw new RuntimeException( "Tried fetching live_nodes using all the node names we knew of, i.e. " + liveNodes @@ -244,10 +257,21 @@ > getCacheTimeout()) { + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); + + " solrUrl(s)."); } else { - return liveNodes; // cached copy is fresh enough + return this.liveNodes; // cached copy is fresh enough + } + } + + private boolean updateLiveNodes(String liveNode) { + try (SolrClient client = getSolrClient(liveNode)) { + this.liveNodes = fetchLiveNodes(client); + liveNodesTimestamp = System.nanoTime(); + return true; + } catch (Exception e) { + log.warn("Attempt to fetch cluster state from {} failed.", liveNode, e); } + return false; } @SuppressWarnings({"unchecked"}) @@ -278,7 +302,7 @@ private Map> getAliases(boolean forceFetch) { + " latest aliases information from. " + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); + + " solrUrl(s)."); } if (forceFetch @@ -300,7 +324,7 @@ > getCacheTimeout()) { if (e instanceof RemoteSolrException && ((RemoteSolrException) e).code() == 400) { log.warn( "LISTALIASES not found, possibly using older Solr server. Aliases won't work {}", - "unless you re-create the CloudSolrClient using zkHost(s) or upgrade Solr server", + "unless you upgrade Solr server", e); this.aliases = Collections.emptyMap(); this.aliasProperties = Collections.emptyMap(); @@ -318,7 +342,7 @@ > getCacheTimeout()) { + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using a working" - + " solrUrl or zkHost."); + + " solrUrl."); } else { return Collections.unmodifiableMap(this.aliases); // cached copy is fresh enough } @@ -352,13 +376,12 @@ public ClusterState getClusterState() { + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); + + " solrUrl(s)."); } @SuppressWarnings("unchecked") @Override public Map getClusterProperties() { - // Map clusterPropertiesMap = new HashMap<>(); for (String nodeName : liveNodes) { String baseUrl = Utils.getBaseUrlForNodeName(nodeName, urlScheme); try (SolrClient client = getSolrClient(baseUrl)) { @@ -376,7 +399,7 @@ public Map getClusterProperties() { + "succeeded in obtaining the cluster state from none of them." + "If you think your Solr cluster is up and is accessible," + " you could try re-creating a new CloudSolrClient using working" - + " solrUrl(s) or zkHost(s)."); + + " solrUrl(s)."); } @Override diff --git a/solr/solrj/src/java/org/apache/solr/common/util/URLUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/URLUtil.java index 6ddd81052b7..2cc577d5abe 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/URLUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/URLUtil.java @@ -17,6 +17,10 @@ package org.apache.solr.common.util; import java.lang.invoke.MethodHandles; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; @@ -98,4 +102,59 @@ private static String removeTrailingSlashIfPresent(String url) { return url; } + + /** + * Construct a V1 base url for the Solr node, given its name (e.g., 'app-node-1:8983_solr') and a + * URL scheme. + * + * @param nodeName name of the Solr node + * @param urlScheme scheme for the base url ('http' or 'https') + * @return url that looks like {@code https://app-node-1:8983/solr} + * @throws IllegalArgumentException if the provided node name is malformed + */ + public static String getBaseUrlForNodeName(final String nodeName, final String urlScheme) { + return getBaseUrlForNodeName(nodeName, urlScheme, false); + } + + /** + * Construct a V1 or a V2 base url for the Solr node, given its name (e.g., + * 'app-node-1:8983_solr') and a URL scheme. + * + * @param nodeName name of the Solr node + * @param urlScheme scheme for the base url ('http' or 'https') + * @param isV2 whether a V2 url should be constructed + * @return url that looks like {@code https://app-node-1:8983/api} (V2) or {@code + * https://app-node-1:8983/solr} (V1) + * @throws IllegalArgumentException if the provided node name is malformed + */ + public static String getBaseUrlForNodeName( + final String nodeName, final String urlScheme, boolean isV2) { + final int colonAt = nodeName.indexOf(':'); + if (colonAt == -1) { + throw new IllegalArgumentException( + "nodeName does not contain expected ':' separator: " + nodeName); + } + + final int _offset = nodeName.indexOf('_', colonAt); + if (_offset < 0) { + throw new IllegalArgumentException( + "nodeName does not contain expected '_' separator: " + nodeName); + } + final String hostAndPort = nodeName.substring(0, _offset); + return urlScheme + "://" + hostAndPort + "/" + (isV2 ? "api" : "solr"); + } + + /** + * Construct base Solr URL to a Solr node name + * + * @param solrUrl Given a base Solr URL string (e.g., 'https://app-node-1:8983/solr') + * @return Node name that looks like {@code app-node-1:8983_solr} + * @throws MalformedURLException if the provided URL string is malformed + * @throws URISyntaxException if the provided URL string could not be parsed as a URI reference. + */ + public static String getNodeNameForBaseUrl(String solrUrl) + throws MalformedURLException, URISyntaxException { + URL url = new URI(solrUrl).toURL(); + return url.getAuthority() + url.getPath().replace('/', '_'); + } } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java index 618217583a7..253c97f275b 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java @@ -753,9 +753,11 @@ public static String applyUrlScheme(final String url, final String urlScheme) { * @param urlScheme scheme for the base url ('http' or 'https') * @return url that looks like {@code https://app-node-1:8983/solr} * @throws IllegalArgumentException if the provided node name is malformed + * @deprecated Use {@link URLUtil#getBaseUrlForNodeName(String, String)} */ + @Deprecated public static String getBaseUrlForNodeName(final String nodeName, final String urlScheme) { - return getBaseUrlForNodeName(nodeName, urlScheme, false); + return URLUtil.getBaseUrlForNodeName(nodeName, urlScheme, false); } /** @@ -768,22 +770,12 @@ public static String getBaseUrlForNodeName(final String nodeName, final String u * @return url that looks like {@code https://app-node-1:8983/api} (V2) or {@code * https://app-node-1:8983/solr} (V1) * @throws IllegalArgumentException if the provided node name is malformed + * @deprecated Use {@link URLUtil#getBaseUrlForNodeName(String, String, boolean)} */ + @Deprecated public static String getBaseUrlForNodeName( final String nodeName, final String urlScheme, boolean isV2) { - final int colonAt = nodeName.indexOf(':'); - if (colonAt == -1) { - throw new IllegalArgumentException( - "nodeName does not contain expected ':' separator: " + nodeName); - } - - final int _offset = nodeName.indexOf('_', colonAt); - if (_offset < 0) { - throw new IllegalArgumentException( - "nodeName does not contain expected '_' separator: " + nodeName); - } - final String hostAndPort = nodeName.substring(0, _offset); - return urlScheme + "://" + hostAndPort + "/" + (isV2 ? "api" : "solr"); + return URLUtil.getBaseUrlForNodeName(nodeName, urlScheme, isV2); } public static long time(TimeSource timeSource, TimeUnit unit) { diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java index 707313efec4..f3ee722ee58 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ClusterStateProviderTest.java @@ -17,6 +17,7 @@ package org.apache.solr.client.solrj.impl; +import static org.apache.solr.common.util.URLUtil.getNodeNameForBaseUrl; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; @@ -26,6 +27,7 @@ import java.time.Instant; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Supplier; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.CollectionAdminRequest; @@ -35,6 +37,7 @@ import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.util.NamedList; import org.hamcrest.Matchers; +import org.junit.After; import org.junit.BeforeClass; import org.junit.Test; @@ -42,7 +45,7 @@ public class ClusterStateProviderTest extends SolrCloudTestCase { @BeforeClass public static void setupCluster() throws Exception { - configureCluster(1) + configureCluster(2) .addConfig( "conf", getFile("solrj") @@ -51,6 +54,15 @@ public static void setupCluster() throws Exception { .resolve("streaming") .resolve("conf")) .configure(); + cluster.waitForAllNodes(30); + System.setProperty("solr.solrj.cache.timeout.sec", "1"); + } + + @After + public void cleanup() throws Exception { + while (cluster.getJettySolrRunners().size() < 2) { + cluster.startJettySolrRunner(); + } } @ParametersFactory @@ -59,10 +71,13 @@ public static Iterable parameters() throws NoSuchMethodException { new String[] {"http2ClusterStateProvider"}, new String[] {"zkClientClusterStateProvider"}); } - private static ClusterStateProvider http2ClusterStateProvider() { + private static Http2ClusterStateProvider http2ClusterStateProvider() { try { return new Http2ClusterStateProvider( - List.of(cluster.getJettySolrRunner(0).getBaseUrl().toString()), null); + List.of( + cluster.getJettySolrRunner(0).getBaseUrl().toString(), + cluster.getJettySolrRunner(1).getBaseUrl().toString()), + null); } catch (Exception e) { throw new RuntimeException(e); } @@ -160,7 +175,6 @@ public void testClusterStateProvider() throws SolrServerException, IOException { try (var cspZk = zkClientClusterStateProvider(); var cspHttp = http2ClusterStateProvider()) { - assertThat(cspZk.getClusterProperties(), Matchers.hasEntry("ext.foo", "bar")); assertThat( cspZk.getClusterProperties().entrySet(), @@ -183,4 +197,75 @@ public void testClusterStateProvider() throws SolrServerException, IOException { clusterStateZk.getCollection("col2"), equalTo(clusterStateHttp.getCollection("col2"))); } } + + @Test + public void testClusterStateProviderDownedInitialLiveNodes() throws Exception { + try (var cspHttp = http2ClusterStateProvider()) { + var jettyNode1 = cluster.getJettySolrRunner(0); + var jettyNode2 = cluster.getJettySolrRunner(1); + + String nodeName1 = getNodeNameForBaseUrl(jettyNode1.getBaseUrl().toString()); + String nodeName2 = getNodeNameForBaseUrl(jettyNode2.getBaseUrl().toString()); + + Set actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(2, actualLiveNodes.size()); + assertEquals(Set.of(nodeName1, nodeName2), actualLiveNodes); + + cluster.stopJettySolrRunner(jettyNode1); + waitForCSPCacheTimeout(); + + actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualLiveNodes.size()); + assertEquals(Set.of(nodeName2), actualLiveNodes); + + cluster.startJettySolrRunner(jettyNode1, true); + cluster.stopJettySolrRunner(jettyNode2); + waitForCSPCacheTimeout(); + + // Should still be reachable because backup nodes + actualLiveNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualLiveNodes.size()); + assertEquals(Set.of(nodeName1), actualLiveNodes); + } + } + + @Test + public void testClusterStateProviderLiveNodesWithNewNode() throws Exception { + try (var cspHttp = http2ClusterStateProvider()) { + var jettyNode1 = cluster.getJettySolrRunner(0); + var jettyNode2 = cluster.getJettySolrRunner(1); + var jettyNode3 = cluster.startJettySolrRunner(); + + String nodeName1 = getNodeNameForBaseUrl(jettyNode1.getBaseUrl().toString()); + String nodeName2 = getNodeNameForBaseUrl(jettyNode2.getBaseUrl().toString()); + String nodeName3 = getNodeNameForBaseUrl(jettyNode3.getBaseUrl().toString()); + waitForCSPCacheTimeout(); + + Set actualKnownNodes = cspHttp.getLiveNodes(); + assertEquals(3, actualKnownNodes.size()); + assertEquals(Set.of(nodeName1, nodeName2, nodeName3), actualKnownNodes); + + // Stop all backup nodes + cluster.stopJettySolrRunner(jettyNode1); + cluster.stopJettySolrRunner(jettyNode2); + waitForCSPCacheTimeout(); + + actualKnownNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualKnownNodes.size()); + assertEquals(Set.of(nodeName3), actualKnownNodes); + + // Bring back a backup node and take down the new node + cluster.startJettySolrRunner(jettyNode2, true); + cluster.stopJettySolrRunner(jettyNode3); + waitForCSPCacheTimeout(); + + actualKnownNodes = cspHttp.getLiveNodes(); + assertEquals(1, actualKnownNodes.size()); + assertEquals(Set.of(nodeName2), actualKnownNodes); + } + } + + private void waitForCSPCacheTimeout() throws InterruptedException { + Thread.sleep(2000); + } } diff --git a/solr/solrj/src/test/org/apache/solr/common/util/URLUtilTest.java b/solr/solrj/src/test/org/apache/solr/common/util/URLUtilTest.java index 0394d71d46d..1ddb06cc8cc 100644 --- a/solr/solrj/src/test/org/apache/solr/common/util/URLUtilTest.java +++ b/solr/solrj/src/test/org/apache/solr/common/util/URLUtilTest.java @@ -16,6 +16,11 @@ */ package org.apache.solr.common.util; +import static org.apache.solr.common.util.URLUtil.getBaseUrlForNodeName; +import static org.apache.solr.common.util.URLUtil.getNodeNameForBaseUrl; + +import java.net.MalformedURLException; +import java.net.URISyntaxException; import org.apache.solr.SolrTestCase; import org.junit.Test; @@ -95,4 +100,29 @@ public void testCanBuildCoreUrl() { "http://localhost:8983/solr/sTrAnGe-name.for_core", URLUtil.buildCoreUrl("http://localhost:8983/solr", "sTrAnGe-name.for_core")); } + + @Test + public void testGetNodeNameForBaseUrl() throws MalformedURLException, URISyntaxException { + assertEquals("node-1-url:8983_solr", getNodeNameForBaseUrl("https://node-1-url:8983/solr")); + assertEquals("node-1-url:8983_solr", getNodeNameForBaseUrl("http://node-1-url:8983/solr")); + assertEquals("node-1-url:8983_api", getNodeNameForBaseUrl("http://node-1-url:8983/api")); + assertThrows(MalformedURLException.class, () -> getNodeNameForBaseUrl("node-1-url:8983/solr")); + assertThrows( + URISyntaxException.class, () -> getNodeNameForBaseUrl("http://node-1-url:8983/solr^")); + } + + @Test + public void testGetBaseUrlForNodeName() { + assertEquals( + "http://app-node-1:8983/solr", + getBaseUrlForNodeName("app-node-1:8983_solr", "http", false)); + assertEquals( + "https://app-node-1:8983/solr", + getBaseUrlForNodeName("app-node-1:8983_solr", "https", false)); + assertEquals( + "http://app-node-1:8983/api", getBaseUrlForNodeName("app-node-1:8983_solr", "http", true)); + assertEquals( + "https://app-node-1:8983/api", + getBaseUrlForNodeName("app-node-1:8983_solr", "https", true)); + } } diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java index c600c7e4ffa..1eeb25914ce 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java @@ -517,31 +517,44 @@ public JettySolrRunner startJettySolrRunner() throws Exception { } /** - * Stop a Solr instance + * Add a previously stopped node back to the cluster on a different port * - * @param index the index of node in collection returned by {@link #getJettySolrRunners()} - * @return the shut down node + * @param jetty a {@link JettySolrRunner} previously returned by {@link #stopJettySolrRunner(int)} + * @return the started node + * @throws Exception on error */ - public JettySolrRunner stopJettySolrRunner(int index) throws Exception { - JettySolrRunner jetty = jettys.get(index); - jetty.stop(); - jettys.remove(index); - return jetty; + public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception { + return startJettySolrRunner(jetty, false); } /** * Add a previously stopped node back to the cluster * * @param jetty a {@link JettySolrRunner} previously returned by {@link #stopJettySolrRunner(int)} + * @param reusePort the port previously used by jetty * @return the started node * @throws Exception on error */ - public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception { - jetty.start(false); + public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty, boolean reusePort) + throws Exception { + jetty.start(reusePort); if (!jettys.contains(jetty)) jettys.add(jetty); return jetty; } + /** + * Stop a Solr instance + * + * @param index the index of node in collection returned by {@link #getJettySolrRunners()} + * @return the shut down node + */ + public JettySolrRunner stopJettySolrRunner(int index) throws Exception { + JettySolrRunner jetty = jettys.get(index); + jetty.stop(); + jettys.remove(index); + return jetty; + } + /** * Stop the given Solr instance. It will be removed from the cluster's list of running instances. *