Skip to content

Commit

Permalink
SOLR-17192: Add "field-limiting" URP to catch ill-designed schemas (a…
Browse files Browse the repository at this point in the history
…pache#2395)


The URPF, `NumFieldLimitingUpdateRequestProcessorFactory`, blocks all
update requests that go through `processAdd` if the core exceeds a
configurable threshold of fields.

The factory accepts two parameters: `maxFields` is a required integer
representing the maximum field threshold,  and `warnOnly` is an optional
boolean that (when enabled) has the URP chain log warnings instead of
blocking updates.

The factory is included in the default configset, with warnOnly=false and
maxFields=1000.  (More lenient settings will be used on branch_9x)

---------

Co-authored-by: David Smiley <[email protected]>
  • Loading branch information
2 people authored and janhoy committed May 8, 2024
1 parent a6fb754 commit 6b057bc
Show file tree
Hide file tree
Showing 9 changed files with 442 additions and 1 deletion.
5 changes: 5 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ New Features
---------------------
* SOLR-13350: Multithreaded search execution (Ishan Chattopadhyaya, Mark Miller, Christine Poerschke, David Smiley, noble)

* SOLR-17192: Put an UpdateRequestProcessor-enforced soft-limit on the number of fields allowed in a core. The `NumFieldLimitingUpdateRequestProcessorFactory`
limit may be adjusted by raising the factory's `maxFields` setting, toggled in and out of "warning-only" mode using the `warnOnly` setting, or disabled entirely
by removing it from solrconfig.xml. The limit is set at 1000 fields in the "_default" configset, but left in warning-only mode. (David Smiley, Eric Pugh,
Jason Gerlowski)

Improvements
---------------------
* SOLR-16921: use -solrUrl to derive the zk host connection for bin/solr zk subcommands (Eric Pugh)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Locale;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.AddUpdateCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* This factory generates an UpdateRequestProcessor which fails update requests once a core has
* exceeded a configurable maximum number of fields. Meant as a safeguard to help users notice
* potentially-dangerous schema design before performance and stability problems start to occur.
*
* <p>The URP uses the core's {@link SolrIndexSearcher} to judge the current number of fields.
* Accordingly, it undercounts the number of fields in the core - missing all fields added since the
* previous searcher was opened. As such, the URP's request-blocking is "best effort" - it cannot be
* relied on as a precise limit on the number of fields.
*
* <p>Additionally, the field-counting includes all documents present in the index, including any
* deleted docs that haven't yet been purged via segment merging. Note that this can differ
* significantly from the number of fields defined in managed-schema.xml - especially when dynamic
* fields are enabled. The only way to reduce this field count is to delete documents and wait until
* the deleted documents have been removed by segment merges. Users may of course speed up this
* process by tweaking Solr's segment-merging, triggering an "optimize" operation, etc.
*
* <p>{@link NumFieldLimitingUpdateRequestProcessorFactory} accepts two configuration parameters:
*
* <ul>
* <li><code>maxFields</code> - (required) The maximum number of fields before update requests
* should be aborted. Once this limit has been exceeded, additional update requests will fail
* until fields have been removed or the "maxFields" is increased.
* <li><code>warnOnly</code> - (optional) If <code>true</code> then the URP logs verbose warnings
* about the limit being exceeded but doesn't abort update requests. Defaults to <code>false
* </code> if not specified
* </ul>
*
* @since 9.7.0
*/
public class NumFieldLimitingUpdateRequestProcessorFactory extends UpdateRequestProcessorFactory {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  private static final String MAXIMUM_FIELDS_PARAM = "maxFields";
  private static final String WARN_ONLY_PARAM = "warnOnly";

  // package visibility for tests
  /** Largest number of fields tolerated before adds are blocked (or warned about). */
  int maximumFields;

  /** When {@code true}, exceeding the limit only logs a warning instead of failing the add. */
  boolean warnOnly;

  /**
   * Reads and validates the factory configuration.
   *
   * @param args init args; must contain a positive {@code Integer} under "maxFields" and may
   *     contain a boolean "warnOnly" (treated as {@code false} when absent)
   * @throws IllegalArgumentException if "maxFields" is missing, is not an {@code Integer}, or is
   *     not strictly positive
   */
  @Override
  public void init(NamedList<?> args) {
    // A parameter is present when its index is >= 0 (index 0 is the first entry). The previous
    // "> 0" check silently ignored warnOnly whenever it was the first configured argument.
    warnOnly = args.indexOf(WARN_ONLY_PARAM, 0) >= 0 ? args.getBooleanArg(WARN_ONLY_PARAM) : false;

    if (args.indexOf(MAXIMUM_FIELDS_PARAM, 0) < 0) {
      throw new IllegalArgumentException(
          "The "
              + MAXIMUM_FIELDS_PARAM
              + " parameter is required for "
              + getClass().getName()
              + ", but no value was provided.");
    }
    final Object rawMaxFields = args.get(MAXIMUM_FIELDS_PARAM);
    if (!(rawMaxFields instanceof Integer)) {
      throw new IllegalArgumentException(
          MAXIMUM_FIELDS_PARAM + " must be configured as a non-null <int>");
    }
    maximumFields = (Integer) rawMaxFields;
    if (maximumFields <= 0) {
      throw new IllegalArgumentException(MAXIMUM_FIELDS_PARAM + " must be a positive integer");
    }
  }

  /**
   * Builds the processor for a single update request.
   *
   * <p>The field count is sampled once, from the request's searcher, when the processor is
   * created. If the count is within the limit, {@code next} is returned directly so no extra
   * processor is inserted in the chain. Otherwise the returned processor intercepts {@code
   * processAdd}: in warn-only mode it logs a warning and forwards the document to {@code next};
   * otherwise it rejects the document with a {@code BAD_REQUEST} {@link SolrException}.
   *
   * @param req the update request; its searcher supplies the current field count
   * @param rsp the response for the request (unused here)
   * @param next the next processor in the chain
   * @return {@code next} when under the limit, otherwise a limiting processor wrapping it
   */
  @Override
  public UpdateRequestProcessor getInstance(
      SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
    // note: it's unusual to call req.getSearcher in a /update request but it should be fine
    final int currentNumFields = req.getSearcher().getFieldInfos().size();
    if (currentNumFields <= maximumFields) {
      // great; no need to insert an URP to block or log anything
      return next;
    }

    // Over the limit: block (or, in warn-only mode, merely report) indexing of new documents
    return new UpdateRequestProcessor(next) {
      @Override
      public void processAdd(AddUpdateCommand cmd) throws IOException {
        String id = cmd.getPrintableId();
        // Only claim the update is being blocked when it really is, i.e. outside warn-only mode
        final String messageSuffix = warnOnly ? "" : "Blocking update of document " + id;
        final String message =
            String.format(
                Locale.ROOT,
                "Current core has %d fields, exceeding the max-fields limit of %d. %s",
                currentNumFields,
                maximumFields,
                messageSuffix);
        if (warnOnly) {
          log.warn(message);
          // Warn-only must not drop the document: hand it on to the rest of the chain
          super.processAdd(cmd);
        } else {
          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message);
        }
      }
    };
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Minimal test schema: a handful of field types, the three explicit fields below, and
     dynamic-field patterns that map every other field name to the "string" type. -->
<schema name="minimal" version="1.1">
<fieldType name="string" class="solr.StrField"/>
<!-- The numeric types take their implementing class and docValues setting from test properties. -->
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true" />
<!-- Catch-all pattern: any field name not declared explicitly is accepted as a string field.
     NOTE(review): presumably this is what lets tests create many distinct fields; confirm. -->
<dynamicField name="*" type="string" indexed="true" stored="true"/>
<!-- for versioning -->
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="id" type="string" indexed="true" stored="true"/>
<!-- NOTE(review): "*_s" maps to the same type and flags as the catch-all "*" pattern above. -->
<dynamicField name="*_s" type="string" indexed="true" stored="true" />
<uniqueKey>id</uniqueKey>
</schema>
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
<?xml version="1.0" ?>

<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!-- Minimal solrconfig.xml with /select, /admin and /update only -->

<config>

<dataDir>${solr.data.dir:}</dataDir>

<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>

<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>

<updateHandler class="solr.DirectUpdateHandler2">
<commitWithin>
<softCommit>${solr.commitwithin.softcommit:true}</softCommit>
</commitWithin>
<updateLog class="${solr.ulog:solr.UpdateLog}"></updateLog>
</updateHandler>

<requestHandler name="/select" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="indent">true</str>
<str name="df">text</str>
</lst>
</requestHandler>

<!-- Default update chain. NumFieldLimitingUpdateRequestProcessorFactory is listed first, ahead of
     the log, distributed and run processors. Its "maxFields" value comes from the
     solr.test.maxFields property, with 1234 as the value after the colon (presumably the
     fallback when the property is unset; confirm against Solr's property substitution rules).
     No "warnOnly" argument is configured here.
     NOTE(review): despite the chain's name, none of the processors listed below adds unknown
     fields to the schema. -->
<updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="true">
<processor class="solr.NumFieldLimitingUpdateRequestProcessorFactory" name="max-fields">
<int name="maxFields">${solr.test.maxFields:1234}</int>
</processor>
<processor class="solr.LogUpdateProcessorFactory"/>
<processor class="solr.DistributedUpdateProcessorFactory"/>
<processor class="solr.RunUpdateProcessorFactory"/>
</updateRequestProcessorChain>

<indexConfig>
<mergeScheduler class="${solr.mscheduler:org.apache.lucene.index.ConcurrentMergeScheduler}"/>
</indexConfig>
</config>

Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update.processor;

import org.apache.solr.SolrTestCase;
import org.apache.solr.common.util.NamedList;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Test;

/** Unit tests for the init-arg parsing of {@link NumFieldLimitingUpdateRequestProcessorFactory}. */
public class NumFieldLimitingUpdateRequestProcessorFactoryTest extends SolrTestCase {

  private NumFieldLimitingUpdateRequestProcessorFactory factory = null;

  /** Gives every test a fresh, un-initialized factory. */
  @Before
  public void initFactory() {
    factory = new NumFieldLimitingUpdateRequestProcessorFactory();
  }

  /** Runs {@code init} with the given args and returns the message of the expected failure. */
  private String initFailureMessage(NamedList<Object> args) {
    final IllegalArgumentException failure =
        expectThrows(IllegalArgumentException.class, () -> factory.init(args));
    return failure.getMessage();
  }

  /** An empty configuration must be rejected because "maxFields" is mandatory. */
  @Test
  public void testReportsErrorIfMaximumFieldsNotProvided() {
    final String message = initFailureMessage(new NamedList<>());

    assertThat(message, Matchers.containsString("maxFields parameter is required"));
    assertThat(message, Matchers.containsString("no value was provided"));
  }

  /** "maxFields" must be an Integer, and a strictly positive one. */
  @Test
  public void testReportsErrorIfMaximumFieldsIsInvalid() {
    // wrong type: a String where an <int> is expected
    final NamedList<Object> nonIntegerArgs = new NamedList<>();
    nonIntegerArgs.add("maxFields", "nonIntegerValue");
    assertThat(
        initFailureMessage(nonIntegerArgs),
        Matchers.containsString("maxFields must be configured as a non-null <int>"));

    // right type, but out of range
    final NamedList<Object> negativeArgs = new NamedList<>();
    negativeArgs.add("maxFields", -5);
    assertThat(
        initFailureMessage(negativeArgs),
        Matchers.containsString("maxFields must be a positive integer"));
  }

  /** A fully-specified, valid configuration ends up in the factory's fields. */
  @Test
  public void testCorrectlyParsesAllConfigurationParams() {
    final NamedList<Object> validArgs = new NamedList<>();
    validArgs.add("maxFields", 123);
    validArgs.add("warnOnly", Boolean.TRUE);

    factory.init(validArgs);

    assertEquals(123, factory.maximumFields);
    assertTrue(factory.warnOnly);
  }
}
Loading

0 comments on commit 6b057bc

Please sign in to comment.