Merge branch 'main' into 2024/03/12/3rd-party-tests-for-csps

tlrx · Mar 13, 2024 · 0e5747c · 0e5747c
2 parents 488772f + ab8f435
commit 0e5747c
Show file tree

Hide file tree

Showing 161 changed files with 6,147 additions and 1,260 deletions.
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/MrjarPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/MrjarPlugin.java
@@ -30,6 +30,9 @@
 import java.io.UncheckedIOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -56,24 +59,41 @@ public void apply(Project project) {
         var javaExtension = project.getExtensions().getByType(JavaPluginExtension.class);
 
         var srcDir = project.getProjectDir().toPath().resolve("src");
+        List<Integer> mainVersions = new ArrayList<>();
         try (var subdirStream = Files.list(srcDir)) {
             for (Path sourceset : subdirStream.toList()) {
                 assert Files.isDirectory(sourceset);
                 String sourcesetName = sourceset.getFileName().toString();
                 Matcher sourcesetMatcher = MRJAR_SOURCESET_PATTERN.matcher(sourcesetName);
                 if (sourcesetMatcher.matches()) {
-                    int javaVersion = Integer.parseInt(sourcesetMatcher.group(1));
-                    addMrjarSourceset(project, javaExtension, sourcesetName, javaVersion);
+                    mainVersions.add(Integer.parseInt(sourcesetMatcher.group(1)));
                 }
             }
         } catch (IOException e) {
             throw new UncheckedIOException(e);
         }
+
+        Collections.sort(mainVersions);
+        List<String> parentSourceSets = new ArrayList<>();
+        parentSourceSets.add(SourceSet.MAIN_SOURCE_SET_NAME);
+        for (int javaVersion : mainVersions) {
+            String sourcesetName = "main" + javaVersion;
+            addMrjarSourceset(project, javaExtension, sourcesetName, parentSourceSets, javaVersion);
+            parentSourceSets.add(sourcesetName);
+        }
     }
 
-    private void addMrjarSourceset(Project project, JavaPluginExtension javaExtension, String sourcesetName, int javaVersion) {
+    private void addMrjarSourceset(
+        Project project,
+        JavaPluginExtension javaExtension,
+        String sourcesetName,
+        List<String> parentSourceSets,
+        int javaVersion
+    ) {
         SourceSet sourceSet = javaExtension.getSourceSets().maybeCreate(sourcesetName);
-        GradleUtils.extendSourceSet(project, SourceSet.MAIN_SOURCE_SET_NAME, sourcesetName);
+        for (String parentSourceSetName : parentSourceSets) {
+            GradleUtils.extendSourceSet(project, parentSourceSetName, sourcesetName);
+        }
 
         var jarTask = project.getTasks().withType(Jar.class).named(JavaPlugin.JAR_TASK_NAME);
         jarTask.configure(task -> {

diff --git a/docs/changelog/105393.yaml b/docs/changelog/105393.yaml
@@ -0,0 +1,5 @@
+pr: 105393
+summary: Adding support for hex-encoded byte vectors on knn-search
+area: Vector Search
+type: feature
+issues: []
diff --git a/docs/changelog/105470.yaml b/docs/changelog/105470.yaml
@@ -0,0 +1,5 @@
+pr: 105470
+summary: Add retrievers using the parser-only approach
+area: Ranking
+type: enhancement
+issues: []
diff --git a/docs/changelog/106094.yaml b/docs/changelog/106094.yaml
@@ -0,0 +1,5 @@
+pr: 106094
+summary: "ESQL: Support partially folding CASE"
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/changelog/106171.yaml b/docs/changelog/106171.yaml
@@ -0,0 +1,6 @@
+pr: 106171
+summary: Do not log error on node restart when the transform is already failed
+area: Transform
+type: enhancement
+issues:
+ - 106168
diff --git a/docs/changelog/106247.yaml b/docs/changelog/106247.yaml
@@ -0,0 +1,5 @@
+pr: 106247
+summary: Fix a downsample persistent task assignment bug
+area: Downsampling
+type: bug
+issues: []
diff --git a/docs/reference/analysis/normalizers.asciidoc b/docs/reference/analysis/normalizers.asciidoc
@@ -6,15 +6,15 @@ token. As a consequence, they do not have a tokenizer and only accept a subset
 of the available char filters and token filters. Only the filters that work on
 a per-character basis are allowed. For instance a lowercasing filter would be
 allowed, but not a stemming filter, which needs to look at the keyword as a
-whole. The current list of filters that can be used in a normalizer is
-following: `arabic_normalization`, `asciifolding`, `bengali_normalization`,
+whole. The current list of filters that can be used in a normalizer definition
+is: `arabic_normalization`, `asciifolding`, `bengali_normalization`,
 `cjk_width`, `decimal_digit`, `elision`, `german_normalization`,
 `hindi_normalization`, `indic_normalization`, `lowercase`, `pattern_replace`,
 `persian_normalization`, `scandinavian_folding`, `serbian_normalization`,
 `sorani_normalization`, `trim`, `uppercase`.
 
 Elasticsearch ships with a `lowercase` built-in normalizer. For other forms of
-normalization a custom configuration is required.
+normalization, a custom configuration is required.
 
 [discrete]
 === Custom normalizers

diff --git a/docs/reference/query-dsl/knn-query.asciidoc b/docs/reference/query-dsl/knn-query.asciidoc
@@ -87,8 +87,8 @@ the top `size` results.
 `query_vector`::
 +
 --
-(Required, array of floats) Query vector. Must have the same number of dimensions
-as the vector field you are searching against.
+(Required, array of floats or string) Query vector. Must have the same number of dimensions
+as the vector field you are searching against. Must be either an array of floats or a hex-encoded byte vector.
 --
 
 `num_candidates`::

diff --git a/docs/reference/query-dsl/text-expansion-query.asciidoc b/docs/reference/query-dsl/text-expansion-query.asciidoc
@@ -155,47 +155,55 @@ GET my-index/_search
 ----
 // TEST[skip: TBD]
 
-This can also be achieved by using sub searches combined with <<rrf>>.
+This can also be achieved using <<rrf, reciprocal rank fusion (RRF)>>,
+through an <<rrf-retriever, `rrf` retriever>> with multiple
+<<standard-retriever, `standard` retrievers>>.
 
 [source,console]
 ----
 GET my-index/_search
 {
-  "sub_searches": [
-    {
-      "query": {
-        "multi_match": {
-          "query": "How is the weather in Jamaica?",
-          "fields": [
-            "title",
-            "description"
-          ]
-        }
-      }
-    },
-    {
-      "query": {
-        "text_expansion": {
-          "ml.inference.title_expanded.predicted_value": {
-            "model_id": ".elser_model_2",
-            "model_text": "How is the weather in Jamaica?"
+  "retriever": {
+    "rrf": {
+      "retrievers": [
+        {
+          "standard": {
+            "query": {
+              "multi_match": {
+                "query": "How is the weather in Jamaica?",
+                "fields": [
+                  "title",
+                  "description"
+                ]
+              }
+            }
           }
-        }
-      }
-    },
-    {
-      "query": {
-        "text_expansion": {
-          "ml.inference.description_expanded.predicted_value": {
-            "model_id": ".elser_model_2",
-            "model_text": "How is the weather in Jamaica?"
+        },
+        {
+          "standard": {
+            "query": {
+              "text_expansion": {
+                "ml.inference.title_expanded.predicted_value": {
+                  "model_id": ".elser_model_2",
+                  "model_text": "How is the weather in Jamaica?"
+                }
+              }
+            }
+          }
+        },
+        {
+          "standard": {
+            "query": {
+              "text_expansion": {
+                "ml.inference.description_expanded.predicted_value": {
+                  "model_id": ".elser_model_2",
+                  "model_text": "How is the weather in Jamaica?"
+                }
+              }
+            }
           }
         }
-      }
-    }
-  ],
-  "rank": {
-    "rrf": {
+      ],
       "window_size": 10,
       "rank_constant": 20
     }
@@ -245,7 +253,7 @@ GET my-index/_search
                "pruning_config": {
                   "tokens_freq_ratio_threshold": 5,
                   "tokens_weight_threshold": 0.4,
-                  "only_score_pruned_tokens": false
+                  "only_score_pruned_tokens": true
                }
             }
          }

diff --git a/docs/reference/rest-api/common-parms.asciidoc b/docs/reference/rest-api/common-parms.asciidoc
@@ -597,7 +597,7 @@ end::knn-num-candidates[]
 
 tag::knn-query-vector[]
 Query vector. Must have the same number of dimensions as the vector field you
-are searching against.
+are searching against. Must be either an array of floats or a hex-encoded byte vector.
 end::knn-query-vector[]
 
 tag::knn-similarity[]
@@ -1281,3 +1281,34 @@ Default: 1, the primary shard.
 See <<index-wait-for-active-shards>>.
 --
 end::wait_for_active_shards[]
+
+tag::rrf-retrievers[]
+`retrievers`::
+(Required, array of retriever objects)
++
+A list of child retrievers whose sets of returned top documents will have the
+RRF formula applied to them. Each child retriever carries an equal weight as
+part of the RRF formula. Two or more child retrievers are
+required.
+end::rrf-retrievers[]
+
+tag::rrf-rank-constant[]
+`rank_constant`::
+(Optional, integer)
++
+This value determines how much influence documents in individual
+result sets per query have over the final ranked result set. A higher value indicates
+that lower ranked documents have more influence. This value must be greater than or
+equal to `1`. Defaults to `60`.
+end::rrf-rank-constant[]
+
+tag::rrf-window-size[]
+`window_size`::
+(Optional, integer)
++
+This value determines the size of the individual result sets per
+query. A higher value will improve result relevance at the cost of performance. The final
+ranked result set is pruned down to the search request's <<search-size-param, size>>.
+`window_size` must be greater than or equal to `size` and greater than or equal to `1`.
+Defaults to the `size` parameter.
+end::rrf-window-size[]
diff --git a/docs/reference/search.asciidoc b/docs/reference/search.asciidoc
@@ -52,6 +52,8 @@ include::search/point-in-time-api.asciidoc[]
 
 include::search/knn-search.asciidoc[]
 
+include::search/retriever.asciidoc[]
+
 include::search/rrf.asciidoc[]
 
 include::search/scroll-api.asciidoc[]

diff --git a/docs/reference/search/knn-search.asciidoc b/docs/reference/search/knn-search.asciidoc
@@ -121,7 +121,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-k]
 include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-num-candidates]
 
 `query_vector`::
-(Required, array of floats)
+(Required, array of floats or string)
 include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=knn-query-vector]
 ====