From cc729f97135b2112c099ceb9237aa54d54d7b4ee Mon Sep 17 00:00:00 2001 From: Kaushal Kumar Date: Mon, 6 Jan 2025 17:29:21 -0800 Subject: [PATCH] add trie to store Rules in memory Signed-off-by: Kaushal Kumar --- .../FastPrefixMatchingStructure.java | 8 +- .../wlm/rule/structure/RuleAttributeTrie.java | 76 ++++---- .../wlm/rule/structure/TrieDeleter.java | 66 +++++++ .../wlm/rule/structure/TrieInserter.java | 90 ++++++++++ .../plugin/wlm/rule/structure/TrieNode.java | 118 +++++++++++++ .../wlm/rule/structure/TrieSearcher.java | 112 ++++++++++++ .../{Rule.java => package-info.java} | 7 - .../FastPrefixMatchingStructureTests.java | 163 +++++++++++++++++- 8 files changed, 596 insertions(+), 44 deletions(-) create mode 100644 plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieDeleter.java create mode 100644 plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieInserter.java create mode 100644 plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieNode.java create mode 100644 plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieSearcher.java rename plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/{Rule.java => package-info.java} (69%) diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructure.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructure.java index bfe40468e306b..654b2687c1d80 100644 --- a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructure.java +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructure.java @@ -8,6 +8,12 @@ package org.opensearch.plugin.wlm.rule.structure; +import java.util.List; + public interface FastPrefixMatchingStructure { - void 
/**
 * A string key/value store that can be queried by key prefix.
 * <p>
 * The exact matching rules are defined by the implementation; the notes below describe what
 * {@code RuleAttributeTrie}, the implementation in this package, does.
 */
public interface FastPrefixMatchingStructure {
    /**
     * Stores {@code value} under {@code key}, replacing any value previously stored for that key.
     *
     * @param key   the key to insert
     * @param value the value associated with the key
     */
    void insert(String key, String value);

    /**
     * Looks up {@code key}.
     *
     * @param key the key (or key prefix) to look up
     * @return the values found for the key; {@code RuleAttributeTrie} returns a single-element list for an
     *         exact hit, a bounded list of values whose keys start with {@code key} otherwise, and an
     *         empty list when nothing matches
     */
    List<String> search(String key);

    /**
     * Removes the entry stored under exactly {@code key}.
     *
     * @param key the key to remove
     * @return {@code true} if an entry existed and was removed, {@code false} otherwise
     */
    boolean delete(String key);
}
+ */ + public void insert(String key, String value) { + if (!IsvalidValue(value)) { + throw new IllegalArgumentException( + "Invalid attribute value: " + value + " it should match the regex " + ALLOWED_ATTRIBUTE_VALUES + ); } - + TrieInserter inserter = new TrieInserter(root, key, value); + root = inserter.insert(); } - public static class LabeledNode { - private String label; - private Map children = new HashMap<>(); + private boolean IsvalidValue(String value) { + return ALLOWED_ATTRIBUTE_VALUES.matches(value); + } - public LabeledNode() {} + /** + * Searches for a key in the trie. + * + * @param key The key to search for. + * @return A list of string values associated with the key or its prefixes. + * Returns an empty list if no matches are found. + */ + public List search(String key) { + TrieSearcher searcher = new TrieSearcher(root, key); + return searcher.search(); + } - public LabeledNode(String label) { - this.label = label; - } + /** + * Deletes a key from the trie. + * + * @param key The key to be deleted. + * @return true if the key was successfully deleted, false otherwise. + */ + public boolean delete(String key) { + TrieDeleter deleter = new TrieDeleter(root, key); + return deleter.delete(); } } diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieDeleter.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieDeleter.java new file mode 100644 index 0000000000000..3f2dbe9c6e8c3 --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieDeleter.java @@ -0,0 +1,66 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugin.wlm.rule.structure; + +/** + * Handles the deletion operation for the Augmented Trie. 
+ */ +class TrieDeleter { + private TrieNode root; + private String key; + + /** + * Constructs a TrieDeleter with the given root and key. + * + * @param root The root node of the trie. + * @param key The key to be deleted. + */ + public TrieDeleter(TrieNode root, String key) { + this.root = root; + this.key = key; + } + + /** + * Performs the deletion operation. + * + * @return true if the key was successfully deleted, false otherwise. + */ + public boolean delete() { + TrieNode current = root; + TrieNode parent = null; + String remainingKey = key; + while (!remainingKey.isEmpty()) { + TrieNode childNode = current.findCommonPrefixChild(remainingKey); + + if (childNode == null) { + return false; + } + parent = current; + current = childNode; + remainingKey = remainingKey.substring(childNode.getKey().length()); + } + final boolean deleted = current.isEndOfWord(); + + if (deleted) { + current.setEndOfWord(false); + current.setValue(null); + if (current.getChildren().isEmpty()) { + deleteLeafNode(parent, current); + } + } + + return deleted; + } + + private static void deleteLeafNode(TrieNode parent, TrieNode current) { + if (parent != null) { + parent.getChildren().remove(current.getKey()); + } + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieInserter.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieInserter.java new file mode 100644 index 0000000000000..49c88075ee3f3 --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieInserter.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugin.wlm.rule.structure; + +/** + * Handles the insertion operation for the Augmented Trie. 
+ */ +class TrieInserter { + private TrieNode root; + private String key; + private String value; + + /** + * Constructs a TrieInserter with the given root, key, and value. + * + * @param root The root node of the trie. + * @param key The key to be inserted. + * @param value The value associated with the key. + */ + public TrieInserter(TrieNode root, String key, String value) { + this.root = root; + this.key = key; + this.value = value; + } + + /** + * Performs the insertion operation. + *
    Method should handle 3 cases + *
  1. Simple addition of new child
  2. + *
  3. insert splits a node
  4. + *
  5. inserted key is a prefix to existing key|s, this could either mark a node as endOfWord or it could also split the node
  6. + *
+ * @return The root node of the trie after insertion. + */ + public TrieNode insert() { + TrieNode current = root; + String remainingKey = key; + while (!remainingKey.isEmpty()) { + TrieNode child = current.findCommonPrefixChild(remainingKey); + + if (child == null) { + boolean partialMatch = false; + // partial match + for (String childKey : current.getChildren().keySet()) { + int commonPrefixLength = getLongestCommonPrefixLength(childKey, remainingKey); + if (commonPrefixLength > 0) { + TrieNode newNode = current.splitNode(childKey, commonPrefixLength); + + remainingKey = remainingKey.substring(commonPrefixLength); + + current = newNode; + partialMatch = true; + break; + } + } + // no match + if (!partialMatch) { + current = current.addNewChild(remainingKey); + remainingKey = ""; + } + } else { + current = child; + remainingKey = remainingKey.substring(child.getKey().length()); + } + } + updateNodeValue(current); + return root; + } + + private void updateNodeValue(TrieNode node) { + node.setValue(value); + node.setEndOfWord(true); + } + + private int getLongestCommonPrefixLength(String str1, String str2) { + int minLength = Math.min(str1.length(), str2.length()); + for (int i = 0; i < minLength; i++) { + if (str1.charAt(i) != str2.charAt(i)) { + return i; + } + } + return minLength; + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieNode.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieNode.java new file mode 100644 index 0000000000000..14b078bd38d7f --- /dev/null +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/TrieNode.java @@ -0,0 +1,118 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
/**
 * Represents a node in the Augmented Trie.
 * Each node contains a key, an optional value, and references to child nodes.
 * <p>
 * Children are indexed by their own key, so a child's key and its entry in the parent's map must be
 * changed together (see {@link #splitNode(String, int)}).
 */
class TrieNode {
    /** Maximum number of values returned by {@link #findTopFiveClosest()}. */
    public static final int CLOSEST_LIMIT = 5;
    private final Map<String, TrieNode> children;
    private String key;
    private String value;
    private boolean isEndOfWord;

    /**
     * Constructs a TrieNode with the given key, no value, no children and not marked as end of word.
     *
     * @param key The key associated with this node.
     */
    public TrieNode(String key) {
        this.children = new HashMap<>();
        this.key = key;
        this.value = null;
        this.isEndOfWord = false;
    }

    // Getters and setters

    /**
     * @return the live, mutable child map keyed by child key; callers in this package modify it directly
     */
    public Map<String, TrieNode> getChildren() {
        return children;
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    public boolean isEndOfWord() {
        return isEndOfWord;
    }

    public void setEndOfWord(boolean endOfWord) {
        isEndOfWord = endOfWord;
    }

    /**
     * Creates a child with the given key and attaches it to this node.
     * <p>
     * The child starts without a value and is not marked as end of word; the caller decides what it stores.
     *
     * @param key the key of the new child
     * @return the newly attached child
     */
    public TrieNode addNewChild(String key) {
        TrieNode newNode = new TrieNode(key);
        getChildren().put(key, newNode);
        return newNode;
    }

    /**
     * Splits the child stored under {@code childKey} after {@code commonPrefixLength} characters.
     * <p>
     * A new intermediate node keyed by the common prefix takes the child's place under this node, and the
     * original child is re-keyed to the remaining suffix and hung below the intermediate node.
     *
     * @param childKey           key of the existing child to split
     * @param commonPrefixLength number of leading characters of {@code childKey} that form the new node's key
     * @return the new intermediate node
     * @throws IllegalArgumentException if this node has no child stored under {@code childKey}
     */
    public TrieNode splitNode(String childKey, int commonPrefixLength) {
        TrieNode childNode = getChildren().get(childKey);
        if (childNode == null) {
            // Fail before touching the child map so a bad call cannot leave the trie half-modified.
            throw new IllegalArgumentException("No child with key [" + childKey + "] to split");
        }
        String commonPrefix = childKey.substring(0, commonPrefixLength);
        String childSuffix = childKey.substring(commonPrefixLength);
        TrieNode newNode = new TrieNode(commonPrefix);

        // remove the existing partially matching child node since we will split that
        getChildren().remove(childKey);
        // re-attach common prefix as direct child
        getChildren().put(commonPrefix, newNode);

        childNode.setKey(childSuffix);
        newNode.getChildren().put(childSuffix, childNode);
        return newNode;
    }

    /**
     * Finds a child whose whole key is a prefix of {@code key}.
     *
     * @param key the (remaining) key being walked
     * @return the first such child in map iteration order, or {@code null} if there is none
     */
    public TrieNode findCommonPrefixChild(String key) {
        for (Map.Entry<String, TrieNode> entry : getChildren().entrySet()) {
            if (key.startsWith(entry.getKey())) {
                return entry.getValue();
            }
        }
        return null;
    }

    /**
     * Collects values from this node and its descendants in breadth-first order.
     *
     * @return the values of the first end-of-word nodes encountered, at most {@link #CLOSEST_LIMIT} of them;
     *         an empty list if the subtree holds no end-of-word node
     */
    public List<String> findTopFiveClosest() {
        List<String> ans = new ArrayList<>(CLOSEST_LIMIT);
        Queue<TrieNode> queue = new LinkedList<>();
        queue.offer(this);

        while (!queue.isEmpty() && ans.size() < CLOSEST_LIMIT) {
            TrieNode current = queue.poll();
            if (current.isEndOfWord()) {
                ans.add(current.getValue());
            }
            queue.addAll(current.getChildren().values());
        }

        return ans;
    }
}
+ */ + public List search() { + SearchResult result = findNode(); + return result.matchType.processResult(result.node); + } + + private SearchResult findNode() { + TrieNode current = root; + String remainingKey = key; + + while (!remainingKey.isEmpty()) { + TrieNode child = findCommonPrefixChild(current, remainingKey); + if (child == null) { + return handleNoChildWithProperPrefixCase(current, remainingKey); + } + current = child; + remainingKey = removePrefix(remainingKey, child.getKey()); + } + + return new SearchResult(current, current.isEndOfWord() ? MatchType.EXACT_MATCH : MatchType.PARTIAL_MATCH); + } + + private static SearchResult handleNoChildWithProperPrefixCase(TrieNode current, String remainingKey) { + // there are two scenarios now + // 1. there is no key that starts with the remaining key + // 2. there might be a key that completely consumes the remaining key as prefix , example key: "apple", remainingKey: "app" + for (String childKey : current.getChildren().keySet()) { + if (childKey.startsWith(remainingKey)) { + return new SearchResult(current.getChildren().get(childKey), MatchType.PARTIAL_MATCH); + } + } + return new SearchResult(null, MatchType.NO_MATCH); + } + + private TrieNode findCommonPrefixChild(TrieNode node, String key) { + return node.getChildren() + .entrySet() + .stream() + .filter(entry -> key.startsWith(entry.getKey())) + .findFirst() + .map(Map.Entry::getValue) + .orElse(null); + } + + private String removePrefix(String str, String prefix) { + return str.substring(prefix.length()); + } + + private enum MatchType { + EXACT_MATCH(node -> Collections.singletonList(node.getValue())), + PARTIAL_MATCH(TrieNode::findTopFiveClosest), + NO_MATCH(n -> Collections.emptyList()); + + final Function> resultProcessor; + + MatchType(Function> resultProcessor) { + this.resultProcessor = resultProcessor; + } + + public List processResult(TrieNode node) { + return resultProcessor.apply(node); + } + } + + private static class SearchResult { + TrieNode 
node; + MatchType matchType; + + SearchResult(TrieNode node, MatchType matchType) { + this.node = node; + this.matchType = matchType; + } + } +} diff --git a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/Rule.java b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/package-info.java similarity index 69% rename from plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/Rule.java rename to plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/package-info.java index 8f856cf4d0b90..d79622a4e5224 100644 --- a/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/Rule.java +++ b/plugins/workload-management/src/main/java/org/opensearch/plugin/wlm/rule/structure/package-info.java @@ -7,10 +7,3 @@ */ package org.opensearch.plugin.wlm.rule.structure; - -import java.util.List; -import java.util.Map; - -public class Rule { - Map> attributes; -} diff --git a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructureTests.java b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructureTests.java index 2dabffebcd717..fa8425e494b8e 100644 --- a/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructureTests.java +++ b/plugins/workload-management/src/test/java/org/opensearch/plugin/wlm/rule/structure/FastPrefixMatchingStructureTests.java @@ -9,10 +9,165 @@ package org.opensearch.plugin.wlm.rule.structure; import org.opensearch.test.OpenSearchTestCase; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Suite; -public class FastPrefixMatchingStructureTests extends OpenSearchTestCase { - FastPrefixMatchingStructure fastPrefixMatchingStructure = ; - public void shouldAddAString() { - fastPrefixMatchingStructure.add("adffadsas"); +import 
java.util.Arrays; +import java.util.Collections; +import java.util.List; + +@RunWith(Suite.class) +@Suite.SuiteClasses({ + FastPrefixMatchingStructureTests.InsertionTests.class, + FastPrefixMatchingStructureTests.SearchTests.class, + FastPrefixMatchingStructureTests.DeletionTests.class, + FastPrefixMatchingStructureTests.EdgeCaseTests.class }) +public class FastPrefixMatchingStructureTests { + + public static class BaseTest extends OpenSearchTestCase { + protected FastPrefixMatchingStructure trie; + + public void setUp() throws Exception { + super.setUp(); + trie = new RuleAttributeTrie(); + } + } + + public static class InsertionTests extends BaseTest { + @Test + public void testInsertSinglePair() { + trie.insert("apple", "fruit"); + assertEquals(Collections.singletonList("fruit"), trie.search("apple")); + } + + @Test + public void testInsertMultiplePairs() { + trie.insert("apple", "fruit"); + trie.insert("app", "application"); + trie.insert("application", "software"); + + assertEquals(Collections.singletonList("fruit"), trie.search("apple")); + assertEquals(Collections.singletonList("application"), trie.search("app")); + assertEquals(Collections.singletonList("software"), trie.search("application")); + } + + @Test + public void testOverwriteExistingKey() { + trie.insert("apple", "fruit"); + trie.insert("apple", "company"); + assertEquals(Collections.singletonList("company"), trie.search("apple")); + } + + @Test + public void testInsertKeysWithCommonPrefixes() { + trie.insert("car", "vehicle"); + trie.insert("cart", "shopping"); + trie.insert("cartoon", "animation"); + + assertEquals(Collections.singletonList("vehicle"), trie.search("car")); + assertEquals(Collections.singletonList("shopping"), trie.search("cart")); + assertEquals(Collections.singletonList("animation"), trie.search("cartoon")); + } + } + + public static class SearchTests extends BaseTest { + + public void setUp() throws Exception { + super.setUp(); + trie.insert("apple", "fruit"); + 
trie.insert("app", "application"); + trie.insert("application", "software"); + trie.insert("appreciate", "value"); + trie.insert("book", "reading"); + trie.insert("bookstore", "shop"); + } + + @Test + public void testSearchExistingKeys() { + assertEquals(Collections.singletonList("fruit"), trie.search("apple")); + assertEquals(Collections.singletonList("application"), trie.search("app")); + assertEquals(Collections.singletonList("reading"), trie.search("book")); + } + + @Test + public void testSearchNonExistingKeys() { + assertTrue(trie.search("cocktail").isEmpty()); + assertTrue(trie.search("mock").isEmpty()); + } + + @Test + public void testSearchPartialKeys() { + List result = trie.search("ap"); + assertEquals(4, result.size()); + assertTrue(result.containsAll(Arrays.asList("fruit", "application", "software", "value"))); + } + } + + public static class DeletionTests extends BaseTest { + + public void setUp() throws Exception { + super.setUp(); + trie.insert("apple", "fruit"); + trie.insert("app", "application"); + trie.insert("application", "software"); + trie.insert("appreciate", "value"); + trie.insert("book", "reading"); + trie.insert("bookstore", "shop"); + } + + @Test + public void testDeleteExistingKey() { + assertTrue(trie.delete("apple")); + assertTrue(trie.search("apple").isEmpty()); + assertFalse(trie.search("app").isEmpty()); + } + + @Test + public void testDeleteNonExistingKey() { + assertFalse(trie.delete("appl")); + assertFalse(trie.search("apple").isEmpty()); + } + + @Test + public void testDeleteKeyAndVerifyPartialSearch() { + assertTrue(trie.delete("app")); + List result = trie.search("ap"); + assertEquals(3, result.size()); + assertTrue(result.containsAll(Arrays.asList("fruit", "software", "value"))); + } + + @Test + public void testDeleteAllKeysWithCommonPrefix() { + assertTrue(trie.delete("apple")); + assertTrue(trie.delete("app")); + assertTrue(trie.delete("application")); + assertTrue(trie.delete("appreciate")); + + 
assertTrue(trie.search("ap").isEmpty()); + assertFalse(trie.search("book").isEmpty()); + } + } + + public static class EdgeCaseTests extends BaseTest { + + @Test + public void testInsertAndSearchEmptyString() { + trie.insert("", "empty"); + assertEquals(Collections.singletonList("empty"), trie.search("")); + } + + @Test + public void testInsertEmptyValue() { + trie.insert("emptyvalue", ""); + assertEquals(Collections.singletonList(""), trie.search("emptyvalue")); + } + + @Test + public void testDeleteEmptyString() { + trie.insert("", "empty"); + assertTrue(trie.delete("")); + assertTrue(trie.search("").isEmpty()); + } } }