[DROOLS-7589] ansible-rulebook : Throw Exception when heap reaches to… #90
MemoryMonitorUtil.java (new file)
@@ -0,0 +1,46 @@
package org.drools.ansible.rulebook.integration.api.rulesengine;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MemoryMonitorUtil {

    private static final Logger LOG = LoggerFactory.getLogger(MemoryMonitorUtil.class.getName());

    public static final String MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD_PROPERTY = "drools.memory.occupation.percentage.threshold";

    private static final int DEFAULT_MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD = 90;

    private static final int MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD;

    static {
        String envValue = System.getenv("DROOLS_MEMORY_THRESHOLD");
        if (envValue != null && !envValue.isEmpty()) {
            // Environment variable takes precedence over system property
            System.setProperty(MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD_PROPERTY, envValue);
        }
        MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD = Integer.getInteger(MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD_PROPERTY, DEFAULT_MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD); // percentage
    }

    private MemoryMonitorUtil() {
        // do not instantiate
    }

    public static void checkMemoryOccupation() {
        int memoryOccupationPercentage = getMemoryOccupationPercentage();
        if (memoryOccupationPercentage > MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD) {
            System.gc(); // NOSONAR
            // double check to avoid frequent GC
            memoryOccupationPercentage = getMemoryOccupationPercentage();
            if (memoryOccupationPercentage > MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD) {

Review thread on this double-check:
Author: This means I call System.gc() only when the occupation exceeds the threshold, and then re-check before throwing. Also, shall I add another threshold (e.g. 60%) at which to call System.gc()? As far as I observe, with the default G1GC, GC is triggered when the used heap is around 40% - 60% of the total heap. Please let me know your thoughts, @mariofusco
Reviewer: I believe that what you did here is a good idea. Let's give the gc one last chance to run before actually reporting the problem.
Author: Thanks, I keep the logic.

                LOG.error("Memory occupation is above the threshold: {}% > {}%. MaxMemory = {}, UsedMemory = {}",
                        memoryOccupationPercentage, MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD, Runtime.getRuntime().maxMemory(), Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory());
                throw new MemoryThresholdReachedException(MEMORY_OCCUPATION_PERCENTAGE_THRESHOLD, memoryOccupationPercentage);
            }
        }
    }

    private static int getMemoryOccupationPercentage() {
        return (int) ((100 * (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())) / Runtime.getRuntime().maxMemory());
    }
}
MemoryThresholdReachedException.java (new file)
@@ -0,0 +1,17 @@
package org.drools.ansible.rulebook.integration.api.rulesengine;

public class MemoryThresholdReachedException extends RuntimeException {

    private final int threshold;
    private final int actual;

    public MemoryThresholdReachedException(int threshold, int actual) {
        this.threshold = threshold;
        this.actual = actual;
    }

    @Override
    public String getMessage() {
        return "Memory threshold reached: " + actual + "% > " + threshold + "%";
    }
}
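To make the intended use concrete, here is a minimal, hypothetical driver (not part of the PR) that exercises the two classes above. The class, property, and environment-variable names come from the diff; the driver class itself and the memory figures in the comments are illustrative assumptions.

import org.drools.ansible.rulebook.integration.api.rulesengine.MemoryMonitorUtil;
import org.drools.ansible.rulebook.integration.api.rulesengine.MemoryThresholdReachedException;

public class MemoryCheckExample { // hypothetical driver, not part of the PR

    public static void main(String[] args) {
        // Threshold resolution, as implemented in the static block of MemoryMonitorUtil:
        // the DROOLS_MEMORY_THRESHOLD environment variable overrides the
        // drools.memory.occupation.percentage.threshold system property; if neither is set,
        // the default of 90 (percent) is used.
        try {
            MemoryMonitorUtil.checkMemoryOccupation();
        } catch (MemoryThresholdReachedException e) {
            // Illustrative figures: maxMemory = 1024 MB, usedMemory = 970 MB
            // -> occupation = (100 * 970) / 1024 = 94%, above the 90% default,
            // so after one last System.gc() the check throws.
            System.err.println(e.getMessage()); // e.g. "Memory threshold reached: 94% > 90%"
        }
    }
}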
@@ -43,8 +43,8 @@ public boolean accept(Match match) {
      boolean validMatch = isValidMatch(fhs);

      if (validMatch) {
-         if (log.isInfoEnabled()) {
-             log.info(matchToString(match));
+         if (log.isDebugEnabled()) {
+             log.debug(matchToString(match));
Review comment on lines -46 to +47: Requested by Madhu, changed to DEBUG level. When there are many large events, INFO would result in too much logging.
          }

          Map<String, Object> metadata = match.getRule().getMetaData();
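For context on why the guard exists at all (independent of the INFO-to-DEBUG change): the log argument is built eagerly, so it is the isDebugEnabled() check, not the level of the call, that avoids constructing the match string when the level is disabled. A short sketch restating the changed lines with that reasoning spelled out:

// matchToString(match) can produce a large string when events carry ~20 KB payloads.
// Java evaluates the argument before calling log.debug, so the isDebugEnabled() guard
// is what prevents that string from being built when DEBUG logging is off.
if (log.isDebugEnabled()) {
    log.debug(matchToString(match));
}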
Payload.java
@@ -1,15 +1,13 @@
  package org.drools.ansible.rulebook.integration.main;

+ import org.drools.ansible.rulebook.integration.api.io.JsonMapper;
+ import org.drools.ansible.rulebook.integration.core.jpy.AstRulesEngine;

  import com.fasterxml.jackson.core.JacksonException;

  import java.io.UncheckedIOException;
  import java.util.ArrayList;
  import java.util.List;
  import java.util.Map;

- import org.drools.ansible.rulebook.integration.api.io.JsonMapper;
- import org.drools.ansible.rulebook.integration.core.jpy.AstRulesEngine;

  public class Payload {

      private final List<String> list;
@@ -22,6 +20,9 @@ public class Payload {

      private int shutdown = 0;

+     // set true when matchedEvents occupies too much memory
+     private boolean discardMatchedEvents = false;
Review comment on lines +23 to +24: I need to add this to avoid memory retention on returnedMatches in the test driver.

      private Payload(List<String> list) {
          this.list = list;
      }
@@ -88,6 +89,11 @@ static Payload parsePayload(Map ruleSet) {
          } catch (NullPointerException | NumberFormatException e) {
              /* ignore */
          }
+         try {
+             payload.discardMatchedEvents = Boolean.valueOf(sourcesArgs.get("discard_matched_events").toString());
+         } catch (NullPointerException | NumberFormatException e) {
+             /* ignore */
+         }

          return payload;
      }
@@ -128,7 +134,9 @@ public void run() {
              for (int i = 0; i < payload.loopCount; i++) {
                  for (String p : payload.list) {
                      String resultJson = engine.assertEvent(sessionId, p);
-                     returnedMatches.addAll(JsonMapper.readValueAsListOfMapOfStringAndObject(resultJson));
+                     if (!payload.discardMatchedEvents) {
+                         returnedMatches.addAll(JsonMapper.readValueAsListOfMapOfStringAndObject(resultJson));
+                     }
                      sleepSeconds(payload.eventDelay);
                  }
                  sleepSeconds(payload.loopDelay);
PerfTest.java
@@ -1,8 +1,11 @@
  package org.drools.ansible.rulebook.integration.main;

+ import org.drools.ansible.rulebook.integration.api.rulesengine.MemoryThresholdReachedException;
  import org.drools.ansible.rulebook.integration.main.Main.ExecuteResult;
+ import org.junit.Ignore;
  import org.junit.Test;

+ import static org.junit.Assert.assertThrows;
  import static org.junit.Assert.assertTrue;

  public class PerfTest {
@@ -12,6 +15,19 @@ public void testManyEvents() {
          checkDuration("100k_event_rules_ast.json", 10_000);
      }
+     @Ignore("Disabled by default, because it takes around 40 seconds")
+     @Test
+     public void testManyLargeEvents() {
+         // match_multiple_rules: false means events are removed after match. So this test will pass without throwing MemoryThresholdReachedException
+         checkDuration("1m_event_with_20kb_payload_rules_ast.json", 120_000);
+     }

Review comment on lines +18 to +23: With this test, we can confirm there is no memory leak.
+     @Test
+     public void testManyLargeEventsMatchMultipleRules() {
+         // match_multiple_rules: true means events are retained until TTL expires
+         assertThrows(MemoryThresholdReachedException.class, () -> checkDuration("1m_event_with_20kb_payload_match_multiple_rules_ast.json", 120_000));
+     }

Review comment on lines +25 to +29: With this test, we can confirm MemoryThresholdReachedException is thrown before hitting OutOfMemoryError.
      @Test
      public void testOnceAfter() {
          checkDuration("56_once_after.json", 15_000);
Two large diffs are not rendered by default.
Review thread:
Author: This is the place where we can check the memory before sync/async execution.
Reviewer: I'm not sure that checking this synchronously and at each and every rule evaluation is a good idea. I'm afraid the overhead could be significant and not totally justified. One possibility is to run this asynchronously as a timed task. Another alternative is doing it here and synchronously, but actually doing the check (and running the gc) only once in a fixed number (100?) of evaluations. What do you think?
Author: Thanks. I agree that we want to minimize the overhead. Probably the former (time-based check) may miss the "many quickly incoming events" scenario, so I guess the latter (number-based check) is better?
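For illustration, a minimal sketch of the number-based variant discussed above; the class name, the counter, and the interval of 100 are assumptions for the sketch (picking up the "(100?)" suggestion), not code from the PR:

import java.util.concurrent.atomic.AtomicLong;

import org.drools.ansible.rulebook.integration.api.rulesengine.MemoryMonitorUtil;

// Hypothetical helper (not in the PR): run the memory check only once every
// CHECK_INTERVAL rule evaluations to keep the per-evaluation overhead low.
public class PeriodicMemoryCheck {

    private static final int CHECK_INTERVAL = 100; // assumed value
    private static final AtomicLong evaluationCounter = new AtomicLong();

    public static void maybeCheckMemoryOccupation() {
        if (evaluationCounter.incrementAndGet() % CHECK_INTERVAL == 0) {
            // Delegates to the utility added in this PR; may throw MemoryThresholdReachedException.
            MemoryMonitorUtil.checkMemoryOccupation();
        }
    }
}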