From efaae3939c454bcebb03900f4680f7549031266e Mon Sep 17 00:00:00 2001 From: lbwexler Date: Wed, 24 Apr 2024 01:35:32 -0400 Subject: [PATCH] Checkpoint --- .../MonitorResultsAdminController.groovy | 2 +- .../ConnectionPoolMonitoringService.groovy | 5 +- .../MemoryMonitoringService.groovy | 10 +- .../hoist/monitor/MonitorResultService.groovy | 81 ++++--- .../monitor/MonitoringEmailService.groovy | 62 ------ .../monitor/MonitoringReportService.groovy | 104 +++++++++ .../xh/hoist/monitor/MonitoringService.groovy | 205 +++--------------- .../io/xh/hoist/monitor/MonitorInfo.groovy | 37 ---- .../io/xh/hoist/monitor/MonitorResults.groovy | 115 ++++++++++ .../hoist/monitor/MonitorStatusReport.groovy | 12 +- .../io/xh/hoist/monitor/StatusInfo.groovy | 46 ---- .../DefaultMonitorDefinitionService.groovy | 1 - 12 files changed, 318 insertions(+), 362 deletions(-) rename grails-app/services/io/xh/hoist/{monitor => admin}/ConnectionPoolMonitoringService.groovy (96%) rename grails-app/services/io/xh/hoist/{monitor => admin}/MemoryMonitoringService.groovy (94%) delete mode 100644 grails-app/services/io/xh/hoist/monitor/MonitoringEmailService.groovy create mode 100644 grails-app/services/io/xh/hoist/monitor/MonitoringReportService.groovy delete mode 100644 src/main/groovy/io/xh/hoist/monitor/MonitorInfo.groovy create mode 100644 src/main/groovy/io/xh/hoist/monitor/MonitorResults.groovy delete mode 100644 src/main/groovy/io/xh/hoist/monitor/StatusInfo.groovy diff --git a/grails-app/controllers/io/xh/hoist/admin/cluster/MonitorResultsAdminController.groovy b/grails-app/controllers/io/xh/hoist/admin/cluster/MonitorResultsAdminController.groovy index f4756973..6c9f4df1 100644 --- a/grails-app/controllers/io/xh/hoist/admin/cluster/MonitorResultsAdminController.groovy +++ b/grails-app/controllers/io/xh/hoist/admin/cluster/MonitorResultsAdminController.groovy @@ -24,7 +24,7 @@ class MonitorResultsAdminController extends BaseClusterController { @Access(['HOIST_ADMIN']) def forceRunAllMonitors() { - runOnInstance(new ForceRunAllMonitors(), Utils.clusterService.localName) + runOnPrimary(new ForceRunAllMonitors()) } static class ForceRunAllMonitors extends ClusterRequest { def doCall() { diff --git a/grails-app/services/io/xh/hoist/monitor/ConnectionPoolMonitoringService.groovy b/grails-app/services/io/xh/hoist/admin/ConnectionPoolMonitoringService.groovy similarity index 96% rename from grails-app/services/io/xh/hoist/monitor/ConnectionPoolMonitoringService.groovy rename to grails-app/services/io/xh/hoist/admin/ConnectionPoolMonitoringService.groovy index df5c2c23..3abc6688 100644 --- a/grails-app/services/io/xh/hoist/monitor/ConnectionPoolMonitoringService.groovy +++ b/grails-app/services/io/xh/hoist/admin/ConnectionPoolMonitoringService.groovy @@ -5,10 +5,11 @@ * Copyright © 2023 Extremely Heavy Industries Inc. */ -package io.xh.hoist.monitor +package io.xh.hoist.admin import io.xh.hoist.BaseService import io.xh.hoist.exception.DataNotAvailableException +import io.xh.hoist.util.DateTimeUtils import org.apache.tomcat.jdbc.pool.DataSource as PooledDataSource import org.apache.tomcat.jdbc.pool.PoolConfiguration import org.springframework.boot.jdbc.DataSourceUnwrapper @@ -34,7 +35,7 @@ class ConnectionPoolMonitoringService extends BaseService { void init() { createTimer( - interval: {enabled ? config.snapshotInterval * SECONDS: -1}, + interval: {enabled ? config.snapshotInterval * DateTimeUtils.SECONDS: -1}, runFn: this.&takeSnapshot ) } diff --git a/grails-app/services/io/xh/hoist/monitor/MemoryMonitoringService.groovy b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy similarity index 94% rename from grails-app/services/io/xh/hoist/monitor/MemoryMonitoringService.groovy rename to grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy index 3f6c1952..e1b6f825 100644 --- a/grails-app/services/io/xh/hoist/monitor/MemoryMonitoringService.groovy +++ b/grails-app/services/io/xh/hoist/admin/MemoryMonitoringService.groovy @@ -5,10 +5,12 @@ * Copyright © 2023 Extremely Heavy Industries Inc. */ -package io.xh.hoist.monitor +package io.xh.hoist.admin import com.sun.management.HotSpotDiagnosticMXBean import io.xh.hoist.BaseService +import io.xh.hoist.util.DateTimeUtils +import io.xh.hoist.util.Utils import java.lang.management.GarbageCollectorMXBean import java.lang.management.ManagementFactory @@ -34,7 +36,7 @@ class MemoryMonitoringService extends BaseService { void init() { createTimer( - interval: {this.enabled ? config.snapshotInterval * SECONDS: -1}, + interval: {this.enabled ? config.snapshotInterval * DateTimeUtils.SECONDS: -1}, runFn: this.&takeSnapshot ) } @@ -91,7 +93,7 @@ class MemoryMonitoringService extends BaseService { if (newSnap.usedPctMax > 90) { logWarn(newSnap) logWarn("MEMORY USAGE ABOVE 90%") - } else if (intervalElapsed(1 * HOURS, _lastInfoLogged)) { + } else if (intervalElapsed(1 * DateTimeUtils.HOURS, _lastInfoLogged)) { logInfo(newSnap) _lastInfoLogged = new Date() } else { @@ -152,7 +154,7 @@ class MemoryMonitoringService extends BaseService { long collectionCount = totalCollectionCount - (last ? last.totalCollectionCount : 0), collectionTime = totalCollectionTime - (last ? last.totalCollectionTime : 0), - elapsedTime = timestamp - (last ? last.timestamp : startupTime.toInstant().toEpochMilli()) + elapsedTime = timestamp - (last ? last.timestamp : Utils.startupTime.toInstant().toEpochMilli()) def avgCollectionTime = collectionCount ? Math.round(collectionTime/collectionCount) : 0 diff --git a/grails-app/services/io/xh/hoist/monitor/MonitorResultService.groovy b/grails-app/services/io/xh/hoist/monitor/MonitorResultService.groovy index 52f05c48..d0270959 100644 --- a/grails-app/services/io/xh/hoist/monitor/MonitorResultService.groovy +++ b/grails-app/services/io/xh/hoist/monitor/MonitorResultService.groovy @@ -7,9 +7,9 @@ package io.xh.hoist.monitor +import grails.async.Promises import grails.gorm.transactions.ReadOnly import io.xh.hoist.BaseService -import io.xh.hoist.util.Utils import static grails.async.Promises.task import java.util.concurrent.TimeoutException @@ -18,26 +18,35 @@ import static io.xh.hoist.monitor.MonitorStatus.* import static java.util.concurrent.TimeUnit.SECONDS -/** - * Runs individual status monitor checks as directed by MonitorService and as configured by - * data-driven status monitor definitions. Timeouts and any other exceptions will be caught and - * returned cleanly as failures. - */ class MonitorResultService extends BaseService { + def configService, + monitorDefinitionService + + /** + * Runs all enabled and active monitors on this instance in parallel. + */ @ReadOnly - MonitorResult runMonitor(String code, long timeoutSeconds) { - def monitor = Monitor.findByCode(code) - if (!monitor) throw new RuntimeException("Monitor '$code' not found.") - return runMonitor(monitor, timeoutSeconds) + List runAllMonitors() { + def timeout = getTimeoutSeconds(), + monitors = Monitor.list().findAll{it.active && (isPrimary || !it.primaryOnly)} + + withDebug("Running ${monitors.size()} monitors") { + def tasks = monitors.collect { m -> task {runMonitor(m, timeout)}}, + ret = Promises.waitAll(tasks) + + if (monitorConfig.writeToMonitorLog != false) logResults(ret) + + return ret + } as List } + /** + * Runs individual monitor on this instance. Timeouts and any other exceptions will be + * caught and returned cleanly as failures. + */ MonitorResult runMonitor(Monitor monitor, long timeoutSeconds) { - if (!monitor.active || (monitor.primaryOnly && !clusterService.isPrimary)) { - return inactiveMonitorResult(monitor) - } - - def defSvc = Utils.appContext.monitorDefinitionService, + def defSvc = monitorDefinitionService, code = monitor.code, result = new MonitorResult(monitor: monitor, instance: clusterService.localName, primary: isPrimary), startTime = new Date() @@ -79,25 +88,6 @@ class MonitorResultService extends BaseService { return result } - MonitorResult unknownMonitorResult(Monitor monitor) { - return new MonitorResult( - status: UNKNOWN, - date: new Date(), - elapsed: 0, - monitor: monitor - ) - } - - MonitorResult inactiveMonitorResult(Monitor monitor) { - return new MonitorResult( - status: INACTIVE, - date: new Date(), - elapsed: 0, - monitor: monitor - ) - } - - //------------------------ // Implementation //------------------------ @@ -129,4 +119,27 @@ class MonitorResultService extends BaseService { result.prependMessage("Metric value is $verb warn limit of $warn $units") } } + + //--------------------- + // Implementation + //-------------------- + private long getTimeoutSeconds() { + (monitorConfig.monitorTimeoutSecs ?: 15) as long + } + + private Map getMonitorConfig() { + configService.getMap('xhMonitorConfig') + } + + private void logResults(Collection results) { + results.each { + logInfo([code: it.code, status: it.status, metric: it.metric]) + } + + def failsCount = results.count {it.status == FAIL}, + warnsCount = results.count {it.status == WARN}, + okCount = results.count {it.status == OK} + + logInfo([fails: failsCount, warns: warnsCount, okays: okCount]) + } } diff --git a/grails-app/services/io/xh/hoist/monitor/MonitoringEmailService.groovy b/grails-app/services/io/xh/hoist/monitor/MonitoringEmailService.groovy deleted file mode 100644 index e11ac490..00000000 --- a/grails-app/services/io/xh/hoist/monitor/MonitoringEmailService.groovy +++ /dev/null @@ -1,62 +0,0 @@ -/* - * This file belongs to Hoist, an application development toolkit - * developed by Extremely Heavy Industries (www.xh.io | info@xh.io) - * - * Copyright © 2023 Extremely Heavy Industries Inc. - */ -package io.xh.hoist.monitor - -import io.xh.hoist.BaseService -import io.xh.hoist.util.Utils - -import static io.xh.hoist.monitor.MonitorStatus.WARN - -/** - * Listens for status monitor change events from MonitoringService and emails status updates to - * a configurable list of recipients. - */ -class MonitoringEmailService extends BaseService { - - def emailService - - void init() { - subscribeToTopic( - topic: 'xhMonitorStatusReport', - onMessage: this.&emailReport, - primaryOnly: true - ) - } - - //------------------------ - // Implementation - //------------------------ - private void emailReport(MonitorStatusReport report) { - def to = emailService.parseMailConfig('xhMonitorEmailRecipients') - if (to) { - emailService.sendEmail( - to: to, - subject: report.title, - html: formatHtml(report), - async: true - ) - } - } - - private String formatHtml(MonitorStatusReport report) { - def results = report.results - - results.sort{it.name} - results.sort{it.status} - - if (report.status < WARN) return "There are no alerting monitors for ${Utils.appName}." - - return results.findAll{it.status >= WARN}.collect { - "+ $it.name | ${it.message ? it.message + ' | ' : ''}Minutes in [${it.status}]: ${it.minsInStatus}" - }.join('
') - } - - Map getAdminStats() {[ - config: [toAddress: emailService.parseMailConfig('xhMonitorEmailRecipients')] - ]} - -} diff --git a/grails-app/services/io/xh/hoist/monitor/MonitoringReportService.groovy b/grails-app/services/io/xh/hoist/monitor/MonitoringReportService.groovy new file mode 100644 index 00000000..c82b0067 --- /dev/null +++ b/grails-app/services/io/xh/hoist/monitor/MonitoringReportService.groovy @@ -0,0 +1,104 @@ +/* + * This file belongs to Hoist, an application development toolkit + * developed by Extremely Heavy Industries (www.xh.io | info@xh.io) + * + * Copyright © 2023 Extremely Heavy Industries Inc. + */ +package io.xh.hoist.monitor + +import io.xh.hoist.BaseService +import io.xh.hoist.util.Utils + +import static grails.util.Environment.isDevelopmentMode +import static io.xh.hoist.monitor.MonitorStatus.WARN +import static io.xh.hoist.util.DateTimeUtils.MINUTES +import static io.xh.hoist.util.DateTimeUtils.intervalElapsed +import static java.lang.System.currentTimeMillis + +/** + * Listens for status monitor change events from MonitoringService and generates a report. + * Reports generated periodically, and also when status changes after certain thresholds. + * + * Also emails status updates to a configurable list of recipients. + */ +class MonitoringReportService extends BaseService { + + def emailService, + configService + + // Notification state for primary instance to manage + // If primary instance goes down, may get extra notification -- that's ok + private Long lastNotified = null + private boolean alertMode = false + + void noteResultsUpdated(Collection results) { + if (!isPrimary) return; + + def failThreshold = config.failNotifyThreshold, + warnThreshold = config.warnNotifyThreshold + + // 1) Calc new alert mode, true if crossed thresholds or already alerting and still have problems + boolean newAlertMode = (alertMode && results?.any {it.status >= WARN}) || + results?.any { it.cyclesAsFail >= failThreshold || it.cyclesAsWarn >= warnThreshold } + + // 2) Generate report if we have a change, or still alerting and interval has elapsed + if (newAlertMode != alertMode || + (newAlertMode && intervalElapsed(config.monitorRepeatNotifyMins * MINUTES, lastNotified)) + ) { + lastNotified = currentTimeMillis() + alertMode = newAlertMode + generateStatusReport(results) + } + } + + //------------------------ + // Implementation + //------------------------ + private MonitorStatusReport generateStatusReport(results) { + def report = new MonitorStatusReport(results: results) + logDebug("Emitting monitor status report: ${report.title}") + getTopic('xhMonitorStatusReport').publishAsync(report) + if (isDevelopmentMode()) { + emailReport(report) + } + } + + private void emailReport(MonitorStatusReport report) { + def to = emailService.parseMailConfig('xhMonitorEmailRecipients') + if (to) { + emailService.sendEmail( + to: to, + subject: report.title, + html: formatHtml(report), + async: true + ) + } + } + + private String formatHtml(MonitorStatusReport report) { + def results = report.results + + results.sort{it.name} + results.sort{it.status} + + if (report.status < WARN) return "There are no alerting monitors for ${Utils.appName}." + + return results.findAll{it.status >= WARN}.collect { + "+ $it.name | ${it.message ? it.message + ' | ' : ''}Minutes in [${it.status}]: ${it.minsInStatus}" + }.join('
') + } + + private Map getConfig() { + configService.getMap('xhMonitorConfig') + } + + Map getAdminStats() {[ + config: [ + toAddress: emailService.parseMailConfig('xhMonitorEmailRecipients'), + *: configService.getForAdminStats('xhMonitorConfig') + ], + lastNotifed: lastNotified, + alertMode: alertMode + ]} + +} diff --git a/grails-app/services/io/xh/hoist/monitor/MonitoringService.groovy b/grails-app/services/io/xh/hoist/monitor/MonitoringService.groovy index acd45f03..38361e8e 100644 --- a/grails-app/services/io/xh/hoist/monitor/MonitoringService.groovy +++ b/grails-app/services/io/xh/hoist/monitor/MonitoringService.groovy @@ -7,27 +7,23 @@ package io.xh.hoist.monitor -import com.hazelcast.core.EntryListener -import com.hazelcast.map.IMap -import grails.async.Promises import grails.gorm.transactions.ReadOnly import io.xh.hoist.BaseService +import io.xh.hoist.cluster.ClusterRequest import io.xh.hoist.cluster.ReplicatedValue import io.xh.hoist.util.Timer -import static io.xh.hoist.util.DateTimeUtils.intervalElapsed -import static grails.async.Promises.task -import static io.xh.hoist.monitor.MonitorStatus.* +import static io.xh.hoist.monitor.MonitorResults.emptyResults +import static io.xh.hoist.monitor.MonitorResults.newResults + import static io.xh.hoist.util.DateTimeUtils.MINUTES -import static io.xh.hoist.util.DateTimeUtils.SECONDS import static grails.util.Environment.isDevelopmentMode -import static java.lang.System.currentTimeMillis +import static io.xh.hoist.util.Utils.getAppContext /** - * Coordinates application status monitoring. Requests monitor results and generates status reports - * on a configurable timer, analyzes the results, and publishes Grails events on status conditions - * of interest to the application. + * Coordinates application status monitoring. The primary instance will co-ordinate + * monitor results on cluster and make them globally available. * * In local development mode, auto-run/refresh of Monitors is disabled, but monitors can still be * run on demand via forceRun(). Notification are never sent during local development. @@ -37,203 +33,74 @@ import static java.lang.System.currentTimeMillis class MonitoringService extends BaseService { def configService, - monitorResultService - - // Shared state for all servers to read - // Map of instance name -> Map of monitor code -> MonitorResult - private IMap> _results = getIMap('results') + monitoringReportService - // Notification state for primary instance to manage - private ReplicatedValue> _statusInfos = getReplicatedValue('statusInfos') - private ReplicatedValue _alertMode = getReplicatedValue('alertMode') - private ReplicatedValue _lastNotified = getReplicatedValue('lastNotified') + // Shared state for all servers to read -- gathered by primary from all instances + private ReplicatedValue> _results = getReplicatedValue('results') - private Timer monitorTimer - private Timer notifyTimer - private Timer cleanupTimer + private Timer timer void init() { - monitorTimer = createTimer( - name: 'monitorTimer', - runFn: this.&onMonitorTimer, + timer = createTimer( interval: {monitorInterval}, - delay: startupDelay - ) - - notifyTimer = createTimer ( - name: 'notifyTimer', - runFn: this.&onNotifyTimer, - interval: {notifyInterval}, + delay: startupDelay, primaryOnly: true ) - - cleanupTimer = createTimer( - name: 'cleanupTimer', - runFn: this.&cleanup, - interval: {cleanupInterval}, - primaryOnly: true, - ) - - _results.addEntryListener([ - entryAdded: { updateStatuses() }, - entryUpdated: { updateStatuses() }, - entryRemoved: { updateStatuses() } - ] as EntryListener, false) } void forceRun() { - cleanupTimer.forceRun() - monitorTimer.forceRun() - } - - @ReadOnly - List getResults() { - def results = _results, - statusInfos = _statusInfos.get() ?: [:] - Monitor.list().collect { - def code = it.code, - statusInfo = statusInfos[code] ?: new StatusInfo(status: UNKNOWN), - instanceResults = results.collect{it.value[code]} - - new MonitorInfo( - monitor: it, - statusInfo: statusInfo, - instanceResults: instanceResults - ) - } + timer.forceRun() } - //------------------------ - // Implementation - //------------------------ @ReadOnly - private void runAllMonitors() { - withDebug('Running monitors') { - def timeout = getTimeoutSeconds() - - def tasks = Monitor.list().collect { m -> - task { monitorResultService.runMonitor(m, timeout) } - } - - Map newResults = Promises - .waitAll(tasks) - .collectEntries { - [it.code, it] - } - - _results[clusterService.localName] = newResults - if (monitorConfig.writeToMonitorLog != false) logResults(newResults.values()) - } + List getResults() { + def results = _results.get() + Monitor.list().collect { results?[it.code] ?: emptyResults(it) } } - @ReadOnly - private void updateStatuses() { - if (!isPrimary) return - def statusInfos = _statusInfos.get() ?: [:] - Monitor.list().each{ monitor -> - def code = monitor.code - List statuses = _results.findAll{it.value[code]}.collect{it.value[code].status} - def statusInfo = statusInfos[code] ?: new StatusInfo() - statusInfo.recordStatus(statuses.max()) - statusInfos[code] = statusInfo - } - _statusInfos.set(statusInfos) - evaluateProblems() - } - private void evaluateProblems() { - def statusInfos = _statusInfos.get()?.values() ?: [] as Collection, - failThreshold = monitorConfig.failNotifyThreshold, - warnThreshold = monitorConfig.warnNotifyThreshold - - // Calc new alert mode, true if crossed thresholds or already alerting and still have problems - def currAlertMode = _alertMode.get() - def newAlertMode = (currAlertMode && statusInfos.any { it.status >= WARN }) || - statusInfos.any { it.cyclesAsFail >= failThreshold || it.cyclesAsWarn >= warnThreshold } - if (newAlertMode != currAlertMode) { - _alertMode.set(newAlertMode) - notifyAlertModeChange() - } - } - - private void notifyAlertModeChange() { - if (!isDevelopmentMode()) { - getTopic('xhMonitorStatusReport').publishAsync(generateStatusReport()) - _lastNotified.set(currentTimeMillis()) - } - } - - private MonitorStatusReport generateStatusReport() { - new MonitorStatusReport(infos: results) - } - - private void logResults(Collection results) { - results.each { - logInfo([code: it.code, status: it.status, metric: it.metric]) + //-------------------------------------------------------------------- + // Implementation + //-------------------------------------------------------------------- + private void onTimer() { + Map> newChecks = clusterService + .submitToAllInstances(new RunAllMonitorsTask()) + .collectMany { instance, response -> (response.value ?: [])} + .groupBy { it.code } + + Map prevResults = _results.get() + Map newResults = newChecks.collectEntries { code, checks -> + [code, newResults(checks, prevResults?[code])] } + _results.set(newResults) - def failsCount = results.count {it.status == FAIL}, - warnsCount = results.count {it.status == WARN}, - okCount = results.count {it.status == OK} - - logInfo([fails: failsCount, warns: warnsCount, okays: okCount]) + monitoringReportService.noteResultsUpdated(newResults.values()) } - private void onNotifyTimer() { - if (!_alertMode.get()) return - - if (intervalElapsed(monitorConfig.monitorRepeatNotifyMins * MINUTES, _lastNotified.get())) { - def report = generateStatusReport() - logDebug("Emitting monitor status report: ${report.title}") - getTopic('xhMonitorStatusReport').publishAsync(report) - _lastNotified.set(currentTimeMillis()) + static class RunAllMonitorsTask extends ClusterRequest> { + List doCall() { + return appContext.monitorResultService.runAllMonitors() } } - private void onMonitorTimer() { - runAllMonitors() - } - private int getMonitorInterval() { return isDevelopmentMode() || !configService.getBool('xhEnableMonitoring') ? -1 : (monitorConfig.monitorRefreshMins * MINUTES) } - private int getNotifyInterval() { - return isDevelopmentMode() || !configService.getBool('xhEnableMonitoring') ? -1 : (15 * SECONDS) - } - - private int getCleanupInterval() { - return isDevelopmentMode() || !configService.getBool('xhEnableMonitoring') ? -1 : (10 * MINUTES) - } - private int getStartupDelay() { return monitorConfig.monitorStartupDelayMins * MINUTES } - // Default supplied here as support for this sub-config was added late in the game. - private long getTimeoutSeconds() { - return monitorConfig.monitorTimeoutSecs ?: 15 - } - private Map getMonitorConfig() { configService.getMap('xhMonitorConfig') } - private void cleanup() { - for (String instance: _results.keySet()) { - if (!clusterService.isMember(instance)) { - _results.remove(instance) - } - } - } - void clearCaches() { super.clearCaches() if (isPrimary) { - _results.clear() - _statusInfos.set(null) + _results.set(null) if (monitorInterval > 0) { - monitorTimer.forceRun() + timer.forceRun() } } } diff --git a/src/main/groovy/io/xh/hoist/monitor/MonitorInfo.groovy b/src/main/groovy/io/xh/hoist/monitor/MonitorInfo.groovy deleted file mode 100644 index 6cf83a6d..00000000 --- a/src/main/groovy/io/xh/hoist/monitor/MonitorInfo.groovy +++ /dev/null @@ -1,37 +0,0 @@ -/* - * This file belongs to Hoist, an application development toolkit - * developed by Extremely Heavy Industries (www.xh.io | info@xh.io) - * - * Copyright © 2024 Extremely Heavy Industries Inc. - */ - -package io.xh.hoist.monitor - -import groovy.transform.CompileStatic -import io.xh.hoist.json.JSONFormat - -import static io.xh.hoist.monitor.MonitorStatus.UNKNOWN - -@CompileStatic -class MonitorInfo implements JSONFormat { - Monitor monitor - StatusInfo statusInfo - List instanceResults = [] - - String getCode() { - monitor.code - } - - Map formatForJSON() { - [ - code: monitor.code, - name: monitor.name, - sortOrder: monitor.sortOrder, - primaryOnly: monitor.primaryOnly, - metricUnit: monitor.metricUnit, - statusInfo: statusInfo, - instanceResults: instanceResults - ] - } - -} diff --git a/src/main/groovy/io/xh/hoist/monitor/MonitorResults.groovy b/src/main/groovy/io/xh/hoist/monitor/MonitorResults.groovy new file mode 100644 index 00000000..538ee16a --- /dev/null +++ b/src/main/groovy/io/xh/hoist/monitor/MonitorResults.groovy @@ -0,0 +1,115 @@ +/* + * This file belongs to Hoist, an application development toolkit + * developed by Extremely Heavy Industries (www.xh.io | info@xh.io) + * + * Copyright © 2024 Extremely Heavy Industries Inc. + */ + +package io.xh.hoist.monitor + +import groovy.transform.CompileStatic +import io.xh.hoist.json.JSONFormat + +import static io.xh.hoist.monitor.MonitorStatus.FAIL +import static io.xh.hoist.monitor.MonitorStatus.INACTIVE +import static io.xh.hoist.monitor.MonitorStatus.OK +import static io.xh.hoist.monitor.MonitorStatus.UNKNOWN +import static io.xh.hoist.monitor.MonitorStatus.WARN +import static io.xh.hoist.util.DateTimeUtils.MINUTES +import static java.lang.System.currentTimeMillis +import static java.util.Collections.emptyList + +@CompileStatic +class MonitorResults implements JSONFormat { + Monitor monitor + MonitorStatus status = UNKNOWN + List results = emptyList() + Date dateComputed = new Date() + + // Aggregated history + int cyclesAsSuccess = 0 + int cyclesAsFail = 0 + int cyclesAsWarn = 0 + Date lastStatusChanged = new Date() + + String getName() { + return monitor.name + } + + String getMessage() { + return status > OK ? results.find { it.status == status }.message : '' + } + + String getMinsInStatus() { + def now = currentTimeMillis(), + timeInStatus = now - lastStatusChanged.time + + (timeInStatus / MINUTES).intValue() + } + + /** + * Create an empty result, for when no underlying checks are available. + */ + static MonitorResults emptyResults(Monitor monitor) { + new MonitorResults( + monitor: monitor, + status: monitor.active ? UNKNOWN : INACTIVE + ) + } + + /** + * Create a new result, appending it to existing history, if available. + */ + static MonitorResults newResults(List results, MonitorResults prev) { + def ret = new MonitorResults( + monitor: results[0].monitor, + results: results, + status: results*.status.max() + ) + + // If there is history, bring it over and append to it. + if (prev && !(ret.status == INACTIVE || ret.status == UNKNOWN)) { + ret.cyclesAsSuccess = prev.cyclesAsSuccess + ret.cyclesAsFail = prev.cyclesAsFail + ret.cyclesAsWarn = prev.cyclesAsWarn + ret.lastStatusChanged = ret.status == prev.status ? prev.lastStatusChanged : new Date() + + switch (ret.status) { + case FAIL: + // Entering FAIL does not clear WARN streaks + ret.cyclesAsSuccess = 0 + ret.cyclesAsFail++ + break + case WARN: + ret.cyclesAsSuccess = 0 + ret.cyclesAsFail = 0 + ret.cyclesAsWarn++ + break + case OK: + ret.cyclesAsFail = 0 + ret.cyclesAsWarn = 0 + ret.cyclesAsSuccess++ + break + } + } + + return ret + } + + Map formatForJSON() {[ + code: monitor.code, + name: monitor.name, + sortOrder: monitor.sortOrder, + primaryOnly: monitor.primaryOnly, + metricUnit: monitor.metricUnit, + + status: status, + results: results, + dateComputed: dateComputed, + + cyclesAsSuccess: cyclesAsSuccess, + cyclesAsFail: cyclesAsFail, + cyclesAsWarn: cyclesAsWarn, + lastStatusChanged: lastStatusChanged + ]} +} diff --git a/src/main/groovy/io/xh/hoist/monitor/MonitorStatusReport.groovy b/src/main/groovy/io/xh/hoist/monitor/MonitorStatusReport.groovy index 6ab38e1e..5f9e04bd 100644 --- a/src/main/groovy/io/xh/hoist/monitor/MonitorStatusReport.groovy +++ b/src/main/groovy/io/xh/hoist/monitor/MonitorStatusReport.groovy @@ -12,17 +12,17 @@ import io.xh.hoist.util.Utils import static io.xh.hoist.monitor.MonitorStatus.* class MonitorStatusReport { - List infos + List results MonitorStatus getStatus() { - if (!infos) return MonitorStatus.OK - infos.max{it.status}.status + if (!results) return OK + results.max { it.status }.status } String getTitle() { - def failsCount = infos.count{it.status == FAIL}, - warnsCount = infos.count{it.status == WARN}, - okCount = infos.count{it.status == OK}, + def failsCount = results.count{it.status == FAIL}, + warnsCount = results.count{it.status == WARN}, + okCount = results.count{it.status == OK}, title = "${Utils.appName}: ", msgParts = [] diff --git a/src/main/groovy/io/xh/hoist/monitor/StatusInfo.groovy b/src/main/groovy/io/xh/hoist/monitor/StatusInfo.groovy deleted file mode 100644 index 0a94c483..00000000 --- a/src/main/groovy/io/xh/hoist/monitor/StatusInfo.groovy +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This file belongs to Hoist, an application development toolkit - * developed by Extremely Heavy Industries (www.xh.io | info@xh.io) - * - * Copyright © 2024 Extremely Heavy Industries Inc. - */ - -package io.xh.hoist.monitor - -import io.xh.hoist.util.Utils - -import static io.xh.hoist.monitor.MonitorStatus.* - - -class StatusInfo { - MonitorStatus status = UNKNOWN - Date lastChange - Integer cyclesAsSuccess = 0 - Integer cyclesAsFail = 0 - Integer cyclesAsWarn = 0 - - void recordStatus(MonitorStatus status) { - // Keep track of the number of consecutive cycles in each status - switch (status) { - case FAIL: - // Entering FAIL does not clear WARN streaks - cyclesAsSuccess = 0 - cyclesAsFail++ - break - case WARN: - cyclesAsSuccess = 0 - cyclesAsFail = 0 - cyclesAsWarn++ - break - case OK: - cyclesAsFail = 0 - cyclesAsWarn = 0 - cyclesAsSuccess++ - break - } - if (status != this.status) { - this.status = status - lastChange = new Date() - } - } -} diff --git a/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy b/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy index 213fc3b9..444b4913 100644 --- a/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy +++ b/src/main/groovy/io/xh/hoist/monitor/provided/DefaultMonitorDefinitionService.groovy @@ -13,7 +13,6 @@ import io.xh.hoist.data.filter.Filter import io.xh.hoist.monitor.Monitor import io.xh.hoist.monitor.MonitorResult import grails.gorm.transactions.Transactional -import io.xh.hoist.util.Utils import static io.xh.hoist.monitor.MonitorStatus.FAIL import static io.xh.hoist.monitor.MonitorStatus.INACTIVE