Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NR Control Health Checks #1250

Merged
merged 16 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 25 additions & 22 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
default_host,
fetch_config_setting,
)
from newrelic.core.super_agent_health import super_agent_health_instance, super_agent_healthcheck_loop
from newrelic.core.super_agent_health import HealthStatus, super_agent_health_instance, super_agent_healthcheck_loop


__all__ = ["initialize", "filter_app_factory"]
Expand Down Expand Up @@ -104,7 +104,7 @@ def _map_aws_account_id(s):
# all the settings have been read.

_cache_object = []
super_agent_instance = super_agent_health_instance()
super_agent_health = super_agent_health_instance()


def _reset_config_parser():
Expand Down Expand Up @@ -1038,22 +1038,25 @@ def _load_configuration(

# Now read in the configuration file. Cache the config file
# name in the internal settings object as an indication of success.
if config_file.endswith(".toml"):
try:
import tomllib
except ImportError:
raise newrelic.api.exceptions.ConfigurationError(
"TOML configuration file can only be used if tomllib is available (Python 3.11+)."
)
with open(config_file, "rb") as f:
content = tomllib.load(f)
newrelic_section = content.get("tool", {}).get("newrelic")
if not newrelic_section:
raise newrelic.api.exceptions.ConfigurationError("New Relic configuration not found in TOML file.")
_config_object.read_dict(_toml_config_to_configparser_dict(newrelic_section))
elif not _config_object.read([config_file]):
super_agent_instance.set_health_status("invalid_config")
raise newrelic.api.exceptions.ConfigurationError(f"Unable to open configuration file {config_file}.")
try:
if config_file.endswith(".toml"):
try:
import tomllib
except ImportError:
raise newrelic.api.exceptions.ConfigurationError(
"TOML configuration file can only be used if tomllib is available (Python 3.11+)."
)
with open(config_file, "rb") as f:
content = tomllib.load(f)
newrelic_section = content.get("tool", {}).get("newrelic")
if not newrelic_section:
raise newrelic.api.exceptions.ConfigurationError("New Relic configuration not found in TOML file.")
_config_object.read_dict(_toml_config_to_configparser_dict(newrelic_section))
elif not _config_object.read([config_file]):
raise newrelic.api.exceptions.ConfigurationError(f"Unable to open configuration file {config_file}.")
except Exception:
super_agent_health.set_health_status(HealthStatus.INVALID_CONFIG.value)
raise

_settings.config_file = config_file

Expand Down Expand Up @@ -4829,7 +4832,7 @@ def _setup_super_agent_health():
if super_agent_health_thread.is_alive():
return

if super_agent_instance.health_check_enabled:
if super_agent_health.health_check_enabled:
super_agent_health_thread.start()


Expand All @@ -4840,7 +4843,7 @@ def initialize(
log_file=None,
log_level=None,
):
super_agent_instance.start_time_unix_nano = time.time_ns()
super_agent_health.start_time_unix_nano = time.time_ns()

if config_file is None:
config_file = os.environ.get("NEW_RELIC_CONFIG_FILE", None)
Expand All @@ -4857,7 +4860,7 @@ def initialize(

if _settings.monitor_mode:
if not _settings.license_key:
super_agent_instance.set_health_status("missing_license")
super_agent_health.set_health_status(HealthStatus.MISSING_LICENSE.value)

if _settings.monitor_mode or _settings.developer_mode:
_settings.enabled = True
Expand All @@ -4867,7 +4870,7 @@ def initialize(
_setup_agent_console()
else:
_settings.enabled = False
super_agent_instance.set_health_status("agent_disabled")
super_agent_health.set_health_status(HealthStatus.AGENT_DISABLED.value)


def filter_app_factory(app, global_conf, config_file, environment=None):
Expand Down
4 changes: 2 additions & 2 deletions newrelic/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from newrelic.common.object_wrapper import ObjectProxy
from newrelic.core.agent import agent_instance
from newrelic.core.config import flatten_settings, global_settings
from newrelic.core.super_agent_health import super_agent_health_instance
from newrelic.core.super_agent_health import HealthStatus, super_agent_health_instance
from newrelic.core.trace_cache import trace_cache


Expand Down Expand Up @@ -514,7 +514,7 @@ def __init__(self, config_file, stdin=None, stdout=None, log=None):

if not self.__config_object.read([config_file]):
super_agent_instance = super_agent_health_instance()
super_agent_instance.set_health_status("invalid_config")
super_agent_instance.set_health_status(HealthStatus.INVALID_CONFIG.value)
raise RuntimeError(f"Unable to open configuration file {config_file}.")

listener_socket = self.__config_object.get("newrelic", "console.listener_socket") % {"pid": "*"}
Expand Down
8 changes: 8 additions & 0 deletions newrelic/core/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
from newrelic.samplers.cpu_usage import cpu_usage_data_source
from newrelic.samplers.gc_data import garbage_collector_data_source
from newrelic.samplers.memory_usage import memory_usage_data_source
from newrelic.core.super_agent_health import HealthStatus, super_agent_health_instance


_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -217,6 +219,7 @@ def __init__(self, config):
self._scheduler = sched.scheduler(self._harvest_timer, self._harvest_shutdown.wait)

self._process_shutdown = False
self._super_agent = super_agent_health_instance()

self._lock = threading.Lock()

Expand Down Expand Up @@ -734,6 +737,11 @@ def shutdown_agent(self, timeout=None):
if self._harvest_shutdown_is_set():
return

self._super_agent.set_health_status(HealthStatus.AGENT_SHUTDOWN.value)

if self._super_agent.health_check_enabled:
self._super_agent.write_to_health_file()

if timeout is None:
timeout = self._config.shutdown_timeout

Expand Down
12 changes: 6 additions & 6 deletions newrelic/core/agent_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
NetworkInterfaceException,
RetryDataForRequest,
)
from newrelic.core.super_agent_health import super_agent_health_instance
from newrelic.core.super_agent_health import HealthStatus, super_agent_health_instance

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -251,21 +251,21 @@ def send(
# initialize function doesn't get overridden with invalid_license as a missing license key is also
# treated as a 401 status code
if not self._license_key:
self.super_agent.set_health_status("missing_license")
self.super_agent.set_health_status(HealthStatus.MISSING_LICENSE.value)
else:
self.super_agent.set_health_status("invalid_license")
self.super_agent.set_health_status(HealthStatus.INVALID_LICENSE.value)

if status == 407:
self.super_agent.set_health_status("proxy_error", status)
self.super_agent.set_health_status(HealthStatus.PROXY_ERROR.value, status)

if status == 410:
self.super_agent.set_health_status("forced_disconnect")
self.super_agent.set_health_status(HealthStatus.FORCED_DISCONNECT.value)

level, message = self.LOG_MESSAGES.get(status, self.LOG_MESSAGES["default"])

# If the default error message was used, then we know we have a general HTTP error
if message.startswith("Received a non 200 or 202"):
self.super_agent.set_health_status("http_error", status, method)
self.super_agent.set_health_status(HealthStatus.HTTP_ERROR.value, status, method)

_logger.log(
level,
Expand Down
10 changes: 5 additions & 5 deletions newrelic/core/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
RetryDataForRequest,
)
from newrelic.samplers.data_sampler import DataSampler
from newrelic.core.super_agent_health import super_agent_healthcheck_loop, super_agent_health_instance
from newrelic.core.super_agent_health import HealthStatus, super_agent_healthcheck_loop, super_agent_health_instance

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -369,7 +369,7 @@ def connect_to_data_collector(self, activate_agent):
None, self._app_name, self.linked_applications, environment_settings()
)
except ForceAgentDisconnect:
self._super_agent.set_health_status("failed_nr_connection")
self._super_agent.set_health_status(HealthStatus.FAILED_NR_CONNECTION.value)
# Any disconnect exception means we should stop trying to connect
_logger.error(
"The New Relic service has requested that the agent "
Expand All @@ -380,7 +380,7 @@ def connect_to_data_collector(self, activate_agent):
)
return
except NetworkInterfaceException:
self._super_agent.set_health_status("failed_nr_connection")
self._super_agent.set_health_status(HealthStatus.FAILED_NR_CONNECTION.value)
active_session = None
except Exception:
# If an exception occurs after agent has been flagged to be
Expand All @@ -390,7 +390,7 @@ def connect_to_data_collector(self, activate_agent):
# the application is still running.

if not self._agent_shutdown and not self._pending_shutdown:
self._super_agent.set_health_status("failed_nr_connection")
self._super_agent.set_health_status(HealthStatus.FAILED_NR_CONNECTION.value)
_logger.exception(
"Unexpected exception when registering "
"agent with the data collector. If this problem "
Expand Down Expand Up @@ -1700,7 +1700,7 @@ def internal_agent_shutdown(self, restart=False):
optionally triggers activation of a new session.

"""
self._super_agent.set_health_status("agent_shutdown")
self._super_agent.set_health_status(HealthStatus.AGENT_SHUTDOWN.value)
if self._super_agent.health_check_enabled:
self._super_agent.write_to_health_file()

Expand Down
Loading
Loading