Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add deadlock prevention during metric registration #1079

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions prometheus_client/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

class PrometheusClientRuntimeError(RuntimeError):
pass
4 changes: 2 additions & 2 deletions prometheus_client/registry.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABC, abstractmethod
import copy
from threading import Lock
from typing import Dict, Iterable, List, Optional

from .metrics_core import Metric
from .utils import WarnLock


# Ideally this would be a Protocol, but Protocols are only available in Python >= 3.8.
Expand All @@ -30,7 +30,7 @@ def __init__(self, auto_describe: bool = False, target_info: Optional[Dict[str,
self._collector_to_names: Dict[Collector, List[str]] = {}
self._names_to_collectors: Dict[str, Collector] = {}
self._auto_describe = auto_describe
self._lock = Lock()
self._lock = WarnLock()
self._target_info: Optional[Dict[str, str]] = {}
self.set_target_info(target_info)

Expand Down
40 changes: 40 additions & 0 deletions prometheus_client/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import math
from threading import Lock, RLock

from .errors import PrometheusClientRuntimeError

INF = float("inf")
MINUS_INF = float("-inf")
Expand All @@ -22,3 +25,40 @@ def floatToGoString(d):
mantissa = f'{s[0]}.{s[1:dot]}{s[dot + 1:]}'.rstrip('0.')
return f'{mantissa}e+0{dot - 1}'
return s


class WarnLock:
"""A wrapper around RLock and Lock that prevents deadlocks.

Raises a RuntimeError when it detects attempts to re-enter the critical
section from a single thread. Intended to be used as a context manager.
"""
error_msg = (
'Attempt to enter a non reentrant context from a single thread.'
' It is possible that the client code is trying to register or update'
' metrics from within metric registration code or from a signal handler'
' while metrics are being registered or updated.'
' This is unsafe and cannot be allowed. It would result in a deadlock'
' if this exception was not raised.'
)

def __init__(self):
self._rlock = RLock()
self._lock = Lock()

def __enter__(self):
self._rlock.acquire()
if not self._lock.acquire(blocking=False):
self._rlock.release()
raise PrometheusClientRuntimeError(self.error_msg)

def __exit__(self, exc_type, exc_value, traceback):
self._lock.release()
self._rlock.release()

def _locked(self):
# For use in tests.
if self._rlock.acquire(blocking=False):
self._rlock.release()
return False
return True
3 changes: 1 addition & 2 deletions prometheus_client/values.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def MultiProcessValue(process_identifier=os.getpid):
files = {}
values = []
pid = {'value': process_identifier()}
# Use a single global lock when in multi-processing mode
# as we presume this means there is no threading going on.
# Use a single global lock when in multi-processing mode.
# This avoids the need to also have mutexes in __MmapDict.
lock = Lock()

Expand Down
16 changes: 15 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
StateSetMetricFamily, Summary, SummaryMetricFamily, UntypedMetricFamily,
)
from prometheus_client.decorator import getargspec
from prometheus_client.errors import PrometheusClientRuntimeError
from prometheus_client.metrics import _get_use_created
from prometheus_client.validation import (
disable_legacy_validation, enable_legacy_validation,
Expand Down Expand Up @@ -1004,7 +1005,20 @@ def test_restricted_registry_does_not_yield_while_locked(self):
m = Metric('target', 'Target metadata', 'info')
m.samples = [Sample('target_info', {'foo': 'bar'}, 1)]
for _ in registry.restricted_registry(['target_info', 's_sum']).collect():
self.assertFalse(registry._lock.locked())
self.assertFalse(registry._lock._locked())

def test_registry_deadlock_detection(self):
registry = CollectorRegistry(auto_describe=True)

class RecursiveCollector:
def collect(self):
Counter('x', 'help', registry=registry)
return [CounterMetricFamily('c_total', 'help', value=1)]

expected_msg = 'Attempt to enter a non reentrant context from a single thread.'
self.assertRaisesRegex(
PrometheusClientRuntimeError, expected_msg, registry.register, RecursiveCollector()
)


if __name__ == '__main__':
Expand Down