From 942c0c6f79ef4d5d3dbdab9be9eb975000ee52de Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Thu, 2 Jan 2025 16:08:32 -0800 Subject: [PATCH 01/27] add new sampling configuration keys and converters --- src/scout_apm/core/config.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index 2a7cb710..dc5ce154 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -302,6 +302,19 @@ def convert_to_float(value: Any) -> float: except ValueError: return 0.0 +def convert_sample_rate(value): + """ + Converts sample rate to float, ensuring it's between 0 and 1 + """ + try: + rate = float(value) + if not (0 <= rate <= 1): + logger.warning(f"Invalid sample rate {rate}. Must be between 0 and 1. Defaulting to 1.") + return 1.0 + return rate + except (TypeError, ValueError): + logger.warning(f"Invalid sample rate {value}. Must be a number between 0 and 1. Defaulting to 1.") + return 1.0 def convert_sample_rate(value: Any) -> Optional[int]: """ @@ -369,7 +382,6 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in return result return {} - CONVERSIONS = { "collect_remote_ip": convert_to_bool, "core_agent_download": convert_to_bool, @@ -389,4 +401,4 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in } -scout_config = ScoutConfig() +scout_config = ScoutConfig() \ No newline at end of file From fe829b19b02a6d1842caaa7c461fa219c4ef125b Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Thu, 2 Jan 2025 16:43:58 -0800 Subject: [PATCH 02/27] add types to all config methods --- src/scout_apm/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index dc5ce154..cc4b204b 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -302,7 +302,7 @@ def convert_to_float(value: Any) -> float: except ValueError: return 0.0 -def convert_sample_rate(value): +def convert_sample_rate(value: Any) -> float: """ Converts sample rate to float, ensuring it's between 0 and 1 """ From 60e9e2540b0727cce0db0e1d06c19d2a101964dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 00:49:36 +0000 Subject: [PATCH 03/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/scout_apm/core/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index cc4b204b..10ed555e 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -401,4 +401,4 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in } -scout_config = ScoutConfig() \ No newline at end of file +scout_config = ScoutConfig() From b248ba576a64112f85fdfb5b30d97a2c0d972a10 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Fri, 3 Jan 2025 09:57:43 -0800 Subject: [PATCH 04/27] use 0-100 ints instead of floats --- src/scout_apm/core/config.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index 10ed555e..2d64ff67 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -302,19 +302,27 @@ def convert_to_float(value: Any) -> float: except ValueError: return 0.0 -def convert_sample_rate(value: Any) -> float: + +def convert_sample_rate(value: Any) -> int: """ - Converts sample rate to float, ensuring it's between 0 and 1 + Converts sample rate to integer, ensuring it's between 0 and 100 """ try: - rate = float(value) - if not (0 <= rate <= 1): - logger.warning(f"Invalid sample rate {rate}. Must be between 0 and 1. Defaulting to 1.") - return 1.0 + rate = int(value) + if not (0 <= rate <= 100): + logger.warning( + f"Invalid sample rate {rate}. Must be between 0 and 100. " + "Defaulting to 100." + ) + return 100 return rate except (TypeError, ValueError): - logger.warning(f"Invalid sample rate {value}. Must be a number between 0 and 1. Defaulting to 1.") - return 1.0 + logger.warning( + f"Invalid sample rate {value}. Must be a number between 0 and 100. " + "Defaulting to 100." + ) + return 100 + def convert_sample_rate(value: Any) -> Optional[int]: """ @@ -382,6 +390,7 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in return result return {} + CONVERSIONS = { "collect_remote_ip": convert_to_bool, "core_agent_download": convert_to_bool, From 16eaff7e2fe62d766c9d2f158a0410ddca6d1892 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Fri, 3 Jan 2025 13:36:38 -0800 Subject: [PATCH 05/27] add sampler module --- src/scout_apm/core/sampler.py | 115 +++++++++++++++++----------------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 21d29b8a..1ca0e508 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -1,7 +1,10 @@ # coding=utf-8 +import logging import random -from typing import Dict, Optional, Tuple +from typing import Dict, Optional + +logger = logging.getLogger(__name__) class Sampler: @@ -26,55 +29,48 @@ def __init__(self, config): config: ScoutConfig instance containing sampling configuration """ self.config = config + # Load sampling configuration self.sample_rate = config.value("sample_rate") self.sample_endpoints = config.value("sample_endpoints") self.sample_jobs = config.value("sample_jobs") - self.ignore_endpoints = set( - config.value("ignore_endpoints") + config.value("ignore") - ) + self.ignore_endpoints = set(config.value("ignore_endpoints")) self.ignore_jobs = set(config.value("ignore_jobs")) - self.endpoint_sample_rate = config.value("endpoint_sample_rate") - self.job_sample_rate = config.value("job_sample_rate") - - def _any_sampling(self): - """ - Check if any sampling is enabled. - Returns: - Boolean indicating if any sampling is enabled - """ - return ( - self.sample_rate < 100 - or self.sample_endpoints - or self.sample_jobs - or self.ignore_endpoints - or self.ignore_jobs - or self.endpoint_sample_rate is not None - or self.job_sample_rate is not None - ) - - def _find_matching_rate( + def _get_matching_pattern( self, name: str, patterns: Dict[str, float] ) -> Optional[str]: """ - Finds the matching sample rate for a given operation name. + Find the most specific matching pattern for an operation name. Args: name: The operation name to match patterns: Dictionary of pattern to sample rate mappings Returns: - The sample rate for the matching pattern or None if no match found + The matching pattern or None if no match found """ + # First check for exact match + if name in patterns: + return name + + # Then check for wildcard patterns, prioritizing longest match + matching_pattern = None + longest_match = 0 + + # Only look at wildcard patterns + wildcard_patterns = [p for p in patterns if "*" in p] + for pattern in wildcard_patterns: + if pattern.endswith("/*"): + prefix = pattern[:-1] + if name.startswith(prefix) and len(prefix) > longest_match: + longest_match = len(prefix) + matching_pattern = pattern - for pattern, rate in patterns.items(): - if name.startswith(pattern): - return rate - return None + return matching_pattern def _get_operation_type_and_name( self, operation: str - ) -> Tuple[Optional[str], Optional[str]]: + ) -> tuple[Optional[str], Optional[str]]: """ Determines if an operation is an endpoint or job and extracts its name. @@ -90,50 +86,54 @@ def _get_operation_type_and_name( elif operation.startswith(self.JOB_PREFIX): return "job", operation[len(self.JOB_PREFIX) :] else: + logger.debug(f"Unknown operation type for: {operation}") return None, None - def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> int: + def get_effective_sample_rate(self, operation: str) -> int: """ Determines the effective sample rate for a given operation. - Prioritization: - 1. Sampling rate for specific endpoint or job - 2. Specified ignore pattern or flag for operation - 3. Global endpoint or job sample rate - 4. Global sample rate + Priority order: + 1. Ignored operations (returns 0) + 2. Specific operation sample rate + 3. Global sample rate Args: operation: The operation string (e.g. "Controller/users/show") - is_ignored: boolean for if the specific transaction is ignored Returns: Integer between 0 and 100 representing sample rate """ op_type, name = self._get_operation_type_and_name(operation) - patterns = self.sample_endpoints if op_type == "endpoint" else self.sample_jobs - ignores = self.ignore_endpoints if op_type == "endpoint" else self.ignore_jobs - default_operation_rate = ( - self.endpoint_sample_rate if op_type == "endpoint" else self.job_sample_rate - ) - if not op_type or not name: - return self.sample_rate - matching_rate = self._find_matching_rate(name, patterns) - if matching_rate is not None: - return matching_rate - for prefix in ignores: - if name.startswith(prefix) or is_ignored: + return self.sample_rate # Fall back to global rate for unknown operations + + if op_type == "endpoint": + # Check if endpoint should be ignored + if name in self.ignore_endpoints: + return 0 + + # Find matching endpoint pattern + matching_pattern = self._get_matching_pattern(name, self.sample_endpoints) + if matching_pattern: + return self.sample_endpoints[matching_pattern] + + else: # op_type == 'job' + # Check if job should be ignored + if name in self.ignore_jobs: return 0 - if default_operation_rate is not None: - return default_operation_rate + + # Find matching job pattern + matching_pattern = self._get_matching_pattern(name, self.sample_jobs) + if matching_pattern: + return self.sample_jobs[matching_pattern] # Fall back to global sample rate return self.sample_rate - def should_sample(self, operation: str, is_ignored: bool) -> bool: + def should_sample(self, operation: str) -> bool: """ Determines if an operation should be sampled. - If no sampling is enabled, always return True. Args: operation: The operation string (e.g. "Controller/users/show" @@ -142,8 +142,5 @@ def should_sample(self, operation: str, is_ignored: bool) -> bool: Returns: Boolean indicating whether to sample this operation """ - if not self._any_sampling(): - return True - return random.randint(1, 100) <= self.get_effective_sample_rate( - operation, is_ignored - ) + rate = self.get_effective_sample_rate(operation) + return random.randint(1, 100) <= rate From 7229c7552b5c67cd4ee898b98277cae0520a2bb4 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Fri, 3 Jan 2025 14:45:04 -0800 Subject: [PATCH 06/27] add sampler tests --- tests/unit/core/test_sampler.py | 123 ++++---------------------------- 1 file changed, 14 insertions(+), 109 deletions(-) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index 330f6321..3ebf87d9 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -14,19 +14,16 @@ def config(): ScoutConfig.set( sample_rate=50, # 50% global sampling sample_endpoints={ - "users/test": 0, # Never sample specific endpoint "users": 100, # Always sample - "test": 20, # 20% sampling for test endpoints - "health": 0, # Never sample health checks + "test/*": 20, # 20% sampling for test endpoints + "health/*": 0, # Never sample health checks }, sample_jobs={ "critical-job": 100, # Always sample - "batch": 30, # 30% sampling for batch jobs + "batch-*": 30, # 30% sampling for batch jobs }, ignore_endpoints=["metrics", "ping"], ignore_jobs=["test-job"], - endpoint_sample_rate=70, # 70% sampling for unspecified endpoints - job_sample_rate=40, # 40% sampling for unspecified jobs ) yield config ScoutConfig.reset_all() @@ -38,133 +35,41 @@ def sampler(config): def test_should_sample_endpoint_always(sampler): - assert sampler.should_sample("Controller/users", False) is True + assert sampler.should_sample("Controller/users/show") is True def test_should_sample_endpoint_never(sampler): - assert sampler.should_sample("Controller/health/check", False) is False - assert sampler.should_sample("Controller/users/test", False) is False + assert sampler.should_sample("Controller/health/check") is False def test_should_sample_endpoint_ignored(sampler): - assert sampler.should_sample("Controller/metrics/some/more", False) is False + assert sampler.should_sample("Controller/metrics") is False def test_should_sample_endpoint_partial(sampler): with mock.patch("random.randint", return_value=10): - assert sampler.should_sample("Controller/test/endpoint", False) is True + assert sampler.should_sample("Controller/test/endpoint") is True with mock.patch("random.randint", return_value=30): - assert sampler.should_sample("Controller/test/endpoint", False) is False + assert sampler.should_sample("Controller/test/endpoint") is False def test_should_sample_job_always(sampler): - assert sampler.should_sample("Job/critical-job", False) is True + assert sampler.should_sample("Job/critical-job") is True def test_should_sample_job_never(sampler): - assert sampler.should_sample("Job/test-job", False) is False + assert sampler.should_sample("Job/test-job") is False def test_should_sample_job_partial(sampler): with mock.patch("random.randint", return_value=10): - assert sampler.should_sample("Job/batch-process", False) is True + assert sampler.should_sample("Job/batch-process") is True with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Job/batch-process", False) is False + assert sampler.should_sample("Job/batch-process") is False def test_should_sample_unknown_operation(sampler): with mock.patch("random.randint", return_value=10): - assert sampler.should_sample("Unknown/operation", False) is True + assert sampler.should_sample("Unknown/operation") is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Unknown/operation", False) is False - - -def test_should_sample_no_sampling_enabled(config): - config.set( - sample_rate=100, # Return config to defaults - sample_endpoints={}, - sample_jobs={}, - ignore_endpoints=[], - ignore_jobs=[], - endpoint_sample_rate=None, - job_sample_rate=None, - ) - sampler = Sampler(config) - assert sampler.should_sample("Controller/any_endpoint", False) is True - assert sampler.should_sample("Job/any_job", False) is True - - -def test_should_sample_endpoint_default_rate(sampler): - with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Controller/unspecified", False) is True - with mock.patch("random.randint", return_value=80): - assert sampler.should_sample("Controller/unspecified", False) is False - - -def test_should_sample_job_default_rate(sampler): - with mock.patch("random.randint", return_value=30): - assert sampler.should_sample("Job/unspecified-job", False) is True - with mock.patch("random.randint", return_value=50): - assert sampler.should_sample("Job/unspecified-job", False) is False - - -def test_should_sample_endpoint_fallback_to_global_rate(config): - config.set(endpoint_sample_rate=None) - sampler = Sampler(config) - with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Controller/unspecified", False) is True - with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Controller/unspecified", False) is False - - -def test_should_sample_job_fallback_to_global_rate(config): - config.set(job_sample_rate=None) - sampler = Sampler(config) - with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Job/unspecified-job", False) is True - with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Job/unspecified-job", False) is False - - -def test_should_handle_legacy_ignore_with_specific_sampling(config): - """Test that specific sampling rates override legacy ignore patterns.""" - config.set( - sample_endpoints={ - "foo/bar": 50, # Should override the ignore pattern for specific endpoint - "foo": 0, # Ignore all other foo endpoints - }, - ) - sampler = Sampler(config) - - # foo/bar should be sampled at 50% - with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Controller/foo/bar", False) is True - with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Controller/foo/bar", False) is False - - # foo/other should be ignored (0% sampling) - assert sampler.should_sample("Controller/foo/other", False) is False - - -def test_prefix_matching_precedence(config): - """Test that longer prefix matches take precedence.""" - config.set( - sample_endpoints={ - "api/users/vip": 100, # Sample all VIP user endpoints - "api/users": 50, # Sample 50% of user endpoints - "api": 0, # Ignore all API endpoints by default - } - ) - sampler = Sampler(config) - - # Regular API endpoint should be ignored - assert sampler.should_sample("Controller/api/status", False) is False - - # Users API should be sampled at 50% - with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Controller/api/users/list", False) is True - with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Controller/api/users/list", False) is False - - # VIP users API should always be sampled - assert sampler.should_sample("Controller/api/users/vip/list", False) is True + assert sampler.should_sample("Unknown/operation") is False From e501b7b879c3aaa89d6338033f860dc7f7490ebb Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Sun, 5 Jan 2025 14:31:59 -0800 Subject: [PATCH 07/27] improve sampler tests --- src/scout_apm/core/sampler.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 1ca0e508..a6f045c8 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -2,7 +2,7 @@ import logging import random -from typing import Dict, Optional +from typing import Dict, Optional, Tuple logger = logging.getLogger(__name__) @@ -49,8 +49,12 @@ def _get_matching_pattern( Returns: The matching pattern or None if no match found """ + logger.debug(f"Finding matching pattern for name: {name}") + logger.debug(f"Patterns: {patterns}") + # First check for exact match if name in patterns: + logger.debug(f"Exact match found for name: {name}") return name # Then check for wildcard patterns, prioritizing longest match @@ -60,7 +64,7 @@ def _get_matching_pattern( # Only look at wildcard patterns wildcard_patterns = [p for p in patterns if "*" in p] for pattern in wildcard_patterns: - if pattern.endswith("/*"): + if pattern.endswith("*"): prefix = pattern[:-1] if name.startswith(prefix) and len(prefix) > longest_match: longest_match = len(prefix) @@ -70,7 +74,7 @@ def _get_matching_pattern( def _get_operation_type_and_name( self, operation: str - ) -> tuple[Optional[str], Optional[str]]: + ) -> Tuple[Optional[str], Optional[str]]: """ Determines if an operation is an endpoint or job and extracts its name. @@ -126,9 +130,11 @@ def get_effective_sample_rate(self, operation: str) -> int: # Find matching job pattern matching_pattern = self._get_matching_pattern(name, self.sample_jobs) if matching_pattern: + logger.debug(f"Matching job pattern: {matching_pattern}") return self.sample_jobs[matching_pattern] # Fall back to global sample rate + logger.debug(f"Using global sample rate: {self.sample_rate}") return self.sample_rate def should_sample(self, operation: str) -> bool: @@ -143,4 +149,10 @@ def should_sample(self, operation: str) -> bool: Boolean indicating whether to sample this operation """ rate = self.get_effective_sample_rate(operation) - return random.randint(1, 100) <= rate + rand = random.randint(1, 100) + result = rand <= rate + + logger.debug( + f"Sampling decision for {operation}: {result} rand: {rand} (rate: {rate})" + ) + return result From 0574fd9201e6b08220d9fad333e5b75693f8b0aa Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 6 Jan 2025 09:59:20 -0800 Subject: [PATCH 08/27] fix always sample endpoint test --- tests/unit/core/test_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index 3ebf87d9..d3207fd1 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -35,7 +35,7 @@ def sampler(config): def test_should_sample_endpoint_always(sampler): - assert sampler.should_sample("Controller/users/show") is True + assert sampler.should_sample("Controller/users") is True def test_should_sample_endpoint_never(sampler): From 1915274a38e8169f0a789637c81fedb2302ff0a5 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 6 Jan 2025 12:03:39 -0800 Subject: [PATCH 09/27] remove unnecessary logging --- src/scout_apm/core/config.py | 24 ------------------------ src/scout_apm/core/sampler.py | 15 +-------------- 2 files changed, 1 insertion(+), 38 deletions(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index 2d64ff67..51165fcd 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -324,30 +324,6 @@ def convert_sample_rate(value: Any) -> int: return 100 -def convert_sample_rate(value: Any) -> Optional[int]: - """ - Converts sample rate to integer, ensuring it's between 0 and 100. - Allows None as a valid value. - """ - if value is None: - return None - try: - rate = int(value) - if not (0 <= rate <= 100): - logger.warning( - f"Invalid sample rate {rate}. Must be between 0 and 100. " - "Defaulting to 100." - ) - return 100 - return rate - except (TypeError, ValueError): - logger.warning( - f"Invalid sample rate {value}. Must be a number between 0 and 100. " - "Defaulting to 100." - ) - return 100 - - def convert_to_list(value: Any) -> List[Any]: if isinstance(value, list): return value diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index a6f045c8..c943a29c 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -29,7 +29,6 @@ def __init__(self, config): config: ScoutConfig instance containing sampling configuration """ self.config = config - # Load sampling configuration self.sample_rate = config.value("sample_rate") self.sample_endpoints = config.value("sample_endpoints") self.sample_jobs = config.value("sample_jobs") @@ -61,7 +60,6 @@ def _get_matching_pattern( matching_pattern = None longest_match = 0 - # Only look at wildcard patterns wildcard_patterns = [p for p in patterns if "*" in p] for pattern in wildcard_patterns: if pattern.endswith("*"): @@ -113,21 +111,17 @@ def get_effective_sample_rate(self, operation: str) -> int: return self.sample_rate # Fall back to global rate for unknown operations if op_type == "endpoint": - # Check if endpoint should be ignored if name in self.ignore_endpoints: return 0 - # Find matching endpoint pattern matching_pattern = self._get_matching_pattern(name, self.sample_endpoints) if matching_pattern: return self.sample_endpoints[matching_pattern] else: # op_type == 'job' - # Check if job should be ignored if name in self.ignore_jobs: return 0 - # Find matching job pattern matching_pattern = self._get_matching_pattern(name, self.sample_jobs) if matching_pattern: logger.debug(f"Matching job pattern: {matching_pattern}") @@ -148,11 +142,4 @@ def should_sample(self, operation: str) -> bool: Returns: Boolean indicating whether to sample this operation """ - rate = self.get_effective_sample_rate(operation) - rand = random.randint(1, 100) - result = rand <= rate - - logger.debug( - f"Sampling decision for {operation}: {result} rand: {rand} (rate: {rate})" - ) - return result + return random.randint(1, 100) <= self.get_effective_sample_rate(operation) From 41867efbcb6acee2128c158195f5c4a6fd14d363 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 6 Jan 2025 15:43:18 -0800 Subject: [PATCH 10/27] remove debug logging --- src/scout_apm/core/sampler.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index c943a29c..0a51bc30 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -48,12 +48,9 @@ def _get_matching_pattern( Returns: The matching pattern or None if no match found """ - logger.debug(f"Finding matching pattern for name: {name}") - logger.debug(f"Patterns: {patterns}") # First check for exact match if name in patterns: - logger.debug(f"Exact match found for name: {name}") return name # Then check for wildcard patterns, prioritizing longest match @@ -88,7 +85,6 @@ def _get_operation_type_and_name( elif operation.startswith(self.JOB_PREFIX): return "job", operation[len(self.JOB_PREFIX) :] else: - logger.debug(f"Unknown operation type for: {operation}") return None, None def get_effective_sample_rate(self, operation: str) -> int: @@ -124,11 +120,9 @@ def get_effective_sample_rate(self, operation: str) -> int: matching_pattern = self._get_matching_pattern(name, self.sample_jobs) if matching_pattern: - logger.debug(f"Matching job pattern: {matching_pattern}") return self.sample_jobs[matching_pattern] # Fall back to global sample rate - logger.debug(f"Using global sample rate: {self.sample_rate}") return self.sample_rate def should_sample(self, operation: str) -> bool: From bb39635c42d136171d4606de9773b15a784def0d Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 7 Jan 2025 13:41:38 -0800 Subject: [PATCH 11/27] short circuit sampling logic if no sampling configured --- src/scout_apm/core/sampler.py | 18 ++++++++++++++++++ tests/unit/core/test_sampler.py | 13 +++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 0a51bc30..bc14d88e 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -35,6 +35,21 @@ def __init__(self, config): self.ignore_endpoints = set(config.value("ignore_endpoints")) self.ignore_jobs = set(config.value("ignore_jobs")) + def _any_sampling(self): + """ + Check if any sampling is enabled. + + Returns: + Boolean indicating if any sampling is enabled + """ + return ( + self.sample_rate < 100 + or self.sample_endpoints + or self.sample_jobs + or self.ignore_endpoints + or self.ignore_jobs + ) + def _get_matching_pattern( self, name: str, patterns: Dict[str, float] ) -> Optional[str]: @@ -128,6 +143,7 @@ def get_effective_sample_rate(self, operation: str) -> int: def should_sample(self, operation: str) -> bool: """ Determines if an operation should be sampled. + If no sampling is enabled, always return True. Args: operation: The operation string (e.g. "Controller/users/show" @@ -136,4 +152,6 @@ def should_sample(self, operation: str) -> bool: Returns: Boolean indicating whether to sample this operation """ + if not self._any_sampling(): + return True return random.randint(1, 100) <= self.get_effective_sample_rate(operation) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index d3207fd1..add8923b 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -73,3 +73,16 @@ def test_should_sample_unknown_operation(sampler): assert sampler.should_sample("Unknown/operation") is True with mock.patch("random.randint", return_value=60): assert sampler.should_sample("Unknown/operation") is False + + +def test_should_sample_no_sampling_enabled(config): + config.set( + sample_rate=100, # Return config to defaults + sample_endpoints={}, + sample_jobs={}, + ignore_endpoints=[], + ignore_jobs=[], + ) + sampler = Sampler(config) + assert sampler.should_sample("Controller/any_endpoint") is True + assert sampler.should_sample("Job/any_job") is True From d9cf86cc62a51fcee1fd3511dd3073c2214c9422 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Thu, 9 Jan 2025 14:19:10 -0800 Subject: [PATCH 12/27] simplify pattern match add support for sample_endpoint/job_rate --- src/scout_apm/core/config.py | 19 ++++++++------ src/scout_apm/core/sampler.py | 45 ++++++++++++++------------------- tests/unit/core/test_sampler.py | 40 ++++++++++++++++++++++++++--- 3 files changed, 67 insertions(+), 37 deletions(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index 51165fcd..a2c7dc38 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -87,10 +87,10 @@ def log(self) -> None: "name", "revision_sha", "sample_rate", - "endpoint_sample_rate", + "sample_endpoint_rate", "sample_endpoints", "sample_jobs", - "job_sample_rate", + "sample_job_rate", "scm_subdirectory", "shutdown_message_enabled", "shutdown_timeout_seconds", @@ -252,9 +252,9 @@ def __init__(self): "revision_sha": self._git_revision_sha(), "sample_rate": 100, "sample_endpoints": [], - "endpoint_sample_rate": None, + "sample_endpoint_rate": None, "sample_jobs": [], - "job_sample_rate": None, + "sample_job_rate": None, "scm_subdirectory": "", "shutdown_message_enabled": True, "shutdown_timeout_seconds": 2.0, @@ -303,10 +303,13 @@ def convert_to_float(value: Any) -> float: return 0.0 -def convert_sample_rate(value: Any) -> int: +def convert_sample_rate(value: Any) -> Optional[int]: """ - Converts sample rate to integer, ensuring it's between 0 and 100 + Converts sample rate to integer, ensuring it's between 0 and 100. + Allows None as a valid value. """ + if value is None: + return None try: rate = int(value) if not (0 <= rate <= 100): @@ -378,9 +381,9 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in "monitor": convert_to_bool, "sample_rate": convert_sample_rate, "sample_endpoints": convert_endpoint_sampling, - "endpoint_sample_rate": convert_sample_rate, + "sample_endpoint_rate": convert_sample_rate, "sample_jobs": convert_endpoint_sampling, - "job_sample_rate": convert_sample_rate, + "sample_job_rate": convert_sample_rate, "shutdown_message_enabled": convert_to_bool, "shutdown_timeout_seconds": convert_to_float, } diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index bc14d88e..f1a0921b 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -34,6 +34,8 @@ def __init__(self, config): self.sample_jobs = config.value("sample_jobs") self.ignore_endpoints = set(config.value("ignore_endpoints")) self.ignore_jobs = set(config.value("ignore_jobs")) + self.sample_endpoint_rate = config.value("sample_endpoint_rate") + self.sample_job_rate = config.value("sample_job_rate") def _any_sampling(self): """ @@ -50,37 +52,24 @@ def _any_sampling(self): or self.ignore_jobs ) - def _get_matching_pattern( + def _find_matching_rate( self, name: str, patterns: Dict[str, float] ) -> Optional[str]: """ - Find the most specific matching pattern for an operation name. + Finds the matching sample rate for a given operation name. Args: name: The operation name to match patterns: Dictionary of pattern to sample rate mappings Returns: - The matching pattern or None if no match found + The sample rate for the matching pattern or None if no match found """ - # First check for exact match - if name in patterns: - return name - - # Then check for wildcard patterns, prioritizing longest match - matching_pattern = None - longest_match = 0 - - wildcard_patterns = [p for p in patterns if "*" in p] - for pattern in wildcard_patterns: - if pattern.endswith("*"): - prefix = pattern[:-1] - if name.startswith(prefix) and len(prefix) > longest_match: - longest_match = len(prefix) - matching_pattern = pattern - - return matching_pattern + for pattern, rate in patterns.items(): + if name.startswith(pattern): + return rate + return None def _get_operation_type_and_name( self, operation: str @@ -125,17 +114,21 @@ def get_effective_sample_rate(self, operation: str) -> int: if name in self.ignore_endpoints: return 0 - matching_pattern = self._get_matching_pattern(name, self.sample_endpoints) - if matching_pattern: - return self.sample_endpoints[matching_pattern] + matching_rate = self._find_matching_rate(name, self.sample_endpoints) + if matching_rate is not None: + return matching_rate + if self.sample_endpoint_rate is not None: + return self.sample_endpoint_rate else: # op_type == 'job' if name in self.ignore_jobs: return 0 - matching_pattern = self._get_matching_pattern(name, self.sample_jobs) - if matching_pattern: - return self.sample_jobs[matching_pattern] + matching_rate = self._find_matching_rate(name, self.sample_jobs) + if matching_rate is not None: + return matching_rate + if self.sample_job_rate is not None: + return self.sample_job_rate # Fall back to global sample rate return self.sample_rate diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index add8923b..cec0b748 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -15,15 +15,17 @@ def config(): sample_rate=50, # 50% global sampling sample_endpoints={ "users": 100, # Always sample - "test/*": 20, # 20% sampling for test endpoints - "health/*": 0, # Never sample health checks + "test": 20, # 20% sampling for test endpoints + "health": 0, # Never sample health checks }, sample_jobs={ "critical-job": 100, # Always sample - "batch-*": 30, # 30% sampling for batch jobs + "batch": 30, # 30% sampling for batch jobs }, ignore_endpoints=["metrics", "ping"], ignore_jobs=["test-job"], + sample_endpoint_rate=70, # 70% sampling for unspecified endpoints + sample_job_rate=40, # 40% sampling for unspecified jobs ) yield config ScoutConfig.reset_all() @@ -86,3 +88,35 @@ def test_should_sample_no_sampling_enabled(config): sampler = Sampler(config) assert sampler.should_sample("Controller/any_endpoint") is True assert sampler.should_sample("Job/any_job") is True + + +def test_should_sample_endpoint_default_rate(sampler): + with mock.patch("random.randint", return_value=60): + assert sampler.should_sample("Controller/unspecified") is True + with mock.patch("random.randint", return_value=80): + assert sampler.should_sample("Controller/unspecified") is False + + +def test_should_sample_job_default_rate(sampler): + with mock.patch("random.randint", return_value=30): + assert sampler.should_sample("Job/unspecified-job") is True + with mock.patch("random.randint", return_value=50): + assert sampler.should_sample("Job/unspecified-job") is False + + +def test_should_sample_endpoint_fallback_to_global_rate(config): + config.set(sample_endpoint_rate=None) + sampler = Sampler(config) + with mock.patch("random.randint", return_value=40): + assert sampler.should_sample("Controller/unspecified") is True + with mock.patch("random.randint", return_value=60): + assert sampler.should_sample("Controller/unspecified") is False + + +def test_should_sample_job_fallback_to_global_rate(config): + config.set(sample_job_rate=None) + sampler = Sampler(config) + with mock.patch("random.randint", return_value=40): + assert sampler.should_sample("Job/unspecified-job") is True + with mock.patch("random.randint", return_value=60): + assert sampler.should_sample("Job/unspecified-job") is False From 5998f8043012f4ac0c0aefaef57fc7201cc49d08 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Fri, 10 Jan 2025 08:33:30 -0800 Subject: [PATCH 13/27] rename config option to X_sample_rate --- src/scout_apm/core/config.py | 12 ++++++------ src/scout_apm/core/sampler.py | 12 ++++++------ tests/unit/core/test_sampler.py | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/scout_apm/core/config.py b/src/scout_apm/core/config.py index a2c7dc38..2a7cb710 100644 --- a/src/scout_apm/core/config.py +++ b/src/scout_apm/core/config.py @@ -87,10 +87,10 @@ def log(self) -> None: "name", "revision_sha", "sample_rate", - "sample_endpoint_rate", + "endpoint_sample_rate", "sample_endpoints", "sample_jobs", - "sample_job_rate", + "job_sample_rate", "scm_subdirectory", "shutdown_message_enabled", "shutdown_timeout_seconds", @@ -252,9 +252,9 @@ def __init__(self): "revision_sha": self._git_revision_sha(), "sample_rate": 100, "sample_endpoints": [], - "sample_endpoint_rate": None, + "endpoint_sample_rate": None, "sample_jobs": [], - "sample_job_rate": None, + "job_sample_rate": None, "scm_subdirectory": "", "shutdown_message_enabled": True, "shutdown_timeout_seconds": 2.0, @@ -381,9 +381,9 @@ def convert_endpoint_sampling(value: Union[str, Dict[str, Any]]) -> Dict[str, in "monitor": convert_to_bool, "sample_rate": convert_sample_rate, "sample_endpoints": convert_endpoint_sampling, - "sample_endpoint_rate": convert_sample_rate, + "endpoint_sample_rate": convert_sample_rate, "sample_jobs": convert_endpoint_sampling, - "sample_job_rate": convert_sample_rate, + "job_sample_rate": convert_sample_rate, "shutdown_message_enabled": convert_to_bool, "shutdown_timeout_seconds": convert_to_float, } diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index f1a0921b..6bdcecd8 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -34,8 +34,8 @@ def __init__(self, config): self.sample_jobs = config.value("sample_jobs") self.ignore_endpoints = set(config.value("ignore_endpoints")) self.ignore_jobs = set(config.value("ignore_jobs")) - self.sample_endpoint_rate = config.value("sample_endpoint_rate") - self.sample_job_rate = config.value("sample_job_rate") + self.endpoint_sample_rate = config.value("endpoint_sample_rate") + self.job_sample_rate = config.value("job_sample_rate") def _any_sampling(self): """ @@ -117,8 +117,8 @@ def get_effective_sample_rate(self, operation: str) -> int: matching_rate = self._find_matching_rate(name, self.sample_endpoints) if matching_rate is not None: return matching_rate - if self.sample_endpoint_rate is not None: - return self.sample_endpoint_rate + if self.endpoint_sample_rate is not None: + return self.endpoint_sample_rate else: # op_type == 'job' if name in self.ignore_jobs: @@ -127,8 +127,8 @@ def get_effective_sample_rate(self, operation: str) -> int: matching_rate = self._find_matching_rate(name, self.sample_jobs) if matching_rate is not None: return matching_rate - if self.sample_job_rate is not None: - return self.sample_job_rate + if self.job_sample_rate is not None: + return self.job_sample_rate # Fall back to global sample rate return self.sample_rate diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index cec0b748..1c970856 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -24,8 +24,8 @@ def config(): }, ignore_endpoints=["metrics", "ping"], ignore_jobs=["test-job"], - sample_endpoint_rate=70, # 70% sampling for unspecified endpoints - sample_job_rate=40, # 40% sampling for unspecified jobs + endpoint_sample_rate=70, # 70% sampling for unspecified endpoints + job_sample_rate=40, # 40% sampling for unspecified jobs ) yield config ScoutConfig.reset_all() @@ -105,7 +105,7 @@ def test_should_sample_job_default_rate(sampler): def test_should_sample_endpoint_fallback_to_global_rate(config): - config.set(sample_endpoint_rate=None) + config.set(endpoint_sample_rate=None) sampler = Sampler(config) with mock.patch("random.randint", return_value=40): assert sampler.should_sample("Controller/unspecified") is True @@ -114,7 +114,7 @@ def test_should_sample_endpoint_fallback_to_global_rate(config): def test_should_sample_job_fallback_to_global_rate(config): - config.set(sample_job_rate=None) + config.set(job_sample_rate=None) sampler = Sampler(config) with mock.patch("random.randint", return_value=40): assert sampler.should_sample("Job/unspecified-job") is True From bf3e3935227fc5053223f2c2d22b0417f2f2cb2e Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 13 Jan 2025 13:23:58 -0800 Subject: [PATCH 14/27] check endpoint/job sample rate in any_sampling --- src/scout_apm/core/sampler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 6bdcecd8..714330d8 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -50,6 +50,8 @@ def _any_sampling(self): or self.sample_jobs or self.ignore_endpoints or self.ignore_jobs + or self.endpoint_sample_rate is not None + or self.job_sample_rate is not None ) def _find_matching_rate( From 0090e944cb676362b119e2c9f91ee2329e63060a Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 13 Jan 2025 15:03:10 -0800 Subject: [PATCH 15/27] add test assertion --- tests/unit/core/test_sampler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index 1c970856..f3f2799e 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -22,7 +22,7 @@ def config(): "critical-job": 100, # Always sample "batch": 30, # 30% sampling for batch jobs }, - ignore_endpoints=["metrics", "ping"], + ignore_endpoints=["metrics", "ping", "users/test"], ignore_jobs=["test-job"], endpoint_sample_rate=70, # 70% sampling for unspecified endpoints job_sample_rate=40, # 40% sampling for unspecified jobs @@ -61,6 +61,7 @@ def test_should_sample_job_always(sampler): def test_should_sample_job_never(sampler): assert sampler.should_sample("Job/test-job") is False + assert sampler.should_sample("users/test") is False def test_should_sample_job_partial(sampler): @@ -84,6 +85,8 @@ def test_should_sample_no_sampling_enabled(config): sample_jobs={}, ignore_endpoints=[], ignore_jobs=[], + endpoint_sample_rate=None, + job_sample_rate=None, ) sampler = Sampler(config) assert sampler.should_sample("Controller/any_endpoint") is True From 82b626ad01e7503be9b6d157d52b3e5475a5d9a2 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 13 Jan 2025 15:03:10 -0800 Subject: [PATCH 16/27] add test assertion --- tests/unit/core/test_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index f3f2799e..a9a0004a 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -61,7 +61,7 @@ def test_should_sample_job_always(sampler): def test_should_sample_job_never(sampler): assert sampler.should_sample("Job/test-job") is False - assert sampler.should_sample("users/test") is False + assert sampler.should_sample("Controller/users/test") is False def test_should_sample_job_partial(sampler): From b55252901f4deb182986c836dd027e5077ee72dd Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 13 Jan 2025 17:30:47 -0800 Subject: [PATCH 17/27] refactor sampler to better handler legacy ignore and prioritization --- src/scout_apm/core/sampler.py | 105 +++++++++++++++--------- tests/unit/core/test_sampler.py | 140 ++++++++++++++++++++++++-------- 2 files changed, 173 insertions(+), 72 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 714330d8..fe25d45e 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -4,6 +4,8 @@ import random from typing import Dict, Optional, Tuple +from scout_apm.core.tracked_request import TrackedRequest + logger = logging.getLogger(__name__) @@ -36,6 +38,7 @@ def __init__(self, config): self.ignore_jobs = set(config.value("ignore_jobs")) self.endpoint_sample_rate = config.value("endpoint_sample_rate") self.job_sample_rate = config.value("job_sample_rate") + self.legacy_ignore = config.value("ignore") def _any_sampling(self): """ @@ -54,11 +57,11 @@ def _any_sampling(self): or self.job_sample_rate is not None ) - def _find_matching_rate( + def _find_exact_match( self, name: str, patterns: Dict[str, float] - ) -> Optional[str]: + ) -> Optional[float]: """ - Finds the matching sample rate for a given operation name. + Finds the exact sample rate for a given operation name. Args: name: The operation name to match @@ -67,11 +70,25 @@ def _find_matching_rate( Returns: The sample rate for the matching pattern or None if no match found """ + return patterns.get(name) - for pattern, rate in patterns.items(): - if name.startswith(pattern): - return rate - return None + def _find_prefix_match( + self, name: str, patterns: Dict[str, float] + ) -> Optional[float]: + """Find the longest matching prefix in sample configurations.""" + matching_prefixes = [ + (prefix, rate) + for prefix, rate in patterns.items() + if name.startswith(prefix) + ] + if not matching_prefixes: + return None + # Return rate for longest matching prefix + return max(matching_prefixes, key=lambda x: len(x[0]))[1] + + def _is_legacy_ignored(self, name: str) -> bool: + """Check if path matches any legacy ignore patterns.""" + return any(name.startswith(ignored) for ignored in self.legacy_ignore) def _get_operation_type_and_name( self, operation: str @@ -93,49 +110,63 @@ def _get_operation_type_and_name( else: return None, None - def get_effective_sample_rate(self, operation: str) -> int: + def get_effective_sample_rate(self, request: TrackedRequest) -> int: """ Determines the effective sample rate for a given operation. - Priority order: - 1. Ignored operations (returns 0) - 2. Specific operation sample rate - 3. Global sample rate - - Args: - operation: The operation string (e.g. "Controller/users/show") - - Returns: - Integer between 0 and 100 representing sample rate + Priority order (highest to lowest): + 1. Exact matches in sample_endpoints/sample_jobs + 2. Exact matches in ignore lists (returns 0) + 3. Prefix matches in sample_endpoints/sample_jobs + 4. Legacy ignore patterns (returns 0) + 5. Request-level ignore (returns 0) + 6. Operation-specific default rate + 7. Global sample rate """ + operation = request.operation op_type, name = self._get_operation_type_and_name(operation) + if not op_type or not name: - return self.sample_rate # Fall back to global rate for unknown operations + return self.sample_rate + + patterns = self.sample_endpoints if op_type == "endpoint" else self.sample_jobs + ignored_set = ( + self.ignore_endpoints if op_type == "endpoint" else self.ignore_jobs + ) + default_rate = ( + self.endpoint_sample_rate if op_type == "endpoint" else self.job_sample_rate + ) + + # Check for exact match in sampling patterns + exact_rate = self._find_exact_match(name, patterns) + if exact_rate is not None: + return exact_rate + + # Check for exact endpoint/job ignores + if name in ignored_set: + return 0 - if op_type == "endpoint": - if name in self.ignore_endpoints: - return 0 + # Check for prefix match in sampling patterns + prefix_rate = self._find_prefix_match(name, patterns) + if prefix_rate is not None: + return prefix_rate - matching_rate = self._find_matching_rate(name, self.sample_endpoints) - if matching_rate is not None: - return matching_rate - if self.endpoint_sample_rate is not None: - return self.endpoint_sample_rate + # Check legacy ignore patterns + if self._is_legacy_ignored(name): + return 0 - else: # op_type == 'job' - if name in self.ignore_jobs: - return 0 + # Check if request is explicitly ignored via tag + if request.is_ignored(): + return 0 - matching_rate = self._find_matching_rate(name, self.sample_jobs) - if matching_rate is not None: - return matching_rate - if self.job_sample_rate is not None: - return self.job_sample_rate + # Use operation-specific default rate if available + if default_rate is not None: + return default_rate # Fall back to global sample rate return self.sample_rate - def should_sample(self, operation: str) -> bool: + def should_sample(self, request: TrackedRequest) -> bool: """ Determines if an operation should be sampled. If no sampling is enabled, always return True. @@ -149,4 +180,4 @@ def should_sample(self, operation: str) -> bool: """ if not self._any_sampling(): return True - return random.randint(1, 100) <= self.get_effective_sample_rate(operation) + return random.randint(1, 100) <= self.get_effective_sample_rate(request) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index a9a0004a..f466f016 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -6,6 +6,7 @@ from scout_apm.core.config import ScoutConfig from scout_apm.core.sampler import Sampler +from scout_apm.core.tracked_request import TrackedRequest @pytest.fixture @@ -36,49 +37,63 @@ def sampler(config): return Sampler(config) -def test_should_sample_endpoint_always(sampler): - assert sampler.should_sample("Controller/users") is True +@pytest.fixture +def tracked_request(): + return TrackedRequest() + +def test_should_sample_endpoint_always(sampler, tracked_request): + tracked_request.operation = "Controller/users" + assert sampler.should_sample(tracked_request) is True -def test_should_sample_endpoint_never(sampler): - assert sampler.should_sample("Controller/health/check") is False +def test_should_sample_endpoint_never(sampler, tracked_request): + tracked_request.operation = "Controller/health/check" + assert sampler.should_sample(tracked_request) is False + tracked_request.operation = "Controller/users/test" + assert sampler.should_sample(tracked_request) is False -def test_should_sample_endpoint_ignored(sampler): - assert sampler.should_sample("Controller/metrics") is False +def test_should_sample_endpoint_ignored(sampler, tracked_request): + tracked_request.operation = "Controller/metrics" + assert sampler.should_sample(tracked_request) is False -def test_should_sample_endpoint_partial(sampler): + +def test_should_sample_endpoint_partial(sampler, tracked_request): + tracked_request.operation = "Controller/test/endpoint" with mock.patch("random.randint", return_value=10): - assert sampler.should_sample("Controller/test/endpoint") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=30): - assert sampler.should_sample("Controller/test/endpoint") is False + assert sampler.should_sample(tracked_request) is False -def test_should_sample_job_always(sampler): - assert sampler.should_sample("Job/critical-job") is True +def test_should_sample_job_always(sampler, tracked_request): + tracked_request.operation = "Job/critical-job" + assert sampler.should_sample(tracked_request) is True -def test_should_sample_job_never(sampler): - assert sampler.should_sample("Job/test-job") is False - assert sampler.should_sample("Controller/users/test") is False +def test_should_sample_job_never(sampler, tracked_request): + tracked_request.operation = "Job/test-job" + assert sampler.should_sample(tracked_request) is False -def test_should_sample_job_partial(sampler): +def test_should_sample_job_partial(sampler, tracked_request): + tracked_request.operation = "Job/batch-process" with mock.patch("random.randint", return_value=10): - assert sampler.should_sample("Job/batch-process") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Job/batch-process") is False + assert sampler.should_sample(tracked_request) is False -def test_should_sample_unknown_operation(sampler): +def test_should_sample_unknown_operation(sampler, tracked_request): + tracked_request.operation = "Unknown/operation" with mock.patch("random.randint", return_value=10): - assert sampler.should_sample("Unknown/operation") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Unknown/operation") is False + assert sampler.should_sample(tracked_request) is False -def test_should_sample_no_sampling_enabled(config): +def test_should_sample_no_sampling_enabled(config, tracked_request): config.set( sample_rate=100, # Return config to defaults sample_endpoints={}, @@ -89,37 +104,92 @@ def test_should_sample_no_sampling_enabled(config): job_sample_rate=None, ) sampler = Sampler(config) - assert sampler.should_sample("Controller/any_endpoint") is True - assert sampler.should_sample("Job/any_job") is True + tracked_request.operation = "Controller/any_endpoint" + assert sampler.should_sample(tracked_request) is True + tracked_request.operation = "Job/any_job" + assert sampler.should_sample(tracked_request) is True -def test_should_sample_endpoint_default_rate(sampler): +def test_should_sample_endpoint_default_rate(sampler, tracked_request): + tracked_request.operation = "Controller/unspecified" with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Controller/unspecified") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=80): - assert sampler.should_sample("Controller/unspecified") is False + assert sampler.should_sample(tracked_request) is False -def test_should_sample_job_default_rate(sampler): +def test_should_sample_job_default_rate(sampler, tracked_request): + tracked_request.operation = "Job/unspecified-job" with mock.patch("random.randint", return_value=30): - assert sampler.should_sample("Job/unspecified-job") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=50): - assert sampler.should_sample("Job/unspecified-job") is False + assert sampler.should_sample(tracked_request) is False -def test_should_sample_endpoint_fallback_to_global_rate(config): +def test_should_sample_endpoint_fallback_to_global_rate(config, tracked_request): config.set(endpoint_sample_rate=None) sampler = Sampler(config) + tracked_request.operation = "Controller/unspecified" with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Controller/unspecified") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Controller/unspecified") is False + assert sampler.should_sample(tracked_request) is False -def test_should_sample_job_fallback_to_global_rate(config): +def test_should_sample_job_fallback_to_global_rate(config, tracked_request): config.set(job_sample_rate=None) sampler = Sampler(config) + tracked_request.operation = "Job/unspecified-job" + with mock.patch("random.randint", return_value=40): + assert sampler.should_sample(tracked_request) is True + with mock.patch("random.randint", return_value=60): + assert sampler.should_sample(tracked_request) is False + + +def test_should_handle_legacy_ignore_with_specific_sampling(config, tracked_request): + """Test that specific sampling rates override legacy ignore patterns.""" + config.set( + ignore=["foo"], + sample_endpoints={ + "foo/bar": 50 # Should override the ignore pattern for specific endpoint + }, + ) + sampler = Sampler(config) + + # foo/bar should be sampled at 50% + tracked_request.operation = "Controller/foo/bar" with mock.patch("random.randint", return_value=40): - assert sampler.should_sample("Job/unspecified-job") is True + assert sampler.should_sample(tracked_request) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample("Job/unspecified-job") is False + assert sampler.should_sample(tracked_request) is False + + # foo/other should be ignored (0% sampling) + tracked_request.operation = "Controller/foo/other" + assert sampler.should_sample(tracked_request) is False + + +def test_prefix_matching_precedence(config, tracked_request): + """Test that longer prefix matches take precedence.""" + config.set( + sample_endpoints={ + "api": 0, # Ignore all API endpoints by default + "api/users": 50, # Sample 50% of user endpoints + "api/users/vip": 100, # Sample all VIP user endpoints + } + ) + sampler = Sampler(config) + + # Regular API endpoint should be ignored + tracked_request.operation = "Controller/api/status" + assert sampler.should_sample(tracked_request) is False + + # Users API should be sampled at 50% + tracked_request.operation = "Controller/api/users/list" + with mock.patch("random.randint", return_value=40): + assert sampler.should_sample(tracked_request) is True + with mock.patch("random.randint", return_value=60): + assert sampler.should_sample(tracked_request) is False + + # VIP users API should always be sampled + tracked_request.operation = "Controller/api/users/vip/list" + assert sampler.should_sample(tracked_request) is True From a235170b31c75f228491bf80c4099ad161f1423e Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 13 Jan 2025 18:05:35 -0800 Subject: [PATCH 18/27] de-couple from TrackedRequest --- src/scout_apm/core/sampler.py | 27 +++----- tests/unit/core/test_sampler.py | 114 ++++++++++++-------------------- 2 files changed, 54 insertions(+), 87 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index fe25d45e..2000cedf 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -4,8 +4,6 @@ import random from typing import Dict, Optional, Tuple -from scout_apm.core.tracked_request import TrackedRequest - logger = logging.getLogger(__name__) @@ -110,20 +108,12 @@ def _get_operation_type_and_name( else: return None, None - def get_effective_sample_rate(self, request: TrackedRequest) -> int: + def get_effective_sample_rate( + self, operation: str, is_ignored: bool = False + ) -> int: """ Determines the effective sample rate for a given operation. - - Priority order (highest to lowest): - 1. Exact matches in sample_endpoints/sample_jobs - 2. Exact matches in ignore lists (returns 0) - 3. Prefix matches in sample_endpoints/sample_jobs - 4. Legacy ignore patterns (returns 0) - 5. Request-level ignore (returns 0) - 6. Operation-specific default rate - 7. Global sample rate """ - operation = request.operation op_type, name = self._get_operation_type_and_name(operation) if not op_type or not name: @@ -155,8 +145,9 @@ def get_effective_sample_rate(self, request: TrackedRequest) -> int: if self._is_legacy_ignored(name): return 0 - # Check if request is explicitly ignored via tag - if request.is_ignored(): + # Check if request is explicitly ignored via the + # is_ignored() tracked_request method. + if is_ignored: return 0 # Use operation-specific default rate if available @@ -166,7 +157,7 @@ def get_effective_sample_rate(self, request: TrackedRequest) -> int: # Fall back to global sample rate return self.sample_rate - def should_sample(self, request: TrackedRequest) -> bool: + def should_sample(self, operation: str, is_ignored: bool) -> bool: """ Determines if an operation should be sampled. If no sampling is enabled, always return True. @@ -180,4 +171,6 @@ def should_sample(self, request: TrackedRequest) -> bool: """ if not self._any_sampling(): return True - return random.randint(1, 100) <= self.get_effective_sample_rate(request) + return random.randint(1, 100) <= self.get_effective_sample_rate( + operation, is_ignored + ) diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index f466f016..199bcb68 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -6,7 +6,6 @@ from scout_apm.core.config import ScoutConfig from scout_apm.core.sampler import Sampler -from scout_apm.core.tracked_request import TrackedRequest @pytest.fixture @@ -37,63 +36,49 @@ def sampler(config): return Sampler(config) -@pytest.fixture -def tracked_request(): - return TrackedRequest() - - -def test_should_sample_endpoint_always(sampler, tracked_request): - tracked_request.operation = "Controller/users" - assert sampler.should_sample(tracked_request) is True +def test_should_sample_endpoint_always(sampler): + assert sampler.should_sample("Controller/users", False) is True -def test_should_sample_endpoint_never(sampler, tracked_request): - tracked_request.operation = "Controller/health/check" - assert sampler.should_sample(tracked_request) is False - tracked_request.operation = "Controller/users/test" - assert sampler.should_sample(tracked_request) is False +def test_should_sample_endpoint_never(sampler): + assert sampler.should_sample("Controller/health/check", False) is False + assert sampler.should_sample("Controller/users/test", False) is False -def test_should_sample_endpoint_ignored(sampler, tracked_request): - tracked_request.operation = "Controller/metrics" - assert sampler.should_sample(tracked_request) is False +def test_should_sample_endpoint_ignored(sampler): + assert sampler.should_sample("Controller/metrics", False) is False -def test_should_sample_endpoint_partial(sampler, tracked_request): - tracked_request.operation = "Controller/test/endpoint" +def test_should_sample_endpoint_partial(sampler): with mock.patch("random.randint", return_value=10): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/test/endpoint", False) is True with mock.patch("random.randint", return_value=30): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/test/endpoint", False) is False -def test_should_sample_job_always(sampler, tracked_request): - tracked_request.operation = "Job/critical-job" - assert sampler.should_sample(tracked_request) is True +def test_should_sample_job_always(sampler): + assert sampler.should_sample("Job/critical-job", False) is True -def test_should_sample_job_never(sampler, tracked_request): - tracked_request.operation = "Job/test-job" - assert sampler.should_sample(tracked_request) is False +def test_should_sample_job_never(sampler): + assert sampler.should_sample("Job/test-job", False) is False -def test_should_sample_job_partial(sampler, tracked_request): - tracked_request.operation = "Job/batch-process" +def test_should_sample_job_partial(sampler): with mock.patch("random.randint", return_value=10): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Job/batch-process", False) is True with mock.patch("random.randint", return_value=40): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Job/batch-process", False) is False -def test_should_sample_unknown_operation(sampler, tracked_request): - tracked_request.operation = "Unknown/operation" +def test_should_sample_unknown_operation(sampler): with mock.patch("random.randint", return_value=10): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Unknown/operation", False) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Unknown/operation", False) is False -def test_should_sample_no_sampling_enabled(config, tracked_request): +def test_should_sample_no_sampling_enabled(config): config.set( sample_rate=100, # Return config to defaults sample_endpoints={}, @@ -104,49 +89,43 @@ def test_should_sample_no_sampling_enabled(config, tracked_request): job_sample_rate=None, ) sampler = Sampler(config) - tracked_request.operation = "Controller/any_endpoint" - assert sampler.should_sample(tracked_request) is True - tracked_request.operation = "Job/any_job" - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/any_endpoint", False) is True + assert sampler.should_sample("Job/any_job", False) is True -def test_should_sample_endpoint_default_rate(sampler, tracked_request): - tracked_request.operation = "Controller/unspecified" +def test_should_sample_endpoint_default_rate(sampler): with mock.patch("random.randint", return_value=60): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/unspecified", False) is True with mock.patch("random.randint", return_value=80): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/unspecified", False) is False -def test_should_sample_job_default_rate(sampler, tracked_request): - tracked_request.operation = "Job/unspecified-job" +def test_should_sample_job_default_rate(sampler): with mock.patch("random.randint", return_value=30): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Job/unspecified-job", False) is True with mock.patch("random.randint", return_value=50): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Job/unspecified-job", False) is False -def test_should_sample_endpoint_fallback_to_global_rate(config, tracked_request): +def test_should_sample_endpoint_fallback_to_global_rate(config): config.set(endpoint_sample_rate=None) sampler = Sampler(config) - tracked_request.operation = "Controller/unspecified" with mock.patch("random.randint", return_value=40): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/unspecified", False) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/unspecified", False) is False -def test_should_sample_job_fallback_to_global_rate(config, tracked_request): +def test_should_sample_job_fallback_to_global_rate(config): config.set(job_sample_rate=None) sampler = Sampler(config) - tracked_request.operation = "Job/unspecified-job" with mock.patch("random.randint", return_value=40): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Job/unspecified-job", False) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Job/unspecified-job", False) is False -def test_should_handle_legacy_ignore_with_specific_sampling(config, tracked_request): +def test_should_handle_legacy_ignore_with_specific_sampling(config): """Test that specific sampling rates override legacy ignore patterns.""" config.set( ignore=["foo"], @@ -157,18 +136,16 @@ def test_should_handle_legacy_ignore_with_specific_sampling(config, tracked_requ sampler = Sampler(config) # foo/bar should be sampled at 50% - tracked_request.operation = "Controller/foo/bar" with mock.patch("random.randint", return_value=40): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/foo/bar", False) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/foo/bar", False) is False # foo/other should be ignored (0% sampling) - tracked_request.operation = "Controller/foo/other" - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/foo/other", False) is False -def test_prefix_matching_precedence(config, tracked_request): +def test_prefix_matching_precedence(config): """Test that longer prefix matches take precedence.""" config.set( sample_endpoints={ @@ -180,16 +157,13 @@ def test_prefix_matching_precedence(config, tracked_request): sampler = Sampler(config) # Regular API endpoint should be ignored - tracked_request.operation = "Controller/api/status" - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/api/status", False) is False # Users API should be sampled at 50% - tracked_request.operation = "Controller/api/users/list" with mock.patch("random.randint", return_value=40): - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/api/users/list", False) is True with mock.patch("random.randint", return_value=60): - assert sampler.should_sample(tracked_request) is False + assert sampler.should_sample("Controller/api/users/list", False) is False # VIP users API should always be sampled - tracked_request.operation = "Controller/api/users/vip/list" - assert sampler.should_sample(tracked_request) is True + assert sampler.should_sample("Controller/api/users/vip/list", False) is True From fb4744d11e86f6c45bf5a3ca305564f3205558ff Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 14 Jan 2025 11:24:27 -0800 Subject: [PATCH 19/27] simplify sampler logic to rely on configuration --- src/scout_apm/core/sampler.py | 110 +++++++++++++------------------- tests/unit/core/test_sampler.py | 11 ++-- 2 files changed, 50 insertions(+), 71 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 2000cedf..5b3037e5 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -1,11 +1,8 @@ # coding=utf-8 -import logging import random from typing import Dict, Optional, Tuple -logger = logging.getLogger(__name__) - class Sampler: """ @@ -32,11 +29,12 @@ def __init__(self, config): self.sample_rate = config.value("sample_rate") self.sample_endpoints = config.value("sample_endpoints") self.sample_jobs = config.value("sample_jobs") - self.ignore_endpoints = set(config.value("ignore_endpoints")) + self.ignore_endpoints = set( + config.value("ignore_endpoints") + config.value("ignore") + ) self.ignore_jobs = set(config.value("ignore_jobs")) self.endpoint_sample_rate = config.value("endpoint_sample_rate") self.job_sample_rate = config.value("job_sample_rate") - self.legacy_ignore = config.value("ignore") def _any_sampling(self): """ @@ -55,11 +53,11 @@ def _any_sampling(self): or self.job_sample_rate is not None ) - def _find_exact_match( + def _find_matching_rate( self, name: str, patterns: Dict[str, float] - ) -> Optional[float]: + ) -> Optional[str]: """ - Finds the exact sample rate for a given operation name. + Finds the matching sample rate for a given operation name. Args: name: The operation name to match @@ -68,25 +66,11 @@ def _find_exact_match( Returns: The sample rate for the matching pattern or None if no match found """ - return patterns.get(name) - def _find_prefix_match( - self, name: str, patterns: Dict[str, float] - ) -> Optional[float]: - """Find the longest matching prefix in sample configurations.""" - matching_prefixes = [ - (prefix, rate) - for prefix, rate in patterns.items() - if name.startswith(prefix) - ] - if not matching_prefixes: - return None - # Return rate for longest matching prefix - return max(matching_prefixes, key=lambda x: len(x[0]))[1] - - def _is_legacy_ignored(self, name: str) -> bool: - """Check if path matches any legacy ignore patterns.""" - return any(name.startswith(ignored) for ignored in self.legacy_ignore) + for pattern, rate in patterns.items(): + if name.startswith(pattern): + return rate + return None def _get_operation_type_and_name( self, operation: str @@ -108,51 +92,45 @@ def _get_operation_type_and_name( else: return None, None - def get_effective_sample_rate( - self, operation: str, is_ignored: bool = False - ) -> int: + def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> int: """ Determines the effective sample rate for a given operation. - """ - op_type, name = self._get_operation_type_and_name(operation) - - if not op_type or not name: - return self.sample_rate - - patterns = self.sample_endpoints if op_type == "endpoint" else self.sample_jobs - ignored_set = ( - self.ignore_endpoints if op_type == "endpoint" else self.ignore_jobs - ) - default_rate = ( - self.endpoint_sample_rate if op_type == "endpoint" else self.job_sample_rate - ) - - # Check for exact match in sampling patterns - exact_rate = self._find_exact_match(name, patterns) - if exact_rate is not None: - return exact_rate - - # Check for exact endpoint/job ignores - if name in ignored_set: - return 0 - # Check for prefix match in sampling patterns - prefix_rate = self._find_prefix_match(name, patterns) - if prefix_rate is not None: - return prefix_rate + Prioritization: + 1. Sampling rate for specific endpoint or job + 2. Specified ignore pattern or flag for operation + 3. Global endpoint or job sample rate + 4. Global sample rate - # Check legacy ignore patterns - if self._is_legacy_ignored(name): - return 0 - - # Check if request is explicitly ignored via the - # is_ignored() tracked_request method. - if is_ignored: - return 0 + Args: + operation: The operation string (e.g. "Controller/users/show") + is_ignored: boolean for if the specific transaction is ignored - # Use operation-specific default rate if available - if default_rate is not None: - return default_rate + Returns: + Integer between 0 and 100 representing sample rate + """ + op_type, name = self._get_operation_type_and_name(operation) + if not op_type or not name: + return self.sample_rate # Fall back to global rate for unknown operations + + if op_type == "endpoint": + matching_rate = self._find_matching_rate(name, self.sample_endpoints) + if matching_rate is not None: + return matching_rate + if name in self.ignore_endpoints or is_ignored: + return 0 + if self.endpoint_sample_rate is not None: + return self.endpoint_sample_rate + + else: # op_type == 'job' + if name in self.ignore_jobs: + return 0 + + matching_rate = self._find_matching_rate(name, self.sample_jobs) + if matching_rate is not None: + return matching_rate + if self.job_sample_rate is not None or is_ignored: + return self.job_sample_rate # Fall back to global sample rate return self.sample_rate diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index 199bcb68..3811f4c1 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -14,6 +14,7 @@ def config(): ScoutConfig.set( sample_rate=50, # 50% global sampling sample_endpoints={ + "users/test": 0, # Never sample specific endpoint "users": 100, # Always sample "test": 20, # 20% sampling for test endpoints "health": 0, # Never sample health checks @@ -22,7 +23,7 @@ def config(): "critical-job": 100, # Always sample "batch": 30, # 30% sampling for batch jobs }, - ignore_endpoints=["metrics", "ping", "users/test"], + ignore_endpoints=["metrics", "ping"], ignore_jobs=["test-job"], endpoint_sample_rate=70, # 70% sampling for unspecified endpoints job_sample_rate=40, # 40% sampling for unspecified jobs @@ -128,9 +129,9 @@ def test_should_sample_job_fallback_to_global_rate(config): def test_should_handle_legacy_ignore_with_specific_sampling(config): """Test that specific sampling rates override legacy ignore patterns.""" config.set( - ignore=["foo"], sample_endpoints={ - "foo/bar": 50 # Should override the ignore pattern for specific endpoint + "foo/bar": 50, # Should override the ignore pattern for specific endpoint + "foo": 0, # Ignore all other foo endpoints }, ) sampler = Sampler(config) @@ -149,9 +150,9 @@ def test_prefix_matching_precedence(config): """Test that longer prefix matches take precedence.""" config.set( sample_endpoints={ - "api": 0, # Ignore all API endpoints by default - "api/users": 50, # Sample 50% of user endpoints "api/users/vip": 100, # Sample all VIP user endpoints + "api/users": 50, # Sample 50% of user endpoints + "api": 0, # Ignore all API endpoints by default } ) sampler = Sampler(config) From d78d973f20c3cb858967125375be1f026aa95310 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 14 Jan 2025 11:39:12 -0800 Subject: [PATCH 20/27] flatten control flow --- src/scout_apm/core/sampler.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index 5b3037e5..dd4b7400 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -110,27 +110,21 @@ def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> int: Integer between 0 and 100 representing sample rate """ op_type, name = self._get_operation_type_and_name(operation) + patterns = self.sample_endpoints if op_type == "endpoint" else self.sample_jobs + ignores = self.ignore_endpoints if op_type == "endpoint" else self.ignore_jobs + default_operation_rate = ( + self.endpoint_sample_rate if op_type == "endpoint" else self.job_sample_rate + ) + if not op_type or not name: - return self.sample_rate # Fall back to global rate for unknown operations - - if op_type == "endpoint": - matching_rate = self._find_matching_rate(name, self.sample_endpoints) - if matching_rate is not None: - return matching_rate - if name in self.ignore_endpoints or is_ignored: - return 0 - if self.endpoint_sample_rate is not None: - return self.endpoint_sample_rate - - else: # op_type == 'job' - if name in self.ignore_jobs: - return 0 - - matching_rate = self._find_matching_rate(name, self.sample_jobs) - if matching_rate is not None: - return matching_rate - if self.job_sample_rate is not None or is_ignored: - return self.job_sample_rate + return self.sample_rate + matching_rate = self._find_matching_rate(name, patterns) + if matching_rate is not None: + return matching_rate + if name in ignores or is_ignored: + return 0 + if default_operation_rate is not None: + return default_operation_rate # Fall back to global sample rate return self.sample_rate From 215f0b2c633a40c2cae61403c942a63c1890ce62 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 14 Jan 2025 12:49:13 -0800 Subject: [PATCH 21/27] use prefix to check ignores --- src/scout_apm/core/sampler.py | 5 +++-- tests/unit/core/test_sampler.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/scout_apm/core/sampler.py b/src/scout_apm/core/sampler.py index dd4b7400..21d29b8a 100644 --- a/src/scout_apm/core/sampler.py +++ b/src/scout_apm/core/sampler.py @@ -121,8 +121,9 @@ def get_effective_sample_rate(self, operation: str, is_ignored: bool) -> int: matching_rate = self._find_matching_rate(name, patterns) if matching_rate is not None: return matching_rate - if name in ignores or is_ignored: - return 0 + for prefix in ignores: + if name.startswith(prefix) or is_ignored: + return 0 if default_operation_rate is not None: return default_operation_rate diff --git a/tests/unit/core/test_sampler.py b/tests/unit/core/test_sampler.py index 3811f4c1..330f6321 100644 --- a/tests/unit/core/test_sampler.py +++ b/tests/unit/core/test_sampler.py @@ -47,7 +47,7 @@ def test_should_sample_endpoint_never(sampler): def test_should_sample_endpoint_ignored(sampler): - assert sampler.should_sample("Controller/metrics", False) is False + assert sampler.should_sample("Controller/metrics/some/more", False) is False def test_should_sample_endpoint_partial(sampler): From 7e8ceae69d314a49cea3123f44e8eb3e6108343e Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 6 Jan 2025 15:31:19 -0800 Subject: [PATCH 22/27] add sampler to tracked_request class --- setup.py | 2 +- src/scout_apm/core/tracked_request.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index d1a59c5f..b649c6fd 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ setup( name="scout_apm", - version="3.2.1", + version="3.3.0", description="Scout Application Performance Monitoring Agent", long_description=long_description, long_description_content_type="text/markdown", diff --git a/src/scout_apm/core/tracked_request.py b/src/scout_apm/core/tracked_request.py index 0df73368..f9e31fa1 100644 --- a/src/scout_apm/core/tracked_request.py +++ b/src/scout_apm/core/tracked_request.py @@ -10,6 +10,7 @@ from scout_apm.core.agent.socket import CoreAgentSocketThread from scout_apm.core.config import scout_config from scout_apm.core.n_plus_one_tracker import NPlusOneTracker +from scout_apm.core.sampler import Sampler from scout_apm.core.samplers.memory import get_rss_in_mb from scout_apm.core.samplers.thread import SamplersThread @@ -24,6 +25,7 @@ class TrackedRequest(object): """ __slots__ = ( + "sampler", "request_id", "start_time", "end_time", @@ -48,6 +50,7 @@ def instance(cls): return context.get_tracked_request() def __init__(self): + self.sampler = Sampler(scout_config) self.request_id = "req-" + str(uuid4()) self.start_time = dt.datetime.now(dt.timezone.utc) self.end_time = None @@ -150,8 +153,14 @@ def finish(self): self.end_time = dt.datetime.now(dt.timezone.utc) if self.is_real_request: - self.tag("mem_delta", self._get_mem_delta()) - if not self.is_ignored() and not self.sent: + # Use the sampler to determine if the request should be sampled + logger.debug("Checking if request should be sampled") + if ( + not self.is_ignored() + and not self.sent + and self.sampler.should_sample(self.operation) + ): + self.tag("mem_delta", self._get_mem_delta()) self.sent = True batch_command = BatchCommand.from_tracked_request(self) if scout_config.value("log_payload_content"): From c9e198e430c24cbd7fdec14d9fe9fef924b40543 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Mon, 6 Jan 2025 15:31:36 -0800 Subject: [PATCH 23/27] add 3.3.0 changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2ff97aa..666fd563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ - Change to tz-aware dates internally (Issue #799) - psutil dependency un-pin (#790) +## [3.3.0] 2025-01-07 +### Added +- Added support for down-sampling via Scout configuration. + - Sample rates can be set globally, for specific jobs/endpoints, or with wildcard selectors. + - Check out our [documentation](https://scoutapm.com/docs/python#sampling) for more information and example usage. + ## [3.2.0] 2024-09-12 ### Added - "Operation" attribute added to TrackedRequest class to better support development of [scout_apm_python_logging](https://github.com/scoutapp/scout_apm_python_logging) From b304e842b5a5c6534f51a7f6c3125b43fa8f924e Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 7 Jan 2025 13:40:47 -0800 Subject: [PATCH 24/27] use singleton pattern for sampler --- src/scout_apm/core/tracked_request.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/scout_apm/core/tracked_request.py b/src/scout_apm/core/tracked_request.py index f9e31fa1..d53fe841 100644 --- a/src/scout_apm/core/tracked_request.py +++ b/src/scout_apm/core/tracked_request.py @@ -24,6 +24,14 @@ class TrackedRequest(object): their keyname """ + _sampler = None + + @classmethod + def get_sampler(cls): + if cls._sampler is None: + cls._sampler = Sampler(scout_config) + return cls._sampler + __slots__ = ( "sampler", "request_id", @@ -50,7 +58,6 @@ def instance(cls): return context.get_tracked_request() def __init__(self): - self.sampler = Sampler(scout_config) self.request_id = "req-" + str(uuid4()) self.start_time = dt.datetime.now(dt.timezone.utc) self.end_time = None @@ -153,12 +160,10 @@ def finish(self): self.end_time = dt.datetime.now(dt.timezone.utc) if self.is_real_request: - # Use the sampler to determine if the request should be sampled - logger.debug("Checking if request should be sampled") if ( not self.is_ignored() and not self.sent - and self.sampler.should_sample(self.operation) + and self.get_sampler().should_sample(self.operation) ): self.tag("mem_delta", self._get_mem_delta()) self.sent = True From 6ad4cad78c2e592095a8d25a264aa1c242919894 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 7 Jan 2025 15:08:38 -0800 Subject: [PATCH 25/27] add sampler tests to test_tracked_request --- tests/unit/core/test_tracked_request.py | 51 +++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/tests/unit/core/test_tracked_request.py b/tests/unit/core/test_tracked_request.py index 7fb43044..ecf07497 100644 --- a/tests/unit/core/test_tracked_request.py +++ b/tests/unit/core/test_tracked_request.py @@ -16,6 +16,12 @@ ) +@pytest.fixture(autouse=True) +def clear_sampler(): + """Reset the sampler before each test""" + TrackedRequest._sampler = None + + @pytest.fixture def reset_config(): """ @@ -26,6 +32,8 @@ def reset_config(): finally: # Reset Scout configuration. scout_config.reset_all() + # Clear the sampler + TrackedRequest._sampler = None def test_tracked_request_repr(tracked_request): @@ -358,3 +366,46 @@ def test_request_only_sent_once(tracked_request, caplog): len([log_tuple for log_tuple in caplog.record_tuples if log_tuple == info_log]) == 2 ) + + +def test_sampler_behavior(tracked_request): + """Test that sampler is only created when first needed and shared across requests""" + assert TrackedRequest._sampler is None + sampler1 = TrackedRequest.get_sampler() + + assert TrackedRequest._sampler is not None + + # Should get the same sampler instance + sampler2 = TrackedRequest.get_sampler() + assert sampler1 is sampler2 + + # Test that all TrackedRequests share the same sampler + request1 = TrackedRequest() + request2 = TrackedRequest() + + sampler1 = request1.get_sampler() + sampler2 = request2.get_sampler() + + assert sampler1 is sampler2 + + +@pytest.mark.parametrize( + "operation,is_real,expected_calls", + [ + ("Controller/test", True, 1), # Should check sampling + ("Controller/test", False, 0), # Shouldn't check sampling if not real + ], +) +def test_finish_sampling_behavior(tracked_request, operation, is_real, expected_calls): + """Test that sampling only occurs under the right conditions""" + mock_sampler = mock.Mock() + mock_sampler.should_sample.return_value = True + TrackedRequest._sampler = mock_sampler + + tracked_request.operation = operation + tracked_request.is_real_request = is_real + tracked_request.sent = False + + tracked_request.finish() + + assert mock_sampler.should_sample.call_count == expected_calls From 2876f804117d6fe88e6609c038437af2cb4106c4 Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 7 Jan 2025 15:09:29 -0800 Subject: [PATCH 26/27] update changelog url --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 666fd563..bfea957d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ ### Added - Added support for down-sampling via Scout configuration. - Sample rates can be set globally, for specific jobs/endpoints, or with wildcard selectors. - - Check out our [documentation](https://scoutapm.com/docs/python#sampling) for more information and example usage. + - Check out our [documentation](https://scoutapm.com/docs/python/configuration#sampling) for more information and example usage. ## [3.2.0] 2024-09-12 ### Added From c81950afd57b08f12444a83a18ee58b6e3f748fd Mon Sep 17 00:00:00 2001 From: Quinn Milionis Date: Tue, 14 Jan 2025 12:04:41 -0800 Subject: [PATCH 27/27] pass ignored boolean to should_sample --- CHANGELOG.md | 2 +- src/scout_apm/core/tracked_request.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfea957d..81121e88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ ## [3.3.0] 2025-01-07 ### Added - Added support for down-sampling via Scout configuration. - - Sample rates can be set globally, for specific jobs/endpoints, or with wildcard selectors. + - Sample rates can be set globally or for specific jobs/endpoints - Check out our [documentation](https://scoutapm.com/docs/python/configuration#sampling) for more information and example usage. ## [3.2.0] 2024-09-12 diff --git a/src/scout_apm/core/tracked_request.py b/src/scout_apm/core/tracked_request.py index d53fe841..4f531aec 100644 --- a/src/scout_apm/core/tracked_request.py +++ b/src/scout_apm/core/tracked_request.py @@ -160,10 +160,8 @@ def finish(self): self.end_time = dt.datetime.now(dt.timezone.utc) if self.is_real_request: - if ( - not self.is_ignored() - and not self.sent - and self.get_sampler().should_sample(self.operation) + if not self.sent and self.get_sampler().should_sample( + self.operation, self.is_ignored() ): self.tag("mem_delta", self._get_mem_delta()) self.sent = True