From ff642e13277954890c877bac85a7f2441a8445b3 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Thu, 8 Jun 2023 10:32:07 -0700 Subject: [PATCH] Support complex timeseries output in YCSB that summarizes per-second stats. PiperOrigin-RevId: 538826247 --- perfkitbenchmarker/linux_packages/ycsb.py | 36 +- .../linux_packages/ycsb_stats.py | 351 +++++++++++-- requirements-testing.txt | 1 + tests/data/ycsb-time-series-2.dat | 96 ++-- tests/data/ycsb-time-series.dat | 96 ++-- tests/linux_packages/linux_boot_test.py | 9 +- tests/linux_packages/ycsb_stats_test.py | 489 ++++++++++++++++++ tests/linux_packages/ycsb_test.py | 311 ----------- 8 files changed, 921 insertions(+), 468 deletions(-) create mode 100644 tests/linux_packages/ycsb_stats_test.py diff --git a/perfkitbenchmarker/linux_packages/ycsb.py b/perfkitbenchmarker/linux_packages/ycsb.py index 5988894a49..d77247c5a6 100644 --- a/perfkitbenchmarker/linux_packages/ycsb.py +++ b/perfkitbenchmarker/linux_packages/ycsb.py @@ -300,7 +300,8 @@ ) # Status line pattern -_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec' +_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+(\.\d+)?) current ops\/sec' +_STATUS_GROUPS_PATTERN = r'\[(.+?): (.+?)\]' # Status interval default is 10 sec, change to 1 sec. _STATUS_INTERVAL_SEC = 1 @@ -444,6 +445,18 @@ def CheckPrerequisites(): '--ycsb_incremental_load=target and not --ycsb_run_parameters.' ) + # Both HISTOGRAM and TIMESERIES do not output latencies on a per-interval + # basis, so we use the more-detailed HDRHISTOGRAM. + if ( + _THROUGHPUT_TIME_SERIES.value + and FLAGS.ycsb_measurement_type != ycsb_stats.HDRHISTOGRAM + ): + raise errors.Config.InvalidValue( + 'Measuring a throughput histogram requires running with ' + '--ycsb_measurement_type=HDRHISTOGRAM. Other measurement types are ' + 'unsupported unless additional parsing is added.' + ) + @vm_util.Retry(poll_interval=1) def Install(vm): @@ -551,6 +564,8 @@ class YCSBExecutor: perclientparam: list. 
shardkeyspace: boolean. Default to False, indicates if clients should have their own keyspace. + burst_time_offset_sec: When running with --ycsb_burst_load, the amount of + seconds to offset time series measurements during the increased load. """ FLAG_ATTRIBUTES = 'cp', 'jvm-args', 'target', 'threads' @@ -571,6 +586,8 @@ def __init__(self, database, parameter_files=None, **kwargs): self.perclientparam = self.parameters.pop('perclientparam', None) self.shardkeyspace = self.parameters.pop('shardkeyspace', False) + self.burst_time_offset_sec = 0 + def _BuildCommand(self, command_name, parameter_files=None, **kwargs): """Builds the YCSB command line.""" command = [YCSB_EXE, command_name, self.database] @@ -737,7 +754,10 @@ def _Run(self, vm, **kwargs): vm.RemoteCommand('mkdir -p {0}'.format(hdr_files_dir)) stdout, stderr = vm.RobustRemoteCommand(command) return ycsb_stats.ParseResults( - str(stderr + stdout), self.measurement_type, _ERROR_RATE_THRESHOLD.value + str(stderr + stdout), + self.measurement_type, + _ERROR_RATE_THRESHOLD.value, + self.burst_time_offset_sec, ) def _RunThreaded(self, vms, **kwargs): @@ -1066,14 +1086,22 @@ def _RunBurstMode(self, vms, workloads, run_kwargs=None): initial_qps = int(run_params.get('target', 0)) samples = list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) + # Attach metadata for identifying pre burst load. 
+ for s in samples: + s.metadata['ycsb_burst_multiplier'] = 1 + + self.burst_time_offset_sec = FLAGS.ycsb_timelimit if _BURST_LOAD_MULTIPLIER.value == -1: run_params.pop('target') # Set to unlimited else: run_params['target'] = initial_qps * _BURST_LOAD_MULTIPLIER.value self._SetRunParameters(run_params) - samples += list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) - return samples + burst_samples = list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) + for s in burst_samples: + s.metadata['ycsb_burst_multiplier'] = _BURST_LOAD_MULTIPLIER.value + + return samples + burst_samples def _GetIncrementalQpsTargets(self, target_qps: int) -> list[int]: """Returns incremental QPS targets.""" diff --git a/perfkitbenchmarker/linux_packages/ycsb_stats.py b/perfkitbenchmarker/linux_packages/ycsb_stats.py index d48f679d55..eabfa330a1 100644 --- a/perfkitbenchmarker/linux_packages/ycsb_stats.py +++ b/perfkitbenchmarker/linux_packages/ycsb_stats.py @@ -15,7 +15,7 @@ import bisect import collections -from collections.abc import Iterable, Mapping +from collections.abc import Iterable, Iterator, Mapping import copy import csv import dataclasses @@ -33,6 +33,7 @@ from perfkitbenchmarker import linux_packages from perfkitbenchmarker import sample from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util FLAGS = flags.FLAGS @@ -60,6 +61,7 @@ # Statistics with operator 'None' will be dropped. AGGREGATE_OPERATORS = { 'Operations': operator.add, + 'Count': operator.add, 'RunTime(ms)': max, 'Return=0': operator.add, 'Return=-1': operator.add, @@ -75,10 +77,19 @@ '99thPercentileLatency(ms)': None, # Calculated across clients. 'MinLatency(ms)': min, 'MaxLatency(ms)': max, + 'Max': max, + 'Min': min, } +_STATUS_LATENCIES = [ + 'Avg', + 'Max', + 'Min', +] + # Status line pattern -_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec' +_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+(\.\d+)?) 
current ops\/sec' +_STATUS_GROUPS_PATTERN = r'\[(.+?): (.+?)\]' # Status interval default is 10 sec, change to 1 sec. _STATUS_INTERVAL_SEC = 1 @@ -97,14 +108,25 @@ _HdrHistogramTuple = tuple[float, float, int] +def _IsStatusLatencyStatistic(stat_name: str) -> bool: + """Returns whether a name is a latency statistic (i.e. "99.9").""" + return ( + stat_name.replace('.', '', 1).isdigit() or stat_name in _STATUS_LATENCIES + ) + + @dataclasses.dataclass class _OpResult: """Individual results for a single operation. + YCSB results are either aggregated per operation (read/update) at the end of + the run or output on a per-interval (i.e. second) basis during the run. + Attributes: - group: group name (e.g., update, insert, overall) - statistics: dict mapping from statistic name to value - data_type: Corresponds to --ycsb_measurement_type. + group: group name (e.g. update, insert, overall) + statistics: dict mapping from statistic name to value (e.g. {'Count': 33}) + data_type: Corresponds to --ycsb_measurement_type (e.g. histogram, + hdrhistogram, or timeseries). data: For HISTOGRAM/HDRHISTOGRAM: list of (ms_lower_bound, count) tuples, e.g. [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms, and 1 took between 19ms and 20ms. Empty bins are not reported. For @@ -116,6 +138,100 @@ class _OpResult: data: list[tuple[int, float]] = dataclasses.field(default_factory=list) statistics: dict[str, float] = dataclasses.field(default_factory=dict) + @classmethod + def FromSummaryLines( + cls, lines: Iterable[str], operation: str, data_type: str + ) -> '_OpResult': + """Returns an _OpResult parsed from YCSB summary lines. 
+ + Example format: + [UPDATE], Operations, 2468054 + [UPDATE], AverageLatency(us), 2218.8513395574005 + [UPDATE], MinLatency(us), 554 + [UPDATE], MaxLatency(us), 352634 + [UPDATE], 95thPercentileLatency(ms), 4 + [UPDATE], 99thPercentileLatency(ms), 7 + [UPDATE], Return=0, 2468054 + + Args: + lines: An iterable of lines parsed from the YCSB summary, grouped by + operation type. + operation: The operation type that corresponds to `lines`. + data_type: Corresponds to --ycsb_measurement_type. + + Returns: + An _OpResult with the parsed data. + """ + result = cls(group=operation, data_type=data_type) + latency_unit = 'ms' + for _, name, val in lines: + name = name.strip() + val = val.strip() + # Drop ">" from ">1000" + if name.startswith('>'): + name = name[1:] + val = float(val) if '.' in val or 'nan' in val.lower() else int(val) + if name.isdigit(): + if val: + if data_type == TIMESERIES and latency_unit == 'us': + val /= 1000.0 + result.data.append((int(name), val)) + else: + if '(us)' in name: + name = name.replace('(us)', '(ms)') + val /= 1000.0 + latency_unit = 'us' + result.statistics[name] = val + return result + + @classmethod + def FromStatusLine(cls, match: re.Match[str]) -> '_OpResult': + """Returns an _OpResult from a _STATUS_GROUPS_PATTERN match. + + Example format: + [READ: Count=33, Max=11487, Min=2658, Avg=4987.36, + 90=8271, 99=11487, 99.9=11487, 99.99=11487] + + Args: + match: Match object that matches _STATUS_GROUPS_PATTERN. + + Returns: + An _OpResult object with group and statistics. + """ + operation_name = match.group(1).lower() + statistics = {} + for pair in match.group(2).split(', '): + k, v = pair.split('=') + # Sometimes output can look like "Avg=". + v = 0 if not v else float(v) + if _IsStatusLatencyStatistic(k): + v /= 1000.0 + statistics[k] = float(v) + return cls(group=operation_name, statistics=statistics) + + +@dataclasses.dataclass +class _StatusResult: + """Represents YCSB results at a given timestamp. 
+ + Example format: + + 254 sec: 6149469 operations; 5897 current ops/sec; est completion in 11 hours + 24 minutes [READ: Count=5887, Max=4259839, Min=2514, Avg=63504.23, 90=3863, + 99=3848191, 99.9=4161535, 99.99=4243455] [READ-FAILED: Count=11, Max=4040703, + Min=3696640, Avg=3836369.45, 90=4005887, 99=4040703, 99.9=4040703, + 99.99=4040703] + + Attributes: + timestamp: The time (in seconds) since the start of the test. + overall_throughput: Average QPS. + op_results: list of _OpResult. + """ + + timestamp: int + overall_throughput: float + op_results: list[_OpResult] = dataclasses.field(default_factory=list) + @dataclasses.dataclass class YcsbResult: @@ -124,17 +240,75 @@ class YcsbResult: Attributes: client: Contains YCSB version information. command_line: Command line executed. - throughput_time_series: Time series of throughputs (interval, QPS). - groups: dict of operation group name to results for that operation. + status_time_series: Granular time series (see _StatusResult). + groups: Summary dict of operation group name to results for that operation. """ client: str = '' command_line: str = '' - throughput_time_series: _ThroughputTimeSeries = dataclasses.field( + status_time_series: dict[int, _StatusResult] = dataclasses.field( default_factory=dict ) groups: dict[str, _OpResult] = dataclasses.field(default_factory=dict) + def SplitStatusTimeSeriesForSamples( + self, + ) -> dict[str, dict[str, list[tuple[int, float]]]]: + """Yields individual time series by operation type (i.e. 
read/update).""" + time_series_by_op_and_stat = collections.defaultdict( + lambda: collections.defaultdict(list) + ) + status_results = sorted(self.status_time_series.items()) + for timestamp, status_result in status_results: + for op_result in status_result.op_results: + for stat, value in op_result.statistics.items(): + time_series_by_op_and_stat[op_result.group][stat].append( + (timestamp, value) + ) + + return time_series_by_op_and_stat + + def _GetStatsToWrite(self) -> list[str]: + stats_to_write = set() + for _, status_result in sorted(self.status_time_series.items()): + for op_result in status_result.op_results: + stats_to_write.update( + [ + stat + for stat in op_result.statistics.keys() + if _IsStatusLatencyStatistic(stat) or stat == 'Count' + ] + ) + return list(stats_to_write) + + def WriteStatusTimeSeriesToFile(self) -> None: + """Writes time series for each operation to separate file in tempdir.""" + stats_to_write = ['time'] + sorted(self._GetStatsToWrite()) + written_headers = [] + for timestamp, status_result in sorted(self.status_time_series.items()): + for op_result in status_result.op_results: + output_file = vm_util.PrependTempDir( + f'ycsb_status_output_{op_result.group}.csv' + ) + filtered_dict = { + k: v + for (k, v) in op_result.statistics.items() + if k in stats_to_write + } + filtered_dict['time'] = timestamp + with open(output_file, 'a+', newline='') as f: + writer = csv.DictWriter(f, fieldnames=stats_to_write) + if op_result.group not in written_headers: + writer.writeheader() + written_headers.append(op_result.group) + writer.writerow(filtered_dict) + + +def _ParseStatusLine(line: str) -> Iterator[_OpResult]: + """Returns a list of _OpResults from granular YCSB status output.""" + matches = re.finditer(_STATUS_GROUPS_PATTERN, line) + return (_OpResult.FromStatusLine(match) for match in matches) + def _ValidateErrorRate(result: YcsbResult, threshold: float) -> None: """Raises an error if results contains entries with too high error rate. 
@@ -180,6 +354,7 @@ def ParseResults( ycsb_result_string: str, data_type: str = 'histogram', error_rate_threshold: float = 1.0, + timestamp_offset_sec: int = 0, ) -> 'YcsbResult': """Parse YCSB results. @@ -262,6 +437,9 @@ similarly. error_rate_threshold: Error statistics in the output should not exceed this ratio. + timestamp_offset_sec: The number of seconds to offset the timestamp by for + runs measuring the status time series. Useful if there are multiple + runs back-to-back. Returns: A YcsbResult object that contains the results from parsing YCSB output. @@ -281,7 +459,7 @@ lines = [] client_string = 'YCSB' command_line = 'unknown' - throughput_time_series = {} + status_time_series = {} fp = io.StringIO(ycsb_result_string) result_string = next(fp).strip() @@ -297,9 +475,12 @@ def IsHeadOfResults(line): match = re.search(_STATUS_PATTERN, result_string) if match is not None: timestamp, qps = int(match.group(1)), float(match.group(2)) + timestamp += timestamp_offset_sec # Repeats in the printed status are erroneous, ignore. - if timestamp not in throughput_time_series: - throughput_time_series[timestamp] = qps + if timestamp not in status_time_series: + status_time_series[timestamp] = _StatusResult( + timestamp, qps, list(_ParseStatusLine(result_string)) + ) try: result_string = next(fp).strip() except StopIteration: @@ -328,37 +509,16 @@ def LineFilter(line): result = YcsbResult( client=client_string, command_line=command_line, - throughput_time_series=throughput_time_series, + status_time_series=status_time_series, ) for operation, lines in by_operation: operation = operation[1:-1].lower() - if operation == 'cleanup': continue - - op_result = _OpResult(group=operation, data_type=data_type) - latency_unit = 'ms' - for _, name, val in lines: - name = name.strip() - val = val.strip() - # Drop ">" from ">1000" - if name.startswith('>'): - name = name[1:] - val = float(val) if '.' 
in val or 'nan' in val.lower() else int(val) - if name.isdigit(): - if val: - if data_type == TIMESERIES and latency_unit == 'us': - val /= 1000.0 - op_result.data.append((int(name), val)) - else: - if '(us)' in name: - name = name.replace('(us)', '(ms)') - val /= 1000.0 - latency_unit = 'us' - op_result.statistics[name] = val - - result.groups[operation] = op_result + result.groups[operation] = _OpResult.FromSummaryLines( + lines, operation, data_type + ) _ValidateErrorRate(result, error_rate_threshold) return result @@ -592,10 +752,79 @@ def _CombineLatencyTimeSeries( combined_weights[timestamp] += 1.0 return result - def _CombineThroughputTimeSeries( - series1: _ThroughputTimeSeries, series2: _ThroughputTimeSeries - ) -> _ThroughputTimeSeries: - """Returns a combined dict of [timestamp, total QPS] from the two series.""" + def _CombineStatistics(result1: _OpResult, result2: _OpResult) -> _OpResult: + """Combines reported statistics. + + If no combining operator is defined, the statistic is skipped. + Otherwise, the aggregated value is either: + * The value in result2, if the statistic is not present in result1, or + * AGGREGATE_OPERATORS[statistic](result1_value, result2_value) + + Args: + result1: First _OpResult to combine. + result2: Second _OpResult to combine. + + Returns: + A combined _OpResult. + """ + combined = copy.deepcopy(result1) + for k, v in result2.statistics.items(): + # Numeric keys are latencies + if k not in AGGREGATE_OPERATORS and not _IsStatusLatencyStatistic(k): + continue + # Drop if not an aggregated statistic. + elif not _IsStatusLatencyStatistic(k) and AGGREGATE_OPERATORS[k] is None: + combined.statistics.pop(k, None) + continue + # Copy over if not already in aggregate. + elif k not in combined.statistics: + combined.statistics[k] = copy.deepcopy(v) + continue + + # Different cases for average latency and numeric latency when reporting a 
Provide the average of percentile latencies since we + # can't accurately calculate it. + if k == 'Avg': + s1, s2 = result1.statistics, result2.statistics + count = s1['Count'] + s2['Count'] + new_avg = ( + (s2['Count'] * s2['Avg'] + s1['Count'] * s1['Avg']) / count + if count + else 0 + ) + combined.statistics['Avg'] = new_avg + continue + # Cases where the stat is a latency i.e. 99, 99.9, 99.99. + elif k.replace('.', '', 1).isdigit(): + s1, s2 = result1.statistics, result2.statistics + new_avg = (s1[k] + s2[k]) / 2 + combined.statistics[k] = new_avg + continue + + op = AGGREGATE_OPERATORS[k] + combined.statistics[k] = op(combined.statistics[k], v) + return combined + + def _CombineOpResultLists( + list1: Iterable[_OpResult], list2: Iterable[_OpResult] + ) -> list[_OpResult]: + """Combines two lists of _OpResult into a single list.""" + list1_by_operation = {result.group: result for result in list1} + list2_by_operation = {result.group: result for result in list2} + result = copy.deepcopy(list1_by_operation) + for operation in list2_by_operation: + if operation not in result: + result[operation] = copy.deepcopy(list2_by_operation[operation]) + else: + result[operation] = _CombineStatistics( + result[operation], list2_by_operation[operation] + ) + return list(result.values()) + + def _CombineStatusTimeSeries( + series1: Mapping[int, _StatusResult], series2: Mapping[int, _StatusResult] + ) -> dict[int, _StatusResult]: + """Returns a combined dict of [timestamp, result] from the two series.""" timestamps1 = set(series1) timestamps2 = set(series2) all_timestamps = timestamps1 | timestamps2 @@ -610,7 +839,18 @@ def _CombineThroughputTimeSeries( ) result = {} for timestamp in all_timestamps: - result[timestamp] = series1.get(timestamp, 0) + series2.get(timestamp, 0) + combined_status_result = _StatusResult(timestamp, 0) + status1 = series1.get(timestamp, _StatusResult(timestamp, 0)) + status2 = series2.get(timestamp, _StatusResult(timestamp, 0)) + # Add overall 
throughputs + combined_status_result.overall_throughput = ( + status1.overall_throughput + status2.overall_throughput + ) + # Combine statistics via operators. + combined_status_result.op_results = _CombineOpResultLists( + status1.op_results, status2.op_results + ) + result[timestamp] = combined_status_result return result result_list = list(result_list) @@ -665,10 +905,8 @@ def _CombineThroughputTimeSeries( ) result.client = ' '.join((result.client, indiv.client)) result.command_line = ';'.join((result.command_line, indiv.command_line)) - - # if _THROUGHPUT_TIME_SERIES.value: - result.throughput_time_series = _CombineThroughputTimeSeries( - result.throughput_time_series, indiv.throughput_time_series + result.status_time_series = _CombineStatusTimeSeries( + result.status_time_series, indiv.status_time_series ) if measurement_type == HDRHISTOGRAM: @@ -767,14 +1005,21 @@ def CreateSamples( base_metadata['command_line'] = command_line base_metadata.update(kwargs) - throughput_time_series = ycsb_result.throughput_time_series - if throughput_time_series: - yield sample.Sample( - 'Throughput Time Series', - 0, - '', - {'throughput_time_series': sorted(throughput_time_series.items())}, - ) + if ycsb_result.status_time_series: + for ( + operation, + time_series_by_stat, + ) in ycsb_result.SplitStatusTimeSeriesForSamples().items(): + for stat, time_series in time_series_by_stat.items(): + timestamps, values = zip(*time_series) + yield sample.CreateTimeSeriesSample( + values=values, + timestamps=timestamps, + metric=f'{operation} {stat} time series', + units='ms' if _IsStatusLatencyStatistic(stat) else 'ops/sec', + interval=1.0, + ) + ycsb_result.WriteStatusTimeSeriesToFile() for group_name, group in ycsb_result.groups.items(): meta = base_metadata.copy() diff --git a/requirements-testing.txt b/requirements-testing.txt index d339353b39..5671ff0616 100644 --- a/requirements-testing.txt +++ b/requirements-testing.txt @@ -36,3 +36,4 @@ freezegun boto3 google-cloud-pubsub 
requests-mock +pyfakefs diff --git a/tests/data/ycsb-time-series-2.dat b/tests/data/ycsb-time-series-2.dat index b0616c9596..b1ddc94e93 100644 --- a/tests/data/ycsb-time-series-2.dat +++ b/tests/data/ycsb-time-series-2.dat @@ -1,50 +1,48 @@ -2022-10-01 01:23:57:907 0 sec: 0 operations; est completion in 0 second -2022-10-01 01:24:07:891 10 sec: 20846 operations; 2084.6 current ops/sec; est completion in 5 days 13 hours [READ AverageLatency(us)=22365.65] -2022-10-01 01:24:17:891 20 sec: 45803 operations; 2495.7 current ops/sec; est completion in 5 days 1 hour [READ AverageLatency(us)=20884.12] -2022-10-01 01:24:27:891 30 sec: 70775 operations; 2497.2 current ops/sec; est completion in 4 days 21 hours [READ AverageLatency(us)=17488] -2022-10-01 01:24:37:891 40 sec: 95861 operations; 2508.6 current ops/sec; est completion in 4 days 19 hours [READ AverageLatency(us)=22344.81] -2022-10-01 01:24:47:891 50 sec: 120757 operations; 2489.3 current ops/sec; est completion in 4 days 19 hours [READ AverageLatency(us)=19458.49] -2022-10-01 01:24:57:891 60 sec: 145857 operations; 2510 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=14855.12] -2022-10-01 01:24:57:988 60 sec: 145947 operations; 937.5 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=10245.58] [CLEANUP AverageLatency(us)=68.07] -[OVERALL], RunTime(ms), 60095 -[OVERALL], Throughput(ops/sec), 2428.604709210417 -[TOTAL_GCS_PS_Scavenge], Count, 16 -[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 107 -[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.17805141858723686 +2023-06-05 16:15:44:641 0 sec: 0 operations; est completion in 106751991167300 days 15 hours +2023-06-05 16:15:44,655 WARN [Thread-8] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable +2023-06-05 16:15:44,979 INFO [Thread-8] hbase1_x.BigtableConnection: Using bigtable-hbase client from jar file:/opt/pkb/ycsb/hbase12-binding/lib/bigtable-hbase-1.x-2.5.0.jar. Version: 2.5.0 +2023-06-05 16:15:45:636 1 sec: 0 operations; est completion in 106751991167300 days 15 hours +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +2023-06-05 16:15:46:636 2 sec: 18 operations; 18 current ops/sec; est completion in 128 days 15 hours [READ: Count=18, Max=239231, Min=34592, Avg=137007.11, 90=235903, 99=239231, 99.9=239231, 99.99=239231] +2023-06-05 16:15:47:636 3 sec: 1200 operations; 1182 current ops/sec; est completion in 2 days 21 hours [READ: Count=1184, Max=76735, Min=2574, Avg=6996.8, 90=10471, 99=48767, 99.9=68863, 99.99=76735] +2023-06-05 16:15:48:636 4 sec: 3045 operations; 1845 current ops/sec; est completion in 1 day 12 hours [READ: Count=1844, Max=14311, Min=2522, Avg=4327.71, 90=5723, 99=10023, 99.9=10967, 99.99=14311] +2023-06-05 16:15:49:636 5 sec: 5029 operations; 1984 current ops/sec; est completion in 1 day 3 hours [READ: Count=1983, Max=16303, Min=2416, Avg=4013.38, 90=4871, 99=9623, 99.9=13543, 99.99=16303] +2023-06-05 
16:15:50:636 6 sec: 7068 operations; 2039 current ops/sec; est completion in 23 hours 34 minutes [READ: Count=2039, Max=12839, Min=2296, Avg=3923.2, 90=4887, 99=9087, 99.9=11575, 99.99=12839] +2023-06-05 16:15:51:636 7 sec: 9133 operations; 2065 current ops/sec; est completion in 21 hours 17 minutes [READ: Count=2067, Max=32991, Min=2356, Avg=3866.79, 90=4643, 99=9295, 99.9=13887, 99.99=32991] +2023-06-05 16:15:52:636 8 sec: 11157 operations; 2024 current ops/sec; est completion in 19 hours 55 minutes [READ: Count=2023, Max=45439, Min=2320, Avg=3946.67, 90=4731, 99=9295, 99.9=25439, 99.99=45439] +2023-06-05 16:15:53:636 9 sec: 13279 operations; 2122 current ops/sec; est completion in 18 hours 49 minutes [READ: Count=2122, Max=16703, Min=2158, Avg=3765.34, 90=4607, 99=8623, 99.9=14175, 99.99=16703] +Maximum time elapsed. Requesting stop for the workload. +Stop requested for workload. Now Joining! +2023-06-05 16:15:54:636 10 sec: 15395 operations; 2116 current ops/sec; est completion in 18 hours 2 minutes [READ: Count=2115, Max=52767, Min=2178, Avg=3776.73, 90=4563, 99=7619, 99.9=37087, 99.99=52767] +2023-06-05 16:15:54:654 10 sec: 15407 operations; 666.67 current ops/sec; est completion in 18 hours 3 minutes [READ: Count=12, Max=5607, Min=2880, Avg=4079.83, 90=5291, 99=5607, 99.9=5607, 99.99=5607] [CLEANUP: Count=16, Max=12823, Min=0, Avg=987.19, 90=916, 99=12823, 99.9=12823, 99.99=12823] +[OVERALL], RunTime(ms), 10018 +[OVERALL], Throughput(ops/sec), 1537.9317228987823 +[TOTAL_GCS_PS_Scavenge], Count, 4 +[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 33 +[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.329407067278898 [TOTAL_GCS_PS_MarkSweep], Count, 2 -[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 57 -[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.09484982111656544 -[TOTAL_GCs], Count, 18 -[TOTAL_GC_TIME], Time(ms), 164 -[TOTAL_GC_TIME_%], Time(%), 0.2729012397038023 -[READ], Operations, 145947 -[READ], AverageLatency(us), 19479.12810814885 -[READ], MinLatency(us), 2589 -[READ], 
MaxLatency(us), 924425 -[READ], Return=OK, 145947 -[READ], 0, 41983.64836255375 -[READ], 1000, 25742.023255813954 -[READ], 2000, 22715.42782468838 -[READ], 3000, 18937.807952286283 -[READ], 4000, 14030.440746753247 -[READ], 5000, 15569.699881376038 -[READ], 6000, 19632.656532456862 -[READ], 7000, 17519.806401249025 -[READ], 8000, 12427.848152659359 -[READ], 9000, 13402.123028391166 -[READ], 10000, 14583.187627187628 -[READ], 11000, 19648.792601338057 -[READ], 12000, 21465.837489943686 -[READ], 13000, 35111.63052011776 -[READ], 14000, 40859.85603244339 -[READ], 15000, 19755.08925750395 -[READ], 16000, 14847.317745803357 -[READ], 17000, 14418.057306590257 -[READ], 18000, 10584.808112324494 -[READ], 19000, 13655.13615980432 -[READ], 20000, 16421.41550139832 -[READ], 21000, 17593.828355236958 -[READ], 22000, 15556.543219666932 -[READ], 23000, 11649.72311827957 -[READ], 24000, 30852.80266474613 -[READ], 25000, 15630.590407470288 +[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 48 +[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.4791375524056698 +[TOTAL_GCs], Count, 6 +[TOTAL_GC_TIME], Time(ms), 81 +[TOTAL_GC_TIME_%], Time(%), 0.8085446196845678 +[READ], Operations, 15407 +[READ], AverageLatency(us), 4328.689751411696 +[READ], MinLatency(us), 2158 +[READ], MaxLatency(us), 239231 +[READ], 95thPercentileLatency(us), 8375 +[READ], 99thPercentileLatency(us), 10927 +[READ], Return=OK, 15407 +[CLEANUP], Operations, 16 +[CLEANUP], AverageLatency(us), 987.1875 +[CLEANUP], MinLatency(us), 0 +[CLEANUP], MaxLatency(us), 12823 +[CLEANUP], 95thPercentileLatency(us), 1410 +[CLEANUP], 99thPercentileLatency(us), 12823 diff --git a/tests/data/ycsb-time-series.dat b/tests/data/ycsb-time-series.dat index 6b8ed509ec..dec198a07c 100644 --- a/tests/data/ycsb-time-series.dat +++ b/tests/data/ycsb-time-series.dat @@ -1,50 +1,48 @@ -2022-10-01 01:23:57:917 0 sec: 0 operations; est completion in 0 second -2022-10-01 01:24:07:902 10 sec: 21029 operations; 2102.9 current ops/sec; est completion in 5 days 12 
hours [READ AverageLatency(us)=22779.72] -2022-10-01 01:24:17:902 20 sec: 45974 operations; 2494.5 current ops/sec; est completion in 5 days [READ AverageLatency(us)=21093.79] -2022-10-01 01:24:27:902 30 sec: 70942 operations; 2496.8 current ops/sec; est completion in 4 days 21 hours [READ AverageLatency(us)=17638.34] -2022-10-01 01:24:37:902 40 sec: 96038 operations; 2509.6 current ops/sec; est completion in 4 days 19 hours [READ AverageLatency(us)=22447.92] -2022-10-01 01:24:47:902 50 sec: 120910 operations; 2487.2 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=19622.87] -2022-10-01 01:24:57:902 60 sec: 146042 operations; 2513.2 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=14863.78] -2022-10-01 01:24:57:997 60 sec: 146134 operations; 978.72 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=7788.22] [CLEANUP AverageLatency(us)=72.19] -[OVERALL], RunTime(ms), 60093 -[OVERALL], Throughput(ops/sec), 2431.7973807265407 -[TOTAL_GCS_PS_Scavenge], Count, 16 -[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 104 -[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.17306508245552726 +2023-06-05 16:13:47:463 0 sec: 0 operations; est completion in 0 second +2023-06-05 16:13:47,477 WARN [Thread-8] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2023-06-05 16:13:47,812 INFO [Thread-8] hbase1_x.BigtableConnection: Using bigtable-hbase client from jar file:/opt/pkb/ycsb/hbase12-binding/lib/bigtable-hbase-1.x-2.5.0.jar. 
Version: 2.5.0 +2023-06-05 16:13:48:459 1 sec: 0 operations; est completion in 106751991167300 days 15 hours +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +2023-06-05 16:13:49:458 2 sec: 19 operations; 19 current ops/sec; est completion in 121 days 19 hours [READ: Count=19, Max=222207, Min=27504, Avg=122345.26, 90=218751, 99=222207, 99.9=222207, 99.99=222207] +2023-06-05 16:13:50:459 3 sec: 1373 operations; 1354 current ops/sec; est completion in 2 days 12 hours [READ: Count=1354, Max=74431, Min=2800, Avg=6083.69, 90=9975, 99=22559, 99.9=70911, 99.99=74431] +2023-06-05 16:13:51:459 4 sec: 3310 operations; 1937 current ops/sec; est completion in 1 day 9 hours [READ: Count=1938, Max=33535, Min=2442, Avg=4112.58, 90=5243, 99=9719, 99.9=11695, 99.99=33535] +2023-06-05 16:13:52:458 5 sec: 5448 operations; 2138 current ops/sec; est completion in 1 day 1 hour [READ: Count=2137, Max=54559, Min=2342, Avg=3734.04, 90=4343, 99=9551, 99.9=33983, 99.99=54559] +2023-06-05 16:13:53:458 6 sec: 7638 operations; 2190 current ops/sec; est completion in 21 hours 49 minutes [READ: Count=2190, Max=56159, Min=2212, Avg=3645.48, 90=4247, 99=8959, 99.9=20911, 99.99=56159] +2023-06-05 16:13:54:459 7 sec: 9860 
operations; 2222 current ops/sec; est completion in 19 hours 43 minutes [READ: Count=2223, Max=22191, Min=2348, Avg=3592.64, 90=4211, 99=8303, 99.9=11495, 99.99=22191] +2023-06-05 16:13:55:459 8 sec: 12095 operations; 2235 current ops/sec; est completion in 18 hours 22 minutes [READ: Count=2237, Max=11575, Min=2304, Avg=3576.38, 90=4299, 99=7523, 99.9=9247, 99.99=11575] +2023-06-05 16:13:56:458 9 sec: 14372 operations; 2277 current ops/sec; est completion in 17 hours 23 minutes [READ: Count=2275, Max=47455, Min=2336, Avg=3507.24, 90=4163, 99=7427, 99.9=17535, 99.99=47455] +Maximum time elapsed. Requesting stop for the workload. +Stop requested for workload. Now Joining! +2023-06-05 16:13:57:458 10 sec: 16688 operations; 2316 current ops/sec; est completion in 16 hours 38 minutes [READ: Count=2315, Max=40511, Min=2350, Avg=3452.71, 90=4131, 99=7239, 99.9=10175, 99.99=40511] +2023-06-05 16:13:57:477 10 sec: 16699 operations; 611.11 current ops/sec; est completion in 16 hours 39 minutes [READ: Count=11, Max=4735, Min=2828, Avg=3426.55, 90=3839, 99=4735, 99.9=4735, 99.99=4735] [CLEANUP: Count=16, Max=13327, Min=0, Avg=1351.94, 90=1597, 99=13327, 99.9=13327, 99.99=13327] +[OVERALL], RunTime(ms), 10018 +[OVERALL], Throughput(ops/sec), 1666.8995807546416 +[TOTAL_GCS_PS_Scavenge], Count, 5 +[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 37 +[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.36933519664603714 [TOTAL_GCS_PS_MarkSweep], Count, 2 -[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 52 -[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.08653254122776363 -[TOTAL_GCs], Count, 18 -[TOTAL_GC_TIME], Time(ms), 156 -[TOTAL_GC_TIME_%], Time(%), 0.2595976236832909 -[READ], Operations, 146134 -[READ], AverageLatency(us), 19648.40244570052 -[READ], MinLatency(us), 2419 -[READ], MaxLatency(us), 926968 -[READ], Return=OK, 146134 -[READ], 0, 42307.65222970805 -[READ], 1000, 25891.854741896757 -[READ], 2000, 22558.006075334142 -[READ], 3000, 19565.929796569606 -[READ], 4000, 14591.428852975956 -[READ], 5000, 
16380.601777059774 -[READ], 6000, 20572.916465863455 -[READ], 7000, 17695.34032449545 -[READ], 8000, 12106.920532472772 -[READ], 9000, 13669.89244760775 -[READ], 10000, 15021.342295345105 -[READ], 11000, 20345.75170204245 -[READ], 12000, 20804.036348949918 -[READ], 13000, 31959.07740686986 -[READ], 14000, 44360.403739612186 -[READ], 15000, 19838.39495145631 -[READ], 16000, 15042.471107544141 -[READ], 17000, 14904.455489021957 -[READ], 18000, 10872.696848823294 -[READ], 19000, 13736.977991196478 -[READ], 20000, 15973.014016820183 -[READ], 21000, 17668.569322709165 -[READ], 22000, 16865.471417069242 -[READ], 23000, 11306.913763066203 -[READ], 24000, 32364.298769116 -[READ], 25000, 15075.527027027027 +[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 48 +[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.4791375524056698 +[TOTAL_GCs], Count, 7 +[TOTAL_GC_TIME], Time(ms), 85 +[TOTAL_GC_TIME_%], Time(%), 0.8484727490517069 +[READ], Operations, 16699 +[READ], AverageLatency(us), 3981.7847775315886 +[READ], MinLatency(us), 2212 +[READ], MaxLatency(us), 222207 +[READ], 95thPercentileLatency(us), 6171 +[READ], 99thPercentileLatency(us), 10215 +[READ], Return=OK, 16699 +[CLEANUP], Operations, 16 +[CLEANUP], AverageLatency(us), 1351.9375 +[CLEANUP], MinLatency(us), 0 +[CLEANUP], MaxLatency(us), 13327 +[CLEANUP], 95thPercentileLatency(us), 1736 +[CLEANUP], 99thPercentileLatency(us), 13327 diff --git a/tests/linux_packages/linux_boot_test.py b/tests/linux_packages/linux_boot_test.py index cdeb44f4d5..0abb86f5ab 100644 --- a/tests/linux_packages/linux_boot_test.py +++ b/tests/linux_packages/linux_boot_test.py @@ -134,8 +134,13 @@ def testCollectVmToVmSamples(self): with open(os.path.join(self.data_dir, 'boot.output')) as f: boot_output = f.read() # Load tcpdump output, which contains egress timestamps. 
- vm_util.PrependTempDir = mock.Mock( - return_value=os.path.join(self.data_dir, 'tcpdump.output')) + self.enter_context( + mock.patch.object( + vm_util, + 'PrependTempDir', + return_value=os.path.join(self.data_dir, 'tcpdump.output'), + ) + ) self.mock_vm.RemoteCommand = mock.Mock(return_value=(boot_output, '')) self.mock_vm.internal_ip = '10.128.0.11' # Compare golden samples with calculated egress/ingress samples. diff --git a/tests/linux_packages/ycsb_stats_test.py b/tests/linux_packages/ycsb_stats_test.py new file mode 100644 index 0000000000..3b7ca9a737 --- /dev/null +++ b/tests/linux_packages/ycsb_stats_test.py @@ -0,0 +1,489 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for perfkitbenchmarker.packages.ycsb.""" + +import copy +import logging +import os +import unittest + +from absl import flags +from absl.testing import flagsaver +import mock +from perfkitbenchmarker import errors +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import ycsb +from perfkitbenchmarker.linux_packages import ycsb_stats +from tests import pkb_common_test_case +from pyfakefs import fake_filesystem + + +FLAGS = flags.FLAGS + +TEST_DIR = '/mock_dir' + + +def open_data_file(filename): + path = os.path.join(os.path.dirname(__file__), '..', 'data', filename) + with open(path) as fp: + return fp.read() + + +def _parse_and_return_time_series(filename): + content = open_data_file(filename) + return ycsb_stats.ParseResults(content, ycsb_stats.HDRHISTOGRAM) + + +class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super(SimpleResultParserTestCase, self).setUp() + self.contents = open_data_file('ycsb-test-run.dat') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') + + def testCommandLineSet(self): + self.assertEqual( + 'Command line: -db com.yahoo.ycsb.BasicDB -P workloads/workloada -t', + self.results.command_line, + ) + + def testClientSet(self): + self.assertEqual('YCSB Client 0.1', self.results.client) + + def testUpdateStatisticsParsed(self): + self.assertEqual( + ycsb_stats._OpResult( + group='update', + statistics={ + 'Operations': 531, + 'Return=0': 531, + 'AverageLatency(ms)': 0.0659774011299435, + 'MinLatency(ms)': 0.042, + 'MaxLatency(ms)': 0.345, + '95thPercentileLatency(ms)': 0, + '99thPercentileLatency(ms)': 0, + }, + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 530), (19, 1)], + ), + self.results.groups['update'], + ) + + def testReadStatisticsParsed(self): + self.assertEqual( + ycsb_stats._OpResult( + group='read', + statistics={ + 'Operations': 469, + 'Return=0': 469, + 'AverageLatency(ms)': 0.03847761194029851, + 'MinLatency(ms)': 0.034, + 
'MaxLatency(ms)': 0.102, + '95thPercentileLatency(ms)': 0, + '99thPercentileLatency(ms)': 0, + }, + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 469)], + ), + self.results.groups['read'], + ) + + def testOverallStatisticsParsed(self): + self.assertEqual( + ycsb_stats._OpResult( + group='overall', + statistics={'RunTime(ms)': 80.0, 'Throughput(ops/sec)': 12500.0}, + data_type='histogram', + data=[], + ), + self.results.groups['overall'], + ) + + +class DetailedResultParserTestCase(unittest.TestCase): + + def setUp(self): + super(DetailedResultParserTestCase, self).setUp() + self.contents = open_data_file('ycsb-test-run-2.dat') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') + + def testPercentilesFromHistogram_read(self): + hist = self.results.groups['read'].data + percentiles = ycsb_stats._PercentilesFromHistogram(hist) + self.assertEqual(1, percentiles['p50']) + self.assertEqual(7, percentiles['p99']) + + def testPercentilesFromHistogram_update(self): + hist = self.results.groups['update'].data + percentiles = ycsb_stats._PercentilesFromHistogram(hist) + self.assertEqual(1, percentiles['p50']) + self.assertEqual(7, percentiles['p99']) + + +class StatusTimeSeriesParserTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + self.results_1 = _parse_and_return_time_series('ycsb-time-series.dat') + self.results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') + + def testParsedStatusTimeSeriesIsCorrect(self): + results = _parse_and_return_time_series('ycsb-time-series.dat') + expected = ycsb_stats._StatusResult( + timestamp=10, + overall_throughput=2316.0, + op_results=[ + ycsb_stats._OpResult( + group='read', + statistics={ + 'Count': 2315.0, + 'Max': 40.511, + 'Min': 2.350, + 'Avg': 3.45271, + '90': 4.131, + '99': 7.239, + '99.9': 10.175, + '99.99': 40.511, + }, + ) + ], + ) + + self.assertEqual(results.status_time_series[10], expected) + self.assertLen(results.status_time_series, 9) + + 
@flagsaver.flagsaver(ycsb_throughput_time_series=True) + def testCombinedStatusTimeSeriesIsCorrect(self): + results_1 = _parse_and_return_time_series('ycsb-time-series.dat') + results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') + + combined = ycsb_stats.CombineResults( + result_list=[results_1, results_2], + measurement_type=ycsb_stats.TIMESERIES, + combined_hdr={}, + ) + + actual = combined.status_time_series[10] + with self.subTest('Attributes'): + self.assertEqual(actual.timestamp, 10) + self.assertEqual(actual.overall_throughput, 4432.0) + with self.subTest('Statistics'): + self.assertEqual(actual.op_results[0].group, 'read') + expected_statistics = { + 'Count': 4430.0, + 'Max': 52.767, + 'Min': 2.178, + 'Avg': 3.60740577, + '90': 4.347, + '99': 7.429, + '99.9': 23.631, + '99.99': 46.639, + } + for stat, value in expected_statistics.items(): + self.assertAlmostEqual(actual.op_results[0].statistics[stat], value) + with self.subTest('NumItems'): + self.assertLen(combined.status_time_series, 9) + + +class BadResultParserTestCase(unittest.TestCase): + + def testBadTestRun(self): + contents = open_data_file('ycsb-test-run-3.dat') + self.assertRaises( + errors.Benchmarks.KnownIntermittentError, + ycsb_stats.ParseResults, + contents, + 'histogram', + ) + + def testErrorRate(self): + contents = open_data_file('ycsb-test-run-4.dat') + self.assertRaises( + errors.Benchmarks.RunError, + ycsb_stats.ParseResults, + contents, + 'hdrhistogram', + 0.95, + ) + + +class WeightedQuantileTestCase(unittest.TestCase): + + def testEvenlyWeightedSamples(self): + x = list(range(1, 101)) # 1-100 + weights = [1 for _ in x] + self.assertEqual(50, ycsb_stats._WeightedQuantile(x, weights, 0.50)) + self.assertEqual(75, ycsb_stats._WeightedQuantile(x, weights, 0.75)) + self.assertEqual(90, ycsb_stats._WeightedQuantile(x, weights, 0.90)) + self.assertEqual(95, ycsb_stats._WeightedQuantile(x, weights, 0.95)) + self.assertEqual(99, ycsb_stats._WeightedQuantile(x, weights, 0.99)) 
+ self.assertEqual(100, ycsb_stats._WeightedQuantile(x, weights, 1)) + + def testLowWeight(self): + x = [1, 4] + weights = [99, 1] + for i in range(100): + self.assertEqual(1, ycsb_stats._WeightedQuantile(x, weights, i / 100.0)) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) + + def testMidWeight(self): + x = [0, 1.2, 4] + weights = [1, 98, 1] + for i in range(2, 99): + self.assertAlmostEqual( + 1.2, ycsb_stats._WeightedQuantile(x, weights, i / 100.0) + ) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) + + +class ParseWorkloadTestCase(unittest.TestCase): + + def testParsesEmptyString(self): + self.assertDictEqual({}, ycsb.ParseWorkload('')) + + def testIgnoresComment(self): + self.assertDictEqual({}, ycsb.ParseWorkload('#\n')) + self.assertDictEqual( + {}, ycsb.ParseWorkload('#recordcount = 10\n# columnfamily=cf') + ) + self.assertDictEqual( + {'recordcount': '10'}, + ycsb.ParseWorkload('#Sample!\nrecordcount = 10'), + ) + + def testParsesSampleWorkload(self): + contents = open_data_file('ycsb_workloada') + actual = ycsb.ParseWorkload(contents) + + expected = { + 'recordcount': '1000', + 'operationcount': '1000', + 'workload': 'com.yahoo.ycsb.workloads.CoreWorkload', + 'readallfields': 'true', + 'readproportion': '0.5', + 'updateproportion': '0.5', + 'scanproportion': '0', + 'insertproportion': '0', + 'requestdistribution': 'zipfian', + } + + self.assertDictEqual(expected, actual) + + +class CombineResultsTestCase(unittest.TestCase): + + def testGroupMissing(self): + r1 = ycsb_stats.YcsbResult( + groups={ + 'read': ycsb_stats._OpResult( + group='read', + statistics={'Operations': 100, 'Return=0': 100}, + data_type=ycsb_stats.HISTOGRAM, + ) + } + ) + r2 = ycsb_stats.YcsbResult( + groups={ + 'read': ycsb_stats._OpResult( + group='read', + statistics={'Operations': 96, 'Return=0': 94, 'Return=-1': 2}, + data_type=ycsb_stats.HISTOGRAM, + ), + 'update': ycsb_stats._OpResult( + group='update', + statistics={'Operations': 
100, 'AverageLatency(ms)': 25}, + data_type=ycsb_stats.HISTOGRAM, + ), + } + ) + combined = ycsb_stats.CombineResults([r1, r2], 'histogram', {}) + self.assertCountEqual(['read', 'update'], combined.groups) + self.assertCountEqual( + ['Operations', 'Return=0', 'Return=-1'], + combined.groups['read'].statistics, + ) + read_stats = combined.groups['read'].statistics + self.assertEqual( + {'Operations': 196, 'Return=0': 194, 'Return=-1': 2}, read_stats + ) + + def testDropUnaggregatedFromSingleResult(self): + r = ycsb_stats.YcsbResult( + client='', + command_line='', + groups={ + 'read': ycsb_stats._OpResult( + group='read', + statistics={'AverageLatency(ms)': 21}, + data_type=ycsb_stats.HISTOGRAM, + ) + }, + ) + + r_copy = copy.deepcopy(r) + self.assertEqual(r, r_copy) + combined = ycsb_stats.CombineResults([r], 'histogram', {}) + self.assertEqual(r, r_copy) + r.groups['read'].statistics = {} + self.assertEqual(r, combined) + + +class HdrLogsParserTestCase(unittest.TestCase): + + def testParseHdrLogFile(self): + rawlog = """ + #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018] + Value Percentile TotalCount 1/(1-Percentile) + + 314.000 0.000000000000 2 1.00 + 853.000 0.100000000000 49955 1.11 + 949.000 0.200000000000 100351 1.25 + 949.000 0.210000000000 100351 1.27 + 1033.000 0.300000000000 150110 1.43 + #[Mean = 1651.145, StdDeviation = 851.707] + #[Max = 203903.000, Total count = 499019] + #[Buckets = 8, SubBuckets = 2048] + """ + actual = ycsb_stats.ParseHdrLogFile(rawlog) + expected = [ + (0.0, 0.314, 2), + (10.0, 0.853, 49953), + (20.0, 0.949, 50396), + (30.0, 1.033, 49759), + ] + self.assertEqual(actual, expected) + + +class YcsbResultTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + self.fs = fake_filesystem.FakeFilesystem() + self.fs.create_dir(TEST_DIR) + self.fake_open = fake_filesystem.FakeFileOpen(self.fs) + + def testWriteStatusTimeSeriesToFile(self): + result = ycsb_stats.YcsbResult( + 
status_time_series={ + 2: ycsb_stats._StatusResult( + timestamp=2, + overall_throughput=2920.0, + op_results=[ + ycsb_stats._OpResult( + group='read', + data_type='', + data=[], + statistics={ + 'Count': 2993.0, + 'Max': 346111.0, + 'Min': 5804.0, + 'Avg': 96.36988477781489, + '90': 244.555, + '99': 273.94100000000003, + '99.9': 288.693, + '99.99': 308.661, + }, + ) + ], + ), + 3: ycsb_stats._StatusResult( + timestamp=3, + overall_throughput=24279.0, + op_results=[ + ycsb_stats._OpResult( + group='read', + data_type='', + data=[], + statistics={ + 'Count': 24211.0, + 'Max': 463359.0, + 'Min': 2824.0, + 'Avg': 23.845614725951016, + '90': 72.162, + '99': 148.974, + '99.9': 193.701, + '99.99': 263.192, + }, + ) + ], + ), + } + ) + self.enter_context( + mock.patch.object(vm_util, 'GetTempDir', return_value=TEST_DIR) + ) + self.enter_context( + mock.patch.object(ycsb_stats, 'open', side_effect=self.fake_open) + ) + + result.WriteStatusTimeSeriesToFile() + + expected_content = ( + 'time,90,99,99.9,99.99,Avg,Count,Max,Min\r\n' + '2,244.555,273.94100000000003,288.693,308.661,96.36988477781489,2993.0,346111.0,5804.0\r\n' + '3,72.162,148.974,193.701,263.192,23.845614725951016,24211.0,463359.0,2824.0\r\n' + ) + self.assertEqual( + self.fs.get_object(TEST_DIR + '/ycsb_status_output_read.csv').contents, + expected_content, + ) + + +class OpResultTestCase(pkb_common_test_case.PkbCommonTestCase): + + def testFromStatusLine(self): + sample_line = ( + '2023-06-05 16:15:54:654 10 sec: 15407 operations; 666.67 current' + ' ops/sec; est completion in 18 hours 3 minutes [READ: Count=12,' + ' Max=5607, Min=2880, Avg=4079.83, 90=5291, 99=5607, 99.9=5607,' + ' 99.99=5607] [CLEANUP: Count=16, Max=12823, Min=0, Avg=987.19, 90=916,' + ' 99=12823, 99.9=12823, 99.99=12823]' + ) + + actual = list(ycsb_stats._ParseStatusLine(sample_line)) + expected = [ + ycsb_stats._OpResult( + group='read', + statistics={ + '90': 5.291, + '99': 5.607, + '99.9': 5.607, + '99.99': 5.607, + 'Avg': 4.07983, + 
'Count': 12.0, + 'Max': 5.607, + 'Min': 2.880, + }, + ), + ycsb_stats._OpResult( + group='cleanup', + statistics={ + '90': 0.916, + '99': 12.823, + '99.9': 12.823, + '99.99': 12.823, + 'Avg': 0.98719, + 'Count': 16.0, + 'Max': 12.823, + 'Min': 0.0, + }, + ), + ] + + self.assertEqual(actual, expected) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + unittest.main() diff --git a/tests/linux_packages/ycsb_test.py b/tests/linux_packages/ycsb_test.py index de07a6b3ab..1c2a2c32cc 100644 --- a/tests/linux_packages/ycsb_test.py +++ b/tests/linux_packages/ycsb_test.py @@ -13,9 +13,7 @@ # limitations under the License. """Tests for perfkitbenchmarker.packages.ycsb.""" -import copy import logging -import os import unittest from absl import flags @@ -31,315 +29,6 @@ FLAGS = flags.FLAGS -def open_data_file(filename): - path = os.path.join(os.path.dirname(__file__), '..', 'data', filename) - with open(path) as fp: - return fp.read() - - -def _parse_and_return_time_series(filename): - content = open_data_file(filename) - return ycsb_stats.ParseResults(content, 'timeseries') - - -class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): - - def setUp(self): - super(SimpleResultParserTestCase, self).setUp() - self.contents = open_data_file('ycsb-test-run.dat') - self.results = ycsb_stats.ParseResults(self.contents, 'histogram') - - def testCommandLineSet(self): - self.assertEqual( - 'Command line: -db com.yahoo.ycsb.BasicDB -P workloads/workloada -t', - self.results.command_line, - ) - - def testClientSet(self): - self.assertEqual('YCSB Client 0.1', self.results.client) - - def testUpdateStatisticsParsed(self): - self.assertEqual( - ycsb_stats._OpResult( - group='update', - statistics={ - 'Operations': 531, - 'Return=0': 531, - 'AverageLatency(ms)': 0.0659774011299435, - 'MinLatency(ms)': 0.042, - 'MaxLatency(ms)': 0.345, - '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0, - }, - data_type=ycsb_stats.HISTOGRAM, - data=[(0, 
530), (19, 1)], - ), - self.results.groups['update'], - ) - - def testReadStatisticsParsed(self): - self.assertEqual( - ycsb_stats._OpResult( - group='read', - statistics={ - 'Operations': 469, - 'Return=0': 469, - 'AverageLatency(ms)': 0.03847761194029851, - 'MinLatency(ms)': 0.034, - 'MaxLatency(ms)': 0.102, - '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0, - }, - data_type=ycsb_stats.HISTOGRAM, - data=[(0, 469)], - ), - self.results.groups['read'], - ) - - def testOverallStatisticsParsed(self): - self.assertEqual( - ycsb_stats._OpResult( - group='overall', - statistics={'RunTime(ms)': 80.0, 'Throughput(ops/sec)': 12500.0}, - data_type='histogram', - data=[], - ), - self.results.groups['overall'], - ) - - -class DetailedResultParserTestCase(unittest.TestCase): - - def setUp(self): - super(DetailedResultParserTestCase, self).setUp() - self.contents = open_data_file('ycsb-test-run-2.dat') - self.results = ycsb_stats.ParseResults(self.contents, 'histogram') - - def testPercentilesFromHistogram_read(self): - hist = self.results.groups['read'].data - percentiles = ycsb_stats._PercentilesFromHistogram(hist) - self.assertEqual(1, percentiles['p50']) - self.assertEqual(7, percentiles['p99']) - - def testPercentilesFromHistogram_update(self): - hist = self.results.groups['update'].data - percentiles = ycsb_stats._PercentilesFromHistogram(hist) - self.assertEqual(1, percentiles['p50']) - self.assertEqual(7, percentiles['p99']) - - -class ThroughputTimeSeriesParserTestCase( - pkb_common_test_case.PkbCommonTestCase -): - - def setUp(self): - super().setUp() - self.results_1 = _parse_and_return_time_series('ycsb-time-series.dat') - self.results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') - - def testParsedThroughputTimeSeriesIsCorrect(self): - results = _parse_and_return_time_series('ycsb-time-series.dat') - expected = { - 10: 2102.9, - 20: 2494.5, - 30: 2496.8, - 40: 2509.6, - 50: 2487.2, - 60: 2513.2, - } - 
self.assertEqual(results.throughput_time_series, expected) - - @flagsaver.flagsaver(ycsb_throughput_time_series=True) - def testCombinedThroughputTimeSeriesIsCorrect(self): - results_1 = _parse_and_return_time_series('ycsb-time-series.dat') - results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') - - combined = ycsb_stats.CombineResults( - result_list=[results_1, results_2], - measurement_type=ycsb_stats.TIMESERIES, - combined_hdr={}, - ) - - expected = { - 10: 4187.5, - 20: 4990.2, - 30: 4994.0, - 40: 5018.2, - 50: 4976.5, - 60: 5023.2, - } - self.assertEqual(combined.throughput_time_series, expected) - - -class BadResultParserTestCase(unittest.TestCase): - - def testBadTestRun(self): - contents = open_data_file('ycsb-test-run-3.dat') - self.assertRaises( - errors.Benchmarks.KnownIntermittentError, - ycsb_stats.ParseResults, - contents, - 'histogram', - ) - - def testErrorRate(self): - contents = open_data_file('ycsb-test-run-4.dat') - self.assertRaises( - errors.Benchmarks.RunError, - ycsb_stats.ParseResults, - contents, - 'hdrhistogram', - 0.95, - ) - - -class WeightedQuantileTestCase(unittest.TestCase): - - def testEvenlyWeightedSamples(self): - x = list(range(1, 101)) # 1-100 - weights = [1 for _ in x] - self.assertEqual(50, ycsb_stats._WeightedQuantile(x, weights, 0.50)) - self.assertEqual(75, ycsb_stats._WeightedQuantile(x, weights, 0.75)) - self.assertEqual(90, ycsb_stats._WeightedQuantile(x, weights, 0.90)) - self.assertEqual(95, ycsb_stats._WeightedQuantile(x, weights, 0.95)) - self.assertEqual(99, ycsb_stats._WeightedQuantile(x, weights, 0.99)) - self.assertEqual(100, ycsb_stats._WeightedQuantile(x, weights, 1)) - - def testLowWeight(self): - x = [1, 4] - weights = [99, 1] - for i in range(100): - self.assertEqual(1, ycsb_stats._WeightedQuantile(x, weights, i / 100.0)) - self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) - - def testMidWeight(self): - x = [0, 1.2, 4] - weights = [1, 98, 1] - for i in range(2, 99): - 
self.assertAlmostEqual( - 1.2, ycsb_stats._WeightedQuantile(x, weights, i / 100.0) - ) - self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) - - -class ParseWorkloadTestCase(unittest.TestCase): - - def testParsesEmptyString(self): - self.assertDictEqual({}, ycsb.ParseWorkload('')) - - def testIgnoresComment(self): - self.assertDictEqual({}, ycsb.ParseWorkload('#\n')) - self.assertDictEqual( - {}, ycsb.ParseWorkload('#recordcount = 10\n# columnfamily=cf') - ) - self.assertDictEqual( - {'recordcount': '10'}, - ycsb.ParseWorkload('#Sample!\nrecordcount = 10'), - ) - - def testParsesSampleWorkload(self): - contents = open_data_file('ycsb_workloada') - actual = ycsb.ParseWorkload(contents) - - expected = { - 'recordcount': '1000', - 'operationcount': '1000', - 'workload': 'com.yahoo.ycsb.workloads.CoreWorkload', - 'readallfields': 'true', - 'readproportion': '0.5', - 'updateproportion': '0.5', - 'scanproportion': '0', - 'insertproportion': '0', - 'requestdistribution': 'zipfian', - } - - self.assertDictEqual(expected, actual) - - -class CombineResultsTestCase(unittest.TestCase): - - def testGroupMissing(self): - r1 = ycsb_stats.YcsbResult( - groups={ - 'read': ycsb_stats._OpResult( - group='read', - statistics={'Operations': 100, 'Return=0': 100}, - data_type=ycsb_stats.HISTOGRAM, - ) - } - ) - r2 = ycsb_stats.YcsbResult( - groups={ - 'read': ycsb_stats._OpResult( - group='read', - statistics={'Operations': 96, 'Return=0': 94, 'Return=-1': 2}, - data_type=ycsb_stats.HISTOGRAM, - ), - 'update': ycsb_stats._OpResult( - group='update', - statistics={'Operations': 100, 'AverageLatency(ms)': 25}, - data_type=ycsb_stats.HISTOGRAM, - ), - } - ) - combined = ycsb_stats.CombineResults([r1, r2], 'histogram', {}) - self.assertCountEqual(['read', 'update'], combined.groups) - self.assertCountEqual( - ['Operations', 'Return=0', 'Return=-1'], - combined.groups['read'].statistics, - ) - read_stats = combined.groups['read'].statistics - self.assertEqual( - 
{'Operations': 196, 'Return=0': 194, 'Return=-1': 2}, read_stats - ) - - def testDropUnaggregatedFromSingleResult(self): - r = ycsb_stats.YcsbResult( - client='', - command_line='', - groups={ - 'read': ycsb_stats._OpResult( - group='read', - statistics={'AverageLatency(ms)': 21}, - data_type=ycsb_stats.HISTOGRAM, - ) - }, - ) - - r_copy = copy.deepcopy(r) - self.assertEqual(r, r_copy) - combined = ycsb_stats.CombineResults([r], 'histogram', {}) - self.assertEqual(r, r_copy) - r.groups['read'].statistics = {} - self.assertEqual(r, combined) - - -class HdrLogsParserTestCase(unittest.TestCase): - - def testParseHdrLogFile(self): - rawlog = """ - #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018] - Value Percentile TotalCount 1/(1-Percentile) - - 314.000 0.000000000000 2 1.00 - 853.000 0.100000000000 49955 1.11 - 949.000 0.200000000000 100351 1.25 - 949.000 0.210000000000 100351 1.27 - 1033.000 0.300000000000 150110 1.43 - #[Mean = 1651.145, StdDeviation = 851.707] - #[Max = 203903.000, Total count = 499019] - #[Buckets = 8, SubBuckets = 2048] - """ - actual = ycsb_stats.ParseHdrLogFile(rawlog) - expected = [ - (0.0, 0.314, 2), - (10.0, 0.853, 49953), - (20.0, 0.949, 50396), - (30.0, 1.033, 49759), - ] - self.assertEqual(actual, expected) - - class PrerequisitesTestCase(pkb_common_test_case.PkbCommonTestCase): @parameterized.named_parameters(