From ff642e13277954890c877bac85a7f2441a8445b3 Mon Sep 17 00:00:00 2001 From: Brandon Liu Date: Thu, 8 Jun 2023 10:32:07 -0700 Subject: [PATCH] Support complex timeseries output in YCSB that summarizes per-second stats. PiperOrigin-RevId: 538826247 --- perfkitbenchmarker/linux_packages/ycsb.py | 36 +- .../linux_packages/ycsb_stats.py | 351 +++++++++++-- requirements-testing.txt | 1 + tests/data/ycsb-time-series-2.dat | 96 ++-- tests/data/ycsb-time-series.dat | 96 ++-- tests/linux_packages/linux_boot_test.py | 9 +- tests/linux_packages/ycsb_stats_test.py | 489 ++++++++++++++++++ tests/linux_packages/ycsb_test.py | 311 ----------- 8 files changed, 921 insertions(+), 468 deletions(-) create mode 100644 tests/linux_packages/ycsb_stats_test.py diff --git a/perfkitbenchmarker/linux_packages/ycsb.py b/perfkitbenchmarker/linux_packages/ycsb.py index 5988894a49..d77247c5a6 100644 --- a/perfkitbenchmarker/linux_packages/ycsb.py +++ b/perfkitbenchmarker/linux_packages/ycsb.py @@ -300,7 +300,8 @@ ) # Status line pattern -_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec' +_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+(\.\d+)?) current ops\/sec' +_STATUS_GROUPS_PATTERN = r'\[(.+?): (.+?)\]' # Status interval default is 10 sec, change to 1 sec. _STATUS_INTERVAL_SEC = 1 @@ -444,6 +445,18 @@ def CheckPrerequisites(): '--ycsb_incremental_load=target and not --ycsb_run_parameters.' ) + # Both HISTOGRAM and TIMESERIES do not output latencies on a per-interval + # basis, so we use the more-detailed HDRHISTOGRAM. + if ( + _THROUGHPUT_TIME_SERIES.value + and FLAGS.ycsb_measurement_type != ycsb_stats.HDRHISTOGRAM + ): + raise errors.Config.InvalidValue( + 'Measuring a throughput histogram requires running with ' + '--ycsb_measurement_type=HDRHISTOGRAM. Other measurement types are ' + 'unsupported unless additional parsing is added.' + ) + @vm_util.Retry(poll_interval=1) def Install(vm): @@ -551,6 +564,8 @@ class YCSBExecutor: perclientparam: list. 
shardkeyspace: boolean. Default to False, indicates if clients should have their own keyspace. + burst_time_offset_sec: When running with --ycsb_burst_load, the amount of + seconds to offset time series measurements during the increased load. """ FLAG_ATTRIBUTES = 'cp', 'jvm-args', 'target', 'threads' @@ -571,6 +586,8 @@ def __init__(self, database, parameter_files=None, **kwargs): self.perclientparam = self.parameters.pop('perclientparam', None) self.shardkeyspace = self.parameters.pop('shardkeyspace', False) + self.burst_time_offset_sec = 0 + def _BuildCommand(self, command_name, parameter_files=None, **kwargs): """Builds the YCSB command line.""" command = [YCSB_EXE, command_name, self.database] @@ -737,7 +754,10 @@ def _Run(self, vm, **kwargs): vm.RemoteCommand('mkdir -p {0}'.format(hdr_files_dir)) stdout, stderr = vm.RobustRemoteCommand(command) return ycsb_stats.ParseResults( - str(stderr + stdout), self.measurement_type, _ERROR_RATE_THRESHOLD.value + str(stderr + stdout), + self.measurement_type, + _ERROR_RATE_THRESHOLD.value, + self.burst_time_offset_sec, ) def _RunThreaded(self, vms, **kwargs): @@ -1066,14 +1086,22 @@ def _RunBurstMode(self, vms, workloads, run_kwargs=None): initial_qps = int(run_params.get('target', 0)) samples = list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) + # Attach metadata for identifying pre burst load. 
+ for s in samples: + s.metadata['ycsb_burst_multiplier'] = 1 + + self.burst_time_offset_sec = FLAGS.ycsb_timelimit if _BURST_LOAD_MULTIPLIER.value == -1: run_params.pop('target') # Set to unlimited else: run_params['target'] = initial_qps * _BURST_LOAD_MULTIPLIER.value self._SetRunParameters(run_params) - samples += list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) - return samples + burst_samples = list(self.RunStaircaseLoads(vms, workloads, **run_kwargs)) + for s in burst_samples: + s.metadata['ycsb_burst_multiplier'] = _BURST_LOAD_MULTIPLIER.value + + return samples + burst_samples def _GetIncrementalQpsTargets(self, target_qps: int) -> list[int]: """Returns incremental QPS targets.""" diff --git a/perfkitbenchmarker/linux_packages/ycsb_stats.py b/perfkitbenchmarker/linux_packages/ycsb_stats.py index d48f679d55..eabfa330a1 100644 --- a/perfkitbenchmarker/linux_packages/ycsb_stats.py +++ b/perfkitbenchmarker/linux_packages/ycsb_stats.py @@ -15,7 +15,7 @@ import bisect import collections -from collections.abc import Iterable, Mapping +from collections.abc import Iterable, Iterator, Mapping import copy import csv import dataclasses @@ -33,6 +33,7 @@ from perfkitbenchmarker import linux_packages from perfkitbenchmarker import sample from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util FLAGS = flags.FLAGS @@ -60,6 +61,7 @@ # Statistics with operator 'None' will be dropped. AGGREGATE_OPERATORS = { 'Operations': operator.add, + 'Count': operator.add, 'RunTime(ms)': max, 'Return=0': operator.add, 'Return=-1': operator.add, @@ -75,10 +77,19 @@ '99thPercentileLatency(ms)': None, # Calculated across clients. 'MinLatency(ms)': min, 'MaxLatency(ms)': max, + 'Max': max, + 'Min': min, } +_STATUS_LATENCIES = [ + 'Avg', + 'Max', + 'Min', +] + # Status line pattern -_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+.\d+) current ops\/sec' +_STATUS_PATTERN = r'(\d+) sec: \d+ operations; (\d+(\.\d+)?) 
current ops\/sec' +_STATUS_GROUPS_PATTERN = r'\[(.+?): (.+?)\]' # Status interval default is 10 sec, change to 1 sec. _STATUS_INTERVAL_SEC = 1 @@ -97,14 +108,25 @@ _HdrHistogramTuple = tuple[float, float, int] +def _IsStatusLatencyStatistic(stat_name: str) -> bool: + """Returns whether a name is a latency statistic (i.e. "99.9").""" + return ( + stat_name.replace('.', '', 1).isdigit() or stat_name in _STATUS_LATENCIES + ) + + @dataclasses.dataclass class _OpResult: """Individual results for a single operation. + YCSB results are either aggregated per operation (read/update) at the end of + the run or output on a per-interval (i.e. second) basis during the run. + Attributes: - group: group name (e.g., update, insert, overall) - statistics: dict mapping from statistic name to value - data_type: Corresponds to --ycsb_measurement_type. + group: group name (e.g. update, insert, overall) + statistics: dict mapping from statistic name to value (e.g. {'Count': 33}) + data_type: Corresponds to --ycsb_measurement_type (e.g. histogram, + hdrhistogram, or timeseries). data: For HISTOGRAM/HDRHISTOGRAM: list of (ms_lower_bound, count) tuples, e.g. [(0, 530), (19, 1)] indicates that 530 ops took between 0ms and 1ms, and 1 took between 19ms and 20ms. Empty bins are not reported. For @@ -116,6 +138,100 @@ class _OpResult: data: list[tuple[int, float]] = dataclasses.field(default_factory=list) statistics: dict[str, float] = dataclasses.field(default_factory=dict) + @classmethod + def FromSummaryLines( + cls, lines: Iterable[str], operation: str, data_type: str + ) -> '_OpResult': + """Returns an _OpResult parsed from YCSB summary lines. 
+ + Example format: + [UPDATE], Operations, 2468054 + [UPDATE], AverageLatency(us), 2218.8513395574005 + [UPDATE], MinLatency(us), 554 + [UPDATE], MaxLatency(us), 352634 + [UPDATE], 95thPercentileLatency(ms), 4 + [UPDATE], 99thPercentileLatency(ms), 7 + [UPDATE], Return=0, 2468054 + + Args: + lines: An iterable of lines parsed from the YCSB summary, grouped by + operation type. + operation: The operation type that corresponds to `lines`. + data_type: Corresponds to --ycsb_measurement_type. + + Returns: + An _OpResult with the parsed data. + """ + result = cls(group=operation, data_type=data_type) + latency_unit = 'ms' + for _, name, val in lines: + name = name.strip() + val = val.strip() + # Drop ">" from ">1000" + if name.startswith('>'): + name = name[1:] + val = float(val) if '.' in val or 'nan' in val.lower() else int(val) + if name.isdigit(): + if val: + if data_type == TIMESERIES and latency_unit == 'us': + val /= 1000.0 + result.data.append((int(name), val)) + else: + if '(us)' in name: + name = name.replace('(us)', '(ms)') + val /= 1000.0 + latency_unit = 'us' + result.statistics[name] = val + return result + + @classmethod + def FromStatusLine(cls, match: re.Match[str]) -> '_OpResult': + """Returns an _OpResult from a _STATUS_GROUPS_PATTERN match. + + Example format: + [READ: Count=33, Max=11487, Min=2658, Avg=4987.36, + 90=8271, 99=11487, 99.9=11487, 99.99=11487] + + Args: + match: Match object that matches _STATUS_GROUPS_PATTERN. + + Returns: + An _OpResult object with group and statistics. + """ + operation_name = match.group(1).lower() + statistics = {} + for pair in match.group(2).split(', '): + k, v = pair.split('=') + # Sometimes output can look like "Avg=". + v = 0 if not v else float(v) + if _IsStatusLatencyStatistic(k): + v /= 1000.0 + statistics[k] = float(v) + return cls(group=operation_name, statistics=statistics) + + +@dataclasses.dataclass +class _StatusResult: + """Represents YCSB results at a given timestamp. 
+ + Example format: + + 254 sec: 6149469 operations; 5897 current ops/sec; est completion in 11 hours + 24 minutes [READ: Count=5887, Max=4259839, Min=2514, Avg=63504.23, 90=3863, + 99=3848191, 99.9=4161535, 99.99=4243455] [READ-FAILED: Count=11, Max=4040703, + Min=3696640, Avg=3836369.45, 90=4005887, 99=4040703, 99.9=4040703, + 99.99=4040703] + + Attributes: + timestamp: The time (in seconds) since the start of the test. + overall_throughput: Average QPS. + op_results: list of _OpResult. + """ + + timestamp: int + overall_throughput: float + op_results: list[_OpResult] = dataclasses.field(default_factory=list) + @dataclasses.dataclass class YcsbResult: @@ -124,17 +240,75 @@ class YcsbResult: Attributes: client: Contains YCSB version information. command_line: Command line executed. - throughput_time_series: Time series of throughputs (interval, QPS). - groups: dict of operation group name to results for that operation. + status_time_series: Granular time series (see _StatusResult). + groups: Summary dict of operation group name to results for that operation. """ client: str = '' command_line: str = '' - throughput_time_series: _ThroughputTimeSeries = dataclasses.field( + status_time_series: dict[int, _StatusResult] = dataclasses.field( default_factory=dict ) groups: dict[str, _OpResult] = dataclasses.field(default_factory=dict) + def SplitStatusTimeSeriesForSamples( + self, + ) -> dict[str, dict[str, list[tuple[int, float]]]]: + """Yields individual time series by operation type (i.e. 
read/update).""" + time_series_by_op_and_stat = collections.defaultdict( + lambda: collections.defaultdict(list) + ) + status_results = sorted(self.status_time_series.items()) + for timestamp, status_result in status_results: + for op_result in status_result.op_results: + for stat, value in op_result.statistics.items(): + time_series_by_op_and_stat[op_result.group][stat].append( + (timestamp, value) + ) + + return time_series_by_op_and_stat + + def _GetStatsToWrite(self) -> list[str]: + stats_to_write = set() + for _, status_result in sorted(self.status_time_series.items()): + for op_result in status_result.op_results: + stats_to_write.update( + [ + stat + for stat in op_result.statistics.keys() + if _IsStatusLatencyStatistic(stat) or stat == 'Count' + ] + ) + return list(stats_to_write) + + def WriteStatusTimeSeriesToFile(self) -> None: + """Writes time series for each operation to separate file in tempdir.""" + stats_to_write = ['time'] + sorted(self._GetStatsToWrite()) + written_headers = [] + for timestamp, status_result in sorted(self.status_time_series.items()): + for op_result in status_result.op_results: + output_file = vm_util.PrependTempDir( + f'ycsb_status_output_{op_result.group}.csv' + ) + filtered_dict = { + k: v + for (k, v) in op_result.statistics.items() + if k in stats_to_write + } + filtered_dict['time'] = timestamp + with open(output_file, 'a+', newline='') as f: + writer = csv.DictWriter(f, fieldnames=stats_to_write) + if op_result.group not in written_headers: + writer.writeheader() + written_headers.append(op_result.group) + writer.writerow(filtered_dict) + + +def _ParseStatusLine(line: str) -> Iterator[_OpResult]: + """Returns a list of _OpResults from granular YCSB status output.""" + matches = re.finditer(_STATUS_GROUPS_PATTERN, line) + return (_OpResult.FromStatusLine(match) for match in matches) + def _ValidateErrorRate(result: YcsbResult, threshold: float) -> None: """Raises an error if results contains entries with too high error rate. 
@@ -180,6 +354,7 @@ def ParseResults( ycsb_result_string: str, data_type: str = 'histogram', error_rate_threshold: float = 1.0, + timestamp_offset_sec: int = 0, ) -> 'YcsbResult': """Parse YCSB results. @@ -262,6 +437,9 @@ similarly. error_rate_threshold: Error statistics in the output should not exceed this ratio. + timestamp_offset_sec: The number of seconds to offset the timestamp by for + runs measuring the status time series. Useful if there are multiple + runs back-to-back. Returns: A YcsbResult object that contains the results from parsing YCSB output. @@ -281,7 +459,7 @@ lines = [] client_string = 'YCSB' command_line = 'unknown' - throughput_time_series = {} + status_time_series = {} fp = io.StringIO(ycsb_result_string) result_string = next(fp).strip() @@ -297,9 +475,12 @@ def IsHeadOfResults(line): match = re.search(_STATUS_PATTERN, result_string) if match is not None: timestamp, qps = int(match.group(1)), float(match.group(2)) + timestamp += timestamp_offset_sec # Repeats in the printed status are erroneous, ignore. - if timestamp not in throughput_time_series: - throughput_time_series[timestamp] = qps + if timestamp not in status_time_series: + status_time_series[timestamp] = _StatusResult( + timestamp, qps, list(_ParseStatusLine(result_string)) + ) try: result_string = next(fp).strip() except StopIteration: @@ -328,37 +509,16 @@ def LineFilter(line): result = YcsbResult( client=client_string, command_line=command_line, - throughput_time_series=throughput_time_series, + status_time_series=status_time_series, ) for operation, lines in by_operation: operation = operation[1:-1].lower() - if operation == 'cleanup': continue - - op_result = _OpResult(group=operation, data_type=data_type) - latency_unit = 'ms' - for _, name, val in lines: - name = name.strip() - val = val.strip() - # Drop ">" from ">1000" - if name.startswith('>'): - name = name[1:] - val = float(val) if '.' 
in val or 'nan' in val.lower() else int(val) - if name.isdigit(): - if val: - if data_type == TIMESERIES and latency_unit == 'us': - val /= 1000.0 - op_result.data.append((int(name), val)) - else: - if '(us)' in name: - name = name.replace('(us)', '(ms)') - val /= 1000.0 - latency_unit = 'us' - op_result.statistics[name] = val - - result.groups[operation] = op_result + result.groups[operation] = _OpResult.FromSummaryLines( + lines, operation, data_type + ) _ValidateErrorRate(result, error_rate_threshold) return result @@ -592,10 +752,79 @@ def _CombineLatencyTimeSeries( combined_weights[timestamp] += 1.0 return result - def _CombineThroughputTimeSeries( - series1: _ThroughputTimeSeries, series2: _ThroughputTimeSeries - ) -> _ThroughputTimeSeries: - """Returns a combined dict of [timestamp, total QPS] from the two series.""" + def _CombineStatistics(result1: _OpResult, result2: _OpResult) -> _OpResult: + """Combines reported statistics. + + If no combining operator is defined, the statistic is skipped. + Otherwise, the aggregated value is either: + * The value in result2, if the statistic is not present in result1, or + * AGGREGATE_OPERATORS[statistic](result1_value, result2_value) + + Args: + result1: First _OpResult to combine. + result2: Second _OpResult to combine. + + Returns: + A combined _OpResult. + """ + combined = copy.deepcopy(result1) + for k, v in result2.statistics.items(): + # Numeric keys are latencies + if k not in AGGREGATE_OPERATORS and not _IsStatusLatencyStatistic(k): + continue + # Drop if not an aggregated statistic. + elif not _IsStatusLatencyStatistic(k) and AGGREGATE_OPERATORS[k] is None: + combined.statistics.pop(k, None) + continue + # Copy over if not already in aggregate. + elif k not in combined.statistics: + combined.statistics[k] = copy.deepcopy(v) + continue + + # Different cases for average latency and numeric latency when reporting a 
Provide the average of percentile latencies since we + # can't accurately calculate it. + if k == 'Avg': + s1, s2 = result1.statistics, result2.statistics + count = s1['Count'] + s2['Count'] + new_avg = ( + (s2['Count'] * s2['Avg'] + s1['Count'] * s1['Avg']) / count + if count + else 0 + ) + combined.statistics['Avg'] = new_avg + continue + # Cases where the stat is a latency i.e. 99, 99.9, 99.99. + elif k.replace('.', '', 1).isdigit(): + s1, s2 = result1.statistics, result2.statistics + new_avg = (s1[k] + s2[k]) / 2 + combined.statistics[k] = new_avg + continue + + op = AGGREGATE_OPERATORS[k] + combined.statistics[k] = op(combined.statistics[k], v) + return combined + + def _CombineOpResultLists( + list1: Iterable[_OpResult], list2: Iterable[_OpResult] + ) -> list[_OpResult]: + """Combines two lists of _OpResult into a single list.""" + list1_by_operation = {result.group: result for result in list1} + list2_by_operation = {result.group: result for result in list2} + result = copy.deepcopy(list1_by_operation) + for operation in list2_by_operation: + if operation not in result: + result[operation] = copy.deepcopy(list2_by_operation[operation]) + else: + result[operation] = _CombineStatistics( + result[operation], list2_by_operation[operation] + ) + return list(result.values()) + + def _CombineStatusTimeSeries( + series1: Mapping[int, _StatusResult], series2: Mapping[int, _StatusResult] + ) -> dict[int, _StatusResult]: + """Returns a combined dict of [timestamp, result] from the two series.""" timestamps1 = set(series1) timestamps2 = set(series2) all_timestamps = timestamps1 | timestamps2 @@ -610,7 +839,18 @@ def _CombineThroughputTimeSeries( ) result = {} for timestamp in all_timestamps: - result[timestamp] = series1.get(timestamp, 0) + series2.get(timestamp, 0) + combined_status_result = _StatusResult(timestamp, 0) + status1 = series1.get(timestamp, _StatusResult(timestamp, 0)) + status2 = series2.get(timestamp, _StatusResult(timestamp, 0)) + # Add overall 
throughputs + combined_status_result.overall_throughput = ( + status1.overall_throughput + status2.overall_throughput + ) + # Combine statistics via operators. + combined_status_result.op_results = _CombineOpResultLists( + status1.op_results, status2.op_results + ) + result[timestamp] = combined_status_result return result result_list = list(result_list) @@ -665,10 +905,8 @@ def _CombineThroughputTimeSeries( ) result.client = ' '.join((result.client, indiv.client)) result.command_line = ';'.join((result.command_line, indiv.command_line)) - - # if _THROUGHPUT_TIME_SERIES.value: - result.throughput_time_series = _CombineThroughputTimeSeries( - result.throughput_time_series, indiv.throughput_time_series + result.status_time_series = _CombineStatusTimeSeries( + result.status_time_series, indiv.status_time_series ) if measurement_type == HDRHISTOGRAM: @@ -767,14 +1005,21 @@ def CreateSamples( base_metadata['command_line'] = command_line base_metadata.update(kwargs) - throughput_time_series = ycsb_result.throughput_time_series - if throughput_time_series: - yield sample.Sample( - 'Throughput Time Series', - 0, - '', - {'throughput_time_series': sorted(throughput_time_series.items())}, - ) + if ycsb_result.status_time_series: + for ( + operation, + time_series_by_stat, + ) in ycsb_result.SplitStatusTimeSeriesForSamples().items(): + for stat, time_series in time_series_by_stat.items(): + timestamps, values = zip(*time_series) + yield sample.CreateTimeSeriesSample( + values=values, + timestamps=timestamps, + metric=f'{operation} {stat} time series', + units='ms' if _IsStatusLatencyStatistic(stat) else 'ops/sec', + interval=1.0, + ) + ycsb_result.WriteStatusTimeSeriesToFile() for group_name, group in ycsb_result.groups.items(): meta = base_metadata.copy() diff --git a/requirements-testing.txt b/requirements-testing.txt index d339353b39..5671ff0616 100644 --- a/requirements-testing.txt +++ b/requirements-testing.txt @@ -36,3 +36,4 @@ freezegun boto3 google-cloud-pubsub 
requests-mock +pyfakefs diff --git a/tests/data/ycsb-time-series-2.dat b/tests/data/ycsb-time-series-2.dat index b0616c9596..b1ddc94e93 100644 --- a/tests/data/ycsb-time-series-2.dat +++ b/tests/data/ycsb-time-series-2.dat @@ -1,50 +1,48 @@ -2022-10-01 01:23:57:907 0 sec: 0 operations; est completion in 0 second -2022-10-01 01:24:07:891 10 sec: 20846 operations; 2084.6 current ops/sec; est completion in 5 days 13 hours [READ AverageLatency(us)=22365.65] -2022-10-01 01:24:17:891 20 sec: 45803 operations; 2495.7 current ops/sec; est completion in 5 days 1 hour [READ AverageLatency(us)=20884.12] -2022-10-01 01:24:27:891 30 sec: 70775 operations; 2497.2 current ops/sec; est completion in 4 days 21 hours [READ AverageLatency(us)=17488] -2022-10-01 01:24:37:891 40 sec: 95861 operations; 2508.6 current ops/sec; est completion in 4 days 19 hours [READ AverageLatency(us)=22344.81] -2022-10-01 01:24:47:891 50 sec: 120757 operations; 2489.3 current ops/sec; est completion in 4 days 19 hours [READ AverageLatency(us)=19458.49] -2022-10-01 01:24:57:891 60 sec: 145857 operations; 2510 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=14855.12] -2022-10-01 01:24:57:988 60 sec: 145947 operations; 937.5 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=10245.58] [CLEANUP AverageLatency(us)=68.07] -[OVERALL], RunTime(ms), 60095 -[OVERALL], Throughput(ops/sec), 2428.604709210417 -[TOTAL_GCS_PS_Scavenge], Count, 16 -[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 107 -[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.17805141858723686 +2023-06-05 16:15:44:641 0 sec: 0 operations; est completion in 106751991167300 days 15 hours +2023-06-05 16:15:44,655 WARN [Thread-8] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable +2023-06-05 16:15:44,979 INFO [Thread-8] hbase1_x.BigtableConnection: Using bigtable-hbase client from jar file:/opt/pkb/ycsb/hbase12-binding/lib/bigtable-hbase-1.x-2.5.0.jar. Version: 2.5.0 +2023-06-05 16:15:45:636 1 sec: 0 operations; est completion in 106751991167300 days 15 hours +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +2023-06-05 16:15:46:636 2 sec: 18 operations; 18 current ops/sec; est completion in 128 days 15 hours [READ: Count=18, Max=239231, Min=34592, Avg=137007.11, 90=235903, 99=239231, 99.9=239231, 99.99=239231] +2023-06-05 16:15:47:636 3 sec: 1200 operations; 1182 current ops/sec; est completion in 2 days 21 hours [READ: Count=1184, Max=76735, Min=2574, Avg=6996.8, 90=10471, 99=48767, 99.9=68863, 99.99=76735] +2023-06-05 16:15:48:636 4 sec: 3045 operations; 1845 current ops/sec; est completion in 1 day 12 hours [READ: Count=1844, Max=14311, Min=2522, Avg=4327.71, 90=5723, 99=10023, 99.9=10967, 99.99=14311] +2023-06-05 16:15:49:636 5 sec: 5029 operations; 1984 current ops/sec; est completion in 1 day 3 hours [READ: Count=1983, Max=16303, Min=2416, Avg=4013.38, 90=4871, 99=9623, 99.9=13543, 99.99=16303] +2023-06-05 
16:15:50:636 6 sec: 7068 operations; 2039 current ops/sec; est completion in 23 hours 34 minutes [READ: Count=2039, Max=12839, Min=2296, Avg=3923.2, 90=4887, 99=9087, 99.9=11575, 99.99=12839] +2023-06-05 16:15:51:636 7 sec: 9133 operations; 2065 current ops/sec; est completion in 21 hours 17 minutes [READ: Count=2067, Max=32991, Min=2356, Avg=3866.79, 90=4643, 99=9295, 99.9=13887, 99.99=32991] +2023-06-05 16:15:52:636 8 sec: 11157 operations; 2024 current ops/sec; est completion in 19 hours 55 minutes [READ: Count=2023, Max=45439, Min=2320, Avg=3946.67, 90=4731, 99=9295, 99.9=25439, 99.99=45439] +2023-06-05 16:15:53:636 9 sec: 13279 operations; 2122 current ops/sec; est completion in 18 hours 49 minutes [READ: Count=2122, Max=16703, Min=2158, Avg=3765.34, 90=4607, 99=8623, 99.9=14175, 99.99=16703] +Maximum time elapsed. Requesting stop for the workload. +Stop requested for workload. Now Joining! +2023-06-05 16:15:54:636 10 sec: 15395 operations; 2116 current ops/sec; est completion in 18 hours 2 minutes [READ: Count=2115, Max=52767, Min=2178, Avg=3776.73, 90=4563, 99=7619, 99.9=37087, 99.99=52767] +2023-06-05 16:15:54:654 10 sec: 15407 operations; 666.67 current ops/sec; est completion in 18 hours 3 minutes [READ: Count=12, Max=5607, Min=2880, Avg=4079.83, 90=5291, 99=5607, 99.9=5607, 99.99=5607] [CLEANUP: Count=16, Max=12823, Min=0, Avg=987.19, 90=916, 99=12823, 99.9=12823, 99.99=12823] +[OVERALL], RunTime(ms), 10018 +[OVERALL], Throughput(ops/sec), 1537.9317228987823 +[TOTAL_GCS_PS_Scavenge], Count, 4 +[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 33 +[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.329407067278898 [TOTAL_GCS_PS_MarkSweep], Count, 2 -[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 57 -[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.09484982111656544 -[TOTAL_GCs], Count, 18 -[TOTAL_GC_TIME], Time(ms), 164 -[TOTAL_GC_TIME_%], Time(%), 0.2729012397038023 -[READ], Operations, 145947 -[READ], AverageLatency(us), 19479.12810814885 -[READ], MinLatency(us), 2589 -[READ], 
MaxLatency(us), 924425 -[READ], Return=OK, 145947 -[READ], 0, 41983.64836255375 -[READ], 1000, 25742.023255813954 -[READ], 2000, 22715.42782468838 -[READ], 3000, 18937.807952286283 -[READ], 4000, 14030.440746753247 -[READ], 5000, 15569.699881376038 -[READ], 6000, 19632.656532456862 -[READ], 7000, 17519.806401249025 -[READ], 8000, 12427.848152659359 -[READ], 9000, 13402.123028391166 -[READ], 10000, 14583.187627187628 -[READ], 11000, 19648.792601338057 -[READ], 12000, 21465.837489943686 -[READ], 13000, 35111.63052011776 -[READ], 14000, 40859.85603244339 -[READ], 15000, 19755.08925750395 -[READ], 16000, 14847.317745803357 -[READ], 17000, 14418.057306590257 -[READ], 18000, 10584.808112324494 -[READ], 19000, 13655.13615980432 -[READ], 20000, 16421.41550139832 -[READ], 21000, 17593.828355236958 -[READ], 22000, 15556.543219666932 -[READ], 23000, 11649.72311827957 -[READ], 24000, 30852.80266474613 -[READ], 25000, 15630.590407470288 +[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 48 +[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.4791375524056698 +[TOTAL_GCs], Count, 6 +[TOTAL_GC_TIME], Time(ms), 81 +[TOTAL_GC_TIME_%], Time(%), 0.8085446196845678 +[READ], Operations, 15407 +[READ], AverageLatency(us), 4328.689751411696 +[READ], MinLatency(us), 2158 +[READ], MaxLatency(us), 239231 +[READ], 95thPercentileLatency(us), 8375 +[READ], 99thPercentileLatency(us), 10927 +[READ], Return=OK, 15407 +[CLEANUP], Operations, 16 +[CLEANUP], AverageLatency(us), 987.1875 +[CLEANUP], MinLatency(us), 0 +[CLEANUP], MaxLatency(us), 12823 +[CLEANUP], 95thPercentileLatency(us), 1410 +[CLEANUP], 99thPercentileLatency(us), 12823 diff --git a/tests/data/ycsb-time-series.dat b/tests/data/ycsb-time-series.dat index 6b8ed509ec..dec198a07c 100644 --- a/tests/data/ycsb-time-series.dat +++ b/tests/data/ycsb-time-series.dat @@ -1,50 +1,48 @@ -2022-10-01 01:23:57:917 0 sec: 0 operations; est completion in 0 second -2022-10-01 01:24:07:902 10 sec: 21029 operations; 2102.9 current ops/sec; est completion in 5 days 12 
hours [READ AverageLatency(us)=22779.72] -2022-10-01 01:24:17:902 20 sec: 45974 operations; 2494.5 current ops/sec; est completion in 5 days [READ AverageLatency(us)=21093.79] -2022-10-01 01:24:27:902 30 sec: 70942 operations; 2496.8 current ops/sec; est completion in 4 days 21 hours [READ AverageLatency(us)=17638.34] -2022-10-01 01:24:37:902 40 sec: 96038 operations; 2509.6 current ops/sec; est completion in 4 days 19 hours [READ AverageLatency(us)=22447.92] -2022-10-01 01:24:47:902 50 sec: 120910 operations; 2487.2 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=19622.87] -2022-10-01 01:24:57:902 60 sec: 146042 operations; 2513.2 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=14863.78] -2022-10-01 01:24:57:997 60 sec: 146134 operations; 978.72 current ops/sec; est completion in 4 days 18 hours [READ AverageLatency(us)=7788.22] [CLEANUP AverageLatency(us)=72.19] -[OVERALL], RunTime(ms), 60093 -[OVERALL], Throughput(ops/sec), 2431.7973807265407 -[TOTAL_GCS_PS_Scavenge], Count, 16 -[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 104 -[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.17306508245552726 +2023-06-05 16:13:47:463 0 sec: 0 operations; est completion in 0 second +2023-06-05 16:13:47,477 WARN [Thread-8] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable +2023-06-05 16:13:47,812 INFO [Thread-8] hbase1_x.BigtableConnection: Using bigtable-hbase client from jar file:/opt/pkb/ycsb/hbase12-binding/lib/bigtable-hbase-1.x-2.5.0.jar. 
Version: 2.5.0 +2023-06-05 16:13:48:459 1 sec: 0 operations; est completion in 106751991167300 days 15 hours +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +DBWrapper: report latency for each error is false and specific error codes to track for latency are: [] +2023-06-05 16:13:49:458 2 sec: 19 operations; 19 current ops/sec; est completion in 121 days 19 hours [READ: Count=19, Max=222207, Min=27504, Avg=122345.26, 90=218751, 99=222207, 99.9=222207, 99.99=222207] +2023-06-05 16:13:50:459 3 sec: 1373 operations; 1354 current ops/sec; est completion in 2 days 12 hours [READ: Count=1354, Max=74431, Min=2800, Avg=6083.69, 90=9975, 99=22559, 99.9=70911, 99.99=74431] +2023-06-05 16:13:51:459 4 sec: 3310 operations; 1937 current ops/sec; est completion in 1 day 9 hours [READ: Count=1938, Max=33535, Min=2442, Avg=4112.58, 90=5243, 99=9719, 99.9=11695, 99.99=33535] +2023-06-05 16:13:52:458 5 sec: 5448 operations; 2138 current ops/sec; est completion in 1 day 1 hour [READ: Count=2137, Max=54559, Min=2342, Avg=3734.04, 90=4343, 99=9551, 99.9=33983, 99.99=54559] +2023-06-05 16:13:53:458 6 sec: 7638 operations; 2190 current ops/sec; est completion in 21 hours 49 minutes [READ: Count=2190, Max=56159, Min=2212, Avg=3645.48, 90=4247, 99=8959, 99.9=20911, 99.99=56159] +2023-06-05 16:13:54:459 7 sec: 9860 
operations; 2222 current ops/sec; est completion in 19 hours 43 minutes [READ: Count=2223, Max=22191, Min=2348, Avg=3592.64, 90=4211, 99=8303, 99.9=11495, 99.99=22191] +2023-06-05 16:13:55:459 8 sec: 12095 operations; 2235 current ops/sec; est completion in 18 hours 22 minutes [READ: Count=2237, Max=11575, Min=2304, Avg=3576.38, 90=4299, 99=7523, 99.9=9247, 99.99=11575] +2023-06-05 16:13:56:458 9 sec: 14372 operations; 2277 current ops/sec; est completion in 17 hours 23 minutes [READ: Count=2275, Max=47455, Min=2336, Avg=3507.24, 90=4163, 99=7427, 99.9=17535, 99.99=47455] +Maximum time elapsed. Requesting stop for the workload. +Stop requested for workload. Now Joining! +2023-06-05 16:13:57:458 10 sec: 16688 operations; 2316 current ops/sec; est completion in 16 hours 38 minutes [READ: Count=2315, Max=40511, Min=2350, Avg=3452.71, 90=4131, 99=7239, 99.9=10175, 99.99=40511] +2023-06-05 16:13:57:477 10 sec: 16699 operations; 611.11 current ops/sec; est completion in 16 hours 39 minutes [READ: Count=11, Max=4735, Min=2828, Avg=3426.55, 90=3839, 99=4735, 99.9=4735, 99.99=4735] [CLEANUP: Count=16, Max=13327, Min=0, Avg=1351.94, 90=1597, 99=13327, 99.9=13327, 99.99=13327] +[OVERALL], RunTime(ms), 10018 +[OVERALL], Throughput(ops/sec), 1666.8995807546416 +[TOTAL_GCS_PS_Scavenge], Count, 5 +[TOTAL_GC_TIME_PS_Scavenge], Time(ms), 37 +[TOTAL_GC_TIME_%_PS_Scavenge], Time(%), 0.36933519664603714 [TOTAL_GCS_PS_MarkSweep], Count, 2 -[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 52 -[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.08653254122776363 -[TOTAL_GCs], Count, 18 -[TOTAL_GC_TIME], Time(ms), 156 -[TOTAL_GC_TIME_%], Time(%), 0.2595976236832909 -[READ], Operations, 146134 -[READ], AverageLatency(us), 19648.40244570052 -[READ], MinLatency(us), 2419 -[READ], MaxLatency(us), 926968 -[READ], Return=OK, 146134 -[READ], 0, 42307.65222970805 -[READ], 1000, 25891.854741896757 -[READ], 2000, 22558.006075334142 -[READ], 3000, 19565.929796569606 -[READ], 4000, 14591.428852975956 -[READ], 5000, 
16380.601777059774 -[READ], 6000, 20572.916465863455 -[READ], 7000, 17695.34032449545 -[READ], 8000, 12106.920532472772 -[READ], 9000, 13669.89244760775 -[READ], 10000, 15021.342295345105 -[READ], 11000, 20345.75170204245 -[READ], 12000, 20804.036348949918 -[READ], 13000, 31959.07740686986 -[READ], 14000, 44360.403739612186 -[READ], 15000, 19838.39495145631 -[READ], 16000, 15042.471107544141 -[READ], 17000, 14904.455489021957 -[READ], 18000, 10872.696848823294 -[READ], 19000, 13736.977991196478 -[READ], 20000, 15973.014016820183 -[READ], 21000, 17668.569322709165 -[READ], 22000, 16865.471417069242 -[READ], 23000, 11306.913763066203 -[READ], 24000, 32364.298769116 -[READ], 25000, 15075.527027027027 +[TOTAL_GC_TIME_PS_MarkSweep], Time(ms), 48 +[TOTAL_GC_TIME_%_PS_MarkSweep], Time(%), 0.4791375524056698 +[TOTAL_GCs], Count, 7 +[TOTAL_GC_TIME], Time(ms), 85 +[TOTAL_GC_TIME_%], Time(%), 0.8484727490517069 +[READ], Operations, 16699 +[READ], AverageLatency(us), 3981.7847775315886 +[READ], MinLatency(us), 2212 +[READ], MaxLatency(us), 222207 +[READ], 95thPercentileLatency(us), 6171 +[READ], 99thPercentileLatency(us), 10215 +[READ], Return=OK, 16699 +[CLEANUP], Operations, 16 +[CLEANUP], AverageLatency(us), 1351.9375 +[CLEANUP], MinLatency(us), 0 +[CLEANUP], MaxLatency(us), 13327 +[CLEANUP], 95thPercentileLatency(us), 1736 +[CLEANUP], 99thPercentileLatency(us), 13327 diff --git a/tests/linux_packages/linux_boot_test.py b/tests/linux_packages/linux_boot_test.py index cdeb44f4d5..0abb86f5ab 100644 --- a/tests/linux_packages/linux_boot_test.py +++ b/tests/linux_packages/linux_boot_test.py @@ -134,8 +134,13 @@ def testCollectVmToVmSamples(self): with open(os.path.join(self.data_dir, 'boot.output')) as f: boot_output = f.read() # Load tcpdump output, which contains egress timestamps. 
- vm_util.PrependTempDir = mock.Mock( - return_value=os.path.join(self.data_dir, 'tcpdump.output')) + self.enter_context( + mock.patch.object( + vm_util, + 'PrependTempDir', + return_value=os.path.join(self.data_dir, 'tcpdump.output'), + ) + ) self.mock_vm.RemoteCommand = mock.Mock(return_value=(boot_output, '')) self.mock_vm.internal_ip = '10.128.0.11' # Compare golden samples with calculated egress/ingress samples. diff --git a/tests/linux_packages/ycsb_stats_test.py b/tests/linux_packages/ycsb_stats_test.py new file mode 100644 index 0000000000..3b7ca9a737 --- /dev/null +++ b/tests/linux_packages/ycsb_stats_test.py @@ -0,0 +1,489 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for perfkitbenchmarker.packages.ycsb.""" + +import copy +import logging +import os +import unittest + +from absl import flags +from absl.testing import flagsaver +import mock +from perfkitbenchmarker import errors +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import ycsb +from perfkitbenchmarker.linux_packages import ycsb_stats +from tests import pkb_common_test_case +from pyfakefs import fake_filesystem + + +FLAGS = flags.FLAGS + +TEST_DIR = '/mock_dir' + + +def open_data_file(filename): + path = os.path.join(os.path.dirname(__file__), '..', 'data', filename) + with open(path) as fp: + return fp.read() + + +def _parse_and_return_time_series(filename): + content = open_data_file(filename) + return ycsb_stats.ParseResults(content, ycsb_stats.HDRHISTOGRAM) + + +class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super(SimpleResultParserTestCase, self).setUp() + self.contents = open_data_file('ycsb-test-run.dat') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') + + def testCommandLineSet(self): + self.assertEqual( + 'Command line: -db com.yahoo.ycsb.BasicDB -P workloads/workloada -t', + self.results.command_line, + ) + + def testClientSet(self): + self.assertEqual('YCSB Client 0.1', self.results.client) + + def testUpdateStatisticsParsed(self): + self.assertEqual( + ycsb_stats._OpResult( + group='update', + statistics={ + 'Operations': 531, + 'Return=0': 531, + 'AverageLatency(ms)': 0.0659774011299435, + 'MinLatency(ms)': 0.042, + 'MaxLatency(ms)': 0.345, + '95thPercentileLatency(ms)': 0, + '99thPercentileLatency(ms)': 0, + }, + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 530), (19, 1)], + ), + self.results.groups['update'], + ) + + def testReadStatisticsParsed(self): + self.assertEqual( + ycsb_stats._OpResult( + group='read', + statistics={ + 'Operations': 469, + 'Return=0': 469, + 'AverageLatency(ms)': 0.03847761194029851, + 'MinLatency(ms)': 0.034, + 
'MaxLatency(ms)': 0.102, + '95thPercentileLatency(ms)': 0, + '99thPercentileLatency(ms)': 0, + }, + data_type=ycsb_stats.HISTOGRAM, + data=[(0, 469)], + ), + self.results.groups['read'], + ) + + def testOverallStatisticsParsed(self): + self.assertEqual( + ycsb_stats._OpResult( + group='overall', + statistics={'RunTime(ms)': 80.0, 'Throughput(ops/sec)': 12500.0}, + data_type='histogram', + data=[], + ), + self.results.groups['overall'], + ) + + +class DetailedResultParserTestCase(unittest.TestCase): + + def setUp(self): + super(DetailedResultParserTestCase, self).setUp() + self.contents = open_data_file('ycsb-test-run-2.dat') + self.results = ycsb_stats.ParseResults(self.contents, 'histogram') + + def testPercentilesFromHistogram_read(self): + hist = self.results.groups['read'].data + percentiles = ycsb_stats._PercentilesFromHistogram(hist) + self.assertEqual(1, percentiles['p50']) + self.assertEqual(7, percentiles['p99']) + + def testPercentilesFromHistogram_update(self): + hist = self.results.groups['update'].data + percentiles = ycsb_stats._PercentilesFromHistogram(hist) + self.assertEqual(1, percentiles['p50']) + self.assertEqual(7, percentiles['p99']) + + +class StatusTimeSeriesParserTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + self.results_1 = _parse_and_return_time_series('ycsb-time-series.dat') + self.results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') + + def testParsedStatusTimeSeriesIsCorrect(self): + results = _parse_and_return_time_series('ycsb-time-series.dat') + expected = ycsb_stats._StatusResult( + timestamp=10, + overall_throughput=2316.0, + op_results=[ + ycsb_stats._OpResult( + group='read', + statistics={ + 'Count': 2315.0, + 'Max': 40.511, + 'Min': 2.350, + 'Avg': 3.45271, + '90': 4.131, + '99': 7.239, + '99.9': 10.175, + '99.99': 40.511, + }, + ) + ], + ) + + self.assertEqual(results.status_time_series[10], expected) + self.assertLen(results.status_time_series, 9) + + 
@flagsaver.flagsaver(ycsb_throughput_time_series=True) + def testCombinedStatusTimeSeriesIsCorrect(self): + results_1 = _parse_and_return_time_series('ycsb-time-series.dat') + results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') + + combined = ycsb_stats.CombineResults( + result_list=[results_1, results_2], + measurement_type=ycsb_stats.TIMESERIES, + combined_hdr={}, + ) + + actual = combined.status_time_series[10] + with self.subTest('Attributes'): + self.assertEqual(actual.timestamp, 10) + self.assertEqual(actual.overall_throughput, 4432.0) + with self.subTest('Statistics'): + self.assertEqual(actual.op_results[0].group, 'read') + expected_statistics = { + 'Count': 4430.0, + 'Max': 52.767, + 'Min': 2.178, + 'Avg': 3.60740577, + '90': 4.347, + '99': 7.429, + '99.9': 23.631, + '99.99': 46.639, + } + for stat, value in expected_statistics.items(): + self.assertAlmostEqual(actual.op_results[0].statistics[stat], value) + with self.subTest('NumItems'): + self.assertLen(combined.status_time_series, 9) + + +class BadResultParserTestCase(unittest.TestCase): + + def testBadTestRun(self): + contents = open_data_file('ycsb-test-run-3.dat') + self.assertRaises( + errors.Benchmarks.KnownIntermittentError, + ycsb_stats.ParseResults, + contents, + 'histogram', + ) + + def testErrorRate(self): + contents = open_data_file('ycsb-test-run-4.dat') + self.assertRaises( + errors.Benchmarks.RunError, + ycsb_stats.ParseResults, + contents, + 'hdrhistogram', + 0.95, + ) + + +class WeightedQuantileTestCase(unittest.TestCase): + + def testEvenlyWeightedSamples(self): + x = list(range(1, 101)) # 1-100 + weights = [1 for _ in x] + self.assertEqual(50, ycsb_stats._WeightedQuantile(x, weights, 0.50)) + self.assertEqual(75, ycsb_stats._WeightedQuantile(x, weights, 0.75)) + self.assertEqual(90, ycsb_stats._WeightedQuantile(x, weights, 0.90)) + self.assertEqual(95, ycsb_stats._WeightedQuantile(x, weights, 0.95)) + self.assertEqual(99, ycsb_stats._WeightedQuantile(x, weights, 0.99)) 
+ self.assertEqual(100, ycsb_stats._WeightedQuantile(x, weights, 1)) + + def testLowWeight(self): + x = [1, 4] + weights = [99, 1] + for i in range(100): + self.assertEqual(1, ycsb_stats._WeightedQuantile(x, weights, i / 100.0)) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) + + def testMidWeight(self): + x = [0, 1.2, 4] + weights = [1, 98, 1] + for i in range(2, 99): + self.assertAlmostEqual( + 1.2, ycsb_stats._WeightedQuantile(x, weights, i / 100.0) + ) + self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) + + +class ParseWorkloadTestCase(unittest.TestCase): + + def testParsesEmptyString(self): + self.assertDictEqual({}, ycsb.ParseWorkload('')) + + def testIgnoresComment(self): + self.assertDictEqual({}, ycsb.ParseWorkload('#\n')) + self.assertDictEqual( + {}, ycsb.ParseWorkload('#recordcount = 10\n# columnfamily=cf') + ) + self.assertDictEqual( + {'recordcount': '10'}, + ycsb.ParseWorkload('#Sample!\nrecordcount = 10'), + ) + + def testParsesSampleWorkload(self): + contents = open_data_file('ycsb_workloada') + actual = ycsb.ParseWorkload(contents) + + expected = { + 'recordcount': '1000', + 'operationcount': '1000', + 'workload': 'com.yahoo.ycsb.workloads.CoreWorkload', + 'readallfields': 'true', + 'readproportion': '0.5', + 'updateproportion': '0.5', + 'scanproportion': '0', + 'insertproportion': '0', + 'requestdistribution': 'zipfian', + } + + self.assertDictEqual(expected, actual) + + +class CombineResultsTestCase(unittest.TestCase): + + def testGroupMissing(self): + r1 = ycsb_stats.YcsbResult( + groups={ + 'read': ycsb_stats._OpResult( + group='read', + statistics={'Operations': 100, 'Return=0': 100}, + data_type=ycsb_stats.HISTOGRAM, + ) + } + ) + r2 = ycsb_stats.YcsbResult( + groups={ + 'read': ycsb_stats._OpResult( + group='read', + statistics={'Operations': 96, 'Return=0': 94, 'Return=-1': 2}, + data_type=ycsb_stats.HISTOGRAM, + ), + 'update': ycsb_stats._OpResult( + group='update', + statistics={'Operations': 
100, 'AverageLatency(ms)': 25}, + data_type=ycsb_stats.HISTOGRAM, + ), + } + ) + combined = ycsb_stats.CombineResults([r1, r2], 'histogram', {}) + self.assertCountEqual(['read', 'update'], combined.groups) + self.assertCountEqual( + ['Operations', 'Return=0', 'Return=-1'], + combined.groups['read'].statistics, + ) + read_stats = combined.groups['read'].statistics + self.assertEqual( + {'Operations': 196, 'Return=0': 194, 'Return=-1': 2}, read_stats + ) + + def testDropUnaggregatedFromSingleResult(self): + r = ycsb_stats.YcsbResult( + client='', + command_line='', + groups={ + 'read': ycsb_stats._OpResult( + group='read', + statistics={'AverageLatency(ms)': 21}, + data_type=ycsb_stats.HISTOGRAM, + ) + }, + ) + + r_copy = copy.deepcopy(r) + self.assertEqual(r, r_copy) + combined = ycsb_stats.CombineResults([r], 'histogram', {}) + self.assertEqual(r, r_copy) + r.groups['read'].statistics = {} + self.assertEqual(r, combined) + + +class HdrLogsParserTestCase(unittest.TestCase): + + def testParseHdrLogFile(self): + rawlog = """ + #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018] + Value Percentile TotalCount 1/(1-Percentile) + + 314.000 0.000000000000 2 1.00 + 853.000 0.100000000000 49955 1.11 + 949.000 0.200000000000 100351 1.25 + 949.000 0.210000000000 100351 1.27 + 1033.000 0.300000000000 150110 1.43 + #[Mean = 1651.145, StdDeviation = 851.707] + #[Max = 203903.000, Total count = 499019] + #[Buckets = 8, SubBuckets = 2048] + """ + actual = ycsb_stats.ParseHdrLogFile(rawlog) + expected = [ + (0.0, 0.314, 2), + (10.0, 0.853, 49953), + (20.0, 0.949, 50396), + (30.0, 1.033, 49759), + ] + self.assertEqual(actual, expected) + + +class YcsbResultTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + self.fs = fake_filesystem.FakeFilesystem() + self.fs.create_dir(TEST_DIR) + self.fake_open = fake_filesystem.FakeFileOpen(self.fs) + + def testWriteStatusTimeSeriesToFile(self): + result = ycsb_stats.YcsbResult( + 
status_time_series={ + 2: ycsb_stats._StatusResult( + timestamp=2, + overall_throughput=2920.0, + op_results=[ + ycsb_stats._OpResult( + group='read', + data_type='', + data=[], + statistics={ + 'Count': 2993.0, + 'Max': 346111.0, + 'Min': 5804.0, + 'Avg': 96.36988477781489, + '90': 244.555, + '99': 273.94100000000003, + '99.9': 288.693, + '99.99': 308.661, + }, + ) + ], + ), + 3: ycsb_stats._StatusResult( + timestamp=3, + overall_throughput=24279.0, + op_results=[ + ycsb_stats._OpResult( + group='read', + data_type='', + data=[], + statistics={ + 'Count': 24211.0, + 'Max': 463359.0, + 'Min': 2824.0, + 'Avg': 23.845614725951016, + '90': 72.162, + '99': 148.974, + '99.9': 193.701, + '99.99': 263.192, + }, + ) + ], + ), + } + ) + self.enter_context( + mock.patch.object(vm_util, 'GetTempDir', return_value=TEST_DIR) + ) + self.enter_context( + mock.patch.object(ycsb_stats, 'open', side_effect=self.fake_open) + ) + + result.WriteStatusTimeSeriesToFile() + + expected_content = ( + 'time,90,99,99.9,99.99,Avg,Count,Max,Min\r\n' + '2,244.555,273.94100000000003,288.693,308.661,96.36988477781489,2993.0,346111.0,5804.0\r\n' + '3,72.162,148.974,193.701,263.192,23.845614725951016,24211.0,463359.0,2824.0\r\n' + ) + self.assertEqual( + self.fs.get_object(TEST_DIR + '/ycsb_status_output_read.csv').contents, + expected_content, + ) + + +class OpResultTestCase(pkb_common_test_case.PkbCommonTestCase): + + def testFromStatusLine(self): + sample_line = ( + '2023-06-05 16:15:54:654 10 sec: 15407 operations; 666.67 current' + ' ops/sec; est completion in 18 hours 3 minutes [READ: Count=12,' + ' Max=5607, Min=2880, Avg=4079.83, 90=5291, 99=5607, 99.9=5607,' + ' 99.99=5607] [CLEANUP: Count=16, Max=12823, Min=0, Avg=987.19, 90=916,' + ' 99=12823, 99.9=12823, 99.99=12823]' + ) + + actual = list(ycsb_stats._ParseStatusLine(sample_line)) + expected = [ + ycsb_stats._OpResult( + group='read', + statistics={ + '90': 5.291, + '99': 5.607, + '99.9': 5.607, + '99.99': 5.607, + 'Avg': 4.07983, + 
'Count': 12.0, + 'Max': 5.607, + 'Min': 2.880, + }, + ), + ycsb_stats._OpResult( + group='cleanup', + statistics={ + '90': 0.916, + '99': 12.823, + '99.9': 12.823, + '99.99': 12.823, + 'Avg': 0.98719, + 'Count': 16.0, + 'Max': 12.823, + 'Min': 0.0, + }, + ), + ] + + self.assertEqual(actual, expected) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + unittest.main() diff --git a/tests/linux_packages/ycsb_test.py b/tests/linux_packages/ycsb_test.py index de07a6b3ab..1c2a2c32cc 100644 --- a/tests/linux_packages/ycsb_test.py +++ b/tests/linux_packages/ycsb_test.py @@ -13,9 +13,7 @@ # limitations under the License. """Tests for perfkitbenchmarker.packages.ycsb.""" -import copy import logging -import os import unittest from absl import flags @@ -31,315 +29,6 @@ FLAGS = flags.FLAGS -def open_data_file(filename): - path = os.path.join(os.path.dirname(__file__), '..', 'data', filename) - with open(path) as fp: - return fp.read() - - -def _parse_and_return_time_series(filename): - content = open_data_file(filename) - return ycsb_stats.ParseResults(content, 'timeseries') - - -class SimpleResultParserTestCase(pkb_common_test_case.PkbCommonTestCase): - - def setUp(self): - super(SimpleResultParserTestCase, self).setUp() - self.contents = open_data_file('ycsb-test-run.dat') - self.results = ycsb_stats.ParseResults(self.contents, 'histogram') - - def testCommandLineSet(self): - self.assertEqual( - 'Command line: -db com.yahoo.ycsb.BasicDB -P workloads/workloada -t', - self.results.command_line, - ) - - def testClientSet(self): - self.assertEqual('YCSB Client 0.1', self.results.client) - - def testUpdateStatisticsParsed(self): - self.assertEqual( - ycsb_stats._OpResult( - group='update', - statistics={ - 'Operations': 531, - 'Return=0': 531, - 'AverageLatency(ms)': 0.0659774011299435, - 'MinLatency(ms)': 0.042, - 'MaxLatency(ms)': 0.345, - '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0, - }, - data_type=ycsb_stats.HISTOGRAM, - data=[(0, 
530), (19, 1)], - ), - self.results.groups['update'], - ) - - def testReadStatisticsParsed(self): - self.assertEqual( - ycsb_stats._OpResult( - group='read', - statistics={ - 'Operations': 469, - 'Return=0': 469, - 'AverageLatency(ms)': 0.03847761194029851, - 'MinLatency(ms)': 0.034, - 'MaxLatency(ms)': 0.102, - '95thPercentileLatency(ms)': 0, - '99thPercentileLatency(ms)': 0, - }, - data_type=ycsb_stats.HISTOGRAM, - data=[(0, 469)], - ), - self.results.groups['read'], - ) - - def testOverallStatisticsParsed(self): - self.assertEqual( - ycsb_stats._OpResult( - group='overall', - statistics={'RunTime(ms)': 80.0, 'Throughput(ops/sec)': 12500.0}, - data_type='histogram', - data=[], - ), - self.results.groups['overall'], - ) - - -class DetailedResultParserTestCase(unittest.TestCase): - - def setUp(self): - super(DetailedResultParserTestCase, self).setUp() - self.contents = open_data_file('ycsb-test-run-2.dat') - self.results = ycsb_stats.ParseResults(self.contents, 'histogram') - - def testPercentilesFromHistogram_read(self): - hist = self.results.groups['read'].data - percentiles = ycsb_stats._PercentilesFromHistogram(hist) - self.assertEqual(1, percentiles['p50']) - self.assertEqual(7, percentiles['p99']) - - def testPercentilesFromHistogram_update(self): - hist = self.results.groups['update'].data - percentiles = ycsb_stats._PercentilesFromHistogram(hist) - self.assertEqual(1, percentiles['p50']) - self.assertEqual(7, percentiles['p99']) - - -class ThroughputTimeSeriesParserTestCase( - pkb_common_test_case.PkbCommonTestCase -): - - def setUp(self): - super().setUp() - self.results_1 = _parse_and_return_time_series('ycsb-time-series.dat') - self.results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') - - def testParsedThroughputTimeSeriesIsCorrect(self): - results = _parse_and_return_time_series('ycsb-time-series.dat') - expected = { - 10: 2102.9, - 20: 2494.5, - 30: 2496.8, - 40: 2509.6, - 50: 2487.2, - 60: 2513.2, - } - 
self.assertEqual(results.throughput_time_series, expected) - - @flagsaver.flagsaver(ycsb_throughput_time_series=True) - def testCombinedThroughputTimeSeriesIsCorrect(self): - results_1 = _parse_and_return_time_series('ycsb-time-series.dat') - results_2 = _parse_and_return_time_series('ycsb-time-series-2.dat') - - combined = ycsb_stats.CombineResults( - result_list=[results_1, results_2], - measurement_type=ycsb_stats.TIMESERIES, - combined_hdr={}, - ) - - expected = { - 10: 4187.5, - 20: 4990.2, - 30: 4994.0, - 40: 5018.2, - 50: 4976.5, - 60: 5023.2, - } - self.assertEqual(combined.throughput_time_series, expected) - - -class BadResultParserTestCase(unittest.TestCase): - - def testBadTestRun(self): - contents = open_data_file('ycsb-test-run-3.dat') - self.assertRaises( - errors.Benchmarks.KnownIntermittentError, - ycsb_stats.ParseResults, - contents, - 'histogram', - ) - - def testErrorRate(self): - contents = open_data_file('ycsb-test-run-4.dat') - self.assertRaises( - errors.Benchmarks.RunError, - ycsb_stats.ParseResults, - contents, - 'hdrhistogram', - 0.95, - ) - - -class WeightedQuantileTestCase(unittest.TestCase): - - def testEvenlyWeightedSamples(self): - x = list(range(1, 101)) # 1-100 - weights = [1 for _ in x] - self.assertEqual(50, ycsb_stats._WeightedQuantile(x, weights, 0.50)) - self.assertEqual(75, ycsb_stats._WeightedQuantile(x, weights, 0.75)) - self.assertEqual(90, ycsb_stats._WeightedQuantile(x, weights, 0.90)) - self.assertEqual(95, ycsb_stats._WeightedQuantile(x, weights, 0.95)) - self.assertEqual(99, ycsb_stats._WeightedQuantile(x, weights, 0.99)) - self.assertEqual(100, ycsb_stats._WeightedQuantile(x, weights, 1)) - - def testLowWeight(self): - x = [1, 4] - weights = [99, 1] - for i in range(100): - self.assertEqual(1, ycsb_stats._WeightedQuantile(x, weights, i / 100.0)) - self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) - - def testMidWeight(self): - x = [0, 1.2, 4] - weights = [1, 98, 1] - for i in range(2, 99): - 
self.assertAlmostEqual( - 1.2, ycsb_stats._WeightedQuantile(x, weights, i / 100.0) - ) - self.assertEqual(4, ycsb_stats._WeightedQuantile(x, weights, 0.995)) - - -class ParseWorkloadTestCase(unittest.TestCase): - - def testParsesEmptyString(self): - self.assertDictEqual({}, ycsb.ParseWorkload('')) - - def testIgnoresComment(self): - self.assertDictEqual({}, ycsb.ParseWorkload('#\n')) - self.assertDictEqual( - {}, ycsb.ParseWorkload('#recordcount = 10\n# columnfamily=cf') - ) - self.assertDictEqual( - {'recordcount': '10'}, - ycsb.ParseWorkload('#Sample!\nrecordcount = 10'), - ) - - def testParsesSampleWorkload(self): - contents = open_data_file('ycsb_workloada') - actual = ycsb.ParseWorkload(contents) - - expected = { - 'recordcount': '1000', - 'operationcount': '1000', - 'workload': 'com.yahoo.ycsb.workloads.CoreWorkload', - 'readallfields': 'true', - 'readproportion': '0.5', - 'updateproportion': '0.5', - 'scanproportion': '0', - 'insertproportion': '0', - 'requestdistribution': 'zipfian', - } - - self.assertDictEqual(expected, actual) - - -class CombineResultsTestCase(unittest.TestCase): - - def testGroupMissing(self): - r1 = ycsb_stats.YcsbResult( - groups={ - 'read': ycsb_stats._OpResult( - group='read', - statistics={'Operations': 100, 'Return=0': 100}, - data_type=ycsb_stats.HISTOGRAM, - ) - } - ) - r2 = ycsb_stats.YcsbResult( - groups={ - 'read': ycsb_stats._OpResult( - group='read', - statistics={'Operations': 96, 'Return=0': 94, 'Return=-1': 2}, - data_type=ycsb_stats.HISTOGRAM, - ), - 'update': ycsb_stats._OpResult( - group='update', - statistics={'Operations': 100, 'AverageLatency(ms)': 25}, - data_type=ycsb_stats.HISTOGRAM, - ), - } - ) - combined = ycsb_stats.CombineResults([r1, r2], 'histogram', {}) - self.assertCountEqual(['read', 'update'], combined.groups) - self.assertCountEqual( - ['Operations', 'Return=0', 'Return=-1'], - combined.groups['read'].statistics, - ) - read_stats = combined.groups['read'].statistics - self.assertEqual( - 
{'Operations': 196, 'Return=0': 194, 'Return=-1': 2}, read_stats - ) - - def testDropUnaggregatedFromSingleResult(self): - r = ycsb_stats.YcsbResult( - client='', - command_line='', - groups={ - 'read': ycsb_stats._OpResult( - group='read', - statistics={'AverageLatency(ms)': 21}, - data_type=ycsb_stats.HISTOGRAM, - ) - }, - ) - - r_copy = copy.deepcopy(r) - self.assertEqual(r, r_copy) - combined = ycsb_stats.CombineResults([r], 'histogram', {}) - self.assertEqual(r, r_copy) - r.groups['read'].statistics = {} - self.assertEqual(r, combined) - - -class HdrLogsParserTestCase(unittest.TestCase): - - def testParseHdrLogFile(self): - rawlog = """ - #[StartTime: 1523565997 (seconds since epoch), Thu Apr 12 20:46:37 UTC 2018] - Value Percentile TotalCount 1/(1-Percentile) - - 314.000 0.000000000000 2 1.00 - 853.000 0.100000000000 49955 1.11 - 949.000 0.200000000000 100351 1.25 - 949.000 0.210000000000 100351 1.27 - 1033.000 0.300000000000 150110 1.43 - #[Mean = 1651.145, StdDeviation = 851.707] - #[Max = 203903.000, Total count = 499019] - #[Buckets = 8, SubBuckets = 2048] - """ - actual = ycsb_stats.ParseHdrLogFile(rawlog) - expected = [ - (0.0, 0.314, 2), - (10.0, 0.853, 49953), - (20.0, 0.949, 50396), - (30.0, 1.033, 49759), - ] - self.assertEqual(actual, expected) - - class PrerequisitesTestCase(pkb_common_test_case.PkbCommonTestCase): @parameterized.named_parameters(