Skip to content

Commit

Permalink
Merge pull request #4491 from jasonb5/performance-baselines
Browse files Browse the repository at this point in the history
Updates performance baselines

Updates how performance baselines are created and compared.

Before
Baseline generation stores cpl.log
Comparison parses baseline and testcase cpl.log and compares throughput/memory
--check-throughput and --check-memory will PASS even if MEMCOMP or TPUTCOMP fail
bless_test_results --hist-only will update the baseline cpl.log

After
Baseline generation stores throughput in cpl-tput.log and memory usage in cpl-mem.log
Comparison parses testcase cpl.log and compares the baselines respective files
--check-throughput and --check-memory will DIFF if MEMCOMP or TPUTCOMP fail
bless_test_results now has --tput-only and --mem-only to bless individual performance DIFF
Fixes logging throughput/memory comparison results in TestStatus.log
Allows generation/comparison to be customized by the coupler

Test suite: pytest -vvv CIME/tests
Test baseline: n/a
Test namelist changes: n/a
Test status: n/a

Fixes E3SM-Project/E3SM#5885

User interface changes?: N
Update gh-pages html (Y/N)?: N
  • Loading branch information
jgfouca authored Oct 16, 2023
2 parents af9b27f + 7e30703 commit 88255d5
Show file tree
Hide file tree
Showing 16 changed files with 2,329 additions and 435 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ jobs:
user_name: 'github-actions[bot]'
user_email: 'github-actions[bot]@users.noreply.github.com'
- name: Comment about previewing documentation
if: ${{ github.event_name == 'pull_request' }}
if: |
github.event_name == 'pull_request' &&
github.event.pull_request.head.repo.full_name == github.repository
uses: actions/github-script@v6
with:
script: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ jobs:
init_cime
pytest -vvv --machine docker --no-fortran-run CIME/tests/test_unit*
pytest -vvv --cov=CIME --machine docker --no-fortran-run CIME/tests/test_unit*
# Run system tests
system-testing:
Expand Down Expand Up @@ -145,7 +145,7 @@ jobs:
conda activate base
pytest -vvv --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys*
pytest -vvv --cov=CIME --machine docker --no-fortran-run --no-teardown CIME/tests/test_sys*
- name: Create testing log archive
if: ${{ failure() }}
shell: bash
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ scripts/Tools/JENKINS*
components
libraries
share
test_coverage/**
290 changes: 104 additions & 186 deletions CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@
from CIME.config import Config
from CIME.provenance import save_test_time, get_test_success
from CIME.locked_files import LOCKED_DIR, lock_file, is_locked
from CIME.baselines.performance import (
get_latest_cpl_logs,
_perf_get_memory,
perf_compare_memory_baseline,
perf_compare_throughput_baseline,
perf_write_baseline,
load_coupler_customization,
)
import CIME.build as build

import glob, gzip, time, traceback, os
Expand Down Expand Up @@ -493,7 +501,7 @@ def run_indv(
self._case.case_st_archive(resubmit=True)

def _coupler_log_indicates_run_complete(self):
newestcpllogfiles = self._get_latest_cpl_logs()
newestcpllogfiles = get_latest_cpl_logs(self._case)
logger.debug("Latest Coupler log file(s) {}".format(newestcpllogfiles))
# Exception is raised if the file is not compressed
allgood = len(newestcpllogfiles)
Expand Down Expand Up @@ -598,43 +606,6 @@ def _st_archive_case_test(self):
else:
self._test_status.set_status(STARCHIVE_PHASE, TEST_FAIL_STATUS)

def _get_mem_usage(self, cpllog):
"""
Examine memory usage as recorded in the cpl log file and look for unexpected
increases.
"""
memlist = []
meminfo = re.compile(
r".*model date =\s+(\w+).*memory =\s+(\d+\.?\d+).*highwater"
)
if cpllog is not None and os.path.isfile(cpllog):
if ".gz" == cpllog[-3:]:
fopen = gzip.open
else:
fopen = open
with fopen(cpllog, "rb") as f:
for line in f:
m = meminfo.match(line.decode("utf-8"))
if m:
memlist.append((float(m.group(1)), float(m.group(2))))
# Remove the last mem record, it's sometimes artificially high
if len(memlist) > 0:
memlist.pop()
return memlist

def _get_throughput(self, cpllog):
"""
Examine memory usage as recorded in the cpl log file and look for unexpected
increases.
"""
if cpllog is not None and os.path.isfile(cpllog):
with gzip.open(cpllog, "rb") as f:
cpltext = f.read().decode("utf-8")
m = re.search(r"# simulated years / cmp-day =\s+(\d+\.\d+)\s", cpltext)
if m:
return float(m.group(1))
return None

def _phase_modifying_call(self, phase, function):
"""
Ensures that unexpected exceptions from phases will result in a FAIL result
Expand All @@ -661,47 +632,29 @@ def _check_for_memleak(self):
Examine memory usage as recorded in the cpl log file and look for unexpected
increases.
"""
config = load_coupler_customization(self._case)

# default to 0.1
tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE") or 0.1

expect(tolerance > 0.0, "Bad value for memleak tolerance in test")

with self._test_status:
latestcpllogs = self._get_latest_cpl_logs()
for cpllog in latestcpllogs:
memlist = self._get_mem_usage(cpllog)
try:
memleak, comment = config.perf_check_for_memory_leak(
self._case, tolerance
)
except AttributeError:
memleak, comment = perf_check_for_memory_leak(self._case, tolerance)

if len(memlist) < 3:
self._test_status.set_status(
MEMLEAK_PHASE,
TEST_PASS_STATUS,
comments="insuffiencient data for memleak test",
)
else:
finaldate = int(memlist[-1][0])
originaldate = int(
memlist[1][0]
) # skip first day mem record, it can be too low while initializing
finalmem = float(memlist[-1][1])
originalmem = float(memlist[1][1])
memdiff = -1
if originalmem > 0:
memdiff = (finalmem - originalmem) / originalmem
tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE")
if tolerance is None:
tolerance = 0.1
expect(tolerance > 0.0, "Bad value for memleak tolerance in test")
if memdiff < 0:
self._test_status.set_status(
MEMLEAK_PHASE,
TEST_PASS_STATUS,
comments="data for memleak test is insuffiencient",
)
elif memdiff < tolerance:
self._test_status.set_status(MEMLEAK_PHASE, TEST_PASS_STATUS)
else:
comment = "memleak detected, memory went from {:f} to {:f} in {:d} days".format(
originalmem, finalmem, finaldate - originaldate
)
append_testlog(comment, self._orig_caseroot)
self._test_status.set_status(
MEMLEAK_PHASE, TEST_FAIL_STATUS, comments=comment
)
if memleak:
append_testlog(comment, self._orig_caseroot)

status = TEST_FAIL_STATUS
else:
status = TEST_PASS_STATUS

self._test_status.set_status(MEMLEAK_PHASE, status, comments=comment)

def compare_env_run(self, expected=None):
"""
Expand All @@ -728,121 +681,45 @@ def compare_env_run(self, expected=None):
return False
return True

def _get_latest_cpl_logs(self):
def _compare_memory(self):
"""
find and return the latest cpl log file in the run directory
Compares current test memory usage to baseline.
"""
coupler_log_path = self._case.get_value("RUNDIR")
cpllogs = glob.glob(
os.path.join(coupler_log_path, "{}*.log.*".format(self._cpllog))
)
lastcpllogs = []
if cpllogs:
lastcpllogs.append(max(cpllogs, key=os.path.getctime))
basename = os.path.basename(lastcpllogs[0])
suffix = basename.split(".", 1)[1]
for log in cpllogs:
if log in lastcpllogs:
continue

if log.endswith(suffix):
lastcpllogs.append(log)
with self._test_status:
below_tolerance, comment = perf_compare_memory_baseline(self._case)

return lastcpllogs
if below_tolerance is not None:
append_testlog(comment, self._orig_caseroot)

def _compare_memory(self):
with self._test_status:
# compare memory usage to baseline
baseline_name = self._case.get_value("BASECMP_CASE")
basecmp_dir = os.path.join(
self._case.get_value("BASELINE_ROOT"), baseline_name
)
newestcpllogfiles = self._get_latest_cpl_logs()
if len(newestcpllogfiles) > 0:
memlist = self._get_mem_usage(newestcpllogfiles[0])
for cpllog in newestcpllogfiles:
m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)
if m is not None:
baselog = os.path.join(basecmp_dir, m.group(1)) + ".gz"
if baselog is None or not os.path.isfile(baselog):
# for backward compatibility
baselog = os.path.join(basecmp_dir, self._cpllog + ".log")
if os.path.isfile(baselog) and len(memlist) > 3:
blmem = self._get_mem_usage(baselog)
blmem = 0 if blmem == [] else blmem[-1][1]
curmem = memlist[-1][1]
diff = 0.0 if blmem == 0 else (curmem - blmem) / blmem
tolerance = self._case.get_value("TEST_MEMLEAK_TOLERANCE")
if tolerance is None:
tolerance = 0.1
if (
diff < tolerance
and self._test_status.get_status(MEMCOMP_PHASE) is None
):
self._test_status.set_status(MEMCOMP_PHASE, TEST_PASS_STATUS)
elif (
self._test_status.get_status(MEMCOMP_PHASE) != TEST_FAIL_STATUS
):
comment = "Error: Memory usage increase >{:d}% from baseline's {:f} to {:f}".format(
int(tolerance * 100), blmem, curmem
)
self._test_status.set_status(
MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment
)
append_testlog(comment, self._orig_caseroot)
if (
below_tolerance
and self._test_status.get_status(MEMCOMP_PHASE) is None
):
self._test_status.set_status(MEMCOMP_PHASE, TEST_PASS_STATUS)
elif self._test_status.get_status(MEMCOMP_PHASE) != TEST_FAIL_STATUS:
self._test_status.set_status(
MEMCOMP_PHASE, TEST_FAIL_STATUS, comments=comment
)

def _compare_throughput(self):
"""
Compares current test throughput to baseline.
"""
with self._test_status:
# compare memory usage to baseline
baseline_name = self._case.get_value("BASECMP_CASE")
basecmp_dir = os.path.join(
self._case.get_value("BASELINE_ROOT"), baseline_name
)
newestcpllogfiles = self._get_latest_cpl_logs()
for cpllog in newestcpllogfiles:
m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)
if m is not None:
baselog = os.path.join(basecmp_dir, m.group(1)) + ".gz"
if baselog is None or not os.path.isfile(baselog):
# for backward compatibility
baselog = os.path.join(basecmp_dir, self._cpllog)

if os.path.isfile(baselog):
# compare throughput to baseline
current = self._get_throughput(cpllog)
baseline = self._get_throughput(baselog)
# comparing ypd so bigger is better
if baseline is not None and current is not None:
diff = (baseline - current) / baseline
tolerance = self._case.get_value("TEST_TPUT_TOLERANCE")
if tolerance is None:
tolerance = 0.1
expect(
tolerance > 0.0,
"Bad value for throughput tolerance in test",
)
comment = "TPUTCOMP: Computation time changed by {:.2f}% relative to baseline".format(
diff * 100
)
append_testlog(comment, self._orig_caseroot)
if (
diff < tolerance
and self._test_status.get_status(THROUGHPUT_PHASE) is None
):
self._test_status.set_status(
THROUGHPUT_PHASE, TEST_PASS_STATUS
)
elif (
self._test_status.get_status(THROUGHPUT_PHASE)
!= TEST_FAIL_STATUS
):
comment = "Error: TPUTCOMP: Computation time increase > {:d}% from baseline".format(
int(tolerance * 100)
)
self._test_status.set_status(
THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment
)
append_testlog(comment, self._orig_caseroot)
below_tolerance, comment = perf_compare_throughput_baseline(self._case)

if below_tolerance is not None:
append_testlog(comment, self._orig_caseroot)

if (
below_tolerance
and self._test_status.get_status(THROUGHPUT_PHASE) is None
):
self._test_status.set_status(THROUGHPUT_PHASE, TEST_PASS_STATUS)
elif self._test_status.get_status(THROUGHPUT_PHASE) != TEST_FAIL_STATUS:
self._test_status.set_status(
THROUGHPUT_PHASE, TEST_FAIL_STATUS, comments=comment
)

def _compare_baseline(self):
"""
Expand Down Expand Up @@ -884,18 +761,59 @@ def _generate_baseline(self):
)
# copy latest cpl log to baseline
# drop the date so that the name is generic
newestcpllogfiles = self._get_latest_cpl_logs()
newestcpllogfiles = get_latest_cpl_logs(self._case)
with SharedArea():
# TODO ever actually more than one cpl log?
for cpllog in newestcpllogfiles:
m = re.search(r"/({}.*.log).*.gz".format(self._cpllog), cpllog)

if m is not None:
baselog = os.path.join(basegen_dir, m.group(1)) + ".gz"

safe_copy(
cpllog,
os.path.join(basegen_dir, baselog),
preserve_meta=False,
)

perf_write_baseline(self._case, basegen_dir, cpllog)


def perf_check_for_memory_leak(case, tolerance):
leak = False
comment = ""

latestcpllogs = get_latest_cpl_logs(case)

for cpllog in latestcpllogs:
try:
memlist = _perf_get_memory(case, cpllog)
except RuntimeError:
return False, "insufficient data for memleak test"

# last day - second day, skip first day, can be too low while initializing
elapsed_days = int(memlist[-1][0]) - int(memlist[1][0])

finalmem, originalmem = float(memlist[-1][1]), float(memlist[1][1])

memdiff = -1 if originalmem <= 0 else (finalmem - originalmem) / originalmem

if memdiff < 0:
leak = False
comment = "data for memleak test is insufficient"
elif memdiff < tolerance:
leak = False
comment = ""
else:
leak = True
comment = (
"memleak detected, memory went from {:f} to {:f} in {:d} days".format(
originalmem, finalmem, elapsed_days
)
)

return leak, comment


class FakeTest(SystemTestsCommon):
"""
Expand Down
Loading

0 comments on commit 88255d5

Please sign in to comment.