From 11367e2a136d4aa259db6ebab7d4c0c609f343ee Mon Sep 17 00:00:00 2001 From: Tony Kao Date: Tue, 15 Oct 2024 20:56:34 -0700 Subject: [PATCH] torchx - add exception_type, exception_message, and exception_source_location to torchx event (#966) Summary: Add exception type, exception message, exception source location to torchx event. This allows for better logging of exception details for further analysis. Differential Revision: D64406552 --- torchx/runner/events/__init__.py | 16 ++++++++++++++++ torchx/runner/events/api.py | 3 +++ 2 files changed, 19 insertions(+) diff --git a/torchx/runner/events/__init__.py b/torchx/runner/events/__init__.py index c8eb89d96..8fab92a10 100644 --- a/torchx/runner/events/__init__.py +++ b/torchx/runner/events/__init__.py @@ -20,7 +20,9 @@ """ +import json import logging +import sys import time import traceback from types import TracebackType @@ -123,6 +125,20 @@ def __exit__( ) // 1000 if traceback_type: self._torchx_event.raw_exception = traceback.format_exc() + typ, value, tb = sys.exc_info() + if tb: + last_frame = traceback.extract_tb(tb)[-1] + self._torchx_event.exception_source_location = json.dumps( + { + "filename": last_frame.filename, + "lineno": last_frame.lineno, + "name": last_frame.name, + } + ) + if exec_type: + self._torchx_event.exception_type = exec_type.__name__ + if exec_value: + self._torchx_event.exception_message = str(exec_value) record(self._torchx_event) def _generate_torchx_event( diff --git a/torchx/runner/events/api.py b/torchx/runner/events/api.py index 355c03f6c..f03815e75 100644 --- a/torchx/runner/events/api.py +++ b/torchx/runner/events/api.py @@ -52,6 +52,9 @@ class TorchxEvent: wall_time_usec: Optional[int] = None start_epoch_time_usec: Optional[int] = None workspace: Optional[str] = None + exception_type: Optional[str] = None + exception_message: Optional[str] = None + exception_source_location: Optional[str] = None def __str__(self) -> str: return self.serialize()