forked from nvaccess/nvda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwatchdog.py
379 lines (340 loc) · 13.2 KB
/
watchdog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# A part of NonVisual Desktop Access (NVDA)
# Copyright (C) 2008-2021 NV Access Limited
# This file is covered by the GNU General Public License.
# See the file COPYING for more details.
import sys
import os
import traceback
import time
import threading
import inspect
from ctypes import windll, oledll
import ctypes.wintypes
import msvcrt
import comtypes
import winUser
import winKernel
from logHandler import log
import globalVars
import core
import exceptions
import NVDAHelper
#settings
# The timeouts and intervals below are in seconds: they are passed to time.sleep,
# compared against time.time() differences, or scaled when arming the timer in alive().
#: The minimum time to wait for the core to be alive.
MIN_CORE_ALIVE_TIMEOUT=0.5
#: How long to wait for the core to be alive under normal circumstances.
#: This must be a multiple of MIN_CORE_ALIVE_TIMEOUT.
NORMAL_CORE_ALIVE_TIMEOUT=10
#: How long to wait between recovery attempts
RECOVER_ATTEMPT_INTERVAL = 0.05
#: The amount of time before the core should be considered severely frozen and a warning logged.
FROZEN_WARNING_TIMEOUT = 15
#: Window class names consulted by _shouldRecoverAfterMinTimeout:
#: when the focused window has one of these classes, recovery after the minimum timeout is not requested.
safeWindowClassSet=set([
'Internet Explorer_Server',
'_WwG',
'EXCEL7',
])
#: True between initialize() and terminate().
isRunning=False
#: True while the watcher thread is actively trying to recover the core from a freeze.
isAttemptingRecovery: bool = False
#: Set to True by asleep() and back to False by alive(); read through isCoreAsleep().
_coreIsAsleep = False
#: Waitable timer handle created at import time.
#: alive() arms it, asleep() cancels it, and the watcher thread waits on it.
_coreDeadTimer = windll.kernel32.CreateWaitableTimerW(None, True, None)
#: True while inside a Suspender context; cancellableExecute then calls functions directly.
_suspended = False
#: The thread running _watcher; created by initialize() and joined by terminate().
_watcherThread=None
#: Event handle fetched from NVDAHelper.localLib by initialize().
#: The watcher sets it to cancel calls; alive() resets it.
_cancelCallEvent = None
def getFormattedStacksForAllThreads():
    """
    Build a loggable report of the Python call stack of every thread in this process.

    @return: One section per thread, each headed by the thread's ident and name
        ("Unknown" when the thread is not known to the threading module),
        with sections separated by a blank line.
    @rtype: str
    """
    # Only threads tracked by the threading module can be mapped from ident to name.
    knownNames = {}
    for thread in threading.enumerate():
        knownNames[thread.ident] = thread.name
    sections = []
    # A thread that was not started by Python can still be executing Python code:
    # it then has a frame here, but no entry in knownNames.
    for threadId, topFrame in sys._current_frames().items():
        # Every formatted entry already ends with a newline, so plain concatenation is enough.
        formatted = "".join(traceback.format_stack(topFrame))
        threadName = knownNames.get(threadId, "Unknown")
        sections.append(f"Python stack for thread {threadId} ({threadName}):\n{formatted}")
    return "\n".join(sections)
def alive():
    """Inform the watchdog that the core is alive.

    Clears the asleep flag, stops cancelling calls and re-arms the core-dead timer.
    """
    global _coreIsAsleep
    _coreIsAsleep = False
    kernel32 = windll.kernel32
    # Calls should no longer be cancelled now that the core has reported in.
    kernel32.ResetEvent(_cancelCallEvent)
    # Re-arm the timer: unless alive() or asleep() is called again first,
    # it fires after MIN_CORE_ALIVE_TIMEOUT and the watcher takes action.
    # A negative due time is a relative interval in 100-nanosecond units (see SetWaitableTimer).
    dueTime = ctypes.wintypes.LARGE_INTEGER(-int(10000000 * MIN_CORE_ALIVE_TIMEOUT))
    kernel32.SetWaitableTimer(_coreDeadTimer, ctypes.byref(dueTime), 0, None, None, False)
def asleep():
    """Inform the watchdog that the core is going to sleep.

    While asleep, the core-dead timer is cancelled so the watcher takes no action.
    """
    global _coreIsAsleep
    # #5189: The core may have been treated as dead; alive() resets that state.
    # It also calls SetWaitableTimer, which puts the timer back into the unsignaled state.
    # That matters because CancelWaitableTimer alone leaves an already signaled timer signaled.
    alive()
    windll.kernel32.CancelWaitableTimer(_coreDeadTimer)
    _coreIsAsleep = True
def isCoreAsleep():
    """
    Report whether the core is currently asleep, i.e. not inside a core cycle.

    A frozen core is stuck inside a core cycle while awake,
    so this returns False in that situation.
    @return: The flag last set by asleep() (True) or alive() (False).
    """
    asleepNow = _coreIsAsleep
    return asleepNow
def _isAlive():
    """Report whether the core should be treated as alive.

    @return: True if the watchdog is not running, or if the core-dead timer is not signaled.
    """
    if not isRunning:
        # #5189: Once the watchdog has been terminated, treat the core as alive.
        # This stops any recovery in progress and lets the watcher terminate.
        return True
    # A zero-timeout wait only polls the timer; a result of 0 means it has fired.
    return winKernel.waitForSingleObject(_coreDeadTimer, 0) != 0
def _watcher():
    """Body of the watcher thread.

    Repeatedly waits for the core-dead timer to fire, gives the core a grace period
    (unless fast recovery applies), and then keeps attempting recovery until the core
    reports alive again. Returns once the watchdog is no longer running.
    """
    global isAttemptingRecovery
    while True:
        # Block until the core fails to report alive in time (or terminate() wakes us).
        winKernel.waitForSingleObject(_coreDeadTimer, winKernel.INFINITE)
        if not isRunning:
            return
        # At this point the core has been silent for MIN_CORE_ALIVE_TIMEOUT.
        elapsed = MIN_CORE_ALIVE_TIMEOUT
        while not _isAlive() and not _shouldRecoverAfterMinTimeout():
            # Still dead and fast recovery doesn't apply:
            # keep waiting in small steps, up to NORMAL_CORE_ALIVE_TIMEOUT in total.
            time.sleep(MIN_CORE_ALIVE_TIMEOUT)
            elapsed += MIN_CORE_ALIVE_TIMEOUT
            if elapsed >= NORMAL_CORE_ALIVE_TIMEOUT:
                break
        if _isAlive():
            # The core came back by itself; go back to waiting.
            continue
        if log.isEnabledFor(log.DEBUGWARNING):
            stacks = getFormattedStacksForAllThreads()
            log.debugWarning(f"Trying to recover from freeze. Listing stacks for Python threads:\n{stacks}")
        lastWarningTime = time.time()
        isAttemptingRecovery = True
        # Cancel calls until the core is alive again; alive() resets this event.
        windll.kernel32.SetEvent(_cancelCallEvent)
        # Some calls have to be killed individually, so keep trying until the core responds.
        while True:
            now = time.time()
            if now - lastWarningTime > FROZEN_WARNING_TIMEOUT:
                lastWarningTime = now
                # The core is completely frozen: log where every Python thread is stuck.
                log.error("Core frozen in stack!")
                stacks = getFormattedStacksForAllThreads()
                log.info(f"Listing stacks for Python threads:\n{stacks}")
            _recoverAttempt()
            time.sleep(RECOVER_ATTEMPT_INTERVAL)
            if _isAlive():
                break
        isAttemptingRecovery = False
def _shouldRecoverAfterMinTimeout():
    """Decide whether recovery should start after only MIN_CORE_ALIVE_TIMEOUT.

    @return: True if the watcher should stop waiting and recover immediately,
        False if it should keep waiting towards the normal timeout.
    @rtype: bool
    """
    guiInfo = winUser.getGUIThreadInfo(0)
    focusHwnd = guiInfo.hwndFocus
    if not focusHwnd:
        # The foreground thread is frozen, or there is no foreground thread
        # (probably due to a freeze elsewhere).
        return True
    # Imported late to avoid a circular import.
    import api
    # A system menu has been activated but NVDA's focus is not yet in the menu: use the minimum timeout.
    systemMenuActive = guiInfo.flags & winUser.GUI_SYSTEMMENUMODE and guiInfo.hwndMenuOwner
    if systemMenuActive and api.getFocusObject().windowClassName != '#32768':
        return True
    if winUser.getClassName(focusHwnd) in safeWindowClassSet:
        return False
    if not winUser.isDescendantWindow(guiInfo.hwndActive, api.getFocusObject().windowHandle):
        # The foreground window has changed.
        return True
    # Otherwise recover early only if the system focus lives in a different thread than NVDA's focus.
    focusThreadId = winUser.getWindowThreadProcessID(focusHwnd)[1]
    return focusThreadId != api.getFocusObject().windowThreadID
def _recoverAttempt():
    """Make one best-effort attempt to unfreeze the core.

    Asks COM to cancel the outgoing call currently pending on the main thread.
    Failure is expected and ignored; the watcher simply retries after RECOVER_ATTEMPT_INTERVAL.
    """
    try:
        oledll.ole32.CoCancelCall(core.mainThreadId, 0)
    except Exception:
        # oledll raises when the call returns a failure HRESULT; that is not an error here.
        # Only ordinary exceptions are swallowed so that interpreter exit requests still propagate.
        pass
class MINIDUMP_EXCEPTION_INFORMATION(ctypes.Structure):
    """ctypes mirror of the Win32 MINIDUMP_EXCEPTION_INFORMATION structure.

    Passed to MiniDumpWriteDump to describe the exception that caused the dump.
    """
    # The Windows SDK declares the minidump structures under 4-byte packing.
    # Without this, a 64-bit process would place ExceptionPointers at offset 8 instead of 4.
    # In a 32-bit process the layout is unchanged.
    _pack_ = 4
    _fields_ = (
        # Identifier of the thread that raised the exception.
        ("ThreadId", ctypes.wintypes.DWORD),
        # Pointer to the EXCEPTION_POINTERS describing the exception.
        ("ExceptionPointers", ctypes.c_void_p),
        # Whether ExceptionPointers refers to memory in the client (dumped) process.
        ("ClientPointers", ctypes.wintypes.BOOL),
    )
@ctypes.WINFUNCTYPE(ctypes.wintypes.LONG, ctypes.c_void_p)
def _crashHandler(exceptionInfo):
    """Unhandled exception filter: write a minidump, log all Python stacks and restart NVDA.

    @param exceptionInfo: Pointer to the exception information supplied by Windows.
    @return: 1 (EXCEPTION_EXECUTE_HANDLER).
    """
    kernel32 = ctypes.windll.kernel32
    crashedThreadId = kernel32.GetCurrentThreadId()
    # An asynchronous exception might be pending for this thread;
    # clear it so that it is not raised inside this function.
    ctypes.pythonapi.PyThreadState_SetAsyncExc(crashedThreadId, None)

    # The minidump goes next to the log file.
    dumpPath = os.path.join(globalVars.appArgs.logFileName, "..", "nvda_crash.dmp")
    try:
        # The file is written through its OS handle rather than Python's file API,
        # but binary mode is still the sensible choice for a dump file.
        with open(dumpPath, "wb") as dumpFile:
            excInfo = MINIDUMP_EXCEPTION_INFORMATION(
                ThreadId=crashedThreadId,
                ExceptionPointers=exceptionInfo,
                ClientPointers=False,
            )
            dumpWritten = ctypes.windll.DbgHelp.MiniDumpWriteDump(
                kernel32.GetCurrentProcess(),
                os.getpid(),
                msvcrt.get_osfhandle(dumpFile.fileno()),
                0,  # MiniDumpNormal
                ctypes.byref(excInfo),
                None,
                None,
            )
            if not dumpWritten:
                raise ctypes.WinError()
    except:
        # Deliberately catches everything: a failure here must not prevent the restart below.
        log.critical("NVDA crashed! Error writing minidump", exc_info=True)
    else:
        log.critical("NVDA crashed! Minidump written to %s" % dumpPath)

    # Record where every Python thread was at the time of the crash.
    stacks = getFormattedStacksForAllThreads()
    log.info(f"Listing stacks for Python threads:\n{stacks}")

    log.info("Restarting due to crash")
    # After a crash the queue handler cannot be relied upon to start the new NVDA instance.
    core.restartUnsafely()
    return 1  # EXCEPTION_EXECUTE_HANDLER
@ctypes.WINFUNCTYPE(None)
def _notifySendMessageCancelled():
    """Callback handed to NVDAHelper's local library for cancelled SendMessage calls.

    Arranges for exceptions.CallCancelled to be raised in the Python frame that called into
    this callback, by installing a profile function that fires for that frame.
    """
    callerFrame = inspect.currentframe().f_back
    if not callerFrame:
        # There is no Python frame to raise the exception in.
        return

    def sendMessageCallCanceller(frame, event, arg):
        # Raise when the profiler reports an event for the calling frame.
        # Raising from a profile function also deactivates it.
        if frame is callerFrame:
            raise exceptions.CallCancelled

    sys.setprofile(sendMessageCallCanceller)
def initialize():
    """Initialize the watchdog.

    Installs the crash handler, enables COM call cancellation, hooks up the helper library
    and starts the watcher thread.
    @raise RuntimeError: If the watchdog is already running.
    """
    global _watcherThread, isRunning, _cancelCallEvent
    if isRunning:
        raise RuntimeError("already running")
    isRunning = True
    # Route application crashes to _crashHandler.
    windll.kernel32.SetUnhandledExceptionFilter(_crashHandler)
    oledll.ole32.CoEnableCallCancellation(None)
    helperLib = NVDAHelper.localLib
    # Cache the event handle exported by the helper library.
    _cancelCallEvent = ctypes.wintypes.HANDLE.in_dll(helperLib, "cancelCallEvent")
    # Let the helper library notify us about cancelled SendMessage calls.
    NVDAHelper._setDllFuncPointer(helperLib, "_notifySendMessageCancelled", _notifySendMessageCancelled)
    _watcherThread = threading.Thread(target=_watcher, name=__name__)
    # Arm the timer before the watcher starts waiting on it.
    alive()
    _watcherThread.start()
def terminate():
    """Terminate the watchdog.

    Does nothing if the watchdog is not running; otherwise stops the watcher thread and waits for it.
    """
    global isRunning
    if not isRunning:
        return
    isRunning = False
    oledll.ole32.CoDisableCallCancellation(None)
    # Fire the timer immediately so the watcher wakes up, sees isRunning is False and finishes.
    immediately = ctypes.wintypes.LARGE_INTEGER(0)
    windll.kernel32.SetWaitableTimer(_coreDeadTimer, ctypes.byref(immediately), 0, None, None, False)
    _watcherThread.join()
class Suspender:
    """Context manager which suspends the watchdog for the duration of a block of code.

    On entry the core is reported asleep and cancellable execution is bypassed;
    on exit the core is reported alive again. Exceptions from the block are not suppressed.
    """

    def __enter__(self):
        global _suspended
        _suspended = True
        asleep()

    def __exit__(self, *args):
        global _suspended
        _suspended = False
        alive()
class CancellableCallThread(threading.Thread):
    """Worker thread which runs a call on behalf of another thread so that the call can be cancelled.

    Once a call has been cancelled the thread may still be stuck inside it,
    so it is marked unusable and must be abandoned.
    """

    def __init__(self):
        super().__init__()
        self.daemon = True
        # Signals the worker that a new call is ready to run.
        self._executeEvent = threading.Event()
        # Win32 event set by the worker once the call has finished.
        self._executionDoneEvent = ctypes.windll.kernel32.CreateEventW(None, False, False, None)
        # Becomes False once a call on this thread has been cancelled.
        self.isUsable = True

    def execute(self, func, *args, pumpMessages=True, **kwargs):
        """Run C{func(*args, **kwargs)} on this thread and wait for it to finish or be cancelled.

        @param func: The callable to run.
        @param pumpMessages: Whether to wait using CoWaitForMultipleHandles
            rather than WaitForMultipleObjects.
        @return: Whatever C{func} returned.
        @raise CallCancelled: If recovery is already in progress or the call was cancelled.
        """
        # Name the thread after the call being made.
        self.name = f"{type(self).__module__}.{self.execute.__qualname__}({func!r})"
        if isAttemptingRecovery:
            # The core is already considered dead; don't even start the call.
            raise exceptions.CallCancelled

        # Hand the call over to the worker.
        self._func = func
        self._args = args
        self._kwargs = kwargs
        self._result = None
        self._exc_info = None
        self._executeEvent.set()

        # Wait for whichever happens first: index 0 is completion, index 1 is cancellation.
        handles = (ctypes.wintypes.HANDLE * 2)(self._executionDoneEvent, _cancelCallEvent)
        signalled = ctypes.wintypes.DWORD()
        if pumpMessages:
            oledll.ole32.CoWaitForMultipleHandles(0, winKernel.INFINITE, 2, handles, ctypes.byref(signalled))
        else:
            signalled.value = windll.kernel32.WaitForMultipleObjects(2, handles, False, winKernel.INFINITE)
        if signalled.value == 1:
            # Cancelled: the worker may never return from the call, so never reuse it.
            self.isUsable = False
            raise exceptions.CallCancelled

        raised = self._exc_info
        if raised:
            # The function raised in the worker thread; re-raise it in the calling thread.
            # The traceback is part of the exception object,
            # so the logged traceback shows the stack for both threads.
            raise raised
        return self._result

    def run(self):
        """Worker loop: run each submitted call, then signal that it is done."""
        comtypes.CoInitializeEx(comtypes.COINIT_MULTITHREADED)
        while self.isUsable:
            self._executeEvent.wait()
            self._executeEvent.clear()
            try:
                self._result = self._func(*self._args, **self._kwargs)
            except Exception as e:
                # Stored for execute() to re-raise in the calling thread.
                self._exc_info = e
            ctypes.windll.kernel32.SetEvent(self._executionDoneEvent)
        # The thread has been abandoned; release its event handle.
        ctypes.windll.kernel32.CloseHandle(self._executionDoneEvent)
# The worker thread currently used by cancellableExecute.
# Created on first use and replaced once a cancelled call has made it unusable.
cancellableCallThread = None


def cancellableExecute(func, *args, ccPumpMessages=True, **kwargs):
    """Execute a function, making the call cancellable when made from the main thread.

    When called from the main thread while the watchdog is running and not suspended,
    the function is run on a L{CancellableCallThread} while this thread waits for it.
    In every other case the function is simply called directly in the calling thread.
    @param func: The function to execute.
    @type func: callable
    @param ccPumpMessages: Whether to pump messages while waiting.
    @type ccPumpMessages: bool
    @param args: Positional arguments for the function.
    @param kwargs: Keyword arguments for the function.
    @return: Whatever C{func} returns.
    @raise CallCancelled: If the call was cancelled.
    """
    global cancellableCallThread
    if not isRunning or _suspended or threading.current_thread() is not threading.main_thread():
        # Watchdog is not running (or is suspended) or this is a background thread,
        # so just execute the call.
        return func(*args, **kwargs)
    if not cancellableCallThread or not cancellableCallThread.isUsable:
        # The thread hasn't yet been created or is not usable.
        # Create a new one.
        cancellableCallThread = CancellableCallThread()
        cancellableCallThread.start()
    return cancellableCallThread.execute(func, *args, pumpMessages=ccPumpMessages, **kwargs)
def cancellableSendMessage(hwnd, msg, wParam, lParam, flags=0, timeout=60000):
    """Send a window message in a way that allows the call to be cancelled.

    The C{timeout} and C{flags} arguments should usually be left at their default values.
    Even if the specified timeout has not yet been reached, the call is still cancelled when appropriate.
    @return: The value stored in the result buffer by the helper library.
    @raise CallCancelled: If the call was cancelled.
    """
    outcome = ctypes.wintypes.DWORD()
    NVDAHelper.localLib.cancellableSendMessageTimeout(
        hwnd, msg, wParam, lParam, flags, timeout, ctypes.byref(outcome)
    )
    return outcome.value
class WatchdogObserver:
    """Read-only view of the watchdog's state."""

    @property
    def isAttemptingRecovery(self) -> bool:
        """Whether the watcher is currently trying to recover the core (the module-level flag)."""
        # Inside the method the bare name refers to the module global, not to this property.
        return isAttemptingRecovery