forked from nvaccess/nvda
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathXMLFormatting.py
executable file
·78 lines (69 loc) · 2.57 KB
/
XMLFormatting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# A part of NonVisual Desktop Access (NVDA)
# Copyright (C) 2008-2021 NV Access Limited, Babbage B.V.
# This file is covered by the GNU General Public License.
# See the file COPYING for more details.
import typing
from xml.parsers import expat
import textInfos
import textUtils
from logHandler import log
from textUtils import WCHAR_ENCODING, isLowSurrogate
# One parsed item: a field command, a run of text, or None.
CommandsT = typing.Union[textInfos.FieldCommand, str, None]
# Ordered list of parsed items, as returned by XMLTextParser.parse.
CommandListT = typing.List[CommandsT]
class XMLTextParser(object):
    """Parse XML text made of ``control``, ``text`` and ``unich`` elements.

    The result of L{parse} is a flat list in document order containing
    C{textInfos.FieldCommand} objects (``controlStart``, ``controlEnd``,
    ``formatChange``) interleaved with plain strings of character data.
    Adjacent character data is merged into a single string.
    """

    def _startElementHandler(self, tagName, attrs):
        """Handle an opening tag reported by expat.

        @param tagName: Element name; one of ``unich``, ``control`` or ``text``.
        @param attrs: Mapping of the element's attribute names to string values.
        @raise ValueError: If C{tagName} is not one of the known element names.
        """
        if tagName == 'unich':
            # A character given by its numeric code in the "value" attribute.
            data = attrs.get('value', None)
            if data is not None:
                try:
                    data = chr(int(data))
                except (ValueError, OverflowError):
                    # int() rejects non-numeric text; chr() raises ValueError for
                    # out-of-range code points and OverflowError for integers
                    # that do not fit in a C int.
                    data = textUtils.REPLACEMENT_CHAR
                self._CharacterDataHandler(data, processBufferedSurrogates=isLowSurrogate(data))
            return
        elif tagName == 'control':
            newAttrs = textInfos.ControlField(attrs)
            self._commandList.append(textInfos.FieldCommand("controlStart", newAttrs))
        elif tagName == 'text':
            newAttrs = textInfos.FormatField(attrs)
            self._commandList.append(textInfos.FieldCommand("formatChange", newAttrs))
        else:
            raise ValueError("Unknown tag name: %s" % tagName)

        # Normalise attributes common to both field types:
        # the string "1" becomes True, any other value becomes False.
        # Absent attributes are left absent.
        for flagName in ("_startOfNode", "_endOfNode"):
            try:
                newAttrs[flagName] = newAttrs[flagName] == "1"
            except KeyError:
                pass

    def _EndElementHandler(self, tagName):
        """Handle a closing tag reported by expat.

        Only ``control`` produces a command (``controlEnd``);
        ``text`` and ``unich`` need no closing command.

        @param tagName: Element name; one of ``unich``, ``control`` or ``text``.
        @raise ValueError: If C{tagName} is not one of the known element names.
        """
        if tagName == "control":
            self._commandList.append(textInfos.FieldCommand("controlEnd", None))
        elif tagName in ("text", "unich"):
            pass
        else:
            raise ValueError("unknown tag name: %s" % tagName)

    def _CharacterDataHandler(self, data: typing.Optional[str], processBufferedSurrogates=False):
        """Add character data to the command list, merging it with preceding text.

        @param data: The text to add. Anything other than a string is logged
            and stored as a separate entry.
        @param processBufferedSurrogates: If True, and C{data} was merged into
            already buffered text, the merged text is round-tripped through
            C{WCHAR_ENCODING} so that surrogates buffered earlier can be
            combined with the one just added.
        """
        cmdList = self._commandList
        if not isinstance(data, str):
            log.warning(f"unknown type for data: {data!r}")
            # Store it as its own entry: concatenating it onto buffered text
            # would raise TypeError and abort the whole parse.
            cmdList.append(data)
            return
        if cmdList and isinstance(cmdList[-1], str):
            cmdList[-1] += data
            if processBufferedSurrogates:
                # "surrogatepass" allows lone surrogates to be encoded, so that
                # decoding can join them into a single character.
                # NOTE(review): assumes WCHAR_ENCODING is a UTF-16 variant —
                # confirm in textUtils.
                cmdList[-1] = cmdList[-1].encode(WCHAR_ENCODING, errors="surrogatepass").decode(WCHAR_ENCODING)
        else:
            cmdList.append(data)

    def parse(self, XMLText) -> CommandListT:
        """Parse C{XMLText} and return the resulting list of commands and strings.

        Parsing is best effort: if the parser or one of the handlers raises,
        the error is logged together with the XML, and whatever was collected
        before the failure is returned.

        @param XMLText: The XML document to parse.
        @return: Field commands and text strings in document order.
        """
        parser = expat.ParserCreate('utf-8')
        parser.StartElementHandler = self._startElementHandler
        parser.EndElementHandler = self._EndElementHandler
        parser.CharacterDataHandler = self._CharacterDataHandler
        # A fresh list per call, so results of earlier parses are never mutated.
        self._commandList: CommandListT = []
        try:
            parser.Parse(XMLText)
        except Exception:
            log.error("XML: %s" % XMLText, exc_info=True)
        return self._commandList