From 75f0f98801365db46d7e2bc4118b38926420946c Mon Sep 17 00:00:00 2001 From: "Seva Alekseyev (he/him)" Date: Tue, 23 Apr 2024 10:59:00 -0400 Subject: [PATCH] Performance enhancements (#557) * Performance enhancements * Exception wording --- elftools/common/construct_utils.py | 72 +++++++-------- elftools/dwarf/abbrevtable.py | 9 +- elftools/dwarf/callframe.py | 32 +++---- elftools/dwarf/die.py | 139 ++++++++++++++--------------- elftools/dwarf/dwarf_expr.py | 84 ++++++++--------- elftools/dwarf/dwarf_util.py | 2 +- elftools/dwarf/dwarfinfo.py | 4 +- elftools/dwarf/lineprogram.py | 22 ++--- elftools/dwarf/locationlists.py | 8 +- elftools/dwarf/ranges.py | 4 +- elftools/dwarf/structs.py | 79 +++++++++------- 11 files changed, 230 insertions(+), 225 deletions(-) diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index 425b7c59..50f2fd92 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -9,7 +9,7 @@ from struct import Struct from ..construct import ( Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil, - Rename, SizeofError, Construct, StaticField + Rename, SizeofError, Construct, StaticField, FieldError ) @@ -47,49 +47,43 @@ def _build(self, obj, stream, context): def _sizeof(self, context): raise SizeofError("can't calculate size") +class ULEB128(Construct): + """A construct based parser for ULEB128 encoding. -def _LEB128_reader(): - """ Read LEB128 variable-length data from the stream. The data is terminated - by a byte with 0 in its highest bit. + Incompatible with Python 2 - assumes that the return of read() + is an indexed collection of numbers. """ - return RepeatUntil( - lambda obj, ctx: ord(obj) < 0x80, - Field(None, 1)) - - -class _ULEB128Adapter(Adapter): - """ An adapter for ULEB128, given a sequence of bytes in a sub-construct. 
- """ - def _decode(self, obj, context): + def _parse(self, stream, context): value = 0 - for b in reversed(obj): - value = (value << 7) + (ord(b) & 0x7F) - return value - - -class _SLEB128Adapter(Adapter): - """ An adapter for SLEB128, given a sequence of bytes in a sub-construct. + shift = 0 + while True: + data = stream.read(1) + if len(data) != 1: + raise FieldError("unexpected end of stream while parsing a ULEB128 encoded value") + b = data[0] + value |= (b & 0x7F) << shift + shift += 7 + if b & 0x80 == 0: + return value + +class SLEB128(Construct): + """A construct based parser for SLEB128 encoding. + + Incompatible with Python 2 - assumes that the return of read() + is an indexed collection of numbers. """ - def _decode(self, obj, context): + def _parse(self, stream, context): value = 0 - for b in reversed(obj): - value = (value << 7) + (ord(b) & 0x7F) - if ord(obj[-1]) & 0x40: - # negative -> sign extend - value |= - (1 << (7 * len(obj))) - return value - - -def ULEB128(name): - """ A construct creator for ULEB128 encoding. - """ - return Rename(name, _ULEB128Adapter(_LEB128_reader())) - - -def SLEB128(name): - """ A construct creator for SLEB128 encoding. - """ - return Rename(name, _SLEB128Adapter(_LEB128_reader())) + shift = 0 + while True: + data = stream.read(1) + if len(data) != 1: + raise FieldError("unexpected end of stream while parsing a SLEB128 encoded value") + b = data[0] + value |= (b & 0x7F) << shift + shift += 7 + if b & 0x80 == 0: + return value | (~0 << shift) if b & 0x40 else value class StreamOffset(Construct): """ diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 6d29d5cf..060de775 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -12,6 +12,7 @@ class AbbrevTable(object): """ Represents a DWARF abbreviation table. """ + __slots__ = ('structs', 'stream', 'offset', '_abbrev_map') def __init__(self, structs, stream, offset): """ Create new abbreviation table. 
Parses the actual table from the stream and stores it internally. @@ -42,7 +43,7 @@ def _parse_abbrev_table(self): self.stream.seek(self.offset) while True: decl_code = struct_parse( - struct=self.structs.Dwarf_uleb128(''), + struct=self.structs.the_Dwarf_uleb128, stream=self.stream) if decl_code == 0: break @@ -59,14 +60,14 @@ class AbbrevDecl(object): The abbreviation declaration represents an "entry" that points to it. """ + __slots__ = ('code', 'decl', '_has_children') def __init__(self, code, decl): self.code = code self.decl = decl + self._has_children = decl['children_flag'] == 'DW_CHILDREN_yes' def has_children(self): - """ Does the entry have children? - """ - return self['children_flag'] == 'DW_CHILDREN_yes' + return self._has_children def iter_attr_specs(self): """ Iterate over the attribute specifications for the entry. Yield diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index 0e071170..8fa9990f 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -90,7 +90,7 @@ def _parse_entry_at(self, offset): return self._entry_cache[offset] entry_length = struct_parse( - self.base_structs.Dwarf_uint32(''), self.stream, offset) + self.base_structs.the_Dwarf_uint32, self.stream, offset) if self.for_eh_frame and entry_length == 0: return ZERO(offset) @@ -104,7 +104,7 @@ def _parse_entry_at(self, offset): # Read the next field to see whether this is a CIE or FDE CIE_id = struct_parse( - entry_structs.Dwarf_offset(''), self.stream) + entry_structs.the_Dwarf_offset, self.stream) if self.for_eh_frame: is_CIE = CIE_id == 0 @@ -184,7 +184,7 @@ def _parse_instructions(self, structs, offset, end_offset): """ instructions = [] while offset < end_offset: - opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset) + opcode = struct_parse(structs.the_Dwarf_uint8, self.stream, offset) args = [] primary = opcode & _PRIMARY_MASK @@ -194,7 +194,7 @@ def _parse_instructions(self, structs, offset, end_offset): elif primary == 
DW_CFA_offset: args = [ primary_arg, - struct_parse(structs.Dwarf_uleb128(''), self.stream)] + struct_parse(structs.the_Dwarf_uleb128, self.stream)] elif primary == DW_CFA_restore: args = [primary_arg] # primary == 0 and real opcode is extended @@ -203,39 +203,39 @@ def _parse_instructions(self, structs, offset, end_offset): args = [] elif opcode == DW_CFA_set_loc: args = [ - struct_parse(structs.Dwarf_target_addr(''), self.stream)] + struct_parse(structs.the_Dwarf_target_addr, self.stream)] elif opcode == DW_CFA_advance_loc1: - args = [struct_parse(structs.Dwarf_uint8(''), self.stream)] + args = [struct_parse(structs.the_Dwarf_uint8, self.stream)] elif opcode == DW_CFA_advance_loc2: - args = [struct_parse(structs.Dwarf_uint16(''), self.stream)] + args = [struct_parse(structs.the_Dwarf_uint16, self.stream)] elif opcode == DW_CFA_advance_loc4: - args = [struct_parse(structs.Dwarf_uint32(''), self.stream)] + args = [struct_parse(structs.the_Dwarf_uint32, self.stream)] elif opcode in (DW_CFA_offset_extended, DW_CFA_register, DW_CFA_def_cfa, DW_CFA_val_offset): args = [ - struct_parse(structs.Dwarf_uleb128(''), self.stream), - struct_parse(structs.Dwarf_uleb128(''), self.stream)] + struct_parse(structs.the_Dwarf_uleb128, self.stream), + struct_parse(structs.the_Dwarf_uleb128, self.stream)] elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined, DW_CFA_same_value, DW_CFA_def_cfa_register, DW_CFA_def_cfa_offset): - args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] + args = [struct_parse(structs.the_Dwarf_uleb128, self.stream)] elif opcode == DW_CFA_def_cfa_offset_sf: - args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)] + args = [struct_parse(structs.the_Dwarf_sleb128, self.stream)] elif opcode == DW_CFA_def_cfa_expression: args = [struct_parse( structs.Dwarf_dw_form['DW_FORM_block'], self.stream)] elif opcode in (DW_CFA_expression, DW_CFA_val_expression): args = [ - struct_parse(structs.Dwarf_uleb128(''), self.stream), + 
struct_parse(structs.the_Dwarf_uleb128, self.stream), struct_parse( structs.Dwarf_dw_form['DW_FORM_block'], self.stream)] elif opcode in (DW_CFA_offset_extended_sf, DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf): args = [ - struct_parse(structs.Dwarf_uleb128(''), self.stream), - struct_parse(structs.Dwarf_sleb128(''), self.stream)] + struct_parse(structs.the_Dwarf_uleb128, self.stream), + struct_parse(structs.the_Dwarf_sleb128, self.stream)] elif opcode == DW_CFA_GNU_args_size: - args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] + args = [struct_parse(structs.the_Dwarf_uleb128, self.stream)] else: dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode) diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 2b18a50e..208e2538 100755 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -9,10 +9,11 @@ from collections import namedtuple, OrderedDict import os -from ..common.exceptions import DWARFError +from ..common.exceptions import DWARFError, ELFParseError from ..common.utils import bytes2str, struct_parse, preserve_stream_pos from .enums import DW_FORM_raw2name from .dwarf_util import _resolve_via_offset_table, _get_base_offset +from ..construct import ConstructError # AttributeValue - describes an attribute value in the DIE: @@ -224,52 +225,56 @@ def _parse_DIE(self): """ Parses the DIE info from the section, based on the abbreviation table of the CU """ - structs = self.cu.structs - - # A DIE begins with the abbreviation code. Read it and use it to - # obtain the abbrev declaration for this DIE. - # Note: here and elsewhere, preserve_stream_pos is used on operations - # that manipulate the stream by reading data from it. 
- self.abbrev_code = struct_parse( - structs.Dwarf_uleb128(''), self.stream, self.offset) - - # This may be a null entry - if self.abbrev_code == 0: - self.size = self.stream.tell() - self.offset - return - - abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code) - self.tag = abbrev_decl['tag'] - self.has_children = abbrev_decl.has_children() - - # Guided by the attributes listed in the abbreviation declaration, parse - # values from the stream. - for spec in abbrev_decl['attr_spec']: - form = spec.form - name = spec.name - attr_offset = self.stream.tell() - indirection_length = 0 - # Special case here: the attribute value is stored in the attribute - # definition in the abbreviation spec, not in the DIE itself. - if form == 'DW_FORM_implicit_const': - value = spec.value - raw_value = value - # Another special case: the attribute value is a form code followed by the real value in that form - elif form == 'DW_FORM_indirect': - (form, raw_value, indirection_length) = self._resolve_indirect() - value = self._translate_attr_value(form, raw_value) - else: - raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) - value = self._translate_attr_value(form, raw_value) - self.attributes[name] = AttributeValue( - name=name, - form=form, - value=value, - raw_value=raw_value, - offset=attr_offset, - indirection_length = indirection_length) - - self.size = self.stream.tell() - self.offset + try: + structs = self.cu.structs + stream = self.stream + + # A DIE begins with the abbreviation code. Read it and use it to + # obtain the abbrev declaration for this DIE. + # Note: here and elsewhere, preserve_stream_pos is used on operations + # that manipulate the stream by reading data from it. 
+ stream.seek(self.offset) + self.abbrev_code = structs.the_Dwarf_uleb128.parse_stream(stream) + + # This may be a null entry + if self.abbrev_code == 0: + self.size = stream.tell() - self.offset + return + + abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code) + self.tag = abbrev_decl['tag'] + self.has_children = abbrev_decl.has_children() + + # Guided by the attributes listed in the abbreviation declaration, parse + # values from the stream. + for spec in abbrev_decl['attr_spec']: + form = spec.form + name = spec.name + attr_offset = stream.tell() + indirection_length = 0 + # Special case here: the attribute value is stored in the attribute + # definition in the abbreviation spec, not in the DIE itself. + if form == 'DW_FORM_implicit_const': + value = spec.value + raw_value = value + # Another special case: the attribute value is a form code followed by the real value in that form + elif form == 'DW_FORM_indirect': + (form, raw_value, indirection_length) = self._resolve_indirect() + value = self._translate_attr_value(form, raw_value) + else: + raw_value = structs.Dwarf_dw_form[form].parse_stream(stream) + value = self._translate_attr_value(form, raw_value) + self.attributes[name] = AttributeValue( + name=name, + form=form, + value=value, + raw_value=raw_value, + offset=attr_offset, + indirection_length = indirection_length) + + self.size = stream.tell() - self.offset + except ConstructError as e: + raise ELFParseError(str(e)) def _resolve_indirect(self): # Supports arbitrary indirection nesting (the standard doesn't prohibit that). @@ -277,7 +282,7 @@ def _resolve_indirect(self): # Returns (form, raw_value, length). 
structs = self.cu.structs length = 1 - real_form_code = struct_parse(structs.Dwarf_uleb128(''), self.stream) # Numeric form code + real_form_code = struct_parse(structs.the_Dwarf_uleb128, self.stream) # Numeric form code while True: try: real_form = DW_FORM_raw2name[real_form_code] # Form name or exception if bogus code @@ -302,39 +307,30 @@ def _translate_attr_value(self, form, raw_value): # This breaks if there is an indirect encoding in the top DIE itself before the # corresponding _base, and it was seen in the wild. # There is a hook in get_top_DIE() to resolve those lazily. - translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset - value = None + translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset if form == 'DW_FORM_strp': - with preserve_stream_pos(self.stream): - value = self.dwarfinfo.get_string_from_table(raw_value) + return self.dwarfinfo.get_string_from_table(raw_value) elif form == 'DW_FORM_line_strp': - with preserve_stream_pos(self.stream): - value = self.dwarfinfo.get_string_from_linetable(raw_value) - elif form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'): - if self.dwarfinfo.supplementary_dwarfinfo: - return self.dwarfinfo.supplementary_dwarfinfo.get_string_from_table(raw_value) - else: - value = raw_value + return self.dwarfinfo.get_string_from_linetable(raw_value) + elif form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup') and self.dwarfinfo.supplementary_dwarfinfo: + return self.dwarfinfo.supplementary_dwarfinfo.get_string_from_table(raw_value) elif form == 'DW_FORM_flag': - value = not raw_value == 0 + return not raw_value == 0 elif form == 'DW_FORM_flag_present': - value = True + return True elif form in ('DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4') and translate_indirect: - value = self.cu.dwarfinfo.get_addr(self.cu, raw_value) + return self.cu.dwarfinfo.get_addr(self.cu, raw_value) elif form in ('DW_FORM_strx', 'DW_FORM_strx1', 
'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4') and translate_indirect: stream = self.dwarfinfo.debug_str_offsets_sec.stream base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base') offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8 - with preserve_stream_pos(stream): - str_offset = struct_parse(self.cu.structs.Dwarf_offset(''), stream, base_offset + raw_value*offset_size) - value = self.dwarfinfo.get_string_from_table(str_offset) + str_offset = struct_parse(self.cu.structs.the_Dwarf_offset, stream, base_offset + raw_value*offset_size) + return self.dwarfinfo.get_string_from_table(str_offset) elif form == 'DW_FORM_loclistx' and translate_indirect: - value = _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base') + return _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base') elif form == 'DW_FORM_rnglistx' and translate_indirect: - value = _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base') - else: - value = raw_value - return value + return _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base') + return raw_value def _translate_indirect_attributes(self): """ This is a hook to translate the DW_FORM_...x values in the top DIE @@ -343,8 +339,7 @@ def _translate_indirect_attributes(self): reference to the DW_AT_xxx_base attribute in the same DIE that may not have been parsed yet. 
""" - for key in self.attributes: - attr = self.attributes[key] + for key, attr in self.attributes.items(): if attr.form in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4', 'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4', 'DW_FORM_loclistx', 'DW_FORM_rnglistx'): diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index f05f739b..a8f07f29 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -174,7 +174,7 @@ def parse_noargs(): return lambda stream: [] def parse_op_addr(): - return lambda stream: [struct_parse(structs.Dwarf_target_addr(''), + return lambda stream: [struct_parse(structs.the_Dwarf_target_addr, stream)] def parse_arg_struct(arg_struct): @@ -187,46 +187,46 @@ def parse_arg_struct2(arg1_struct, arg2_struct): # ULEB128, then an expression of that length def parse_nestedexpr(): def parse(stream): - size = struct_parse(structs.Dwarf_uleb128(''), stream) + size = struct_parse(structs.the_Dwarf_uleb128, stream) nested_expr_blob = read_blob(stream, size) return [DWARFExprParser(structs).parse_expr(nested_expr_blob)] return parse # ULEB128, then a blob of that size def parse_blob(): - return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))] + return lambda stream: [read_blob(stream, struct_parse(structs.the_Dwarf_uleb128, stream))] # ULEB128 with datatype DIE offset, then byte, then a blob of that size def parse_typedblob(): - return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] + return lambda stream: [struct_parse(structs.the_Dwarf_uleb128, stream), read_blob(stream, struct_parse(structs.the_Dwarf_uint8, stream))] # https://yurydelendik.github.io/webassembly-dwarf/ # Byte, then variant: 0, 1, 2 => uleb128, 3 => uint32 def parse_wasmloc(): def parse(stream): - op = struct_parse(structs.Dwarf_uint8(''), stream) + op = 
struct_parse(structs.the_Dwarf_uint8, stream) if 0 <= op <= 2: - return [op, struct_parse(structs.Dwarf_uleb128(''), stream)] + return [op, struct_parse(structs.the_Dwarf_uleb128, stream)] elif op == 3: - return [op, struct_parse(structs.Dwarf_uint32(''), stream)] + return [op, struct_parse(structs.the_Dwarf_uint32, stream)] else: raise DWARFError("Unknown operation code in DW_OP_WASM_location: %d" % (op,)) return parse add('DW_OP_addr', parse_op_addr()) - add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) + add('DW_OP_addrx', parse_arg_struct(structs.the_Dwarf_uleb128)) + add('DW_OP_const1u', parse_arg_struct(structs.the_Dwarf_uint8)) add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) - add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) + add('DW_OP_const2u', parse_arg_struct(structs.the_Dwarf_uint16)) add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16(''))) - add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32(''))) + add('DW_OP_const4u', parse_arg_struct(structs.the_Dwarf_uint32)) add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32(''))) add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64(''))) add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64(''))) - add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128(''))) - add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8(''))) - add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_constu', parse_arg_struct(structs.the_Dwarf_uleb128)) + add('DW_OP_consts', parse_arg_struct(structs.the_Dwarf_sleb128)) + add('DW_OP_pick', parse_arg_struct(structs.the_Dwarf_uint8)) + add('DW_OP_plus_uconst', parse_arg_struct(structs.the_Dwarf_uleb128)) add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16(''))) add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16(''))) @@ -245,40 +245,40 @@ def 
parse(stream): for n in range(0, 32): add('DW_OP_lit%s' % n, parse_noargs()) add('DW_OP_reg%s' % n, parse_noargs()) - add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128(''))) - - add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128(''))) - add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_sleb128(''))) - add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128(''))) - add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_uleb128(''))) + add('DW_OP_breg%s' % n, parse_arg_struct(structs.the_Dwarf_sleb128)) + + add('DW_OP_fbreg', parse_arg_struct(structs.the_Dwarf_sleb128)) + add('DW_OP_regx', parse_arg_struct(structs.the_Dwarf_uleb128)) + add('DW_OP_bregx', parse_arg_struct2(structs.the_Dwarf_uleb128, + structs.the_Dwarf_sleb128)) + add('DW_OP_piece', parse_arg_struct(structs.the_Dwarf_uleb128)) + add('DW_OP_bit_piece', parse_arg_struct2(structs.the_Dwarf_uleb128, + structs.the_Dwarf_uleb128)) add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8(''))) add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8(''))) - add('DW_OP_call2', parse_arg_struct(structs.Dwarf_uint16(''))) - add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32(''))) - add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_call2', parse_arg_struct(structs.the_Dwarf_uint16)) + add('DW_OP_call4', parse_arg_struct(structs.the_Dwarf_uint32)) + add('DW_OP_call_ref', parse_arg_struct(structs.the_Dwarf_offset)) add('DW_OP_implicit_value', parse_blob()) add('DW_OP_entry_value', parse_nestedexpr()) add('DW_OP_const_type', parse_typedblob()) - add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), - 
structs.Dwarf_sleb128(''))) - add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_regval_type', parse_arg_struct2(structs.the_Dwarf_uleb128, + structs.the_Dwarf_uleb128)) + add('DW_OP_deref_type', parse_arg_struct2(structs.the_Dwarf_uint8, + structs.the_Dwarf_uleb128)) + add('DW_OP_implicit_pointer', parse_arg_struct2(structs.the_Dwarf_offset, + structs.the_Dwarf_sleb128)) + add('DW_OP_convert', parse_arg_struct(structs.the_Dwarf_uleb128)) add('DW_OP_GNU_entry_value', parse_nestedexpr()) add('DW_OP_GNU_const_type', parse_typedblob()) - add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), - structs.Dwarf_uleb128(''))) - add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), - structs.Dwarf_sleb128(''))) - add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset(''))) - add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.the_Dwarf_uleb128, + structs.the_Dwarf_uleb128)) + add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.the_Dwarf_uint8, + structs.the_Dwarf_uleb128)) + add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.the_Dwarf_offset, + structs.the_Dwarf_sleb128)) + add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.the_Dwarf_offset)) + add('DW_OP_GNU_convert', parse_arg_struct(structs.the_Dwarf_uleb128)) add('DW_OP_WASM_location', parse_wasmloc()) return table diff --git a/elftools/dwarf/dwarf_util.py b/elftools/dwarf/dwarf_util.py index 3dc5681d..811cc0b0 100644 --- a/elftools/dwarf/dwarf_util.py +++ b/elftools/dwarf/dwarf_util.py @@ -38,7 +38,7 @@ def _resolve_via_offset_table(stream, cu, index, base_attribute_name): offset_size = 4 if cu.structs.dwarf_format == 32 else 8 with preserve_stream_pos(stream): - return base_offset + struct_parse(cu.structs.Dwarf_offset(''), stream, base_offset 
+ index*offset_size) + return base_offset + struct_parse(cu.structs.the_Dwarf_offset, stream, base_offset + index*offset_size) def _iter_CUs_in_section(stream, structs, parser): """Iterates through the list of CU sections in loclists or rangelists. Almost identical structures there. diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 40614628..f3ad369b 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -393,7 +393,7 @@ def get_addr(self, cu, addr_index): raise DWARFError('The file does not contain a debug_addr section for indirect address access') # Selectors are not supported, but no assert on that. TODO? cu_addr_base = _get_base_offset(cu, 'DW_AT_addr_base') - return struct_parse(cu.structs.Dwarf_target_addr(''), self.debug_addr_sec.stream, cu_addr_base + addr_index*cu.header.address_size) + return struct_parse(cu.structs.the_Dwarf_target_addr, self.debug_addr_sec.stream, cu_addr_base + addr_index*cu.header.address_size) #------ PRIVATE ------# @@ -457,7 +457,7 @@ def _parse_CU_at_offset(self, offset): # instance suitable for this CU and use it to parse the rest. # initial_length = struct_parse( - self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset) + self.structs.the_Dwarf_uint32, self.debug_info_sec.stream, offset) dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index dbde7baf..ea680f6d 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -144,7 +144,7 @@ def add_entry_old_state(cmd, args, is_extended=False): offset = self.program_start_offset while offset < self.program_end_offset: opcode = struct_parse( - self.structs.Dwarf_uint8(''), + self.structs.the_Dwarf_uint8, self.stream, offset) @@ -171,9 +171,9 @@ def add_entry_old_state(cmd, args, is_extended=False): elif opcode == 0: # Extended opcode: start with a zero byte, followed by # instruction size and the instruction itself. 
- inst_len = struct_parse(self.structs.Dwarf_uleb128(''), + inst_len = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) - ex_opcode = struct_parse(self.structs.Dwarf_uint8(''), + ex_opcode = struct_parse(self.structs.the_Dwarf_uint8, self.stream) if ex_opcode == DW_LNE_end_sequence: @@ -183,7 +183,7 @@ def add_entry_old_state(cmd, args, is_extended=False): # reset state state = LineState(self.header['default_is_stmt']) elif ex_opcode == DW_LNE_set_address: - operand = struct_parse(self.structs.Dwarf_target_addr(''), + operand = struct_parse(self.structs.the_Dwarf_target_addr, self.stream) state.address = operand add_entry_old_state(ex_opcode, [operand], is_extended=True) @@ -193,7 +193,7 @@ def add_entry_old_state(cmd, args, is_extended=False): self['file_entry'].append(operand) add_entry_old_state(ex_opcode, [operand], is_extended=True) elif ex_opcode == DW_LNE_set_discriminator: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) state.discriminator = operand else: @@ -207,23 +207,23 @@ def add_entry_old_state(cmd, args, is_extended=False): if opcode == DW_LNS_copy: add_entry_new_state(opcode, []) elif opcode == DW_LNS_advance_pc: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) address_addend = ( operand * self.header['minimum_instruction_length']) state.address += address_addend add_entry_old_state(opcode, [address_addend]) elif opcode == DW_LNS_advance_line: - operand = struct_parse(self.structs.Dwarf_sleb128(''), + operand = struct_parse(self.structs.the_Dwarf_sleb128, self.stream) state.line += operand elif opcode == DW_LNS_set_file: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) state.file = operand add_entry_old_state(opcode, [operand]) elif opcode == DW_LNS_set_column: - operand = 
struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) state.column = operand add_entry_old_state(opcode, [operand]) @@ -240,7 +240,7 @@ def add_entry_old_state(cmd, args, is_extended=False): state.address += address_addend add_entry_old_state(opcode, [address_addend]) elif opcode == DW_LNS_fixed_advance_pc: - operand = struct_parse(self.structs.Dwarf_uint16(''), + operand = struct_parse(self.structs.the_Dwarf_uint16, self.stream) state.address += operand add_entry_old_state(opcode, [operand]) @@ -251,7 +251,7 @@ def add_entry_old_state(cmd, args, is_extended=False): state.epilogue_begin = True add_entry_old_state(opcode, []) elif opcode == DW_LNS_set_isa: - operand = struct_parse(self.structs.Dwarf_uleb128(''), + operand = struct_parse(self.structs.the_Dwarf_uleb128, self.stream) state.isa = operand add_entry_old_state(opcode, [operand]) diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index ffdfde8b..d7abbfb6 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -207,9 +207,9 @@ def _parse_location_list_from_stream(self): while True: entry_offset = self.stream.tell() begin_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.the_Dwarf_target_addr, self.stream) end_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.the_Dwarf_target_addr, self.stream) if begin_offset == 0 and end_offset == 0: # End of list - we're done. 
break @@ -220,8 +220,8 @@ def _parse_location_list_from_stream(self): else: # Location list entry expr_len = struct_parse( - self.structs.Dwarf_uint16(''), self.stream) - loc_expr = [struct_parse(self.structs.Dwarf_uint8(''), + self.structs.the_Dwarf_uint16, self.stream) + loc_expr = [struct_parse(self.structs.the_Dwarf_uint8, self.stream) for i in range(expr_len)] entry_length = self.stream.tell() - entry_offset diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 4491918f..04d4bc59 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -179,9 +179,9 @@ def _parse_range_list_from_stream(self, cu): while True: entry_offset = self.stream.tell() begin_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.the_Dwarf_target_addr, self.stream) end_offset = struct_parse( - self.structs.Dwarf_target_addr(''), self.stream) + self.structs.the_Dwarf_target_addr, self.stream) if begin_offset == 0 and end_offset == 0: # End of list - we're done. break diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index bad6a8cc..df6075e8 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -147,6 +147,16 @@ def _create_structs(self): self.Dwarf_int32 = SBInt32 self.Dwarf_int64 = SBInt64 + # Only instantiate those parsers that are used standalone, + # as opposed to dispatch tables (e. g. forms, opcodes). + # In dispatch tables, they are instantiated already. + # LEB128 parsers are instantiated too, elsewhere. 
+ self.the_Dwarf_offset = self.Dwarf_offset('') + self.the_Dwarf_target_addr = self.Dwarf_target_addr('') + self.the_Dwarf_uint32 = self.Dwarf_uint32('') + self.the_Dwarf_uint16 = self.Dwarf_uint16('') + self.the_Dwarf_uint8 = self.Dwarf_uint8('') + self._create_initial_length() self._create_leb128() self._create_cu_header() @@ -180,6 +190,8 @@ def _InitialLength(name): def _create_leb128(self): self.Dwarf_uleb128 = ULEB128 self.Dwarf_sleb128 = SLEB128 + self.the_Dwarf_uleb128 = self.Dwarf_uleb128('') + self.the_Dwarf_sleb128 = self.Dwarf_sleb128('') def _create_cu_header(self): dwarfv4_CU_header = Struct('', @@ -251,12 +263,12 @@ def _create_gnu_debugaltlink(self): def _create_dw_form(self): self.Dwarf_dw_form = dict( - DW_FORM_addr=self.Dwarf_target_addr(''), - DW_FORM_addrx=self.Dwarf_uleb128(''), - DW_FORM_addrx1=self.Dwarf_uint8(''), - DW_FORM_addrx2=self.Dwarf_uint16(''), + DW_FORM_addr=self.the_Dwarf_target_addr, + DW_FORM_addrx=self.the_Dwarf_uleb128, + DW_FORM_addrx1=self.the_Dwarf_uint8, + DW_FORM_addrx2=self.the_Dwarf_uint16, DW_FORM_addrx3=self.Dwarf_uint24(''), - DW_FORM_addrx4=self.Dwarf_uint32(''), + DW_FORM_addrx4=self.the_Dwarf_uint32, DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), @@ -264,49 +276,52 @@ def _create_dw_form(self): DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128), # All DW_FORM_data forms are assumed to be unsigned - DW_FORM_data1=self.Dwarf_uint8(''), - DW_FORM_data2=self.Dwarf_uint16(''), - DW_FORM_data4=self.Dwarf_uint32(''), + DW_FORM_data1=self.the_Dwarf_uint8, + DW_FORM_data2=self.the_Dwarf_uint16, + DW_FORM_data4=self.the_Dwarf_uint32, DW_FORM_data8=self.Dwarf_uint64(''), - DW_FORM_data16=Array(16, self.Dwarf_uint8('')), # Used for hashes and such, not for integers - DW_FORM_sdata=self.Dwarf_sleb128(''), - DW_FORM_udata=self.Dwarf_uleb128(''), + DW_FORM_data16=Array(16, self.the_Dwarf_uint8), # Used for hashes and such, not for integers + 
DW_FORM_sdata=self.the_Dwarf_sleb128, + DW_FORM_udata=self.the_Dwarf_uleb128, DW_FORM_string=CString(''), - DW_FORM_strp=self.Dwarf_offset(''), - DW_FORM_strp_sup=self.Dwarf_offset(''), - DW_FORM_line_strp=self.Dwarf_offset(''), - DW_FORM_strx1=self.Dwarf_uint8(''), - DW_FORM_strx2=self.Dwarf_uint16(''), + DW_FORM_strp=self.the_Dwarf_offset, + DW_FORM_strp_sup=self.the_Dwarf_offset, + DW_FORM_line_strp=self.the_Dwarf_offset, + DW_FORM_strx1=self.the_Dwarf_uint8, + DW_FORM_strx2=self.the_Dwarf_uint16, DW_FORM_strx3=self.Dwarf_uint24(''), DW_FORM_strx4=self.Dwarf_uint64(''), - DW_FORM_flag=self.Dwarf_uint8(''), + DW_FORM_flag=self.the_Dwarf_uint8, - DW_FORM_ref=self.Dwarf_uint32(''), - DW_FORM_ref1=self.Dwarf_uint8(''), - DW_FORM_ref2=self.Dwarf_uint16(''), - DW_FORM_ref4=self.Dwarf_uint32(''), - DW_FORM_ref_sup4=self.Dwarf_uint32(''), + DW_FORM_ref=self.the_Dwarf_uint32, + DW_FORM_ref1=self.the_Dwarf_uint8, + DW_FORM_ref2=self.the_Dwarf_uint16, + DW_FORM_ref4=self.the_Dwarf_uint32, + DW_FORM_ref_sup4=self.the_Dwarf_uint32, DW_FORM_ref8=self.Dwarf_uint64(''), DW_FORM_ref_sup8=self.Dwarf_uint64(''), - DW_FORM_ref_udata=self.Dwarf_uleb128(''), - DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''), + DW_FORM_ref_udata=self.the_Dwarf_uleb128, + DW_FORM_ref_addr=self.the_Dwarf_target_addr if self.dwarf_version == 2 else self.the_Dwarf_offset, - DW_FORM_indirect=self.Dwarf_uleb128(''), + DW_FORM_indirect=self.the_Dwarf_uleb128, + + # Treated separately while parsing, but here so that all forms resolve + DW_FORM_implicit_const=None, # New forms in DWARFv4 DW_FORM_flag_present = StaticField('', 0), - DW_FORM_sec_offset = self.Dwarf_offset(''), + DW_FORM_sec_offset = self.the_Dwarf_offset, DW_FORM_exprloc = self._make_block_struct(self.Dwarf_uleb128), DW_FORM_ref_sig8 = self.Dwarf_uint64(''), - DW_FORM_GNU_strp_alt=self.Dwarf_offset(''), - DW_FORM_GNU_ref_alt=self.Dwarf_offset(''), -
DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''), + DW_FORM_GNU_strp_alt=self.the_Dwarf_offset, + DW_FORM_GNU_ref_alt=self.the_Dwarf_offset, + DW_AT_GNU_all_call_sites=self.the_Dwarf_uleb128, # New forms in DWARFv5 - DW_FORM_loclistx=self.Dwarf_uleb128(''), - DW_FORM_rnglistx=self.Dwarf_uleb128('') + DW_FORM_loclistx=self.the_Dwarf_uleb128, + DW_FORM_rnglistx=self.the_Dwarf_uleb128 ) def _create_aranges_header(self): @@ -479,7 +494,7 @@ def _create_loclists_parsers(self): self.Dwarf_uint32('offset_count'), StreamOffset('offset_table_offset')) - cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128('')) + cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.the_Dwarf_uleb128) self.Dwarf_loclists_entries = RepeatUntilExcluding( lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',