Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CIEv4 and FDE ahead of its CIE #563

Merged
merged 2 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 13 additions & 15 deletions elftools/dwarf/callframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Eli Bendersky ([email protected])
# This code is in the public domain
#-------------------------------------------------------------------------------
import copy
import copy, os
from collections import namedtuple
from ..common.utils import (
struct_parse, dwarf_assert, preserve_stream_pos, iterbytes)
Expand Down Expand Up @@ -84,10 +84,13 @@ def _parse_entries(self):
def _parse_entry_at(self, offset):
""" Parse an entry from self.stream starting with the given offset.
Return the entry object. self.stream will point right after the
entry.
entry (even if pulled from the cache).
"""
if offset in self._entry_cache:
return self._entry_cache[offset]
entry = self._entry_cache[offset]
self.stream.seek(entry.header.length +
entry.structs.initial_length_field_size(), os.SEEK_CUR)
return entry

entry_length = struct_parse(
self.base_structs.the_Dwarf_uint32, self.stream, offset)
Expand All @@ -97,6 +100,9 @@ def _parse_entry_at(self, offset):

dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32

# Theoretically possible to have a DWARF bitness transition here.
# DWARF version doesn't matter (CIEs are versioned separately), endianness can't change.
# The structs are cached though, so no extraneous creation.
entry_structs = DWARFStructs(
little_endian=self.base_structs.little_endian,
dwarf_format=dwarf_format,
Expand Down Expand Up @@ -124,15 +130,6 @@ def _parse_entry_at(self, offset):
else:
header = self._parse_fde_header(entry_structs, offset)


# If this is DWARF version 4 or later, we can have a more precise
# address size, read from the CIE header.
if not self.for_eh_frame and entry_structs.dwarf_version >= 4:
entry_structs = DWARFStructs(
little_endian=entry_structs.little_endian,
dwarf_format=entry_structs.dwarf_format,
address_size=header.address_size)

# If the augmentation string is not empty, hope to find a length field
# in order to skip the data specified augmentation.
if is_CIE:
Expand Down Expand Up @@ -161,21 +158,22 @@ def _parse_entry_at(self, offset):
entry_structs, self.stream.tell(), end_offset)

if is_CIE:
self._entry_cache[offset] = CIE(
entry = CIE(
header=header, instructions=instructions, offset=offset,
augmentation_dict=aug_dict,
augmentation_bytes=aug_bytes,
structs=entry_structs)

else: # FDE
cie = self._parse_cie_for_fde(offset, header, entry_structs)
self._entry_cache[offset] = FDE(
entry = FDE(
header=header, instructions=instructions, offset=offset,
structs=entry_structs, cie=cie,
augmentation_bytes=aug_bytes,
lsda_pointer=lsda_pointer,
)
return self._entry_cache[offset]
self._entry_cache[offset] = entry
return entry

def _parse_instructions(self, structs, offset, end_offset):
""" Parse a list of CFI instructions from self.stream, starting with
Expand Down
6 changes: 5 additions & 1 deletion elftools/dwarf/descriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def _full_reg_name(regnum):
instr.args[1] * cie['data_alignment_factor'])
elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'):
s += ' %s: %s\n' % (name, instr.args[0])
elif name == 'DW_CFA_def_cfa_offset_sf':
s += ' %s: %s\n' % (name, instr.args[0]*entry.cie['data_alignment_factor'])
elif name == 'DW_CFA_def_cfa_expression':
expr_dumper = ExprDumper(entry.structs)
# readelf output is missing a colon for DW_CFA_def_cfa_expression
Expand Down Expand Up @@ -618,7 +620,7 @@ def _init_lookups(self):
for n in range(0, 32):
self._ops_with_decimal_arg.add('DW_OP_breg%s' % n)

self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece'])
self._ops_with_two_decimal_args = set(['DW_OP_bregx'])

self._ops_with_hex_arg = set(
['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'])
Expand Down Expand Up @@ -674,5 +676,7 @@ def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None):
return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
elif opcode_name in ('DW_OP_GNU_regval_type', 'DW_OP_regval_type'):
return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
elif opcode_name == 'DW_OP_bit_piece':
return '%s: size: %s offset: %s' % (opcode_name, args[0], args[1])
else:
return '<unknown %s>' % opcode_name
22 changes: 9 additions & 13 deletions elftools/dwarf/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,23 +449,19 @@ def _create_callframe_entry_headers(self):
self.Dwarf_offset('CIE_id'),
self.Dwarf_uint8('version'),
CString('augmentation'),
If(lambda ctx: ctx.version >= 4, self.Dwarf_uint8('address_size')),
If(lambda ctx: ctx.version >= 4, self.Dwarf_uint8('segment_size')),
self.Dwarf_uleb128('code_alignment_factor'),
self.Dwarf_sleb128('data_alignment_factor'),
self.Dwarf_uleb128('return_address_register'))
IfThenElse('return_address_register', lambda ctx: ctx.version > 1,
self.Dwarf_uleb128(''),
self.Dwarf_uint8('')))
self.EH_CIE_header = self.Dwarf_CIE_header

# The CIE header was modified in DWARFv4.
if self.dwarf_version == 4:
self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
self.Dwarf_initial_length('length'),
self.Dwarf_offset('CIE_id'),
self.Dwarf_uint8('version'),
CString('augmentation'),
self.Dwarf_uint8('address_size'),
self.Dwarf_uint8('segment_size'),
self.Dwarf_uleb128('code_alignment_factor'),
self.Dwarf_sleb128('data_alignment_factor'),
self.Dwarf_uleb128('return_address_register'))
# The CIE header layout changed in DWARFv4, but which layout applies is
# driven by the version number in the CIE header itself, independent of
# the DWARF version in the CUs.

self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
self.Dwarf_initial_length('length'),
Expand Down
10 changes: 8 additions & 2 deletions elftools/elf/segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,14 @@ def section_in_segment(self, section):
# The third condition is the 'strict' one - an empty section will
# not match at the very end of the segment (unless the segment is
# also zero size, which is handled by the second condition).

# Seva 2024-07-12: a zero length section at a zero offset
# in a zero length segment should match. In GNU readelf, p_memsz
# is unsigned, so on a zero length segment p_memsz-1 wraps around
# and the third condition matches.
if not (secaddr >= vaddr and
secaddr - vaddr + section['sh_size'] <= self['p_memsz'] and
secaddr - vaddr <= self['p_memsz'] - 1):
(self['p_memsz'] == 0 or secaddr - vaddr <= self['p_memsz'] - 1)):
return False

# If we've come this far and it's a NOBITS section, it's in the segment
Expand All @@ -83,9 +88,10 @@ def section_in_segment(self, section):

# Same logic as with secaddr vs. vaddr checks above, just on offsets in
# the file
# Seva 2024-07-12: similar discrepancy with readelf, caused by p_filesz being unsigned there
return (secoffset >= poffset and
secoffset - poffset + section['sh_size'] <= self['p_filesz'] and
secoffset - poffset <= self['p_filesz'] - 1)
(self['p_filesz'] == 0 or secoffset - poffset <= self['p_filesz'] - 1))


class InterpSegment(Segment):
Expand Down
9 changes: 7 additions & 2 deletions scripts/dwarfdump.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ def _safe_DIE_linkage_name(die, default=None):
def _desc_ref(attr, die, extra=''):
if extra:
extra = " \"%s\"" % extra
# TODO: leading zeros on the addend to CU - sometimes present, sometimes not.
# Check against the LLVM sources.
return "cu + 0x%04x => {0x%08x}%s" % (
attr.raw_value,
die.cu.cu_offset + attr.raw_value,
Expand All @@ -99,7 +101,7 @@ def _desc_strx(attr, die):
return "indexed (%08x) string = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\"))

FORM_DESCRIPTIONS = dict(
DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),),
DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value).replace("\\", "\\\\"),),
DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
DW_FORM_strx1=_desc_strx,
DW_FORM_strx2=_desc_strx,
Expand Down Expand Up @@ -391,7 +393,10 @@ def dump_info(self):
'(0x%08x)' % die.get_parent().offset if die.get_parent() is not None else ''))
for attr_name in die.attributes:
attr = die.attributes[attr_name]
self._emitline(" %s [%s] (%s)" % (attr_name, attr.form, self.describe_attr_value(die, attr)))
self._emitline(" %s [%s] (%s)" % (
attr_name if isinstance(attr_name, str) else "DW_AT_unknown_%x" % (attr_name,),
attr.form,
self.describe_attr_value(die, attr)))
else:
self._emitline("0x%08x: NULL" % (die.offset,))
parent = die.get_parent()
Expand Down
11 changes: 9 additions & 2 deletions scripts/readelf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1282,6 +1282,9 @@ def _dump_frames_info(self, section, cfi_entries):
self._format_hex(entry['CIE_id'], fieldsize=8, lead0x=False)))
self._emitline(' Version: %d' % entry['version'])
self._emitline(' Augmentation: "%s"' % bytes2str(entry['augmentation']))
if(entry['version'] >= 4):
self._emitline(' Pointer Size: %d' % entry['address_size'])
self._emitline(' Segment Size: %d' % entry['segment_size'])
self._emitline(' Code alignment factor: %u' % entry['code_alignment_factor'])
self._emitline(' Data alignment factor: %d' % entry['data_alignment_factor'])
self._emitline(' Return address column: %d' % entry['return_address_register'])
Expand All @@ -1293,9 +1296,11 @@ def _dump_frames_info(self, section, cfi_entries):
self._emitline()

elif isinstance(entry, FDE):
# Readelf bug #31973
length = entry['length'] if entry.cie.offset < entry.offset else entry.cie['length']
self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % (
entry.offset,
self._format_hex(entry['length'], fullhex=True, lead0x=False),
self._format_hex(length, fullhex=True, lead0x=False),
self._format_hex(entry['CIE_pointer'], fieldsize=8, lead0x=False),
entry.cie.offset,
self._format_hex(entry['initial_location'], fullhex=True, lead0x=False),
Expand Down Expand Up @@ -1428,9 +1433,11 @@ def _dump_frames_interp_info(self, section, cfi_entries):
ra_regnum = entry['return_address_register']

elif isinstance(entry, FDE):
# Readelf bug #31973 - FDE length misreported if FDE precedes its CIE
length = entry['length'] if entry.cie.offset < entry.offset else entry.cie['length']
self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % (
entry.offset,
self._format_hex(entry['length'], fullhex=True, lead0x=False),
self._format_hex(length, fullhex=True, lead0x=False),
self._format_hex(entry['CIE_pointer'], fieldsize=8, lead0x=False),
entry.cie.offset,
self._format_hex(entry['initial_location'], fullhex=True, lead0x=False),
Expand Down
9 changes: 9 additions & 0 deletions test/run_readelf_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ def run_test_on_file(filename, verbose=False, opt=None):
testlog.info('.......................SKIPPED')
continue

# TODO(sevaa): excluding the binary with an FDE ahead of its CIE until binutils' bug #31975 is fixed
if "dwarf_v4cie" in filename and option == "--debug-dump=frames-interp":
continue

# TODO(sevaa): excluding the binary with unaligned aranges entries. Readelf tries to recover
# but produces nonsensical output; ultimately it's a toolchain bug (in IAR, I presume).
if "dwarf_v4cie" in filename and option == "--debug-dump=aranges":
continue

# sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
# patched from 0x07 0x10 to 00 00.
# Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
Expand Down
3 changes: 2 additions & 1 deletion test/test_dwarf_expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def test_basic_single(self):
'DW_OP_regx: 16 (rip)')

self.assertEqual(self.visitor.dump_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]),
'DW_OP_bit_piece: 1295 144')
# Explaining the arguments is what the latest readelf does
'DW_OP_bit_piece: size: 1295 offset: 144')

self.assertEqual(self.visitor.dump_expr([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00]),
'DW_OP_const8u: 71777214294589695')
Expand Down
Binary file added test/testfiles_for_readelf/dwarf_v4cie.elf
Binary file not shown.
Loading