From c7aa8f5835c152bb7e7da3e3b603365514d1399d Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Thu, 9 Mar 2023 10:04:14 -0800 Subject: [PATCH] python: Allow construction of Symbol objects Previously, Symbol objects could not be constructed in Python. However, in order to allow Python Symbol finders, this needs to be changed. We introduce the concept of "owned" names inside symbols: ideally, a symbol name shares the lifetime of the program associated with it. However, if that's not the case (as with Python symbols), then the symbol itself owns the name, and must free it when the symbol is freed. When the symbol, rather than the program, owns the name buffer, the name_owned field is true. The added complexity is justified by the fact that most symbols are from the ELF file, and thus share a lifetime with the Program. It would be a waste to constantly strdup() these strings, just to support a small number of Symbols created by Python code. Signed-off-by: Stephen Brennan --- libdrgn/python/symbol.c | 67 +++++++++++++++++++++++++++++++++++++++-- libdrgn/symbol.c | 6 ++++ libdrgn/symbol.h | 1 + tests/test_symbol.py | 41 ++++++------------------- 4 files changed, 80 insertions(+), 35 deletions(-) diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index ea0d98cb3..0ea395e22 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -3,23 +3,83 @@ #include +#include "drgn.h" #include "drgnpy.h" +#include "symbol.h" PyObject *Symbol_wrap(struct drgn_symbol *sym, Program *prog) { - Symbol *ret = call_tp_alloc(Symbol); + Symbol *ret; + + if (!sym->name_owned && !prog) { + /* If a symbol does not own its name, then it implicitly + * shares the lifetime of its associate program. So, we + * must have it here. Symbols created via Python (Symbol_new()) + * always own their names. */ + PyErr_SetString(PyExc_ValueError, + "Creating symbol with unowned name," + " without program reference"); + return NULL; + } + ret = call_tp_alloc(Symbol); if (ret) { ret->sym = sym; ret->prog = prog; - Py_INCREF(prog); + if (prog) + Py_INCREF(prog); } return (PyObject *)ret; } +PyObject *Symbol_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) +{ + struct drgn_symbol *sym; + static char *keywords[] = {"name", "address", "size", "binding", "kind", NULL}; + const char *name; + struct index_arg address = {}, size = {}; + struct enum_arg binding = { + .type = SymbolBinding_class, + }; + struct enum_arg kind = { + .type = SymbolKind_class, + }; + + sym = malloc(sizeof(*sym)); + if (!sym) + return set_drgn_error(&drgn_enomem); + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "sO&O&O&O&:Symbol", keywords, + &name, + index_converter, &address, + index_converter, &size, + enum_converter, &binding, + enum_converter, &kind)) { + free(sym); + return NULL; + } + + sym->name_owned = true; + sym->name = strdup(name); + if (!sym->name) { + free(sym); + return set_drgn_error(&drgn_enomem); + } + sym->address = address.uvalue; + sym->size = size.uvalue; + sym->binding = binding.value; + sym->kind = kind.value; + PyObject *ret = Symbol_wrap(sym, NULL); + if (!ret) { + drgn_symbol_destroy(sym); + } + return ret; +} + static void Symbol_dealloc(Symbol *self) { drgn_symbol_destroy(self->sym); - Py_XDECREF(self->prog); + if (self->prog) + Py_DECREF(self->prog); Py_TYPE(self)->tp_free((PyObject *)self); } @@ -100,4 +160,5 @@ PyTypeObject Symbol_type = { .tp_doc = drgn_Symbol_DOC, .tp_richcompare = (richcmpfunc)Symbol_richcompare, .tp_getset = Symbol_getset, + .tp_new = Symbol_new, }; diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index f2ac81670..876b3ad34 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -11,6 +11,11 @@ LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) { + if (sym && sym->name_owned) + /* Cast here is necessary - we want symbol users to + * never modify sym->name, but when we own the name, + * we must modify it by freeing it. */ + free((char *)sym->name); free(sym); } @@ -26,6 +31,7 @@ void drgn_symbol_from_elf(const char *name, uint64_t address, const GElf_Sym *elf_sym, struct drgn_symbol *ret) { ret->name = name; + ret->name_owned = false; ret->address = address; ret->size = elf_sym->st_size; int binding = GELF_ST_BIND(elf_sym->st_info); diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index 0fb62fb5c..bc89ab7b7 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -16,6 +16,7 @@ struct drgn_symbol { uint64_t size; enum drgn_symbol_binding binding; enum drgn_symbol_kind kind; + bool name_owned; }; struct drgn_symbol_finder { diff --git a/tests/test_symbol.py b/tests/test_symbol.py index 3ff949325..a8a55d21a 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -1,9 +1,8 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import tempfile -from typing import NamedTuple -from drgn import Program, SymbolBinding, SymbolKind +from drgn import Program, Symbol, SymbolBinding, SymbolKind from tests import TestCase from tests.dwarfwriter import dwarf_sections from tests.elf import ET, PT, SHT, STB, STT @@ -45,35 +44,13 @@ def elf_symbol_program(*modules): return prog -# We don't want to support creating drgn.Symbol instances yet, so use this dumb -# class for testing. -class Symbol(NamedTuple): - name: str - address: int - size: int - binding: SymbolBinding - kind: SymbolKind - - class TestElfSymbol(TestCase): - def assert_symbol_equal(self, drgn_symbol, symbol): - self.assertEqual( - Symbol( - drgn_symbol.name, - drgn_symbol.address, - drgn_symbol.size, - drgn_symbol.binding, - drgn_symbol.kind, - ), - symbol, - ) - def assert_symbols_equal_unordered(self, drgn_symbols, symbols): self.assertEqual(len(drgn_symbols), len(symbols)) drgn_symbols = sorted(drgn_symbols, key=lambda x: (x.address, x.name)) symbols = sorted(symbols, key=lambda x: (x.address, x.name)) for drgn_symbol, symbol in zip(drgn_symbols, symbols): - self.assert_symbol_equal(drgn_symbol, symbol) + self.assertEqual(drgn_symbol, symbol) def test_by_address(self): elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL) @@ -91,13 +68,13 @@ def test_by_address(self): prog = elf_symbol_program(*modules) self.assertRaises(LookupError, prog.symbol, 0xFFFEFFFF) self.assertEqual(prog.symbols(0xFFFEFFFF), []) - self.assert_symbol_equal(prog.symbol(0xFFFF0000), first) + self.assertEqual(prog.symbol(0xFFFF0000), first) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF0000), [first]) - self.assert_symbol_equal(prog.symbol(0xFFFF0004), first) + self.assertEqual(prog.symbol(0xFFFF0004), first) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF0004), [first]) - self.assert_symbol_equal(prog.symbol(0xFFFF0008), second) + self.assertEqual(prog.symbol(0xFFFF0008), second) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF0008), [second]) - self.assert_symbol_equal(prog.symbol(0xFFFF000C), second) + self.assertEqual(prog.symbol(0xFFFF000C), second) self.assert_symbols_equal_unordered(prog.symbols(0xFFFF000C), [second]) self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) @@ -171,8 +148,8 @@ def test_by_name(self): for modules in same_module, different_modules: with self.subTest(modules=len(modules)): prog = elf_symbol_program(*modules) - self.assert_symbol_equal(prog.symbol("first"), first) - self.assert_symbol_equal(prog.symbol("second"), second) + self.assertEqual(prog.symbol("first"), first) + self.assertEqual(prog.symbol("second"), second) self.assertRaises(LookupError, prog.symbol, "third") self.assert_symbols_equal_unordered(prog.symbols("first"), [first]) @@ -258,7 +235,7 @@ def test_kind(self): (ElfSymbol("foo", 0xFFFF0000, 1, elf_type, STB.GLOBAL),) ) symbol = Symbol("foo", 0xFFFF0000, 1, SymbolBinding.GLOBAL, drgn_kind) - self.assert_symbol_equal(prog.symbol("foo"), symbol) + self.assertEqual(prog.symbol("foo"), symbol) symbols = prog.symbols("foo") self.assert_symbols_equal_unordered(symbols, [symbol])