Add support for symbol addition to the Python API

Message ID 20230112020021.29-1-dark.ryu.550@gmail.com
State New
Headers
Series Add support for symbol addition to the Python API |

Commit Message

Matheus Branco Borella Jan. 12, 2023, 2 a.m. UTC
  > overlap in the addresses of two compunit_symtabs.  What would functions
> like find_compunit_symtab_by_address return?  Should the new symbol be
> added to an existing compunit_symtab, if the address falls into an
> existing compunit_symtab's address range?

I'm actually not sure, from what I can tell `find_compunit_symtab_by_address`
looks into the qfs, that aren't changed by buildsym_compunit, at least not
from what I can tell. I'm probably wrong though, this part of the code is
still pretty confusing to me.

This patch adds support for symbol creation and registration. It currently
supports adding type symbols (VAR_DOMAIN/LOC_TYPEDEF), static symbols
(VAR_DOMAIN/LOC_STATIC) and goto target labels (LABEL_DOMAIN/LOC_LABEL). It
adds the `add_type_symbol`, `add_static_symbol` and `add_label_symbol`
functions to the `gdb.Objfile` type, allowing for the addition of the 
aforementioned types of symbols.

This is done through building a new `compunit_symtab`s for each symbol that is
to be added, owned by a given objfile and whose lifetimes is bound to it. I
might be missing something here, but there doesn't seem to be an intended way
to add new symbols to a compunit_symtab after it's been finished. If there is,
then the efficiency of this method could very much be improved. It could also be
made more efficient by having a way to add whole batches of symbols at once,
which would then all get added to the same `compunit_symtab`.

For now, though, this implementation lets us add symbols that can be used to,
for instance, query registered types through `gdb.lookup_type`, and allows
reverse engineering GDB plugins (such as Pwndbg [0] or decomp2gdb [1]) to add
symbols directly through the Python API instead of having to compile an object
file for the target architecture that they later load through the add-symbol-
file command. [2]

[0] https://github.com/pwndbg/pwndbg/
[1] https://github.com/mahaloz/decomp2dbg
[2] https://github.com/mahaloz/decomp2dbg/blob/055be6b2001954d00db2d683f20e9b714af75880/decomp2dbg/clients/gdb/symbol_mapper.py#L235-L243]
---
 gdb/python/py-objfile.c      | 258 +++++++++++++++++++++++++++++++++++
 gdb/python/python-internal.h |   2 +
 2 files changed, 260 insertions(+)
  

Patch

diff --git a/gdb/python/py-objfile.c b/gdb/python/py-objfile.c
index c278925531b..00fe8de74f1 100644
--- a/gdb/python/py-objfile.c
+++ b/gdb/python/py-objfile.c
@@ -25,6 +25,7 @@ 
 #include "build-id.h"
 #include "symtab.h"
 #include "python.h"
+#include "buildsym.h"
 
 struct objfile_object
 {
@@ -527,6 +528,233 @@  objfpy_lookup_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
   Py_RETURN_NONE;
 }
 
+/* Adds a new symbol to the given objfile. */
+
+static struct symbol *
+add_new_symbol
+  (struct objfile *objfile,
+   const char *name,
+   enum language language,
+   enum domain_enum domain,
+   enum address_class aclass,
+   short section_index,
+   CORE_ADDR last_addr,
+   CORE_ADDR end_addr,
+   bool global,
+   std::function<void(struct symbol*)> params)
+{
+  struct symbol *symbol = new (&objfile->objfile_obstack) struct symbol ();
+  OBJSTAT (objfile, n_syms++);
+
+  symbol->set_language (language, &objfile->objfile_obstack);
+  symbol->compute_and_set_names (gdb::string_view (name), true, objfile->per_bfd);
+
+  symbol->set_is_objfile_owned (true);
+  symbol->set_section_index (aclass);
+  symbol->set_domain (domain);
+  symbol->set_aclass_index (aclass);
+
+  params (symbol);
+
+  buildsym_compunit builder (objfile, "", "", language, last_addr);
+  add_symbol_to_list (symbol, global ? builder.get_global_symbols() : builder.get_file_symbols ());
+  builder.end_compunit_symtab (end_addr, section_index);
+
+  return symbol;
+}
+
+/* Parses a language from a string (coming from Python) into a language variant. */
+
+static enum language
+parse_language (const char *language)
+{
+  if (strcmp (language, "c") == 0)
+    return language_c;
+  else if (strcmp (language, "objc") == 0)
+    return language_objc;
+  else if (strcmp (language, "cplus") == 0)
+    return language_cplus;
+  else if (strcmp (language, "d") == 0)
+    return language_d;
+  else if (strcmp (language, "go") == 0)
+    return language_go;
+  else if (strcmp (language, "fortran") == 0)
+    return language_fortran;
+  else if (strcmp (language, "m2") == 0)
+    return language_m2;
+  else if (strcmp (language, "asm") == 0)
+    return language_asm;
+  else if (strcmp (language, "pascal") == 0)
+    return language_pascal;
+  else if (strcmp (language, "opencl") == 0)
+    return language_opencl;
+  else if (strcmp (language, "rust") == 0)
+    return language_rust;
+  else if (strcmp (language, "ada") == 0)
+    return language_ada;
+  else if (strcmp (language, "auto") == 0)
+    return language_auto;
+  else
+    return language_unknown;
+}
+
+/* Adds a type (LOC_TYPEDEF) symbol to a given objfile. */
+
+static PyObject *
+objfpy_add_type_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sO|s";
+  static const char *keywords[] =
+    {
+      "name", "type", "language",NULL
+    };
+
+  PyObject *type_object;
+  const char *name;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &type_object, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  struct type *type = type_object_to_type (type_object);
+  if (type == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+  {
+    PyErr_SetString (PyExc_ValueError, "invalid language name");
+    return nullptr;
+  }
+
+  struct symbol* symbol = add_new_symbol
+    (objfile,
+     name,
+     language,
+     VAR_DOMAIN,
+     LOC_TYPEDEF,
+     0,
+     0,
+     0,
+     false,
+     [&](struct symbol* temp_symbol)
+     {
+       temp_symbol->set_type(type);
+     });
+
+
+  return symbol_to_symbol_object (symbol);
+}
+
+/* Adds a label (LOC_LABEL) symbol to a given objfile. */
+
+static PyObject *
+objfpy_add_label_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sk|s";
+  static const char *keywords[] =
+    {
+      "name", "address", "language",NULL
+    };
+
+  const char *name;
+  CORE_ADDR address;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &address, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+  {
+    PyErr_SetString (PyExc_ValueError, "invalid language name");
+    return nullptr;
+  }
+
+  struct symbol* symbol = add_new_symbol
+    (objfile,
+     name,
+     language,
+     LABEL_DOMAIN,
+     LOC_LABEL,
+     0,
+     0,
+     0,
+     false,
+     [&](struct symbol* temp_symbol)
+     {
+       temp_symbol->set_value_address(address);
+     });
+
+
+  return symbol_to_symbol_object (symbol);
+}
+
+/* Adds a static (LOC_STATIC) symbol to a given objfile. */
+
+static PyObject *
+objfpy_add_static_symbol (PyObject *self, PyObject *args, PyObject *kw)
+{
+  static const char *format = "sk|s";
+  static const char *keywords[] =
+    {
+      "name", "address", "language", NULL
+    };
+
+  const char *name;
+  CORE_ADDR address;
+  const char *language_name = nullptr;
+
+  if (!gdb_PyArg_ParseTupleAndKeywords (args, kw, format, keywords, &name,
+                                        &address, &language_name))
+    return nullptr;
+
+  struct objfile *objfile = objfile_object_to_objfile (self);
+  if (objfile == nullptr)
+    return nullptr;
+
+  if (language_name == nullptr)
+    language_name = "auto";
+  enum language language = parse_language (language_name);
+  if (language == language_unknown)
+  {
+    PyErr_SetString (PyExc_ValueError, "invalid language name");
+    return nullptr;
+  }
+
+  struct symbol* symbol = add_new_symbol
+    (objfile,
+     name,
+     language,
+     VAR_DOMAIN,
+     LOC_STATIC,
+     0,
+     0,
+     0,
+     false,
+     [&](struct symbol* temp_symbol)
+     {
+       temp_symbol->set_value_address(address);
+     });
+
+
+  return symbol_to_symbol_object (symbol);
+}
+
 /* Implement repr() for gdb.Objfile.  */
 
 static PyObject *
@@ -704,6 +932,18 @@  objfile_to_objfile_object (struct objfile *objfile)
   return gdbpy_ref<>::new_reference (result);
 }
 
+struct objfile *
+objfile_object_to_objfile (PyObject *self)
+{
+  if (!PyObject_TypeCheck (self, &objfile_object_type))
+    return nullptr;
+
+  auto objfile_object = (struct objfile_object*) self;
+  OBJFPY_REQUIRE_VALID (objfile_object);
+
+  return objfile_object->objfile;
+}
+
 int
 gdbpy_initialize_objfile (void)
 {
@@ -737,6 +977,24 @@  Look up a global symbol in this objfile and return it." },
     "lookup_static_symbol (name [, domain]).\n\
 Look up a static-linkage global symbol in this objfile and return it." },
 
+  { "add_type_symbol", (PyCFunction) objfpy_add_type_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_type_symbol(name: string, type: gdb.Type, [language: string])\n\
+    Registers a new symbol inside VAR_DOMAIN/LOC_TYPEDEF, with the given name\
+    referring to the given type." },
+
+  { "add_label_symbol", (PyCFunction) objfpy_add_label_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_label_symbol(name: string, address: int, [language: string])\n\
+    Registers a new symbol inside LABEL_DOMAIN/LOC_LABEL, with the given name\
+    pointing to the given address." },
+
+  { "add_static_symbol", (PyCFunction) objfpy_add_static_symbol,
+    METH_VARARGS | METH_KEYWORDS,
+    "add_static_symbol(name: string, address: int, [language: string])\n\
+    Registers a new symbol inside VAR_DOMAIN/LOC_STATIC, with the given name\
+    pointing to the given address." },
+
   { NULL }
 };
 
diff --git a/gdb/python/python-internal.h b/gdb/python/python-internal.h
index 06357cc8c0b..bb10df63077 100644
--- a/gdb/python/python-internal.h
+++ b/gdb/python/python-internal.h
@@ -481,6 +481,8 @@  struct symtab *symtab_object_to_symtab (PyObject *obj);
 struct symtab_and_line *sal_object_to_symtab_and_line (PyObject *obj);
 frame_info_ptr frame_object_to_frame_info (PyObject *frame_obj);
 struct gdbarch *arch_object_to_gdbarch (PyObject *obj);
+struct objfile *objfile_object_to_objfile (PyObject *self);
+struct floatformat *float_format_object_as_float_format (PyObject *self);
 
 /* Convert Python object OBJ to a program_space pointer.  OBJ must be a
    gdb.Progspace reference.  Return nullptr if the gdb.Progspace is not