[3/4] Add file offsets to the source cache

Message ID 20190726133422.5896-4-tromey@adacore.com
State New, archived
Headers

Commit Message

Tom Tromey July 26, 2019, 1:34 p.m. UTC
  Currently, gdb stores the number of lines and an array of file offsets
for the start of each line in struct symtab.  This patch moves this
information to the source cache.  This has two benefits.

First, it allows gdb to read a source file less frequently.
Currently, a source file may be read multiple times: once when
computing the file offsets, once when highlighting, and then pieces
may be read again while printing source lines.  With this change, the
file is read once for its source text and file offsets; and then
perhaps read again if it is evicted from the cache.

Second, if multiple symtabs cover the same source file, then this will
share the file offsets between them.  I'm not sure whether this
happens in practice.

gdb/ChangeLog
2019-07-26  Tom Tromey  <tromey@adacore.com>

	* annotate.c (annotate_source_line): Use g_source_cache.
	* source-cache.c (source_cache::get_plain_source_lines): Change
	parameters.  Populate m_offset_cache.
	(source_cache::ensure): New method.
	(source_cache::get_line_charpos): New method.
	(extract_lines): Move lower.  Change parameters.
	(source_cache::get_source_lines): Move lower.
	* source-cache.h (class source_cache): Update comment.
	<get_line_charpos>: New method.
	<get_source_lines>: Update comment.
	<clear>: Clear m_offset_cache.
	<get_plain_source_lines>: Change parameters.
	<ensure>: New method
	<m_offset_cache>: New member.
	* source.c (forget_cached_source_info_for_objfile): Update.
	(info_source_command): Use g_source_cache.
	(find_source_lines, open_source_file_with_line_charpos): Remove.
	(print_source_lines_base, search_command_helper): Use g_source_cache.
	* source.h (open_source_file_with_line_charpos): Don't declare.
	* symtab.h (struct symtab) <nlines, line_charpos>: Remove.
	* tui/tui-source.c (tui_source_window::do_scroll_vertical):
	Use g_source_cache.
---
 gdb/ChangeLog        |  25 ++++++
 gdb/annotate.c       |  11 +--
 gdb/source-cache.c   | 193 ++++++++++++++++++++++++++++++-------------
 gdb/source-cache.h   |  52 +++++++++---
 gdb/source.c         | 120 ++++-----------------------
 gdb/source.h         |   5 --
 gdb/symtab.h         |  10 ---
 gdb/tui/tui-source.c |   4 +-
 8 files changed, 228 insertions(+), 192 deletions(-)
  

Patch

diff --git a/gdb/annotate.c b/gdb/annotate.c
index 8d8a0196fb0..3011b26eb58 100644
--- a/gdb/annotate.c
+++ b/gdb/annotate.c
@@ -28,6 +28,7 @@ 
 #include "top.h"
 #include "source.h"
 #include "objfiles.h"
+#include "source-cache.h"
 
 
 /* Prototypes for local functions.  */
@@ -440,15 +441,15 @@  annotate_source_line (struct symtab *s, int line, int mid_statement,
 {
   if (annotation_level > 0)
     {
-      if (s->line_charpos == nullptr)
-	open_source_file_with_line_charpos (s);
-      if (s->fullname == nullptr)
+      const std::vector<off_t> *offsets;
+      if (!g_source_cache.get_line_charpos (s, &offsets))
 	return;
+
       /* Don't index off the end of the line_charpos array.  */
-      if (line > s->nlines)
+      if (line > offsets->size ())
 	return;
 
-      annotate_source (s->fullname, line, s->line_charpos[line - 1],
+      annotate_source (s->fullname, line, (int) (*offsets)[line - 1],
 		       mid_statement, get_objfile_arch (SYMTAB_OBJFILE (s)),
 		       pc);
     }
diff --git a/gdb/source-cache.c b/gdb/source-cache.c
index 0cc2076258c..9039f8fde2a 100644
--- a/gdb/source-cache.c
+++ b/gdb/source-cache.c
@@ -23,6 +23,8 @@ 
 #include "cli/cli-style.h"
 #include "symtab.h"
 #include "gdbsupport/selftest.h"
+#include "objfiles.h"
+#include "exec.h"
 
 #ifdef HAVE_SOURCE_HIGHLIGHT
 /* If Gnulib redirects 'open' and 'close' to its replacements
@@ -46,60 +48,50 @@  source_cache g_source_cache;
 
 /* See source-cache.h.  */
 
-bool
-source_cache::get_plain_source_lines (struct symtab *s, std::string *lines)
+std::string
+source_cache::get_plain_source_lines (struct symtab *s,
+				      const std::string &fullname)
 {
-  scoped_fd desc (open_source_file_with_line_charpos (s));
+  scoped_fd desc (open_source_file (s));
   if (desc.get () < 0)
-    return false;
+    perror_with_name (symtab_to_filename_for_display (s));
 
   struct stat st;
-
   if (fstat (desc.get (), &st) < 0)
     perror_with_name (symtab_to_filename_for_display (s));
 
-  /* We could cache this in line_charpos... */
-  lines->resize (st.st_size);
-  if (myread (desc.get (), &(*lines)[0], lines->size ()) < 0)
+  std::string lines;
+  lines.resize (st.st_size);
+  if (myread (desc.get (), &lines[0], lines.size ()) < 0)
     perror_with_name (symtab_to_filename_for_display (s));
 
-  return true;
-}
+  time_t mtime = 0;
+  if (SYMTAB_OBJFILE (s) != NULL && SYMTAB_OBJFILE (s)->obfd != NULL)
+    mtime = SYMTAB_OBJFILE (s)->mtime;
+  else if (exec_bfd)
+    mtime = exec_bfd_mtime;
 
-/* A helper function for get_plain_source_lines that extracts the
-   desired source lines from TEXT, putting them into LINES_OUT.  The
-   arguments are as for get_source_lines.  The return value is the
-   desired lines.  */
-static std::string
-extract_lines (const std::string &text, int first_line, int last_line)
-{
-  int lineno = 1;
-  std::string::size_type pos = 0;
-  std::string::size_type first_pos = std::string::npos;
+  if (mtime && mtime < st.st_mtime)
+    warning (_("Source file is more recent than executable."));
 
-  while (pos != std::string::npos && lineno <= last_line)
+  std::vector<off_t> offsets;
+  offsets.push_back (0);
+  for (size_t offset = lines.find ('\n');
+       offset != std::string::npos;
+       offset = lines.find ('\n', offset))
     {
-      std::string::size_type new_pos = text.find ('\n', pos);
-
-      if (lineno == first_line)
-	first_pos = pos;
-
-      pos = new_pos;
-      if (lineno == last_line || pos == std::string::npos)
-	{
-	  if (first_pos == std::string::npos)
-	    return {};
-	  if (pos == std::string::npos)
-	    pos = text.size ();
-	  else
-	    ++pos;
-	  return text.substr (first_pos, pos - first_pos);
-	}
-      ++lineno;
-      ++pos;
+      ++offset;
+      /* A newline at the end does not start a new line.  It would
+	 seem simpler to just strip the newline in this function, but
+	 then "list" won't print the final newline.  */
+      if (offset != lines.size ())
+	offsets.push_back (offset);
     }
 
-  return {};
+  offsets.shrink_to_fit ();
+  m_offset_cache.emplace (fullname, std::move (offsets));
+
+  return lines;
 }
 
 #ifdef HAVE_SOURCE_HIGHLIGHT
@@ -161,26 +153,31 @@  get_language_name (enum language lang)
 /* See source-cache.h.  */
 
 bool
-source_cache::get_source_lines (struct symtab *s, int first_line,
-				int last_line, std::string *lines)
+source_cache::ensure (struct symtab *s)
 {
-  if (first_line < 1 || last_line < 1 || first_line > last_line)
-    return false;
-
   std::string fullname = symtab_to_fullname (s);
 
-  for (const auto &item : m_source_map)
+  size_t size = m_source_map.size ();
+  for (int i = 0; i < size; ++i)
     {
-      if (item.fullname == fullname)
+      if (m_source_map[i].fullname == fullname)
 	{
-	  *lines = extract_lines (item.contents, first_line, last_line);
+	  /* This should always hold, because we create the file
+	     offsets when reading the file, and never free them
+	     without also clearing the contents cache.  */
+	  gdb_assert (m_offset_cache.find (fullname)
+		      != m_offset_cache.end ());
+	  /* Not strictly LRU, but at least ensure that the most
+	     recently used entry is always the last candidate for
+	     deletion.  Note that this property is relied upon by at
+	     least one caller.  */
+	  if (i != size - 1)
+	    std::swap (m_source_map[i], m_source_map[size - 1]);
 	  return true;
 	}
     }
 
-  std::string contents;
-  if (!get_plain_source_lines (s, &contents))
-    return false;
+  std::string contents = get_plain_source_lines (s, fullname);
 
 #ifdef HAVE_SOURCE_HIGHLIGHT
   if (source_styling && gdb_stdout->can_emit_style_escape ())
@@ -215,22 +212,102 @@  source_cache::get_source_lines (struct symtab *s, int first_line,
   if (m_source_map.size () > MAX_ENTRIES)
     m_source_map.erase (m_source_map.begin ());
 
-  *lines = extract_lines (m_source_map.back ().contents,
-			  first_line, last_line);
   return true;
 }
 
+/* See source-cache.h.  */
+
+bool
+source_cache::get_line_charpos (struct symtab *s,
+				const std::vector<off_t> **offsets)
+{
+  std::string fullname = symtab_to_fullname (s);
+
+  auto iter = m_offset_cache.find (fullname);
+  if (iter == m_offset_cache.end ())
+    {
+      ensure (s);
+      iter = m_offset_cache.find (fullname);
+      /* cache_source_text ensured this was entered.  */
+      gdb_assert (iter != m_offset_cache.end ());
+    }
+
+  *offsets = &iter->second;
+  return true;
+}
+
+/* A helper function that extracts the desired source lines from TEXT,
+   putting them into LINES_OUT.  The arguments are as for
+   get_source_lines.  Returns true on success, false if the line
+   numbers are invalid.  */
+
+static bool
+extract_lines (const std::string &text, int first_line, int last_line,
+	       std::string *lines_out)
+{
+  int lineno = 1;
+  std::string::size_type pos = 0;
+  std::string::size_type first_pos = std::string::npos;
+
+  while (pos != std::string::npos && lineno <= last_line)
+    {
+      std::string::size_type new_pos = text.find ('\n', pos);
+
+      if (lineno == first_line)
+	first_pos = pos;
+
+      pos = new_pos;
+      if (lineno == last_line || pos == std::string::npos)
+	{
+	  /* A newline at the end does not start a new line.  */
+	  if (first_pos == std::string::npos
+	      || first_pos == text.size ())
+	    return false;
+	  if (pos == std::string::npos)
+	    pos = text.size ();
+	  else
+	    ++pos;
+	  *lines_out = text.substr (first_pos, pos - first_pos);
+	  return true;
+	}
+      ++lineno;
+      ++pos;
+    }
+
+  return false;
+}
+
+/* See source-cache.h.  */
+
+bool
+source_cache::get_source_lines (struct symtab *s, int first_line,
+				int last_line, std::string *lines)
+{
+  if (first_line < 1 || last_line < 1 || first_line > last_line)
+    return false;
+
+  if (!ensure (s))
+    return false;
+
+  return extract_lines (m_source_map.back ().contents,
+			first_line, last_line, lines);
+}
+
 #if GDB_SELF_TEST
 namespace selftests
 {
 static void extract_lines_test ()
 {
   std::string input_text = "abc\ndef\nghi\njkl\n";
-
-  SELF_CHECK (extract_lines (input_text, 1, 1) == "abc\n");
-  SELF_CHECK (extract_lines (input_text, 2, 1) == "");
-  SELF_CHECK (extract_lines (input_text, 1, 2) == "abc\ndef\n");
-  SELF_CHECK (extract_lines ("abc", 1, 1) == "abc");
+  std::string result;
+
+  SELF_CHECK (extract_lines (input_text, 1, 1, &result)
+	      && result == "abc\n");
+  SELF_CHECK (!extract_lines (input_text, 2, 1, &result));
+  SELF_CHECK (extract_lines (input_text, 1, 2, &result)
+	      && result == "abc\ndef\n");
+  SELF_CHECK (extract_lines ("abc", 1, 1, &result)
+	      && result == "abc");
 }
 }
 #endif
diff --git a/gdb/source-cache.h b/gdb/source-cache.h
index 0c8b14e483e..b6e8690d12b 100644
--- a/gdb/source-cache.h
+++ b/gdb/source-cache.h
@@ -19,12 +19,20 @@ 
 #ifndef SOURCE_CACHE_H
 #define SOURCE_CACHE_H
 
-/* This caches highlighted source text, keyed by the source file's
-   full name.  A size-limited LRU cache is used.
+#include <unordered_map>
+#include <unordered_set>
+
+/* This caches two things related to source files.
+
+   First, it caches highlighted source text, keyed by the source
+   file's full name.  A size-limited LRU cache is used.
 
    Highlighting depends on the GNU Source Highlight library.  When not
-   available, this cache will fall back on reading plain text from the
-   appropriate file.  */
+   available or when highlighting fails for some reason, this cache
+   will instead store the un-highlighted source text.
+
+   Second, this will cache the file offsets corresponding to the start
+   of each line of a source file.  This cache is not size-limited.  */
 class source_cache
 {
 public:
@@ -33,11 +41,23 @@  public:
   {
   }
 
+  /* This returns the vector of file offsets for the symtab S,
+     computing the vector first if needed.
+
+     On failure, returns false.
+
+     On success, returns true and sets *OFFSETS.  This pointer is not
+     guaranteed to remain valid across other calls to get_source_lines
+     or get_line_charpos.  */
+  bool get_line_charpos (struct symtab *s,
+			 const std::vector<off_t> **offsets);
+
   /* Get the source text for the source file in symtab S.  FIRST_LINE
      and LAST_LINE are the first and last lines to return; line
-     numbers are 1-based.  If the file cannot be read, false is
-     returned.  Otherwise, LINES_OUT is set to the desired text.  The
-     returned text may include ANSI terminal escapes.  */
+     numbers are 1-based.  If the file cannot be read, or if the line
+     numbers are out of range, false is returned.  Otherwise,
+     LINES_OUT is set to the desired text.  The returned text may
+     include ANSI terminal escapes.  */
   bool get_source_lines (struct symtab *s, int first_line,
 			 int last_line, std::string *lines_out);
 
@@ -45,6 +65,7 @@  public:
   void clear ()
   {
     m_source_map.clear ();
+    m_offset_cache.clear ();
   }
 
 private:
@@ -59,12 +80,21 @@  private:
   };
 
   /* A helper function for get_source_lines reads a source file.
-     Returns false on error.  If no error, the contents of the file
-     are put into *LINES_OUT, and returns true.  */
-  bool get_plain_source_lines (struct symtab *s, std::string *lines_out);
+     Returns the contents of the file; or throws an exception on
+     error.  This also updates m_offset_cache.  */
+  std::string get_plain_source_lines (struct symtab *s,
+				      const std::string &fullname);
 
-  /* The contents of the cache.  */
+  /* A helper function that the data for the given symtab is entered
+     into both caches.  Returns false on error.  */
+  bool ensure (struct symtab *s);
+
+  /* The contents of the source text cache.  */
   std::vector<source_text> m_source_map;
+
+  /* The file offset cache.  The key is the full name of the source
+     file.  */
+  std::unordered_map<std::string, std::vector<off_t>> m_offset_cache;
 };
 
 /* The global source cache.  */
diff --git a/gdb/source.c b/gdb/source.c
index a83e55e5699..e0050f1fb82 100644
--- a/gdb/source.c
+++ b/gdb/source.c
@@ -357,11 +357,6 @@  forget_cached_source_info_for_objfile (struct objfile *objfile)
     {
       for (symtab *s : compunit_filetabs (cu))
 	{
-	  if (s->line_charpos != NULL)
-	    {
-	      xfree (s->line_charpos);
-	      s->line_charpos = NULL;
-	    }
 	  if (s->fullname != NULL)
 	    {
 	      xfree (s->fullname);
@@ -642,9 +637,10 @@  info_source_command (const char *ignore, int from_tty)
     printf_filtered (_("Compilation directory is %s\n"), SYMTAB_DIRNAME (s));
   if (s->fullname)
     printf_filtered (_("Located in %s\n"), s->fullname);
-  if (s->nlines)
-    printf_filtered (_("Contains %d line%s.\n"), s->nlines,
-		     s->nlines == 1 ? "" : "s");
+  const std::vector<off_t> *offsets;
+  if (g_source_cache.get_line_charpos (s, &offsets))
+    printf_filtered (_("Contains %d line%s.\n"), (int) offsets->size (),
+		     offsets->size () == 1 ? "" : "s");
 
   printf_filtered (_("Source language is %s.\n"), language_str (s->language));
   printf_filtered (_("Producer is %s.\n"),
@@ -1123,92 +1119,6 @@  symtab_to_filename_for_display (struct symtab *symtab)
   else
     internal_error (__FILE__, __LINE__, _("invalid filename_display_string"));
 }
-
-/* Create and initialize the table S->line_charpos that records
-   the positions of the lines in the source file, which is assumed
-   to be open on descriptor DESC.
-   All set S->nlines to the number of such lines.  */
-
-static void
-find_source_lines (struct symtab *s, int desc)
-{
-  struct stat st;
-  char *p, *end;
-  int nlines = 0;
-  int lines_allocated = 1000;
-  int *line_charpos;
-  long mtime = 0;
-  int size;
-
-  gdb_assert (s);
-  line_charpos = XNEWVEC (int, lines_allocated);
-  if (fstat (desc, &st) < 0)
-    perror_with_name (symtab_to_filename_for_display (s));
-
-  if (SYMTAB_OBJFILE (s) != NULL && SYMTAB_OBJFILE (s)->obfd != NULL)
-    mtime = SYMTAB_OBJFILE (s)->mtime;
-  else if (exec_bfd)
-    mtime = exec_bfd_mtime;
-
-  if (mtime && mtime < st.st_mtime)
-    warning (_("Source file is more recent than executable."));
-
-  {
-    /* st_size might be a large type, but we only support source files whose 
-       size fits in an int.  */
-    size = (int) st.st_size;
-
-    /* Use the heap, not the stack, because this may be pretty large,
-       and we may run into various kinds of limits on stack size.  */
-    gdb::def_vector<char> data (size);
-
-    /* Reassign `size' to result of read for systems where \r\n -> \n.  */
-    size = myread (desc, data.data (), size);
-    if (size < 0)
-      perror_with_name (symtab_to_filename_for_display (s));
-    end = data.data () + size;
-    p = &data[0];
-    line_charpos[0] = 0;
-    nlines = 1;
-    while (p != end)
-      {
-	if (*p++ == '\n'
-	/* A newline at the end does not start a new line.  */
-	    && p != end)
-	  {
-	    if (nlines == lines_allocated)
-	      {
-		lines_allocated *= 2;
-		line_charpos =
-		  (int *) xrealloc ((char *) line_charpos,
-				    sizeof (int) * lines_allocated);
-	      }
-	    line_charpos[nlines++] = p - data.data ();
-	  }
-      }
-  }
-
-  s->nlines = nlines;
-  s->line_charpos =
-    (int *) xrealloc ((char *) line_charpos, nlines * sizeof (int));
-
-}
-
-
-
-/* See source.h.  */
-
-scoped_fd
-open_source_file_with_line_charpos (struct symtab *s)
-{
-  scoped_fd fd (open_source_file (s));
-  if (fd.get () < 0)
-    return fd;
-
-  if (s->line_charpos == nullptr)
-    find_source_lines (s, fd.get ());
-  return fd;
-}
 
 
 
@@ -1308,8 +1218,13 @@  print_source_lines_base (struct symtab *s, int line, int stopline,
 
   std::string lines;
   if (!g_source_cache.get_source_lines (s, line, stopline - 1, &lines))
-    error (_("Line number %d out of range; %s has %d lines."),
-	   line, symtab_to_filename_for_display (s), s->nlines);
+    {
+      const std::vector<off_t> *offsets = nullptr;
+      g_source_cache.get_line_charpos (s, &offsets);
+      error (_("Line number %d out of range; %s has %d lines."),
+	     line, symtab_to_filename_for_display (s),
+	     offsets == nullptr ? 0 : (int) offsets->size ());
+    }
 
   const char *iter = lines.c_str ();
   while (nlines-- > 0 && *iter != '\0')
@@ -1524,7 +1439,7 @@  search_command_helper (const char *regex, int from_tty, bool forward)
   if (current_source_symtab == 0)
     select_source_symtab (0);
 
-  scoped_fd desc (open_source_file_with_line_charpos (current_source_symtab));
+  scoped_fd desc (open_source_file (current_source_symtab));
   if (desc.get () < 0)
     perror_with_name (symtab_to_filename_for_display (current_source_symtab));
 
@@ -1532,11 +1447,13 @@  search_command_helper (const char *regex, int from_tty, bool forward)
 	      ? last_line_listed + 1
 	      : last_line_listed - 1);
 
-  if (line < 1 || line > current_source_symtab->nlines)
+  const std::vector<off_t> *offsets;
+  if (line < 1
+      || !g_source_cache.get_line_charpos (current_source_symtab, &offsets)
+      || line > offsets->size ())
     error (_("Expression not found"));
 
-  if (lseek (desc.get (), current_source_symtab->line_charpos[line - 1], 0)
-      < 0)
+  if (lseek (desc.get (), (*offsets)[line - 1], 0) < 0)
     perror_with_name (symtab_to_filename_for_display (current_source_symtab));
 
   gdb_file_up stream = desc.to_file (FDOPEN_MODE);
@@ -1585,8 +1502,7 @@  search_command_helper (const char *regex, int from_tty, bool forward)
 	  line--;
 	  if (line < 1)
 	    break;
-	  if (fseek (stream.get (),
-		     current_source_symtab->line_charpos[line - 1], 0) < 0)
+	  if (fseek (stream.get (), (*offsets)[line - 1], 0) < 0)
 	    {
 	      const char *filename
 		= symtab_to_filename_for_display (current_source_symtab);
diff --git a/gdb/source.h b/gdb/source.h
index 54d899a1d04..84cc2fa9922 100644
--- a/gdb/source.h
+++ b/gdb/source.h
@@ -76,11 +76,6 @@  extern scoped_fd find_and_open_source (const char *filename,
    negative number for error.  */
 extern scoped_fd open_source_file (struct symtab *s);
 
-/* Open a source file given a symtab S (by calling open_source_file), then
-   ensure the line_charpos data is initialised for symtab S before
-   returning.  */
-extern scoped_fd open_source_file_with_line_charpos (struct symtab *s);
-
 extern gdb::unique_xmalloc_ptr<char> rewrite_source_path (const char *path);
 
 extern const char *symtab_to_fullname (struct symtab *s);
diff --git a/gdb/symtab.h b/gdb/symtab.h
index 9880ecc4c53..f2d59a9f90b 100644
--- a/gdb/symtab.h
+++ b/gdb/symtab.h
@@ -1321,16 +1321,6 @@  struct symtab
 
   const char *filename;
 
-  /* Total number of lines found in source file.  */
-
-  int nlines;
-
-  /* line_charpos[N] is the position of the (N-1)th line of the
-     source file.  "position" means something we can lseek() to; it
-     is not guaranteed to be useful any other way.  */
-
-  int *line_charpos;
-
   /* Language of this source file.  */
 
   enum language language;
diff --git a/gdb/tui/tui-source.c b/gdb/tui/tui-source.c
index f0bac24bfea..619d9374500 100644
--- a/gdb/tui/tui-source.c
+++ b/gdb/tui/tui-source.c
@@ -251,7 +251,9 @@  tui_source_window::do_scroll_vertical (int num_to_scroll)
       l.loa = LOA_LINE;
       l.u.line_no = content[0].line_or_addr.u.line_no
 	+ num_to_scroll;
-      if (l.u.line_no > s->nlines)
+      const std::vector<off_t> *offsets;
+      if (g_source_cache.get_line_charpos (s, &offsets)
+	  && l.u.line_no > offsets->size ())
 	/* line = s->nlines - win_info->content_size + 1; */
 	/* elz: fix for dts 23398.  */
 	l.u.line_no = content[0].line_or_addr.u.line_no;