Patchwork Improve symbol cache performance

login
register
mail settings
Submitter Doug Evans
Date Feb. 5, 2015, 4:12 a.m.
Message ID <m3wq3x3sje.fsf@seba.sebabeach.org>
Download mbox | patch
Permalink /patch/4924/
State New
Headers show

Comments

Doug Evans - Feb. 5, 2015, 4:12 a.m.
Hi.

This patch contains some improvements to the symbol cache.
I don't have any data (yet) to show how much it can improve things
so I don't intend to check this in right away.

2015-02-04  Doug Evans  <xdje42@gmail.com>

	* symtab.c (NR_SYMBOL_CACHE_WAYS): New macro.
	(MIN_SYMBOL_CACHE_SIZE): New macro.
	(struct block_symbol_cache) <lookups>: New member.
	(struct symbol_cache_slot) <last_used_time>: New member.
	(set_symbol_cache_size_handler): Flag error if new size is too small.
	(reset_bsc_counters): New function.
	(reset_bsc_last_used_times): New function.
	(symbol_cache_lookup): Try NR_SYMBOL_CACHE_WAYS slots to find
	the symbol. Keep track of the oldest slot in case not present.
	(symbol_cache_clear_slot): Reset last_used_time.
	(symbol_cache_mark_found): Set last_used_time.
	(symbol_cache_mark_not_found): Ditto.
	(symbol_cache_flush): Call reset_bsc_counters.
	(symbol_cache_dump): Print last used time.
	(symbol_cache_stats): Print number of lookups.

Patch

--- /main/to-push/gdb/symtab.c	2015-02-04 08:21:01.912572265 -0800
+++ ./symtab.c	2015-02-04 19:47:42.330563611 -0800
@@ -114,6 +114,19 @@  static const struct program_space_data *
    computation, so that's why the number is what it is.  */
 #define DEFAULT_SYMBOL_CACHE_SIZE 1021
 
+/* The number of "ways" of the cache.
+   These aren't ways in the h/w cache sense.
+   Each lookup is compared with hashed_slot + 0, ... hashed_slot + #ways - 1.
+   There is no data to suggest how many ways is best.  This is just a first
+   try.  */
+#define NR_SYMBOL_CACHE_WAYS 2
+
+/* The minimum symbol cache size.
+   It doesn't make sense to set the size less than NR_SYMBOL_CACHE_WAYS.
+   To keep things simple we just set a lower bound.
+   A size of zero is still used to disable the cache.  */
+#define MIN_SYMBOL_CACHE_SIZE 32
+
 /* The maximum symbol cache size.
    There's no method to the decision of what value to use here, other than
    there's no point in allowing a user typo to make gdb consume all memory.  */
@@ -142,6 +155,12 @@  struct block_symbol_cache
   unsigned int misses;
   unsigned int collisions;
 
+  /* A count of the number of lookups.
+     This is used as the value of last_used_time of the found symbol.
+     This is equal to hits + misses, but it is tracked separately to simplify
+     overflow handling.  */
+  unsigned int lookups;
+
   /* SYMBOLS is a variable length array of this size.
      One can imagine that in general one cache (global/static) should be a
      fraction of the size of the other, but there's no data at the moment
@@ -152,6 +171,10 @@  struct block_symbol_cache
   {
     enum symbol_cache_slot_state state;
 
+    /* The "time" the symbol was last return from a lookup.
+       This is used to decide which entry to kick out when there's a miss.  */
+    unsigned int last_used_time;
+
     /* The objfile that was current when the symbol was looked up.
        This is only needed for global blocks, but for simplicity's sake
        we allocate the space for both.  If data shows the extra space used
@@ -1373,6 +1396,17 @@  static void
 set_symbol_cache_size_handler (char *args, int from_tty,
 			       struct cmd_list_element *c)
 {
+  if (new_symbol_cache_size != 0
+      && new_symbol_cache_size < MIN_SYMBOL_CACHE_SIZE)
+    {
+      /* Restore the previous value.
+	 This is the value the "show" command prints.  */
+      new_symbol_cache_size = symbol_cache_size;
+
+      error (_("Symbol cache size is too small, min is %u"
+	       " (or zero to disable)"),
+	     MIN_SYMBOL_CACHE_SIZE);
+    }
   if (new_symbol_cache_size > MAX_SYMBOL_CACHE_SIZE)
     {
       /* Restore the previous value.
@@ -1387,6 +1421,29 @@  set_symbol_cache_size_handler (char *arg
   set_symbol_cache_size (symbol_cache_size);
 }
 
+/* Reset BSC's counters.
+   This is done to simplify handling the lookup counter wrapping around.  */
+
+static void
+reset_bsc_counters (struct block_symbol_cache *bsc)
+{
+  bsc->hits = 0;
+  bsc->misses = 0;
+  bsc->collisions = 0;
+  bsc->lookups = 0;
+}
+
+/* Reset the "last used" times of each entry in BSC.  */
+
+static void
+reset_bsc_last_used_times (struct block_symbol_cache *bsc)
+{
+  unsigned int i;
+
+  for (i = 0; i < bsc->size; ++i)
+    bsc->symbols[i].last_used_time = 0;
+}
+
 /* Lookup symbol NAME,DOMAIN in BLOCK in the symbol cache of PSPACE.
    OBJFILE_CONTEXT is the current objfile, which may be NULL.
    The result is the symbol if found, SYMBOL_LOOKUP_FAILED if a previous lookup
@@ -1404,8 +1461,8 @@  symbol_cache_lookup (struct symbol_cache
 		     struct symbol_cache_slot **slot_ptr)
 {
   struct block_symbol_cache *bsc;
-  unsigned int hash;
-  struct symbol_cache_slot *slot;
+  unsigned int i, hash, first_slot_nr;
+  struct symbol_cache_slot *oldest_slot;
 
   if (block == GLOBAL_BLOCK)
     bsc = cache->global_symbols;
@@ -1418,28 +1475,56 @@  symbol_cache_lookup (struct symbol_cache
       return NULL;
     }
 
+  ++bsc->lookups;
+  if (bsc->lookups == 0)
+    {
+      /* To avoid confusion and/or complicated handling of the lookup counter
+	 wrapping around, just reset everything to zero here.  It happens
+	 rare enough that there's no need (yet) for anything else.  */
+      reset_bsc_counters (bsc);
+      reset_bsc_last_used_times (bsc);
+      ++bsc->lookups;
+    }
+
   hash = hash_symbol_entry (objfile_context, name, domain);
-  slot = bsc->symbols + hash % bsc->size;
+  first_slot_nr = hash % bsc->size;
+
+  /* While we're looking for a hit, keep track of the oldest slot.  */
+  oldest_slot = &bsc->symbols[first_slot_nr];
 
-  if (eq_symbol_entry (slot, objfile_context, name, domain))
+  for (i = 0; i < NR_SYMBOL_CACHE_WAYS; ++i)
     {
-      if (symbol_lookup_debug)
-	fprintf_unfiltered (gdb_stdlog,
-			    "%s block symbol cache hit%s for %s, %s\n",
-			    block == GLOBAL_BLOCK ? "Global" : "Static",
-			    slot->state == SYMBOL_SLOT_NOT_FOUND
-			    ? " (not found)" : "",
-			    name, domain_name (domain));
-      ++bsc->hits;
-      if (slot->state == SYMBOL_SLOT_NOT_FOUND)
-	return SYMBOL_LOOKUP_FAILED;
-      return slot->value.found;
+      unsigned int slot_nr = (first_slot_nr + i) % bsc->size;
+      struct symbol_cache_slot *slot = &bsc->symbols[slot_nr];
+
+      if (eq_symbol_entry (slot, objfile_context, name, domain))
+	{
+	  if (symbol_lookup_debug)
+	    fprintf_unfiltered (gdb_stdlog,
+				"%s block symbol cache hit%s for %s, %s\n",
+				block == GLOBAL_BLOCK ? "Global" : "Static",
+				slot->state == SYMBOL_SLOT_NOT_FOUND
+				? " (not found)" : "",
+				name, domain_name (domain));
+	  ++bsc->hits;
+	  slot->last_used_time = bsc->lookups;
+	  if (slot->state == SYMBOL_SLOT_NOT_FOUND)
+	    return SYMBOL_LOOKUP_FAILED;
+	  return slot->value.found;
+	}
+
+      /* Unused slots have a last_used_time of zero, so they are automagically
+	 preferred over used slots.  */
+      if (slot->last_used_time < oldest_slot->last_used_time)
+	oldest_slot = slot;
     }
 
   /* Symbol is not present in the cache.  */
 
+  ++bsc->misses;
+
   *bsc_ptr = bsc;
-  *slot_ptr = slot;
+  *slot_ptr = oldest_slot;
 
   if (symbol_lookup_debug)
     {
@@ -1448,7 +1533,6 @@  symbol_cache_lookup (struct symbol_cache
 			  block == GLOBAL_BLOCK ? "Global" : "Static",
 			  name, domain_name (domain));
     }
-  ++bsc->misses;
   return NULL;
 }
 
@@ -1460,6 +1544,7 @@  symbol_cache_clear_slot (struct symbol_c
   if (slot->state == SYMBOL_SLOT_NOT_FOUND)
     xfree (slot->value.not_found.name);
   slot->state = SYMBOL_SLOT_UNUSED;
+  slot->last_used_time = 0;
 }
 
 /* Mark SYMBOL as found in SLOT.
@@ -1481,6 +1566,7 @@  symbol_cache_mark_found (struct block_sy
       symbol_cache_clear_slot (slot);
     }
   slot->state = SYMBOL_SLOT_FOUND;
+  slot->last_used_time = bsc->lookups;
   slot->objfile_context = objfile_context;
   slot->value.found = symbol;
 }
@@ -1503,6 +1589,7 @@  symbol_cache_mark_not_found (struct bloc
       symbol_cache_clear_slot (slot);
     }
   slot->state = SYMBOL_SLOT_NOT_FOUND;
+  slot->last_used_time = bsc->lookups;
   slot->objfile_context = objfile_context;
   slot->value.not_found.name = xstrdup (name);
   slot->value.not_found.domain = domain;
@@ -1546,12 +1633,8 @@  symbol_cache_flush (struct program_space
 	symbol_cache_clear_slot (&bsc->symbols[i]);
     }
 
-  cache->global_symbols->hits = 0;
-  cache->global_symbols->misses = 0;
-  cache->global_symbols->collisions = 0;
-  cache->static_symbols->hits = 0;
-  cache->static_symbols->misses = 0;
-  cache->static_symbols->collisions = 0;
+  reset_bsc_counters (cache->global_symbols);
+  reset_bsc_counters (cache->static_symbols);
 }
 
 /* Dump CACHE.  */
@@ -1589,16 +1672,18 @@  symbol_cache_dump (const struct symbol_c
 	    case SYMBOL_SLOT_UNUSED:
 	      break;
 	    case SYMBOL_SLOT_NOT_FOUND:
-	      printf_filtered ("  [%4u] = %s, %s %s (not found)\n", i,
+	      printf_filtered ("  [%4u] = %s, %s %s (not found), time %u\n", i,
 			       host_address_to_string (slot->objfile_context),
 			       slot->value.not_found.name,
-			       domain_name (slot->value.not_found.domain));
+			       domain_name (slot->value.not_found.domain),
+			       slot->last_used_time);
 	      break;
 	    case SYMBOL_SLOT_FOUND:
-	      printf_filtered ("  [%4u] = %s, %s %s\n", i,
+	      printf_filtered ("  [%4u] = %s, %s %s, time %u\n", i,
 			       host_address_to_string (slot->objfile_context),
 			       SYMBOL_PRINT_NAME (slot->value.found),
-			       domain_name (SYMBOL_DOMAIN (slot->value.found)));
+			       domain_name (SYMBOL_DOMAIN (slot->value.found)),
+			       slot->last_used_time);
 	      break;
 	    }
 	}
@@ -1673,6 +1758,7 @@  symbol_cache_stats (struct symbol_cache
       printf_filtered ("  hits:       %u\n", bsc->hits);
       printf_filtered ("  misses:     %u\n", bsc->misses);
       printf_filtered ("  collisions: %u\n", bsc->collisions);
+      printf_filtered ("  lookups:    %u\n", bsc->lookups);
     }
 }