Patchwork [0/4,v3] Exec events in gdbserver on Linux

login
register
mail settings
Submitter Don Breazeal
Date May 28, 2014, 6:02 p.m.
Message ID <538624BE.5070107@codesourcery.com>
Download mbox | patch
Permalink /patch/1185/
State New
Headers show

Comments

Don Breazeal - May 28, 2014, 6:02 p.m.
On 5/27/2014 2:41 PM, Breazeal, Don wrote:
> On 5/27/2014 11:49 AM, Breazeal, Don wrote:
>> On 5/25/2014 9:55 PM, Doug Evans wrote:

--snip snip--

>>> Hi.
>>>
>>> How do I tweak your patch so that I can see the race condition for myself?
>>> [I realize the window is small ... I'd just like to play with it.]
>>
>> Hi Doug, thanks for looking at this.  I will have to look back through
>> my notes to see if I can come up with a reasonable way of doing this.  I
>> was using the same test case that you were, running the basic test
>> manually.  I saw the race condition by inserting fprintf(stderr,.. into
>> linux_proc_pid_has_state and check_zombie_leaders, running with no debug
>> output.

Now that I have gone back to look at this, I recall that I wasn't
running the test manually, but using a script that ran it repeatedly
until it failed.  At different times the failure would occur at
different frequencies.  I attributed that to system load, which I think
was relatively high.

>>
>> From my notes:

--snip snip--

> The '#' lines are my annotations,
> the other lines were actual trace log output.
> ----------------------------------------------
> # result of if stmt condition in check_zombie_leaders
> # original leader is in zombie state
> linux_proc_pid_has_state: procfile:
> State:      Z (zombie)
> 
> # result inside 'if' stmt in check_zombie_leaders - execing
> # thread has replaced original leader since we evaluated
> # the 'if' condition
> linux_proc_pid_has_state: procfile:
> State:      R (running)
> 
> # printed inside if stmt, required zombie=1 to get here
> # we still think we have 2 lwps, but after the exec there
> # is only one.  zombie=0 came from call to linux_proc_pid_has_state
> # above.
> check_zombie_leaders: leader_pid=30981, leader_lp!=NULL=1, num_lwps=2,
> zombie=0
> ----------------------------------------------

I don't think there is a way to (reasonably) tweak my existing patch to
use something other than exit events.  Here is a patch for an earlier
implementation of remote exec events that contains the race condition.
If I hadn't run into the race condition in my testing I would have
submitted something similar to this.

I spent yesterday afternoon and part of this morning trying to reproduce
the failure, without success.  I ran gdb.threads/non-ldr-exc-1.exp
several thousand times, with/without optimization, on local/NFS drives,
inserted random sleeps, etc.

Print statements similar to those that generated the logs above are in
check_zombie_leaders and linux_proc_pid_has_state, commented out.

I hope this is useful.
--Don

Subject: Exec patch w/race condition

---
 gdb/common/linux-procfs.c    |    1 +
 gdb/common/linux-ptrace.c    |    2 +-
 gdb/gdbserver/linux-low.c    |   98
++++++++++++++++++++++++++++++++++++++---
 gdb/gdbserver/linux-low.h    |    5 ++
 gdb/gdbserver/remote-utils.c |   27 +++++++++++-
 gdb/remote.c                 |   19 ++++++++
 6 files changed, 142 insertions(+), 10 deletions(-)

Patch

diff --git a/gdb/common/linux-procfs.c b/gdb/common/linux-procfs.c
index 1443a88..7233ce4 100644
--- a/gdb/common/linux-procfs.c
+++ b/gdb/common/linux-procfs.c
@@ -95,6 +95,7 @@  linux_proc_pid_has_state (pid_t pid, const char *state)
   while (fgets (buffer, sizeof (buffer), procfile) != NULL)
     if (strncmp (buffer, "State:", 6) == 0)
       {
+//fprintf (stderr, "%s: %s\n", __func__, buffer);
 	have_state = 1;
 	break;
       }
diff --git a/gdb/common/linux-ptrace.c b/gdb/common/linux-ptrace.c
index efbd1ea..e38e266 100644
--- a/gdb/common/linux-ptrace.c
+++ b/gdb/common/linux-ptrace.c
@@ -437,7 +437,7 @@  linux_test_for_tracefork (int child_pid)
 	  /* Do not enable all the options for now since gdbserver does not
 	     properly support them.  This restriction will be lifted when
 	     gdbserver is augmented to support them.  */
-	  current_ptrace_options |= PTRACE_O_TRACECLONE;
+	  current_ptrace_options |= PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC;
 #else
 	  current_ptrace_options |= PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK
 	    | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC;
diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 1932ff2..b8a808f 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -280,6 +280,32 @@  static int linux_event_pipe[2] = { -1, -1 };
 static void send_sigstop (struct lwp_info *lwp);
 static void wait_for_sigstop (void);

+/* Accepts an integer PID; Returns a string representing a file that
+   can be opened to get info for the child process.
+   Space for the result is malloc'd, caller must free.  */
+
+static char *
+linux_child_pid_to_exec_file (int pid)
+{
+  char *name1, *name2;
+
+  name1 = xmalloc (PATH_MAX);
+  name2 = xmalloc (PATH_MAX);
+  memset (name2, 0, PATH_MAX);
+
+  sprintf (name1, "/proc/%d/exe", pid);
+  if (readlink (name1, name2, PATH_MAX) > 0)
+    {
+      free (name1);
+      return name2;
+    }
+  else
+    {
+      free (name2);
+      return name1;
+    }
+}
+
 /* Return non-zero if HEADER is a 64-bit ELF file.  */

 static int
@@ -369,9 +395,10 @@  linux_add_process (int pid, int attached)

 /* Handle a GNU/Linux extended wait response.  If we see a clone
    event, we need to add the new LWP to our list (and not report the
-   trap to higher layers).  */
+   trap to higher layers).  This function returns non-zero if the
+   event should be ignored and we should wait again.  */

-static void
+static int
 handle_extended_wait (struct lwp_info *event_child, int wstat)
 {
   int event = wstat >> 16;
@@ -452,7 +479,27 @@  handle_extended_wait (struct lwp_info *event_child,
int wstat)
 	 threads, it will have a pending SIGSTOP; we may as well
 	 collect it now.  */
       linux_resume_one_lwp (event_child, event_child->stepping, 0, NULL);
+
+      /* Don't report the event.  */
+      return 1;
     }
+
+  if (event == PTRACE_EVENT_EXEC)
+    {
+      if (debug_threads)
+        debug_printf ("LHEW: Got exec event from LWP %ld\n",
+                      lwpid_of (event_thr));
+
+      event_child->waitstatus.kind = TARGET_WAITKIND_EXECD;
+      event_child->waitstatus.value.execd_pathname
+        = linux_child_pid_to_exec_file (lwpid_of (event_thr));
+
+      /* Report the event.  */
+      return 0;
+    }
+
+  internal_error (__FILE__, __LINE__,
+		  _("unknown ptrace event %d"), event);
 }

 /* Return the PC as read from the regcache of LWP, without any
@@ -1345,6 +1392,7 @@  check_zombie_leaders (void)
 		     "(it exited, or another thread execd).\n",
 		     leader_pid);

+//fprintf (stderr, "%s: ldr lwp %d, zombie %d\n", __func__, leader_pid,
linux_proc_pid_is_zombie (leader_pid));
 	  delete_lwp (leader_lp);
 	}
     }
@@ -1851,8 +1899,8 @@  linux_low_filter_event (ptid_t filter_ptid, int
lwpid, int wstat)
   if (WIFSTOPPED (wstat) && WSTOPSIG (wstat) == SIGTRAP
       && wstat >> 16 != 0)
     {
-      handle_extended_wait (child, wstat);
-      return NULL;
+      if (handle_extended_wait (child, wstat))
+	return NULL;
     }

   if (WIFSTOPPED (wstat) && WSTOPSIG (wstat) == SIGSTOP
@@ -2452,6 +2500,19 @@  linux_stabilize_threads (void)
     }
 }

+/* Return non-zero if WAITSTATUS reflects an extended linux
+   event.  Otherwise, return 0.  */
+
+static int
+extended_event_reported (const struct target_waitstatus *waitstatus)
+{
+
+  if (waitstatus == NULL)
+    return 0;
+
+  return waitstatus->kind == TARGET_WAITKIND_EXECD;
+}
+
 /* Wait for process, returns status.  */

 static ptid_t
@@ -2815,7 +2876,8 @@  retry:
 		       && !bp_explains_trap && !trace_event)
 		   || (gdb_breakpoint_here (event_child->stop_pc)
 		       && gdb_condition_true_at_breakpoint (event_child->stop_pc)
-		       && gdb_no_commands_at_breakpoint (event_child->stop_pc)));
+		       && gdb_no_commands_at_breakpoint (event_child->stop_pc))
+		   || extended_event_reported (&event_child->waitstatus));

   run_breakpoint_commands (event_child->stop_pc);

@@ -2837,7 +2899,16 @@  retry:
 			  paddress (event_child->stop_pc),
 			  paddress (event_child->step_range_start),
 			  paddress (event_child->step_range_end));
-	}
+          if (debug_threads
+              && extended_event_reported (&event_child->waitstatus))
+            {
+                char *str = target_waitstatus_to_string (ourstatus);
+                debug_printf ("LWP %ld has forked, cloned, vforked or
execd"
+                              " with waitstatus %s\n",
+                              lwpid_of (current_inferior), str);
+                xfree (str);
+            }
+        }

       /* We're not reporting this breakpoint to GDB, so apply the
 	 decr_pc_after_break adjustment to the inferior's regcache
@@ -2935,7 +3006,18 @@  retry:
 	unstop_all_lwps (1, event_child);
     }

-  ourstatus->kind = TARGET_WAITKIND_STOPPED;
+  /* If the reported event is a fork, vfork or exec, let GDB know.  */
+  if (extended_event_reported (&event_child->waitstatus))
+    {
+      ourstatus->kind = event_child->waitstatus.kind;
+      ourstatus->value = event_child->waitstatus.value;
+
+      /* Reset the event child's waitstatus since we handled it
+         already.  */
+      event_child->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+    }
+  else
+    ourstatus->kind = TARGET_WAITKIND_STOPPED;

   if (current_inferior->last_resume_kind == resume_stop
       && WSTOPSIG (w) == SIGSTOP)
@@ -2952,7 +3034,7 @@  retry:
 	 but, it stopped for other reasons.  */
       ourstatus->value.sig = gdb_signal_from_host (WSTOPSIG (w));
     }
-  else
+  else if (ourstatus->kind == TARGET_WAITKIND_STOPPED)
     {
       ourstatus->value.sig = gdb_signal_from_host (WSTOPSIG (w));
     }
diff --git a/gdb/gdbserver/linux-low.h b/gdb/gdbserver/linux-low.h
index 498b221..2534ad1 100644
--- a/gdb/gdbserver/linux-low.h
+++ b/gdb/gdbserver/linux-low.h
@@ -269,6 +269,11 @@  struct lwp_info
   /* When stopped is set, the last wait status recorded for this lwp.  */
   int last_status;

+  /* If WAITSTATUS->KIND != TARGET_WAITKIND_SPURIOUS, the waitstatus
+     for this LWP's last event.  This may correspond to LAST_STATUS above,
+     or to a local variable in lin_lwp_wait.  */
+  struct target_waitstatus waitstatus;
+
   /* When stopped is set, this is where the lwp stopped, with
      decr_pc_after_break already accounted for.  */
   CORE_ADDR stop_pc;
diff --git a/gdb/gdbserver/remote-utils.c b/gdb/gdbserver/remote-utils.c
index 4fcafa0..6b8c10b 100644
--- a/gdb/gdbserver/remote-utils.c
+++ b/gdb/gdbserver/remote-utils.c
@@ -1111,14 +1111,39 @@  prepare_resume_reply (char *buf, ptid_t ptid,
   switch (status->kind)
     {
     case TARGET_WAITKIND_STOPPED:
+    case TARGET_WAITKIND_EXECD:
       {
 	struct thread_info *saved_inferior;
 	const char **regp;
 	struct regcache *regcache;
+	enum gdb_signal signal;

-	sprintf (buf, "T%02x", status->value.sig);
+	if (status->kind == TARGET_WAITKIND_EXECD)
+	  signal = GDB_SIGNAL_TRAP;
+	else
+	  signal = status->value.sig;
+
+	sprintf (buf, "T%02x", signal);
 	buf += strlen (buf);

+	if (status->kind == TARGET_WAITKIND_EXECD && multi_process)
+	  {
+	    const char *event = "target_exec";
+	    char hexified_pathname[PATH_MAX];
+
+	    sprintf (buf, "%s:", event);
+	    buf += strlen (buf);
+
+	    /* Encode pathname to hexified format.  */
+            bin2hex ((const gdb_byte *) status->value.execd_pathname,
+                     hexified_pathname,
strlen(status->value.execd_pathname));
+
+	    sprintf (buf, "%s;", hexified_pathname);
+	    xfree (status->value.execd_pathname);
+	    status->value.execd_pathname = NULL;
+	    buf += strlen (buf);
+	  }
+
 	saved_inferior = current_inferior;

 	current_inferior = find_thread_ptid (ptid);
diff --git a/gdb/remote.c b/gdb/remote.c
index 964bd41..55bafc2 100644
--- a/gdb/remote.c
+++ b/gdb/remote.c
@@ -5471,6 +5471,25 @@  Packet: '%s'\n"),
 		  p = unpack_varlen_hex (++p1, &c);
 		  event->core = c;
 		}
+	      else if (strncmp (p, "target_exec", p1 - p) == 0)
+		{
+                  ULONGEST pid;
+                  char pathname[PATH_MAX];
+
+                  p = unpack_varlen_hex (++p1, &pid);
+
+                  /* Save the pathname for event reporting and for
+                     the next run command. */
+                  hex2bin (p1, (gdb_byte *) pathname, (p - p1)/2);
+                  /* Add the null terminator.  */
+                  pathname[(p - p1)/2] = '\0';
+                  /* This is freed during event handling.  */
+                  event->ws.value.execd_pathname = xstrdup (pathname);
+                  event->ws.kind = TARGET_WAITKIND_EXECD;
+                  /* Save the pathname for the next run command.  */
+                  xfree (remote_exec_file);
+                  remote_exec_file = pathname;
+                }
 	      else
 		{
 		  /* Silently skip unknown optional info.  */