[v4] gdb/testsuite: add test for backtracing for threaded inferiors from a corefile

Message ID 20231204173316.4175260-2-blarsen@redhat.com
State New
Headers
Series [v4] gdb/testsuite: add test for backtracing for threaded inferiors from a corefile |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm fail Testing failed

Commit Message

Guinevere Larsen Dec. 4, 2023, 5:33 p.m. UTC
  This patch is based on an out-of-tree patch that Fedora has been
carrying for a while. It tests if GDB is able to properly unwind a
threaded program in the following situations:
* regular threads
* in a signal handler
* in a signal handler executing on an alternate stack

And the final frame can either be in a syscall or in an infinite loop.

The test works by running the inferior until a crash to generate a
corefile, or until right before the crash. It then applies a backtrace
to all threads, both to check that no frame fails to be identified and
to record the order in which GDB lists the threads. Finally, it goes
thread by thread and tries to collect a large part of the backtrace, to
confirm that everything is being unwound correctly.

Co-Authored-By: Andrew Burgess <aburgess@redhat.com>
Reviewed-By:  Luis Machado  <luis.machado@arm.com>

---

Changes for v4:
* Luis mentioned that my strategy for starting the inferior didn't work
  with native-extended testing. Changed to use runto_main instead
* Improved comments in the exp file based on Andrew's comments
* Minor cleanups with regards to TCL usage
---
 gdb/testsuite/gdb.threads/threadcrash.c   | 443 ++++++++++++++++++++++
 gdb/testsuite/gdb.threads/threadcrash.exp | 233 ++++++++++++
 2 files changed, 676 insertions(+)
 create mode 100644 gdb/testsuite/gdb.threads/threadcrash.c
 create mode 100644 gdb/testsuite/gdb.threads/threadcrash.exp
  

Comments

Guinevere Larsen Dec. 20, 2023, 9:40 a.m. UTC | #1
Ping!
  
Guinevere Larsen Jan. 2, 2024, 10:41 a.m. UTC | #2
Ping!
On 20/12/2023 10:40, Guinevere Larsen wrote:
> Ping!
>
> -- 
> Cheers,
> Guinevere Larsen
> She/Her/Hers
>
> On 04/12/2023 18:33, Guinevere Larsen wrote:
>> This patch is based on an out-of-tree patch that fedora has been
>> carrying for a while. It tests if GDB is able to properly unwind a
>> threaded program in the following situations:
>> * regular threads
>> * in a signal handler
>> * in a signal handler executing on an alternate stack
>>
>> And the final frame can either be in a syscall or in an infinite loop.
>>
>> The test works by running the inferior until a crash to generate a
>> corefile, or until right before the crash. Then applies a backtrace to
>> all threads to see if any frame can't be identified, and the order of
>> the threads in GDB. Finally, it goes thread by thread and tries to
>> collect a large part of the backtrace, to confirm that everything is
>> being unwound correctly.
>>
>> Co-Authored-By: Andrew Burgess <aburgess@redhat.com>
>> Reviewed-By:  Luis Machado <luis.machado@arm.com>
>>
>> ---
>>
>> Changes for v4:
>> * Luis mentioned that my strategy for starting the inferior didn't work
>>    with native-extended testing. Changed to use runto_main instead
>> * Improved comments in the exp file based on Andrew's comments
>> * Minor cleanups with regards to TCL usage
>> ---
>>   gdb/testsuite/gdb.threads/threadcrash.c   | 443 ++++++++++++++++++++++
>>   gdb/testsuite/gdb.threads/threadcrash.exp | 233 ++++++++++++
>>   2 files changed, 676 insertions(+)
>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.c
>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.exp
>>
>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.c 
>> b/gdb/testsuite/gdb.threads/threadcrash.c
>> new file mode 100644
>> index 00000000000..e476ae7b07d
>> --- /dev/null
>> +++ b/gdb/testsuite/gdb.threads/threadcrash.c
>> @@ -0,0 +1,443 @@
>> +/* This testcase is part of GDB, the GNU debugger.
>> +
>> +   Copyright 2023 Free Software Foundation, Inc.
>> +
>> +   This program is free software; you can redistribute it and/or modify
>> +   it under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3 of the License, or
>> +   (at your option) any later version.
>> +
>> +   This program is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> +   GNU General Public License for more details.
>> +
>> +   You should have received a copy of the GNU General Public License
>> +   along with this program.  If not, see 
>> <http://www.gnu.org/licenses/>. */
>> +
>> +#include <pthread.h>
>> +#include <assert.h>
>> +#include <stdlib.h>
>> +#include <signal.h>
>> +#include <unistd.h>
>> +
>> +/* The delay that the main thread gives once all the worker threads 
>> have
>> +   reached the barrier before the main thread enters the function on 
>> which
>> +   GDB will have placed a breakpoint.  */
>> +
>> +#define MAIN_THREAD_DELAY 2
>> +
>> +/* The maximum time we allow this test program to run for before an 
>> alarm
>> +   signal is sent and everything will exit.  */
>> +#define WATCHDOG_ALARM_TIME 600
>> +
>> +/* Aliases for the signals used within this program.  Each signal
>> +   corresponds to an action (from the FINAL_ACTION enum) that the 
>> signal
>> +   handler will perform.  */
>> +
>> +#define SPIN_SIGNAL SIGUSR1
>> +#define SYSCALL_SIGNAL SIGUSR2
>> +
>> +/* Describe the final action that a thread should perform. */
>> +
>> +enum final_action
>> +  {
>> +    /* Thread should spin in an infinite loop.  */
>> +    SPIN = 0,
>> +
>> +    /* Thread should block in a syscall.  */
>> +    SYSCALL,
>> +
>> +    /* This is just a marker to allow for looping over the enum.  */
>> +    LAST_ACTION
>> +  };
>> +
>> +/* Where should the thread perform this action?  */
>> +
>> +enum exec_location
>> +  {
>> +    /* Just a normal thread, on a normal stack.  */
>> +    NORMAL = 0,
>> +
>> +    /* In a signal handler, but use the normal stack.  */
>> +    SIGNAL_HANDLER,
>> +
>> +    /* In a signal handler using an alternative stack.  */
>> +    SIGNAL_ALT_STACK,
>> +
>> +    /* This is just a marker to allow for looping over the enum.  */
>> +    LAST_LOCACTION
>> +  };
>> +
>> +/* A descriptor for a single thread job.  We create a new thread for 
>> each
>> +   job_description.  */
>> +
>> +struct job_description
>> +{
>> +  /* What action should this thread perform.  */
>> +  enum final_action action;
>> +
>> +  /* Where should the thread perform the action.  */
>> +  enum exec_location location;
>> +
>> +  /* The actual thread handle, so we can join with the thread.  */
>> +  pthread_t thread;
>> +};
>> +
>> +/* A pthread barrier, used to (try to) synchronise the threads.  */
>> +pthread_barrier_t global_barrier;
>> +
>> +/* Return a list of jobs, and place the length of the list in 
>> *COUNT.  */
>> +
>> +struct job_description *
>> +get_job_list (int *count)
>> +{
>> +  /* The number of jobs.  */
>> +  int num = LAST_ACTION * LAST_LOCACTION;
>> +
>> +  /* The uninitialised array of jobs.  */
>> +  struct job_description *list
>> +    = malloc (num * sizeof (struct job_description));
>> +  assert (list != NULL);
>> +
>> +  /* Fill the array with all possible jobs.  */
>> +  for (int i = 0; i < (int) LAST_ACTION; ++i)
>> +    for (int j = 0; j < (int) LAST_LOCACTION; ++j)
>> +      {
>> +    int idx = (i * LAST_LOCACTION) + j;
>> +    list[idx].action = (enum final_action) i;
>> +    list[idx].location = (enum exec_location) j;
>> +      }
>> +
>> +  /* Return the array of jobs.  */
>> +  *count = num;
>> +  return list;
>> +}
>> +
>> +/* This function should never be called.  If it is then an assertion 
>> will
>> +   trigger.  */
>> +
>> +void
>> +assert_not_reached (void)
>> +{
>> +  assert (0);
>> +}
>> +
>> +/* The function for a SPIN action.  Just spins in a loop. The LOCATION
>> +   argument exists so GDB can identify the expected context for this
>> +   function.  */
>> +
>> +void
>> +do_spin_task (enum exec_location location)
>> +{
>> +  (void) location;
>> +
>> +  /* Let everyone know that we're about to perform our action.  */
>> +  int res = pthread_barrier_wait (&global_barrier);
>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>> +
>> +  while (1)
>> +    {
>> +      /* Nothing.  */
>> +    }
>> +}
>> +
>> +/* The function for a SYSCALL action.  Just blocks in a call to sleep.  The 
>> LOCATION
>> +   argument exists so GDB can identify the expected context for this
>> +   function.  */
>> +
>> +void
>> +do_syscall_task (enum exec_location location)
>> +{
>> +  (void) location;
>> +
>> +  /* Let everyone know that we're about to perform our action.  */
>> +  int res = pthread_barrier_wait (&global_barrier);
>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>> +
>> +  sleep (600);
>> +}
>> +
>> +/* Return the required size for a sigaltstack.  We start with a single
>> +   page, but do check against the system defined minimums. We don't run
>> +   much on the alternative stacks, so we don't need a huge one.  */
>> +
>> +size_t
>> +get_stack_size (void)
>> +{
>> +  size_t size = getpagesize ();    /* Arbitrary starting size.  */
>> +  if (size < SIGSTKSZ)
>> +    size = SIGSTKSZ;
>> +  if (size < MINSIGSTKSZ)
>> +    size = MINSIGSTKSZ;
>> +  return size;
>> +}
>> +
>> +/* A descriptor for an alternative stack.  */
>> +
>> +struct stack_descriptor
>> +{
>> +  /* The base address of the alternative stack.  This is the address 
>> that
>> +     must be freed to release the memory used by this stack.  */
>> +  void *base;
>> +
>> +  /* The size of this alternative stack.  Tracked just so we can 
>> query this
>> +     from GDB.  */
>> +  size_t size;
>> +};
>> +
>> +/* Install an alternative signal stack.  Return a descriptor for the 
>> newly
>> +   allocated alternative stack.  */
>> +
>> +struct stack_descriptor
>> +setup_alt_stack (void)
>> +{
>> +  size_t stack_size = get_stack_size ();
>> +
>> +  void *stack_area = malloc (stack_size);
>> +
>> +  stack_t stk;
>> +  stk.ss_sp = stack_area;
>> +  stk.ss_flags = 0;
>> +  stk.ss_size = stack_size;
>> +
>> +  int res = sigaltstack (&stk, NULL);
>> +  assert (res == 0);
>> +
>> +  struct stack_descriptor desc;
>> +  desc.base = stack_area;
>> +  desc.size = stack_size;
>> +
>> +  return desc;
>> +}
>> +
>> +/* Return true (non-zero) if we are currently on the alternative stack,
>> +   otherwise, return false (zero).  */
>> +
>> +int
>> +on_alt_stack_p (void)
>> +{
>> +  stack_t stk;
>> +  int res = sigaltstack (NULL, &stk);
>> +  assert (res == 0);
>> +
>> +  return (stk.ss_flags & SS_ONSTACK) != 0;
>> +}
>> +
>> +/* The signal handler function.  All signals call here, so we use SIGNO
>> +   (the signal that was delivered) to decide what action to 
>> perform.  This
>> +   function might, or might not, have been called on an alternative 
>> signal
>> +   stack.  */
>> +
>> +void
>> +signal_handler (int signo)
>> +{
>> +  enum exec_location location
>> +    = on_alt_stack_p () ? SIGNAL_ALT_STACK : SIGNAL_HANDLER;
>> +
>> +  switch (signo)
>> +    {
>> +    case SPIN_SIGNAL:
>> +      do_spin_task (location);
>> +      break;
>> +
>> +    case SYSCALL_SIGNAL:
>> +      do_syscall_task (location);
>> +      break;
>> +
>> +    default:
>> +      assert_not_reached ();
>> +    }
>> +}
>> +
>> +/* The thread worker function.  ARG is a job_description pointer which
>> +   describes what this thread is expected to do.  This function always
>> +   returns a NULL pointer.  */
>> +
>> +void *
>> +thread_function (void *arg)
>> +{
>> +  struct job_description *job = (struct job_description *) arg;
>> +  struct stack_descriptor desc = { NULL, 0 };
>> +  int sa_flags = 0;
>> +
>> +  switch (job->location)
>> +    {
>> +    case NORMAL:
>> +      /* This thread performs the worker action on the current thread,
>> +     select the correct worker function based on the requested
>> +     action.  */
>> +      switch (job->action)
>> +    {
>> +    case SPIN:
>> +      do_spin_task (NORMAL);
>> +      break;
>> +
>> +    case SYSCALL:
>> +      do_syscall_task (NORMAL);
>> +      break;
>> +
>> +    default:
>> +      assert_not_reached ();
>> +    }
>> +      break;
>> +
>> +    case SIGNAL_ALT_STACK:
>> +      /* This thread is to perform its action in a signal handler on 
>> the
>> +     alternative stack.  Install the alternative stack now, and then
>> +     fall through to the normal signal handler location code.  */
>> +      desc = setup_alt_stack ();
>> +      assert (desc.base != NULL);
>> +      assert (desc.size > 0);
>> +      sa_flags = SA_ONSTACK;
>> +
>> +      /* Fall through.  */
>> +    case SIGNAL_HANDLER:
>> +      {
>> +    /* This thread is to perform its action in a signal handler.  We
>> +       might have just installed an alternative signal stack.  */
>> +    int signo, res;
>> +
>> +    /* Select the correct signal number so that the signal handler will
>> +       perform the required action.  */
>> +    switch (job->action)
>> +      {
>> +      case SPIN:
>> +        signo = SPIN_SIGNAL;
>> +        break;
>> +
>> +      case SYSCALL:
>> +        signo = SYSCALL_SIGNAL;
>> +        break;
>> +
>> +      default:
>> +        assert_not_reached ();
>> +      }
>> +
>> +    /* Now setup the signal handler.  */
>> +    struct sigaction sa;
>> +    sa.sa_handler = signal_handler;
>> +    sigfillset (&sa.sa_mask);
>> +    sa.sa_flags = sa_flags;
>> +    res = sigaction (signo, &sa, NULL);
>> +    assert (res == 0);
>> +
>> +    /* Send the signal to this thread.  */
>> +    res = pthread_kill (job->thread, signo);
>> +    assert (res == 0);
>> +      }
>> +      break;
>> +
>> +    default:
>> +      assert_not_reached ();
>> +    };
>> +
>> +  /* Free the alt-stack if we allocated one, if not DESC.BASE will be
>> +     NULL so this call is fine.  */
>> +  free (desc.base);
>> +
>> +  /* Thread complete.  */
>> +  return NULL;
>> +}
>> +
>> +void
>> +start_job (struct job_description *job)
>> +{
>> +  int res;
>> +
>> +  res = pthread_create (&job->thread, NULL, thread_function, job);
>> +  assert (res == 0);
>> +}
>> +
>> +/* Join with the thread for JOB.  This will block until the thread 
>> for JOB
>> +   has finished.  */
>> +
>> +void
>> +finalise_job (struct job_description *job)
>> +{
>> +  int res;
>> +  void *retval;
>> +
>> +  res = pthread_join (job->thread, &retval);
>> +  assert (res == 0);
>> +  assert (retval == NULL);
>> +}
>> +
>> +/* Function that GDB can place a breakpoint on.  */
>> +
>> +void
>> +breakpt (void)
>> +{
>> +  /* Nothing.  */
>> +}
>> +
>> +/* Function that triggers a crash, if the user has setup their 
>> environment
>> +   correctly this will dump a core file, which GDB can then 
>> examine.  */
>> +
>> +void
>> +crash_function (void)
>> +{
>> +  volatile int *p = 0;
>> +  volatile int n = *p;
>> +  (void) n;
>> +}
>> +
>> +/* Entry point.  */
>> +
>> +int
>> +main ()
>> +{
>> +  int job_count, res;
>> +  struct job_description *jobs = get_job_list (&job_count);
>> +
>> +  /* This test is going to park some threads inside infinite loops.  
>> Just
>> +     in case this program is left running, install an alarm that 
>> will cause
>> +     everything to exit.  */
>> +  alarm (WATCHDOG_ALARM_TIME);
>> +
>> +  /* We want each worker thread (of which there are JOB_COUNT) plus the
>> +     main thread (hence + 1) to wait at the barrier.  */
>> +  res = pthread_barrier_init (&global_barrier, NULL, job_count + 1);
>> +  assert (res == 0);
>> +
>> +  /* Start all the jobs.  */
>> +  for (int i = 0; i < job_count; ++i)
>> +    start_job (&jobs[i]);
>> +
>> +  /* Notify all the worker threads that we're waiting for them.  */
>> +  res = pthread_barrier_wait (&global_barrier);
>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>> +
>> +  /* All we know at this point is that all the worker threads have 
>> reached
>> +     the barrier, which is just before they perform their action.  
>> But we
>> +     really want them to start their action.
>> +
>> +     There's really no way we can be 100% certain that the worker 
>> threads
>> +     have started their action, all we can do is wait for a short 
>> while and
>> +     hope that the machine we're running on is not too slow.  */
>> +  sleep (MAIN_THREAD_DELAY);
>> +
>> +  /* A function that GDB can place a breakpoint on.  By the time we get
>> +     here we are as sure as we can be that all of the worker threads 
>> have
>> +     started and are in their worker action (spinning, or syscall).  */
>> +  breakpt ();
>> +
>> +  /* If GDB is not attached then this function will cause a crash, 
>> which
>> +     can be used to dump a core file, which GDB can then analyse.  */
>> +  crash_function ();
>> +
>> +  /* Due to the crash we never expect to get here.  Plus the worker 
>> actions
>> +     never terminate.  But for completeness, here's where we join 
>> with all
>> +     the worker threads.  */
>> +  for (int i = 0; i < job_count; ++i)
>> +    finalise_job (&jobs[i]);
>> +
>> +  /* Cleanup the barrier.  */
>> +  res = pthread_barrier_destroy (&global_barrier);
>> +  assert (res == 0);
>> +
>> +  /* And clean up the jobs list.  */
>> +  free (jobs);
>> +
>> +  return 0;
>> +}
>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.exp 
>> b/gdb/testsuite/gdb.threads/threadcrash.exp
>> new file mode 100644
>> index 00000000000..996e020d1e8
>> --- /dev/null
>> +++ b/gdb/testsuite/gdb.threads/threadcrash.exp
>> @@ -0,0 +1,233 @@
>> +# This testcase is part of GDB, the GNU debugger.
>> +
>> +# Copyright 2023 Free Software Foundation, Inc.
>> +
>> +# This program is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published by
>> +# the Free Software Foundation; either version 3 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# This program is distributed in the hope that it will be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +#
>> +# You should have received a copy of the GNU General Public License
>> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
>> +
>> +# This test case looks at GDB's ability to get correct backtraces for a
>> +# crashed inferior, recreating it from a live inferior, a corefile and
>> +# a gcore.
>> +
>> +
>> +# Check that the inferior has 7 threads, and return the number of 
>> threads (7).
>> +# We return the thread count so that, even if there is some error in 
>> the test,
>> +# the final log doesn't get flooded with failures.
>> +
>> +proc test_thread_count {} {
>> +    set thread_count 0
>> +
>> +    gdb_test_multiple "info threads" "getting thread count" -lbl {
>> +    -re "Thread" {
>> +        incr thread_count
>> +        exp_continue
>> +    }
>> +    -re "$::gdb_prompt " {
>> +        gdb_assert {$thread_count == 7}
>> +    }
>> +    }
>> +
>> +    return $thread_count
>> +}
>> +
>> +# Use 'thread apply all backtrace' to check if all expected threads
>> +# are present, and stopped in the expected locations.  Set the global
>> +# TEST_LIST to be a list of regexps expected to match all the
>> +# threads.  We generate it now so that the list is in the order that
>> +# GDB sees the threads.
>> +
>> +proc thread_apply_all {} {
>> +    global test_list
>> +
>> +    set test_list { }
>> +
>> +    set unwind_fail false
>> +
>> +    gdb_test_multiple "thread apply all backtrace" \
>> +    "Get thread information" -lbl {
>> +        -re "#\[0-9\]+\\\?\\\?\[^\n\]*" {
>> +        set unwind_fail true
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>> +        lappend test_list [multi_line ".*sleep.*" \
>> +                          ".*do_syscall_task 
>> .location=SIGNAL_ALT_STACK.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_HANDLER\[^\n\]*" {
>> +        lappend test_list [multi_line ".*sleep.*" \
>> +                          ".*do_syscall_task 
>> .location=SIGNAL_HANDLER.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*syscall_task .location=NORMAL\[^\n\]*" {
>> +        lappend test_list [multi_line ".*sleep.*" \
>> +                          ".*do_syscall_task .location=NORMAL.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*spin_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>> +        lappend test_list [multi_line ".*do_spin_task 
>> .location=SIGNAL_ALT_STACK.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*spin_task .location=SIGNAL_HANDLER\[^\n\]*" {
>> +        lappend test_list [multi_line ".*do_spin_task 
>> .location=SIGNAL_HANDLER.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*spin_task .location=NORMAL\[^\n\]*" {
>> +        lappend test_list [multi_line ".*do_spin_task 
>> .location=NORMAL..*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*main\[^\n\]*" {
>> +        lappend test_list ".*main.*"
>> +        exp_continue
>> +        }
>> +        -re "$::gdb_prompt " {
>> +        pass $gdb_test_name
>> +        }
>> +    }
>> +
>> +    gdb_assert {$unwind_fail == false}
>> +}
>> +
>> +# Perform all the tests we're interested in.  They are:
>> +# * Testing if we have 7 threads
>> +# * Creating the list of backtraces for all threads seen
>> +# * Testing if GDB recreated the full backtrace we expect for all 
>> threads
>> +
>> +proc do_full_test {} {
>> +    global test_list
>> +    set thread_count [test_thread_count]
>> +
>> +    thread_apply_all
>> +
>> +    gdb_assert {$thread_count == [llength $test_list]}
>> +
>> +    for {set i 0} {$i < $thread_count } {incr i} {
>> +    set thread_num [expr [llength $test_list] - $i]
>> +
>> +    gdb_test "thread apply $thread_num backtrace" [lindex $test_list 
>> $i]
>> +    }
>> +}
>> +
>> +# Do all preparation steps for running the live inferior tests, then
>> +# call do_full_test to actually run the tests.
>> +
>> +proc_with_prefix test_live_inferior {} {
>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>> +    "setup SIGUSR1"
>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>> +    "setup SIGUSR2"
>> +
>> +    if {![runto_main]} {
>> +    return
>> +    }
>> +
>> +    gdb_breakpoint "breakpt"
>> +    gdb_continue_to_breakpoint "running to breakpoint" ".*"
>> +
>> +    do_full_test
>> +}
>> +
>> +# Do all preparation steps for running the corefile tests, then
>> +# call do_full_test to actually run the tests.
>> +
>> +proc_with_prefix test_corefile {} {
>> +    set corefile [core_find $::binfile]
>> +    if { $corefile == "" } {
>> +    untested "couldn't generate corefile"
>> +    return
>> +    }
>> +    set corefile [gdb_remote_download host $corefile]
>> +
>> +    gdb_test "core-file $corefile" \
>> +         "" \
>> +         "loading_corefile" \
>> +         "A program is being debugged already\\\.  Kill it\\\? \\\(y 
>> or n\\\) " \
>> +         "y"
>> +
>> +    do_full_test
>> +}
>> +
>> +# Do all preparation steps for running the gcore tests, then
>> +# call do_full_test to actually run the tests.
>> +
>> +proc_with_prefix test_gcore {} {
>> +
>> +    clean_restart "$::binfile"
>> +
>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>> +    "setup SIGUSR1"
>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>> +    "setup SIGUSR2"
>> +
>> +    if {![runto_main]} {
>> +    return -1
>> +    }
>> +    gdb_test "continue" ".*Segmentation fault.*" "continue to crash"
>> +
>> +    set gcore_name "${::binfile}.gcore"
>> +    set gcore_supported [gdb_gcore_cmd "$gcore_name" "saving gcore"]
>> +
>> +    if {!$gcore_supported} {
>> +    unsupported "couldn't generate gcore file"
>> +    return
>> +    }
>> +
>> +    set corefile [gdb_remote_download host $gcore_name]
>> +
>> +    gdb_test "core-file $corefile" \
>> +         "" \
>> +         "loading_corefile" \
>> +         "A program is being debugged already\\\.  Kill it\\\? \\\(y 
>> or n\\\) " \
>> +         "y"
>> +
>> +    do_full_test
>> +}
>> +
>> +standard_testfile
>> +
>> +if [prepare_for_testing "failed to prepare" $testfile $srcfile \
>> +    {debug pthreads}] {
>> +    return -1
>> +}
>> +
>> +clean_restart ${binfile}
>> +
>> +gdb_test_no_output "set backtrace limit unlimited"
>> +
>> +test_live_inferior
>> +
>> +test_corefile
>> +
>> +test_gcore
>
>
  
Guinevere Larsen Jan. 9, 2024, 11:50 a.m. UTC | #3
Ping!
On 20/12/2023 10:40, Guinevere Larsen wrote:
> Ping!
>
> -- 
> Cheers,
> Guinevere Larsen
> She/Her/Hers
>
> On 04/12/2023 18:33, Guinevere Larsen wrote:
>> This patch is based on an out-of-tree patch that fedora has been
>> carrying for a while. It tests if GDB is able to properly unwind a
>> threaded program in the following situations:
>> * regular threads
>> * in a signal handler
>> * in a signal handler executing on an alternate stack
>>
>> And the final frame can either be in a syscall or in an infinite loop.
>>
>> The test works by running the inferior until a crash to generate a
>> corefile, or until right before the crash. Then applies a backtrace to
>> all threads to see if any frame can't be identified, and the order of
>> the threads in GDB. Finally, it goes thread by thread and tries to
>> collect a large part of the backtrace, to confirm that everything is
>> being unwound correctly.
>>
>> Co-Authored-By: Andrew Burgess <aburgess@redhat.com>
>> Reviewed-By:  Luis Machado <luis.machado@arm.com>
>>
>> ---
>>
>> Changes for v4:
>> * Luis mentioned that my strategy for starting the inferior didn't work
>>    with native-extended testing. Changed to use runto_main instead
>> * Improved comments in the exp file based on Andrew's comments
>> * Minor cleanups with regards to TCL usage
>> ---
>>   gdb/testsuite/gdb.threads/threadcrash.c   | 443 ++++++++++++++++++++++
>>   gdb/testsuite/gdb.threads/threadcrash.exp | 233 ++++++++++++
>>   2 files changed, 676 insertions(+)
>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.c
>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.exp
>>
>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.c 
>> b/gdb/testsuite/gdb.threads/threadcrash.c
>> new file mode 100644
>> index 00000000000..e476ae7b07d
>> --- /dev/null
>> +++ b/gdb/testsuite/gdb.threads/threadcrash.c
>> @@ -0,0 +1,443 @@
>> +/* This testcase is part of GDB, the GNU debugger.
>> +
>> +   Copyright 2023 Free Software Foundation, Inc.
>> +
>> +   This program is free software; you can redistribute it and/or modify
>> +   it under the terms of the GNU General Public License as published by
>> +   the Free Software Foundation; either version 3 of the License, or
>> +   (at your option) any later version.
>> +
>> +   This program is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> +   GNU General Public License for more details.
>> +
>> +   You should have received a copy of the GNU General Public License
>> +   along with this program.  If not, see 
>> <http://www.gnu.org/licenses/>. */
>> +
>> +#include <pthread.h>
>> +#include <assert.h>
>> +#include <stdlib.h>
>> +#include <signal.h>
>> +#include <unistd.h>
>> +
>> +/* The delay that the main thread gives once all the worker threads 
>> have
>> +   reached the barrier before the main thread enters the function on 
>> which
>> +   GDB will have placed a breakpoint.  */
>> +
>> +#define MAIN_THREAD_DELAY 2
>> +
>> +/* The maximum time we allow this test program to run for before an 
>> alarm
>> +   signal is sent and everything will exit.  */
>> +#define WATCHDOG_ALARM_TIME 600
>> +
>> +/* Aliases for the signals used within this program.  Each signal
>> +   corresponds to an action (from the FINAL_ACTION enum) that the 
>> signal
>> +   handler will perform.  */
>> +
>> +#define SPIN_SIGNAL SIGUSR1
>> +#define SYSCALL_SIGNAL SIGUSR2
>> +
>> +/* Describe the final action that a thread should perform. */
>> +
>> +enum final_action
>> +  {
>> +    /* Thread should spin in an infinite loop.  */
>> +    SPIN = 0,
>> +
>> +    /* Thread should block in a syscall.  */
>> +    SYSCALL,
>> +
>> +    /* This is just a marker to allow for looping over the enum.  */
>> +    LAST_ACTION
>> +  };
>> +
>> +/* Where should the thread perform this action?  */
>> +
>> +enum exec_location
>> +  {
>> +    /* Just a normal thread, on a normal stack.  */
>> +    NORMAL = 0,
>> +
>> +    /* In a signal handler, but use the normal stack.  */
>> +    SIGNAL_HANDLER,
>> +
>> +    /* In a signal handler using an alternative stack.  */
>> +    SIGNAL_ALT_STACK,
>> +
>> +    /* This is just a marker to allow for looping over the enum.  */
>> +    LAST_LOCACTION
>> +  };
>> +
>> +/* A descriptor for a single thread job.  We create a new thread for 
>> each
>> +   job_description.  */
>> +
>> +struct job_description
>> +{
>> +  /* What action should this thread perform.  */
>> +  enum final_action action;
>> +
>> +  /* Where should the thread perform the action.  */
>> +  enum exec_location location;
>> +
>> +  /* The actual thread handle, so we can join with the thread.  */
>> +  pthread_t thread;
>> +};
>> +
>> +/* A pthread barrier, used to (try to) synchronise the threads.  */
>> +pthread_barrier_t global_barrier;
>> +
>> +/* Return a list of jobs, and place the length of the list in 
>> *COUNT.  */
>> +
>> +struct job_description *
>> +get_job_list (int *count)
>> +{
>> +  /* The number of jobs.  */
>> +  int num = LAST_ACTION * LAST_LOCACTION;
>> +
>> +  /* The uninitialised array of jobs.  */
>> +  struct job_description *list
>> +    = malloc (num * sizeof (struct job_description));
>> +  assert (list != NULL);
>> +
>> +  /* Fill the array with all possible jobs.  */
>> +  for (int i = 0; i < (int) LAST_ACTION; ++i)
>> +    for (int j = 0; j < (int) LAST_LOCACTION; ++j)
>> +      {
>> +    int idx = (i * LAST_LOCACTION) + j;
>> +    list[idx].action = (enum final_action) i;
>> +    list[idx].location = (enum exec_location) j;
>> +      }
>> +
>> +  /* Return the array of jobs.  */
>> +  *count = num;
>> +  return list;
>> +}
>> +
>> +/* This function should never be called.  If it is then an assertion 
>> will
>> +   trigger.  */
>> +
>> +void
>> +assert_not_reached (void)
>> +{
>> +  assert (0);
>> +}
>> +
>> +/* The function for a SPIN action.  Just spins in a loop. The LOCATION
>> +   argument exists so GDB can identify the expected context for this
>> +   function.  */
>> +
>> +void
>> +do_spin_task (enum exec_location location)
>> +{
>> +  (void) location;
>> +
>> +  /* Let everyone know that we're about to perform our action.  */
>> +  int res = pthread_barrier_wait (&global_barrier);
>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>> +
>> +  while (1)
>> +    {
>> +      /* Nothing.  */
>> +    }
>> +}
>> +
>> +/* The function for a SYSCALL action.  Blocks in a call to sleep.  The 
>> LOCATION
>> +   argument exists so GDB can identify the expected context for this
>> +   function.  */
>> +
>> +void
>> +do_syscall_task (enum exec_location location)
>> +{
>> +  (void) location;
>> +
>> +  /* Let everyone know that we're about to perform our action.  */
>> +  int res = pthread_barrier_wait (&global_barrier);
>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>> +
>> +  sleep (600);
>> +}
>> +
>> +/* Return the required size for a sigaltstack.  We start with a single
>> +   page, but do check against the system defined minimums. We don't run
>> +   much on the alternative stacks, so we don't need a huge one.  */
>> +
>> +size_t
>> +get_stack_size (void)
>> +{
>> +  size_t size = getpagesize ();    /* Arbitrary starting size.  */
>> +  if (size < SIGSTKSZ)
>> +    size = SIGSTKSZ;
>> +  if (size < MINSIGSTKSZ)
>> +    size = MINSIGSTKSZ;
>> +  return size;
>> +}
>> +
>> +/* A descriptor for an alternative stack.  */
>> +
>> +struct stack_descriptor
>> +{
>> +  /* The base address of the alternative stack.  This is the address 
>> that
>> +     must be freed to release the memory used by this stack.  */
>> +  void *base;
>> +
>> +  /* The size of this alternative stack.  Tracked just so we can 
>> query this
>> +     from GDB.  */
>> +  size_t size;
>> +};
>> +
>> +/* Install an alternative signal stack.  Return a descriptor for the 
>> newly
>> +   allocated alternative stack.  */
>> +
>> +struct stack_descriptor
>> +setup_alt_stack (void)
>> +{
>> +  size_t stack_size = get_stack_size ();
>> +
>> +  void *stack_area = malloc (stack_size);
>> +
>> +  stack_t stk;
>> +  stk.ss_sp = stack_area;
>> +  stk.ss_flags = 0;
>> +  stk.ss_size = stack_size;
>> +
>> +  int res = sigaltstack (&stk, NULL);
>> +  assert (res == 0);
>> +
>> +  struct stack_descriptor desc;
>> +  desc.base = stack_area;
>> +  desc.size = stack_size;
>> +
>> +  return desc;
>> +}
>> +
>> +/* Return true (non-zero) if we are currently on the alternative stack,
>> +   otherwise, return false (zero).  */
>> +
>> +int
>> +on_alt_stack_p (void)
>> +{
>> +  stack_t stk;
>> +  int res = sigaltstack (NULL, &stk);
>> +  assert (res == 0);
>> +
>> +  return (stk.ss_flags & SS_ONSTACK) != 0;
>> +}
>> +
>> +/* The signal handler function.  All signals call here, so we use SIGNO
>> +   (the signal that was delivered) to decide what action to 
>> perform.  This
>> +   function might, or might not, have been called on an alternative 
>> signal
>> +   stack.  */
>> +
>> +void
>> +signal_handler (int signo)
>> +{
>> +  enum exec_location location
>> +    = on_alt_stack_p () ? SIGNAL_ALT_STACK : SIGNAL_HANDLER;
>> +
>> +  switch (signo)
>> +    {
>> +    case SPIN_SIGNAL:
>> +      do_spin_task (location);
>> +      break;
>> +
>> +    case SYSCALL_SIGNAL:
>> +      do_syscall_task (location);
>> +      break;
>> +
>> +    default:
>> +      assert_not_reached ();
>> +    }
>> +}
>> +
>> +/* The thread worker function.  ARG is a job_description pointer which
>> +   describes what this thread is expected to do.  This function always
>> +   returns a NULL pointer.  */
>> +
>> +void *
>> +thread_function (void *arg)
>> +{
>> +  struct job_description *job = (struct job_description *) arg;
>> +  struct stack_descriptor desc = { NULL, 0 };
>> +  int sa_flags = 0;
>> +
>> +  switch (job->location)
>> +    {
>> +    case NORMAL:
>> +      /* This thread performs the worker action on the current thread,
>> +     select the correct worker function based on the requested
>> +     action.  */
>> +      switch (job->action)
>> +    {
>> +    case SPIN:
>> +      do_spin_task (NORMAL);
>> +      break;
>> +
>> +    case SYSCALL:
>> +      do_syscall_task (NORMAL);
>> +      break;
>> +
>> +    default:
>> +      assert_not_reached ();
>> +    }
>> +      break;
>> +
>> +    case SIGNAL_ALT_STACK:
>> +      /* This thread is to perform its action in a signal handler on 
>> the
>> +     alternative stack.  Install the alternative stack now, and then
>> +     fall through to the normal signal handler location code.  */
>> +      desc = setup_alt_stack ();
>> +      assert (desc.base != NULL);
>> +      assert (desc.size > 0);
>> +      sa_flags = SA_ONSTACK;
>> +
>> +      /* Fall through.  */
>> +    case SIGNAL_HANDLER:
>> +      {
>> +    /* This thread is to perform its action in a signal handler.  We
>> +       might have just installed an alternative signal stack.  */
>> +    int signo, res;
>> +
>> +    /* Select the correct signal number so that the signal handler will
>> +       perform the required action.  */
>> +    switch (job->action)
>> +      {
>> +      case SPIN:
>> +        signo = SPIN_SIGNAL;
>> +        break;
>> +
>> +      case SYSCALL:
>> +        signo = SYSCALL_SIGNAL;
>> +        break;
>> +
>> +      default:
>> +        assert_not_reached ();
>> +      }
>> +
>> +    /* Now setup the signal handler.  */
>> +    struct sigaction sa;
>> +    sa.sa_handler = signal_handler;
>> +    sigfillset (&sa.sa_mask);
>> +    sa.sa_flags = sa_flags;
>> +    res = sigaction (signo, &sa, NULL);
>> +    assert (res == 0);
>> +
>> +    /* Send the signal to this thread.  */
>> +    res = pthread_kill (job->thread, signo);
>> +    assert (res == 0);
>> +      }
>> +      break;
>> +
>> +    default:
>> +      assert_not_reached ();
>> +    };
>> +
>> +  /* Free the alt-stack if we allocated one, if not DESC.BASE will be
>> +     NULL so this call is fine.  */
>> +  free (desc.base);
>> +
>> +  /* Thread complete.  */
>> +  return NULL;
>> +}
>> +
>> +void
>> +start_job (struct job_description *job)
>> +{
>> +  int res;
>> +
>> +  res = pthread_create (&job->thread, NULL, thread_function, job);
>> +  assert (res == 0);
>> +}
>> +
>> +/* Join with the thread for JOB.  This will block until the thread 
>> for JOB
>> +   has finished.  */
>> +
>> +void
>> +finalise_job (struct job_description *job)
>> +{
>> +  int res;
>> +  void *retval;
>> +
>> +  res = pthread_join (job->thread, &retval);
>> +  assert (res == 0);
>> +  assert (retval == NULL);
>> +}
>> +
>> +/* Function that GDB can place a breakpoint on.  */
>> +
>> +void
>> +breakpt (void)
>> +{
>> +  /* Nothing.  */
>> +}
>> +
>> +/* Function that triggers a crash, if the user has setup their 
>> environment
>> +   correctly this will dump a core file, which GDB can then 
>> examine.  */
>> +
>> +void
>> +crash_function (void)
>> +{
>> +  volatile int *p = 0;
>> +  volatile int n = *p;
>> +  (void) n;
>> +}
>> +
>> +/* Entry point.  */
>> +
>> +int
>> +main ()
>> +{
>> +  int job_count, res;
>> +  struct job_description *jobs = get_job_list (&job_count);
>> +
>> +  /* This test is going to park some threads inside infinite loops.  
>> Just
>> +     in case this program is left running, install an alarm that 
>> will cause
>> +     everything to exit.  */
>> +  alarm (WATCHDOG_ALARM_TIME);
>> +
>> +  /* We want each worker thread (of which there are JOB_COUNT) plus the
>> +     main thread (hence + 1) to wait at the barrier.  */
>> +  res = pthread_barrier_init (&global_barrier, NULL, job_count + 1);
>> +  assert (res == 0);
>> +
>> +  /* Start all the jobs.  */
>> +  for (int i = 0; i < job_count; ++i)
>> +    start_job (&jobs[i]);
>> +
>> +  /* Notify all the worker threads that we're waiting for them.  */
>> +  res = pthread_barrier_wait (&global_barrier);
>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>> +
>> +  /* All we know at this point is that all the worker threads have 
>> reached
>> +     the barrier, which is just before they perform their action.  
>> But we
>> +     really want them to start their action.
>> +
>> +     There's really no way we can be 100% certain that the worker 
>> threads
>> +     have started their action, all we can do is wait for a short 
>> while and
>> +     hope that the machine we're running on is not too slow.  */
>> +  sleep (MAIN_THREAD_DELAY);
>> +
>> +  /* A function that GDB can place a breakpoint on.  By the time we get
>> +     here we are as sure as we can be that all of the worker threads 
>> have
>> +     started and are in their worker action (spinning, or syscall).  */
>> +  breakpt ();
>> +
>> +  /* If GDB is not attached then this function will cause a crash, 
>> which
>> +     can be used to dump a core file, which GDB can then analyse.  */
>> +  crash_function ();
>> +
>> +  /* Due to the crash we never expect to get here.  Plus the worker 
>> actions
>> +     never terminate.  But for completeness, here's where we join 
>> with all
>> +     the worker threads.  */
>> +  for (int i = 0; i < job_count; ++i)
>> +    finalise_job (&jobs[i]);
>> +
>> +  /* Cleanup the barrier.  */
>> +  res = pthread_barrier_destroy (&global_barrier);
>> +  assert (res == 0);
>> +
>> +  /* And clean up the jobs list.  */
>> +  free (jobs);
>> +
>> +  return 0;
>> +}
>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.exp 
>> b/gdb/testsuite/gdb.threads/threadcrash.exp
>> new file mode 100644
>> index 00000000000..996e020d1e8
>> --- /dev/null
>> +++ b/gdb/testsuite/gdb.threads/threadcrash.exp
>> @@ -0,0 +1,233 @@
>> +# This testcase is part of GDB, the GNU debugger.
>> +
>> +# Copyright 2023 Free Software Foundation, Inc.
>> +
>> +# This program is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published by
>> +# the Free Software Foundation; either version 3 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# This program is distributed in the hope that it will be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +#
>> +# You should have received a copy of the GNU General Public License
>> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
>> +
>> +# This test case looks at GDB's ability to get correct backtraces for a
>> +# crashed inferior, recreating it from a live inferior, a corefile and
>> +# a gcore.
>> +
>> +
>> +# Check that the inferior has 7 threads, and return the number of 
>> threads (7).
>> +# We return the thread count so that, even if there is some error in 
>> the test,
>> +# the final log doesn't get flooded with failures.
>> +
>> +proc test_thread_count {} {
>> +    set thread_count 0
>> +
>> +    gdb_test_multiple "info threads" "getting thread count" -lbl {
>> +    -re "Thread" {
>> +        incr thread_count
>> +        exp_continue
>> +    }
>> +    -re "$::gdb_prompt " {
>> +        gdb_assert {$thread_count == 7}
>> +    }
>> +    }
>> +
>> +    return $thread_count
>> +}
>> +
>> +# Use 'thread apply all backtrace' to check if all expected threads
>> +# are present, and stopped in the expected locations.  Set the global
>> +# TEST_LIST to be a list of regexps expected to match all the
>> +# threads.  We generate it now so that the list is in the order that
>> +# GDB sees the threads.
>> +
>> +proc thread_apply_all {} {
>> +    global test_list
>> +
>> +    set test_list { }
>> +
>> +    set unwind_fail false
>> +
>> +    gdb_test_multiple "thread apply all backtrace" \
>> +    "Get thread information" -lbl {
>> +        -re "#\[0-9\]+\\\?\\\?\[^\n\]*" {
>> +        set unwind_fail true
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>> +        lappend test_list [multi_line ".*sleep.*" \
>> +                          ".*do_syscall_task 
>> .location=SIGNAL_ALT_STACK.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_HANDLER\[^\n\]*" {
>> +        lappend test_list [multi_line ".*sleep.*" \
>> +                          ".*do_syscall_task 
>> .location=SIGNAL_HANDLER.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*syscall_task .location=NORMAL\[^\n\]*" {
>> +        lappend test_list [multi_line ".*sleep.*" \
>> +                          ".*do_syscall_task .location=NORMAL.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*spin_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>> +        lappend test_list [multi_line ".*do_spin_task 
>> .location=SIGNAL_ALT_STACK.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*spin_task .location=SIGNAL_HANDLER\[^\n\]*" {
>> +        lappend test_list [multi_line ".*do_spin_task 
>> .location=SIGNAL_HANDLER.*" \
>> +                          ".*signal_handler.*" \
>> +                          ".*signal handler called.*" \
>> +                          ".*pthread_kill.*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*spin_task .location=NORMAL\[^\n\]*" {
>> +        lappend test_list [multi_line ".*do_spin_task 
>> .location=NORMAL..*" \
>> +                          ".*thread_function.*"]
>> +        exp_continue
>> +        }
>> +        -re "\[^\n\]*main\[^\n\]*" {
>> +        lappend test_list ".*main.*"
>> +        exp_continue
>> +        }
>> +        -re "$::gdb_prompt " {
>> +        pass $gdb_test_name
>> +        }
>> +    }
>> +
>> +    gdb_assert {$unwind_fail == false}
>> +}
>> +
>> +# Perform all the tests we're interested in.  They are:
>> +# * test if we have 7 threads
>> +# * Creating the list of backtraces for all threads seen
>> +# * testing if GDB recreated the full backtrace we expect for all 
>> threads
>> +
>> +proc do_full_test {} {
>> +    global test_list
>> +    set thread_count [test_thread_count]
>> +
>> +    thread_apply_all
>> +
>> +    gdb_assert {$thread_count == [llength $test_list]}
>> +
>> +    for {set i 0} {$i < $thread_count } {incr i} {
>> +    set thread_num [expr [llength $test_list] - $i]
>> +
>> +    gdb_test "thread apply $thread_num backtrace" [lindex $test_list 
>> $i]
>> +    }
>> +}
>> +
>> +# Do all preparation steps for running the live inferior tests, then
>> +# call do_full_test to actually run the tests.
>> +
>> +proc_with_prefix test_live_inferior {} {
>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>> +    "setup SIGUSR1"
>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>> +    "setup SIGUSR2"
>> +
>> +    if {![runto_main]} {
>> +    return
>> +    }
>> +
>> +    gdb_breakpoint "breakpt"
>> +    gdb_continue_to_breakpoint "running to breakpoint" ".*"
>> +
>> +    do_full_test
>> +}
>> +
>> +# Do all preparation steps for running the corefile tests, then
>> +# call do_full_test to actually run the tests.
>> +
>> +proc_with_prefix test_corefile {} {
>> +    set corefile [core_find $::binfile]
>> +    if { $corefile == "" } {
>> +    untested "couldn't generate corefile"
>> +    return
>> +    }
>> +    set corefile [gdb_remote_download host $corefile]
>> +
>> +    gdb_test "core-file $corefile" \
>> +         "" \
>> +         "loading_corefile" \
>> +         "A program is being debugged already\\\.  Kill it\\\? \\\(y 
>> or n\\\) " \
>> +         "y"
>> +
>> +    do_full_test
>> +}
>> +
>> +# Do all preparation steps for running the gcore tests, then
>> +# call do_full_test to actually run the tests.
>> +
>> +proc_with_prefix test_gcore {} {
>> +
>> +    clean_restart "$::binfile"
>> +
>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>> +    "setup SIGUSR1"
>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>> +    "setup SIGUSR2"
>> +
>> +    if {![runto_main]} {
>> +    return -1
>> +    }
>> +    gdb_test "continue" ".*Segmentation fault.*" "continue to crash"
>> +
>> +    set gcore_name "${::binfile}.gcore"
>> +    set gcore_supported [gdb_gcore_cmd "$gcore_name" "saving gcore"]
>> +
>> +    if {!$gcore_supported} {
>> +    unsupported "couldn't generate gcore file"
>> +    return
>> +    }
>> +
>> +    set corefile [gdb_remote_download host $gcore_name]
>> +
>> +    gdb_test "core-file $corefile" \
>> +         "" \
>> +         "loading_corefile" \
>> +         "A program is being debugged already\\\.  Kill it\\\? \\\(y 
>> or n\\\) " \
>> +         "y"
>> +
>> +    do_full_test
>> +}
>> +
>> +standard_testfile
>> +
>> +if [prepare_for_testing "failed to prepare" $testfile $srcfile \
>> +    {debug pthreads}] {
>> +    return -1
>> +}
>> +
>> +clean_restart ${binfile}
>> +
>> +gdb_test_no_output "set backtrace limit unlimited"
>> +
>> +test_live_inferior
>> +
>> +test_corefile
>> +
>> +test_gcore
>
>
  
Guinevere Larsen Jan. 17, 2024, 9:04 a.m. UTC | #4
Ping!
On 09/01/2024 12:50, Guinevere Larsen wrote:
> Ping!
> On 20/12/2023 10:40, Guinevere Larsen wrote:
>> Ping!
>>
>> -- 
>> Cheers,
>> Guinevere Larsen
>> She/Her/Hers
>>
>> On 04/12/2023 18:33, Guinevere Larsen wrote:
>>> This patch is based on an out-of-tree patch that fedora has been
>>> carrying for a while. It tests if GDB is able to properly unwind a
>>> threaded program in the following situations:
>>> * regular threads
>>> * in a signal handler
>>> * in a signal handler executing on an alternate stack
>>>
>>> And the final frame can either be in a syscall or in an infinite loop.
>>>
>>> The test works by running the inferior until a crash to generate a
>>> corefile, or until right before the crash. Then applies a backtrace to
>>> all threads to see if any frame can't be identified, and the order of
>>> the threads in GDB. Finally, it goes thread by thread and tries to
>>> collect a large part of the backtrace, to confirm that everything is
>>> being unwound correctly.
>>>
>>> Co-Authored-By: Andrew Burgess <aburgess@redhat.com>
>>> Reviewed-By:  Luis Machado <luis.machado@arm.com>
>>>
>>> ---
>>>
>>> Changes for v4:
>>> * Luis mentioned that my strategy for starting the inferior didn't work
>>>    with native-extended testing. Changed to use runto_main instead
>>> * Improved comments in the exp file based on Andrew's comments
>>> * Minor cleanups with regards to TCL usage
>>> ---
>>>   gdb/testsuite/gdb.threads/threadcrash.c   | 443 
>>> ++++++++++++++++++++++
>>>   gdb/testsuite/gdb.threads/threadcrash.exp | 233 ++++++++++++
>>>   2 files changed, 676 insertions(+)
>>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.c
>>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.exp
>>>
>>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.c 
>>> b/gdb/testsuite/gdb.threads/threadcrash.c
>>> new file mode 100644
>>> index 00000000000..e476ae7b07d
>>> --- /dev/null
>>> +++ b/gdb/testsuite/gdb.threads/threadcrash.c
>>> @@ -0,0 +1,443 @@
>>> +/* This testcase is part of GDB, the GNU debugger.
>>> +
>>> +   Copyright 2023 Free Software Foundation, Inc.
>>> +
>>> +   This program is free software; you can redistribute it and/or 
>>> modify
>>> +   it under the terms of the GNU General Public License as 
>>> published by
>>> +   the Free Software Foundation; either version 3 of the License, or
>>> +   (at your option) any later version.
>>> +
>>> +   This program is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>>> +   GNU General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU General Public License
>>> +   along with this program.  If not, see 
>>> <http://www.gnu.org/licenses/>. */
>>> +
>>> +#include <pthread.h>
>>> +#include <assert.h>
>>> +#include <stdlib.h>
>>> +#include <signal.h>
>>> +#include <unistd.h>
>>> +
>>> +/* The delay that the main thread gives once all the worker threads 
>>> have
>>> +   reached the barrier before the main thread enters the function 
>>> on which
>>> +   GDB will have placed a breakpoint.  */
>>> +
>>> +#define MAIN_THREAD_DELAY 2
>>> +
>>> +/* The maximum time we allow this test program to run for before an 
>>> alarm
>>> +   signal is sent and everything will exit.  */
>>> +#define WATCHDOG_ALARM_TIME 600
>>> +
>>> +/* Aliases for the signals used within this script.  Each signal
>>> +   corresponds to an action (from the FINAL_ACTION enum) that the 
>>> signal
>>> +   handler will perform.  */
>>> +
>>> +#define SPIN_SIGNAL SIGUSR1
>>> +#define SYSCALL_SIGNAL SIGUSR2
>>> +
>>> +/* Describe the final action that a thread should perform. */
>>> +
>>> +enum final_action
>>> +  {
>>> +    /* Thread should spin in an infinite loop.  */
>>> +    SPIN = 0,
>>> +
>>> +    /* Thread should block in a syscall.  */
>>> +    SYSCALL,
>>> +
>>> +    /* This is just a marker to allow for looping over the enum.  */
>>> +    LAST_ACTION
>>> +  };
>>> +
>>> +/* Where should the thread perform this action?  */
>>> +
>>> +enum exec_location
>>> +  {
>>> +    /* Just a normal thread, on a normal stack.  */
>>> +    NORMAL = 0,
>>> +
>>> +    /* In a signal handler, but use the normal stack.  */
>>> +    SIGNAL_HANDLER,
>>> +
>>> +    /* In a signal handler using an alternative stack.  */
>>> +    SIGNAL_ALT_STACK,
>>> +
>>> +    /* This is just a marker to allow for looping over the enum.  */
>>> +    LAST_LOCACTION
>>> +  };
>>> +
>>> +/* A descriptor for a single thread job.  We create a new thread 
>>> for each
>>> +   job_description.  */
>>> +
>>> +struct job_description
>>> +{
>>> +  /* What action should this thread perform.  */
>>> +  enum final_action action;
>>> +
>>> +  /* Where should the thread perform the action.  */
>>> +  enum exec_location location;
>>> +
>>> +  /* The actual thread handle, so we can join with the thread.  */
>>> +  pthread_t thread;
>>> +};
>>> +
>>> +/* A pthread barrier, used to (try to) synchronise the threads.  */
>>> +pthread_barrier_t global_barrier;
>>> +
>>> +/* Return a list of jobs, and place the length of the list in 
>>> *COUNT.  */
>>> +
>>> +struct job_description *
>>> +get_job_list (int *count)
>>> +{
>>> +  /* The number of jobs.  */
>>> +  int num = LAST_ACTION * LAST_LOCACTION;
>>> +
>>> +  /* The uninitialised array of jobs.  */
>>> +  struct job_description *list
>>> +    = malloc (num * sizeof (struct job_description));
>>> +  assert (list != NULL);
>>> +
>>> +  /* Fill the array with all possible jobs.  */
>>> +  for (int i = 0; i < (int) LAST_ACTION; ++i)
>>> +    for (int j = 0; j < (int) LAST_LOCACTION; ++j)
>>> +      {
>>> +    int idx = (i * LAST_LOCACTION) + j;
>>> +    list[idx].action = (enum final_action) i;
>>> +    list[idx].location = (enum exec_location) j;
>>> +      }
>>> +
>>> +  /* Return the array of jobs.  */
>>> +  *count = num;
>>> +  return list;
>>> +}
>>> +
>>> +/* This function should never be called.  If it is then an 
>>> assertion will
>>> +   trigger.  */
>>> +
>>> +void
>>> +assert_not_reached (void)
>>> +{
>>> +  assert (0);
>>> +}
>>> +
>>> +/* The function for a SPIN action.  Just spins in a loop. The LOCATION
>>> +   argument exists so GDB can identify the expected context for this
>>> +   function.  */
>>> +
>>> +void
>>> +do_spin_task (enum exec_location location)
>>> +{
>>> +  (void) location;
>>> +
>>> +  /* Let everyone know that we're about to perform our action.  */
>>> +  int res = pthread_barrier_wait (&global_barrier);
>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>> +
>>> +  while (1)
>>> +    {
>>> +      /* Nothing.  */
>>> +    }
>>> +}
>>> +
>>> +/* The function for a SYSCALL action.  Blocks in a call to sleep.  The 
>>> LOCATION
>>> +   argument exists so GDB can identify the expected context for this
>>> +   function.  */
>>> +
>>> +void
>>> +do_syscall_task (enum exec_location location)
>>> +{
>>> +  (void) location;
>>> +
>>> +  /* Let everyone know that we're about to perform our action.  */
>>> +  int res = pthread_barrier_wait (&global_barrier);
>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>> +
>>> +  sleep (600);
>>> +}
>>> +
>>> +/* Return the required size for a sigaltstack.  We start with a single
>>> +   page, but do check against the system defined minimums. We don't 
>>> run
>>> +   much on the alternative stacks, so we don't need a huge one.  */
>>> +
>>> +size_t
>>> +get_stack_size (void)
>>> +{
>>> +  size_t size = getpagesize ();    /* Arbitrary starting size.  */
>>> +  if (size < SIGSTKSZ)
>>> +    size = SIGSTKSZ;
>>> +  if (size < MINSIGSTKSZ)
>>> +    size = MINSIGSTKSZ;
>>> +  return size;
>>> +}
>>> +
>>> +/* A descriptor for an alternative stack.  */
>>> +
>>> +struct stack_descriptor
>>> +{
>>> +  /* The base address of the alternative stack.  This is the 
>>> address that
>>> +     must be freed to release the memory used by this stack. */
>>> +  void *base;
>>> +
>>> +  /* The size of this alternative stack.  Tracked just so we can 
>>> query this
>>> +     from GDB.  */
>>> +  size_t size;
>>> +};
>>> +
>>> +/* Install an alternative signal stack.  Return a descriptor for 
>>> the newly
>>> +   allocated alternative stack.  */
>>> +
>>> +struct stack_descriptor
>>> +setup_alt_stack (void)
>>> +{
>>> +  size_t stack_size = get_stack_size ();
>>> +
>>> +  void *stack_area = malloc (stack_size);
>>> +
>>> +  stack_t stk;
>>> +  stk.ss_sp = stack_area;
>>> +  stk.ss_flags = 0;
>>> +  stk.ss_size = stack_size;
>>> +
>>> +  int res = sigaltstack (&stk, NULL);
>>> +  assert (res == 0);
>>> +
>>> +  struct stack_descriptor desc;
>>> +  desc.base = stack_area;
>>> +  desc.size = stack_size;
>>> +
>>> +  return desc;
>>> +}
>>> +
>>> +/* Return true (non-zero) if we are currently on the alternative 
>>> stack,
>>> +   otherwise, return false (zero).  */
>>> +
>>> +int
>>> +on_alt_stack_p (void)
>>> +{
>>> +  stack_t stk;
>>> +  int res = sigaltstack (NULL, &stk);
>>> +  assert (res == 0);
>>> +
>>> +  return (stk.ss_flags & SS_ONSTACK) != 0;
>>> +}
>>> +
>>> +/* The signal handler function.  All signals call here, so we use 
>>> SIGNO
>>> +   (the signal that was delivered) to decide what action to 
>>> perform.  This
>>> +   function might, or might not, have been called on an alternative 
>>> signal
>>> +   stack.  */
>>> +
>>> +void
>>> +signal_handler (int signo)
>>> +{
>>> +  enum exec_location location
>>> +    = on_alt_stack_p () ? SIGNAL_ALT_STACK : SIGNAL_HANDLER;
>>> +
>>> +  switch (signo)
>>> +    {
>>> +    case SPIN_SIGNAL:
>>> +      do_spin_task (location);
>>> +      break;
>>> +
>>> +    case SYSCALL_SIGNAL:
>>> +      do_syscall_task (location);
>>> +      break;
>>> +
>>> +    default:
>>> +      assert_not_reached ();
>>> +    }
>>> +}
>>> +
>>> +/* The thread worker function.  ARG is a job_description pointer which
>>> +   describes what this thread is expected to do.  This function always
>>> +   returns a NULL pointer.  */
>>> +
>>> +void *
>>> +thread_function (void *arg)
>>> +{
>>> +  struct job_description *job = (struct job_description *) arg;
>>> +  struct stack_descriptor desc = { NULL, 0 };
>>> +  int sa_flags = 0;
>>> +
>>> +  switch (job->location)
>>> +    {
>>> +    case NORMAL:
>>> +      /* This thread performs the worker action on the current thread,
>>> +     select the correct worker function based on the requested
>>> +     action.  */
>>> +      switch (job->action)
>>> +    {
>>> +    case SPIN:
>>> +      do_spin_task (NORMAL);
>>> +      break;
>>> +
>>> +    case SYSCALL:
>>> +      do_syscall_task (NORMAL);
>>> +      break;
>>> +
>>> +    default:
>>> +      assert_not_reached ();
>>> +    }
>>> +      break;
>>> +
>>> +    case SIGNAL_ALT_STACK:
>>> +      /* This thread is to perform its action in a signal handler 
>>> on the
>>> +     alternative stack.  Install the alternative stack now, and then
>>> +     fall through to the normal signal handler location code.  */
>>> +      desc = setup_alt_stack ();
>>> +      assert (desc.base != NULL);
>>> +      assert (desc.size > 0);
>>> +      sa_flags = SA_ONSTACK;
>>> +
>>> +      /* Fall through.  */
>>> +    case SIGNAL_HANDLER:
>>> +      {
>>> +    /* This thread is to perform its action in a signal handler.  We
>>> +       might have just installed an alternative signal stack.  */
>>> +    int signo, res;
>>> +
>>> +    /* Select the correct signal number so that the signal handler 
>>> will
>>> +       perform the required action.  */
>>> +    switch (job->action)
>>> +      {
>>> +      case SPIN:
>>> +        signo = SPIN_SIGNAL;
>>> +        break;
>>> +
>>> +      case SYSCALL:
>>> +        signo = SYSCALL_SIGNAL;
>>> +        break;
>>> +
>>> +      default:
>>> +        assert_not_reached ();
>>> +      }
>>> +
>>> +    /* Now setup the signal handler.  */
>>> +    struct sigaction sa;
>>> +    sa.sa_handler = signal_handler;
>>> +    sigfillset (&sa.sa_mask);
>>> +    sa.sa_flags = sa_flags;
>>> +    res = sigaction (signo, &sa, NULL);
>>> +    assert (res == 0);
>>> +
>>> +    /* Send the signal to this thread.  */
>>> +    res = pthread_kill (job->thread, signo);
>>> +    assert (res == 0);
>>> +      }
>>> +      break;
>>> +
>>> +    default:
>>> +      assert_not_reached ();
>>> +    };
>>> +
>>> +  /* Free the alt-stack if we allocated one, if not DESC.BASE will be
>>> +     NULL so this call is fine.  */
>>> +  free (desc.base);
>>> +
>>> +  /* Thread complete.  */
>>> +  return NULL;
>>> +}
>>> +
>>> +void
>>> +start_job (struct job_description *job)
>>> +{
>>> +  int res;
>>> +
>>> +  res = pthread_create (&job->thread, NULL, thread_function, job);
>>> +  assert (res == 0);
>>> +}
>>> +
>>> +/* Join with the thread for JOB.  This will block until the thread 
>>> for JOB
>>> +   has finished.  */
>>> +
>>> +void
>>> +finalise_job (struct job_description *job)
>>> +{
>>> +  int res;
>>> +  void *retval;
>>> +
>>> +  res = pthread_join (job->thread, &retval);
>>> +  assert (res == 0);
>>> +  assert (retval == NULL);
>>> +}
>>> +
>>> +/* Function that GDB can place a breakpoint on.  */
>>> +
>>> +void
>>> +breakpt (void)
>>> +{
>>> +  /* Nothing.  */
>>> +}
>>> +
>>> +/* Function that triggers a crash, if the user has setup their 
>>> environment
>>> +   correctly this will dump a core file, which GDB can then 
>>> examine.  */
>>> +
>>> +void
>>> +crash_function (void)
>>> +{
>>> +  volatile int *p = 0;
>>> +  volatile int n = *p;
>>> +  (void) n;
>>> +}
>>> +
>>> +/* Entry point.  */
>>> +
>>> +int
>>> +main ()
>>> +{
>>> +  int job_count, res;
>>> +  struct job_description *jobs = get_job_list (&job_count);
>>> +
>>> +  /* This test is going to park some threads inside infinite 
>>> loops.  Just
>>> +     in case this program is left running, install an alarm that 
>>> will cause
>>> +     everything to exit.  */
>>> +  alarm (WATCHDOG_ALARM_TIME);
>>> +
>>> +  /* We want each worker thread (of which there are JOB_COUNT) plus 
>>> the
>>> +     main thread (hence + 1) to wait at the barrier.  */
>>> +  res = pthread_barrier_init (&global_barrier, NULL, job_count + 1);
>>> +  assert (res == 0);
>>> +
>>> +  /* Start all the jobs.  */
>>> +  for (int i = 0; i < job_count; ++i)
>>> +    start_job (&jobs[i]);
>>> +
>>> +  /* Notify all the worker threads that we're waiting for them.  */
>>> +  res = pthread_barrier_wait (&global_barrier);
>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>> +
>>> +  /* All we know at this point is that all the worker threads have 
>>> reached
>>> +     the barrier, which is just before they perform their action.  
>>> But we
>>> +     really want them to start their action.
>>> +
>>> +     There's really no way we can be 100% certain that the worker 
>>> threads
>>> +     have started their action, all we can do is wait for a short 
>>> while and
>>> +     hope that the machine we're running on is not too slow. */
>>> +  sleep (MAIN_THREAD_DELAY);
>>> +
>>> +  /* A function that GDB can place a breakpoint on.  By the time we 
>>> get
>>> +     here we are as sure as we can be that all of the worker 
>>> threads have
>>> +     started and are in their worker action (spinning, or 
>>> syscall).  */
>>> +  breakpt ();
>>> +
>>> +  /* If GDB is not attached then this function will cause a crash, 
>>> which
>>> +     can be used to dump a core file, which GDB can then analyse.  */
>>> +  crash_function ();
>>> +
>>> +  /* Due to the crash we never expect to get here.  Plus the worker 
>>> actions
>>> +     never terminate.  But for completeness, here's where we join 
>>> with all
>>> +     the worker threads.  */
>>> +  for (int i = 0; i < job_count; ++i)
>>> +    finalise_job (&jobs[i]);
>>> +
>>> +  /* Cleanup the barrier.  */
>>> +  res = pthread_barrier_destroy (&global_barrier);
>>> +  assert (res == 0);
>>> +
>>> +  /* And clean up the jobs list.  */
>>> +  free (jobs);
>>> +
>>> +  return 0;
>>> +}
>>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.exp 
>>> b/gdb/testsuite/gdb.threads/threadcrash.exp
>>> new file mode 100644
>>> index 00000000000..996e020d1e8
>>> --- /dev/null
>>> +++ b/gdb/testsuite/gdb.threads/threadcrash.exp
>>> @@ -0,0 +1,233 @@
>>> +# This testcase is part of GDB, the GNU debugger.
>>> +
>>> +# Copyright 2023 Free Software Foundation, Inc.
>>> +
>>> +# This program is free software; you can redistribute it and/or modify
>>> +# it under the terms of the GNU General Public License as published by
>>> +# the Free Software Foundation; either version 3 of the License, or
>>> +# (at your option) any later version.
>>> +#
>>> +# This program is distributed in the hope that it will be useful,
>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> +# GNU General Public License for more details.
>>> +#
>>> +# You should have received a copy of the GNU General Public License
>>> +# along with this program.  If not, see 
>>> <http://www.gnu.org/licenses/>.
>>> +
>>> +# This test case looks at GDB's ability to get correct backtraces 
>>> for a
>>> +# crashed inferior, recreating it from a live inferior, a corefile and
>>> +# a gcore.
>>> +
>>> +
>>> +# Check that the inferior has 7 threads, and return the number of
>>> +# threads found.  We return the thread count so that, even if there
>>> +# is some error in the test, the final log doesn't get flooded with
>>> +# failures.
>>> +
>>> +proc test_thread_count {} {
>>> +    set thread_count 0
>>> +
>>> +    gdb_test_multiple "info threads" "getting thread count" -lbl {
>>> +    -re "Thread" {
>>> +        incr thread_count
>>> +        exp_continue
>>> +    }
>>> +    -re "$::gdb_prompt " {
>>> +        gdb_assert {$thread_count == 7}
>>> +    }
>>> +    }
>>> +
>>> +    return $thread_count
>>> +}
>>> +
>>> +# Use 'thread apply all backtrace' to check if all expected threads
>>> +# are present, and stopped in the expected locations.  Set the global
>>> +# TEST_LIST to be a list of regexps expected to match all the
>>> +# threads.  We generate it now so that the list is in the order that
>>> +# GDB sees the threads.
>>> +
>>> +proc thread_apply_all {} {
>>> +    global test_list
>>> +
>>> +    set test_list { }
>>> +
>>> +    set unwind_fail false
>>> +
>>> +    gdb_test_multiple "thread apply all backtrace" \
>>> +    "Get thread information" -lbl {
>>> +        -re "#\[0-9\]+\\\?\\\?\[^\n\]*" {
>>> +        set unwind_fail true
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*syscall_task 
>>> .location=SIGNAL_ALT_STACK\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*sleep.*" \
>>> +                          ".*do_syscall_task 
>>> .location=SIGNAL_ALT_STACK.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_HANDLER\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*sleep.*" \
>>> +                          ".*do_syscall_task 
>>> .location=SIGNAL_HANDLER.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*syscall_task .location=NORMAL\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*sleep.*" \
>>> +                          ".*do_syscall_task .location=NORMAL.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*spin_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*do_spin_task 
>>> .location=SIGNAL_ALT_STACK.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*spin_task .location=SIGNAL_HANDLER\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*do_spin_task 
>>> .location=SIGNAL_HANDLER.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*spin_task .location=NORMAL\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*do_spin_task 
>>> .location=NORMAL..*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*main\[^\n\]*" {
>>> +        lappend test_list ".*main.*"
>>> +        exp_continue
>>> +        }
>>> +        -re "$::gdb_prompt " {
>>> +        pass $gdb_test_name
>>> +        }
>>> +    }
>>> +
>>> +    gdb_assert {$unwind_fail == false}
>>> +}
>>> +
>>> +# Perform all the tests we're interested in.  They are:
>>> +# * test if we have 7 threads
>>> +# * Creating the list of backtraces for all threads seen
>>> +# * testing if GDB recreated the full backtrace we expect for all 
>>> threads
>>> +
>>> +proc do_full_test {} {
>>> +    global test_list
>>> +    set thread_count [test_thread_count]
>>> +
>>> +    thread_apply_all
>>> +
>>> +    gdb_assert {$thread_count == [llength $test_list]}
>>> +
>>> +    for {set i 0} {$i < $thread_count } {incr i} {
>>> +    set thread_num [expr [llength $test_list] - $i]
>>> +
>>> +    gdb_test "thread apply $thread_num backtrace" [lindex 
>>> $test_list $i]
>>> +    }
>>> +}
>>> +
>>> +# Do all preparation steps for running the live inferior tests, then
>>> +# call do_full_test to actually run the tests.
>>> +
>>> +proc_with_prefix test_live_inferior {} {
>>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>>> +    "setup SIGUSR1"
>>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>>> +    "setup SIGUSR2"
>>> +
>>> +    if {![runto_main]} {
>>> +    return
>>> +    }
>>> +
>>> +    gdb_breakpoint "breakpt"
>>> +    gdb_continue_to_breakpoint "running to breakpoint" ".*"
>>> +
>>> +    do_full_test
>>> +}
>>> +
>>> +# Do all preparation steps for running the corefile tests, then
>>> +# call do_full_test to actually run the tests.
>>> +
>>> +proc_with_prefix test_corefile {} {
>>> +    set corefile [core_find $::binfile]
>>> +    if { $corefile == "" } {
>>> +    untested "couldn't generate corefile"
>>> +    return
>>> +    }
>>> +    set corefile [gdb_remote_download host $corefile]
>>> +
>>> +    gdb_test "core-file $corefile" \
>>> +         "" \
>>> +         "loading_corefile" \
>>> +         "A program is being debugged already\\\.  Kill it\\\? 
>>> \\\(y or n\\\) " \
>>> +         "y"
>>> +
>>> +    do_full_test
>>> +}
>>> +
>>> +# Do all preparation steps for running the gcore tests, then
>>> +# call do_full_test to actually run the tests.
>>> +
>>> +proc_with_prefix test_gcore {} {
>>> +
>>> +    clean_restart "$::binfile"
>>> +
>>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>>> +    "setup SIGUSR1"
>>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>>> +    "setup SIGUSR2"
>>> +
>>> +    if {![runto_main]} {
>>> +    return -1
>>> +    }
>>> +    gdb_test "continue" ".*Segmentation fault.*" "continue to crash"
>>> +
>>> +    set gcore_name "${::binfile}.gcore"
>>> +    set gcore_supported [gdb_gcore_cmd "$gcore_name" "saving gcore"]
>>> +
>>> +    if {!$gcore_supported} {
>>> +    unsupported "couldn't generate gcore file"
>>> +    return
>>> +    }
>>> +
>>> +    set corefile [gdb_remote_download host $gcore_name]
>>> +
>>> +    gdb_test "core-file $corefile" \
>>> +         "" \
>>> +         "loading_corefile" \
>>> +         "A program is being debugged already\\\.  Kill it\\\? 
>>> \\\(y or n\\\) " \
>>> +         "y"
>>> +
>>> +    do_full_test
>>> +}
>>> +
>>> +standard_testfile
>>> +
>>> +if [prepare_for_testing "failed to prepare" $testfile $srcfile \
>>> +    {debug pthreads}] {
>>> +    return -1
>>> +}
>>> +
>>> +clean_restart ${binfile}
>>> +
>>> +gdb_test_no_output "set backtrace limit unlimited"
>>> +
>>> +test_live_inferior
>>> +
>>> +test_corefile
>>> +
>>> +test_gcore
>>
>>
>
  
Guinevere Larsen Jan. 24, 2024, 9:34 a.m. UTC | #5
Ping!
On 09/01/2024 12:50, Guinevere Larsen wrote:
> Ping!
> On 20/12/2023 10:40, Guinevere Larsen wrote:
>> Ping!
>>
>> -- 
>> Cheers,
>> Guinevere Larsen
>> She/Her/Hers
>>
>> On 04/12/2023 18:33, Guinevere Larsen wrote:
>>> This patch is based on an out-of-tree patch that fedora has been
>>> carrying for a while. It tests if GDB is able to properly unwind a
>>> threaded program in the following situations:
>>> * regular threads
>>> * in a signal handler
>>> * in a signal handler executing on an alternate stack
>>>
>>> And the final frame can either be in a syscall or in an infinite loop.
>>>
>>> The test works by running the inferior until a crash to generate a
>>> corefile, or until right before the crash. It then applies a backtrace
>>> to all threads, both to see if any frame can't be identified and to
>>> learn the order of the threads in GDB. Finally, it goes thread by
>>> thread and tries to collect a large part of the backtrace, to confirm
>>> that everything is being unwound correctly.
>>>
>>> Co-Authored-By: Andrew Burgess <aburgess@redhat.com>
>>> Reviewed-By:  Luis Machado <luis.machado@arm.com>
>>>
>>> ---
>>>
>>> Changes for v4:
>>> * Luis mentioned that my strategy for starting the inferior didn't work
>>>    with native-extended testing. Changed to use runto_main instead
>>> * Improved comments in the exp file based on Andrew's comments
>>> * Minor cleanups with regards to TCL usage
>>> ---
>>>   gdb/testsuite/gdb.threads/threadcrash.c   | 443 
>>> ++++++++++++++++++++++
>>>   gdb/testsuite/gdb.threads/threadcrash.exp | 233 ++++++++++++
>>>   2 files changed, 676 insertions(+)
>>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.c
>>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.exp
>>>
>>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.c 
>>> b/gdb/testsuite/gdb.threads/threadcrash.c
>>> new file mode 100644
>>> index 00000000000..e476ae7b07d
>>> --- /dev/null
>>> +++ b/gdb/testsuite/gdb.threads/threadcrash.c
>>> @@ -0,0 +1,443 @@
>>> +/* This testcase is part of GDB, the GNU debugger.
>>> +
>>> +   Copyright 2023 Free Software Foundation, Inc.
>>> +
>>> +   This program is free software; you can redistribute it and/or 
>>> modify
>>> +   it under the terms of the GNU General Public License as 
>>> published by
>>> +   the Free Software Foundation; either version 3 of the License, or
>>> +   (at your option) any later version.
>>> +
>>> +   This program is distributed in the hope that it will be useful,
>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>>> +   GNU General Public License for more details.
>>> +
>>> +   You should have received a copy of the GNU General Public License
>>> +   along with this program.  If not, see 
>>> <http://www.gnu.org/licenses/>. */
>>> +
>>> +#include <pthread.h>
>>> +#include <assert.h>
>>> +#include <stdlib.h>
>>> +#include <signal.h>
>>> +#include <unistd.h>
>>> +
>>> +/* The delay that the main thread gives once all the worker threads 
>>> have
>>> +   reached the barrier before the main thread enters the function 
>>> on which
>>> +   GDB will have placed a breakpoint.  */
>>> +
>>> +#define MAIN_THREAD_DELAY 2
>>> +
>>> +/* The maximum time we allow this test program to run for before an 
>>> alarm
>>> +   signal is sent and everything will exit.  */
>>> +#define WATCHDOG_ALARM_TIME 600
>>> +
>>> +/* Aliases for the signals used within this program.  Each signal
>>> +   corresponds to an action (from the FINAL_ACTION enum) that the
>>> +   signal handler will perform.  */
>>> +
>>> +#define SPIN_SIGNAL SIGUSR1
>>> +#define SYSCALL_SIGNAL SIGUSR2
>>> +
>>> +/* Describe the final action that a thread should perform. */
>>> +
>>> +enum final_action
>>> +  {
>>> +    /* Thread should spin in an infinite loop.  */
>>> +    SPIN = 0,
>>> +
>>> +    /* Thread should block in a syscall.  */
>>> +    SYSCALL,
>>> +
>>> +    /* This is just a marker to allow for looping over the enum.  */
>>> +    LAST_ACTION
>>> +  };
>>> +
>>> +/* Where should the thread perform this action?  */
>>> +
>>> +enum exec_location
>>> +  {
>>> +    /* Just a normal thread, on a normal stack.  */
>>> +    NORMAL = 0,
>>> +
>>> +    /* In a signal handler, but use the normal stack.  */
>>> +    SIGNAL_HANDLER,
>>> +
>>> +    /* In a signal handler using an alternative stack.  */
>>> +    SIGNAL_ALT_STACK,
>>> +
>>> +    /* This is just a marker to allow for looping over the enum.  */
>>> +    LAST_LOCACTION
>>> +  };
>>> +
>>> +/* A descriptor for a single thread job.  We create a new thread 
>>> for each
>>> +   job_description.  */
>>> +
>>> +struct job_description
>>> +{
>>> +  /* What action should this thread perform.  */
>>> +  enum final_action action;
>>> +
>>> +  /* Where should the thread perform the action.  */
>>> +  enum exec_location location;
>>> +
>>> +  /* The actual thread handle, so we can join with the thread.  */
>>> +  pthread_t thread;
>>> +};
>>> +
>>> +/* A pthread barrier, used to (try to) synchronise the threads.  */
>>> +pthread_barrier_t global_barrier;
>>> +
>>> +/* Return a list of jobs, and place the length of the list in 
>>> *COUNT.  */
>>> +
>>> +struct job_description *
>>> +get_job_list (int *count)
>>> +{
>>> +  /* The number of jobs.  */
>>> +  int num = LAST_ACTION * LAST_LOCACTION;
>>> +
>>> +  /* The uninitialised array of jobs.  */
>>> +  struct job_description *list
>>> +    = malloc (num * sizeof (struct job_description));
>>> +  assert (list != NULL);
>>> +
>>> +  /* Fill the array with all possible jobs.  */
>>> +  for (int i = 0; i < (int) LAST_ACTION; ++i)
>>> +    for (int j = 0; j < (int) LAST_LOCACTION; ++j)
>>> +      {
>>> +    int idx = (i * LAST_LOCACTION) + j;
>>> +    list[idx].action = (enum final_action) i;
>>> +    list[idx].location = (enum exec_location) j;
>>> +      }
>>> +
>>> +  /* Return the array of jobs.  */
>>> +  *count = num;
>>> +  return list;
>>> +}
>>> +
>>> +/* This function should never be called.  If it is then an 
>>> assertion will
>>> +   trigger.  */
>>> +
>>> +void
>>> +assert_not_reached (void)
>>> +{
>>> +  assert (0);
>>> +}
>>> +
>>> +/* The function for a SPIN action.  Just spins in a loop. The LOCATION
>>> +   argument exists so GDB can identify the expected context for this
>>> +   function.  */
>>> +
>>> +void
>>> +do_spin_task (enum exec_location location)
>>> +{
>>> +  (void) location;
>>> +
>>> +  /* Let everyone know that we're about to perform our action.  */
>>> +  int res = pthread_barrier_wait (&global_barrier);
>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>> +
>>> +  while (1)
>>> +    {
>>> +      /* Nothing.  */
>>> +    }
>>> +}
>>> +
>>> +/* The function for a SYSCALL action.  Just blocks in a call to sleep.
>>> +   The LOCATION argument exists so GDB can identify the expected context
>>> +   for this function.  */
>>> +
>>> +void
>>> +do_syscall_task (enum exec_location location)
>>> +{
>>> +  (void) location;
>>> +
>>> +  /* Let everyone know that we're about to perform our action.  */
>>> +  int res = pthread_barrier_wait (&global_barrier);
>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>> +
>>> +  sleep (600);
>>> +}
>>> +
>>> +/* Return the required size for a sigaltstack.  We start with a single
>>> +   page, but do check against the system defined minimums. We don't 
>>> run
>>> +   much on the alternative stacks, so we don't need a huge one.  */
>>> +
>>> +size_t
>>> +get_stack_size (void)
>>> +{
>>> +  size_t size = getpagesize ();    /* Arbitrary starting size.  */
>>> +  if (size < SIGSTKSZ)
>>> +    size = SIGSTKSZ;
>>> +  if (size < MINSIGSTKSZ)
>>> +    size = MINSIGSTKSZ;
>>> +  return size;
>>> +}
>>> +
>>> +/* A descriptor for an alternative stack.  */
>>> +
>>> +struct stack_descriptor
>>> +{
>>> +  /* The base address of the alternative stack.  This is the 
>>> address that
>>> +     must be freed to release the memory used by this stack. */
>>> +  void *base;
>>> +
>>> +  /* The size of this alternative stack.  Tracked just so we can 
>>> query this
>>> +     from GDB.  */
>>> +  size_t size;
>>> +};
>>> +
>>> +/* Install an alternative signal stack.  Return a descriptor for 
>>> the newly
>>> +   allocated alternative stack.  */
>>> +
>>> +struct stack_descriptor
>>> +setup_alt_stack (void)
>>> +{
>>> +  size_t stack_size = get_stack_size ();
>>> +
>>> +  void *stack_area = malloc (stack_size);
>>> +
>>> +  stack_t stk;
>>> +  stk.ss_sp = stack_area;
>>> +  stk.ss_flags = 0;
>>> +  stk.ss_size = stack_size;
>>> +
>>> +  int res = sigaltstack (&stk, NULL);
>>> +  assert (res == 0);
>>> +
>>> +  struct stack_descriptor desc;
>>> +  desc.base = stack_area;
>>> +  desc.size = stack_size;
>>> +
>>> +  return desc;
>>> +}
>>> +
>>> +/* Return true (non-zero) if we are currently on the alternative 
>>> stack,
>>> +   otherwise, return false (zero).  */
>>> +
>>> +int
>>> +on_alt_stack_p (void)
>>> +{
>>> +  stack_t stk;
>>> +  int res = sigaltstack (NULL, &stk);
>>> +  assert (res == 0);
>>> +
>>> +  return (stk.ss_flags & SS_ONSTACK) != 0;
>>> +}
>>> +
>>> +/* The signal handler function.  All signals call here, so we use 
>>> SIGNO
>>> +   (the signal that was delivered) to decide what action to 
>>> perform.  This
>>> +   function might, or might not, have been called on an alternative 
>>> signal
>>> +   stack.  */
>>> +
>>> +void
>>> +signal_handler (int signo)
>>> +{
>>> +  enum exec_location location
>>> +    = on_alt_stack_p () ? SIGNAL_ALT_STACK : SIGNAL_HANDLER;
>>> +
>>> +  switch (signo)
>>> +    {
>>> +    case SPIN_SIGNAL:
>>> +      do_spin_task (location);
>>> +      break;
>>> +
>>> +    case SYSCALL_SIGNAL:
>>> +      do_syscall_task (location);
>>> +      break;
>>> +
>>> +    default:
>>> +      assert_not_reached ();
>>> +    }
>>> +}
>>> +
>>> +/* The thread worker function.  ARG is a job_description pointer which
>>> +   describes what this thread is expected to do.  This function always
>>> +   returns a NULL pointer.  */
>>> +
>>> +void *
>>> +thread_function (void *arg)
>>> +{
>>> +  struct job_description *job = (struct job_description *) arg;
>>> +  struct stack_descriptor desc = { NULL, 0 };
>>> +  int sa_flags = 0;
>>> +
>>> +  switch (job->location)
>>> +    {
>>> +    case NORMAL:
>>> +      /* This thread performs the worker action on the current thread,
>>> +     select the correct worker function based on the requested
>>> +     action.  */
>>> +      switch (job->action)
>>> +    {
>>> +    case SPIN:
>>> +      do_spin_task (NORMAL);
>>> +      break;
>>> +
>>> +    case SYSCALL:
>>> +      do_syscall_task (NORMAL);
>>> +      break;
>>> +
>>> +    default:
>>> +      assert_not_reached ();
>>> +    }
>>> +      break;
>>> +
>>> +    case SIGNAL_ALT_STACK:
>>> +      /* This thread is to perform its action in a signal handler 
>>> on the
>>> +     alternative stack.  Install the alternative stack now, and then
>>> +     fall through to the normal signal handler location code.  */
>>> +      desc = setup_alt_stack ();
>>> +      assert (desc.base != NULL);
>>> +      assert (desc.size > 0);
>>> +      sa_flags = SA_ONSTACK;
>>> +
>>> +      /* Fall through.  */
>>> +    case SIGNAL_HANDLER:
>>> +      {
>>> +    /* This thread is to perform its action in a signal handler.  We
>>> +       might have just installed an alternative signal stack.  */
>>> +    int signo, res;
>>> +
>>> +    /* Select the correct signal number so that the signal handler 
>>> will
>>> +       perform the required action.  */
>>> +    switch (job->action)
>>> +      {
>>> +      case SPIN:
>>> +        signo = SPIN_SIGNAL;
>>> +        break;
>>> +
>>> +      case SYSCALL:
>>> +        signo = SYSCALL_SIGNAL;
>>> +        break;
>>> +
>>> +      default:
>>> +        assert_not_reached ();
>>> +      }
>>> +
>>> +    /* Now setup the signal handler.  */
>>> +    struct sigaction sa;
>>> +    sa.sa_handler = signal_handler;
>>> +    sigfillset (&sa.sa_mask);
>>> +    sa.sa_flags = sa_flags;
>>> +    res = sigaction (signo, &sa, NULL);
>>> +    assert (res == 0);
>>> +
>>> +    /* Send the signal to this thread.  */
>>> +    res = pthread_kill (job->thread, signo);
>>> +    assert (res == 0);
>>> +      }
>>> +      break;
>>> +
>>> +    default:
>>> +      assert_not_reached ();
>>> +    };
>>> +
>>> +  /* Free the alt-stack if we allocated one, if not DESC.BASE will be
>>> +     NULL so this call is fine.  */
>>> +  free (desc.base);
>>> +
>>> +  /* Thread complete.  */
>>> +  return NULL;
>>> +}
>>> +
>>> +void
>>> +start_job (struct job_description *job)
>>> +{
>>> +  int res;
>>> +
>>> +  res = pthread_create (&job->thread, NULL, thread_function, job);
>>> +  assert (res == 0);
>>> +}
>>> +
>>> +/* Join with the thread for JOB.  This will block until the thread 
>>> for JOB
>>> +   has finished.  */
>>> +
>>> +void
>>> +finalise_job (struct job_description *job)
>>> +{
>>> +  int res;
>>> +  void *retval;
>>> +
>>> +  res = pthread_join (job->thread, &retval);
>>> +  assert (res == 0);
>>> +  assert (retval == NULL);
>>> +}
>>> +
>>> +/* Function that GDB can place a breakpoint on.  */
>>> +
>>> +void
>>> +breakpt (void)
>>> +{
>>> +  /* Nothing.  */
>>> +}
>>> +
>>> +/* Function that triggers a crash.  If the user has set up their
>>> +   environment correctly this will dump a core file, which GDB can
>>> +   then examine.  */
>>> +
>>> +void
>>> +crash_function (void)
>>> +{
>>> +  volatile int *p = 0;
>>> +  volatile int n = *p;
>>> +  (void) n;
>>> +}
>>> +
>>> +/* Entry point.  */
>>> +
>>> +int
>>> +main ()
>>> +{
>>> +  int job_count, res;
>>> +  struct job_description *jobs = get_job_list (&job_count);
>>> +
>>> +  /* This test is going to park some threads inside infinite 
>>> loops.  Just
>>> +     in case this program is left running, install an alarm that 
>>> will cause
>>> +     everything to exit.  */
>>> +  alarm (WATCHDOG_ALARM_TIME);
>>> +
>>> +  /* We want each worker thread (of which there are JOB_COUNT) plus 
>>> the
>>> +     main thread (hence + 1) to wait at the barrier.  */
>>> +  res = pthread_barrier_init (&global_barrier, NULL, job_count + 1);
>>> +  assert (res == 0);
>>> +
>>> +  /* Start all the jobs.  */
>>> +  for (int i = 0; i < job_count; ++i)
>>> +    start_job (&jobs[i]);
>>> +
>>> +  /* Notify all the worker threads that we're waiting for them.  */
>>> +  res = pthread_barrier_wait (&global_barrier);
>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>> +
>>> +  /* All we know at this point is that all the worker threads have 
>>> reached
>>> +     the barrier, which is just before they perform their action.  
>>> But we
>>> +     really want them to start their action.
>>> +
>>> +     There's really no way we can be 100% certain that the worker 
>>> threads
>>> +     have started their action, all we can do is wait for a short 
>>> while and
>>> +     hope that the machine we're running on is not too slow. */
>>> +  sleep (MAIN_THREAD_DELAY);
>>> +
>>> +  /* A function that GDB can place a breakpoint on.  By the time we 
>>> get
>>> +     here we are as sure as we can be that all of the worker 
>>> threads have
>>> +     started and are in their worker action (spinning, or 
>>> syscall).  */
>>> +  breakpt ();
>>> +
>>> +  /* If GDB is not attached then this function will cause a crash, 
>>> which
>>> +     can be used to dump a core file, which GDB can then analyse.  */
>>> +  crash_function ();
>>> +
>>> +  /* Due to the crash we never expect to get here.  Plus the worker 
>>> actions
>>> +     never terminate.  But for completeness, here's where we join 
>>> with all
>>> +     the worker threads.  */
>>> +  for (int i = 0; i < job_count; ++i)
>>> +    finalise_job (&jobs[i]);
>>> +
>>> +  /* Cleanup the barrier.  */
>>> +  res = pthread_barrier_destroy (&global_barrier);
>>> +  assert (res == 0);
>>> +
>>> +  /* And clean up the jobs list.  */
>>> +  free (jobs);
>>> +
>>> +  return 0;
>>> +}
>>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.exp 
>>> b/gdb/testsuite/gdb.threads/threadcrash.exp
>>> new file mode 100644
>>> index 00000000000..996e020d1e8
>>> --- /dev/null
>>> +++ b/gdb/testsuite/gdb.threads/threadcrash.exp
>>> @@ -0,0 +1,233 @@
>>> +# This testcase is part of GDB, the GNU debugger.
>>> +
>>> +# Copyright 2023 Free Software Foundation, Inc.
>>> +
>>> +# This program is free software; you can redistribute it and/or modify
>>> +# it under the terms of the GNU General Public License as published by
>>> +# the Free Software Foundation; either version 3 of the License, or
>>> +# (at your option) any later version.
>>> +#
>>> +# This program is distributed in the hope that it will be useful,
>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> +# GNU General Public License for more details.
>>> +#
>>> +# You should have received a copy of the GNU General Public License
>>> +# along with this program.  If not, see 
>>> <http://www.gnu.org/licenses/>.
>>> +
>>> +# This test case looks at GDB's ability to get correct backtraces 
>>> for a
>>> +# crashed inferior, recreating it from a live inferior, a corefile and
>>> +# a gcore.
>>> +
>>> +
>>> +# Check that the inferior has 7 threads, and return the number of
>>> +# threads found.  We return the thread count so that, even if there
>>> +# is some error in the test, the final log doesn't get flooded with
>>> +# failures.
>>> +
>>> +proc test_thread_count {} {
>>> +    set thread_count 0
>>> +
>>> +    gdb_test_multiple "info threads" "getting thread count" -lbl {
>>> +    -re "Thread" {
>>> +        incr thread_count
>>> +        exp_continue
>>> +    }
>>> +    -re "$::gdb_prompt " {
>>> +        gdb_assert {$thread_count == 7}
>>> +    }
>>> +    }
>>> +
>>> +    return $thread_count
>>> +}
>>> +
>>> +# Use 'thread apply all backtrace' to check if all expected threads
>>> +# are present, and stopped in the expected locations.  Set the global
>>> +# TEST_LIST to be a list of regexps expected to match all the
>>> +# threads.  We generate it now so that the list is in the order that
>>> +# GDB sees the threads.
>>> +
>>> +proc thread_apply_all {} {
>>> +    global test_list
>>> +
>>> +    set test_list { }
>>> +
>>> +    set unwind_fail false
>>> +
>>> +    gdb_test_multiple "thread apply all backtrace" \
>>> +    "Get thread information" -lbl {
>>> +        -re "#\[0-9\]+\\\?\\\?\[^\n\]*" {
>>> +        set unwind_fail true
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*syscall_task 
>>> .location=SIGNAL_ALT_STACK\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*sleep.*" \
>>> +                          ".*do_syscall_task 
>>> .location=SIGNAL_ALT_STACK.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_HANDLER\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*sleep.*" \
>>> +                          ".*do_syscall_task 
>>> .location=SIGNAL_HANDLER.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*syscall_task .location=NORMAL\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*sleep.*" \
>>> +                          ".*do_syscall_task .location=NORMAL.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*spin_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*do_spin_task 
>>> .location=SIGNAL_ALT_STACK.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*spin_task .location=SIGNAL_HANDLER\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*do_spin_task 
>>> .location=SIGNAL_HANDLER.*" \
>>> +                          ".*signal_handler.*" \
>>> +                          ".*signal handler called.*" \
>>> +                          ".*pthread_kill.*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*spin_task .location=NORMAL\[^\n\]*" {
>>> +        lappend test_list [multi_line ".*do_spin_task 
>>> .location=NORMAL..*" \
>>> +                          ".*thread_function.*"]
>>> +        exp_continue
>>> +        }
>>> +        -re "\[^\n\]*main\[^\n\]*" {
>>> +        lappend test_list ".*main.*"
>>> +        exp_continue
>>> +        }
>>> +        -re "$::gdb_prompt " {
>>> +        pass $gdb_test_name
>>> +        }
>>> +    }
>>> +
>>> +    gdb_assert {$unwind_fail == false}
>>> +}
>>> +
>>> +# Perform all the tests we're interested in.  They are:
>>> +# * test if we have 7 threads
>>> +# * Creating the list of backtraces for all threads seen
>>> +# * testing if GDB recreated the full backtrace we expect for all 
>>> threads
>>> +
>>> +proc do_full_test {} {
>>> +    global test_list
>>> +    set thread_count [test_thread_count]
>>> +
>>> +    thread_apply_all
>>> +
>>> +    gdb_assert {$thread_count == [llength $test_list]}
>>> +
>>> +    for {set i 0} {$i < $thread_count } {incr i} {
>>> +    set thread_num [expr [llength $test_list] - $i]
>>> +
>>> +    gdb_test "thread apply $thread_num backtrace" [lindex 
>>> $test_list $i]
>>> +    }
>>> +}
>>> +
>>> +# Do all preparation steps for running the live inferior tests, then
>>> +# call do_full_test to actually run the tests.
>>> +
>>> +proc_with_prefix test_live_inferior {} {
>>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>>> +    "setup SIGUSR1"
>>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>>> +    "setup SIGUSR2"
>>> +
>>> +    if {![runto_main]} {
>>> +    return
>>> +    }
>>> +
>>> +    gdb_breakpoint "breakpt"
>>> +    gdb_continue_to_breakpoint "running to breakpoint" ".*"
>>> +
>>> +    do_full_test
>>> +}
>>> +
>>> +# Do all preparation steps for running the corefile tests, then
>>> +# call do_full_test to actually run the tests.
>>> +
>>> +proc_with_prefix test_corefile {} {
>>> +    set corefile [core_find $::binfile]
>>> +    if { $corefile == "" } {
>>> +    untested "couldn't generate corefile"
>>> +    return
>>> +    }
>>> +    set corefile [gdb_remote_download host $corefile]
>>> +
>>> +    gdb_test "core-file $corefile" \
>>> +         "" \
>>> +         "loading_corefile" \
>>> +         "A program is being debugged already\\\.  Kill it\\\? 
>>> \\\(y or n\\\) " \
>>> +         "y"
>>> +
>>> +    do_full_test
>>> +}
>>> +
>>> +# Do all preparation steps for running the gcore tests, then
>>> +# call do_full_test to actually run the tests.
>>> +
>>> +proc_with_prefix test_gcore {} {
>>> +
>>> +    clean_restart "$::binfile"
>>> +
>>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>>> +    "setup SIGUSR1"
>>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>>> +    "setup SIGUSR2"
>>> +
>>> +    if {![runto_main]} {
>>> +    return -1
>>> +    }
>>> +    gdb_test "continue" ".*Segmentation fault.*" "continue to crash"
>>> +
>>> +    set gcore_name "${::binfile}.gcore"
>>> +    set gcore_supported [gdb_gcore_cmd "$gcore_name" "saving gcore"]
>>> +
>>> +    if {!$gcore_supported} {
>>> +    unsupported "couldn't generate gcore file"
>>> +    return
>>> +    }
>>> +
>>> +    set corefile [gdb_remote_download host $gcore_name]
>>> +
>>> +    gdb_test "core-file $corefile" \
>>> +         "" \
>>> +         "loading_corefile" \
>>> +         "A program is being debugged already\\\.  Kill it\\\? 
>>> \\\(y or n\\\) " \
>>> +         "y"
>>> +
>>> +    do_full_test
>>> +}
>>> +
>>> +standard_testfile
>>> +
>>> +if [prepare_for_testing "failed to prepare" $testfile $srcfile \
>>> +    {debug pthreads}] {
>>> +    return -1
>>> +}
>>> +
>>> +clean_restart ${binfile}
>>> +
>>> +gdb_test_no_output "set backtrace limit unlimited"
>>> +
>>> +test_live_inferior
>>> +
>>> +test_corefile
>>> +
>>> +test_gcore
>>
>>
>
  
Luis Machado Jan. 24, 2024, 12:35 p.m. UTC | #6
Hi Guinevere,

Sorry for the delay.

On 1/24/24 09:34, Guinevere Larsen wrote:
> Ping!
> On 09/01/2024 12:50, Guinevere Larsen wrote:
>> Ping!
>> On 20/12/2023 10:40, Guinevere Larsen wrote:
>>> Ping!
>>>
>>> -- 
>>> Cheers,
>>> Guinevere Larsen
>>> She/Her/Hers
>>>
>>> On 04/12/2023 18:33, Guinevere Larsen wrote:
>>>> This patch is based on an out-of-tree patch that fedora has been
>>>> carrying for a while. It tests if GDB is able to properly unwind a
>>>> threaded program in the following situations:
>>>> * regular threads
>>>> * in a signal handler
>>>> * in a signal handler executing on an alternate stack
>>>>
>>>> And the final frame can either be in a syscall or in an infinite loop.
>>>>
>>>> The test works by running the inferior until a crash to generate a
>>>> corefile, or until right before the crash. Then applies a backtrace to
>>>> all threads to see if any frame can't be identified, and the order of
>>>> the threads in GDB. Finally, it goes thread by thread and tries to
>>>> collect a large part of the backtrace, to confirm that everything is
>>>> being unwound correctly.
>>>>
>>>> Co-Authored-By: Andrew Burgess <aburgess@redhat.com>
>>>> Reviewed-By:  Luis Machado <luis.machado@arm.com>
>>>>
>>>> ---
>>>>
>>>> Changes for v4:
>>>> * Luis mentioned that my strategy for starting the inferior didn't work
>>>>    with native-extended testing. Changed to use runto_main instead
>>>> * Improved comments in the exp file based on Andrew's comments
>>>> * Minor cleanups with regards to TCL usage
>>>> ---
>>>>   gdb/testsuite/gdb.threads/threadcrash.c   | 443 ++++++++++++++++++++++
>>>>   gdb/testsuite/gdb.threads/threadcrash.exp | 233 ++++++++++++
>>>>   2 files changed, 676 insertions(+)
>>>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.c
>>>>   create mode 100644 gdb/testsuite/gdb.threads/threadcrash.exp
>>>>
>>>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.c b/gdb/testsuite/gdb.threads/threadcrash.c
>>>> new file mode 100644
>>>> index 00000000000..e476ae7b07d
>>>> --- /dev/null
>>>> +++ b/gdb/testsuite/gdb.threads/threadcrash.c
>>>> @@ -0,0 +1,443 @@
>>>> +/* This testcase is part of GDB, the GNU debugger.
>>>> +
>>>> +   Copyright 2023 Free Software Foundation, Inc.
>>>> +
>>>> +   This program is free software; you can redistribute it and/or modify
>>>> +   it under the terms of the GNU General Public License as published by
>>>> +   the Free Software Foundation; either version 3 of the License, or
>>>> +   (at your option) any later version.
>>>> +
>>>> +   This program is distributed in the hope that it will be useful,
>>>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>>>> +   GNU General Public License for more details.
>>>> +
>>>> +   You should have received a copy of the GNU General Public License
>>>> +   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
>>>> +
>>>> +#include <pthread.h>
>>>> +#include <assert.h>
>>>> +#include <stdlib.h>
>>>> +#include <signal.h>
>>>> +#include <unistd.h>
>>>> +
>>>> +/* The delay that the main thread gives once all the worker threads have
>>>> +   reached the barrier before the main thread enters the function on which
>>>> +   GDB will have placed a breakpoint.  */
>>>> +
>>>> +#define MAIN_THREAD_DELAY 2
>>>> +
>>>> +/* The maximum time we allow this test program to run for before an alarm
>>>> +   signal is sent and everything will exit.  */
>>>> +#define WATCHDOG_ALARM_TIME 600
>>>> +
>>>> +/* Aliases for the signals used within this script.  Each signal
>>>> +   corresponds to an action (from the FINAL_ACTION enum) that the signal
>>>> +   handler will perform.  */
>>>> +
>>>> +#define SPIN_SIGNAL SIGUSR1
>>>> +#define SYSCALL_SIGNAL SIGUSR2
>>>> +
>>>> +/* Describe the final action that a thread should perform. */
>>>> +
>>>> +enum final_action
>>>> +  {
>>>> +    /* Thread should spin in an infinite loop.  */
>>>> +    SPIN = 0,
>>>> +
>>>> +    /* Thread should block in a syscall.  */
>>>> +    SYSCALL,
>>>> +
>>>> +    /* This is just a marker to allow for looping over the enum.  */
>>>> +    LAST_ACTION
>>>> +  };
>>>> +
>>>> +/* Where should the thread perform this action?  */
>>>> +
>>>> +enum exec_location
>>>> +  {
>>>> +    /* Just a normal thread, on a normal stack.  */
>>>> +    NORMAL = 0,
>>>> +
>>>> +    /* In a signal handler, but use the normal stack.  */
>>>> +    SIGNAL_HANDLER,
>>>> +
>>>> +    /* In a signal handler using an alternative stack.  */
>>>> +    SIGNAL_ALT_STACK,
>>>> +
>>>> +    /* This is just a marker to allow for looping over the enum.  */
>>>> +    LAST_LOCACTION
>>>> +  };
>>>> +
>>>> +/* A descriptor for a single thread job.  We create a new thread for each
>>>> +   job_description.  */
>>>> +
>>>> +struct job_description
>>>> +{
>>>> +  /* What action should this thread perform.  */
>>>> +  enum final_action action;
>>>> +
>>>> +  /* Where should the thread perform the action.  */
>>>> +  enum exec_location location;
>>>> +
>>>> +  /* The actual thread handle, so we can join with the thread.  */
>>>> +  pthread_t thread;
>>>> +};
>>>> +
>>>> +/* A pthread barrier, used to (try to) synchronise the threads.  */
>>>> +pthread_barrier_t global_barrier;
>>>> +
>>>> +/* Return a list of jobs, and place the length of the list in *COUNT.  */
>>>> +
>>>> +struct job_description *
>>>> +get_job_list (int *count)
>>>> +{
>>>> +  /* The number of jobs.  */
>>>> +  int num = LAST_ACTION * LAST_LOCACTION;
>>>> +
>>>> +  /* The uninitialised array of jobs.  */
>>>> +  struct job_description *list
>>>> +    = malloc (num * sizeof (struct job_description));
>>>> +  assert (list != NULL);
>>>> +
>>>> +  /* Fill the array with all possible jobs.  */
>>>> +  for (int i = 0; i < (int) LAST_ACTION; ++i)
>>>> +    for (int j = 0; j < (int) LAST_LOCACTION; ++j)
>>>> +      {
>>>> +    int idx = (i * LAST_LOCACTION) + j;
>>>> +    list[idx].action = (enum final_action) i;
>>>> +    list[idx].location = (enum exec_location) j;
>>>> +      }
>>>> +
>>>> +  /* Return the array of jobs.  */
>>>> +  *count = num;
>>>> +  return list;
>>>> +}
>>>> +
>>>> +/* This function should never be called.  If it is then an assertion will
>>>> +   trigger.  */
>>>> +
>>>> +void
>>>> +assert_not_reached (void)
>>>> +{
>>>> +  assert (0);
>>>> +}
>>>> +
>>>> +/* The function for a SPIN action.  Just spins in a loop. The LOCATION
>>>> +   argument exists so GDB can identify the expected context for this
>>>> +   function.  */
>>>> +
>>>> +void
>>>> +do_spin_task (enum exec_location location)
>>>> +{
>>>> +  (void) location;
>>>> +
>>>> +  /* Let everyone know that we're about to perform our action.  */
>>>> +  int res = pthread_barrier_wait (&global_barrier);
>>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>>> +
>>>> +  while (1)
>>>> +    {
>>>> +      /* Nothing.  */
>>>> +    }
>>>> +}
>>>> +
>>>> +/* The function for a SYSCALL action.  Just blocks in a sleep call. The LOCATION
>>>> +   argument exists so GDB can identify the expected context for this
>>>> +   function.  */
>>>> +
>>>> +void
>>>> +do_syscall_task (enum exec_location location)
>>>> +{
>>>> +  (void) location;
>>>> +
>>>> +  /* Let everyone know that we're about to perform our action.  */
>>>> +  int res = pthread_barrier_wait (&global_barrier);
>>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>>> +
>>>> +  sleep (600);
>>>> +}
>>>> +
>>>> +/* Return the required size for a sigaltstack.  We start with a single
>>>> +   page, but do check against the system defined minimums. We don't run
>>>> +   much on the alternative stacks, so we don't need a huge one.  */
>>>> +
>>>> +size_t
>>>> +get_stack_size (void)
>>>> +{
>>>> +  size_t size = getpagesize ();    /* Arbitrary starting size.  */
>>>> +  if (size < SIGSTKSZ)
>>>> +    size = SIGSTKSZ;
>>>> +  if (size < MINSIGSTKSZ)
>>>> +    size = MINSIGSTKSZ;
>>>> +  return size;
>>>> +}
>>>> +
>>>> +/* A descriptor for an alternative stack.  */
>>>> +
>>>> +struct stack_descriptor
>>>> +{
>>>> +  /* The base address of the alternative stack.  This is the address that
>>>> +     must be freed to release the memory used by this stack. */
>>>> +  void *base;
>>>> +
>>>> +  /* The size of this alternative stack.  Tracked just so we can query this
>>>> +     from GDB.  */
>>>> +  size_t size;
>>>> +};
>>>> +
>>>> +/* Install an alternative signal stack.  Return a descriptor for the newly
>>>> +   allocated alternative stack.  */
>>>> +
>>>> +struct stack_descriptor
>>>> +setup_alt_stack (void)
>>>> +{
>>>> +  size_t stack_size = get_stack_size ();
>>>> +
>>>> +  void *stack_area = malloc (stack_size);
>>>> +
>>>> +  stack_t stk;
>>>> +  stk.ss_sp = stack_area;
>>>> +  stk.ss_flags = 0;
>>>> +  stk.ss_size = stack_size;
>>>> +
>>>> +  int res = sigaltstack (&stk, NULL);
>>>> +  assert (res == 0);
>>>> +
>>>> +  struct stack_descriptor desc;
>>>> +  desc.base = stack_area;
>>>> +  desc.size = stack_size;
>>>> +
>>>> +  return desc;
>>>> +}
>>>> +
>>>> +/* Return true (non-zero) if we are currently on the alternative stack,
>>>> +   otherwise, return false (zero).  */
>>>> +
>>>> +int
>>>> +on_alt_stack_p (void)
>>>> +{
>>>> +  stack_t stk;
>>>> +  int res = sigaltstack (NULL, &stk);
>>>> +  assert (res == 0);
>>>> +
>>>> +  return (stk.ss_flags & SS_ONSTACK) != 0;
>>>> +}
>>>> +
>>>> +/* The signal handler function.  All signals call here, so we use SIGNO
>>>> +   (the signal that was delivered) to decide what action to perform.  This
>>>> +   function might, or might not, have been called on an alternative signal
>>>> +   stack.  */
>>>> +
>>>> +void
>>>> +signal_handler (int signo)
>>>> +{
>>>> +  enum exec_location location
>>>> +    = on_alt_stack_p () ? SIGNAL_ALT_STACK : SIGNAL_HANDLER;
>>>> +
>>>> +  switch (signo)
>>>> +    {
>>>> +    case SPIN_SIGNAL:
>>>> +      do_spin_task (location);
>>>> +      break;
>>>> +
>>>> +    case SYSCALL_SIGNAL:
>>>> +      do_syscall_task (location);
>>>> +      break;
>>>> +
>>>> +    default:
>>>> +      assert_not_reached ();
>>>> +    }
>>>> +}
>>>> +
>>>> +/* The thread worker function.  ARG is a job_description pointer which
>>>> +   describes what this thread is expected to do.  This function always
>>>> +   returns a NULL pointer.  */
>>>> +
>>>> +void *
>>>> +thread_function (void *arg)
>>>> +{
>>>> +  struct job_description *job = (struct job_description *) arg;
>>>> +  struct stack_descriptor desc = { NULL, 0 };
>>>> +  int sa_flags = 0;
>>>> +
>>>> +  switch (job->location)
>>>> +    {
>>>> +    case NORMAL:
>>>> +      /* This thread performs the worker action on the current thread,
>>>> +     select the correct worker function based on the requested
>>>> +     action.  */
>>>> +      switch (job->action)
>>>> +    {
>>>> +    case SPIN:
>>>> +      do_spin_task (NORMAL);
>>>> +      break;
>>>> +
>>>> +    case SYSCALL:
>>>> +      do_syscall_task (NORMAL);
>>>> +      break;
>>>> +
>>>> +    default:
>>>> +      assert_not_reached ();
>>>> +    }
>>>> +      break;
>>>> +
>>>> +    case SIGNAL_ALT_STACK:
>>>> +      /* This thread is to perform its action in a signal handler on the
>>>> +     alternative stack.  Install the alternative stack now, and then
>>>> +     fall through to the normal signal handler location code.  */
>>>> +      desc = setup_alt_stack ();
>>>> +      assert (desc.base != NULL);
>>>> +      assert (desc.size > 0);
>>>> +      sa_flags = SA_ONSTACK;
>>>> +
>>>> +      /* Fall through.  */
>>>> +    case SIGNAL_HANDLER:
>>>> +      {
>>>> +    /* This thread is to perform its action in a signal handler.  We
>>>> +       might have just installed an alternative signal stack.  */
>>>> +    int signo, res;
>>>> +
>>>> +    /* Select the correct signal number so that the signal handler will
>>>> +       perform the required action.  */
>>>> +    switch (job->action)
>>>> +      {
>>>> +      case SPIN:
>>>> +        signo = SPIN_SIGNAL;
>>>> +        break;
>>>> +
>>>> +      case SYSCALL:
>>>> +        signo = SYSCALL_SIGNAL;
>>>> +        break;
>>>> +
>>>> +      default:
>>>> +        assert_not_reached ();
>>>> +      }
>>>> +
>>>> +    /* Now setup the signal handler.  */
>>>> +    struct sigaction sa;
>>>> +    sa.sa_handler = signal_handler;
>>>> +    sigfillset (&sa.sa_mask);
>>>> +    sa.sa_flags = sa_flags;
>>>> +    res = sigaction (signo, &sa, NULL);
>>>> +    assert (res == 0);
>>>> +
>>>> +    /* Send the signal to this thread.  */
>>>> +    res = pthread_kill (job->thread, signo);
>>>> +    assert (res == 0);
>>>> +      }
>>>> +      break;
>>>> +
>>>> +    default:
>>>> +      assert_not_reached ();
>>>> +    };
>>>> +
>>>> +  /* Free the alt-stack if we allocated one, if not DESC.BASE will be
>>>> +     NULL so this call is fine.  */
>>>> +  free (desc.base);
>>>> +
>>>> +  /* Thread complete.  */
>>>> +  return NULL;
>>>> +}
>>>> +
>>>> +void
>>>> +start_job (struct job_description *job)
>>>> +{
>>>> +  int res;
>>>> +
>>>> +  res = pthread_create (&job->thread, NULL, thread_function, job);
>>>> +  assert (res == 0);
>>>> +}
>>>> +
>>>> +/* Join with the thread for JOB.  This will block until the thread for JOB
>>>> +   has finished.  */
>>>> +
>>>> +void
>>>> +finalise_job (struct job_description *job)
>>>> +{
>>>> +  int res;
>>>> +  void *retval;
>>>> +
>>>> +  res = pthread_join (job->thread, &retval);
>>>> +  assert (res == 0);
>>>> +  assert (retval == NULL);
>>>> +}
>>>> +
>>>> +/* Function that GDB can place a breakpoint on.  */
>>>> +
>>>> +void
>>>> +breakpt (void)
>>>> +{
>>>> +  /* Nothing.  */
>>>> +}
>>>> +
>>>> +/* Function that triggers a crash, if the user has setup their environment
>>>> +   correctly this will dump a core file, which GDB can then examine.  */
>>>> +
>>>> +void
>>>> +crash_function (void)
>>>> +{
>>>> +  volatile int *p = 0;
>>>> +  volatile int n = *p;
>>>> +  (void) n;
>>>> +}
>>>> +
>>>> +/* Entry point.  */
>>>> +
>>>> +int
>>>> +main ()
>>>> +{
>>>> +  int job_count, res;
>>>> +  struct job_description *jobs = get_job_list (&job_count);
>>>> +
>>>> +  /* This test is going to park some threads inside infinite loops.  Just
>>>> +     in case this program is left running, install an alarm that will cause
>>>> +     everything to exit.  */
>>>> +  alarm (WATCHDOG_ALARM_TIME);
>>>> +
>>>> +  /* We want each worker thread (of which there are JOB_COUNT) plus the
>>>> +     main thread (hence + 1) to wait at the barrier.  */
>>>> +  res = pthread_barrier_init (&global_barrier, NULL, job_count + 1);
>>>> +  assert (res == 0);
>>>> +
>>>> +  /* Start all the jobs.  */
>>>> +  for (int i = 0; i < job_count; ++i)
>>>> +    start_job (&jobs[i]);
>>>> +
>>>> +  /* Notify all the worker threads that we're waiting for them.  */
>>>> +  res = pthread_barrier_wait (&global_barrier);
>>>> +  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
>>>> +
>>>> +  /* All we know at this point is that all the worker threads have reached
>>>> +     the barrier, which is just before they perform their action.  But we
>>>> +     really want them to start their action.
>>>> +
>>>> +     There's really no way we can be 100% certain that the worker threads
>>>> +     have started their action, all we can do is wait for a short while and
>>>> +     hope that the machine we're running on is not too slow. */
>>>> +  sleep (MAIN_THREAD_DELAY);
>>>> +
>>>> +  /* A function that GDB can place a breakpoint on.  By the time we get
>>>> +     here we are as sure as we can be that all of the worker threads have
>>>> +     started and are in their worker action (spinning, or syscall).  */
>>>> +  breakpt ();
>>>> +
>>>> +  /* If GDB is not attached then this function will cause a crash, which
>>>> +     can be used to dump a core file, which GDB can then analyse.  */
>>>> +  crash_function ();
>>>> +
>>>> +  /* Due to the crash we never expect to get here.  Plus the worker actions
>>>> +     never terminate.  But for completeness, here's where we join with all
>>>> +     the worker threads.  */
>>>> +  for (int i = 0; i < job_count; ++i)
>>>> +    finalise_job (&jobs[i]);
>>>> +
>>>> +  /* Cleanup the barrier.  */
>>>> +  res = pthread_barrier_destroy (&global_barrier);
>>>> +  assert (res == 0);
>>>> +
>>>> +  /* And clean up the jobs list.  */
>>>> +  free (jobs);
>>>> +
>>>> +  return 0;
>>>> +}
>>>> diff --git a/gdb/testsuite/gdb.threads/threadcrash.exp b/gdb/testsuite/gdb.threads/threadcrash.exp
>>>> new file mode 100644
>>>> index 00000000000..996e020d1e8
>>>> --- /dev/null
>>>> +++ b/gdb/testsuite/gdb.threads/threadcrash.exp
>>>> @@ -0,0 +1,233 @@
>>>> +# This testcase is part of GDB, the GNU debugger.
>>>> +
>>>> +# Copyright 2023 Free Software Foundation, Inc.
>>>> +
>>>> +# This program is free software; you can redistribute it and/or modify
>>>> +# it under the terms of the GNU General Public License as published by
>>>> +# the Free Software Foundation; either version 3 of the License, or
>>>> +# (at your option) any later version.
>>>> +#
>>>> +# This program is distributed in the hope that it will be useful,
>>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>> +# GNU General Public License for more details.
>>>> +#
>>>> +# You should have received a copy of the GNU General Public License
>>>> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
>>>> +
>>>> +# This test case looks at GDB's ability to get correct backtraces for a
>>>> +# crashed inferior, recreating it from a live inferior, a corefile and
>>>> +# a gcore.
>>>> +
>>>> +
>>>> +# Check that the inferior has 7 threads, and return the number of threads (7).
>>>> +# We return the thread count so that, even if there is some error in the test,
>>>> +# the final log doesn't get flooded with failures.
>>>> +
>>>> +proc test_thread_count {} {
>>>> +    set thread_count 0
>>>> +
>>>> +    gdb_test_multiple "info threads" "getting thread count" -lbl {
>>>> +    -re "Thread" {
>>>> +        incr thread_count
>>>> +        exp_continue
>>>> +    }
>>>> +    -re "$::gdb_prompt " {
>>>> +        gdb_assert {$thread_count == 7}
>>>> +    }
>>>> +    }
>>>> +
>>>> +    return $thread_count
>>>> +}
>>>> +
>>>> +# Use 'thread apply all backtrace' to check if all expected threads
>>>> +# are present, and stopped in the expected locations.  Set the global
>>>> +# TEST_LIST to be a list of regexps expected to match all the
>>>> +# threads.  We generate it now so that the list is in the order that
>>>> +# GDB sees the threads.
>>>> +
>>>> +proc thread_apply_all {} {
>>>> +    global test_list
>>>> +
>>>> +    set test_list { }
>>>> +
>>>> +    set unwind_fail false
>>>> +
>>>> +    gdb_test_multiple "thread apply all backtrace" \
>>>> +    "Get thread information" -lbl {
>>>> +        -re "#\[0-9\]+\\\?\\\?\[^\n\]*" {
>>>> +        set unwind_fail true
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>>>> +        lappend test_list [multi_line ".*sleep.*" \
>>>> +                          ".*do_syscall_task .location=SIGNAL_ALT_STACK.*" \
>>>> +                          ".*signal_handler.*" \
>>>> +                          ".*signal handler called.*" \
>>>> +                          ".*pthread_kill.*" \
>>>> +                          ".*thread_function.*"]
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*syscall_task .location=SIGNAL_HANDLER\[^\n\]*" {
>>>> +        lappend test_list [multi_line ".*sleep.*" \
>>>> +                          ".*do_syscall_task .location=SIGNAL_HANDLER.*" \
>>>> +                          ".*signal_handler.*" \
>>>> +                          ".*signal handler called.*" \
>>>> +                          ".*pthread_kill.*" \
>>>> +                          ".*thread_function.*"]
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*syscall_task .location=NORMAL\[^\n\]*" {
>>>> +        lappend test_list [multi_line ".*sleep.*" \
>>>> +                          ".*do_syscall_task .location=NORMAL.*" \
>>>> +                          ".*thread_function.*"]
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*spin_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
>>>> +        lappend test_list [multi_line ".*do_spin_task .location=SIGNAL_ALT_STACK.*" \
>>>> +                          ".*signal_handler.*" \
>>>> +                          ".*signal handler called.*" \
>>>> +                          ".*pthread_kill.*" \
>>>> +                          ".*thread_function.*"]
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*spin_task .location=SIGNAL_HANDLER\[^\n\]*" {
>>>> +        lappend test_list [multi_line ".*do_spin_task .location=SIGNAL_HANDLER.*" \
>>>> +                          ".*signal_handler.*" \
>>>> +                          ".*signal handler called.*" \
>>>> +                          ".*pthread_kill.*" \
>>>> +                          ".*thread_function.*"]
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*spin_task .location=NORMAL\[^\n\]*" {
>>>> +        lappend test_list [multi_line ".*do_spin_task .location=NORMAL..*" \
>>>> +                          ".*thread_function.*"]
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "\[^\n\]*main\[^\n\]*" {
>>>> +        lappend test_list ".*main.*"
>>>> +        exp_continue
>>>> +        }
>>>> +        -re "$::gdb_prompt " {
>>>> +        pass $gdb_test_name
>>>> +        }
>>>> +    }
>>>> +
>>>> +    gdb_assert {$unwind_fail == false}
>>>> +}
>>>> +
>>>> +# Perform all the tests we're interested in.  They are:
>>>> +# * test if we have 7 threads
>>>> +# * Creating the list of backtraces for all threads seen
>>>> +# * testing if GDB recreated the full backtrace we expect for all threads
>>>> +
>>>> +proc do_full_test {} {
>>>> +    global test_list
>>>> +    set thread_count [test_thread_count]
>>>> +
>>>> +    thread_apply_all
>>>> +
>>>> +    gdb_assert {$thread_count == [llength $test_list]}
>>>> +
>>>> +    for {set i 0} {$i < $thread_count } {incr i} {
>>>> +    set thread_num [expr [llength $test_list] - $i]
>>>> +
>>>> +    gdb_test "thread apply $thread_num backtrace" [lindex $test_list $i]
>>>> +    }
>>>> +}
>>>> +
>>>> +# Do all preparation steps for running the live inferior tests, then
>>>> +# call do_full_test to actually run the tests.
>>>> +
>>>> +proc_with_prefix test_live_inferior {} {
>>>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>>>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>>>> +    "setup SIGUSR1"
>>>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>>>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>>>> +    "setup SIGUSR2"
>>>> +
>>>> +    if {![runto_main]} {
>>>> +    return
>>>> +    }
>>>> +
>>>> +    gdb_breakpoint "breakpt"
>>>> +    gdb_continue_to_breakpoint "running to breakpoint" ".*"
>>>> +
>>>> +    do_full_test
>>>> +}
>>>> +
>>>> +# Do all preparation steps for running the corefile tests, then
>>>> +# call do_full_test to actually run the tests.
>>>> +
>>>> +proc_with_prefix test_corefile {} {
>>>> +    set corefile [core_find $::binfile]
>>>> +    if { $corefile == "" } {
>>>> +    untested "couldn't generate corefile"
>>>> +    return
>>>> +    }
>>>> +    set corefile [gdb_remote_download host $corefile]
>>>> +
>>>> +    gdb_test "core-file $corefile" \
>>>> +         "" \
>>>> +         "loading_corefile" \
>>>> +         "A program is being debugged already\\\.  Kill it\\\? \\\(y or n\\\) " \
>>>> +         "y"
>>>> +
>>>> +    do_full_test
>>>> +}
>>>> +
>>>> +# Do all preparation steps for running the gcore tests, then
>>>> +# call do_full_test to actually run the tests.
>>>> +
>>>> +proc_with_prefix test_gcore {} {
>>>> +
>>>> +    clean_restart "$::binfile"
>>>> +
>>>> +    gdb_test "handle SIGUSR1 nostop print pass" \
>>>> +    ".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
>>>> +    "setup SIGUSR1"
>>>> +    gdb_test "handle SIGUSR2 nostop print pass" \
>>>> +    ".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
>>>> +    "setup SIGUSR2"
>>>> +
>>>> +    if {![runto_main]} {
>>>> +    return -1
>>>> +    }
>>>> +    gdb_test "continue" ".*Segmentation fault.*" "continue to crash"
>>>> +
>>>> +    set gcore_name "${::binfile}.gcore"
>>>> +    set gcore_supported [gdb_gcore_cmd "$gcore_name" "saving gcore"]
>>>> +
>>>> +    if {!$gcore_supported} {
>>>> +    unsupported "couldn't generate gcore file"
>>>> +    return
>>>> +    }
>>>> +
>>>> +    set corefile [gdb_remote_download host $gcore_name]
>>>> +
>>>> +    gdb_test "core-file $corefile" \
>>>> +         "" \
>>>> +         "loading_corefile" \
>>>> +         "A program is being debugged already\\\.  Kill it\\\? \\\(y or n\\\) " \
>>>> +         "y"
>>>> +
>>>> +    do_full_test
>>>> +}
>>>> +
>>>> +standard_testfile
>>>> +
>>>> +if [prepare_for_testing "failed to prepare" $testfile $srcfile \
>>>> +    {debug pthreads}] {
>>>> +    return -1
>>>> +}
>>>> +
>>>> +clean_restart ${binfile}
>>>> +
>>>> +gdb_test_no_output "set backtrace limit unlimited"
>>>> +
>>>> +test_live_inferior
>>>> +
>>>> +test_corefile
>>>> +
>>>> +test_gcore
>>>
>>>
>>
> 

Thanks for the patch.

I re-tested this on aarch64-linux, for native gdb/gdbserver and extended native gdbserver.

I'm getting full passes on my end. The code looks good to me.

Approved-By: Luis Machado <luis.machado@arm.com>
  
Guinevere Larsen Jan. 24, 2024, 12:54 p.m. UTC | #7
On 24/01/2024 13:35, Luis Machado wrote:
> Thanks for the patch.
>
> I re-tested this on aarch64-linux, for native gdb/gdbserver and extended native gdbserver.
>
> I'm getting full passes on my end. The code looks good to me.
>
> Approved-By: Luis Machado <luis.machado@arm.com>
>
Thank you! I've pushed this patch
  

Patch

diff --git a/gdb/testsuite/gdb.threads/threadcrash.c b/gdb/testsuite/gdb.threads/threadcrash.c
new file mode 100644
index 00000000000..e476ae7b07d
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/threadcrash.c
@@ -0,0 +1,443 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2023 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+
+/* The delay that the main thread gives once all the worker threads have
+   reached the barrier before the main thread enters the function on which
+   GDB will have placed a breakpoint.  */
+
+#define MAIN_THREAD_DELAY 2
+
+/* The maximum time we allow this test program to run for before an alarm
+   signal is sent and everything will exit.  */
+#define WATCHDOG_ALARM_TIME 600
+
+/* Aliases for the signals used within this program.  Each signal
+   corresponds to an action (from the FINAL_ACTION enum) that the signal
+   handler will perform.  */
+
+#define SPIN_SIGNAL SIGUSR1
+#define SYSCALL_SIGNAL SIGUSR2
+
+/* Describe the final action that a thread should perform.  */
+
+enum final_action
+  {
+    /* Thread should spin in an infinite loop.  */
+    SPIN = 0,
+
+    /* Thread should block in a syscall.  */
+    SYSCALL,
+
+    /* This is just a marker to allow for looping over the enum.  */
+    LAST_ACTION
+  };
+
+/* Where should the thread perform this action?  */
+
+enum exec_location
+  {
+    /* Just a normal thread, on a normal stack.  */
+    NORMAL = 0,
+
+    /* In a signal handler, but use the normal stack.  */
+    SIGNAL_HANDLER,
+
+    /* In a signal handler using an alternative stack.  */
+    SIGNAL_ALT_STACK,
+
+    /* This is just a marker to allow for looping over the enum.  */
+    LAST_LOCACTION
+  };
+
+/* A descriptor for a single thread job.  We create a new thread for each
+   job_description.  */
+
+struct job_description
+{
+  /* What action should this thread perform.  */
+  enum final_action action;
+
+  /* Where should the thread perform the action.  */
+  enum exec_location location;
+
+  /* The actual thread handle, so we can join with the thread.  */
+  pthread_t thread;
+};
+
+/* A pthread barrier, used to (try to) synchronise the threads.  */
+pthread_barrier_t global_barrier;
+
+/* Return a list of jobs, and place the length of the list in *COUNT.  */
+
+struct job_description *
+get_job_list (int *count)
+{
+  /* The number of jobs.  */
+  int num = LAST_ACTION * LAST_LOCACTION;
+
+  /* The uninitialised array of jobs.  */
+  struct job_description *list
+    = malloc (num * sizeof (struct job_description));
+  assert (list != NULL);
+
+  /* Fill the array with all possible jobs.  */
+  for (int i = 0; i < (int) LAST_ACTION; ++i)
+    for (int j = 0; j < (int) LAST_LOCACTION; ++j)
+      {
+	int idx = (i * LAST_LOCACTION) + j;
+	list[idx].action = (enum final_action) i;
+	list[idx].location = (enum exec_location) j;
+      }
+
+  /* Return the array of jobs.  */
+  *count = num;
+  return list;
+}
+
+/* This function should never be called.  If it is then an assertion will
+   trigger.  */
+
+void
+assert_not_reached (void)
+{
+  assert (0);
+}
+
+/* The function for a SPIN action.  Just spins in a loop.  The LOCATION
+   argument exists so GDB can identify the expected context for this
+   function.  */
+
+void
+do_spin_task (enum exec_location location)
+{
+  (void) location;
+
+  /* Let everyone know that we're about to perform our action.  */
+  int res = pthread_barrier_wait (&global_barrier);
+  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
+
+  while (1)
+    {
+      /* Nothing.  */
+    }
+}
+
+/* The function for a SYSCALL action.  Blocks in a sleep call.  The LOCATION
+   argument exists so GDB can identify the expected context for this
+   function.  */
+
+void
+do_syscall_task (enum exec_location location)
+{
+  (void) location;
+
+  /* Let everyone know that we're about to perform our action.  */
+  int res = pthread_barrier_wait (&global_barrier);
+  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
+
+  sleep (600);
+}
+
+/* Return the required size for a sigaltstack.  We start with a single
+   page, but do check against the system defined minimums.  We don't run
+   much on the alternative stacks, so we don't need a huge one.  */
+
+size_t
+get_stack_size (void)
+{
+  size_t size = getpagesize ();	/* Arbitrary starting size.  */
+  if (size < SIGSTKSZ)
+    size = SIGSTKSZ;
+  if (size < MINSIGSTKSZ)
+    size = MINSIGSTKSZ;
+  return size;
+}
+
+/* A descriptor for an alternative stack.  */
+
+struct stack_descriptor
+{
+  /* The base address of the alternative stack.  This is the address that
+     must be freed to release the memory used by this stack.  */
+  void *base;
+
+  /* The size of this alternative stack.  Tracked just so we can query this
+     from GDB.  */
+  size_t size;
+};
+
+/* Install an alternative signal stack.  Return a descriptor for the newly
+   allocated alternative stack.  */
+
+struct stack_descriptor
+setup_alt_stack (void)
+{
+  size_t stack_size = get_stack_size ();
+
+  void *stack_area = malloc (stack_size);
+
+  stack_t stk;
+  stk.ss_sp = stack_area;
+  stk.ss_flags = 0;
+  stk.ss_size = stack_size;
+
+  int res = sigaltstack (&stk, NULL);
+  assert (res == 0);
+
+  struct stack_descriptor desc;
+  desc.base = stack_area;
+  desc.size = stack_size;
+
+  return desc;
+}
+
+/* Return true (non-zero) if we are currently on the alternative stack,
+   otherwise, return false (zero).  */
+
+int
+on_alt_stack_p (void)
+{
+  stack_t stk;
+  int res = sigaltstack (NULL, &stk);
+  assert (res == 0);
+
+  return (stk.ss_flags & SS_ONSTACK) != 0;
+}
+
+/* The signal handler function.  All signals call here, so we use SIGNO
+   (the signal that was delivered) to decide what action to perform.  This
+   function might, or might not, have been called on an alternative signal
+   stack.  */
+
+void
+signal_handler (int signo)
+{
+  enum exec_location location
+    = on_alt_stack_p () ? SIGNAL_ALT_STACK : SIGNAL_HANDLER;
+
+  switch (signo)
+    {
+    case SPIN_SIGNAL:
+      do_spin_task (location);
+      break;
+
+    case SYSCALL_SIGNAL:
+      do_syscall_task (location);
+      break;
+
+    default:
+      assert_not_reached ();
+    }
+}
+
+/* The thread worker function.  ARG is a job_description pointer which
+   describes what this thread is expected to do.  This function always
+   returns a NULL pointer.  */
+
+void *
+thread_function (void *arg)
+{
+  struct job_description *job = (struct job_description *) arg;
+  struct stack_descriptor desc = { NULL, 0 };
+  int sa_flags = 0;
+
+  switch (job->location)
+    {
+    case NORMAL:
+      /* This thread performs the worker action on the current thread,
+	 select the correct worker function based on the requested
+	 action.  */
+      switch (job->action)
+	{
+	case SPIN:
+	  do_spin_task (NORMAL);
+	  break;
+
+	case SYSCALL:
+	  do_syscall_task (NORMAL);
+	  break;
+
+	default:
+	  assert_not_reached ();
+	}
+      break;
+
+    case SIGNAL_ALT_STACK:
+      /* This thread is to perform its action in a signal handler on the
+	 alternative stack.  Install the alternative stack now, and then
+	 fall through to the normal signal handler location code.  */
+      desc = setup_alt_stack ();
+      assert (desc.base != NULL);
+      assert (desc.size > 0);
+      sa_flags = SA_ONSTACK;
+
+      /* Fall through.  */
+    case SIGNAL_HANDLER:
+      {
+	/* This thread is to perform its action in a signal handler.  We
+	   might have just installed an alternative signal stack.  */
+	int signo, res;
+
+	/* Select the correct signal number so that the signal handler will
+	   perform the required action.  */
+	switch (job->action)
+	  {
+	  case SPIN:
+	    signo = SPIN_SIGNAL;
+	    break;
+
+	  case SYSCALL:
+	    signo = SYSCALL_SIGNAL;
+	    break;
+
+	  default:
+	    assert_not_reached ();
+	  }
+
+	/* Now setup the signal handler.  */
+	struct sigaction sa;
+	sa.sa_handler = signal_handler;
+	sigfillset (&sa.sa_mask);
+	sa.sa_flags = sa_flags;
+	res = sigaction (signo, &sa, NULL);
+	assert (res == 0);
+
+	/* Send the signal to this thread.  */
+	res = pthread_kill (job->thread, signo);
+	assert (res == 0);
+      }
+      break;
+
+    default:
+      assert_not_reached ();
+    };
+
+  /* Free the alt-stack if we allocated one, if not DESC.BASE will be
+     NULL so this call is fine.  */
+  free (desc.base);
+
+  /* Thread complete.  */
+  return NULL;
+}
+
+void
+start_job (struct job_description *job)
+{
+  int res;
+
+  res = pthread_create (&job->thread, NULL, thread_function, job);
+  assert (res == 0);
+}
+
+/* Join with the thread for JOB.  This will block until the thread for JOB
+   has finished.  */
+
+void
+finalise_job (struct job_description *job)
+{
+  int res;
+  void *retval;
+
+  res = pthread_join (job->thread, &retval);
+  assert (res == 0);
+  assert (retval == NULL);
+}
+
+/* Function that GDB can place a breakpoint on.  */
+
+void
+breakpt (void)
+{
+  /* Nothing.  */
+}
+
+/* Function that triggers a crash, if the user has setup their environment
+   correctly this will dump a core file, which GDB can then examine.  */
+
+void
+crash_function (void)
+{
+  volatile int *p = 0;
+  volatile int n = *p;
+  (void) n;
+}
+
+/* Entry point.  */
+
+int
+main ()
+{
+  int job_count, res;
+  struct job_description *jobs = get_job_list (&job_count);
+
+  /* This test is going to park some threads inside infinite loops.  Just
+     in case this program is left running, install an alarm that will cause
+     everything to exit.  */
+  alarm (WATCHDOG_ALARM_TIME);
+
+  /* We want each worker thread (of which there are JOB_COUNT) plus the
+     main thread (hence + 1) to wait at the barrier.  */
+  res = pthread_barrier_init (&global_barrier, NULL, job_count + 1);
+  assert (res == 0);
+
+  /* Start all the jobs.  */
+  for (int i = 0; i < job_count; ++i)
+    start_job (&jobs[i]);
+
+  /* Notify all the worker threads that we're waiting for them.  */
+  res = pthread_barrier_wait (&global_barrier);
+  assert (res == PTHREAD_BARRIER_SERIAL_THREAD || res == 0);
+
+  /* All we know at this point is that all the worker threads have reached
+     the barrier, which is just before they perform their action.  But we
+     really want them to start their action.
+
+     There's really no way we can be 100% certain that the worker threads
+     have started their action, all we can do is wait for a short while and
+     hope that the machine we're running on is not too slow.  */
+  sleep (MAIN_THREAD_DELAY);
+
+  /* A function that GDB can place a breakpoint on.  By the time we get
+     here we are as sure as we can be that all of the worker threads have
+     started and are in their worker action (spinning, or syscall).  */
+  breakpt ();
+
+  /* If GDB is not attached then this function will cause a crash, which
+     can be used to dump a core file, which GDB can then analyse.  */
+  crash_function ();
+
+  /* Due to the crash we never expect to get here.  Plus the worker actions
+     never terminate.  But for completeness, here's where we join with all
+     the worker threads.  */
+  for (int i = 0; i < job_count; ++i)
+    finalise_job (&jobs[i]);
+
+  /* Cleanup the barrier.  */
+  res = pthread_barrier_destroy (&global_barrier);
+  assert (res == 0);
+
+  /* And clean up the jobs list.  */
+  free (jobs);
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.threads/threadcrash.exp b/gdb/testsuite/gdb.threads/threadcrash.exp
new file mode 100644
index 00000000000..996e020d1e8
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/threadcrash.exp
@@ -0,0 +1,233 @@ 
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2023 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This test case looks at GDB's ability to get correct backtraces for a
+# crashed inferior, recreating it from a live inferior, a corefile and
+# a gcore.
+
+
+# Check that the inferior has 7 threads, and return the number of threads (7).
+# We return the thread count so that, even if there is some error in the test,
+# the final log doesn't get flooded with failures.
+
+proc test_thread_count {} {
+    set thread_count 0
+
+    gdb_test_multiple "info threads" "getting thread count" -lbl {
+	-re "Thread" {
+	    incr thread_count
+	    exp_continue
+	}
+	-re "$::gdb_prompt " {
+	    gdb_assert {$thread_count == 7}
+	}
+    }
+
+    return $thread_count
+}
+
+# Use 'thread apply all backtrace' to check if all expected threads
+# are present, and stopped in the expected locations.  Set the global
+# TEST_LIST to be a list of regexps expected to match all the
+# threads.  We generate it now so that the list is in the order that
+# GDB sees the threads.
+
+proc thread_apply_all {} {
+    global test_list
+
+    set test_list { }
+
+    set unwind_fail false
+
+    gdb_test_multiple "thread apply all backtrace" \
+	"Get thread information" -lbl {
+	    -re "#\[0-9\]+\\\?\\\?\[^\n\]*" {
+		set unwind_fail true
+		exp_continue
+	    }
+	    -re "\[^\n\]*syscall_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
+		lappend test_list [multi_line ".*sleep.*" \
+					      ".*do_syscall_task .location=SIGNAL_ALT_STACK.*" \
+					      ".*signal_handler.*" \
+					      ".*signal handler called.*" \
+					      ".*pthread_kill.*" \
+					      ".*thread_function.*"]
+		exp_continue
+	    }
+	    -re "\[^\n\]*syscall_task .location=SIGNAL_HANDLER\[^\n\]*" {
+		lappend test_list [multi_line ".*sleep.*" \
+					      ".*do_syscall_task .location=SIGNAL_HANDLER.*" \
+					      ".*signal_handler.*" \
+					      ".*signal handler called.*" \
+					      ".*pthread_kill.*" \
+					      ".*thread_function.*"]
+		exp_continue
+	    }
+	    -re "\[^\n\]*syscall_task .location=NORMAL\[^\n\]*" {
+		lappend test_list [multi_line ".*sleep.*" \
+					      ".*do_syscall_task .location=NORMAL.*" \
+					      ".*thread_function.*"]
+		exp_continue
+	    }
+	    -re "\[^\n\]*spin_task .location=SIGNAL_ALT_STACK\[^\n\]*" {
+		lappend test_list [multi_line ".*do_spin_task .location=SIGNAL_ALT_STACK.*" \
+					      ".*signal_handler.*" \
+					      ".*signal handler called.*" \
+					      ".*pthread_kill.*" \
+					      ".*thread_function.*"]
+		exp_continue
+	    }
+	    -re "\[^\n\]*spin_task .location=SIGNAL_HANDLER\[^\n\]*" {
+		lappend test_list [multi_line ".*do_spin_task .location=SIGNAL_HANDLER.*" \
+					      ".*signal_handler.*" \
+					      ".*signal handler called.*" \
+					      ".*pthread_kill.*" \
+					      ".*thread_function.*"]
+		exp_continue
+	    }
+	    -re "\[^\n\]*spin_task .location=NORMAL\[^\n\]*" {
+		lappend test_list [multi_line ".*do_spin_task .location=NORMAL..*" \
+					      ".*thread_function.*"]
+		exp_continue
+	    }
+	    -re "\[^\n\]*main\[^\n\]*" {
+		lappend test_list ".*main.*"
+		exp_continue
+	    }
+	    -re "$::gdb_prompt " {
+		pass $gdb_test_name
+	    }
+    }
+
+    gdb_assert {$unwind_fail == false}
+}
+
+# Perform all the tests we're interested in.  They are:
+# * Testing if we have 7 threads
+# * Creating the list of backtraces for all threads seen
+# * Testing if GDB recreated the full backtrace we expect for all threads
+
+proc do_full_test {} {
+    global test_list
+    set thread_count [test_thread_count]
+
+    thread_apply_all
+
+    gdb_assert {$thread_count == [llength $test_list]}
+
+    for {set i 0} {$i < $thread_count } {incr i} {
+	set thread_num [expr [llength $test_list] - $i]
+
+	gdb_test "thread apply $thread_num backtrace" [lindex $test_list $i]
+    }
+}
+
+# Do all preparation steps for running the live inferior tests, then
+# call do_full_test to actually run the tests.
+
+proc_with_prefix test_live_inferior {} {
+    gdb_test "handle SIGUSR1 nostop print pass" \
+	".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
+	"setup SIGUSR1"
+    gdb_test "handle SIGUSR2 nostop print pass" \
+	".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
+	"setup SIGUSR2"
+
+    if {![runto_main]} {
+	return
+    }
+
+    gdb_breakpoint "breakpt"
+    gdb_continue_to_breakpoint "running to breakpoint" ".*"
+
+    do_full_test
+}
+
+# Do all preparation steps for running the corefile tests, then
+# call do_full_test to actually run the tests.
+
+proc_with_prefix test_corefile {} {
+    set corefile [core_find $::binfile]
+    if { $corefile == "" } {
+	untested "couldn't generate corefile"
+	return
+    }
+    set corefile [gdb_remote_download host $corefile]
+
+    gdb_test "core-file $corefile" \
+	     "" \
+	     "loading_corefile" \
+	     "A program is being debugged already\\\.  Kill it\\\? \\\(y or n\\\) " \
+	     "y"
+
+    do_full_test
+}
+
+# Do all preparation steps for running the gcore tests, then
+# call do_full_test to actually run the tests.
+
+proc_with_prefix test_gcore {} {
+
+    clean_restart "$::binfile"
+
+    gdb_test "handle SIGUSR1 nostop print pass" \
+	".*SIGUSR1.*No.*Yes.*Yes.*User defined signal 1" \
+	"setup SIGUSR1"
+    gdb_test "handle SIGUSR2 nostop print pass" \
+	".*SIGUSR2.*No.*Yes.*Yes.*User defined signal 2" \
+	"setup SIGUSR2"
+
+    if {![runto_main]} {
+	return -1
+    }
+    gdb_test "continue" ".*Segmentation fault.*" "continue to crash"
+
+    set gcore_name "${::binfile}.gcore"
+    set gcore_supported [gdb_gcore_cmd "$gcore_name" "saving gcore"]
+
+    if {!$gcore_supported} {
+	unsupported "couldn't generate gcore file"
+	return
+    }
+
+    set corefile [gdb_remote_download host $gcore_name]
+
+    gdb_test "core-file $corefile" \
+	     "" \
+	     "loading_corefile" \
+	     "A program is being debugged already\\\.  Kill it\\\? \\\(y or n\\\) " \
+	     "y"
+
+    do_full_test
+}
+
+standard_testfile
+
+if [prepare_for_testing "failed to prepare" $testfile $srcfile \
+    {debug pthreads}] {
+    return -1
+}
+
+clean_restart ${binfile}
+
+gdb_test_no_output "set backtrace limit unlimited"
+
+test_live_inferior
+
+test_corefile
+
+test_gcore