malloc/tst-mallocfork2: Kill lingering process for unexpected failures
Commit Message
If the test fails due some unexpected failure after the children
creation, either in the signal handler by calling abort or in the main
loop; the created children might not be killed properly.
This patches fixes it by:
* Avoid aborting in the signal handler by setting a flag that
an error has occured and add a check in the main loop.
* Add a atfork handler to handle kill the signal sending
processes.
Checked on x86_64-linux-gnu.
---
malloc/tst-mallocfork2.c | 39 ++++++++++++++++++++++++++++-----------
1 file changed, 28 insertions(+), 11 deletions(-)
Comments
On 2/20/20 11:58 AM, Adhemerval Zanella wrote:
> If the test fails due some unexpected failure after the children
> creation, either in the signal handler by calling abort or in the main
> loop; the created children might not be killed properly.
>
> This patches fixes it by:
>
> * Avoid aborting in the signal handler by setting a flag that
> an error has occured and add a check in the main loop.
>
> * Add a atfork handler to handle kill the signal sending
> processes.
s/atfork/atexit/, per the code below.
"handle kill the signal sending processes" doesn't read well.
Perhaps "kill child processes"?
> Checked on x86_64-linux-gnu.
> ---
> malloc/tst-mallocfork2.c | 39 ++++++++++++++++++++++++++++-----------
> 1 file changed, 28 insertions(+), 11 deletions(-)
>
> diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c
> index 0602a94895..4caf61489f 100644
> --- a/malloc/tst-mallocfork2.c
> +++ b/malloc/tst-mallocfork2.c
> @@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received;
> progress. Checked by liveness_signal_handler. */
> static volatile sig_atomic_t progress_indicator = 1;
>
> +/* Set to 1 if an error occurs in the signal handler. */
> +static volatile sig_atomic_t error_indicator = 0;
> +
> static void
> sigusr1_handler (int signo)
> {
> @@ -72,7 +75,8 @@ sigusr1_handler (int signo)
> if (pid == -1)
> {
> write_message ("error: fork\n");
> - abort ();
> + error_indicator = 1;
> + return;
> }
> if (pid == 0)
> _exit (0);
> @@ -81,12 +85,14 @@ sigusr1_handler (int signo)
> if (ret < 0)
> {
> write_message ("error: waitpid\n");
> - abort ();
> + error_indicator = 1;
> + return;
> }
> if (status != 0)
> {
> write_message ("error: unexpected exit status from subprocess\n");
> - abort ();
> + error_indicator = 1;
> + return;
> }
> }
>
OK
> @@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep)
> }
> }
>
> +/* Children processes. */
> +static pid_t sigusr1_sender_pids[5] = { -1 };
Doesn't this set [0] to -1 and [1..4] to 0 ?
> +static pid_t sigusr2_sender_pid = -1;
> +
> +static void
> +kill_children (void)
> +{
> + for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
> + if (sigusr1_sender_pids[i] != -1)
> + kill (sigusr1_sender_pids[i], SIGKILL);
...and this will perform kill (0, SIGKILL) if not otherwise initialized, which will kill this task immediately, if I understand correctly. I presume that's not what you want.
Why not use 0 as your initial/uninitialized value, and test against that? Also, that way, you'll never issue kill (0, ...).
> + if (sigusr2_sender_pid != -1)
> + kill (sigusr2_sender_pid, SIGKILL);
Same.
> +}
> +
> static int
> do_test (void)
> {
> + atexit (kill_children);
> +
> /* shared->barrier is intialized along with sigusr1_sender_pids
> below. */
> shared = support_shared_allocate (sizeof (*shared));
> @@ -148,14 +170,13 @@ do_test (void)
> return 1;
> }
>
> - pid_t sigusr2_sender_pid = xfork ();
> + sigusr2_sender_pid = xfork ();
> if (sigusr2_sender_pid == 0)
> signal_sender (SIGUSR2, true);
>
> /* Send SIGUSR1 signals from several processes. Hopefully, one
> signal will hit one of the ciritical functions. Use a barrier to
> avoid sending signals while not running fork/free/malloc. */
> - pid_t sigusr1_sender_pids[5];
> {
> pthread_barrierattr_t attr;
> xpthread_barrierattr_init (&attr);
> @@ -166,7 +187,7 @@ do_test (void)
> }
> for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
> {
> - sigusr1_sender_pids[i] = fork ();
> + sigusr1_sender_pids[i] = xfork ();
> if (sigusr1_sender_pids[i] == 0)
> signal_sender (SIGUSR1, false);
> }
> @@ -211,7 +232,7 @@ do_test (void)
> ++malloc_signals;
> xpthread_barrier_wait (&shared->barrier);
>
> - if (objects[slot] == NULL)
> + if (objects[slot] == NULL || error_indicator != 0)
> {
> printf ("error: malloc: %m\n");
> for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
> @@ -225,10 +246,6 @@ do_test (void)
> for (int slot = 0; slot < malloc_objects; ++slot)
> free (objects[slot]);
>
> - for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
> - kill (sigusr1_sender_pids[i], SIGKILL);
> - kill (sigusr2_sender_pid, SIGKILL);
> -
OK
PC
On 21/02/2020 17:07, Paul Clarke wrote:
> On 2/20/20 11:58 AM, Adhemerval Zanella wrote:
>> If the test fails due some unexpected failure after the children
>> creation, either in the signal handler by calling abort or in the main
>> loop; the created children might not be killed properly.
>>
>> This patches fixes it by:
>>
>> * Avoid aborting in the signal handler by setting a flag that
>> an error has occured and add a check in the main loop.
>>
>> * Add a atfork handler to handle kill the signal sending
>> processes.
>
> s/atfork/atexit/, per the code below.
Ack.
>
> "handle kill the signal sending processes" doesn't read well.
> Perhaps "kill child processes"?
Ack.
>
>> Checked on x86_64-linux-gnu.
>> ---
>> malloc/tst-mallocfork2.c | 39 ++++++++++++++++++++++++++++-----------
>> 1 file changed, 28 insertions(+), 11 deletions(-)
>>
>> diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c
>> index 0602a94895..4caf61489f 100644
>> --- a/malloc/tst-mallocfork2.c
>> +++ b/malloc/tst-mallocfork2.c
>> @@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received;
>> progress. Checked by liveness_signal_handler. */
>> static volatile sig_atomic_t progress_indicator = 1;
>>
>> +/* Set to 1 if an error occurs in the signal handler. */
>> +static volatile sig_atomic_t error_indicator = 0;
>> +
>> static void
>> sigusr1_handler (int signo)
>> {
>> @@ -72,7 +75,8 @@ sigusr1_handler (int signo)
>> if (pid == -1)
>> {
>> write_message ("error: fork\n");
>> - abort ();
>> + error_indicator = 1;
>> + return;
>> }
>> if (pid == 0)
>> _exit (0);
>> @@ -81,12 +85,14 @@ sigusr1_handler (int signo)
>> if (ret < 0)
>> {
>> write_message ("error: waitpid\n");
>> - abort ();
>> + error_indicator = 1;
>> + return;
>> }
>> if (status != 0)
>> {
>> write_message ("error: unexpected exit status from subprocess\n");
>> - abort ();
>> + error_indicator = 1;
>> + return;
>> }
>> }
>>
>
> OK
>
>> @@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep)
>> }
>> }
>>
>> +/* Children processes. */
>> +static pid_t sigusr1_sender_pids[5] = { -1 };
>
> Doesn't this set [0] to -1 and [1..4] to 0 ?
>
>> +static pid_t sigusr2_sender_pid = -1;
>> +
>> +static void
>> +kill_children (void)
>> +{
>> + for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
>> + if (sigusr1_sender_pids[i] != -1)
>> + kill (sigusr1_sender_pids[i], SIGKILL);
>
> ...and this will perform kill (0, SIGKILL) if not otherwise initialized, which will kill this task immediately, if I understand correctly. I presume that's not what you want.
>
> Why not use 0 as your initial/uninitialized value, and test against that? Also, that way, you'll never issue kill (0, ...).
Ack, I changed to use 0.
>
>> + if (sigusr2_sender_pid != -1)
>> + kill (sigusr2_sender_pid, SIGKILL);
>
> Same.
>
>> +}
>> +
>> static int
>> do_test (void)
>> {
>> + atexit (kill_children);
>> +
>> /* shared->barrier is intialized along with sigusr1_sender_pids
>> below. */
>> shared = support_shared_allocate (sizeof (*shared));
>> @@ -148,14 +170,13 @@ do_test (void)
>> return 1;
>> }
>>
>> - pid_t sigusr2_sender_pid = xfork ();
>> + sigusr2_sender_pid = xfork ();
>> if (sigusr2_sender_pid == 0)
>> signal_sender (SIGUSR2, true);
>>
>> /* Send SIGUSR1 signals from several processes. Hopefully, one
>> signal will hit one of the ciritical functions. Use a barrier to
>> avoid sending signals while not running fork/free/malloc. */
>> - pid_t sigusr1_sender_pids[5];
>> {
>> pthread_barrierattr_t attr;
>> xpthread_barrierattr_init (&attr);
>> @@ -166,7 +187,7 @@ do_test (void)
>> }
>> for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
>> {
>> - sigusr1_sender_pids[i] = fork ();
>> + sigusr1_sender_pids[i] = xfork ();
>> if (sigusr1_sender_pids[i] == 0)
>> signal_sender (SIGUSR1, false);
>> }
>> @@ -211,7 +232,7 @@ do_test (void)
>> ++malloc_signals;
>> xpthread_barrier_wait (&shared->barrier);
>>
>> - if (objects[slot] == NULL)
>> + if (objects[slot] == NULL || error_indicator != 0)
>> {
>> printf ("error: malloc: %m\n");
>> for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
>> @@ -225,10 +246,6 @@ do_test (void)
>> for (int slot = 0; slot < malloc_objects; ++slot)
>> free (objects[slot]);
>>
>> - for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
>> - kill (sigusr1_sender_pids[i], SIGKILL);
>> - kill (sigusr2_sender_pid, SIGKILL);
>> -
>
> OK
>
> PC
>
Thanks for the review, I will push with your suggested changes.
@@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received;
progress. Checked by liveness_signal_handler. */
static volatile sig_atomic_t progress_indicator = 1;
+/* Set to 1 if an error occurs in the signal handler. */
+static volatile sig_atomic_t error_indicator = 0;
+
static void
sigusr1_handler (int signo)
{
@@ -72,7 +75,8 @@ sigusr1_handler (int signo)
if (pid == -1)
{
write_message ("error: fork\n");
- abort ();
+ error_indicator = 1;
+ return;
}
if (pid == 0)
_exit (0);
@@ -81,12 +85,14 @@ sigusr1_handler (int signo)
if (ret < 0)
{
write_message ("error: waitpid\n");
- abort ();
+ error_indicator = 1;
+ return;
}
if (status != 0)
{
write_message ("error: unexpected exit status from subprocess\n");
- abort ();
+ error_indicator = 1;
+ return;
}
}
@@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep)
}
}
+/* Children processes. */
+static pid_t sigusr1_sender_pids[5] = { -1 };
+static pid_t sigusr2_sender_pid = -1;
+
+static void
+kill_children (void)
+{
+ for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
+ if (sigusr1_sender_pids[i] != -1)
+ kill (sigusr1_sender_pids[i], SIGKILL);
+ if (sigusr2_sender_pid != -1)
+ kill (sigusr2_sender_pid, SIGKILL);
+}
+
static int
do_test (void)
{
+ atexit (kill_children);
+
/* shared->barrier is intialized along with sigusr1_sender_pids
below. */
shared = support_shared_allocate (sizeof (*shared));
@@ -148,14 +170,13 @@ do_test (void)
return 1;
}
- pid_t sigusr2_sender_pid = xfork ();
+ sigusr2_sender_pid = xfork ();
if (sigusr2_sender_pid == 0)
signal_sender (SIGUSR2, true);
/* Send SIGUSR1 signals from several processes. Hopefully, one
signal will hit one of the ciritical functions. Use a barrier to
avoid sending signals while not running fork/free/malloc. */
- pid_t sigusr1_sender_pids[5];
{
pthread_barrierattr_t attr;
xpthread_barrierattr_init (&attr);
@@ -166,7 +187,7 @@ do_test (void)
}
for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
{
- sigusr1_sender_pids[i] = fork ();
+ sigusr1_sender_pids[i] = xfork ();
if (sigusr1_sender_pids[i] == 0)
signal_sender (SIGUSR1, false);
}
@@ -211,7 +232,7 @@ do_test (void)
++malloc_signals;
xpthread_barrier_wait (&shared->barrier);
- if (objects[slot] == NULL)
+ if (objects[slot] == NULL || error_indicator != 0)
{
printf ("error: malloc: %m\n");
for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
@@ -225,10 +246,6 @@ do_test (void)
for (int slot = 0; slot < malloc_objects; ++slot)
free (objects[slot]);
- for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
- kill (sigusr1_sender_pids[i], SIGKILL);
- kill (sigusr2_sender_pid, SIGKILL);
-
printf ("info: signals received during fork: %u\n", fork_signals);
printf ("info: signals received during free: %u\n", free_signals);
printf ("info: signals received during malloc: %u\n", malloc_signals);