glibc 2.21 - Machine maintainers, please test your machines.
Commit Message
On 1/24/2015 12:13 PM, Torvald Riegel wrote:
> One thing you could test is set __HAVE_64B_ATOMICS to 0 on tilegx32, and
> see whether it makes the semaphore tests pass.
I reran the tests with that change, and both tilegx64 and tilegx32 pass,
modulo pre-existing failures from the known bug (14266).
However, I'm now running with the following change, to see if tilegx32 will
also pass with it; this implements my suggestion of rounding new_sem to
an 8-byte boundary explicitly on ILP32 platforms. My guess is that this
change will also result in improved performance for x32 and AArch64 ILP32.
@@ -25,7 +25,7 @@
int
__new_sem_getvalue (sem_t *sem, int *sval)
{
- struct new_sem *isem = (struct new_sem *) sem;
+ struct new_sem *isem = to_new_sem (sem);
/* XXX Check for valid SEM parameter. */
/* FIXME This uses relaxed MO, even though POSIX specifies that this function
@@ -50,7 +50,7 @@ __new_sem_init (sem_t *sem, int pshared, unsigned int value)
}
/* Map to the internal type. */
- struct new_sem *isem = (struct new_sem *) sem;
+ struct new_sem *isem = to_new_sem (sem);
/* Use the values the caller provided. */
#if __HAVE_64B_ATOMICS
@@ -186,7 +186,9 @@ sem_open (const char *name, int oflag, ...)
return SEM_FAILED;
}
- /* Create the initial file content. */
+ /* Create the initial file content. Note that the new_sem
+ will have the necessary alignment in this case, so we are
+ not obliged to use the to_new_sem macro in this case. */
union
{
sem_t initsem;
@@ -56,7 +56,7 @@ futex_wake (unsigned int* futex, int processes_to_wake, int private)
int
__new_sem_post (sem_t *sem)
{
- struct new_sem *isem = (struct new_sem *) sem;
+ struct new_sem *isem = to_new_sem (sem);
int private = isem->private;
#if __HAVE_64B_ATOMICS
@@ -30,8 +30,8 @@ sem_timedwait (sem_t *sem, const struct timespec *abstime)
return -1;
}
- if (__new_sem_wait_fast ((struct new_sem *) sem, 0) == 0)
+ if (__new_sem_wait_fast (to_new_sem (sem), 0) == 0)
return 0;
else
- return __new_sem_wait_slow((struct new_sem *) sem, abstime);
+ return __new_sem_wait_slow(to_new_sem (sem), abstime);
}
@@ -22,10 +22,10 @@
int
__new_sem_wait (sem_t *sem)
{
- if (__new_sem_wait_fast ((struct new_sem *) sem, 0) == 0)
+ if (__new_sem_wait_fast (to_new_sem (sem), 0) == 0)
return 0;
else
- return __new_sem_wait_slow((struct new_sem *) sem, NULL);
+ return __new_sem_wait_slow(to_new_sem (sem), NULL);
}
versioned_symbol (libpthread, __new_sem_wait, sem_wait, GLIBC_2_1);
@@ -65,7 +65,7 @@ __new_sem_trywait (sem_t *sem)
{
/* We must not fail spuriously, so require a definitive result even if this
may lead to a long execution time. */
- if (__new_sem_wait_fast ((struct new_sem *) sem, 1) == 0)
+ if (__new_sem_wait_fast (to_new_sem (sem), 1) == 0)
return 0;
__set_errno (EAGAIN);
return -1;
@@ -168,6 +168,22 @@ struct new_sem
#endif
};
+/* The sem_t alignment is typically just 4 bytes for ILP32, whereas
+ new_sem is 8 bytes. For better atomic performance on platforms
+ that tolerate unaligned atomics (and to make it work at all on
+ platforms that don't), round up the new_sem pointer within the
+ sem_t object to an 8-byte boundary.
+
+ Note that in this case, the "pad" variable at the end of the
+ new_sem object extends beyond the end of the memory allocated
+ for the sem_t, so it's important that it not be initialized
+ or otherwise used. */
+#if __HAVE_64B_ATOMICS && !defined (_LP64)
+# define to_new_sem(s) ((struct new_sem *)(((uintptr_t)(s) + 4) & -8))
+#else
+# define to_new_sem(s) ((struct new_sem *)(s))
+#endif
+
struct old_sem
{
unsigned int value;