x86: Use _rdtsc intrinsic for HP_TIMING_NOW
Commit Message
Since _rdtsc intrinsic is supported in GCC 4.9, we can use it for
HP_TIMING_NOW.
* sysdeps/i386/i686/hp-timing.h: Include <x86intrin.h>.
(HP_TIMING_NOW): Use _rdtsc.
* sysdeps/x86_64/hp-timing.h: Just include
<sysdeps/i386/i686/hp-timing.h>.
---
sysdeps/i386/i686/hp-timing.h | 4 +++-
sysdeps/x86_64/hp-timing.h | 41 +----------------------------------
2 files changed, 4 insertions(+), 41 deletions(-)
Comments
On 01/10/2018 19:08, H.J. Lu wrote:
> Since _rdtsc intrinsic is supported in GCC 4.9, we can use it for
> HP_TIMING_NOW.
>
> * sysdeps/i386/i686/hp-timing.h: Include <x86intrin.h>.
> (HP_TIMING_NOW): Use _rdtsc.
> * sysdeps/x86_64/hp-timing.h: Just include
> <sysdeps/i386/i686/hp-timing.h>.
> ---
> sysdeps/i386/i686/hp-timing.h | 4 +++-
> sysdeps/x86_64/hp-timing.h | 41 +----------------------------------
> 2 files changed, 4 insertions(+), 41 deletions(-)
>
> diff --git a/sysdeps/i386/i686/hp-timing.h b/sysdeps/i386/i686/hp-timing.h
> index 59af526fdb..6eee8c58be 100644
> --- a/sysdeps/i386/i686/hp-timing.h
> +++ b/sysdeps/i386/i686/hp-timing.h
> @@ -20,6 +20,8 @@
> #ifndef _HP_TIMING_H
> #define _HP_TIMING_H 1
>
> +#include <x86intrin.h>
> +
> /* We always assume having the timestamp register. */
> #define HP_TIMING_AVAIL (1)
> #define HP_SMALL_TIMING_AVAIL (1)
> @@ -35,7 +37,7 @@ typedef unsigned long long int hp_timing_t;
> running in this moment. This could be changed by using a barrier like
> 'cpuid' right before the `rdtsc' instruciton. But we are not interested
> in accurate clock cycles here so we don't do this. */
> -#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
> +#define HP_TIMING_NOW(Var) ({ (Var) = _rdtsc (); })
Do we need a compound statement here? It does not use loops, switches, or
local variables.
>
> #include <hp-timing-common.h>
>
> diff --git a/sysdeps/x86_64/hp-timing.h b/sysdeps/x86_64/hp-timing.h
> index ec543bef03..46236b9777 100644
> --- a/sysdeps/x86_64/hp-timing.h
> +++ b/sysdeps/x86_64/hp-timing.h
> @@ -1,40 +1 @@
[...]
> -
> -#endif /* hp-timing.h */
> +#include <sysdeps/i386/i686/hp-timing.h>
>
I am not very found of cross abi includes like this one, wouldn't be better
to create a 'sysdeps/x86/hp-timing.h' and for 32-bit use _rdtsc iff
__i686__ is defined otherwise include generic header?
On Tue, Oct 2, 2018 at 6:15 AM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 01/10/2018 19:08, H.J. Lu wrote:
> > Since _rdtsc intrinsic is supported in GCC 4.9, we can use it for
> > HP_TIMING_NOW.
> >
> > * sysdeps/i386/i686/hp-timing.h: Include <x86intrin.h>.
> > (HP_TIMING_NOW): Use _rdtsc.
> > * sysdeps/x86_64/hp-timing.h: Just include
> > <sysdeps/i386/i686/hp-timing.h>.
> > ---
> > sysdeps/i386/i686/hp-timing.h | 4 +++-
> > sysdeps/x86_64/hp-timing.h | 41 +----------------------------------
> > 2 files changed, 4 insertions(+), 41 deletions(-)
> >
> > diff --git a/sysdeps/i386/i686/hp-timing.h b/sysdeps/i386/i686/hp-timing.h
> > index 59af526fdb..6eee8c58be 100644
> > --- a/sysdeps/i386/i686/hp-timing.h
> > +++ b/sysdeps/i386/i686/hp-timing.h
> > @@ -20,6 +20,8 @@
> > #ifndef _HP_TIMING_H
> > #define _HP_TIMING_H 1
> >
> > +#include <x86intrin.h>
> > +
> > /* We always assume having the timestamp register. */
> > #define HP_TIMING_AVAIL (1)
> > #define HP_SMALL_TIMING_AVAIL (1)
> > @@ -35,7 +37,7 @@ typedef unsigned long long int hp_timing_t;
> > running in this moment. This could be changed by using a barrier like
> > 'cpuid' right before the `rdtsc' instruciton. But we are not interested
> > in accurate clock cycles here so we don't do this. */
> > -#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
> > +#define HP_TIMING_NOW(Var) ({ (Var) = _rdtsc (); })
>
> Do we need a compound statement here? It does not use loops, switches, or
> local variables.
I have a followup patch to use _rdtscp which will need a local variable.
> >
> > #include <hp-timing-common.h>
> >
> > diff --git a/sysdeps/x86_64/hp-timing.h b/sysdeps/x86_64/hp-timing.h
> > index ec543bef03..46236b9777 100644
> > --- a/sysdeps/x86_64/hp-timing.h
> > +++ b/sysdeps/x86_64/hp-timing.h
> > @@ -1,40 +1 @@
>
> [...]
>
> > -
> > -#endif /* hp-timing.h */
> > +#include <sysdeps/i386/i686/hp-timing.h>
> >
>
> I am not very found of cross abi includes like this one, wouldn't be better
> to create a 'sysdeps/x86/hp-timing.h' and for 32-bit use _rdtsc iff
> __i686__ is defined otherwise include generic header?
I can do that.
On Tue, Oct 2, 2018 at 6:45 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Oct 2, 2018 at 6:15 AM Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
> > I am not very found of cross abi includes like this one, wouldn't be better
> > to create a 'sysdeps/x86/hp-timing.h' and for 32-bit use _rdtsc iff
> > __i686__ is defined otherwise include generic header?
>
> I can do that.
Here is the V2 patch. OK for master?
On 02/10/2018 13:56, H.J. Lu wrote:
> On Tue, Oct 2, 2018 at 6:45 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Tue, Oct 2, 2018 at 6:15 AM Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>
>>> I am not very found of cross abi includes like this one, wouldn't be better
>>> to create a 'sysdeps/x86/hp-timing.h' and for 32-bit use _rdtsc iff
>>> __i686__ is defined otherwise include generic header?
>>
>> I can do that.
>
> Here is the V2 patch. OK for master?
>
> NB: Checking if __i686__ isn't sufficient since __i686__ may not be
> defined when building for i686 class processors.
Right, it seems gcc does not define it for -march newer than pentium4.
> diff --git a/sysdeps/i386/i586/init-arch.h b/sysdeps/i386/i586/isa.h
> similarity index 89%
> rename from sysdeps/i386/i586/init-arch.h
> rename to sysdeps/i386/i586/isa.h
> index 72fb46c61e..a2b5d585d4 100644
> --- a/sysdeps/i386/i586/init-arch.h
> +++ b/sysdeps/i386/i586/isa.h
> @@ -1,4 +1,4 @@
> -/* Copyright (C) 2015-2018 Free Software Foundation, Inc.
> +/* Copyright (C) 2018 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -15,5 +15,9 @@
> License along with the GNU C Library; if not, see
> <http://www.gnu.org/licenses/>. */
>
> +#ifndef _isa_h
> +#define _isa_h
> +
Not sure if this is explicit on our coding rules, but usually we
use uppercase caps for preprocessor guards. I think it requires
a one-line description as well (same for other isa.h files).
> -#ifndef _HP_TIMING_H
> -#define _HP_TIMING_H 1
> +#include <isa.h>
> +
> +#if MINIMUM_ISA == 686 || MINIMUM_ISA == 8664
> +# ifndef _HP_TIMING_H
> +# define _HP_TIMING_H 1
> +
The include guard inside the MINIMUM_ISA check seems off, why do
you need it?
On Tue, 2 Oct 2018, Adhemerval Zanella wrote:
> > NB: Checking if __i686__ isn't sufficient since __i686__ may not be
> > defined when building for i686 class processors.
>
> Right, it seems gcc does not define it for -march newer than pentium4.
Specifically, you need a negative test (testing not building for an older
processor) as in sysdeps/x86/cpu-features.h. (There's probably a case for
factoring it out so there's a single header, usable from assembly sources
as well as from C code, that defines a macro, suitable for #if tests, that
can be used to test "known to be i686 or later".)
On Tue, Oct 2, 2018 at 10:23 AM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 02/10/2018 13:56, H.J. Lu wrote:
> > On Tue, Oct 2, 2018 at 6:45 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>
> >> On Tue, Oct 2, 2018 at 6:15 AM Adhemerval Zanella
> >> <adhemerval.zanella@linaro.org> wrote:
> >
> >>> I am not very found of cross abi includes like this one, wouldn't be better
> >>> to create a 'sysdeps/x86/hp-timing.h' and for 32-bit use _rdtsc iff
> >>> __i686__ is defined otherwise include generic header?
> >>
> >> I can do that.
> >
> > Here is the V2 patch. OK for master?
> >
>
> > NB: Checking if __i686__ isn't sufficient since __i686__ may not be
> > defined when building for i686 class processors.
>
> Right, it seems gcc does not define it for -march newer than pentium4.
>
> > diff --git a/sysdeps/i386/i586/init-arch.h b/sysdeps/i386/i586/isa.h
> > similarity index 89%
> > rename from sysdeps/i386/i586/init-arch.h
> > rename to sysdeps/i386/i586/isa.h
> > index 72fb46c61e..a2b5d585d4 100644
> > --- a/sysdeps/i386/i586/init-arch.h
> > +++ b/sysdeps/i386/i586/isa.h
> > @@ -1,4 +1,4 @@
> > -/* Copyright (C) 2015-2018 Free Software Foundation, Inc.
> > +/* Copyright (C) 2018 Free Software Foundation, Inc.
> > This file is part of the GNU C Library.
> >
> > The GNU C Library is free software; you can redistribute it and/or
> > @@ -15,5 +15,9 @@
> > License along with the GNU C Library; if not, see
> > <http://www.gnu.org/licenses/>. */
> >
> > +#ifndef _isa_h
> > +#define _isa_h
> > +
>
> Not sure if this is explicit on our coding rules, but usually we
> use uppercase caps for preprocessor guards. I think it requires
> a one-line description as well (same for other isa.h files).
I will change that.
> > -#ifndef _HP_TIMING_H
> > -#define _HP_TIMING_H 1
> > +#include <isa.h>
> > +
> > +#if MINIMUM_ISA == 686 || MINIMUM_ISA == 8664
> > +# ifndef _HP_TIMING_H
> > +# define _HP_TIMING_H 1
> > +
>
> The include guard inside the MINIMUM_ISA check seems off, why do
> you need it?
<sysdeps/generic/hp-timing.h> will be skipped if _HP_TIMING_H is
defined. That is the draw back for the single x86 hp-timing.h.
@@ -20,6 +20,8 @@
#ifndef _HP_TIMING_H
#define _HP_TIMING_H 1
+#include <x86intrin.h>
+
/* We always assume having the timestamp register. */
#define HP_TIMING_AVAIL (1)
#define HP_SMALL_TIMING_AVAIL (1)
@@ -35,7 +37,7 @@ typedef unsigned long long int hp_timing_t;
running in this moment. This could be changed by using a barrier like
'cpuid' right before the `rdtsc' instruciton. But we are not interested
in accurate clock cycles here so we don't do this. */
-#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
+#define HP_TIMING_NOW(Var) ({ (Var) = _rdtsc (); })
#include <hp-timing-common.h>
@@ -1,40 +1 @@
-/* High precision, low overhead timing functions. x86-64 version.
- Copyright (C) 2002-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _HP_TIMING_H
-#define _HP_TIMING_H 1
-
-/* We always assume having the timestamp register. */
-#define HP_TIMING_AVAIL (1)
-#define HP_SMALL_TIMING_AVAIL (1)
-
-/* We indeed have inlined functions. */
-#define HP_TIMING_INLINE (1)
-
-/* We use 64bit values for the times. */
-typedef unsigned long long int hp_timing_t;
-
-/* The "=A" constraint used in 32-bit mode does not work in 64-bit mode. */
-#define HP_TIMING_NOW(Var) \
- ({ unsigned int _hi, _lo; \
- asm volatile ("rdtsc" : "=a" (_lo), "=d" (_hi)); \
- (Var) = ((unsigned long long int) _hi << 32) | _lo; })
-
-#include <hp-timing-common.h>
-
-#endif /* hp-timing.h */
+#include <sysdeps/i386/i686/hp-timing.h>