[RFC,4/3] Fix previous patch and add header.
Commit Message
Sorry, I found that my method of testing — compiling these as a standalone
file and then including them into libc — caused problems. This should fix them.
I also moved the reusable functions into a separate header, as I will cover
strlen, rawmemchr and memchr as well.
Comments
On Tue, May 26, 2015 at 08:55:34PM +0200, Ondřej Bílka wrote:
>
On second thought, I ran into some nasty issues.
So I decided to resubmit this: all these functions follow the same
template, so I will just write that template and add a few function-specific bits.
On Tue, 26 May 2015, Ondřej Bílka wrote:
> diff --git a/string/common.h b/string/common.h
> new file mode 100644
> index 0000000..84f9767
> --- /dev/null
> +++ b/string/common.h
> @@ -0,0 +1,35 @@
> +#include <stdint.h>
All files should start with a comment that first describes the file, then
has the copyright and license notice.
> +/* Use vector arithmetic tricks. Idea is to take expression works on
> + unsigned byte and evaluates 0 for nozero byte and nonzero on zero byte.
> + Our expression is ((s + 127) ^ (~s)) & 128
> + Now we evaluate this expression on each byte in parallel and on first
> + nonzero byte our expression will have nonzero value. */
I think a more detailed, multi-paragraph comment carefully explaining the
algorithm used (and why and how it works) would be better. Like the one
discussed in bug 5806, but of course without the mistake discussed there.
The principle of a common header with this logic is a good one - hopefully
if it gets used everywhere this can resolve bug 5806 by eliminating all
copies of the old comment.
(The patch submission should carefully discuss how the algorithm relates
to the ones discussed in bug 5806 / 5807 or used in existing code. But
that's part of justifying the patches rather than for the comments in the
new code.)
> +static __always_inline
> +unsigned long int
> +contains_zero (unsigned long int s)
> +{
> + return ((s + add) ^ ~s) & high_bits;
Instead of using unsigned long int, architectures should be able to
configure the type used. I expect that for ilp32 configurations on 64-bit
architectures, where registers are 64-bit but unsigned long int is 32-bit,
such as x32 and MIPS n32, it will be better to use unsigned long long int.
> +#define CROSS_PAGE(x, n) (((uintptr_t)x) % 4096 >= 4096 - n)
It might also make sense for the use of 4096 here to be configurable by
architectures as well.
On Thu, May 28, 2015 at 05:17:22PM +0000, Joseph Myers wrote:
> On Tue, 26 May 2015, Ondřej Bílka wrote:
>
> > diff --git a/string/common.h b/string/common.h
> > new file mode 100644
> > index 0000000..84f9767
> > --- /dev/null
> > +++ b/string/common.h
> > @@ -0,0 +1,35 @@
> > +#include <stdint.h>
>
> All files should start with a comment that first describes the file, then
> has the copyright and license notice.
>
> > +/* Use vector arithmetic tricks. Idea is to take expression works on
> > + unsigned byte and evaluates 0 for nozero byte and nonzero on zero byte.
> > + Our expression is ((s + 127) ^ (~s)) & 128
> > + Now we evaluate this expression on each byte in parallel and on first
> > + nonzero byte our expression will have nonzero value. */
>
> I think a more detailed, multi-paragraph comment carefully explaining the
> algorithm used (and why and how it works) would be better. Like the one
> discussed in bug 5806, but of course without the mistake discussed there.
>
> The principle of a common header with this logic is a good one - hopefully
> if it gets used everywhere this can resolve bug 5806 by eliminating all
> copies of the old comment.
>
> (The patch submission should carefully discuss how the algorithm relates
> to the ones discussed in bug 5806 / 5807 or used in existing code. But
> that's part of justifying the patches rather than for the comments in the
> new code.)
>
> > +static __always_inline
> > +unsigned long int
> > +contains_zero (unsigned long int s)
> > +{
> > + return ((s + add) ^ ~s) & high_bits;
>
> Instead of using unsigned long int, architectures should be able to
> configure the type used. I expect that for ilp32 configurations on 64-bit
> architectures, where registers are 64-bit but unsigned long int is 32-bit,
> such as x32 and MIPS n32, it will be better to use unsigned long long int.
>
In the next iteration I would add a typedef that architectures could
override from their headers. Perhaps something like:
#ifndef VECTOR_INT
# define VECTOR_INT unsigned long int
#endif
typedef VECTOR_INT vector_int;
> > +#define CROSS_PAGE(x, n) (((uintptr_t)x) % 4096 >= 4096 - n)
>
> It might also make sense for the use of 4096 here to be configurable by
> architectures as well.
>
We could, but it is unlikely to make a noticeable difference, as a page is
crossed with probability around 0.8% for a uniformly random address.
new file mode 100644
@@ -0,0 +1,35 @@
+#include <stdint.h>
+
+static const unsigned long int ones = (~0UL / 255); /* 0x0101...*/
+static const unsigned long int add = 127 * (~0UL / 255);
+static const unsigned long int high_bits = 128 * (~0UL / 255);
+
+/* Use vector arithmetic tricks.  The idea is to take an expression that,
+   for an unsigned byte, evaluates to 0 for a nonzero byte and to a
+   nonzero value for a zero byte.  Our expression is ((s + 127) ^ (~s)) & 128.
+   We evaluate this expression on each byte in parallel; the first zero
+   input byte yields the first nonzero byte of the result.  */
+
+static __always_inline
+unsigned long int
+contains_zero (unsigned long int s)
+{
+ return ((s + add) ^ ~s) & high_bits;
+}
+
+#define LSIZE sizeof (unsigned long int)
+#define CROSS_PAGE(x, n) (((uintptr_t)x) % 4096 >= 4096 - n)
+
+static __always_inline
+size_t
+first_nonzero_byte (unsigned long int u)
+{
+#ifdef FAST_FFS
+ return ffsl (u) / 8 - 1;
+#else
+ u = u ^ (u - 1);
+ u = u & ones;
+ u = u * ones;
+ return (u >> (8 * LSIZE - 8)) - 1;
+#endif
+}
@@ -25,9 +25,8 @@
#undef strchr
-
/* Include strchrnul and let gcc inline it. */
-#define AS_STRCHRNUL
+#define AS_STRCHR
#define STRCHRNUL strchrnul_i
#include "string/strchrnul.c"
@@ -23,6 +23,7 @@
#include <string.h>
#include <libc-internal.h>
#include <stdint.h>
+#include "string/common.h"
#undef __strchrnul
#undef strchrnul
@@ -30,24 +31,8 @@
# define STRCHRNUL __strchrnul
#endif
-static const unsigned long int ones = (~0UL / 255); /* 0x0101...*/
-static const unsigned long int add = 127 * (~0UL / 255);
-static const unsigned long int high_bits = 128 * (~0UL / 255);
-/* Use vector arithmetic tricks. Idea is to take expression works on
- unsigned byte and evaluates 0 for nozero byte and nonzero on zero byte.
- Our expression is ((s + 127) ^ (~s)) & 128
- Now we evaluate this expression on each byte in parallel and on first
- nonzero byte our expression will have nonzero value. */
-
-static always_inline
-unsigned long int
-contains_zero (unsigned long int s)
-{
- return ((s + add) ^ ~s) & high_bits;
-}
-
-static always_inline
+static __always_inline
int
found_in_long_bytes(char *s, unsigned long int cmask, char **result)
{
@@ -55,19 +40,16 @@ found_in_long_bytes(char *s, unsigned long int cmask, char **result)
unsigned long int mask = contains_zero (*lptr) | contains_zero (*lptr ^ cmask);
if (mask)
{
- *result = s + ffsl (mask) / 8 - 1;
+ *result = s + first_nonzero_byte (mask);
return 1;
}
else
return 0;
}
-#define LSIZE sizeof (unsigned long int)
-#define CROSS_PAGE(x, n) (((uintptr_t)x)%4096 >= 4096 - n)
-
/* Find the first occurrence of C in S or the final NUL byte. */
#ifdef AS_STRCHR
-static always_inline
+static __always_inline
#endif
char *
STRCHRNUL (const char *s_in, int c_in)
@@ -122,7 +104,7 @@ STRCHRNUL (const char *s_in, int c_in)
>> (8 * (s_aligned - s));
if (mask)
- return s + ffsl (mask) / 8 - 1;
+ return s + first_nonzero_byte (mask);
if (found_in_long_bytes (s + 1 * LSIZE, cmask, &r))
return r;
@@ -160,14 +142,6 @@ STRCHRNUL (const char *s_in, int c_in)
return r;
}
-
-char *strchr(char *s, int c)
-{
- char *r = __strchrnule(s,c);
- return *r ? r : 0;
-
-}
-
-#ifndef AS_STRCHR
+#ifdef AS_STRCHR
weak_alias (__strchrnul, strchrnul)
#endif