Remove strcmp inlines

Message ID AM5PR0802MB2610A1F99165A04F1C48645283980@AM5PR0802MB2610.eurprd08.prod.outlook.com
State Superseded
Headers

Commit Message

Wilco Dijkstra Dec. 12, 2016, 12:11 p.m. UTC
  Remove the str(n)cmp inlines from string/bits/string2.h.  The strncmp
optimization seems unlikely to ever be useful, but if it occurs in
real code it should be added to GCC.  Expanding strcmp of small strings
does appear useful (benchmarking shows it is 2-3x faster), so this would
be useful to implement in GCC.

ChangeLog:
2015-12-12  Wilco Dijkstra  <wdijkstr@arm.com>

	* string/bits/string2.h (strcmp): Remove define.
	(__strcmp_cg): Likewise.
	(strncmp): Likewise.
--
  

Comments

Zack Weinberg Dec. 12, 2016, 12:40 p.m. UTC | #1
On Mon, Dec 12, 2016 at 7:11 AM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
> Remove the str(n)cmp inlines from string/bits/string2.h.  The strncmp
> optimization seems unlikely to ever be useful, but if it occurs in
> real code it should be added to GCC.  Expanding strcmp of small strings
> does appear useful (benchmarking shows it is 2-3x faster), so this would
> be useful to implement in GCC.

I'm inclined to say that the strcmp expansion should stick around
until after it _is_ added to GCC.  Code such as

int main(int argc, char **argv)
{
    return (argc == 2 &&
        (!strcmp(argv[1], "-w") ||
         !strcmp(argv[1], "-x") ||
         !strcmp(argv[1], "-z") ||
         !strcmp(argv[1], "-t")));
}

is reasonably likely to appear in real life and is significantly
improved by the expansion.

zw
  
Adhemerval Zanella Dec. 12, 2016, 12:40 p.m. UTC | #2
LGTM.

On 12/12/2016 10:11, Wilco Dijkstra wrote:
> Remove the str(n)cmp inlines from string/bits/string2.h.  The strncmp
> optimization seems unlikely to ever be useful, but if it occurs in
> real code it should be added to GCC.  Expanding strcmp of small strings
> does appear useful (benchmarking shows it is 2-3x faster), so this would
> be useful to implement in GCC.
> 
> ChangeLog:
> 2015-12-12  Wilco Dijkstra  <wdijkstr@arm.com>
> 
> 	* string/bits/string2.h (strcmp): Remove define.
> 	(__strcmp_cg): Likewise.
> 	(strncmp): Likewise.
> --
> 
> diff --git a/string/bits/string2.h b/string/bits/string2.h
> index b0be5f6a49024a0bedfc37e9ec2c0356e0e4fa09..5e0339223697256fff7eba4cc32d2eb618f07713 100644
> --- a/string/bits/string2.h
> +++ b/string/bits/string2.h
> @@ -60,64 +60,6 @@
>  # define __stpcpy(dest, src) __builtin_stpcpy (dest, src)
>  #endif
>  
> -/* Compare characters of S1 and S2.  */
> -#ifndef strcmp
> -# define strcmp(s1, s2) \
> -  __extension__								      \
> -  ({ size_t __s1_len, __s2_len;						      \
> -     (__builtin_constant_p (s1) && __builtin_constant_p (s2)		      \
> -      && (__s1_len = strlen (s1), __s2_len = strlen (s2),		      \
> -	  (!__string2_1bptr_p (s1) || __s1_len >= 4)			      \
> -	  && (!__string2_1bptr_p (s2) || __s2_len >= 4))		      \
> -      ? __builtin_strcmp (s1, s2)					      \
> -      : (__builtin_constant_p (s1) && __string2_1bptr_p (s1)		      \
> -	 && (__s1_len = strlen (s1), __s1_len < 4)			      \
> -	 ? (__builtin_constant_p (s2) && __string2_1bptr_p (s2)		      \
> -	    ? __builtin_strcmp (s1, s2)					      \
> -	    : __strcmp_cg (s1, s2, __s1_len))				      \
> -	 : (__builtin_constant_p (s2) && __string2_1bptr_p (s2)		      \
> -	    && (__s2_len = strlen (s2), __s2_len < 4)			      \
> -	    ? (__builtin_constant_p (s1) && __string2_1bptr_p (s1)	      \
> -	       ? __builtin_strcmp (s1, s2)				      \
> -	       : -__strcmp_cg (s2, s1, __s2_len))			      \
> -            : __builtin_strcmp (s1, s2)))); })
> -
> -# define __strcmp_cg(s1, s2, l1) \
> -  (__extension__ ({ const unsigned char *__s2 =				      \
> -		      (const unsigned char *) (const char *) (s2);	      \
> -		    int __result =					      \
> -		      (((const unsigned char *) (const char *) (s1))[0]	      \
> -		       - __s2[0]);					      \
> -		    if (l1 > 0 && __result == 0)			      \
> -		      {							      \
> -			__result = (((const unsigned char *)		      \
> -				     (const char *) (s1))[1] - __s2[1]);      \
> -			if (l1 > 1 && __result == 0)			      \
> -			  {						      \
> -			    __result = (((const unsigned char *)	      \
> -					 (const char *) (s1))[2] - __s2[2]);  \
> -			    if (l1 > 2 && __result == 0)		      \
> -			      __result = (((const unsigned char *)	      \
> -					  (const char *)  (s1))[3]	      \
> -					  - __s2[3]);			      \
> -			  }						      \
> -		      }							      \
> -		    __result; }))
> -#endif
> -
> -
> -/* Compare N characters of S1 and S2.  */
> -#ifndef strncmp
> -# define strncmp(s1, s2, n)						      \
> -  (__extension__ (__builtin_constant_p (n)				      \
> -		  && ((__builtin_constant_p (s1)			      \
> -		       && strlen (s1) < ((size_t) (n)))			      \
> -		      || (__builtin_constant_p (s2)			      \
> -			  && strlen (s2) < ((size_t) (n))))		      \
> -		  ? strcmp (s1, s2) : strncmp (s1, s2, n)))
> -#endif
> -
> -
>  #ifndef _FORCE_INLINES
>  # undef __STRING_INLINE
>  #endif
> 
> 
> 
>
  
Adhemerval Zanella Dec. 12, 2016, 12:48 p.m. UTC | #3
On 12/12/2016 10:40, Zack Weinberg wrote:
> On Mon, Dec 12, 2016 at 7:11 AM, Wilco Dijkstra <Wilco.Dijkstra@arm.com> wrote:
>> Remove the str(n)cmp inlines from string/bits/string2.h.  The strncmp
>> optimization seems unlikely to ever be useful, but if it occurs in
>> real code it should be added to GCC.  Expanding strcmp of small strings
>> does appear useful (benchmarking shows it is 2-3x faster), so this would
>> be useful to implement in GCC.
> 
> I'm inclined to say that the strcmp expansion should stick around
> until after it _is_ added to GCC.  Code such as
> 
> int main(int argc, char **argv)
> {
>     return (argc == 2 &&
>         (!strcmp(argv[1], "-w") ||
>          !strcmp(argv[1], "-x") ||
>          !strcmp(argv[1], "-z") ||
>          !strcmp(argv[1], "-t")));
> }
> 
> is reasonably likely to appear in real life and is significantly
> improved by the expansion.

But I hardly see this as a performance hotstop for any significant workload,
but rather as a microptimization that has driven string2.h creation as a
whole.

Also the idea is to avoid rely on specific libc implementation to actually
get these kind of optimization (if it is worth).
  
Zack Weinberg Dec. 12, 2016, 1:31 p.m. UTC | #4
On Mon, Dec 12, 2016 at 7:48 AM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
> On 12/12/2016 10:40, Zack Weinberg wrote:
>> I'm inclined to say that the strcmp expansion should stick around
>> until after it _is_ added to GCC.
>
> But I hardly see this as a performance hotstop for any significant workload,
> but rather as a microptimization that has driven string2.h creation as a
> whole.
>
> Also the idea is to avoid rely on specific libc implementation to actually
> get these kind of optimization (if it is worth).

I'd agree with this if we were talking about a new addition to
string2.h - those should be justified by something beyond just
optimization.  (For instance, the explicit_bzero inline is a partial
mitigation for the problem with variables whose address otherwise
wouldn't be taken.)

With an _existing_ string2.h microoptimization though, I think we need
to be able to argue that it is too infrequently used to be worth it
(e.g. strdup) or that it's been subsumed by compiler optimizations.
Long lists of short strcmp() operations are very common.

zw
  
Adhemerval Zanella Dec. 12, 2016, 1:53 p.m. UTC | #5
On 12/12/2016 11:31, Zack Weinberg wrote:
> On Mon, Dec 12, 2016 at 7:48 AM, Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>> On 12/12/2016 10:40, Zack Weinberg wrote:
>>> I'm inclined to say that the strcmp expansion should stick around
>>> until after it _is_ added to GCC.
>>
>> But I hardly see this as a performance hotstop for any significant workload,
>> but rather as a microptimization that has driven string2.h creation as a
>> whole.
>>
>> Also the idea is to avoid rely on specific libc implementation to actually
>> get these kind of optimization (if it is worth).
> 
> I'd agree with this if we were talking about a new addition to
> string2.h - those should be justified by something beyond just
> optimization.  (For instance, the explicit_bzero inline is a partial
> mitigation for the problem with variables whose address otherwise
> wouldn't be taken.)
> 
> With an _existing_ string2.h microoptimization though, I think we need
> to be able to argue that it is too infrequently used to be worth it
> (e.g. strdup) or that it's been subsumed by compiler optimizations.
> Long lists of short strcmp() operations are very common.
> 
> zw

I tend to agree with your rationale, however tying remove existing 
string2.h microoptimization with compiler support one can argue that
we still need to support old compiler and then it will require multiple 
releases to actually remove such code.

But my idea is these kind of optimization should not have been added
in glibc in first place: there are often misleading since user are not
really aware of its application unless he actually reads the headers
or check the pre-compiled code and mostly are actually only applied
for GNU compilers.

Also, I highly doubt this specific optimization was added with some
specific workload in mind and I think even if a real workload have
this specific snippet as hotspot it will highly have performance
issues with different compiler/libcs usage (which is another
side-effect I would like to minimize by removing all the string2.h
snippets).
  
Joseph Myers Dec. 12, 2016, 11:09 p.m. UTC | #6
On Mon, 12 Dec 2016, Adhemerval Zanella wrote:

> I tend to agree with your rationale, however tying remove existing 
> string2.h microoptimization with compiler support one can argue that
> we still need to support old compiler and then it will require multiple 
> releases to actually remove such code.

Well, considering the GNU system as a whole a minimum if the optimization 
is plausibly useful would be at least to make sure there is a GCC bug 
filed for it before removing the optimization from glibc.
  
Wilco Dijkstra Dec. 14, 2016, 2:35 p.m. UTC | #7
Joseph wrote:
On Mon, 12 Dec 2016, Adhemerval Zanella wrote:
> > I tend to agree with your rationale, however tying remove existing 
> > string2.h microoptimization with compiler support one can argue that
> > we still need to support old compiler and then it will require multiple 
> > releases to actually remove such code.
>
> Well, considering the GNU system as a whole a minimum if the optimization 
> is plausibly useful would be at least to make sure there is a GCC bug 
> filed for it before removing the optimization from glibc.

Yes I agree this is a reasonble optimization, so I've created PR 78809.
It probably doesn't help a lot in general code but the current optimizations
on strcmp are basic and that only deal with both strings constant or one
string being the empty string.

Wilco
  

Patch

diff --git a/string/bits/string2.h b/string/bits/string2.h
index b0be5f6a49024a0bedfc37e9ec2c0356e0e4fa09..5e0339223697256fff7eba4cc32d2eb618f07713 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -60,64 +60,6 @@ 
 # define __stpcpy(dest, src) __builtin_stpcpy (dest, src)
 #endif
 
-/* Compare characters of S1 and S2.  */
-#ifndef strcmp
-# define strcmp(s1, s2) \
-  __extension__								      \
-  ({ size_t __s1_len, __s2_len;						      \
-     (__builtin_constant_p (s1) && __builtin_constant_p (s2)		      \
-      && (__s1_len = strlen (s1), __s2_len = strlen (s2),		      \
-	  (!__string2_1bptr_p (s1) || __s1_len >= 4)			      \
-	  && (!__string2_1bptr_p (s2) || __s2_len >= 4))		      \
-      ? __builtin_strcmp (s1, s2)					      \
-      : (__builtin_constant_p (s1) && __string2_1bptr_p (s1)		      \
-	 && (__s1_len = strlen (s1), __s1_len < 4)			      \
-	 ? (__builtin_constant_p (s2) && __string2_1bptr_p (s2)		      \
-	    ? __builtin_strcmp (s1, s2)					      \
-	    : __strcmp_cg (s1, s2, __s1_len))				      \
-	 : (__builtin_constant_p (s2) && __string2_1bptr_p (s2)		      \
-	    && (__s2_len = strlen (s2), __s2_len < 4)			      \
-	    ? (__builtin_constant_p (s1) && __string2_1bptr_p (s1)	      \
-	       ? __builtin_strcmp (s1, s2)				      \
-	       : -__strcmp_cg (s2, s1, __s2_len))			      \
-            : __builtin_strcmp (s1, s2)))); })
-
-# define __strcmp_cg(s1, s2, l1) \
-  (__extension__ ({ const unsigned char *__s2 =				      \
-		      (const unsigned char *) (const char *) (s2);	      \
-		    int __result =					      \
-		      (((const unsigned char *) (const char *) (s1))[0]	      \
-		       - __s2[0]);					      \
-		    if (l1 > 0 && __result == 0)			      \
-		      {							      \
-			__result = (((const unsigned char *)		      \
-				     (const char *) (s1))[1] - __s2[1]);      \
-			if (l1 > 1 && __result == 0)			      \
-			  {						      \
-			    __result = (((const unsigned char *)	      \
-					 (const char *) (s1))[2] - __s2[2]);  \
-			    if (l1 > 2 && __result == 0)		      \
-			      __result = (((const unsigned char *)	      \
-					  (const char *)  (s1))[3]	      \
-					  - __s2[3]);			      \
-			  }						      \
-		      }							      \
-		    __result; }))
-#endif
-
-
-/* Compare N characters of S1 and S2.  */
-#ifndef strncmp
-# define strncmp(s1, s2, n)						      \
-  (__extension__ (__builtin_constant_p (n)				      \
-		  && ((__builtin_constant_p (s1)			      \
-		       && strlen (s1) < ((size_t) (n)))			      \
-		      || (__builtin_constant_p (s2)			      \
-			  && strlen (s2) < ((size_t) (n))))		      \
-		  ? strcmp (s1, s2) : strncmp (s1, s2, n)))
-#endif
-
-
 #ifndef _FORCE_INLINES
 # undef __STRING_INLINE
 #endif