[2/7] stdlib: Simplify getenv
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
And remove _STRING_ARCH_unaligned usage. The small size optimization
just adds boilerplate code.
Checked on x86_64-linux-gnu and i686-linux-gnu.
---
stdlib/getenv.c | 63 ++++---------------------------------------------
1 file changed, 4 insertions(+), 59 deletions(-)
Comments
Hi Adhemerval,
+ size_t len = __strchrnul (name, '=') - name;
This is a change in behaviour, ignoring anything behind a '=' in name. What is
wrong with strlen?
+ for (char **ep = __environ; *ep != NULL; ++ep)
{
- size_t len = strlen (name);
-#if _STRING_ARCH_unaligned
- name_start = *(const uint16_t *) name;
-#else
- name_start = (((const unsigned char *) name)[0]
- | (((const unsigned char *) name)[1] << 8));
-#endif
- len -= 2;
- name += 2;
-
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
-
- if (name_start == ep_start && !strncmp (*ep + 2, name, len)
- && (*ep)[len + 2] == '=')
- return &(*ep)[len + 3];
- }
+ if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
+ return *ep + len + 1;
It's still useful to keep a check for the first character, eg. name[0] == (*ep)[0] as this
is about twice as fast as always calling strncmp.
Cheers,
Wilco
On 15/02/23 14:50, Wilco Dijkstra wrote:
> Hi Adhemerval,
>
> + size_t len = __strchrnul (name, '=') - name;
>
> This is a change in behaviour, ignoring anything behind a '=' in name. What is
> wrong with strlen?
Indeed it is better to keep current semantic.
>
> + for (char **ep = __environ; *ep != NULL; ++ep)
> {
> - size_t len = strlen (name);
> -#if _STRING_ARCH_unaligned
> - name_start = *(const uint16_t *) name;
> -#else
> - name_start = (((const unsigned char *) name)[0]
> - | (((const unsigned char *) name)[1] << 8));
> -#endif
> - len -= 2;
> - name += 2;
> -
> - for (ep = __environ; *ep != NULL; ++ep)
> - {
> -#if _STRING_ARCH_unaligned
> - uint16_t ep_start = *(uint16_t *) *ep;
> -#else
> - uint16_t ep_start = (((unsigned char *) *ep)[0]
> - | (((unsigned char *) *ep)[1] << 8));
> -#endif
> -
> - if (name_start == ep_start && !strncmp (*ep + 2, name, len)
> - && (*ep)[len + 2] == '=')
> - return &(*ep)[len + 3];
> - }
> + if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
> + return *ep + len + 1;
>
> It's still useful to keep a check for the first character, eg. name[0] == (*ep)[0] as this
> is about twice as fast as always calling strncmp.
Alright, updated patch below.
From ddfb6419c5e74b34fb22454581fa0636275ec942 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 9 Feb 2023 10:36:57 -0300
Subject: [PATCH 2/7] stdlib: Simplify getenv
And remove _STRING_ARCH_unaligned usage.
Checked on x86_64-linux-gnu and i686-linux-gnu.
---
stdlib/getenv.c | 68 +++++++------------------------------------------
1 file changed, 9 insertions(+), 59 deletions(-)
diff --git a/stdlib/getenv.c b/stdlib/getenv.c
index e3157ce2f3..61a42d941d 100644
--- a/stdlib/getenv.c
+++ b/stdlib/getenv.c
@@ -15,76 +15,26 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <endian.h>
-#include <errno.h>
-#include <stdint.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-
-/* Return the value of the environment variable NAME. This implementation
- is tuned a bit in that it assumes no environment variable has an empty
- name which of course should always be true. We have a special case for
- one character names so that for the general case we can assume at least
- two characters which we can access. By doing this we can avoid using the
- `strncmp' most of the time. */
char *
getenv (const char *name)
{
- char **ep;
- uint16_t name_start;
-
if (__environ == NULL || name[0] == '\0')
return NULL;
- if (name[1] == '\0')
- {
- /* The name of the variable consists of only one character. Therefore
- the first two characters of the environment entry are this character
- and a '=' character. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN || !_STRING_ARCH_unaligned
- name_start = ('=' << 8) | *(const unsigned char *) name;
-#else
- name_start = '=' | ((*(const unsigned char *) name) << 8);
-#endif
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
- if (name_start == ep_start)
- return &(*ep)[2];
- }
- }
- else
- {
- size_t len = strlen (name);
-#if _STRING_ARCH_unaligned
- name_start = *(const uint16_t *) name;
-#else
- name_start = (((const unsigned char *) name)[0]
- | (((const unsigned char *) name)[1] << 8));
-#endif
- len -= 2;
- name += 2;
-
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
+ bool single_char = name[1] == '\0';
- if (name_start == ep_start && !strncmp (*ep + 2, name, len)
- && (*ep)[len + 2] == '=')
- return &(*ep)[len + 3];
- }
+ size_t len = strlen (name);;
+ for (char **ep = __environ; *ep != NULL; ++ep)
+ {
+ if (single_char && (*ep)[0] == name[0] && (*ep)[1] == '=')
+ return *ep + 2;
+ else if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
+ return *ep + len + 1;
}
return NULL;
On Feb 16 2023, Adhemerval Zanella Netto via Libc-alpha wrote:
> + size_t len = strlen (name);;
Extra semicolon.
Hi Adhemerval,
+ size_t len = strlen (name);;
+ for (char **ep = __environ; *ep != NULL; ++ep)
+ {
+ if (single_char && (*ep)[0] == name[0] && (*ep)[1] == '=')
+ return *ep + 2;
+ else if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
+ return *ep + len + 1;
This is about 10-20% slower than the previous patch both for single-char and
multi-char case... The approach I showed it is far simpler and 3-4 times faster
(within 10% of original performance).
Cheers,
Wilco
On 16/02/23 15:02, Wilco Dijkstra wrote:
> Hi Adhemerval,
>
> + size_t len = strlen (name);;
> + for (char **ep = __environ; *ep != NULL; ++ep)
> + {
> + if (single_char && (*ep)[0] == name[0] && (*ep)[1] == '=')
> + return *ep + 2;
> + else if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
> + return *ep + len + 1;
>
> This is about 10-20% slower than the previous patch both for single-char and
> multi-char case... The approach I showed it is far simpler and 3-4 times faster
> (within 10% of original performance).
Do you mean something like:
size_t len = strlen (name);;
for (char **ep = __environ; *ep != NULL; ++ep)
{
if (((*ep)[0] == name[0] && (*ep)[1] == '=')
|| (strncmp (name, *ep, len) == 0 && (*ep)[len] == '='))
return *ep + len + 1;
}
Hi Adhemerval,
> Do you mean something like:
>
> size_t len = strlen (name);;
> for (char **ep = __environ; *ep != NULL; ++ep)
> {
> if (((*ep)[0] == name[0] && (*ep)[1] == '=')
> || (strncmp (name, *ep, len) == 0 && (*ep)[len] == '='))
> return *ep + len + 1;
> }
No I meant checking the first character first before doing more expensive calls, so:
if (name[0] == (*ep)[0] && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
return *ep + len + 1;
Basically this means you get a very tight loop checking the first character - this avoids
most calls to strncmp. This is basically the same result as the current implementation
but without all the defines and complex unaligned/big-endian stuff!
Cheers,
Wilco
On 16/02/23 15:47, Wilco Dijkstra wrote:
> Hi Adhemerval,
>
>> Do you mean something like:
>>
>> size_t len = strlen (name);;
>> for (char **ep = __environ; *ep != NULL; ++ep)
>> {
>> if (((*ep)[0] == name[0] && (*ep)[1] == '=')
>> || (strncmp (name, *ep, len) == 0 && (*ep)[len] == '='))
>> return *ep + len + 1;
>> }
>
> No I meant checking the first character first before doing more expensive calls, so:
>
> if (name[0] == (*ep)[0] && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
> return *ep + len + 1;
>
> Basically this means you get a very tight loop checking the first character - this avoids
> most calls to strncmp. This is basically the same result as the current implementation
> but without all the defines and complex unaligned/big-endian stuff!
Ah ok, I see it now. Do the follow addresses the issues pointed out?
From 373682e6c290f1c38d0319b5695e6a12a388be0c Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Thu, 9 Feb 2023 10:36:57 -0300
Subject: [PATCH 2/7] stdlib: Simplify getenv
And remove _STRING_ARCH_unaligned usage.
Checked on x86_64-linux-gnu and i686-linux-gnu.
---
stdlib/getenv.c | 64 ++++---------------------------------------------
1 file changed, 5 insertions(+), 59 deletions(-)
diff --git a/stdlib/getenv.c b/stdlib/getenv.c
index e3157ce2f3..6942d23433 100644
--- a/stdlib/getenv.c
+++ b/stdlib/getenv.c
@@ -15,76 +15,22 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <endian.h>
-#include <errno.h>
-#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-
-/* Return the value of the environment variable NAME. This implementation
- is tuned a bit in that it assumes no environment variable has an empty
- name which of course should always be true. We have a special case for
- one character names so that for the general case we can assume at least
- two characters which we can access. By doing this we can avoid using the
- `strncmp' most of the time. */
char *
getenv (const char *name)
{
- char **ep;
- uint16_t name_start;
-
if (__environ == NULL || name[0] == '\0')
return NULL;
- if (name[1] == '\0')
- {
- /* The name of the variable consists of only one character. Therefore
- the first two characters of the environment entry are this character
- and a '=' character. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN || !_STRING_ARCH_unaligned
- name_start = ('=' << 8) | *(const unsigned char *) name;
-#else
- name_start = '=' | ((*(const unsigned char *) name) << 8);
-#endif
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
- if (name_start == ep_start)
- return &(*ep)[2];
- }
- }
- else
+ size_t len = strlen (name);;
+ for (char **ep = __environ; *ep != NULL; ++ep)
{
- size_t len = strlen (name);
-#if _STRING_ARCH_unaligned
- name_start = *(const uint16_t *) name;
-#else
- name_start = (((const unsigned char *) name)[0]
- | (((const unsigned char *) name)[1] << 8));
-#endif
- len -= 2;
- name += 2;
-
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
-
- if (name_start == ep_start && !strncmp (*ep + 2, name, len)
- && (*ep)[len + 2] == '=')
- return &(*ep)[len + 3];
- }
+ if (name[0] == (*ep)[0]
+ && strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
+ return *ep + len + 1;
}
return NULL;
Hi Adhemerval,
> Ah ok, I see it now. Do the follow addresses the issues pointed out?
Yes that looks good to me - just one thing (as pointed out by Andreas):
+ size_t len = strlen (name);;
Double semicolon. OK with that fixed.
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
Cheers,
Wilco
@@ -15,76 +15,21 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <endian.h>
-#include <errno.h>
-#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-
-/* Return the value of the environment variable NAME. This implementation
- is tuned a bit in that it assumes no environment variable has an empty
- name which of course should always be true. We have a special case for
- one character names so that for the general case we can assume at least
- two characters which we can access. By doing this we can avoid using the
- `strncmp' most of the time. */
char *
getenv (const char *name)
{
- char **ep;
- uint16_t name_start;
-
if (__environ == NULL || name[0] == '\0')
return NULL;
- if (name[1] == '\0')
- {
- /* The name of the variable consists of only one character. Therefore
- the first two characters of the environment entry are this character
- and a '=' character. */
-#if __BYTE_ORDER == __LITTLE_ENDIAN || !_STRING_ARCH_unaligned
- name_start = ('=' << 8) | *(const unsigned char *) name;
-#else
- name_start = '=' | ((*(const unsigned char *) name) << 8);
-#endif
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
- if (name_start == ep_start)
- return &(*ep)[2];
- }
- }
- else
+ size_t len = __strchrnul (name, '=') - name;
+ for (char **ep = __environ; *ep != NULL; ++ep)
{
- size_t len = strlen (name);
-#if _STRING_ARCH_unaligned
- name_start = *(const uint16_t *) name;
-#else
- name_start = (((const unsigned char *) name)[0]
- | (((const unsigned char *) name)[1] << 8));
-#endif
- len -= 2;
- name += 2;
-
- for (ep = __environ; *ep != NULL; ++ep)
- {
-#if _STRING_ARCH_unaligned
- uint16_t ep_start = *(uint16_t *) *ep;
-#else
- uint16_t ep_start = (((unsigned char *) *ep)[0]
- | (((unsigned char *) *ep)[1] << 8));
-#endif
-
- if (name_start == ep_start && !strncmp (*ep + 2, name, len)
- && (*ep)[len + 2] == '=')
- return &(*ep)[len + 3];
- }
+ if (strncmp (name, *ep, len) == 0 && (*ep)[len] == '=')
+ return *ep + len + 1;
}
return NULL;