stdlib: fix grouping verification with multi-byte thousands separator

Message ID mvmbkd5xlhu.fsf@suse.de
State Committed
Headers
Series stdlib: fix grouping verification with multi-byte thousands separator |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm fail Patch failed to apply

Commit Message

Andreas Schwab Oct. 11, 2023, 2:49 p.m. UTC
  The grouping verification only worked for a single-byte thousands
separator.  With a multi-byte separator it returned as if no separators
were present.  The actual parsing in str_to_mpn will then go wrong when
there are multiple adjacent multi-byte separators in the number.
---
 stdlib/grouping.c    | 33 ++++++++++-----------------------
 stdlib/tst-strtod4.c |  3 ++-
 2 files changed, 12 insertions(+), 24 deletions(-)
  

Comments

Florian Weimer Oct. 12, 2023, 8:25 a.m. UTC | #1
* Andreas Schwab:

> @@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
>        if (cp < begin)
>  	return end;
>  
> -      if (end - cp == (int) *gp + 1)
> +      if (end - cp == (int) *gp + thousands_len)
>  	{
>  	  /* This group matches the specification.  */

This still does not work for alternative digits, right?  But that's an
existing limitation of strto[dl]?

Does this need a bug?  Is grouping validation required by a standard?

The patch itself looks okay to me.

Thanks,
Florian
  
Andreas Schwab Oct. 12, 2023, 9:28 a.m. UTC | #2
On Okt 12 2023, Florian Weimer wrote:

> * Andreas Schwab:
>
>> @@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
>>        if (cp < begin)
>>  	return end;
>>  
>> -      if (end - cp == (int) *gp + 1)
>> +      if (end - cp == (int) *gp + thousands_len)
>>  	{
>>  	  /* This group matches the specification.  */
>
> This still does not work for alternative digits, right?  But that's an
> existing limitation of strto[dl]?

strtod only supports [0-9].

> Does this need a bug?

Bug 30964.

> Is grouping validation required by a standard?

I don't think so, but this makes the behaviour consistent with
locales that have single-byte separators.  Note that str_to_mpn will
malfunction if the number contains multiple adjacent separators.
  
Florian Weimer Oct. 12, 2023, 9:33 a.m. UTC | #3
* Andreas Schwab:

> On Okt 12 2023, Florian Weimer wrote:
>
>> * Andreas Schwab:
>>
>>> @@ -91,7 +84,7 @@ __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
>>>        if (cp < begin)
>>>  	return end;
>>>  
>>> -      if (end - cp == (int) *gp + 1)
>>> +      if (end - cp == (int) *gp + thousands_len)
>>>  	{
>>>  	  /* This group matches the specification.  */
>>
>> This still does not work for alternative digits, right?  But that's an
>> existing limitation of strto[dl]?
>
> strtod only supports [0-9].
>
>> Does this need a bug?
>
> Bug 30964.

Thanks, please commit this with the bug reference in the commit message.

Florian
  

Patch

diff --git a/stdlib/grouping.c b/stdlib/grouping.c
index b6bf1dbab2..16b266d3e0 100644
--- a/stdlib/grouping.c
+++ b/stdlib/grouping.c
@@ -59,7 +59,6 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
   size_t thousands_len = 1;
 #else
   size_t thousands_len = strlen (thousands);
-  int cnt;
 #endif
 
   while (end - begin >= thousands_len)
@@ -74,14 +73,8 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 	  if (*cp == thousands)
 	    break;
 #else
-	  if (cp[thousands_len - 1] == *thousands)
-	    {
-	      for (cnt = 1; thousands[cnt] != '\0'; ++cnt)
-		if (thousands[cnt] != cp[thousands_len - 1 - cnt])
-		  break;
-	      if (thousands[cnt] == '\0')
-		break;
-	    }
+	  if (memcmp (cp, thousands, thousands_len) == 0)
+	    break;
 #endif
 	  --cp;
 	}
@@ -91,7 +84,7 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
       if (cp < begin)
 	return end;
 
-      if (end - cp == (int) *gp + 1)
+      if (end - cp == (int) *gp + thousands_len)
 	{
 	  /* This group matches the specification.  */
 
@@ -105,7 +98,7 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 	     remainder of the string from BEGIN to NEW_END is the part we
 	     will consider if there is a grouping error in this trailing
 	     portion from CP to END.  */
-	  new_end = cp - 1;
+	  new_end = cp;
 
 	  /* Loop while the grouping is correct.  */
 	  while (1)
@@ -132,10 +125,7 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 		      if (*cp == thousands)
 			break;
 #else
-		      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
-			if (thousands[cnt] != cp[thousands_len - cnt - 1])
-			  break;
-		      if (thousands[cnt] == '\0')
+		      if (memcmp (cp, thousands, thousands_len) == 0)
 			break;
 #endif
 		      --cp;
@@ -156,20 +146,17 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
 		      if (*cp == thousands)
 			break;
 #else
-		      for (cnt = 0; thousands[cnt] != '\0'; ++cnt)
-			if (thousands[cnt] != cp[thousands_len - cnt - 1])
-			  break;
-		      if (thousands[cnt] == '\0')
+		      if (memcmp (cp, thousands, thousands_len) == 0)
 			break;
 #endif
 		      --cp;
 		    }
 
-		  if (cp < begin && group_end - cp <= (int) *gp)
+		  if (cp < begin && group_end - cp <= (int) *gp + thousands_len - 1)
 		    /* Final group is correct.  */
 		    return end;
 
-		  if (cp < begin || group_end - cp != (int) *gp)
+		  if (cp < begin || group_end - cp != (int) *gp + thousands_len - 1)
 		    /* Incorrect group.  Punt.  */
 		    break;
 		}
@@ -183,8 +170,8 @@  __correctly_grouped_prefixmb (const STRING_TYPE *begin, const STRING_TYPE *end,
       else
 	{
 	  /* Even the first group was wrong; determine maximum shift.  */
-	  if (end - cp > (int) *gp + 1)
-	    end = cp + (int) *gp + 1;
+	  if (end - cp > (int) *gp + thousands_len)
+	    end = cp + (int) *gp + thousands_len;
 	  else if (cp < begin)
 	    /* This number does not fill the first group, but is correct.  */
 	    return end;
diff --git a/stdlib/tst-strtod4.c b/stdlib/tst-strtod4.c
index aae9835d82..841b525836 100644
--- a/stdlib/tst-strtod4.c
+++ b/stdlib/tst-strtod4.c
@@ -13,7 +13,8 @@  static const struct
 } tests[] =
   {
     { "000"NNBSP"000"NNBSP"000", "", 0.0 },
-    { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 }
+    { "1"NNBSP"000"NNBSP"000,5x", "x", 1000000.5 },
+    { "10"NNBSP NNBSP"200", NNBSP NNBSP"200", 10.0 }
   };
 #define NTESTS (sizeof (tests) / sizeof (tests[0]))