diff mbox series

[v2] remove attribute access from regexec

Message ID ba40f5e9-0a2f-cc6d-e96a-8959d45fbafa@gmail.com
State Superseded
Headers show
Series [v2] remove attribute access from regexec | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Martin Sebor Aug. 18, 2021, 3:53 p.m. UTC
Let me repost an updated patch as v2 to let the patch tester know
about the revision, and to also include the corresponding change
to regexec.c needed to avoid the -Wvla-parameter I mentioned.

Apparently the first patch failed to apply and the second one
wasn't picked up because the subject line hasn't changed.  Thanks
for letting me know, Carlos!

Here's a link to the Patchwork check:
http://patchwork.sourceware.org/project/glibc/patch/15a32181-8060-4135-cb72-9e79831697d5@gmail.com/

On 8/14/21 2:08 PM, Martin Sebor wrote:
> On 8/13/21 4:34 PM, Paul Eggert wrote:
>> On 8/13/21 2:30 PM, Martin Sebor wrote:
>>> Attached is a revised patch with this approach.
>>
>> The revised patch is to include/regex.h but the original patch was to 
>> posix/regex.h. Is that intentional?
> 
> Yes, they need to be consistent, otherwise GCC issues -Wvla-parameter.
> (That's to help detect inadvertent array/VLA mismatches as well as
> mismatches in the VLA parameter bounds.)
> 
>>
>> We need to check whether __STDC_VERSION__ is defined. Also, no need 
>> for parens around arg of 'defined'. Something like this perhaps:
>>
>>    #if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__ \
>>         && !defined __STDC_NO_VLA__)
>>
>> Also, the duplication of the declarations make the headers harder to 
>> read and encourage typos (I noticed one typo: "_Restrict_arr" without 
>> the trailing "_"). Instead, I suggest something like this:
>>
>>    #if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__ \
>>         && !defined __STDC_NO_VLA__)
>>    # define _REGEX_VLA(arg) arg
>>    #else
>>    # define _REGEX_VLA(arg)
>>    #endif
>>
>> That way, we can simply change "regmatch_t __pmatch[_Restrict_arr_]" 
>> to "regmatch_t __pmatch[_Restrict_arr_ _REGEX_VLA (__nmatch)]" without 
>> having to duplicate the entire function declaration.
> 
> Sounds good.  I've defined the macro in cdefs.h and mamed it _VLA_ARG
> to make it usable in other contexts.  Please see the attached revision.
> 
>>
>>> PS POSIX says regexec() ignores pnmatch when REG_NOSUB is set, so
>>> strictly speaking, warning for such calls to it in that case is
>>> also a false positive.
>>
>> Ouch, this casts doubt on the entire exercise. It's not simply about 
>> warnings: it's about the code being generated for the matcher. For 
>> example, for:
>>
>> int
>> f (_Bool flag, unsigned long n, int a[n])
>> {
>>    return n == 0 ? 0 : flag ? a[n - 1] : a[0];
>> }
>>
>> a compiler is allowed to generate code that loads a[n - 1] even when 
>> FLAG is false. Similarly, if we add this VLA business to regexec, the 
>> generated machine code could dereference pmatch unconditionally even 
>> if our source code makes the dereferencing conditional on REG_NOSUB, 
>> and the resulting behavior would fail to conform to POSIX.
> 
> The VLA bound by itself doesn't affect codegen.  I suspect you're
> thinking of a[static n]?  With just a[n], without static, there
> is no requirement that a point to an array with n elements.  It
> simply declares an ordinary pointer, same as [] or *.
> 
> Martin
diff mbox series

Patch

diff --git a/include/regex.h b/include/regex.h
index 24eca2c297..5cf3ef7636 100644
--- a/include/regex.h
+++ b/include/regex.h
@@ -37,7 +37,8 @@  extern int __regcomp (regex_t *__preg, const char *__pattern, int __cflags);
 libc_hidden_proto (__regcomp)
 
 extern int __regexec (const regex_t *__preg, const char *__string,
-		      size_t __nmatch, regmatch_t __pmatch[], int __eflags);
+		      size_t __nmatch, regmatch_t __pmatch[_VLA_ARG (__nmatch)],
+		      int __eflags);
 libc_hidden_proto (__regexec)
 
 extern size_t __regerror (int __errcode, const regex_t *__preg,
diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
index e490fc1aeb..6b6e4c233c 100644
--- a/misc/sys/cdefs.h
+++ b/misc/sys/cdefs.h
@@ -632,4 +632,14 @@  _Static_assert (0, "IEEE 128-bits long double requires redirection on this platf
 # define __attribute_returns_twice__ /* Ignore.  */
 #endif
 
+
+#if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__	\
+     && !defined __STDC_NO_VLA__)
+/* Used to specify a variable bound in a declaration of a function
+   VLA-like parameter, as in 'int f (int n, int[_VLA_ARG (n)n]);'  */
+# define _VLA_ARG(arg) arg
+#else
+# define _VLA_ARG(arg)
+#endif
+
 #endif	 /* sys/cdefs.h */
diff --git a/posix/regex.h b/posix/regex.h
index 14fb1d8364..52ccc4b577 100644
--- a/posix/regex.h
+++ b/posix/regex.h
@@ -654,9 +654,8 @@  extern int regcomp (regex_t *_Restrict_ __preg,
 
 extern int regexec (const regex_t *_Restrict_ __preg,
 		    const char *_Restrict_ __String, size_t __nmatch,
-		    regmatch_t __pmatch[_Restrict_arr_],
-		    int __eflags)
-    __attr_access ((__write_only__, 4, 3));
+		    regmatch_t __pmatch[_Restrict_arr_ _VLA_ARG (__nmatch)],
+		    int __eflags);
 
 extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
 			char *_Restrict_ __errbuf, size_t __errbuf_size)
diff --git a/posix/regexec.c b/posix/regexec.c
index f7b4f9cfc3..7b9d0ee65f 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -190,7 +190,7 @@  static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
 
 int
 regexec (const regex_t *__restrict preg, const char *__restrict string,
-	 size_t nmatch, regmatch_t pmatch[], int eflags)
+	 size_t nmatch, regmatch_t pmatch[_VLA_ARG (nmatch)], int eflags)
 {
   reg_errcode_t err;
   Idx start, length;