[committed,3/7] analyzer: add note about valid subscripts [PR106626]

Message ID 20221201024200.3722982-3-dmalcolm@redhat.com
State Committed
Commit 7c655699ed51b0c987e5472767db48b19044ae05
Headers
Series [committed,1/7] analyzer: move bounds checking to a new bounds-checking.cc

Commit Message

David Malcolm Dec. 1, 2022, 2:41 a.m. UTC
  Consider -fanalyzer on:

#include <stdint.h>

int32_t arr[10];

void int_arr_write_element_after_end_off_by_one(int32_t x)
{
  arr[10] = x;
}

Trunk x86_64: https://godbolt.org/z/17zn3qYY4

Currently we emit:

<source>: In function 'int_arr_write_element_after_end_off_by_one':
<source>:7:11: warning: buffer overflow [CWE-787] [-Wanalyzer-out-of-bounds]
    7 |   arr[10] = x;
      |   ~~~~~~~~^~~
  event 1
    |
    |    3 | int32_t arr[10];
    |      |         ^~~
    |      |         |
    |      |         (1) capacity is 40 bytes
    |
    +--> 'int_arr_write_element_after_end_off_by_one': events 2-3
           |
           |    5 | void int_arr_write_element_after_end_off_by_one(int32_t x)
           |      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
           |      |      |
           |      |      (2) entry to 'int_arr_write_element_after_end_off_by_one'
           |    6 | {
           |    7 |   arr[10] = x;
           |      |   ~~~~~~~~~~~
           |      |           |
           |      |           (3) out-of-bounds write from byte 40 till byte 43 but 'arr' ends at byte 40
           |
<source>:7:11: note: write of 4 bytes to beyond the end of 'arr'
    7 |   arr[10] = x;
      |   ~~~~~~~~^~~

This is worded in terms of bytes, due to the way -Wanalyzer-out-of-bounds
is implemented, but the user wrote an array subscript, not a byte offset.

This patch tries to get closer to the user's code by adding a note about
array bounds when we're referring to an array.  In the above example it
adds this trailing note:

  note: valid subscripts for 'arr' are '[0]' to '[9]'

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-4427-g7c655699ed51b0.

gcc/analyzer/ChangeLog:
	PR analyzer/106626
	* bounds-checking.cc (out_of_bounds::maybe_describe_array_bounds):
	New.
	(buffer_overflow::emit): Call maybe_describe_array_bounds.
	(buffer_overread::emit): Likewise.
	(buffer_underflow::emit): Likewise.
	(buffer_underread::emit): Likewise.

gcc/testsuite/ChangeLog:
	PR analyzer/106626
	* gcc.dg/analyzer/call-summaries-2.c: Add dg-message for expected
	note about valid indexes.
	* gcc.dg/analyzer/out-of-bounds-1.c: Likewise, fixing up existing
	dg-message directives.
	* gcc.dg/analyzer/out-of-bounds-write-char-arr.c: Likewise.
	* gcc.dg/analyzer/out-of-bounds-write-int-arr.c: Likewise.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/bounds-checking.cc               | 46 +++++++++++++++++--
 .../gcc.dg/analyzer/call-summaries-2.c        |  1 +
 .../gcc.dg/analyzer/out-of-bounds-1.c         | 16 ++++---
 .../analyzer/out-of-bounds-write-char-arr.c   |  6 +++
 .../analyzer/out-of-bounds-write-int-arr.c    |  6 +++
 5 files changed, 64 insertions(+), 11 deletions(-)
  

Patch

diff --git a/gcc/analyzer/bounds-checking.cc b/gcc/analyzer/bounds-checking.cc
index ad7f431ea2f..b02bc79a926 100644
--- a/gcc/analyzer/bounds-checking.cc
+++ b/gcc/analyzer/bounds-checking.cc
@@ -71,6 +71,34 @@  public:
   }
 
 protected:
+  /* Potentially add a note about valid ways to index this array, such
+     as (given "int arr[10];"):
+       note: valid subscripts for 'arr' are '[0]' to '[9]'
+     We print the '[' and ']' characters so as to express the valid
+     subscripts using C syntax, rather than just as byte ranges,
+     which hopefully is more clear to the user.  */
+  void
+  maybe_describe_array_bounds (location_t loc) const
+  {
+    if (!m_diag_arg)
+      return;
+    tree t = TREE_TYPE (m_diag_arg);
+    if (!t)
+      return;
+    if (TREE_CODE (t) != ARRAY_TYPE)
+      return;
+    tree domain = TYPE_DOMAIN (t);
+    if (!domain)
+      return;
+    tree max_idx = TYPE_MAX_VALUE (domain);
+    if (!max_idx)
+      return;
+    tree min_idx = TYPE_MIN_VALUE (domain);
+    inform (loc,
+	    "valid subscripts for %qE are %<[%E]%> to %<[%E]%>",
+	    m_diag_arg, min_idx, max_idx);
+  }
+
   const region *m_reg;
   tree m_diag_arg;
   byte_range m_out_of_bounds_range;
@@ -165,6 +193,8 @@  public:
 	  inform (rich_loc->get_loc (),
 		  "write to beyond the end of %qE",
 		  m_diag_arg);
+
+	maybe_describe_array_bounds (rich_loc->get_loc ());
       }
 
     return warned;
@@ -245,6 +275,8 @@  public:
 	  inform (rich_loc->get_loc (),
 		  "read from after the end of %qE",
 		  m_diag_arg);
+
+	maybe_describe_array_bounds (rich_loc->get_loc ());
       }
 
     return warned;
@@ -297,8 +329,11 @@  public:
   {
     diagnostic_metadata m;
     m.add_cwe (124);
-    return warning_meta (rich_loc, m, get_controlling_option (),
-			 "buffer underflow");
+    bool warned = warning_meta (rich_loc, m, get_controlling_option (),
+				"buffer underflow");
+    if (warned)
+      maybe_describe_array_bounds (rich_loc->get_loc ());
+    return warned;
   }
 
   label_text describe_final_event (const evdesc::final_event &ev)
@@ -346,8 +381,11 @@  public:
   {
     diagnostic_metadata m;
     m.add_cwe (127);
-    return warning_meta (rich_loc, m, get_controlling_option (),
-			 "buffer underread");
+    bool warned = warning_meta (rich_loc, m, get_controlling_option (),
+				"buffer underread");
+    if (warned)
+      maybe_describe_array_bounds (rich_loc->get_loc ());
+    return warned;
   }
 
   label_text describe_final_event (const evdesc::final_event &ev)
diff --git a/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c b/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c
index 953cbd32f5a..a7a17dbd358 100644
--- a/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c
+++ b/gcc/testsuite/gcc.dg/analyzer/call-summaries-2.c
@@ -330,6 +330,7 @@  int test_returns_element_ptr (int j)
   __analyzer_eval (*returns_element_ptr (1) == 8); /* { dg-warning "TRUE" } */
   __analyzer_eval (*returns_element_ptr (2) == 9); /* { dg-warning "TRUE" } */
   return *returns_element_ptr (3); /* { dg-warning "buffer overread" } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[2\\\]'" "valid subscript note" { target *-*-* } .-1 } */
 }
 
 int returns_offset (int arr[3], int i)
diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-1.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-1.c
index 9f3cda6e02b..dc4de9b28a6 100644
--- a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-1.c
@@ -25,8 +25,9 @@  void test1 (void)
   id_sequence[2] = 345;
   id_sequence[3] = 456; /* { dg-line test1 } */
 
-  /* { dg-warning "overflow" "warning" { target *-*-* } test1 } */
-  /* { dg-message "" "note" { target *-*-* } test1 } */
+  /* { dg-warning "stack-based buffer overflow" "warning" { target *-*-* } test1 } */
+  /* { dg-message "write of 4 bytes to beyond the end of 'id_sequence'" "num bad bytes note" { target *-*-* } test1 } */
+  /* { dg-message "valid subscripts for 'id_sequence' are '\\\[0\\\]' to '\\\[2\\\]'" "valid subscript note" { target *-*-* } test1 } */
 }
 
 void test2 (void)
@@ -46,8 +47,9 @@  void test3 (void)
   for (int i = n; i >= 0; i--)
     arr[i] = i; /* { dg-line test3 } */
 
-  /* { dg-warning "overflow" "warning" { target *-*-* } test3 } */
-  /* { dg-message "" "note" { target *-*-* } test3 } */
+  /* { dg-warning "stack-based buffer overflow" "warning" { target *-*-* } test3 } */
+  /* { dg-message "write of 4 bytes to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } test3 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[3\\\]'" "valid subscript note" { target *-*-* } test3 } */
 }
 
 void test4 (void)
@@ -72,7 +74,7 @@  void test5 (void)
   *last_el = 4; /* { dg-line test5 } */
 
   free (arr);
-  /* { dg-warning "overflow" "warning" { target *-*-* } test5 } */
+  /* { dg-warning "heap-based buffer overflow" "warning" { target *-*-* } test5 } */
   /* { dg-message "" "note" { target *-*-* } test5 } */
 }
 
@@ -89,9 +91,9 @@  void test6 (void)
     printf ("x=%d y=%d *p=%d *q=%d\n" , x, y, *p, *q);  /* { dg-line test6c } */
   }
 
-  /* { dg-warning "overflow" "warning" { target *-*-* } test6b } */
+  /* { dg-warning "buffer overflow" "warning" { target *-*-* } test6b } */
   /* { dg-message "" "note" { target *-*-* } test6b } */
-  /* { dg-warning "overread" "warning" { target *-*-* } test6c } */
+  /* { dg-warning "buffer overread" "warning" { target *-*-* } test6c } */
   /* { dg-message "" "note" { target *-*-* } test6c } */
 }
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-char-arr.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-char-arr.c
index 3564476c322..739ebb11590 100644
--- a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-char-arr.c
+++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-char-arr.c
@@ -4,18 +4,21 @@  void int_arr_write_element_before_start_far(char x)
 {
   arr[-100] = x; /* { dg-warning "buffer underflow" "warning" } */
   /* { dg-message "out-of-bounds write at byte -100 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */
 }
 
 void int_arr_write_element_before_start_near(char x)
 {
   arr[-2] = x; /* { dg-warning "buffer underflow" "warning" } */
   /* { dg-message "out-of-bounds write at byte -2 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */
 }
 
 void int_arr_write_element_before_start_off_by_one(char x)
 {
   arr[-1] = x; /* { dg-warning "buffer underflow" "warning" } */
   /* { dg-message "out-of-bounds write at byte -1 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */
 }
 
 void int_arr_write_element_at_start(char x)
@@ -33,6 +36,7 @@  void int_arr_write_element_after_end_off_by_one(char x)
   arr[10] = x; /* { dg-warning "buffer overflow" "warning" } */
   /* { dg-message "out-of-bounds write at byte 10 but 'arr' ends at byte 10" "final event" { target *-*-* } .-1 } */
   /* { dg-message "write of 1 byte to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } .-2 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-3 } */
 }
 
 void int_arr_write_element_after_end_near(char x)
@@ -40,6 +44,7 @@  void int_arr_write_element_after_end_near(char x)
   arr[11] = x; /* { dg-warning "buffer overflow" "warning" } */
   /* { dg-message "out-of-bounds write at byte 11 but 'arr' ends at byte 10" "final event" { target *-*-* } .-1 } */
   /* { dg-message "write of 1 byte to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } .-2 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-3 } */
 }
 
 void int_arr_write_element_after_end_far(char x)
@@ -47,4 +52,5 @@  void int_arr_write_element_after_end_far(char x)
   arr[100] = x; /* { dg-warning "buffer overflow" "warning" } */
   /* { dg-message "out-of-bounds write at byte 100 but 'arr' ends at byte 10" "final event" { target *-*-* } .-1 } */
   /* { dg-message "write of 1 byte to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } .-2 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-3 } */
 }
diff --git a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-int-arr.c b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-int-arr.c
index 24a9a6bfa18..b2b37b92e01 100644
--- a/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-int-arr.c
+++ b/gcc/testsuite/gcc.dg/analyzer/out-of-bounds-write-int-arr.c
@@ -6,18 +6,21 @@  void int_arr_write_element_before_start_far(int32_t x)
 {
   arr[-100] = x; /* { dg-warning "buffer underflow" "warning" } */
   /* { dg-message "out-of-bounds write from byte -400 till byte -397 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */
 }
 
 void int_arr_write_element_before_start_near(int32_t x)
 {
   arr[-2] = x; /* { dg-warning "buffer underflow" "warning" } */
   /* { dg-message "out-of-bounds write from byte -8 till byte -5 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */
 }
 
 void int_arr_write_element_before_start_off_by_one(int32_t x)
 {
   arr[-1] = x; /* { dg-warning "buffer underflow" "warning" } */
   /* { dg-message "out-of-bounds write from byte -4 till byte -1 but 'arr' starts at byte 0" "final event" { target *-*-* } .-1 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-2 } */
 }
 
 void int_arr_write_element_at_start(int32_t x)
@@ -35,6 +38,7 @@  void int_arr_write_element_after_end_off_by_one(int32_t x)
   arr[10] = x; /* { dg-warning "buffer overflow" "warning" } */
   /* { dg-message "out-of-bounds write from byte 40 till byte 43 but 'arr' ends at byte 40" "final event" { target *-*-* } .-1 } */
   /* { dg-message "write of 4 bytes to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } .-2 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-3 } */
 }
 
 void int_arr_write_element_after_end_near(int32_t x)
@@ -42,6 +46,7 @@  void int_arr_write_element_after_end_near(int32_t x)
   arr[11] = x; /* { dg-warning "buffer overflow" "warning" } */
   /* { dg-message "out-of-bounds write from byte 44 till byte 47 but 'arr' ends at byte 40" "final event" { target *-*-* } .-1 } */
   /* { dg-message "write of 4 bytes to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } .-2 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-3 } */
 }
 
 void int_arr_write_element_after_end_far(int32_t x)
@@ -49,4 +54,5 @@  void int_arr_write_element_after_end_far(int32_t x)
   arr[100] = x; /* { dg-warning "buffer overflow" "warning" } */
   /* { dg-message "out-of-bounds write from byte 400 till byte 403 but 'arr' ends at byte 40" "final event" { target *-*-* } .-1 } */
   /* { dg-message "write of 4 bytes to beyond the end of 'arr'" "num bad bytes note" { target *-*-* } .-2 } */
+  /* { dg-message "valid subscripts for 'arr' are '\\\[0\\\]' to '\\\[9\\\]'" "valid subscript note" { target *-*-* } .-3 } */
 }