[v4,4/4] Use the .ACCESS_WITH_SIZE in bound sanitizer.

Message ID 20240124002955.3387096-5-qing.zhao@oracle.com
State New
Headers
Series New attribute "counted_by" to annotate bounds for C99 FAM(PR108896) |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed

Commit Message

Qing Zhao Jan. 24, 2024, 12:29 a.m. UTC
  Since the result type of the call to .ACCESS_WITH_SIZE is a pointer to
the element type. The original array_ref is converted to an indirect_ref,
which introduced two issues for the instrumenation of bound sanitizer:

A. The index for the original array_ref was mixed into the offset
expression for the new indirect_ref.

In order to make the instrumentation for the bound sanitizer easier, one
more argument for the function .ACCESS_WITH_SIZE is added to record this
original index for the array_ref. then later during bound instrumentation,
get the index from the additional argument from the call to the function
.ACCESS_WITH_SIZE.

B. In the current bound sanitizer, no instrumentation will be inserted for
an indirect_ref.

In order to add instrumentation for an indirect_ref with a call to
.ACCESS_WITH_SIZE, we should specially handle the indirect_ref with a
call to .ACCESS_WITH_SIZE, and get the index and bound info from the
arguments of the call.

gcc/c-family/ChangeLog:

	* c-gimplify.cc (ubsan_walk_array_refs_r): Instrument indirect_ref.
	* c-ubsan.cc (get_bound_from_access_with_size): New function.
	(ubsan_instrument_bounds_indirect_ref): New function.
	(ubsan_indirect_ref_instrumented_p): New function.
	(ubsan_maybe_instrument_indirect_ref): New function.
	* c-ubsan.h (ubsan_maybe_instrument_indirect_ref): New prototype.

gcc/c/ChangeLog:

	* c-typeck.cc (build_counted_by_ref): Minor style fix.
	(build_access_with_size_for_counted_by): Add one more argument.
	(build_array_ref): Set the 5th argument of a call to .ACCESS_WITH_SIZE
	to the index.

gcc/testsuite/ChangeLog:

	* gcc.dg/ubsan/flex-array-counted-by-bounds-2.c: New test.
	* gcc.dg/ubsan/flex-array-counted-by-bounds.c: New test.
---
 gcc/c-family/c-gimplify.cc                    |   2 +
 gcc/c-family/c-ubsan.cc                       | 130 ++++++++++++++++++
 gcc/c-family/c-ubsan.h                        |   1 +
 gcc/c/c-typeck.cc                             |  14 +-
 .../ubsan/flex-array-counted-by-bounds-2.c    |  45 ++++++
 .../ubsan/flex-array-counted-by-bounds.c      |  46 +++++++
 6 files changed, 235 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
 create mode 100644 gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c
  

Patch

diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
index 494da49791d5..25a3ca1a9a99 100644
--- a/gcc/c-family/c-gimplify.cc
+++ b/gcc/c-family/c-gimplify.cc
@@ -108,6 +108,8 @@  ubsan_walk_array_refs_r (tree *tp, int *walk_subtrees, void *data)
     }
   else if (TREE_CODE (*tp) == ARRAY_REF)
     ubsan_maybe_instrument_array_ref (tp, false);
+  else if (TREE_CODE (*tp) == INDIRECT_REF)
+    ubsan_maybe_instrument_indirect_ref (tp);
   else if (TREE_CODE (*tp) == MODIFY_EXPR)
     {
       /* Since r7-1900, we gimplify RHS before LHS.  Consider
diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc
index 940982819ddf..7bb6464eb5b5 100644
--- a/gcc/c-family/c-ubsan.cc
+++ b/gcc/c-family/c-ubsan.cc
@@ -376,6 +376,7 @@  ubsan_instrument_return (location_t loc)
   return build_call_expr_loc (loc, t, 1, build_fold_addr_expr_loc (loc, data));
 }
 
+
 /* Instrument array bounds for ARRAY_REFs.  We create special builtin,
    that gets expanded in the sanopt pass, and make an array dimension
    of it.  ARRAY is the array, *INDEX is an index to the array.
@@ -501,6 +502,68 @@  ubsan_instrument_bounds (location_t loc, tree array, tree *index,
 				       *index, bound);
 }
 
+/* Get the tree that represented the number of counted_by, i.e, the maximum
+   number of the elements of the object that the call to .ACCESS_WITH_SIZE
+   points to, this number will be the bound of the corresponding array.  */
+static tree
+get_bound_from_access_with_size (tree call)
+{
+  if (!is_access_with_size_p (call))
+    return NULL_TREE;
+
+  tree ref_to_size = CALL_EXPR_ARG (call, 1);
+  unsigned int type_of_size = TREE_INT_CST_LOW (CALL_EXPR_ARG (call, 2));
+  unsigned int prec_of_size = TREE_INT_CST_LOW (CALL_EXPR_ARG (call, 3));
+  tree type = build_nonstandard_integer_type (prec_of_size, 1);
+  tree size = fold_build2 (MEM_REF, type, unshare_expr (ref_to_size),
+			   build_int_cst (ptr_type_node, 0));
+  /* Only when type_of_size is 1,i.e, the number of the elements of
+     the object type, return the size.  */
+  if (type_of_size != 1)
+    return NULL_TREE;
+  else
+    size = fold_convert (sizetype, size);
+
+  return size;
+}
+
+/* Instrument array bounds for INDIRECT_REFs whose pointers are
+   POINTER_PLUS_EXPRs of calls to .ACCESS_WITH_SIZE.  We create special
+   builtins that gets expanded in the sanopt pass, and make an array
+   dimension of it.  ARRAY is the pointer to the base of the array,
+   which is a call to .ACCESS_WITH_SIZE.
+   We get the INDEX from the 6th argument of the call to .ACCESS_WITH_SIZE
+   Return NULL_TREE if no instrumentation is emitted.  */
+
+tree
+ubsan_instrument_bounds_indirect_ref (location_t loc, tree array)
+{
+  if (!is_access_with_size_p (array))
+    return NULL_TREE;
+  tree bound = get_bound_from_access_with_size (array);
+  tree index = CALL_EXPR_ARG (array, 5);
+  /* When the index is a constant -1, it's an invalid index.  */
+  if ((TREE_CODE (index) == INTEGER_CST)
+       && TREE_INT_CST_LOW (index) == (long unsigned int) -1)
+    return NULL_TREE;
+  gcc_assert (bound);
+
+  /* Create a "(T *) 0" tree node to describe the original array type.
+     We get the original array type from the first argument of the call to
+     .ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, num_bytes, -1).
+
+     Originally, REF is a COMPONENT_REF with the original array type,
+     it was converted to a pointer to an ADDR_EXPR, and the ADDR_EXPR's
+     first operand is the original COMPONENT_REF.  */
+  tree ref = CALL_EXPR_ARG (array, 0);
+  tree array_type
+    = unshare_expr (TREE_TYPE (TREE_OPERAND (TREE_OPERAND (ref, 0), 0)));
+  tree zero_with_type = build_int_cst (build_pointer_type (array_type), 0);
+  return build_call_expr_internal_loc (loc, IFN_UBSAN_BOUNDS,
+				       void_type_node, 3, zero_with_type,
+				       unshare_expr (index), bound);
+}
+
 /* Return true iff T is an array that was instrumented by SANITIZE_BOUNDS.  */
 
 bool
@@ -536,6 +599,73 @@  ubsan_maybe_instrument_array_ref (tree *expr_p, bool ignore_off_by_one)
     }
 }
 
+/* Return true iff T is an INDIRECT_REF that was instrumented
+   by SANITIZE_BOUNDS.  */
+
+bool
+ubsan_indirect_ref_instrumented_p (const_tree t)
+{
+  if (TREE_CODE (t) != INDIRECT_REF)
+    return false;
+
+  tree pointer = TREE_OPERAND (t, 0);
+  if (TREE_CODE (pointer) != POINTER_PLUS_EXPR)
+    return false;
+  tree offset = NULL_TREE;
+  if (is_access_with_size_p (TREE_OPERAND (pointer, 0)))
+    offset = TREE_OPERAND (pointer, 1);
+  else if (is_access_with_size_p (TREE_OPERAND (pointer, 1)))
+    offset = TREE_OPERAND (pointer, 0);
+  else
+    return false;
+  return TREE_CODE (offset) == COMPOUND_EXPR
+	 && TREE_CODE (TREE_OPERAND (offset, 0)) == CALL_EXPR
+	 && CALL_EXPR_FN (TREE_OPERAND (offset, 0)) == NULL_TREE
+	 && CALL_EXPR_IFN (TREE_OPERAND (offset, 0)) == IFN_UBSAN_BOUNDS;
+}
+
+/* Instrument an INDIRECT_REF, if it hasn't already been instrumented.
+   Right now, we only instrument an INDIRECT_REF when its pointer is a
+   POINTER_PLUS_EXPR, with one operand is a call to .ACCESS_WITH_SIZE,
+   and the other operand is an offset to the array.  We will compute the
+   array index based on the offset and the size of each element, and use
+   the computed index for the instrumentation.  */
+
+void
+ubsan_maybe_instrument_indirect_ref (tree *expr_p)
+{
+  if (!ubsan_indirect_ref_instrumented_p (*expr_p)
+      && sanitize_flags_p (SANITIZE_BOUNDS | SANITIZE_BOUNDS_STRICT)
+      && current_function_decl != NULL_TREE)
+    {
+      tree pointer = TREE_OPERAND (*expr_p, 0);
+      if (TREE_CODE (pointer) != POINTER_PLUS_EXPR)
+	return;
+      tree array = NULL_TREE;
+      tree offset = NULL_TREE;
+      int nth_op = 0;
+      if (is_access_with_size_p (TREE_OPERAND (pointer, 0)))
+	{
+	  array = TREE_OPERAND (pointer, 0);
+	  offset = TREE_OPERAND (pointer, 1);
+	  nth_op = 1;
+	}
+      else if (is_access_with_size_p (TREE_OPERAND (pointer, 1)))
+	{
+	  array = TREE_OPERAND (pointer, 1);
+	  offset = TREE_OPERAND (pointer, 0);
+	}
+      else
+	return;
+
+      tree e = ubsan_instrument_bounds_indirect_ref (EXPR_LOCATION (*expr_p),
+						     array);
+      if (e != NULL_TREE)
+	TREE_OPERAND (pointer, nth_op)
+	  = build2 (COMPOUND_EXPR, TREE_TYPE (offset), e, offset);
+    }
+}
+
 static tree
 ubsan_maybe_instrument_reference_or_call (location_t loc, tree op, tree ptype,
 					  enum ubsan_null_ckind ckind)
diff --git a/gcc/c-family/c-ubsan.h b/gcc/c-family/c-ubsan.h
index 9df03445a2ba..ed495266e82d 100644
--- a/gcc/c-family/c-ubsan.h
+++ b/gcc/c-family/c-ubsan.h
@@ -28,6 +28,7 @@  extern tree ubsan_instrument_return (location_t);
 extern tree ubsan_instrument_bounds (location_t, tree, tree *, bool);
 extern bool ubsan_array_ref_instrumented_p (const_tree);
 extern void ubsan_maybe_instrument_array_ref (tree *, bool);
+extern void ubsan_maybe_instrument_indirect_ref (tree *);
 extern void ubsan_maybe_instrument_reference (tree *);
 extern void ubsan_maybe_instrument_member_call (tree, bool);
 
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 4020bafc8e36..4fcb99fa0a5d 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -2576,7 +2576,8 @@  build_counted_by_ref (tree datum, tree subdatum, tree *counted_by_type)
     {
       tree field_id = TREE_VALUE (TREE_VALUE (attr_counted_by));
       counted_by_ref
-	= build_component_ref (UNKNOWN_LOCATION, datum, field_id,
+	= build_component_ref (UNKNOWN_LOCATION,
+			       datum, field_id,
 			       UNKNOWN_LOCATION, UNKNOWN_LOCATION);
       counted_by_ref = build_fold_addr_expr (counted_by_ref);
 
@@ -2602,11 +2603,15 @@  build_counted_by_ref (tree datum, tree subdatum, tree *counted_by_type)
 
    to:
 
-   .ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, num_bytes, -1)
+   .ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, 1, num_bytes, -1, INDEX)
 
    NOTE: Both the return type and the type of the first argument
    of this function have been converted from the incomplete array
    type to the corresponding pointer type.
+
+   INDEX is -1 when we build the call to .ACCESS_WITH_SIZE. and
+   will be set to the corresponding tree node when we parse the
+   index at build_array_ref.
   */
 static tree
 build_access_with_size_for_counted_by (location_t loc, tree ref,
@@ -2619,12 +2624,13 @@  build_access_with_size_for_counted_by (location_t loc, tree ref,
 
   tree call
     = build_call_expr_internal_loc (loc, IFN_ACCESS_WITH_SIZE,
-				  result_type, 5,
+				  result_type, 6,
 				  array_to_pointer_conversion (loc, ref),
 				  counted_by_ref,
 				  build_int_cst (integer_type_node, 1),
 				  build_int_cst (integer_type_node,
 						 counted_by_precision),
+				  build_int_cst (integer_type_node, -1),
 				  build_int_cst (integer_type_node, -1));
   SET_EXPR_LOCATION (call, loc);
   return call;
@@ -3006,6 +3012,8 @@  build_array_ref (location_t loc, tree array, tree index)
       gcc_assert (TREE_CODE (TREE_TYPE (ar)) == POINTER_TYPE);
       gcc_assert (TREE_CODE (TREE_TYPE (TREE_TYPE (ar))) != FUNCTION_TYPE);
 
+      if (is_access_with_size_p (ar))
+	CALL_EXPR_ARG (ar, 5) = index;
       ret = build_indirect_ref (loc, build_binary_op (loc, PLUS_EXPR, ar,
 						      index, false),
 				RO_ARRAY_INDEXING);
diff --git a/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
new file mode 100644
index 000000000000..148934975ee5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds-2.c
@@ -0,0 +1,45 @@ 
+/* test the attribute counted_by and its usage in
+   bounds sanitizer combined with VLA.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds" } */
+/* { dg-output "index 11 out of bounds for type 'int \\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 20 out of bounds for type 'int \\\[\\\*\\\]\\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 11 out of bounds for type 'int \\\[\\\*\\\]\\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+/* { dg-output "\[^\n\r]*index 10 out of bounds for type 'int \\\[\\\*\\\]'\[^\n\r]*(\n|\r\n|\r)" } */
+
+
+#include <stdlib.h>
+
+void __attribute__((__noinline__)) setup_and_test_vla (int n, int m)
+{
+   struct foo {
+       int n;
+       int p[][n] __attribute__((counted_by(n)));
+   } *f;
+
+   f = (struct foo *) malloc (sizeof(struct foo) + m*sizeof(int[n]));
+   f->n = m;
+   f->p[m][n-1]=1;
+   return;
+}
+
+void __attribute__((__noinline__)) setup_and_test_vla_1 (int n1, int n2, int m)
+{
+  struct foo {
+    int n;
+    int p[][n2][n1] __attribute__((counted_by(n)));
+  } *f;
+
+  f = (struct foo *) malloc (sizeof(struct foo) + m*sizeof(int[n2][n1]));
+  f->n = m;
+  f->p[m][n2][n1]=1;
+  return;
+}
+
+int main(int argc, char *argv[])
+{
+  setup_and_test_vla (10, 11);
+  setup_and_test_vla_1 (10, 11, 20);
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c
new file mode 100644
index 000000000000..81eaeb3f2681
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/flex-array-counted-by-bounds.c
@@ -0,0 +1,46 @@ 
+/* test the attribute counted_by and its usage in
+   bounds sanitizer.  */
+/* { dg-do run } */
+/* { dg-options "-fsanitize=bounds" } */
+
+#include <stdlib.h>
+
+struct flex {
+  int b;
+  int c[];
+} *array_flex;
+
+struct annotated {
+  int b;
+  int c[] __attribute__ ((counted_by (b)));
+} *array_annotated;
+
+void __attribute__((__noinline__)) setup (int normal_count, int annotated_count)
+{
+  array_flex
+    = (struct flex *)malloc (sizeof (struct flex)
+			     + normal_count *  sizeof (int));
+  array_flex->b = normal_count;
+
+  array_annotated
+    = (struct annotated *)malloc (sizeof (struct annotated)
+				  + annotated_count *  sizeof (int));
+  array_annotated->b = annotated_count;
+
+  return;
+}
+
+void __attribute__((__noinline__)) test (int normal_index, int annotated_index)
+{
+  array_flex->c[normal_index] = 1;
+  array_annotated->c[annotated_index] = 2;
+}
+
+int main(int argc, char *argv[])
+{
+  setup (10, 10);   
+  test (10, 10);
+  return 0;
+}
+
+/* { dg-output "36:21: runtime error: index 10 out of bounds for type" } */