[committed] analyzer: complain about tainted sizes with "access" attribute [PR103940]

Message ID 20220112150818.1530353-1-dmalcolm@redhat.com
State Committed
Commit 2c16dfe6268eeeb4b7924ff423e274fa00894a4d
Headers
Series [committed] analyzer: complain about tainted sizes with "access" attribute [PR103940] |

Commit Message

David Malcolm Jan. 12, 2022, 3:08 p.m. UTC
  GCC 10 gained the "access" function and type attribute, which
optionally can take a size-index param:
  https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html

-fanalyzer in trunk (for GCC 12) has gained a -Wanalyzer-tainted-size to
complain about attacker-controlled size values, but this was only being
used deep inside the region-model code when handling the hardcoded known
behavior of certain functions (memset, IIRC).

This patch extends -Wanalyzer-tainted-size to also complain about
unsanitized attacker-controlled values being passed to function
parameters marked as a size via the "access" attribute.

Note that -fanalyzer-checker=taint is currently required in
addition to -fanalyzer to use this warning, due to scaling issues
(see bug 103533).

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r12-6526-g2c16dfe6268eeeb4b7924ff423e274fa00894a4d.

gcc/analyzer/ChangeLog:
	PR analyzer/103940
	* engine.cc (impl_sm_context::impl_sm_context): Add
	"unknown_side_effects" param and use it to initialize
	new m_unknown_side_effects field.
	(impl_sm_context::unknown_side_effects_p): New.
	(impl_sm_context::m_unknown_side_effects): New.
	(exploded_node::on_stmt): Pass unknown_side_effects to sm_ctxt
	ctor.
	* sm-taint.cc: Include "stringpool.h" and "attribs.h".
	(tainted_size::tainted_size): Drop "dir" param.
	(tainted_size::get_kind): Drop "FINAL".
	(tainted_size::emit): Likewise.
	(tainted_size::m_dir): Drop unused field.
	(class tainted_access_attrib_size): New subclass.
	(taint_state_machine::on_stmt): Call check_for_tainted_size_arg on
	external functions with unknown side effects.
	(taint_state_machine::check_for_tainted_size_arg): New.
	(region_model::check_region_for_taint): Drop "dir" param from
	tainted_size ctor.
	* sm.h (sm_context::unknown_side_effects_p): New.

gcc/testsuite/ChangeLog:
	PR analyzer/103940
	* gcc.dg/analyzer/taint-size-access-attr-1.c: New test.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/engine.cc                        |  17 ++-
 gcc/analyzer/sm-taint.cc                      | 116 ++++++++++++++++--
 gcc/analyzer/sm.h                             |   3 +
 .../analyzer/taint-size-access-attr-1.c       |  63 ++++++++++
 4 files changed, 187 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/taint-size-access-attr-1.c
  

Patch

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 18fc00d3857..f2a14f52caa 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -293,14 +293,16 @@  public:
 		   const sm_state_map *old_smap,
 		   sm_state_map *new_smap,
 		   path_context *path_ctxt,
-		   stmt_finder *stmt_finder = NULL)
+		   stmt_finder *stmt_finder = NULL,
+		   bool unknown_side_effects = false)
   : sm_context (sm_idx, sm),
     m_logger (eg.get_logger ()),
     m_eg (eg), m_enode_for_diag (enode_for_diag),
     m_old_state (old_state), m_new_state (new_state),
     m_old_smap (old_smap), m_new_smap (new_smap),
     m_path_ctxt (path_ctxt),
-    m_stmt_finder (stmt_finder)
+    m_stmt_finder (stmt_finder),
+    m_unknown_side_effects (unknown_side_effects)
   {
   }
 
@@ -490,6 +492,11 @@  public:
     return m_path_ctxt;
   }
 
+  bool unknown_side_effects_p () const FINAL OVERRIDE
+  {
+    return m_unknown_side_effects;
+  }
+
   log_user m_logger;
   exploded_graph &m_eg;
   exploded_node *m_enode_for_diag;
@@ -499,6 +506,9 @@  public:
   sm_state_map *m_new_smap;
   path_context *m_path_ctxt;
   stmt_finder *m_stmt_finder;
+
+  /* Are we handling an external function with unknown side effects?  */
+  bool m_unknown_side_effects;
 };
 
 /* Subclass of stmt_finder for finding the best stmt to report the leak at,
@@ -1325,7 +1335,8 @@  exploded_node::on_stmt (exploded_graph &eg,
 	= old_state.m_checker_states[sm_idx];
       sm_state_map *new_smap = state->m_checker_states[sm_idx];
       impl_sm_context sm_ctxt (eg, sm_idx, sm, this, &old_state, state,
-			       old_smap, new_smap, path_ctxt);
+			       old_smap, new_smap, path_ctxt, NULL,
+			       unknown_side_effects);
 
       /* Allow the state_machine to handle the stmt.  */
       if (sm.on_stmt (&sm_ctxt, snode, stmt))
diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc
index 8f274df997a..54c7e6015ab 100644
--- a/gcc/analyzer/sm-taint.cc
+++ b/gcc/analyzer/sm-taint.cc
@@ -41,6 +41,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "cgraph.h"
 #include "cfg.h"
 #include "digraph.h"
+#include "stringpool.h"
+#include "attribs.h"
 #include "analyzer/supergraph.h"
 #include "analyzer/call-string.h"
 #include "analyzer/program-point.h"
@@ -102,6 +104,13 @@  public:
 
   state_t combine_states (state_t s0, state_t s1) const;
 
+private:
+  void check_for_tainted_size_arg (sm_context *sm_ctxt,
+				   const supernode *node,
+				   const gcall *call,
+				   tree callee_fndecl) const;
+
+public:
   /* State for a "tainted" value: unsanitized data potentially under an
      attacker's control.  */
   state_t m_tainted;
@@ -338,15 +347,13 @@  class tainted_size : public taint_diagnostic
 {
 public:
   tainted_size (const taint_state_machine &sm, tree arg,
-		enum bounds has_bounds,
-		enum access_direction dir)
-  : taint_diagnostic (sm, arg, has_bounds),
-    m_dir (dir)
+		enum bounds has_bounds)
+  : taint_diagnostic (sm, arg, has_bounds)
   {}
 
-  const char *get_kind () const FINAL OVERRIDE { return "tainted_size"; }
+  const char *get_kind () const OVERRIDE { return "tainted_size"; }
 
-  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  bool emit (rich_location *rich_loc) OVERRIDE
   {
     diagnostic_metadata m;
     m.add_cwe (129);
@@ -395,9 +402,44 @@  public:
 				   m_arg);
       }
   }
+};
+
+/* Subclass of tainted_size for reporting on tainted size values
+   passed to an external function annotated with attribute "access".  */
+
+class tainted_access_attrib_size : public tainted_size
+{
+public:
+  tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
+			      enum bounds has_bounds, tree callee_fndecl,
+			      unsigned size_argno, const char *access_str)
+  : tainted_size (sm, arg, has_bounds),
+    m_callee_fndecl (callee_fndecl),
+    m_size_argno (size_argno), m_access_str (access_str)
+  {
+  }
+
+  const char *get_kind () const OVERRIDE
+  {
+    return "tainted_access_attrib_size";
+  }
+
+  bool emit (rich_location *rich_loc) FINAL OVERRIDE
+  {
+    bool warned = tainted_size::emit (rich_loc);
+    if (warned)
+      {
+	inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
+		"parameter %i of %qD marked as a size via attribute %qs",
+		m_size_argno + 1, m_callee_fndecl, m_access_str);
+      }
+    return warned;
+  }
 
 private:
-  enum access_direction m_dir;
+  tree m_callee_fndecl;
+  unsigned m_size_argno;
+  const char *m_access_str;
 };
 
 /* Concrete taint_diagnostic subclass for reporting attacker-controlled
@@ -679,6 +721,10 @@  taint_state_machine::on_stmt (sm_context *sm_ctxt,
 				      m_start, m_tainted);
 	    return true;
 	  }
+
+	/* External function with "access" attribute. */
+	if (sm_ctxt->unknown_side_effects_p ())
+	  check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);
       }
   // TODO: ...etc; many other sources of untrusted data
 
@@ -826,6 +872,58 @@  taint_state_machine::combine_states (state_t s0, state_t s1) const
   gcc_unreachable ();
 }
 
+/* Check for calls to external functions marked with
+   __attribute__((access)) with a size-index: complain about
+   tainted values passed as a size to such a function.  */
+
+void
+taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
+						 const supernode *node,
+						 const gcall *call,
+						 tree callee_fndecl) const
+{
+  tree fntype = TREE_TYPE (callee_fndecl);
+  if (!fntype)
+    return;
+
+  if (!TYPE_ATTRIBUTES (fntype))
+    return;
+
+  /* Initialize a map of attribute access specifications for arguments
+     to the function function call.  */
+  rdwr_map rdwr_idx;
+  init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
+
+  unsigned argno = 0;
+
+  for (tree iter = TYPE_ARG_TYPES (fntype); iter;
+       iter = TREE_CHAIN (iter), ++argno)
+    {
+      const attr_access* access = rdwr_idx.get (argno);
+      if (!access)
+	continue;
+
+      if (access->sizarg == UINT_MAX)
+	continue;
+
+      tree size_arg = gimple_call_arg (call, access->sizarg);
+
+      state_t state = sm_ctxt->get_state (call, size_arg);
+      enum bounds b;
+      if (get_taint (state, TREE_TYPE (size_arg), &b))
+	{
+	  const char* const access_str =
+	    TREE_STRING_POINTER (access->to_external_string ());
+	  tree diag_size = sm_ctxt->get_diagnostic_tree (size_arg);
+	  sm_ctxt->warn (node, call, size_arg,
+			 new tainted_access_attrib_size (*this, diag_size, b,
+							 callee_fndecl,
+							 access->sizarg,
+							 access_str));
+	}
+    }
+}
+
 } // anonymous namespace
 
 /* Internal interface to this file. */
@@ -841,7 +939,7 @@  make_taint_state_machine (logger *logger)
 
 void
 region_model::check_region_for_taint (const region *reg,
-				      enum access_direction dir,
+				      enum access_direction,
 				      region_model_context *ctxt) const
 {
   gcc_assert (reg);
@@ -931,7 +1029,7 @@  region_model::check_region_for_taint (const region *reg,
 	    if (taint_sm.get_taint (state, size_sval->get_type (), &b))
 	      {
 		tree arg = get_representative_tree (size_sval);
-		ctxt->warn (new tainted_size (taint_sm, arg, b, dir));
+		ctxt->warn (new tainted_size (taint_sm, arg, b));
 	      }
 	  }
 	  break;
diff --git a/gcc/analyzer/sm.h b/gcc/analyzer/sm.h
index 4ffde8e1c48..fccfc882bf1 100644
--- a/gcc/analyzer/sm.h
+++ b/gcc/analyzer/sm.h
@@ -271,6 +271,9 @@  public:
     return NULL;
   }
 
+  /* Are we handling an external function with unknown side effects?  */
+  virtual bool unknown_side_effects_p () const { return false; }
+
 protected:
   sm_context (int sm_idx, const state_machine &sm)
   : m_sm_idx (sm_idx), m_sm (sm) {}
diff --git a/gcc/testsuite/gcc.dg/analyzer/taint-size-access-attr-1.c b/gcc/testsuite/gcc.dg/analyzer/taint-size-access-attr-1.c
new file mode 100644
index 00000000000..724679a8cf3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/taint-size-access-attr-1.c
@@ -0,0 +1,63 @@ 
+/* Passing tainted sizes to external functions with attribute ((access)) with
+   a size-index.  */
+
+// TODO: remove need for this option:
+/* { dg-additional-options "-fanalyzer-checker=taint" } */
+
+#include "analyzer-decls.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct foo
+{
+  size_t sz;
+};
+
+char buf[100];
+
+extern void extern_fn_read_only (void *p, size_t sz) /* { dg-message "parameter 2 of 'extern_fn_read_only' marked as a size via attribute 'access \\(read_only, 1, 2\\)'" } */
+  __attribute__ ((access (read_only, 1, 2)));
+
+void test_fn_read_only (FILE *f, void *p)
+{
+  struct foo tmp;
+  if (1 == fread(&tmp, sizeof(tmp), 1, f)) { /* { dg-message "\\(\[0-9\]+\\) 'tmp' gets an unchecked value here" "event: tmp gets unchecked value" { xfail *-*-* } } */
+                                             /* { dg-message "\\(\[0-9\]+\\) following 'true' branch\\.\\.\\." "event: following true branch" { target *-*-* } .-1 } */
+    __analyzer_dump_state ("taint", tmp.sz); /* { dg-warning "state: 'tainted'" } */
+    /* { dg-message "\\(\[0-9\]+\\) \\.\\.\\.to here" "event: to here" { target *-*-* } .-1 } */
+
+    extern_fn_read_only (p, tmp.sz); /* { dg-warning "use of attacker-controlled value 'tmp.sz' as size without upper-bounds checking" } */
+  }
+}
+
+/* We shouldn't complain if the value has been sanitized.  */
+
+void test_fn_sanitized (FILE *f, void *p)
+{
+  struct foo tmp;
+  if (1 == fread(&tmp, sizeof(tmp), 1, f)) {
+    __analyzer_dump_state ("taint", tmp.sz); /* { dg-warning "state: 'tainted'" } */
+
+    if (tmp.sz > 100)
+      return;
+
+    __analyzer_dump_state ("taint", tmp.sz); /* { dg-warning "state: 'has_ub'" } */
+    
+    extern_fn_read_only (p, tmp.sz); /* { dg-bogus "use of attacker-controlled value" } */
+  }
+}
+
+/* We shouldn't complain if there was no size annotation.  */
+
+extern void extern_fn_no_size (void *p)
+  __attribute__ ((access (read_only, 1)));
+
+void test_fn_no_size (FILE *f, void *p)
+{
+  struct foo tmp;
+  if (1 == fread(&tmp, sizeof(tmp), 1, f)) {
+    __analyzer_dump_state ("taint", tmp.sz); /* { dg-warning "state: 'tainted'" } */
+    extern_fn_no_size (p);
+  }
+}