[pushed] analyzer: fix deref-before-check false +ves seen in haproxy [PR108475, PR109060]

Message ID 20230310133232.3165688-1-dmalcolm@redhat.com
State Committed
Commit c4fd232f9843bb800548a906653aeb0723cdb411
Headers
Series [pushed] analyzer: fix deref-before-check false +ves seen in haproxy [PR108475, PR109060] |

Commit Message

David Malcolm March 10, 2023, 1:32 p.m. UTC
  Integration testing showed various false positives from
-Wanalyzer-deref-before-check where the expression that's dereferenced
is different from the one that's checked, but the diagnostic is emitted
because they both evaluate to the same symbolic value.

This patch rejects such warnings, unless we have tree expressions for
both and that both tree expressions are "spelled the same way" i.e.
would be printed to the same user-facing string.

The patch has this effect on my integration tests of -fanalyzer:

  Comparison: 
    GOOD: 129        (19.20% -> 19.40%)
     BAD: 543 -> 536 (-7)

where the only affected warning is:

  -Wanalyzer-deref-before-check:
    GOOD:   1        (5.00% -> 8.33%)
     BAD:  19 ->  11 (-8)

   Known false positives: 15 -> 10 (-5)
      coreutils-9.1: 1 -> 0 (-1)
      haproxy-2.7.1: 4 -> 0 (-4)
   Suspected false positives: 4 -> 1 (-3)
      haproxy-2.7.1: 2 -> 0 (-2)
         qemu-7.2.0: 1 -> 0 (-1)

whilst retaining the known true positive.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-6581-gc4fd232f9843bb.

gcc/analyzer/ChangeLog:
	PR analyzer/108475
	PR analyzer/109060
	* sm-malloc.cc (deref_before_check::deref_before_check):
	Initialize new field m_deref_expr.  Assert that arg is non-NULL.
	(deref_before_check::emit): Reject cases where the spelling of the
	thing that was dereferenced differs from that of what is checked,
	or if the dereference expression was not found.  Remove code to
	handle NULL m_arg.
	(deref_before_check::describe_state_change): Remove code to handle
	NULL m_arg.
	(deref_before_check::describe_final_event): Likewise.
	(deref_before_check::sufficiently_similar_p): New.
	(deref_before_check::m_deref_expr): New field.
	(malloc_state_machine::maybe_complain_about_deref_before_check):
	Don't warn if the diag_ptr is NULL.

gcc/testsuite/ChangeLog:
	PR analyzer/108475
	PR analyzer/109060
	* gcc.dg/analyzer/deref-before-check-pr108475-1.c: New test.
	* gcc.dg/analyzer/deref-before-check-pr108475-haproxy-tcpcheck.c:
	New test.
	* gcc.dg/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c:
	New test.

Signed-off-by: David Malcolm <dmalcolm@redhat.com>
---
 gcc/analyzer/sm-malloc.cc                     |  81 +++++----
 .../analyzer/deref-before-check-pr108475-1.c  |  51 ++++++
 ...f-before-check-pr108475-haproxy-tcpcheck.c | 169 ++++++++++++++++++
 ...f-before-check-pr109060-haproxy-cfgparse.c |  92 ++++++++++
 4 files changed, 356 insertions(+), 37 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-haproxy-tcpcheck.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c
  

Patch

diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc
index 1ea9b30fa13..16883d301d5 100644
--- a/gcc/analyzer/sm-malloc.cc
+++ b/gcc/analyzer/sm-malloc.cc
@@ -1498,8 +1498,11 @@  public:
   deref_before_check (const malloc_state_machine &sm, tree arg)
   : malloc_diagnostic (sm, arg),
     m_deref_enode (NULL),
+    m_deref_expr (NULL),
     m_check_enode (NULL)
-  {}
+  {
+    gcc_assert (arg);
+  }
 
   const char *get_kind () const final override { return "deref_before_check"; }
 
@@ -1560,6 +1563,15 @@  public:
     if (linemap_location_from_macro_definition_p (line_table, check_loc))
       return false;
 
+    /* Reject if m_deref_expr is sufficiently different from m_arg
+       for cases where the dereference is spelled differently from
+       the check, which is probably two different ways to get the
+       same svalue, and thus not worth reporting.  */
+    if (!m_deref_expr)
+      return false;
+    if (!sufficiently_similar_p (m_deref_expr, m_arg))
+      return false;
+
     /* Reject the warning if the deref's BB doesn't dominate that
        of the check, so that we don't warn e.g. for shared cleanup
        code that checks a pointer for NULL, when that code is sometimes
@@ -1572,15 +1584,10 @@  public:
 			 m_deref_enode->get_supernode ()->m_bb))
       return false;
 
-    if (m_arg)
-      return warning_at (rich_loc, get_controlling_option (),
-			 "check of %qE for NULL after already"
-			 " dereferencing it",
-			 m_arg);
-    else
-      return warning_at (rich_loc, get_controlling_option (),
-			 "check of pointer for NULL after already"
-			 " dereferencing it");
+    return warning_at (rich_loc, get_controlling_option (),
+		       "check of %qE for NULL after already"
+		       " dereferencing it",
+		       m_arg);
   }
 
   label_text describe_state_change (const evdesc::state_change &change)
@@ -1591,11 +1598,9 @@  public:
       {
 	m_first_deref_event = change.m_event_id;
 	m_deref_enode = change.m_event.get_exploded_node ();
-	if (m_arg)
-	  return change.formatted_print ("pointer %qE is dereferenced here",
-					 m_arg);
-	else
-	  return label_text::borrow ("pointer is dereferenced here");
+	m_deref_expr = change.m_expr;
+	return change.formatted_print ("pointer %qE is dereferenced here",
+				       m_arg);
       }
     return malloc_diagnostic::describe_state_change (change);
   }
@@ -1604,31 +1609,32 @@  public:
   {
     m_check_enode = ev.m_event.get_exploded_node ();
     if (m_first_deref_event.known_p ())
-      {
-	if (m_arg)
-	  return ev.formatted_print ("pointer %qE is checked for NULL here but"
-				     " it was already dereferenced at %@",
-				     m_arg, &m_first_deref_event);
-	else
-	  return ev.formatted_print ("pointer is checked for NULL here but"
-				     " it was already dereferenced at %@",
-				     &m_first_deref_event);
-      }
+      return ev.formatted_print ("pointer %qE is checked for NULL here but"
+				 " it was already dereferenced at %@",
+				 m_arg, &m_first_deref_event);
     else
-      {
-	if (m_arg)
-	  return ev.formatted_print ("pointer %qE is checked for NULL here but"
-				     " it was already dereferenced",
-				     m_arg);
-	else
-	  return ev.formatted_print ("pointer is checked for NULL here but"
-				     " it was already dereferenced");
-      }
+      return ev.formatted_print ("pointer %qE is checked for NULL here but"
+				 " it was already dereferenced",
+				 m_arg);
   }
 
 private:
+  static bool sufficiently_similar_p (tree expr_a, tree expr_b)
+  {
+    pretty_printer *pp_a = global_dc->printer->clone ();
+    pretty_printer *pp_b = global_dc->printer->clone ();
+    pp_printf (pp_a, "%qE", expr_a);
+    pp_printf (pp_b, "%qE", expr_b);
+    bool result = (strcmp (pp_formatted_text (pp_a), pp_formatted_text (pp_b))
+		   == 0);
+    delete pp_a;
+    delete pp_b;
+    return result;
+  }
+
   diagnostic_event_id_t m_first_deref_event;
   const exploded_node *m_deref_enode;
+  tree m_deref_expr;
   const exploded_node *m_check_enode;
 };
 
@@ -2141,9 +2147,10 @@  maybe_complain_about_deref_before_check (sm_context *sm_ctxt,
     return;
 
   tree diag_ptr = sm_ctxt->get_diagnostic_tree (ptr);
-  sm_ctxt->warn
-    (node, stmt, ptr,
-     make_unique<deref_before_check> (*this, diag_ptr));
+  if (diag_ptr)
+    sm_ctxt->warn
+      (node, stmt, ptr,
+       make_unique<deref_before_check> (*this, diag_ptr));
   sm_ctxt->set_next_state (stmt, ptr, m_stop);
 }
 
diff --git a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-1.c b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-1.c
new file mode 100644
index 00000000000..fa3beaaa15f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-1.c
@@ -0,0 +1,51 @@ 
+/* Reduced from haproxy-2.7.1: src/tcpcheck.c.  */
+
+#define NULL ((void *)0)
+
+int
+test_1 (char **args, int cur_arg)
+{
+  char *p = NULL;
+
+  if (*args[cur_arg]) {
+    p = args[cur_arg];
+  }
+
+  if (p) { /* { dg-bogus "check of 'p' for NULL after already dereferencing it" } */
+    return 1;
+  }
+  return 0;
+}
+
+int
+test_2 (char **args, int cur_arg)
+{
+  char *p = NULL;
+  char *q = NULL;
+
+  if (*args[cur_arg]) {
+    if (*args[cur_arg + 1]) {
+      p = args[cur_arg];
+    } else {
+      q = args[cur_arg];
+    }      
+  }
+
+  if (p) { /* { dg-bogus "check of 'p' for NULL after already dereferencing it" } */
+    return 1;
+  }
+  if (q) { /* { dg-bogus "check of 'q' for NULL after already dereferencing it" } */
+    return 2;
+  }
+  return 0;
+}
+
+int test_3 (void **pp, int flag)
+{
+  void *p = NULL;
+  if (*pp && flag)
+    p = pp;
+  if (p) /* { dg-bogus "check of 'p' for NULL after already dereferencing it" } */
+    return 1;
+  return 0;    
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-haproxy-tcpcheck.c b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-haproxy-tcpcheck.c
new file mode 100644
index 00000000000..1180e17e555
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr108475-haproxy-tcpcheck.c
@@ -0,0 +1,169 @@ 
+/* Reduced from haproxy-2.7.1: src/tcpcheck.c.  */
+
+/* { dg-additional-options "-Wno-analyzer-too-complex" } */
+
+typedef __SIZE_TYPE__ size_t;
+#define NULL ((void *)0)
+
+extern void *calloc(size_t __nmemb, size_t __size)
+    __attribute__((__nothrow__, __leaf__)) __attribute__((__malloc__))
+    __attribute__((__alloc_size__(1, 2)));
+extern char *strdup(const char *__s) __attribute__((__nothrow__, __leaf__))
+__attribute__((__malloc__)) __attribute__((__nonnull__(1)));
+extern char *strstr(const char *__haystack, const char *__needle)
+    __attribute__((__nothrow__, __leaf__)) __attribute__((__pure__))
+    __attribute__((__nonnull__(1, 2)));
+extern size_t strlen(const char *__s) __attribute__((__nothrow__, __leaf__))
+__attribute__((__pure__)) __attribute__((__nonnull__(1)));
+struct list {
+  struct list *n;
+  struct list *p;
+};
+struct buffer {
+  size_t size;
+  char *area;
+  size_t data;
+  size_t head;
+};
+struct proxy;
+struct ist {
+  char *ptr;
+  size_t len;
+};
+static inline int isttest(const struct ist ist) { return ist.ptr != NULL; }
+
+enum http_meth_t {
+  HTTP_METH_OPTIONS,
+  /* [...snip...] */
+} __attribute__((packed));
+
+struct http_meth {
+  enum http_meth_t meth;
+  struct buffer str;
+};
+enum tcpcheck_send_type {
+  /* [...snip...] */
+  TCPCHK_SEND_HTTP,
+};
+enum tcpcheck_rule_type {
+  TCPCHK_ACT_SEND = 0,
+  /* [...snip...] */
+};
+struct tcpcheck_http_hdr {
+  struct ist name;
+  struct list value;
+  struct list list;
+};
+struct tcpcheck_send {
+  enum tcpcheck_send_type type;
+  union {
+    /* [...snip...] */
+    struct {
+      unsigned int flags;
+      struct http_meth meth;
+      union {
+        struct ist uri;
+        /* [...snip...] */
+      };
+      struct ist vsn;
+      struct list hdrs;
+      /* [...snip...] */
+    } http;
+  };
+};
+struct tcpcheck_rule {
+  /* [...snip...] */
+  enum tcpcheck_rule_type action;
+  /* [...snip...] */
+  union {
+    /* [...snip...] */
+    struct tcpcheck_send send;
+    /* [...snip...] */
+  };
+};
+enum http_meth_t find_http_meth(const char *str, const int len);
+void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool);
+void free_tcpcheck_http_hdr(struct tcpcheck_http_hdr *hdr);
+
+#define ist(str) ({                                                    \
+	char *__x = (void *)(str);                                     \
+	(struct ist){                                                  \
+		.ptr = __x,                                            \
+		.len = __builtin_constant_p(str) ?                     \
+			((void *)str == (void *)0) ? 0 :               \
+			__builtin_strlen(__x) :                        \
+			({                                             \
+				size_t __l = 0;                        \
+				if (__x) for (__l--; __x[++__l]; ) ;   \
+				__l;                                   \
+			})                                             \
+	};                                                             \
+})
+
+struct tcpcheck_rule *proxy_parse_httpchk_req(char **args, int cur_arg,
+                                              struct proxy *px, char **errmsg) {
+  struct tcpcheck_rule *chk = NULL;
+  struct tcpcheck_http_hdr *hdr = NULL;
+  char *meth = NULL, *uri = NULL, *vsn = NULL;
+  char *hdrs, *body;
+
+  hdrs = (*args[cur_arg + 2] ? strstr(args[cur_arg + 2], "\r\n") : NULL);
+  body = (*args[cur_arg + 2] ? strstr(args[cur_arg + 2], "\r\n\r\n") : NULL);
+  if (hdrs || body) {
+    /* [...snip...] */
+    goto error;
+  }
+
+  chk = calloc(1, sizeof(*chk));
+  if (!chk) {
+    /* [...snip...] */
+    goto error;
+  }
+  chk->action = TCPCHK_ACT_SEND;
+  chk->send.type = TCPCHK_SEND_HTTP;
+  chk->send.http.flags |= 0x0004;
+  chk->send.http.meth.meth = HTTP_METH_OPTIONS;
+  ((&chk->send.http.hdrs)->n = (&chk->send.http.hdrs)->p =
+       (&chk->send.http.hdrs));
+
+  if (*args[cur_arg]) {
+    if (!*args[cur_arg + 1])
+      uri = args[cur_arg];
+    else
+      meth = args[cur_arg];
+  }
+  if (*args[cur_arg + 1])
+    uri = args[cur_arg + 1];
+  if (*args[cur_arg + 2])
+    vsn = args[cur_arg + 2];
+
+  if (meth) { /* { dg-bogus "check of 'meth' for NULL after already dereferencing it" } */
+    chk->send.http.meth.meth = find_http_meth(meth, strlen(meth));
+    chk->send.http.meth.str.area = strdup(meth);
+    chk->send.http.meth.str.data = strlen(meth);
+    if (!chk->send.http.meth.str.area) {
+      /* [...snip...] */
+      goto error;
+    }
+  }
+  if (uri) {
+    chk->send.http.uri = ist(strdup(uri));
+    if (!isttest(chk->send.http.uri)) {
+      /* [...snip...] */
+      goto error;
+    }
+  }
+  if (vsn) { /* { dg-bogus "check of 'vsn' for NULL after already dereferencing it" } */
+    chk->send.http.vsn = ist(strdup(vsn));
+    if (!isttest(chk->send.http.vsn)) {
+      /* [...snip...] */
+      goto error;
+    }
+  }
+  return chk;
+
+error:
+  free_tcpcheck_http_hdr(hdr);
+  free_tcpcheck(chk, 0);
+  return NULL;
+}
diff --git a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c
new file mode 100644
index 00000000000..4f50882eb8a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-pr109060-haproxy-cfgparse.c
@@ -0,0 +1,92 @@ 
+/* Reduced from haproxy-2.7.1's cfgparse.c.  */
+
+typedef __SIZE_TYPE__ size_t;
+
+extern int
+strcmp(const char* __s1, const char* __s2)
+  __attribute__((__nothrow__, __leaf__)) __attribute__((__pure__))
+  __attribute__((__nonnull__(1, 2)));
+
+extern int
+strncmp(const char* __s1, const char* __s2, size_t __n)
+  __attribute__((__nothrow__, __leaf__)) __attribute__((__pure__))
+  __attribute__((__nonnull__(1, 2)));
+
+enum
+{
+ /* [...snip...] */
+  _ISdigit = ((3) < 8 ? ((1 << (3)) << 8) : ((1 << (3)) >> 8)),
+ /* [...snip...] */
+};
+
+extern const unsigned short int**
+__ctype_b_loc(void) __attribute__((__nothrow__, __leaf__))
+  __attribute__((__const__));
+
+unsigned int str2uic(const char* s);
+
+char*
+memprintf(char** out, const char* format, ...)
+  __attribute__((format(printf, 2, 3)));
+
+int
+parse_process_number(const char* arg,
+                     unsigned long* proc,
+                     int max,
+                     int* autoinc,
+                     char** err)
+{
+  if (autoinc) {
+    *autoinc = 0;
+    if (strncmp(arg, "auto:", 5) == 0) {
+      arg += 5;
+      *autoinc = 1;
+    }
+  }
+
+  if (strcmp(arg, "all") == 0) /* { dg-bogus "pointer 'dash' is dereferenced here" } */
+    *proc |= ~0UL;
+  else if (strcmp(arg, "odd") == 0)
+    *proc |= ~0UL / 3UL;
+  else if (strcmp(arg, "even") == 0)
+    *proc |= (~0UL / 3UL) << 1;
+  else {
+    const char *p, *dash = ((void*)0);
+    unsigned int low, high;
+
+    for (p = arg; *p; p++) {
+      if (*p == '-' && !dash) /* { dg-bogus "check of 'dash' for NULL after already dereferencing it" } */
+        dash = p;
+      else if (!((*__ctype_b_loc())[(int)(((unsigned char)*p))] &
+                 (unsigned short int)_ISdigit)) {
+        memprintf(err, "'%s' is not a valid number/range.", arg);
+        return -1;
+      }
+    }
+
+    low = high = str2uic(arg);
+    if (dash) /* { dg-bogus "check of 'dash' for NULL after already dereferencing it" } */
+      high = ((!*(dash + 1)) ? max : str2uic(dash + 1));
+
+    if (high < low) {
+      unsigned int swap = low;
+      low = high;
+      high = swap;
+    }
+
+    if (low < 1 || low > max || high > max) {
+      memprintf(err,
+                "'%s' is not a valid number/range."
+                " It supports numbers from 1 to %d.\n",
+                arg,
+                max);
+      return 1;
+    }
+
+    for (; low <= high; low++)
+      *proc |= 1UL << (low - 1);
+  }
+  *proc &= ~0UL >> (((unsigned int)sizeof(long) * 8) - max);
+
+  return 0;
+}