[4/4] resolv: Implement strict-error stub resolver option (bug 27929)

Message ID acc0609d6242a34436f4bea2fa499b3428d9f635.1718345824.git.fweimer@redhat.com (mailing list archive)
State Committed
Headers
Series Some DNS stub resolver fixes/enhancements |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

Florian Weimer June 14, 2024, 6:21 a.m. UTC
  For now, do not enable this mode by default due to the potential
impact on compatibility with existing deployments.
---
 NEWS                                  | 10 +++++
 resolv/res_init.c                     |  1 +
 resolv/res_send.c                     | 45 ++++++++++++++++-------
 resolv/resolv.h                       |  1 +
 resolv/tst-resolv-res_init-skeleton.c | 10 +++++
 resolv/tst-resolv-semi-failure.c      | 53 ++++++++++++++++-----------
 6 files changed, 84 insertions(+), 36 deletions(-)
  

Comments

DJ Delorie June 20, 2024, 9:24 p.m. UTC | #1
Florian Weimer <fweimer@redhat.com> writes:
> For now, do not enable this mode by default due to the potential
> impact on compatibility with existing deployments.

> +* The DNS stub resolver now supports the strict-error option.  If
> +  activated, getaddrinfo for the AF_UNSPEC address family (with dual
> +  A/AAAA DNS lookups) attemps to obtain a A/AAAA response pair from

s/a/an/

> +  another DNS server if one of the responses indicates failure.  Without
> +  the strict-error option, getaddrinfo returns the A record data it has
> +  obtained even if the AAAA query failed.  The new strict error mode is
> +  incompatible with some DNS environments which do not follow the RFCs,
> +  which is why this mode is not enabled by default.  A future version
> +  of the library may turn it on by default, however.

Ok.

> diff --git a/resolv/res_init.c b/resolv/res_init.c
> +            { STRnLEN ("strict-error"), RES_STRICTERR },

Ok.

> diff --git a/resolv/res_send.c b/resolv/res_send.c

>  		if (thisansp_error) {
>  		next_ns:
> -			if (recvresp1 || (buf2 != NULL && recvresp2)) {
> -			  *resplen2 = 0;
> -			  return resplen;
> -			}

Moved later, ok.


> -			if (buf2 != NULL && !single_request)

Moved later, ok.

> +		        /* Outside of strict-error mode, use the first
> +			   response even if the second response is an
> +			   error.  This allows parallel resolution to
> +			   succeed even if the recursive resolver
> +			   always answers with SERVFAIL for AAAA
> +			   queries (which still happens in practice
> +			   unfortunately).
> +
> +			   In strict-error mode, always switch to the
> +			   next server and try to get a response from
> +			   there.  */
> +			if ((statp->options & RES_STRICTERR) == 0)

Ok.

>  			  {
> -			    /* No data from the first reply.  */
> -			    resplen = 0;
> -			    /* We are waiting for a possible second reply.  */
> -			    if (matching_query == 1)
> -			      recvresp1 = 1;
> -			    else
> -			      recvresp2 = 1;
> -
> -			    goto wait;

Moved later, ok.

> +			    if (recvresp1 || (buf2 != NULL && recvresp2))
> +			      {
> +				*resplen2 = 0;
> +				return resplen;
> +			      }
> +
> +			    if (buf2 != NULL && !single_request)
> +			      {
> +				/* No data from the first reply.  */
> +				resplen = 0;
> +				/* We are waiting for a possible
> +				   second reply.  */
> +				if (matching_query == 1)
> +				  recvresp1 = 1;
> +				else
> +				  recvresp2 = 1;
> +
> +				goto wait;
> +			      }
>  			  }

Ok.

> diff --git a/resolv/resolv.h b/resolv/resolv.h
> index f40d6c58ce..b8a0f66a5f 100644
> --- a/resolv/resolv.h
> +++ b/resolv/resolv.h
> @@ -133,6 +133,7 @@ struct res_sym {
>  #define RES_NORELOAD    0x02000000 /* No automatic configuration reload.  */
>  #define RES_TRUSTAD     0x04000000 /* Request AD bit, keep it in responses.  */
>  #define RES_NOAAAA      0x08000000 /* Suppress AAAA queries.  */
> +#define RES_STRICTERR   0x10000000 /* Report more DNS errors as errors.  */

Ok.

>          print_option_flag (fp, &options, RES_NOAAAA, "no-aaaa");
> +        print_option_flag (fp, &options, RES_STRICTERR, "strict-error");

Ok.

> @@ -741,6 +742,15 @@ struct test_case test_cases[] =
>       "nameserver 192.0.2.1\n"
>       "; nameserver[0]: [192.0.2.1]:53\n"
>      },
> +    {.name = "strict-error flag",
> +     .conf = "options strict-error\n"
> +             "nameserver 192.0.2.1\n",
> +     .expected = "options strict-error\n"
> +                 "search example.com\n"
> +                 "; search[0]: example.com\n"
> +                 "nameserver 192.0.2.1\n"
> +                 "; nameserver[0]: [192.0.2.1]:53\n"
> +    },

Ok.

> diff --git a/resolv/tst-resolv-semi-failure.c b/resolv/tst-resolv-semi-failure.c
>  
> +/* Set to 1 if strict error checking is enabled.  */
> +static int do_strict_error;

Ok.

>  static void
>  check_one (void)
>  {
> @@ -83,7 +86,10 @@ check_one (void)
>        struct addrinfo *ai;
>        int ret = getaddrinfo ("www.example", "80", &hints, &ai);
>        const char *expected;
> -      if (ret == 0 && ai->ai_next != NULL)
> +      /* In strict-error mode, a switch to the second name server
> +         happens, and both responses are received, so a single
> +         response is a bug.  */
> +      if (do_strict_error || (ret == 0 && ai->ai_next != NULL))
>          expected = ("address: STREAM/TCP 192.0.2.17 80\n"
>                      "address: STREAM/TCP 2001:db8::1 80\n");

Ok.

> @@ -99,33 +105,36 @@ check_one (void)
>  static int
>  do_test (void)
>  {
> +  for (do_strict_error = 0; do_strict_error < 2; ++do_strict_error)

Ok.

Everything else is mostly just indentation...

> +        if (do_strict_error)
> +          _res.options |= RES_STRICTERR;

Ok.

LGTM aside from that one tiny typo.
Reviewed-by: DJ Delorie <dj@redhat.com>
  

Patch

diff --git a/NEWS b/NEWS
index 495bbd5cbc..2149da63c4 100644
--- a/NEWS
+++ b/NEWS
@@ -42,6 +42,16 @@  Major new features:
   the RES_OPTIONS=-no-aaaa environment variable performs AAAA DNS
   queries when the glibc DNS stub resolver is used.
 
+* The DNS stub resolver now supports the strict-error option.  If
+  activated, getaddrinfo for the AF_UNSPEC address family (with dual
+  A/AAAA DNS lookups) attemps to obtain a A/AAAA response pair from
+  another DNS server if one of the responses indicates failure.  Without
+  the strict-error option, getaddrinfo returns the A record data it has
+  obtained even if the AAAA query failed.  The new strict error mode is
+  incompatible with some DNS environments which do not follow the RFCs,
+  which is why this mode is not enabled by default.  A future version
+  of the library may turn it on by default, however.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * Architectures which use a 32-bit seconds-since-epoch field in struct
diff --git a/resolv/res_init.c b/resolv/res_init.c
index 243532b3ad..b838dc7064 100644
--- a/resolv/res_init.c
+++ b/resolv/res_init.c
@@ -695,6 +695,7 @@  res_setoptions (struct resolv_conf_parser *parser, const char *options)
             { STRnLEN ("use-vc"),  RES_USEVC },
             { STRnLEN ("trust-ad"), RES_TRUSTAD },
             { STRnLEN ("no-aaaa"), RES_NOAAAA },
+            { STRnLEN ("strict-error"), RES_STRICTERR },
           };
 #define noptions (sizeof (options) / sizeof (options[0]))
           bool negate_option = *cp == '-';
diff --git a/resolv/res_send.c b/resolv/res_send.c
index 9c77613f37..9a284ed44a 100644
--- a/resolv/res_send.c
+++ b/resolv/res_send.c
@@ -1234,21 +1234,38 @@  send_dg(res_state statp,
 
 		if (thisansp_error) {
 		next_ns:
-			if (recvresp1 || (buf2 != NULL && recvresp2)) {
-			  *resplen2 = 0;
-			  return resplen;
-			}
-			if (buf2 != NULL && !single_request)
+		        /* Outside of strict-error mode, use the first
+			   response even if the second response is an
+			   error.  This allows parallel resolution to
+			   succeed even if the recursive resolver
+			   always answers with SERVFAIL for AAAA
+			   queries (which still happens in practice
+			   unfortunately).
+
+			   In strict-error mode, always switch to the
+			   next server and try to get a response from
+			   there.  */
+			if ((statp->options & RES_STRICTERR) == 0)
 			  {
-			    /* No data from the first reply.  */
-			    resplen = 0;
-			    /* We are waiting for a possible second reply.  */
-			    if (matching_query == 1)
-			      recvresp1 = 1;
-			    else
-			      recvresp2 = 1;
-
-			    goto wait;
+			    if (recvresp1 || (buf2 != NULL && recvresp2))
+			      {
+				*resplen2 = 0;
+				return resplen;
+			      }
+
+			    if (buf2 != NULL && !single_request)
+			      {
+				/* No data from the first reply.  */
+				resplen = 0;
+				/* We are waiting for a possible
+				   second reply.  */
+				if (matching_query == 1)
+				  recvresp1 = 1;
+				else
+				  recvresp2 = 1;
+
+				goto wait;
+			      }
 			  }
 
 			/* don't retry if called from dig */
diff --git a/resolv/resolv.h b/resolv/resolv.h
index f40d6c58ce..b8a0f66a5f 100644
--- a/resolv/resolv.h
+++ b/resolv/resolv.h
@@ -133,6 +133,7 @@  struct res_sym {
 #define RES_NORELOAD    0x02000000 /* No automatic configuration reload.  */
 #define RES_TRUSTAD     0x04000000 /* Request AD bit, keep it in responses.  */
 #define RES_NOAAAA      0x08000000 /* Suppress AAAA queries.  */
+#define RES_STRICTERR   0x10000000 /* Report more DNS errors as errors.  */
 
 #define RES_DEFAULT	(RES_RECURSE|RES_DEFNAMES|RES_DNSRCH)
 
diff --git a/resolv/tst-resolv-res_init-skeleton.c b/resolv/tst-resolv-res_init-skeleton.c
index d3a19eb305..e41bcebd9d 100644
--- a/resolv/tst-resolv-res_init-skeleton.c
+++ b/resolv/tst-resolv-res_init-skeleton.c
@@ -129,6 +129,7 @@  print_resp (FILE *fp, res_state resp)
         print_option_flag (fp, &options, RES_NORELOAD, "no-reload");
         print_option_flag (fp, &options, RES_TRUSTAD, "trust-ad");
         print_option_flag (fp, &options, RES_NOAAAA, "no-aaaa");
+        print_option_flag (fp, &options, RES_STRICTERR, "strict-error");
         fputc ('\n', fp);
         if (options != 0)
           fprintf (fp, "; error: unresolved option bits: 0x%x\n", options);
@@ -741,6 +742,15 @@  struct test_case test_cases[] =
      "nameserver 192.0.2.1\n"
      "; nameserver[0]: [192.0.2.1]:53\n"
     },
+    {.name = "strict-error flag",
+     .conf = "options strict-error\n"
+     "nameserver 192.0.2.1\n",
+     .expected = "options strict-error\n"
+     "search example.com\n"
+     "; search[0]: example.com\n"
+     "nameserver 192.0.2.1\n"
+     "; nameserver[0]: [192.0.2.1]:53\n"
+    },
     { NULL }
   };
 
diff --git a/resolv/tst-resolv-semi-failure.c b/resolv/tst-resolv-semi-failure.c
index aa9798b5a7..b7681210f4 100644
--- a/resolv/tst-resolv-semi-failure.c
+++ b/resolv/tst-resolv-semi-failure.c
@@ -67,6 +67,9 @@  response (const struct resolv_response_context *ctx,
   resolv_response_close_record (b);
 }
 
+/* Set to 1 if strict error checking is enabled.  */
+static int do_strict_error;
+
 static void
 check_one (void)
 {
@@ -83,7 +86,10 @@  check_one (void)
       struct addrinfo *ai;
       int ret = getaddrinfo ("www.example", "80", &hints, &ai);
       const char *expected;
-      if (ret == 0 && ai->ai_next != NULL)
+      /* In strict-error mode, a switch to the second name server
+         happens, and both responses are received, so a single
+         response is a bug.  */
+      if (do_strict_error || (ret == 0 && ai->ai_next != NULL))
         expected = ("address: STREAM/TCP 192.0.2.17 80\n"
                     "address: STREAM/TCP 2001:db8::1 80\n");
       else
@@ -99,33 +105,36 @@  check_one (void)
 static int
 do_test (void)
 {
-  for (int do_single_lookup = 0; do_single_lookup < 2; ++do_single_lookup)
-    {
-      struct resolv_test *aux = resolv_test_start
-        ((struct resolv_redirect_config)
-         {
-           .response_callback = response,
-         });
+  for (do_strict_error = 0; do_strict_error < 2; ++do_strict_error)
+    for (int do_single_lookup = 0; do_single_lookup < 2; ++do_single_lookup)
+      {
+        struct resolv_test *aux = resolv_test_start
+          ((struct resolv_redirect_config)
+           {
+             .response_callback = response,
+           });
 
-      if (do_single_lookup)
-        _res.options |= RES_SNGLKUP;
+        if (do_strict_error)
+          _res.options |= RES_STRICTERR;
+        if (do_single_lookup)
+          _res.options |= RES_SNGLKUP;
 
-      for (int do_fail_aaaa = 0; do_fail_aaaa < 2; ++do_fail_aaaa)
-        {
-          fail_aaaa = do_fail_aaaa;
+        for (int do_fail_aaaa = 0; do_fail_aaaa < 2; ++do_fail_aaaa)
+          {
+            fail_aaaa = do_fail_aaaa;
 
-          rcode = 2; /* SERVFAIL.  */
-          check_one ();
+            rcode = 2; /* SERVFAIL.  */
+            check_one ();
 
-          rcode = 4; /* NOTIMP.  */
-          check_one ();
+            rcode = 4; /* NOTIMP.  */
+            check_one ();
 
-          rcode = 5; /* REFUSED.  */
-          check_one ();
-        }
+            rcode = 5; /* REFUSED.  */
+            check_one ();
+          }
 
-      resolv_test_end (aux);
-    }
+        resolv_test_end (aux);
+      }
 
   return 0;
 }