Patchwork login: pututline needs to validate the cache under write lock [BZ #24882]

login
register
mail settings
Submitter Florian Weimer
Date Aug. 12, 2019, 1:48 p.m.
Message ID <87o90uo5ok.fsf@oldenburg2.str.redhat.com>
Download mbox | patch
Permalink /patch/34059/
State New
Headers show

Comments

Florian Weimer - Aug. 12, 2019, 1:48 p.m.
2019-08-12  Florian Weimer  <fweimer@redhat.com>

	[BZ #24882]
	* login/utmp_file.c (file_locking_failed): Report EAGAIN for a
	failure to acquire the lock.
	(internal_getut_nolock): New function.
	Extracted from internal_getut_r.
	(internal_getut_r): Call it.
	(__libc_pututline): Acquire write lock before all read operations.
Florian Weimer - Aug. 21, 2019, 1:20 p.m.
* Florian Weimer:

> 2019-08-12  Florian Weimer  <fweimer@redhat.com>
>
> 	[BZ #24882]
> 	* login/utmp_file.c (file_locking_failed): Report EAGAIN for a
> 	failure to acquire the lock.
> 	(internal_getut_nolock): New function.
> 	Extracted from internal_getut_r.
> 	(internal_getut_r): Call it.
> 	(__libc_pututline): Acquire write lock before all read operations.

It has been pointed out that this causes too much overhead with high
load.  Scanning entries using the read lock only is apparently important
for performance.

Thanks,
Florian
Adhemerval Zanella Netto - Aug. 21, 2019, 7:49 p.m.
On 21/08/2019 10:20, Florian Weimer wrote:
> * Florian Weimer:
> 
>> 2019-08-12  Florian Weimer  <fweimer@redhat.com>
>>
>> 	[BZ #24882]
>> 	* login/utmp_file.c (file_locking_failed): Report EAGAIN for a
>> 	failure to acquire the lock.
>> 	(internal_getut_nolock): New function.
>> 	Extracted from internal_getut_r.
>> 	(internal_getut_r): Call it.
>> 	(__libc_pututline): Acquire write lock before all read operations.
> 
> It has been pointed out that this causes too much overhead with high
> load.  Scanning entries using the read lock only is apparently important
> for performance.

So do you intend to resend a new version or this extra overhead is required
to fully fix the issue?
Florian Weimer - Aug. 21, 2019, 8:16 p.m.
* Adhemerval Zanella:

> On 21/08/2019 10:20, Florian Weimer wrote:
>> * Florian Weimer:
>> 
>>> 2019-08-12  Florian Weimer  <fweimer@redhat.com>
>>>
>>> 	[BZ #24882]
>>> 	* login/utmp_file.c (file_locking_failed): Report EAGAIN for a
>>> 	failure to acquire the lock.
>>> 	(internal_getut_nolock): New function.
>>> 	Extracted from internal_getut_r.
>>> 	(internal_getut_r): Call it.
>>> 	(__libc_pututline): Acquire write lock before all read operations.
>> 
>> It has been pointed out that this causes too much overhead with high
>> load.  Scanning entries using the read lock only is apparently important
>> for performance.
>
> So do you intend to resend a new version or this extra overhead is required
> to fully fix the issue?

I want to fix bug 24902 first:

  <https://sourceware.org/ml/libc-alpha/2019-08/msg00562.html>

This is probably the more relevant sources of stale cache entries, due
to the internal desynchronization of the cache contents and the file
offset.

I don't think it's expected that entries are repurposed during regular
use on utmp/utmpx files, particularly not with the glibc interfaces
(although one can always write to the files directly).

Thanks,
Florian

Patch

diff --git a/login/Makefile b/login/Makefile
index 92535f0aec..fab1eed127 100644
--- a/login/Makefile
+++ b/login/Makefile
@@ -43,7 +43,8 @@  endif
 subdir-dirs = programs
 vpath %.c programs
 
-tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin
+tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin \
+  tst-pututxline-cache
 
 # Build the -lutil library with these extra functions.
 extra-libs      := libutil
@@ -71,3 +72,5 @@  endif
 $(inst_libexecdir)/pt_chown: $(objpfx)pt_chown $(+force)
 	$(make-target-directory)
 	-$(INSTALL_PROGRAM) -m 4755 -o root $< $@
+
+$(objpfx)tst-pututxline-cache: $(shared-thread-library)
diff --git a/login/tst-pututxline-cache.c b/login/tst-pututxline-cache.c
new file mode 100644
index 0000000000..f66ee17c54
--- /dev/null
+++ b/login/tst-pututxline-cache.c
@@ -0,0 +1,184 @@ 
+/* Test case for cache invalidation after concurrent write (bug 24882).
+   Copyright (C) 2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/namespace.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/xthread.h>
+#include <support/xunistd.h>
+#include <utmp.h>
+#include <utmpx.h>
+
+/* Set to the path of the utmp file.  */
+static char *utmp_file;
+
+/* Used to synchronize the subprocesses.  The barrier itself is
+   allocated in shared memory.  */
+static pthread_barrier_t *barrier;
+
+/* setutxent with error checking.  */
+static void
+xsetutxent (void)
+{
+  errno = 0;
+  setutxent ();
+  TEST_COMPARE (errno, 0);
+}
+
+/* getutxent with error checking.  */
+static struct utmpx *
+xgetutxent (void)
+{
+  errno = 0;
+  struct utmpx *result = getutxent ();
+  if (result == NULL)
+    FAIL_EXIT1 ("getutxent: %m");
+  return result;
+}
+
+static void
+put_entry (const char *id, pid_t pid, const char *user, const char *line)
+{
+  struct utmpx ut =
+    {
+     .ut_type = LOGIN_PROCESS,
+     .ut_pid = pid,
+     .ut_host = "localhost",
+    };
+  strcpy (ut.ut_id, id);
+  strncpy (ut.ut_user, user, sizeof (ut.ut_user));
+  strncpy (ut.ut_line, line, sizeof (ut.ut_line));
+  TEST_VERIFY (pututxline (&ut) != NULL);
+}
+
+/* Use two cooperating subprocesses to avoid issues related to
+   unlock-on-close semantics of POSIX advisory locks.  */
+
+static __attribute__ ((noreturn)) void
+process1 (void)
+{
+  TEST_COMPARE (utmpname (utmp_file), 0);
+
+  /* Create an entry.  */
+  xsetutxent ();
+  put_entry ("1", 101, "root", "process1");
+
+  /* Retrieve the entry.  This will fill the internal cache.  */
+  {
+    errno = 0;
+    setutxent ();
+    TEST_COMPARE (errno, 0);
+    struct utmpx ut =
+      {
+       .ut_type = LOGIN_PROCESS,
+       .ut_line = "process1",
+      };
+    struct utmpx *result = getutxline (&ut);
+    if (result == NULL)
+      FAIL_EXIT1 ("getutxline (\"process1\"): %m");
+    TEST_COMPARE (result->ut_pid, 101);
+  }
+
+  /* Signal the other process to overwrite the entry.  */
+  xpthread_barrier_wait (barrier);
+
+  /* Wait for the other process to complete the write operation.  */
+  xpthread_barrier_wait (barrier);
+
+  /* Add another entry.  Note: This time, there is no setutxent call.  */
+  put_entry ("1", 103, "root", "process1");
+
+  _exit (0);
+}
+
+static void
+process2 (void *closure)
+{
+  /* Wait for the first process to write its entry.  */
+  xpthread_barrier_wait (barrier);
+
+  /* Truncate the file.  The glibc interface does not support
+     re-purposing records, but an external expiration mechanism may
+     trigger this.  */
+  TEST_COMPARE (truncate64 (utmp_file, 0), 0);
+
+  /* Write the replacement entry.  */
+  TEST_COMPARE (utmpname (utmp_file), 0);
+  xsetutxent ();
+  put_entry ("2", 102, "user", "process2");
+
+  /* Signal the other process that the entry has been replaced.  */
+  xpthread_barrier_wait (barrier);
+}
+
+static int
+do_test (void)
+{
+  xclose (create_temp_file ("tst-tumpx-cache-write-", &utmp_file));
+  {
+    pthread_barrierattr_t attr;
+    xpthread_barrierattr_init (&attr);
+    xpthread_barrierattr_setpshared (&attr, PTHREAD_SCOPE_PROCESS);
+    barrier = support_shared_allocate (sizeof (*barrier));
+    xpthread_barrier_init (barrier, &attr, 2);
+  }
+
+  /* Run both subprocesses in parallel.  */
+  {
+    pid_t pid1 = xfork ();
+    if (pid1 == 0)
+      process1 ();
+    support_isolate_in_subprocess (process2, NULL);
+    int status;
+    xwaitpid (pid1, &status, 0);
+    TEST_COMPARE (status, 0);
+  }
+
+  /* Check that the utmpx database contains the expected records.  */
+  {
+    TEST_COMPARE (utmpname (utmp_file), 0);
+    xsetutxent ();
+
+    struct utmpx *ut = xgetutxent ();
+    TEST_COMPARE_STRING (ut->ut_id, "2");
+    TEST_COMPARE (ut->ut_pid, 102);
+    TEST_COMPARE_STRING (ut->ut_user, "user");
+    TEST_COMPARE_STRING (ut->ut_line, "process2");
+
+    ut = xgetutxent ();
+    TEST_COMPARE_STRING (ut->ut_id, "1");
+    TEST_COMPARE (ut->ut_pid, 103);
+    TEST_COMPARE_STRING (ut->ut_user, "root");
+    TEST_COMPARE_STRING (ut->ut_line, "process1");
+
+    if (getutxent () != NULL)
+      FAIL_EXIT1 ("additional utmpx entry");
+  }
+
+  xpthread_barrier_destroy (barrier);
+  support_shared_free (barrier);
+  free (utmp_file);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/login/utmp_file.c b/login/utmp_file.c
index a3e9af1fa3..78a8b9d088 100644
--- a/login/utmp_file.c
+++ b/login/utmp_file.c
@@ -84,6 +84,8 @@  file_locking_failed (int fd, int type)
 
  int status = __fcntl64_nocancel (fd, F_SETLKW, &fl) < 0;
  int saved_errno = errno;
+ if (saved_errno == EINTR)
+   saved_errno = EAGAIN;
 
  /* Reset the signal handler and alarm.  We must reset the alarm
     before resetting the handler so our alarm does not generate a
@@ -184,19 +186,9 @@  __libc_getutent_r (struct utmp *buffer, struct utmp **result)
   return 0;
 }
 
-
 static int
-internal_getut_r (const struct utmp *id, struct utmp *buffer,
-		  bool *lock_failed)
+internal_getut_nolock (const struct utmp *id, struct utmp *buffer)
 {
-  int result = -1;
-
-  if (file_locking_failed (file_fd, F_RDLCK))
-    {
-      *lock_failed = true;
-      return -1;
-    }
-
   if (id->ut_type == RUN_LVL || id->ut_type == BOOT_TIME
       || id->ut_type == OLD_TIME || id->ut_type == NEW_TIME)
     {
@@ -211,7 +203,7 @@  internal_getut_r (const struct utmp *id, struct utmp *buffer,
 	    {
 	      __set_errno (ESRCH);
 	      file_offset = -1l;
-	      goto unlock_return;
+	      return -1;
 	    }
 	  file_offset += sizeof (struct utmp);
 
@@ -232,7 +224,7 @@  internal_getut_r (const struct utmp *id, struct utmp *buffer,
 	    {
 	      __set_errno (ESRCH);
 	      file_offset = -1l;
-	      goto unlock_return;
+	      return -1;
 	    }
 	  file_offset += sizeof (struct utmp);
 
@@ -240,12 +232,22 @@  internal_getut_r (const struct utmp *id, struct utmp *buffer,
 	    break;
 	}
     }
+  return 0;
+}
 
-  result = 0;
 
-unlock_return:
+static int
+internal_getut_r (const struct utmp *id, struct utmp *buffer,
+		  bool *lock_failed)
+{
+  if (file_locking_failed (file_fd, F_RDLCK))
+    {
+      *lock_failed = true;
+      return -1;
+    }
+
+  int result = internal_getut_nolock (id, buffer);
   file_locking_unlock (file_fd);
-
   return result;
 }
 
@@ -335,7 +337,6 @@  __libc_pututline (const struct utmp *data)
 
   struct utmp buffer;
   struct utmp *pbuf;
-  int found;
 
   if (! file_writable)
     {
@@ -357,7 +358,12 @@  __libc_pututline (const struct utmp *data)
       file_writable = true;
     }
 
+  /* Exclude readers and other writers early.  */
+  if (file_locking_failed (file_fd, F_WRLCK))
+    return NULL;
+
   /* Find the correct place to insert the data.  */
+  bool found = false;
   if (file_offset > 0
       && ((last_entry.ut_type == data->ut_type
 	   && (last_entry.ut_type == RUN_LVL
@@ -365,23 +371,32 @@  __libc_pututline (const struct utmp *data)
 	       || last_entry.ut_type == OLD_TIME
 	       || last_entry.ut_type == NEW_TIME))
 	  || __utmp_equal (&last_entry, data)))
-    found = 1;
-  else
     {
-      bool lock_failed = false;
-      found = internal_getut_r (data, &buffer, &lock_failed);
-
-      if (__builtin_expect (lock_failed, false))
+      /* The cached entry matches what we have read, but another
+	 process may have changed the entry on disk.  */
+      if (__lseek64 (file_fd, file_offset, SEEK_SET) < 0)
 	{
-	  __set_errno (EAGAIN);
+	  file_locking_unlock (file_fd);
 	  return NULL;
 	}
+      if (__read_nocancel (file_fd, &last_entry, sizeof (last_entry))
+	  != sizeof (last_entry))
+	{
+	  if (__lseek64 (file_fd, file_offset, SEEK_SET) < 0)
+	    {
+	      file_locking_unlock (file_fd);
+	      return NULL;
+	    }
+	  found = false;
+	}
+      else
+	found = __utmp_equal (&last_entry, data);
     }
 
-  if (file_locking_failed (file_fd, F_WRLCK))
-    return NULL;
+  if (!found)
+    found = internal_getut_nolock (data, &buffer) >= 0;
 
-  if (found < 0)
+  if (!found)
     {
       /* We append the next entry.  */
       file_offset = __lseek64 (file_fd, 0, SEEK_END);
@@ -410,7 +425,7 @@  __libc_pututline (const struct utmp *data)
     {
       /* If we appended a new record this is only partially written.
 	 Remove it.  */
-      if (found < 0)
+      if (!found)
 	(void) __ftruncate64 (file_fd, file_offset);
       pbuf = NULL;
     }