[7/7] misc: syslog: Use RFC5424

Message ID 20211005135631.3209020-8-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Use RFC5424 for syslog |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit fail Patch caused testsuite regressions

Commit Message

Adhemerval Zanella Oct. 5, 2021, 1:56 p.m. UTC
  Current syslog format provides limited timestamp information (not full
year information nor microsecond precision) and the timezone information
might incur in confusion [1] since there is no metadata information on
how it was generated.

So instead of using another ad-hoc syslog format, this patch adapts it
to follow RFC5424:

  - VERSION is set as 1.
  - TIMESTAMP is logged as described by RFC 3339 using UTC ('Z') with
    maximum allowed TIME-SECFRAC precision (microseconds).
  - HOSTNAME is obtained with gethostname (as allowed by the RFC).
  - PROCID is only set if LOG_PID is previously set by openlog.
  - MSGID and STRUCTURED-DATA are both set to NILVALUE.

Checked on x86_64-linux-gnu.

[1] https://sourceware.org/pipermail/libc-alpha/2021-March/123583.html
---
 misc/syslog.c     | 69 ++++++++++++++++++++++++++++++++---------------
 misc/tst-syslog.c | 28 +++++++++----------
 2 files changed, 60 insertions(+), 37 deletions(-)
  

Comments

Paul Eggert Oct. 5, 2021, 7:07 p.m. UTC | #1
On 10/5/21 6:56 AM, Adhemerval Zanella via Libc-alpha wrote:

> +struct timebuf_t
> +{
> +  char b[sizeof ("YYYY-MM-DDThh:mm:ss.nnnnnnZ")];
> +};

This won't work if people fiddle with their system clocks and set the 
time far in the future or past. I suggest including <intprops.h> and 
using the following instead:

   struct timebuf
   {
     /* Leave room for outlandish but possible years.
        The "+ 1" is for strftime's adding 1900 to tm_year.  */
     char b[INT_STRLEN_BOUND (int) + 1
	   + sizeof "-MM-DDThh:mm:ss.nnnnnnZ"];
   };


> +  struct tm now_tm;
> +  __gmtime64_r (&ts.tv_sec, &now_tm);

This messes up if __gmtime64_r fails, which is possible when time_t is 
very large (or very negative).

> +  __snprintf (timebuf->b, sizeof (timebuf->b),
> +              "%04d-%02d-%02dT%02d:%02d:%02d.%06dZ",
> +              now_tm.tm_year + 1900, now_tm.tm_mon + 1, now_tm.tm_mday,
> +              now_tm.tm_hour, now_tm.tm_min, now_tm.tm_sec,
> +              (int32_t) ts.tv_nsec / 1000);

This messes up if now_tm.tm_year + 1900 overflows. (strftime has a 
similar bug; it should get fixed too but one thing at a time.) Also, 
that 'int32_t' should be plain 'int', to match the format. And it's a 
bit better to not use casts when it's easy, as is the case here.

I suggest something like this instead (I haven't tested it):

   struct tm *gmtm = __gmtime64_r (&ts.tv_sec, &now_tm);
   if (gmtm == NULL)
     strcpy (timebuf->b, "-");
   else
     {
       size_t datebytes = __strftime_l (timebuf->b, sizeof timebuf->b,
                                        "%FT%T", gmtm, _nl_C_locobj_ptr);
       int usec = ts.tv_nsec / 1000;
       __snprintf (timebuf->b + datebytes, sizeof timebuf->b - datebytes,
                   ".%06dZ", usec);
     }

The "-" is as per RFC 5434's NILVALUE.
  
Adhemerval Zanella Oct. 6, 2021, 7:36 p.m. UTC | #2
On 05/10/2021 16:07, Paul Eggert wrote:
> On 10/5/21 6:56 AM, Adhemerval Zanella via Libc-alpha wrote:
> 
>> +struct timebuf_t
>> +{
>> +  char b[sizeof ("YYYY-MM-DDThh:mm:ss.nnnnnnZ")];
>> +};
> 
> This won't work if people fiddle with their system clocks and set the time far in the future or past. I suggest including <intprops.h> and using the following instead:
> 
>   struct timebuf
>   {
>     /* Leave room for outlandish but possible years.
>        The "+ 1" is for strftime's adding 1900 to tm_year.  */
>     char b[INT_STRLEN_BOUND (int) + 1
>        + sizeof "-MM-DDThh:mm:ss.nnnnnnZ"];
>   };

Indeed, I did not consider it and the suggestion makes sense.

> 
> 
>> +  struct tm now_tm;
>> +  __gmtime64_r (&ts.tv_sec, &now_tm);
> 
> This messes up if __gmtime64_r fails, which is possible when time_t is very large (or very negative).
> 
>> +  __snprintf (timebuf->b, sizeof (timebuf->b),
>> +              "%04d-%02d-%02dT%02d:%02d:%02d.%06dZ",
>> +              now_tm.tm_year + 1900, now_tm.tm_mon + 1, now_tm.tm_mday,
>> +              now_tm.tm_hour, now_tm.tm_min, now_tm.tm_sec,
>> +              (int32_t) ts.tv_nsec / 1000);
> 
> This messes up if now_tm.tm_year + 1900 overflows. (strftime has a similar bug; it should get fixed too but one thing at a time.) Also, that 'int32_t' should be plain 'int', to match the format. And it's a bit better to not use casts when it's easy, as is the case here.
> 
> I suggest something like this instead (I haven't tested it):
> 
>   struct tm *gmtm = __gmtime64_r (&ts.tv_sec, &now_tm);
>   if (gmtm == NULL)
>     strcpy (timebuf->b, "-");
>   else
>     {
>       size_t datebytes = __strftime_l (timebuf->b, sizeof timebuf->b,
>                                        "%FT%T", gmtm, _nl_C_locobj_ptr);
>       int usec = ts.tv_nsec / 1000;
>       __snprintf (timebuf->b + datebytes, sizeof timebuf->b - datebytes,
>                   ".%06dZ", usec);
>     }
> 
> The "-" is as per RFC 5434's NILVALUE.

It make sense, I changes to your suggestion thanks.
  

Patch

diff --git a/misc/syslog.c b/misc/syslog.c
index 99fa595b40..6ad9452c8a 100644
--- a/misc/syslog.c
+++ b/misc/syslog.c
@@ -42,6 +42,10 @@  static char sccsid[] = "@(#)syslog.c	8.4 (Berkeley) 3/18/94";
 #include <sys/un.h>
 #include <stdio_ext.h>
 
+#ifndef HOST_NAME_MAX
+# define HOST_NAME_MAX 64
+#endif
+
 static int LogType = SOCK_DGRAM;	/* type of socket connection */
 static int LogFile = -1;		/* fd for log */
 static bool connected;		/* have done connect */
@@ -117,6 +121,31 @@  __vsyslog_chk(int pri, int flag, const char *fmt, va_list ap)
   __vsyslog_internal (pri, fmt, ap, (flag > 0) ? PRINTF_FORTIFY : 0);
 }
 
+/* Defined by RFC5424.  */
+#define NILVALUE "-"
+
+struct timebuf_t
+{
+  char b[sizeof ("YYYY-MM-DDThh:mm:ss.nnnnnnZ")];
+};
+
+/* Fill TIMEBUF with a RFC3339 timestamp.  Use UTC time and maximum
+   TIME-SECFRAC accurancy allowed (6 digits for microseconds).  */
+static void
+syslog_rfc3339_timestamp (struct timebuf_t *timebuf)
+{
+  struct __timespec64 ts;
+  __clock_gettime64 (TIME_CLOCK_GETTIME_CLOCKID, &ts);
+  struct tm now_tm;
+  __gmtime64_r (&ts.tv_sec, &now_tm);
+
+  __snprintf (timebuf->b, sizeof (timebuf->b),
+              "%04d-%02d-%02dT%02d:%02d:%02d.%06dZ",
+              now_tm.tm_year + 1900, now_tm.tm_mon + 1, now_tm.tm_mday,
+              now_tm.tm_hour, now_tm.tm_min, now_tm.tm_sec,
+              (int32_t) ts.tv_nsec / 1000);
+}
+
 void
 __vsyslog_internal(int pri, const char *fmt, va_list ap,
 		   unsigned int mode_flags)
@@ -149,35 +178,31 @@  __vsyslog_internal(int pri, const char *fmt, va_list ap,
   if ((pri & LOG_FACMASK) == 0)
     pri |= LogFacility;
 
-  pid_t pid = LogStat & LOG_PID ? __getpid () : 0;
 
-  enum
-    {
-      timebuf_size = 3+1                     /* "%h "  */
-                     + 2+1                   /* "%e "  */
-                     + 2+1 + 2+1 + 2+1 + 1,  /* "%T "  */
+  struct timebuf_t timestamp;
+  syslog_rfc3339_timestamp (&timestamp);
 
-      bufs_size    = 1024
-    };
+  char hostname[HOST_NAME_MAX];
+  if (__gethostname (hostname, sizeof (hostname)) == -1)
+    strcpy (hostname, NILVALUE);
 
-  /* "%h %e %H:%M:%S "  */
-  char timestamp[timebuf_size];
-  time_t now = time_now ();
-  struct tm now_tm;
-  __localtime_r (&now, &now_tm);
-  __strftime_l (timestamp, sizeof (timestamp), "%h %e %T ", &now_tm,
-                _nl_C_locobj_ptr);
+  pid_t pid = LogStat & LOG_PID ? __getpid () : 0;
 
-#define SYSLOG_HEADER(__pri, __timestamp, __msgoff, pid) \
-  "<%d>%s %n%s%s%.0d%s: ",                               \
-  __pri, __timestamp, __msgoff,                          \
-  LogTag == NULL ? __progname : LogTag,                  \
-  pid != 0 ? "[" : "", pid, pid != 0 ? "]" : ""
+#define SYSLOG_HEADER(__pri, __timestamp, __msgoff, __hostname, __pid) \
+  "<%d>1 %s %n%s %s %s%.0d %s %s ",                      \
+  __pri,                                /* PRI  */       \
+  __timestamp.b,                        /* TIMESTAMP  */ \
+  __msgoff, __hostname,                 /* HOSTNAME  */  \
+  LogTag == NULL ? __progname : LogTag, /* APP-NAME  */  \
+  NILVALUE + !!__pid, __pid,            /* PROCID  */    \
+  NILVALUE,                             /* MSGID  */     \
+  NILVALUE                              /* STRUCT-DATA */
 
   /* Try to use a static buffer as an optimization.  */
+  enum { bufs_size  = 1024 };
   char bufs[bufs_size];
   int l = __snprintf (bufs, sizeof bufs,
-                      SYSLOG_HEADER (pri, timestamp, &msgoff, pid));
+                      SYSLOG_HEADER (pri, timestamp, &msgoff, hostname, pid));
   if (l < sizeof (bufs))
     {
       va_list apc;
@@ -205,7 +230,7 @@  __vsyslog_internal(int pri, const char *fmt, va_list ap,
       if (f != NULL)
         {
           __fsetlocking (f, FSETLOCKING_BYCALLER);
-          fprintf (f, SYSLOG_HEADER (pri, timestamp, &msgoff, pid));
+          fprintf (f, SYSLOG_HEADER (pri, timestamp, &msgoff, hostname, pid));
           /* Restore errno for %m format.  */
           __set_errno (saved_errno);
           __vfprintf_internal (f, fmt, ap, mode_flags);
diff --git a/misc/tst-syslog.c b/misc/tst-syslog.c
index 168fb72ebc..5d8737129b 100644
--- a/misc/tst-syslog.c
+++ b/misc/tst-syslog.c
@@ -18,6 +18,7 @@ 
 
 #include <array_length.h>
 #include <fcntl.h>
+#include <intprops.h>
 #include <paths.h>
 #include <netinet/in.h>
 #include <support/capture_subprocess.h>
@@ -201,7 +202,7 @@  check_openlog_message (const struct msg_t *msg, int msgnum,
     {
       if (options & LOG_PID)
         TEST_COMPARE (msg->pid, pid);
-      TEST_COMPARE_STRING (msg->ident, expected_ident);
+      TEST_COMPARE_STRING (msg->ident, OPENLOG_IDENT);
       TEST_COMPARE (msg->facility, LOG_LOCAL0);
     }
   else if (msgnum < 2 * array_length (priorities))
@@ -212,28 +213,25 @@  check_openlog_message (const struct msg_t *msg, int msgnum,
   return true;
 }
 
+#define NILVALUE "-"
+
 static struct msg_t
 parse_syslog_msg (const char *msg)
 {
   struct msg_t r = { .pid = -1 };
   int number;
+  char procid[INT_BUFSIZE_BOUND (pid_t)];
 
   /* The message in the form:
      <179>Apr  8 14:51:19 tst-syslog: syslog message 176 3  */
-  int n = sscanf (msg, "<%3d>%*s %*d %*d:%*d:%*d %32s %64s %*d %*d",
-                  &number, r.ident, r.msg);
-  TEST_COMPARE (n, 3);
+  int n = sscanf (msg, "<%6d>1 %*s %*s %32s %s - - %s",
+                  &number, r.ident, procid, r.msg);
+  TEST_COMPARE (n, 4);
 
   r.facility = number & LOG_FACMASK;
   r.priority = number & LOG_PRIMASK;
 
-  char *pid_start = strchr (r.ident, '[');
-  if (pid_start != NULL)
-    {
-       char *pid_end = strchr (r.ident, ']');
-       if (pid_end != NULL)
-         r.pid = strtoul (pid_start + 1, NULL, 10);
-    }
+  r.pid = strcmp (procid, NILVALUE) == 0 ? 0 : strtoul (procid, NULL, 10);
 
   return r;
 }
@@ -246,9 +244,9 @@  parse_syslog_console (const char *msg)
   struct msg_t r;
 
   /* The message in the form:
-     openlog_ident: syslog_message 128 0  */
-  int n = sscanf (msg, "%32s %64s %d %d",
-      r.ident, r.msg, &facility, &priority);
+     mymachine openlog_ident PID - syslog_message 128 0  */
+  int n = sscanf (msg, "%*s %32s %*s - - %s %d %d",
+		  r.ident, r.msg, &facility, &priority);
   TEST_COMPARE (n, 4);
 
   r.facility = facility;
@@ -384,7 +382,7 @@  check_syslog_console_read (FILE *fp)
   while (fgets (buf, sizeof (buf), fp) != NULL)
     {
       struct msg_t msg = parse_syslog_console (buf);
-      TEST_COMPARE_STRING (msg.ident, OPENLOG_IDENT ":");
+      TEST_COMPARE_STRING (msg.ident, OPENLOG_IDENT);
       TEST_COMPARE (msg.priority, priorities[msgnum]);
       TEST_COMPARE (msg.facility, LOG_LOCAL0);