[v22,1/9] posix: allow getopt_long to match translated option names

Message ID 54fc5096228e57b86e75f5e2fb8e322d5254d431.1776957778.git.vivien@planete-kraus.eu (mailing list archive)
State New
Headers
Series Support translated long option names in getopt and argp |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Vivien Kraus April 23, 2026, 4:03 p.m. UTC
  It is possible to support translated long option names in a program
with no change to glibc by duplicating the option names in the struct
options array: one version untranslated, and one version translated.
However, doing so is significant work for all packages.

With this change, getopt first tries to match the untranslated option
names, then the translated option names if no match is found.
Abbreviations only match the untranslated names.

_getopt_internal{_r} has a new argument: a translation function
pointer.  This way, we can pass NULL to avoid linking to gettext in
the posix version of getopt, or pass something that calls gettext
otherwise.

The test tstgetoptl is adapted from tstgetopt and modernized to use
the test driver.
---
 NEWS                |   2 +
 manual/getopt.texi  |  25 ++++++--
 posix/Makefile      |  13 +++++
 posix/getopt.c      |  92 ++++++++++++++++++++++++-----
 posix/getopt1.c     |  11 ++--
 posix/getopt_int.h  |   9 ++-
 posix/tstgetoptl.c  | 139 ++++++++++++++++++++++++++++++++++++++++++++
 posix/tstgetoptl.po |  29 +++++++++
 8 files changed, 293 insertions(+), 27 deletions(-)
 create mode 100644 posix/tstgetoptl.c
 create mode 100644 posix/tstgetoptl.po
  

Patch

diff --git a/NEWS b/NEWS
index eac9322161..4e53a079e9 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,8 @@  Major new features:
 
 * New locale added: hrx_BR (Hunsrik language spoken in Brazil).
 
+* The getopt_long function now accepts translated long option names.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * Although malloc and related functions currently return pointers
diff --git a/manual/getopt.texi b/manual/getopt.texi
index 79a942307c..5ae22a1595 100644
--- a/manual/getopt.texi
+++ b/manual/getopt.texi
@@ -202,6 +202,15 @@  declared in @file{getopt.h}, not @file{unistd.h}.  You should make every
 program accept long options if it uses any options, for this takes
 little extra work and helps beginners remember how to use the program.
 
+Both long option names and their translations provided by the program
+for the user's current locale are recognized.  This helps users of
+your program who do not speak English understand the meaning of the
+options, and it does not break the function of the program in scripts
+if the untranslated option names are used.  When a command line is
+shared with people who may use another locale, users should be
+encouraged to use the untranslated option names, or to state the
+locale that the command line was written for.
+
 @deftp {Data Type} {struct option}
 @standards{GNU, getopt.h}
 This structure describes a single long option name for the sake of
@@ -213,7 +222,9 @@  The @code{struct option} structure has these fields:
 
 @table @code
 @item const char *name
-This field is the name of the option.  It is a string.
+This field is the name of the option.  It is a string.  In order for
+@code{getopt_long} to accept either the long option name or its
+translated form, you should mark this string for translation.
 
 @item int has_arg
 This field says whether the option takes an argument.  It is an integer,
@@ -248,10 +259,14 @@  When @code{getopt_long} encounters a short option, it does the same
 thing that @code{getopt} would do: it returns the character code for the
 option, and stores the option's argument (if it has one) in @code{optarg}.
 
-When @code{getopt_long} encounters a long option, it takes actions based
-on the @code{flag} and @code{val} fields of the definition of that
-option.  The option name may be abbreviated as long as the abbreviation is
-unique.
+When @code{getopt_long} encounters a long option or its translation in
+the current textdomain, it takes actions based on the @code{flag} and
+@code{val} fields of the definition of that option.  The untranslated
+option name may be abbreviated as long as the abbreviation is
+unique.  No abbreviation of the translated option name is recognized.
+Since the untranslated option names have precedence over the
+translated option names, it is not possible to hide or divert an
+option with a translation.
 
 If @code{flag} is a null pointer, then @code{getopt_long} returns the
 contents of @code{val} to indicate which option it found.  You should
diff --git a/posix/Makefile b/posix/Makefile
index a5e5162c61..8755f42bdc 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -332,6 +332,7 @@  tests := \
   tst-wordexp-nocmd \
   tst-wordexp-reuse \
   tstgetopt \
+  tstgetoptl \
   # tests
 
 # Test for the glob symbol version that was replaced in glibc 2.27.
@@ -529,6 +530,7 @@  LOCALES := \
   da_DK.ISO-8859-1 \
   de_DE.ISO-8859-1 \
   de_DE.UTF-8 \
+  en_GB.UTF-8 \
   en_US.UTF-8 \
   es_US.ISO-8859-1 \
   es_US.UTF-8 \
@@ -802,3 +804,14 @@  tst-wordexp-reuse-ENV += MALLOC_TRACE=$(objpfx)tst-wordexp-reuse.mtrace \
 $(objpfx)tst-wordexp-reuse-mem.out: $(objpfx)tst-wordexp-reuse.out
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-wordexp-reuse.mtrace > $@; \
 	$(evaluate-test)
+
+# tstgetoptl uses a translation catalog for translated option names.
+tstgetoptl_mo = $(objpfx)domaindir/en_GB/LC_MESSAGES/tstgetoptl.mo
+
+$(tstgetoptl_mo): tstgetoptl.po
+	$(make-target-directory)
+	msgfmt -o $@T $<
+	mv -f $@T $@
+
+$(objpfx)tstgetoptl.out: $(tstgetoptl_mo) $(gen-locales)
+CFLAGS-tstgetoptl.c += -DOBJPFX=\"$(objpfx)\"
diff --git a/posix/getopt.c b/posix/getopt.c
index 3e10579670..cdc02d4da9 100644
--- a/posix/getopt.c
+++ b/posix/getopt.c
@@ -182,6 +182,24 @@  exchange (char **argv, struct _getopt_data *d)
   d->__last_nonopt = d->optind;
 }
 
+/* Return true iff the first ARGUMENT_LENGTH bytes of ARGUMENT spell
+   out, in full, the name that TRANSLATE yields for OPT_NAME.  When
+   TRANSLATE is a null pointer, OPT_NAME itself is compared.  A mere
+   prefix of that name (an abbreviation) is never a match.  */
+static bool
+match_translated_option_name (char *(*translate) (const char *msgid),
+			      const char *argument, size_t argument_length,
+			      const char *opt_name)
+{
+  const char *candidate
+    = translate == NULL ? opt_name : translate (opt_name);
+
+  /* Equal over ARGUMENT_LENGTH bytes makes ARGUMENT a prefix of
+     CANDIDATE; it is the whole name only if CANDIDATE ends there.  */
+  return (strncmp (candidate, argument, argument_length) == 0
+	  && candidate[argument_length] == '\0');
+}
+
 /* Process the argument starting with d->__nextchar as a long option.
    d->optind should *not* have been advanced over this argument.
 
@@ -194,7 +212,8 @@  static int
 process_long_option (int argc, char **argv, const char *optstring,
 		     const struct option *longopts, int *longind,
 		     int long_only, struct _getopt_data *d,
-		     int print_errors, const char *prefix)
+		     int print_errors, const char *prefix,
+		     char *(*translate) (const char *msgid))
 {
   char *nameend;
   size_t namelen;
@@ -202,6 +221,7 @@  process_long_option (int argc, char **argv, const char *optstring,
   const struct option *pfound = NULL;
   int n_options;
   int option_index;
+  const char *translated_option_name;
 
   for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
     /* Do nothing.  */ ;
@@ -221,7 +241,22 @@  process_long_option (int argc, char **argv, const char *optstring,
 
   if (pfound == NULL)
     {
-      /* Didn't find an exact match, so look for abbreviations.  */
+      /* Didn't find an exact match, try with translated option
+	 names.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	if (match_translated_option_name (translate, d->__nextchar, namelen, p->name))
+	  {
+	    /* Exact match found with translation.  */
+	    pfound = p;
+	    break;
+	  }
+    }
+
+  if (pfound == NULL)
+    {
+      /* Didn't find an exact match with translations, so look for
+	 abbreviations, but only for the option name in the C
+	 locale.  */
       unsigned char *ambig_set = NULL;
       int ambig_malloced = 0;
       int ambig_fallback = 0;
@@ -341,10 +376,20 @@  process_long_option (int argc, char **argv, const char *optstring,
       else
 	{
 	  if (print_errors)
-	    fprintf (stderr,
-		     _("%s: option '%s%s' doesn't allow an argument\n"),
-		     argv[0], prefix, pfound->name);
-
+	    {
+	      /* TRANSLATE may be NULL; then only the plain name exists.  */
+	      translated_option_name
+		= translate != NULL ? translate (pfound->name) : pfound->name;
+	      if (strcmp (translated_option_name, pfound->name) != 0)
+		/* A distinct translation exists: print both names.  */
+		fprintf (stderr,
+			 _("%s: option '%s%s' / '%s%s' doesn't allow an argument\n"),
+			 argv[0], prefix, translated_option_name, prefix, pfound->name);
+	      else
+		fprintf (stderr,
+			 _("%s: option '%s%s' doesn't allow an argument\n"),
+			 argv[0], prefix, pfound->name);
+	    }
 	  d->optopt = pfound->val;
 	  return '?';
 	}
@@ -356,9 +401,19 @@  process_long_option (int argc, char **argv, const char *optstring,
       else
 	{
 	  if (print_errors)
-	    fprintf (stderr,
-		     _("%s: option '%s%s' requires an argument\n"),
-		     argv[0], prefix, pfound->name);
+	    {
+	      /* TRANSLATE may be NULL: fall back to the untranslated name.  */
+	      translated_option_name
+		= translate != NULL ? translate (pfound->name) : pfound->name;
+	      if (strcmp (translated_option_name, pfound->name) != 0)
+		fprintf (stderr,
+			 _("%s: option '%s%s' / '%s%s' requires an argument\n"),
+			 argv[0], prefix, translated_option_name, prefix, pfound->name);
+	      else
+		fprintf (stderr,
+			 _("%s: option '%s%s' requires an argument\n"),
+			 argv[0], prefix, pfound->name);
+	    }
 
 	  d->optopt = pfound->val;
 	  return optstring[0] == ':' ? ':' : '?';
@@ -470,7 +525,8 @@  _getopt_initialize (_GL_UNUSED int argc,
 int
 _getopt_internal_r (int argc, char **argv, const char *optstring,
 		    const struct option *longopts, int *longind,
-		    int long_only, struct _getopt_data *d, int posixly_correct)
+		    int long_only, struct _getopt_data *d, int posixly_correct,
+		    char *(*translate) (const char *msgid))
 {
   int print_errors = d->opterr;
 
@@ -573,7 +629,8 @@  _getopt_internal_r (int argc, char **argv, const char *optstring,
 	      d->__nextchar = argv[d->optind] + 2;
 	      return process_long_option (argc, argv, optstring, longopts,
 					  longind, long_only, d,
-					  print_errors, "--");
+					  print_errors, "--",
+					  translate);
 	    }
 
 	  /* If long_only and the ARGV-element has the form "-f",
@@ -595,7 +652,8 @@  _getopt_internal_r (int argc, char **argv, const char *optstring,
 	      d->__nextchar = argv[d->optind] + 1;
 	      code = process_long_option (argc, argv, optstring, longopts,
 					  longind, long_only, d,
-					  print_errors, "-");
+					  print_errors, "-",
+					  translate);
 	      if (code != -1)
 		return code;
 	    }
@@ -649,7 +707,8 @@  _getopt_internal_r (int argc, char **argv, const char *optstring,
 	d->__nextchar = d->optarg;
 	d->optarg = NULL;
 	return process_long_option (argc, argv, optstring, longopts, longind,
-				    0 /* long_only */, d, print_errors, "-W ");
+				    0 /* long_only */, d, print_errors, "-W ",
+				    translate);
       }
     if (temp[1] == ':')
       {
@@ -702,7 +761,7 @@  _getopt_internal_r (int argc, char **argv, const char *optstring,
 int
 _getopt_internal (int argc, char **argv, const char *optstring,
 		  const struct option *longopts, int *longind, int long_only,
-		  int posixly_correct)
+		  int posixly_correct, char *(*translate) (const char *))
 {
   int result;
 
@@ -711,7 +770,7 @@  _getopt_internal (int argc, char **argv, const char *optstring,
 
   result = _getopt_internal_r (argc, argv, optstring, longopts,
 			       longind, long_only, &getopt_data,
-			       posixly_correct);
+			       posixly_correct, translate);
 
   optind = getopt_data.optind;
   optarg = getopt_data.optarg;
@@ -729,7 +788,8 @@  _getopt_internal (int argc, char **argv, const char *optstring,
   NAME (int argc, char *const *argv, const char *optstring)	\
   {								\
     return _getopt_internal (argc, (char **)argv, optstring,	\
-			     NULL, NULL, 0, POSIXLY_CORRECT);	\
+			     NULL, NULL, 0, POSIXLY_CORRECT,	\
+			     NULL);				\
   }
 
 #ifdef _LIBC
diff --git a/posix/getopt1.c b/posix/getopt1.c
index e6fc867de0..6342e2d417 100644
--- a/posix/getopt1.c
+++ b/posix/getopt1.c
@@ -19,6 +19,9 @@ 
 
 #ifndef _LIBC
 # include <config.h>
+# include "gettext.h"
+#else
+# include <libintl.h>
 #endif
 
 #include "getopt.h"
@@ -29,7 +32,7 @@  getopt_long (int argc, char *__getopt_argv_const *argv, const char *options,
 	     const struct option *long_options, int *opt_index)
 {
   return _getopt_internal (argc, (char **) argv, options, long_options,
-			   opt_index, 0, 0);
+			   opt_index, 0, 0, gettext);
 }
 
 int
@@ -38,7 +41,7 @@  _getopt_long_r (int argc, char **argv, const char *options,
 		struct _getopt_data *d)
 {
   return _getopt_internal_r (argc, argv, options, long_options, opt_index,
-			     0, d, 0);
+			     0, d, 0, gettext);
 }
 
 /* Like getopt_long, but '-' as well as '--' can indicate a long option.
@@ -52,7 +55,7 @@  getopt_long_only (int argc, char *__getopt_argv_const *argv,
 		  const struct option *long_options, int *opt_index)
 {
   return _getopt_internal (argc, (char **) argv, options, long_options,
-			   opt_index, 1, 0);
+			   opt_index, 1, 0, gettext);
 }
 
 int
@@ -61,7 +64,7 @@  _getopt_long_only_r (int argc, char **argv, const char *options,
 		     struct _getopt_data *d)
 {
   return _getopt_internal_r (argc, argv, options, long_options, opt_index,
-			     1, d, 0);
+			     1, d, 0, gettext);
 }
 
 
diff --git a/posix/getopt_int.h b/posix/getopt_int.h
index 023d8a4fe6..579233b08c 100644
--- a/posix/getopt_int.h
+++ b/posix/getopt_int.h
@@ -22,10 +22,14 @@ 
 
 #include <getopt.h>
 
+/* The translate argument is optional.  The POSIX getopt function
+   passes NULL so that it does not depend on the gettext functions;
+   translated option names are then not matched.  */
 extern int _getopt_internal (int ___argc, char **___argv,
 			     const char *__shortopts,
 			     const struct option *__longopts, int *__longind,
-			     int __long_only, int __posixly_correct);
+			     int __long_only, int __posixly_correct,
+			     char *(*translate) (const char *msgid));
 
 
 /* Reentrant versions which can handle parsing multiple argument
@@ -102,7 +106,8 @@  extern int _getopt_internal_r (int ___argc, char **___argv,
 			       const char *__shortopts,
 			       const struct option *__longopts, int *__longind,
 			       int __long_only, struct _getopt_data *__data,
-			       int __posixly_correct);
+			       int __posixly_correct,
+			       char *(*translate) (const char *msgid));
 
 extern int _getopt_long_r (int ___argc, char **___argv,
 			   const char *__shortopts,
diff --git a/posix/tstgetoptl.c b/posix/tstgetoptl.c
new file mode 100644
index 0000000000..afefcf46f5
--- /dev/null
+++ b/posix/tstgetoptl.c
@@ -0,0 +1,139 @@ 
+/* Check that getopt uses translated option names.  */
+/* Copyright (C) 2026 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libintl.h>
+#include <locale.h>
+#include <array_length.h>
+#include <support/support.h>
+#include <support/check.h>
+
+/* This tests that --colour is accepted as a translation of --color.
+   This echoes tstgetopt.c, where --colour was an option name alias
+   for --color, so it had to be listed twice.  */
+
+/* Set up translations: select the en_GB locale, so that colour means
+   color, and bind the tstgetoptl catalog built from tstgetoptl.po.
+   As a special case, we also check that non-translated options have
+   precedence over translated options, by translating "optional" as
+   "required".  We also check that getopt only matches translations
+   for actual options, by having the user pass --flavour (a known
+   translation of flavor) without the program having a --flavor option.  */
+
+static void
+prepare_localedir (void)
+{
+  unsetenv ("LANGUAGE");
+  xsetlocale (LC_MESSAGES, "en_GB.UTF-8");
+  TEST_VERIFY_EXIT (bindtextdomain ("tstgetoptl", OBJPFX "domaindir") != NULL);
+  TEST_VERIFY_EXIT (textdomain ("tstgetoptl") != NULL);
+  /* Both msgids come from tstgetoptl.po; this shows it was loaded.  */
+  TEST_COMPARE_STRING (gettext ("color"), "colour");
+  TEST_COMPARE_STRING (gettext ("flavor"), "flavour");
+}
+
+/* Return the test's argv; *ARGC excludes its terminating NULL.  */
+static char **
+prepare_argv (int *argc)
+{
+  static char *arguments[] = {
+    (char *) "tstgetoptl", (char *) "--required", (char *) "foobar",
+    (char *) "--optional=bazbug", (char *) "--col", (char *) "--color",
+    (char *) "--colour", (char *) "--flavour", NULL
+  };
+  *argc = array_length (arguments) - 1;
+  return arguments;
+}
+
+/* Parse the command line from prepare_argv with getopt_long and check
+   every result: --col, --color and the translated --colour all select
+   'C', and --flavour is rejected because "flavor" is not an option.  */
+static void
+do_my_test (void)
+{
+  int argc;
+  char **argv = prepare_argv (&argc);
+  static const struct option options[] =
+    {
+      {"required", required_argument, NULL, 'r'},
+      {"optional", optional_argument, NULL, 'o'},
+      /* --colour is handled as a translation of "color".  */
+      {"color",	   no_argument,	      NULL, 'C'},
+      /* There is no "flavor" entry, so the "flavor" -> "flavour"
+	 translation must not make --flavour an option.  */
+      {NULL, 0, NULL, 0 }
+    };
+
+  int Cflag = 0;
+  int c;
+  bool found_flavor = false;
+
+  optind = 0;
+  fputs ("Reminder that --flavor is not an option of the program.\n", stderr);
+  while ((c = getopt_long (argc, argv, "", options, NULL)) >= 0)
+    switch (c)
+      {
+      case 'C':
+	++Cflag;
+	break;
+      case '?':
+	/* Only one argument, --flavour, may be rejected.  */
+	TEST_VERIFY (!found_flavor);
+	found_flavor = true;
+	break;
+      case 'r':
+	printf ("--required %s\n", optarg);
+	TEST_COMPARE_STRING (optarg, "foobar");
+	break;
+      case 'o':
+	/* Never hand a null optarg to %s; the comparison reports it.  */
+	printf ("--optional %s\n", optarg != NULL ? optarg : "(null)");
+	TEST_COMPARE_STRING (optarg, "bazbug");
+	break;
+      default:
+	/* No other return value is expected: make the test fail.  */
+	support_record_failure ();
+	return;
+      }
+
+  TEST_VERIFY (found_flavor);
+
+  printf ("Cflags = %d\n", Cflag);
+  /* One each for --col (an abbreviation), --color and --colour.  */
+  TEST_COMPARE (Cflag, 3);
+
+  /* Every argument must have been consumed as an option.  */
+  for (int index = optind; index < argc; index++)
+    printf ("Non-option argument %s\n", argv[index]);
+  TEST_COMPARE (optind, argc);
+}
+
+int
+do_test (void)
+{
+  prepare_localedir ();	/* Bind the catalog before parsing.  */
+  do_my_test ();		/* Checks report through TEST_* macros.  */
+  return 0;
+}
+
+#define TEST_FUNCTION do_test
+#include <support/test-driver.c>
diff --git a/posix/tstgetoptl.po b/posix/tstgetoptl.po
new file mode 100644
index 0000000000..7091884faf
--- /dev/null
+++ b/posix/tstgetoptl.po
@@ -0,0 +1,29 @@ 
+# English translations for tstgetoptl, a test case in glibc.
+# Copyright (C) 2026 Free Software Foundation, Inc.
+# This file is distributed under the same license as the glibc package.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: tstgetoptl 0.0.0\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2025-05-27 19:29+0200\n"
+"PO-Revision-Date: 2025-05-27 19:30+0200\n"
+"Language-Team: English (British) <(nothing)>\n"
+"Language: en_GB\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ASCII\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: xxx.c:yy
+msgid "color"
+msgstr "colour"
+
+#: xxx.c:yy
+msgid "flavor"
+msgstr "flavour"
+
+# This is to make sure the translator cannot redirect options.
+#: xxx.c:yy
+msgid "optional"
+msgstr "required"