[v22,8/9] posix: Add a script for static validation of getopt_long PO files

Message ID 2955a3592a6b97298fbcd76ba9eb152739e6e363.1776957778.git.vivien@planete-kraus.eu (mailing list archive)
State New
Headers
Series Support translated long option names in getopt and argp |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Vivien Kraus April 23, 2026, 4:04 p.m. UTC
  It is better to statically check the PO files on the developer’s side,
because there is a chance to detect the problem early and not
embarrass the translation team just before a release.

This is a Perl script that I made by adapting bits and pieces from
mtrace.pl.  On the test PO file, it should fail with the following output:

-----
Translation toto is used for more than one option:
  - bar
  - foo
bar is a translation of pub, but it is also a different option.
There were 2 failures.
-----
---
 NEWS                                          |   3 +
 manual/getopt.texi                            |  13 ++
 manual/install.texi                           |   7 +-
 posix/Makefile                                |  28 ++-
 posix/check-getopt-translations.pl            | 195 ++++++++++++++++++
 .../standalone-multiple-getopt-collisions.po  |  45 ++++
 posix/tst-check-getopt-translations.sh        |  61 ++++++
 7 files changed, 348 insertions(+), 4 deletions(-)
 create mode 100644 posix/check-getopt-translations.pl
 create mode 100644 posix/standalone-multiple-getopt-collisions.po
 create mode 100644 posix/tst-check-getopt-translations.sh
  

Patch

diff --git a/NEWS b/NEWS
index 48f7589f49..11cef60879 100644
--- a/NEWS
+++ b/NEWS
@@ -25,6 +25,9 @@  Major new features:
 * Argp parsers enable translated long option names with "command-line
   option" as the message context.
 
+* The new installed script check-getopt-translations parses PO files to
+  check for collisions between long option names and translations.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * Although malloc and related functions currently return pointers
diff --git a/manual/getopt.texi b/manual/getopt.texi
index dbee16b62e..e39f3e3f85 100644
--- a/manual/getopt.texi
+++ b/manual/getopt.texi
@@ -394,6 +394,19 @@  not match a long option (or its abbreviation).
 
 @end deftypefun
 
+It is possible for the programmer to introduce a new option name that
+conflicts with the translation of an existing option name.  Such a
+case would disrupt the workflow of users as the new option would
+replace the existing option.  Before adding a new option to a program,
+the developer should check for collisions with all known translations.
+This can be done with the installed
+@command{check-getopt-translations} script, by calling for each PO
+file in the project:
+
+@smallexample
+check-getopt-translations "context used for translations" @file{file.po}
+@end smallexample
+
 @node Getopt Long Option Example
 @subsection Example of Parsing Long Options with @code{getopt_long}
 
diff --git a/manual/install.texi b/manual/install.texi
index 073cda0530..2d0e78f64a 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -601,9 +601,10 @@  verified to work to build @theglibc{}.
 Perl 5
 
 Perl is not required, but if present it is used in some tests and the
-@code{mtrace} program, to build the @glibcadj{} manual.  As of release
-time @code{perl} version 5.42.0 is the newest verified to work to
-build @theglibc{}.
+@code{mtrace} program, to build the @glibcadj{} manual.  It is also
+used for the @code{check-getopt-translations} installed script.  As of
+release time @code{perl} version 5.42.0 is the newest verified to work
+to build @theglibc{}.
 
 @item
 GNU @code{sed} 3.02 or newer
diff --git a/posix/Makefile b/posix/Makefile
index e8d5d0661c..0f99241d4a 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -379,7 +379,7 @@  xtests-time64 := \
 
 ifeq (yes,$(build-shared))
 test-srcs := \
-  globtest
+  globtest \
   # tests-src
 tests += \
   tst-exec \
@@ -390,6 +390,11 @@  tests += \
   # tests
 endif
 
+ifneq ($(PERL),no)
+test-srcs +=  \
+  tst-check-getopt-translations
+endif
+
 ifeq (yesyes,$(build-shared)$(have-thread-library))
 tests += \
   tst-_Fork \
@@ -419,6 +424,9 @@  install-others-programs := \
   $(inst_libexecdir)/getconf \
   # install-others-programs
 
+install-bin-script = check-getopt-translations
+generated += check-getopt-translations
+
 before-compile += \
   $(objpfx)posix-conf-vars-def.h \
   # before-compile
@@ -431,6 +439,7 @@  generated += \
   getconf.speclist \
   ptestcases.h \
   testcases.h \
+  tst-check-getopt-translations.out \
   tst-getconf.out \
   wordexp-tst.out \
   # generated
@@ -509,6 +518,11 @@  endif
 endif
 endif
 
+ifneq ($(PERL),no)
+tests-special += \
+  $(objpfx)tst-check-getopt-translations.out
+endif
+
 include ../Rules
 
 ifeq ($(run-built-tests),yes)
@@ -826,3 +840,15 @@  $(tst_getopt_long_collision_mo): tst-getopt_long_collision.po
 
 $(objpfx)tst-getopt_long_collision.out: $(tst_getopt_long_collision_mo) $(gen-locales)
 CFLAGS-tst-getopt_long_collision.c += -DOBJPFX=\"$(objpfx)\"
+
+# Substitute the version and bug-report placeholders into the Perl script.
+$(objpfx)check-getopt-translations: check-getopt-translations.pl
+	rm -f $@.new
+	sed -e 's|@VERSION@|$(version)|' \
+	    -e 's|@PKGVERSION@|$(PKGVERSION)|' \
+	    -e 's|@REPORT_BUGS_TO@|$(REPORT_BUGS_TO)|' $^ > $@.new \
+	&& rm -f $@ && mv $@.new $@ && chmod +x $@
+
+# Test and evaluate-test share one shell: the latter reads the former's status.
+$(objpfx)tst-check-getopt-translations.out: tst-check-getopt-translations.sh $(objpfx)check-getopt-translations standalone-multiple-getopt-collisions.po
+	$(SHELL) $^ $@; $(evaluate-test)
diff --git a/posix/check-getopt-translations.pl b/posix/check-getopt-translations.pl
new file mode 100644
index 0000000000..c3c3cff1eb
--- /dev/null
+++ b/posix/check-getopt-translations.pl
@@ -0,0 +1,195 @@ 
+#! /usr/bin/perl
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+use strict;
+use warnings;
+use Data::Dumper;
+
+# The three placeholders below are substituted by sed at build time.
+my $VERSION = "@VERSION@";
+my $PKGVERSION = "@PKGVERSION@";
+my $REPORT_BUGS_TO = '@REPORT_BUGS_TO@';
+my $progname = $0;  # Name the script was invoked as, for diagnostics.
+
+# Print the help text on standard output and exit successfully.
+sub usage {
+    print "Usage: check-getopt-translations [OPTION]... msgctxt lang.po\n";
+    print "  --help       print this help, then exit\n";
+    print "  --version    print version number, then exit\n";
+    print "\nFor bug reporting instructions, please see:\n$REPORT_BUGS_TO.\n";
+    exit 0;
+}
+
+# Print the message given as first argument on stderr; exit with status 1.
+sub fatal {
+    print STDERR "$_[0]\n";
+    exit 1;
+}
+
+# This script takes two positional arguments: the context for
+# translated option names, and the PO file to check.  Then, the PO
+# file is parsed, looking at three things:
+# 1. The msgctxt: it must be equal to the first positional argument, msgctxt;
+# 2. The msgid;
+# 3. The space-separated list msgstr.
+#
+# We are looking for two different problems:
+#
+# 1. Every translation element, current or obsolete, must be unique
+# across all option names.
+# 2. For every option name, for every translation, current or
+# deprecated, if it doesn’t match the untranslated name, then it
+# should not match any other untranslated option names.
+#
+# If we detect an example of the first case, it is a problem with the
+# translator only.  They have to remove one use of the word,
+# preferably one that is deprecated.
+#
+# If we detect an example of the second case, then it is a problem
+# with the developer: they want to introduce an option name that is
+# already used for something else by users of this native language! If
+# nothing is done, these users will be surprised that the same word
+# now means another option, as the untranslated options have
+# precedence over the translations.  If the translated name is already
+# deprecated, then the language team may agree to completely remove
+# it.  Otherwise, it may be better to find a new untranslated name.
+
+# Handle --help and --version together with their abbreviations (--h,
+# --he, --hel; --v, --ve, ... --versio), and reject any other option.
+# Only the first argument is examined; positional arguments stay in @ARGV.
+if (@ARGV) {
+    my $arg = $ARGV[0];
+    if ($arg =~ /^--v(?:e(?:r(?:s(?:i(?:on?)?)?)?)?)?$/) {
+        print "check-getopt-translations $PKGVERSION$VERSION\n";
+        print "Copyright (C) 2026 Free Software Foundation, Inc.\n";
+        print "This is free software; see the source for copying conditions.  There is NO\n";
+        print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
+        print "Written by Vivien Kraus <vivien\@planete-kraus.eu>\n";
+        exit 0;
+    } elsif ($arg =~ /^--h(?:e(?:lp?)?)?$/) {
+        usage ();
+    } elsif ($arg =~ /^-/) {
+        # Diagnostics belong on stderr.  $0 is used because it is always
+        # defined, so the message never triggers an "uninitialized" warning.
+        print STDERR "$0: unrecognized option `$arg'\n";
+        print STDERR "Try `$0 --help' for more information.\n";
+        exit 1;
+    }
+}
+
+fatal "You must provide two arguments: the msgctxt for option names, and the name of the PO file."
+    unless @ARGV == 2;
+
+# The context that marks option names, and the PO file to check.
+my ($relevant_msgctxt, $pofilename) = @ARGV;
+
+# Maps each untranslated option name (msgid) found in that context to
+# the text of its msgstr.  It is filled in by the parser below.
+my %translations;
+
+# The msgid of the entry being parsed, kept until its msgstr is read.
+my $entry_msgid;
+
+# The ad-hoc PO file parser has 3 states:
+# 1. Waiting for msgctxt;
+# 2. Waiting for msgid;
+# 3. Waiting for msgstr.
+#
+# At the start, the state is 1.  Then, if we find "msgctxt
+# \"$relevant_msgctxt\"" in a single line, we jump to 2.  Otherwise,
+# if this is the end of the file, stop parsing.  Otherwise, whatever
+# the line, stay in 1.  This includes: the empty line, meaning we are
+# considering a new entry; or a comment, a #: location, or another
+# relevant line.
+#
+# When we are in state 2., we are waiting for the msgid (untranslated
+# option name).  If we find an empty line, we jump back to 1.  If we
+# find a line starting with "msgid \"" and ending with a double quote,
+# we store what is in the middle in $entry_msgid and jump to 3.
+# Otherwise, we stay in state 2.
+#
+# When we are in state 3., we are waiting for msgstr.  If we find an
+# empty line, drop $entry_msgid, and back to 1.  If the line starts
+# with "msgstr \"", we add a record to %translations: the key is
+# $entry_msgid, and the value, what is between the detected prefix and
+# the end quote.  Then, back to state 1.
+
+# Collect msgid => msgstr for every entry whose msgctxt is the requested one.
+my $parser_state = 1;
+open (my $pofile, "<", $pofilename) || fatal "PO file name ${pofilename} cannot be read.";
+
+while (my $line = <$pofile>) {
+    chomp $line;
+    if ($line eq "") {
+        # An empty line ends the current entry in every state.
+        $parser_state = 1;
+    } elsif ($parser_state == 1 && $line =~ /^msgctxt\s*"\Q${relevant_msgctxt}\E"$/) {
+        # \Q...\E: the context is matched as literal text, not as a regex.
+        $parser_state = 2;
+    } elsif ($parser_state == 2 && $line =~ /^msgid\s*"([^"]+)"$/) {
+        $parser_state = 3;
+        $entry_msgid = $1;
+    } elsif ($parser_state == 3 && $line =~ /^msgstr\s*"([^"]*)"$/) {
+        $translations{$entry_msgid} = $1;
+        $parser_state = 1;
+    }
+}
+
+my $number_of_errors = 0;
+
+# Each msgstr is a space-separated list of translated names (see the
+# description above); an empty msgstr yields no element at all, so
+# untranslated options are never reported.
+
+# Verify that every translated name belongs to a single option.
+my %untranslated_name;
+for my $option_name (keys %translations) {
+    for my $translation (split (' ', $translations{$option_name})) {
+        $untranslated_name{$translation}{$option_name} = 1;
+    }
+}
+for my $translation (sort (keys %untranslated_name)) {
+    my @names = sort (keys %{$untranslated_name{$translation}});
+    if (@names > 1) {
+        print STDERR "Translation ${translation} is used for more than one option:\n";
+        for my $untranslated (@names) {
+            print STDERR "  - ${untranslated}\n";
+        }
+        ++$number_of_errors;
+    }
+}
+
+# Verify that no translated name is also the untranslated name of
+# another option: the untranslated name would take precedence.
+for my $option_name (sort (keys %translations)) {
+    for my $translation (split (' ', $translations{$option_name})) {
+        # A hash lookup replaces the scan over all the other options.
+        if ($translation ne $option_name && exists $translations{$translation}) {
+            print STDERR "${translation} is a translation of ${option_name}, but it is also a different option.\n";
+            ++$number_of_errors;
+        }
+    }
+}
+
+# Compare numerically: the counter is a number, not a string.
+if ($number_of_errors == 0) {
+    exit 0;
+}
+print STDERR "There were ${number_of_errors} failures.\n";
+exit 1;
diff --git a/posix/standalone-multiple-getopt-collisions.po b/posix/standalone-multiple-getopt-collisions.po
new file mode 100644
index 0000000000..14b876a2a3
--- /dev/null
+++ b/posix/standalone-multiple-getopt-collisions.po
@@ -0,0 +1,45 @@ 
+# French translations for the getopt static checker
+# Copyright (C) 2026 THE GNU C Library'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the GNU C Library.
+#
+# This has two errors:
+# 1. "toto" is used both as a translation of "foo" and "bar";
+# 2. "bar" is used as a translation of "pub", but it is another option.
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU C Library (see version.h)\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2025-06-06 22:37+0200\n"
+"PO-Revision-Date: 2025-06-06 22:38+0200\n"
+"Language-Team: French <traduc@traduc.org>\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ASCII\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+# This is not an option name, so it’s OK for it to clash with option
+# names.
+msgctxt "fish"
+msgid "bass"
+msgstr "bar"
+
+# This is the --foo option.
+msgctxt "command-line option"
+msgid "foo"
+msgstr "toto"
+
+# This is the --bar option.  Oops, I translated with toto here too.
+msgctxt "command-line option"
+msgid "bar"
+msgstr "toto"
+
+# Let’s go to the --pub!
+msgctxt "command-line option"
+msgid "pub"
+msgstr "bar"
+
+# Wait, it’s OK if baz is translated to baz though.
+msgctxt "command-line option"
+msgid "baz"
+msgstr "baz"
diff --git a/posix/tst-check-getopt-translations.sh b/posix/tst-check-getopt-translations.sh
new file mode 100644
index 0000000000..038fa3eafa
--- /dev/null
+++ b/posix/tst-check-getopt-translations.sh
@@ -0,0 +1,61 @@ 
+#!/bin/sh
+# Test for check-getopt-translations.
+# Copyright (C) 2026 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+# Arguments: the checker script, the PO file to check, and the log file.
+check_getopt_translations_program=$1; shift
+po_file=$1; shift
+logfile=$1; shift
+rm -f "$logfile"
+result=0
+expected_output="\
+Translation toto is used for more than one option:
+  - bar
+  - foo
+bar is a translation of pub, but it is also a different option.
+There were 2 failures."
+
+if output=$("$check_getopt_translations_program" "command-line option" "$po_file" 2>&1) ; then
+    echo "the errors were not caught." >> "$logfile"
+    echo "*** check-getopt-translations FAILED" >> "$logfile"
+    result=1
+fi
+
+if test "$output" != "$expected_output"; then
+    echo "Expected:" >> "$logfile"
+    echo "$expected_output" >> "$logfile"
+    echo "Actual:" >> "$logfile"
+    echo "$output" >> "$logfile"
+    echo "*** check-getopt-translations FAILED" >> "$logfile"
+    result=1
+fi
+
+# Report success only when neither check above failed.
+test $result -ne 0 || echo "*** check-getopt-translations PASSED" >> "$logfile"
+exit $result
+
+# Preserve executable bits for this shell script.
+Local Variables:
+eval:(defun frobme () (set-file-modes buffer-file-name file-mode))
+eval:(make-local-variable 'file-mode)
+eval:(setq file-mode (file-modes (buffer-file-name)))
+eval:(make-local-variable 'after-save-hook)
+eval:(add-hook 'after-save-hook 'frobme)
+End: