@@ -25,6 +25,9 @@ Major new features:
* Argp parsers enable translated long option names with "command-line
option" as the message context.
+* The new installed script check-getopt-translations parses PO files to
+ check for collisions between long option names and translations.
+
Deprecated and removed features, and other changes affecting compatibility:
* Although malloc and related functions currently return pointers
@@ -394,6 +394,19 @@ not match a long option (or its abbreviation).
@end deftypefun
+It is possible for the programmer to introduce a new option name that
+conflicts with the translation of an existing option name. Because
+untranslated option names take precedence over translated ones, users
+who rely on that translation would silently get the new option
+instead of the existing one. Before adding a new option to a program,
+the developer should check for collisions with all known translations.
+This can be done by running the installed
+@command{check-getopt-translations} script on each PO file in the
+project:
+
+@smallexample
+check-getopt-translations "context used for translations" @file{file.po}
+@end smallexample
+
@node Getopt Long Option Example
@subsection Example of Parsing Long Options with @code{getopt_long}
@@ -601,9 +601,10 @@ verified to work to build @theglibc{}.
Perl 5
Perl is not required, but if present it is used in some tests and the
-@code{mtrace} program, to build the @glibcadj{} manual. As of release
-time @code{perl} version 5.42.0 is the newest verified to work to
-build @theglibc{}.
+@code{mtrace} program, to build the @glibcadj{} manual. It is also
+used for the @code{check-getopt-translations} installed script. As of
+release time @code{perl} version 5.42.0 is the newest verified to work
+to build @theglibc{}.
@item
GNU @code{sed} 3.02 or newer
@@ -379,7 +379,7 @@ xtests-time64 := \
ifeq (yes,$(build-shared))
test-srcs := \
- globtest
+ globtest \
# tests-src
tests += \
tst-exec \
@@ -390,6 +390,11 @@ tests += \
# tests
endif
+ifneq ($(PERL),no)
+test-srcs += \
+ tst-check-getopt-translations
+endif
+
ifeq (yesyes,$(build-shared)$(have-thread-library))
tests += \
tst-_Fork \
@@ -419,6 +424,9 @@ install-others-programs := \
$(inst_libexecdir)/getconf \
# install-others-programs
+install-bin-script = check-getopt-translations
+generated += check-getopt-translations
+
before-compile += \
$(objpfx)posix-conf-vars-def.h \
# before-compile
@@ -431,6 +439,7 @@ generated += \
getconf.speclist \
ptestcases.h \
testcases.h \
+ tst-check-getopt-translations.out \
tst-getconf.out \
wordexp-tst.out \
# generated
@@ -509,6 +518,11 @@ endif
endif
endif
+ifneq ($(PERL),no)
+tests-special += \
+ $(objpfx)tst-check-getopt-translations.out
+endif
+
include ../Rules
ifeq ($(run-built-tests),yes)
@@ -826,3 +840,15 @@ $(tst_getopt_long_collision_mo): tst-getopt_long_collision.po
$(objpfx)tst-getopt_long_collision.out: $(tst_getopt_long_collision_mo) $(gen-locales)
CFLAGS-tst-getopt_long_collision.c += -DOBJPFX=\"$(objpfx)\"
+
+# Generate the installable script from its Perl source, filling in the
+# version and bug-reporting placeholders.  The output goes to a
+# temporary file first, so the final script only appears once sed has
+# succeeded.
+$(objpfx)check-getopt-translations: check-getopt-translations.pl
+	rm -f $@.new
+	sed -e 's|@VERSION@|$(version)|' \
+	    -e 's|@PKGVERSION@|$(PKGVERSION)|' \
+	    -e 's|@REPORT_BUGS_TO@|$(REPORT_BUGS_TO)|' $^ > $@.new \
+	&& rm -f $@ && mv $@.new $@ && chmod +x $@
+
+# Run the shell test driver.  All prerequisites are passed to it in
+# order (driver script, generated checker, sample PO file), followed by
+# the name of the log file it must write.
+$(objpfx)tst-check-getopt-translations.out: \
+  tst-check-getopt-translations.sh \
+  $(objpfx)check-getopt-translations \
+  standalone-multiple-getopt-collisions.po
+	$(SHELL) $^ $(common-objpfx)posix/tst-check-getopt-translations.out
+	$(evaluate-test)
new file mode 100644
@@ -0,0 +1,195 @@
+#! /usr/bin/perl
+
+# Copyright (C) 2026 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+use strict;
+use warnings;
+use Data::Dumper;
+
+# The @...@ placeholders below are substituted by sed when the
+# installed script is generated.
+my $VERSION = "@VERSION@";
+
+my $PKGVERSION = "@PKGVERSION@";
+my $REPORT_BUGS_TO = '@REPORT_BUGS_TO@';
+# Name this script was invoked as; used in diagnostics.
+my $progname = $0;
+
+# Print the command-line help on standard output, then exit with
+# status 0.  Takes no arguments and does not return.
+sub usage {
+    # Use the name under which the script is installed and documented.
+    print "Usage: check-getopt-translations [OPTION]... msgctxt lang.po\n";
+    print " --help print this help, then exit\n";
+    print " --version print version number, then exit\n";
+    print "\n";
+    print "For bug reporting instructions, please see:\n";
+    print "$REPORT_BUGS_TO.\n";
+    exit 0;
+}
+
+# Write MESSAGE (the first argument) and a newline to standard error,
+# then terminate the script with exit status 1.
+sub fatal {
+    my ($message) = @_;
+    print STDERR $message . "\n";
+    exit 1;
+}
+
+# This script takes two positional arguments: the context for
+# translated option names, and the PO file to check. Then, the PO
+# file is parsed, looking at three things:
+# 1. The msgctxt: it must be equal to the first positional argument, msgctxt;
+# 2. The msgid;
+# 3. The space-separated list msgstr.
+#
+# We are looking for two different problems:
+#
+# 1. Every translation element, current or obsolete, must be unique
+# across all option names.
+# 2. For every option name, for every translation, current or
+# deprecated, if it doesn’t match the untranslated name, then it
+# should not match any other untranslated option names.
+#
+# If we detect an example of the first case, it is a problem with the
+# translator only. They have to remove one use of the word,
+# preferably one that is deprecated.
+#
+# If we detect an example of the second case, then it is a problem
+# with the developer: they want to introduce an option name that is
+# already used for something else by users of this native language! If
+# nothing is done, these users will be surprised that the same word
+# now means another option, as the untranslated options have
+# precedence over the translations. If the translated name is already
+# deprecated, then the language team may agree to completely remove
+# it. Otherwise, it may be better to find a new untranslated name.
+
+# Handle --version and --help, including their abbreviations down to
+# "--v" and "--h".  Only the first argument is ever examined: an option
+# either makes the script exit or is rejected, and anything else is
+# left in @ARGV as the first positional argument.
+arglist: while (@ARGV) {
+    my $arg = $ARGV[0];
+    # An abbreviation is "--" plus a non-empty prefix of the option name.
+    if (length ($arg) > 2 && index ("--version", $arg) == 0) {
+        print "check-getopt-translations $PKGVERSION$VERSION\n";
+        print "Copyright (C) 2026 Free Software Foundation, Inc.\n";
+        print "This is free software; see the source for copying conditions. There is NO\n";
+        print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
+        print "Written by Vivien Kraus <vivien\@planete-kraus.eu>\n";
+
+        exit 0;
+    } elsif (length ($arg) > 2 && index ("--help", $arg) == 0) {
+        usage ();
+    } elsif ($arg =~ /^-/) {
+        # Diagnostics belong on standard error, not in the normal output.
+        print STDERR "$progname: unrecognized option `${arg}'\n";
+        print STDERR "Try `$progname --help' for more information.\n";
+        exit 1;
+    } else {
+        last arglist;
+    }
+}
+
+# Exactly two positional arguments must remain: the msgctxt that marks
+# option names, and the PO file to inspect.
+if (scalar (@ARGV) != 2) {
+    fatal "You must provide two arguments: the msgctxt for option names, and the name of the PO file.";
+}
+
+my ($relevant_msgctxt, $pofilename) = @ARGV;
+my %translations;
+
+# %translations maps each untranslated option name (msgid) to its
+# msgstr; it is filled in while the PO file is parsed.
+
+my $entry_msgid;
+
+# The ad-hoc PO file parser has 3 states:
+# 1. Waiting for msgctxt;
+# 2. Waiting for msgid;
+# 3. Waiting for msgstr.
+#
+# At the start, the state is 1. Then, if we find "msgctxt
+# \"$relevant_msgctxt\"" in a single line, we jump to 2. Otherwise,
+# if this is the end of the file, stop parsing. Otherwise, whatever
+# the line, stay in 1. This includes: the empty line, meaning we are
+# considering a new entry; or a comment, a #: location, or another
+# relevant line.
+#
+# When we are in state 2., we are waiting for the msgid (untranslated
+# option name). If we find an empty line, we jump back to 1. If we
+# find a line starting with "msgid \"" and ending with a double quote,
+# we store what is in the middle in $entry_msgid and jump to 3.
+# Otherwise, we stay in state 2.
+#
+# When we are in state 3., we are waiting for msgstr. If we find an
+# empty line, drop $entry_msgid, and back to 1. If the line starts
+# with "msgstr \"", we add a record to %translations: the key is
+# $entry_msgid, and the value, what is between the detected prefix and
+# the end quote. Then, back to state 1.
+
+# Current state of the PO parser: 1 = waiting for msgctxt, 2 = waiting
+# for msgid, 3 = waiting for msgstr.
+my $parser_state = 1;
+
+open (my $pofile, "<", $pofilename)
+    or fatal "PO file name ${pofilename} cannot be read.";
+
+while (my $line = <$pofile>) {
+    chomp $line;
+    # \Q...\E: the context comes from the command line and must match
+    # literally, even if it contains regex metacharacters.
+    if ($parser_state == 1
+        && $line =~ /^msgctxt\s*"\Q${relevant_msgctxt}\E"$/) {
+        $parser_state = 2;
+    } elsif ($parser_state == 2 && $line eq "") {
+        # The entry ended before any msgid was seen.
+        $parser_state = 1;
+    } elsif ($parser_state == 2 && $line =~ /^msgid\s*"([^"]+)"$/) {
+        $parser_state = 3;
+        $entry_msgid = $1;
+    } elsif ($parser_state == 3 && $line eq "") {
+        # The entry ended before any msgstr was seen.
+        $parser_state = 1;
+    } elsif ($parser_state == 3 && $line =~ /^msgstr\s*"([^"]*)"$/) {
+        # Record the translation (possibly empty) for this option name.
+        $translations{$entry_msgid} = $1;
+        $parser_state = 1;
+    }
+}
+
+close ($pofile);
+
+my $number_of_errors = 0;
+
+# A msgstr is a space-separated list of translated names.  Split each
+# one into its distinct elements once.  An empty msgstr (an option that
+# has no translation yet) yields no element, so untranslated options
+# are never reported as colliding with each other.
+my %elements_of;
+for my $option_name (keys %translations) {
+    my %seen;
+    $elements_of{$option_name} =
+        [grep { !$seen{$_}++ } split (' ', $translations{$option_name})];
+}
+
+# First check: every translated name must belong to a single option.
+# %untranslated_name maps a translated name to the list of options
+# that use it.
+my %untranslated_name;
+for my $option_name (sort (keys %elements_of)) {
+    for my $element (@{$elements_of{$option_name}}) {
+        push (@{$untranslated_name{$element}}, $option_name);
+    }
+}
+for my $translation (sort (keys %untranslated_name)) {
+    my @names = @{$untranslated_name{$translation}};
+    next if @names < 2;
+    print STDERR "Translation ${translation} is used for more than one option:\n";
+    for my $untranslated (@names) {
+        print STDERR " - ${untranslated}\n";
+    }
+    ++$number_of_errors;
+}
+
+# Second check: a translated name must not be the untranslated name of
+# a different option.  Translating an option to its own name is fine.
+for my $option_name (sort (keys %elements_of)) {
+    for my $element (@{$elements_of{$option_name}}) {
+        next if $element eq $option_name;
+        next unless exists $translations{$element};
+        print STDERR "${element} is a translation of ${option_name}, but it is also a different option.\n";
+        ++$number_of_errors;
+    }
+}
+
+# Exit status 0 when the PO file is clean, 1 otherwise.
+if ($number_of_errors == 0) {
+    exit 0;
+}
+print STDERR "There were ${number_of_errors} failures.\n";
+exit 1;
new file mode 100644
@@ -0,0 +1,45 @@
+# French translations for the getopt static checker
+# Copyright (C) 2026 THE GNU C Library'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the GNU C Library.
+#
+# This has two errors:
+# 1. "toto" is used both as a translation of "foo" and "bar";
+# 2. "bar" is used as a translation of "pub", but it is another option.
+msgid ""
+msgstr ""
+"Project-Id-Version: GNU C Library (see version.h)\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2025-06-06 22:37+0200\n"
+"PO-Revision-Date: 2025-06-06 22:38+0200\n"
+"Language-Team: French <traduc@traduc.org>\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ASCII\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+# This is not an option name, so it’s OK for it to clash with option
+# names.
+msgctxt "fish"
+msgid "bass"
+msgstr "bar"
+
+# This is the --foo option.
+msgctxt "command-line option"
+msgid "foo"
+msgstr "toto"
+
+# This is the --bar option. Oops, I translated with toto here too.
+msgctxt "command-line option"
+msgid "bar"
+msgstr "toto"
+
+# Let’s go to the --pub!
+msgctxt "command-line option"
+msgid "pub"
+msgstr "bar"
+
+# Wait, it’s OK if baz is translated to baz though.
+msgctxt "command-line option"
+msgid "baz"
+msgstr "baz"
new file mode 100644
@@ -0,0 +1,61 @@
+#!/bin/sh
+# Test for check-getopt-translations.
+# Copyright (C) 2026 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+# Arguments, in order: the script under test, the PO file with
+# deliberate collisions, and the log file to write.
+check_getopt_translations_program=$1; shift
+po_file=$1; shift
+logfile=$1; shift
+
+rm -f "$logfile"
+result=0
+expected_output="\
+Translation toto is used for more than one option:
+ - bar
+ - foo
+bar is a translation of pub, but it is also a different option.
+There were 2 failures."
+
+# The PO file contains collisions, so the checker must exit non-zero.
+if output=$("${check_getopt_translations_program}" "command-line option" "${po_file}" 2>&1) ; then
+  echo "the errors were not caught." >> "$logfile"
+  echo "*** check-getopt-translations FAILED" >> "$logfile"
+  result=1
+fi
+
+# The diagnostics must match exactly, including their order.
+if test "$output" != "$expected_output"; then
+  echo "Expected:" >> "$logfile"
+  echo "$expected_output" >> "$logfile"
+  echo "Actual:" >> "$logfile"
+  echo "$output" >> "$logfile"
+  echo "*** check-getopt-translations FAILED" >> "$logfile"
+  result=1
+fi
+
+# Only claim success when none of the checks above failed.
+if test $result -eq 0; then
+  echo "*** check-getopt-translations PASSED" >> "$logfile"
+fi
+
+exit $result
+
+# Preserve executable bits for this shell script.
+Local Variables:
+eval:(defun frobme () (set-file-modes buffer-file-name file-mode))
+eval:(make-local-variable 'file-mode)
+eval:(setq file-mode (file-modes (buffer-file-name)))
+eval:(make-local-variable 'after-save-hook)
+eval:(add-hook 'after-save-hook 'frobme)
+End: