localedata: en_NL: new English in the Netherlands locale [BZ #14085]

Message ID 1461298610-19221-1-git-send-email-vapier@gentoo.org
State Rejected
Delegated to: Mike Frysinger
Headers

Commit Message

Mike Frysinger April 22, 2016, 4:16 a.m. UTC
  From: Pander Musubi <pander@users.sourceforge.net>

Pander: Please take a close look as I've made significant changes.
---
 localedata/SUPPORTED     |   1 +
 localedata/locales/en_NL | 163 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)
 create mode 100644 localedata/locales/en_NL
  

Comments

Chris Leonard April 22, 2016, 3:18 p.m. UTC | #1
+1

cjl

On Fri, Apr 22, 2016 at 12:16 AM, Mike Frysinger <vapier@gentoo.org> wrote:
> From: Pander Musubi <pander@users.sourceforge.net>
>
> Pander: Please take a close look as I've made significant changes.
> ---
>  localedata/SUPPORTED     |   1 +
>  localedata/locales/en_NL | 163 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 164 insertions(+)
>  create mode 100644 localedata/locales/en_NL
>
> diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
> index 732d28b..bf60757 100644
> --- a/localedata/SUPPORTED
> +++ b/localedata/SUPPORTED
> @@ -139,6 +139,7 @@ en_IE@euro/ISO-8859-15 \
>  en_IL/UTF-8 \
>  en_IN/UTF-8 \
>  en_NG/UTF-8 \
> +en_NL/UTF-8 \
>  en_NZ.UTF-8/UTF-8 \
>  en_NZ/ISO-8859-1 \
>  en_PH.UTF-8/UTF-8 \
> diff --git a/localedata/locales/en_NL b/localedata/locales/en_NL
> new file mode 100644
> index 0000000..5707ca0
> --- /dev/null
> +++ b/localedata/locales/en_NL
> @@ -0,0 +1,163 @@
> +comment_char %
> +escape_char /
> +
> +% This file is part of the GNU C Library and contains locale data.
> +% The Free Software Foundation does not claim any copyright interest
> +% in the locale data contained in this file.  The foregoing does not
> +% affect the license of the GNU C Library as a whole.  It does not
> +% exempt you from the conditions of the license if your use would
> +% otherwise be governed by that license.
> +
> +% English language locale for the Netherlands.
> +% Internationally oriented users who are physically located in the Netherlands
> +% use software mainly in the English language.  Therefore they have their
> +% systems usually configured to US English International.  However, due to the
> +% geographic location, it can be desirable for certain data to be represented
> +% according to the local Dutch notation while the rest remains in English.
> +
> +LC_IDENTIFICATION
> +title      "English locale for the Netherlands"
> +source     ""
> +address    "http:////www.gnu.org//software//libc//"
> +contact    ""
> +email      "bug-glibc-locales@gnu.org"
> +tel        ""
> +fax        ""
> +language   "English"
> +territory  "Netherlands"
> +revision   "1.7"
> +date       "2015-11-02"
> +
> +category "i18n:2012";LC_IDENTIFICATION
> +category "i18n:2012";LC_CTYPE
> +category "i18n:2012";LC_COLLATE
> +category "i18n:2012";LC_TIME
> +category "i18n:2012";LC_NUMERIC
> +category "i18n:2012";LC_MONETARY
> +category "i18n:2012";LC_MESSAGES
> +category "i18n:2012";LC_PAPER
> +category "i18n:2012";LC_NAME
> +category "i18n:2012";LC_ADDRESS
> +category "i18n:2012";LC_TELEPHONE
> +category "i18n:2012";LC_MEASUREMENT
> +END LC_IDENTIFICATION
> +
> +LC_CTYPE
> +copy "en_GB"
> +END LC_CTYPE
> +
> +LC_COLLATE
> +copy "en_GB"
> +END LC_COLLATE
> +
> +LC_MONETARY
> +int_curr_symbol     "<U0045><U0055><U0052><U0020>"
> +currency_symbol     "<U20AC>"
> +% Use English because of international language and unambiguous processing.
> +mon_decimal_point   "<U002E>"
> +mon_thousands_sep   "<U002C>"
> +mon_grouping        3;3
> +positive_sign       ""
> +negative_sign       "<U002D>"
> +int_frac_digits     2
> +frac_digits         2
> +p_cs_precedes       1
> +int_p_sep_by_space  1
> +p_sep_by_space      0
> +n_cs_precedes       1
> +int_n_sep_by_space  1
> +n_sep_by_space      0
> +p_sign_posn         1
> +n_sign_posn         1
> +END LC_MONETARY
> +
> +LC_NUMERIC
> +% Use English because of international language and unambiguous processing.
> +decimal_point "<U002E>"
> +thousands_sep "<U002C>"
> +grouping      3;3
> +END LC_NUMERIC
> +
> +LC_TIME
> +abday   "<U0053><U0075><U006E>";"<U004D><U006F><U006E>";/
> +        "<U0054><U0075><U0065>";"<U0057><U0065><U0064>";/
> +        "<U0054><U0068><U0075>";"<U0046><U0072><U0069>";/
> +        "<U0053><U0061><U0074>"
> +day     "<U0053><U0075><U006E><U0064><U0061><U0079>";/
> +        "<U004D><U006F><U006E><U0064><U0061><U0079>";/
> +        "<U0054><U0075><U0065><U0073><U0064><U0061><U0079>";/
> +        "<U0057><U0065><U0064><U006E><U0065><U0073><U0064><U0061><U0079>";/
> +        "<U0054><U0068><U0075><U0072><U0073><U0064><U0061><U0079>";/
> +        "<U0046><U0072><U0069><U0064><U0061><U0079>";/
> +        "<U0053><U0061><U0074><U0075><U0072><U0064><U0061><U0079>"
> +abmon   "<U004A><U0061><U006E>";"<U0046><U0065><U0062>";/
> +        "<U004D><U0061><U0072>";"<U0041><U0070><U0072>";/
> +        "<U004D><U0061><U0079>";"<U004A><U0075><U006E>";/
> +        "<U004A><U0075><U006C>";"<U0041><U0075><U0067>";/
> +        "<U0053><U0065><U0070>";"<U004F><U0063><U0074>";/
> +        "<U004E><U006F><U0076>";"<U0044><U0065><U0063>"
> +mon     "<U004A><U0061><U006E><U0075><U0061><U0072><U0079>";/
> +        "<U0046><U0065><U0062><U0072><U0075><U0061><U0072><U0079>";/
> +        "<U004D><U0061><U0072><U0063><U0068>";/
> +        "<U0041><U0070><U0072><U0069><U006C>";/
> +        "<U004D><U0061><U0079>";/
> +        "<U004A><U0075><U006E><U0065>";/
> +        "<U004A><U0075><U006C><U0079>";/
> +        "<U0041><U0075><U0067><U0075><U0073><U0074>";/
> +        "<U0053><U0065><U0070><U0074><U0065><U006D><U0062><U0065><U0072>";/
> +        "<U004F><U0063><U0074><U006F><U0062><U0065><U0072>";/
> +        "<U004E><U006F><U0076><U0065><U006D><U0062><U0065><U0072>";/
> +        "<U0044><U0065><U0063><U0065><U006D><U0062><U0065><U0072>"
> +
> +% Use mix of English because of international language and Dutch 24-hour clocks.
> +d_t_fmt "<U0025><U0061><U0020><U0025><U0064><U0020><U0025><U0062><U0020><U0025>/
> +<U0059><U0020><U0025><U0052><U0020><U0025><U005A>"
> +% Use English because of international language, this is also sortable.
> +d_fmt   "<U0025><U0059><U002D><U0025><U006D><U002D><U0025><U0064>"
> +% Use mix of English because of international language and Dutch 24-hour clocks.
> +t_fmt   "<U0025><U0052>"
> +t_fmt_ampm "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
> +am_pm  "";""
> +week 7;19971130;4
> +first_weekday 2
> +END LC_TIME
> +
> +LC_MESSAGES
> +copy "en_US"
> +END LC_MESSAGES
> +
> +LC_PAPER
> +copy "nl_NL"
> +END LC_PAPER
> +
> +LC_NAME
> +copy "en_US"
> +END LC_NAME
> +
> +LC_ADDRESS
> +% use Dutch format because of local infrastructure.
> +postal_fmt    "<U0025><U0066><U0025><U004E><U0025><U0061><U0025><U004E>/
> +<U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025><U0073>/
> +<U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072><U0025>/
> +<U004E><U0025><U007A><U0020><U0025><U0054><U0025>/
> +<U004E><U0025><U0063><U0025><U004E>"
> +country_name "<U004E><U0065><U0074><U0068><U0065><U0072><U006C><U0061>/
> +<U006E><U0064><U0073>"
> +country_post "<U004E><U004C>"
> +country_ab2  "<U004E><U004C>"
> +country_ab3  "<U004E><U004C><U0044>"
> +country_num  528
> +country_car  "<U004E><U004C>"
> +lang_name    "<U0045><U006E><U0067><U006C><U0069><U0073><U0068>"
> +lang_ab      "<U0065><U006E>"
> +lang_term    "<U0065><U006E><U0067>"
> +lang_lib     "<U0065><U006E><U0067>"
> +END LC_ADDRESS
> +
> +LC_TELEPHONE
> +copy "nl_NL"
> +END LC_TELEPHONE
> +
> +LC_MEASUREMENT
> +copy "nl_NL"
> +END LC_MEASUREMENT
> --
> 2.7.4
>
  
Florian Weimer April 22, 2016, 4:10 p.m. UTC | #2
* Mike Frysinger:

> +% English language locale for the Netherlands.
> +% Internationally oriented users who are physically located in the Netherlands
> +% use software mainly in the English language.  Therefore they have their
> +% systems usually configured to US English International.  However, due to the
> +% geographic location, it can be desirable for certain data to be represented
> +% according to the local Dutch notation while the rest remains in English.

Why is this necessary?  Isn't this use case the reason for having
separate LC_* environment variables, so that you can mix-and-match
locales like this?  In other words, glibc doesn't need to provide a
pre-cooked locale.
  
Mike Frysinger April 22, 2016, 7:07 p.m. UTC | #3
On 22 Apr 2016 18:10, Florian Weimer wrote:
> * Mike Frysinger:
> > +% English language locale for the Netherlands.
> > +% Internationally oriented users who are physically located in the Netherlands
> > +% use software mainly in the English language.  Therefore they have their
> > +% systems usually configured to US English International.  However, due to the
> > +% geographic location, it can be desirable for certain data to be represented
> > +% according to the local Dutch notation while the rest remains in English.
> 
> Why is this necessary?  Isn't this use case the reason for having
> separate LC_* environment variables, so that you can mix-and-match
> locales like this?  In other words, glibc doesn't need to provide a
> pre-cooked locale.

for the majority of categories, you are certainly correct.  however,
users run into trouble when dealing with categories that commingle
language and territory details.  i highlighted this in the localedef
copy thread, but let's look at just this locale for specifics.

first, these categories can be wholly sourced from elsewhere and are
uninteresting to us for this new locale as they can be handled via
env vars as you described.
 - lang specific categories
  LC_CTYPE       = en_GB
  LC_COLLATE     = en_GB
  LC_MESSAGES    = en_GB
  LC_NAME        = en_GB
 - territory specific categories
  LC_NUMERIC     = en_GB
  LC_PAPER       = nl_NL
  LC_TELEPHONE   = nl_NL
  LC_MEASUREMENT = nl_NL

which leaves us with the ones that are actually defined in this locale:
  LC_TIME
   - day/month fields are def language specific.
   - all the other fields are largely territory specific (the way date &
     time are displayed locally).
  LC_ADDRESS
   - country_name & lang_* are def language specific.
   - all the other fields are territory specific.
  LC_IDENTIFICATION
   - clearly includes both lang & territory details, but not nearly as
     important as the categories above.  could just be lived with.

this one can be a bit murky, so i guess
  LC_MONETARY
   - currency fields are def territory related.
   - the others are semi-lang dependent/personal preference (digit spacing
     and such), but there's no way to customize on a sub-category basis.

so the question before us is how do we want to proceed ?  telling users
"that sucks but that's just how it goes" doesn't seem like the right path
to me long term.  exploding combinations of lang/territories also sucks,
but it's the only way today to accomplish this.

maybe we could spec out a new format for the env vars that'd allow people
to mix & match lang & territory themselves ?  POSIX leaves the format of
locale names up to the implementation after all, as well as the output of
the localedef tool.  we could do something like:
	LANG=[lang]:[territory]
	LANG=en_US:nl_NL
and we'd take care of filling in lang fields using en_US and territory
fields using nl_NL.  this would go beyond just category selection since,
as i described above, some categories commingle lang & territory fields.
-mike
  
Chris Leonard April 22, 2016, 7:12 p.m. UTC | #4
Isn't ease of use ever a consideration here?

Are all decisions, even those not code-driven, but user-driven always
decided on a "maximum parsimony" basis?

Can you describe the method a novice user of Linux would employ to mix
and match separate LC_* environment variables?

cjl

On Fri, Apr 22, 2016 at 12:10 PM, Florian Weimer <fw@deneb.enyo.de> wrote:
> * Mike Frysinger:
>
>> +% English language locale for the Netherlands.
>> +% Internationally oriented users who are physically located in the Netherlands
>> +% use software mainly in the English language.  Therefore they have their
>> +% systems usually configured to US English International.  However, due to the
>> +% geographic location, it can be desirable for certain data to be represented
>> +% according to the local Dutch notation while the rest remains in English.
>
> Why is this necessary?  Isn't this use case the reason for having
> separate LC_* environment variables, so that you can mix-and-match
> locales like this?  In other words, glibc doesn't need to provide a
> pre-cooked locale.
  
Pander April 22, 2016, 8:27 p.m. UTC | #5
Hi all,

The rationale behind this is the following. Most developers in the Netherlands use the English language while working with Linux et al. but need the local settings for date, hour, paper size, currency, etc. The mixing for this takes place within the LC sections. Compared to Germany or France, in the Netherlands even comments in code are more often in English.

Custom mixing is not that easy for the average or advanced user, and it is impossible to get this particular setup that way. This locale will be widely used once available. Users are, at the moment, stuck with either the Dutch, US or Irish locales, which don't offer what they need. In the Dutch industry, English is often the de facto language.

Adding this locale will not result in an explosion of additional locales, as no language other than English is a lingua franca. There is also a Denmark English one, I believe.  If you could measure the eventual usage, this locale will be very much in demand and in use in the Netherlands.

Best,

Pander 

On 22 April 2016 18:10:34 CEST, Florian Weimer <fw@deneb.enyo.de> wrote:
>* Mike Frysinger:
>
>> +% English language locale for the Netherlands.
>> +% Internationally oriented users who are physically located in the
>Netherlands
>> +% use software mainly in the English language.  Therefore they have
>their
>> +% systems usually configured to US English International.  However,
>due to the
>> +% geographic location, it can be desirable for certain data to be
>represented
>> +% according to the local Dutch notation while the rest remains in
>English.
>
>Why is this necessary?  Isn't this use case the reason for having
>separate LC_* environment variables, so that you can mix-and-match
>locales like this?  In other words, glibc doesn't need to provide a
>pre-cooked locale.
  
Mike Frysinger April 22, 2016, 9:23 p.m. UTC | #6
On 22 Apr 2016 22:27, Pander wrote:
> The rational behind this is the following. Most developers in the Netherlands use the English language while working with Linux et al. but need the local settings for date, hour, paper size, currency, etc. The mixing for this takes place within the LC sections. Compared to Germany or France, in the Netherlands even comments in code are usually/more in English.
> 
> Custom mixing is not that easy for the average+advanced user and impossible to get this particular setup. This local will be widely used once available. Users are, at the moment, stuck with either Dutch, US or Irish locale that don't offer what they need. In the Dutch industry, English is often the defacto language.

Florian's point is that for most categories, you can (mostly) get this today:
	export LANG=nl_NL.UTF8
	export LC_MESSAGES=en_US.UTF8
	export LC_NAME=en_US.UTF8

while it's true that locale settings are a bit obscure to many users, that
doesn't mean adding a locale so people can just `export LANG=en_NL` is the
right answer.  we have a documented system for controlling the locale (and
this aligns with POSIX), and we should be using it.

i would contend that for the average user, they'll be using a DE that has
a GUI where there is a settings panel for them to easily select how they
want the time/date/currency/etc... to be displayed.

> Adding this locale will not result in an explosion of additional locales, as no other languages than English are linga franca. There is also Danmark English one, I believe.  If you could measure the eventual usage, this locale will be very in much in demand and usage in the Netherlands.

with ~250 territories defined today, we're talking about ~250 for English
alone.  that is kind of an explosion considering we have ~330 locales now
in glibc, and only ~20 of them are for English.

note: i'm mostly playing devil's advocate here.  as you can see in my other
posting, i don't think the current situation is great for either side.
-mike
  
Florian Weimer April 23, 2016, 11:15 a.m. UTC | #7
* Mike Frysinger:

> +% Use English because of international language, this is also sortable.
> +d_fmt   "<U0025><U0059><U002D><U0025><U006D><U002D><U0025><U0064>"

The comment appears to be wrong.  This is an ISO 8601 date string.

> +% Use mix of English because of international language and Dutch
> 24-hour clocks.
> +t_fmt   "<U0025><U0052>"

Usually, t_fmt includes seconds, but %R does not.

> +t_fmt_ampm "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"

That's equivalent to %T, I think.  I don't see how these
definitions (this one and t_fmt) align with the comment.
  
Florian Weimer April 23, 2016, 12:01 p.m. UTC | #8
* Mike Frysinger:

> which leaves us with the ones that are actually defined in this locale:
>   LC_TIME
>    - day/month fields are def language specific.
>    - all the other fields are largely territory specific (the way date &
>      time are displayed locally).

But you use ISO 8601, not the local conventions.  I'm not saying this
is wrong, my hunch is that most en_NL users could live with that (it
would definitely work for me with a hypothetical en_DE locale).

We already have a request for ISO 8601 variants of locales.  It is not
clear if this affects date_fmt or just d_fmt (we probably have
requests for both).  I think adding these variants would make more
sense, and then users can pick that locale along with nl_NL.

>   LC_ADDRESS
>    - country_name & lang_* are def language specific.
>    - all the other fields are territory specific.

geocode/geoclue uses _NL_ADDRESS_POSTAL_FMT to determine the ordering
of street name and street number.  evolution-data-server uses
_NL_ADDRESS_COUNTRY_AB2.  I can't find any other users.

So in practice, it should be fine to use nl_NL here.  Unless there are
other ways to access this information besides _NL_ADDRESS_.* and
nl_langinfo_l.*(POSTAL|COUNTRY|LANG).

(Without any users, we can't justify a decision which language to use
for the translatable fields.  Users may prefer either variant.)

>   LC_IDENTIFICATION
>    - clearly includes both lang & territory details, but not nearly as
>      important as the categories above.  could just be lived with.

_NL_IDENTIFICATION_TERRITORY is used by LightDM, but it assumes that
it is in English (which matches what glibc currently does for nl_NL,
fr_FR and de_DE at least).

> this one can be a bit murky, so i guess
>   LC_MONETARY
>    - currency fields are def territory related.
>    - the others are semi-lang dependent/personal preference (digit spacing
>      and such), but there's no way to customize on a sub-category basis.

Hmm.  I think the main obstacle here is that we want the numbering
format to match LC_NUMERIC, and you assume that users want en_GB
there.  This is probably the right choice.

So the question is: What programs actually rely on LC_MONETARY?

If there is a body of software which uses LC_MONETARY, we need a
compelling solution for this aspect of the problem.  As far as I can
see, it is the only one which is really not addressable with LC_*
settings.

> so the question before us is how do we want to proceed ?  telling users
> "that sucks but that's just how it goes" doesn't seems like the right path
> to me long term.  exploding combinations of lang/territories also sucks,
> but it's the only way today to accomplish this.

Agreed.

> maybe we could spec out a new format for the env vars that'd allow people
> to mix & match lang & territory themselves ?  POSIX leaves the format of
> locale names up to the implementation after all, as well as the output of
> the localedef tool.  we could do something like:
> 	LANG=[lang]:[territory]
> 	LANG=en_US:nl_NL
> and we'd take care of filling in lang fields using en_US and territory
> fields using nl_NL.  this would go beyond just category selection since
> as i described above.

Do we need anything besides an override for the currency symbols in
LC_MONETARY?
  
Florian Weimer April 23, 2016, 12:01 p.m. UTC | #9
* Mike Frysinger:

> Florian's point is that for most categories, you can (mostly) get this today:
> 	export LANG=nl_NL.UTF8
> 	export LC_MESSAGES=en_US.UTF8
> 	export LC_NAME=en_US.UTF8

> i would contend that for the average user, they'll be using a DE that has
> a GUI where there is a settings panel for them to easily select how they
> want the time/date/currency/etc... to be displayed.

Right, and some versions of KDE have such separated locale selectors.

>> Adding this locale will not result in an explosion of additional
>> locales, as no other languages than English are linga franca. There
>> is also Danmark English one, I believe.  If you could measure the
>> eventual usage, this locale will be very in much in demand and usage
>> in the Netherlands.
>
> with ~250 territories defined today, we're talking about ~250 for English
> alone.  that is kind of an explosion considering we have ~330 locales now
> in glibc, and only ~20 of them are for English.

My worry is that for locales such as en_NL, there is no reference what
the “right” settings should be.  (See my other message.)
  
Florian Weimer April 23, 2016, 12:05 p.m. UTC | #10
* Chris Leonard:

> Isn't ease of use ever a consideration here?

Sure.

> Are all decisions, even those not code-driven, but user-driven always
> decided on a "maximum parsimony" basis?
>
> Can you describe the method a novice user of Linux would employ to mix
> and match separate LC_* environment variables?

Some desktop environments already have a per-category locale selector.
I doubt we can ever do without this because for these mixed-locale
environments, there won't always be a clear preference, and we can't
tell users that they have to live with it because that's what their
local standards body has decided.

If desktop environments refuse to add the user interface for this, or
can't arrange for the installation or generation of the required
locales, this isn't really our fault.  If we aggressively increase the
number of locales, it is likely that only a subset of them make it to
the user due to installation size concerns.
  
Florian Weimer April 23, 2016, 12:14 p.m. UTC | #11
> I understand you are playing the devils advocate. Thanks for
> that. Suppose that for all territories English is added, that will, at
> a maximum, double the number of locales, only once.
>
> That is manageable. Computers nowadays can handle that amount with
> ease.

The current dominant criticism of glibc is related to installation
size (search for Docker and Alpine Linux if you aren't aware).  As
always, the situation is more complex than some of the reports
suggest, but adding a few dozen megabytes to the installation is
probably not such a good idea.

> At the moment, remixing LC sections is not supported by any of the
> main GNU/Linux installers or by command line configuration tools or by
> configuration tools for GNOME, KDE, etc.

Not quite true.  It was supported in KDE; it had a separate selector
for LC_MESSAGES at least.  The KDE people removed it at one point
because there was no need.  If they have not added it back yet, I
think this shows this feature is not actually needed, and neither are
English locale variants.

I don't want glibc to be the place where contested decisions by
desktop environments are worked around.  If the people actually
working on end user interfaces determine that mixed locale settings
are not needed, who are we to second-guess them?  They have many more
interactions with such users than we do.

> As long as no progress is made in usability in installers and
> configuration tools, I would see fit that en_NL is added. It can
> always be removed if more elegant alternatives become available.

If the locale has actual users, we can't remove it that easily.
  
Chris Leonard April 23, 2016, 1:17 p.m. UTC | #12
On Sat, Apr 23, 2016 at 8:05 AM, Florian Weimer <fw@deneb.enyo.de> wrote:

> If desktop environments refuse to add the user interface for this, or
> can't arrange for the installation or generation of the required
> locales, this isn't really our fault.  If we aggressively increase the
> number of locales, it is likely that only a subset of them make it to
> the user due to installation size concerns.

I do take the argument about size to heart. It is an important issue
on XO laptops that deploy Sugar.  We use an image-builder to customize
languages (locales, translations) included because we face such severe
hardware restrictions on storage.

I apologize if my question sounded exasperated, thank you for the
thoughtful response.

cjl
  
Pander April 23, 2016, 2:41 p.m. UTC | #13
On 23 April 2016 14:14:03 CEST, Florian Weimer <fw@deneb.enyo.de> wrote:
>> I understand you are playing the devils advocate. Thanks for
>> that. Suppose that for all territories English is added, that will,
>at
>> a maximum, double the number of locales, only once.
>>
>> That is manageable. Computers nowadays can handle that amount with
>> ease.
>
>The current dominant criticism of glibc is related to installation
>size (search for Docker and Alpine Linux if you aren't aware).  As
>always, the situation is more complex than some of the reports
>suggest, but adding a few dozen megabytes to the installation is
>probably not such a good idea.
>
>> At the moment, remixing LC sections is not supported by any of the
>> main GNU/Linux installers or by command line configuration tools or
>by
>> configuration tools for GNOME, KDE, etc.
>
>Not quite true.  It was supported in KDE; it had a separate selector
>for LC_MESSAGES at least.  The KDE people removed it at one point
>because there was no need.  If they have not added it back yet, I
>think this shows this feature is not actually needed, and neither are
>English locale variants.
>
>I don't want glibc to be the place where contested decisions by
>desktop environments are worked around.  If the people actually
>working on end user interfaces determine that mixed locale settings
>are not needed, who are we to second-guess them?  They have many more
>interactions with such users than we do.
>
>> As long as no progress is made in usability in installers and
>> configuration tools, I would see fit that en_NL is added. It can
>> always be removed if more elegant alternatives become available.
>
>If the locale has actual users, we can't remove it that easily.

That is exactly my point. Remixing on section level (with installer, config tools or scripts) will not offer the user what this specific en_NL locale offers. Hence, users that want it, will continue to use it and give this locale a raison d'etre.
  
Pander April 23, 2016, 2:46 p.m. UTC | #14
On 23 April 2016 14:01:46 CEST, Florian Weimer <fw@deneb.enyo.de> wrote:
>* Mike Frysinger:
>
>> Florian's point is that for most categories, you can (mostly) get
>this today:
>> 	export LANG=nl_NL.UTF8
>> 	export LC_MESSAGES=en_US.UTF8
>> 	export LC_NAME=en_US.UTF8
>
>> i would contend that for the average user, they'll be using a DE that
>has
>> a GUI where there is a settings panel for them to easily select how
>they
>> want the time/date/currency/etc... to be displayed.
>
>Right, and some versions of KDE have such separated locale selectors.
>
>>> Adding this locale will not result in an explosion of additional
>>> locales, as no other languages than English are linga franca. There
>>> is also Danmark English one, I believe.  If you could measure the
>>> eventual usage, this locale will be very in much in demand and usage
>>> in the Netherlands.
>>
>> with ~250 territories defined today, we're talking about ~250 for
>English
>> alone.  that is kind of an explosion considering we have ~330 locales
>now
>> in glibc, and only ~20 of them are for English.
>
>My worry is that for locales such as en_NL, there is no reference what
>the “right” settings should be.  (See my other message.)

For me, as a Dutch person and having a lot of experience with standards, I know these are the optimal and 'right' settings for en_NL. Do you know independent Dutch developers that can review it? I have been offering this via GitHub for many years with no requests for changes.
  
Pander April 23, 2016, 2:57 p.m. UTC | #15
Thanks for your very detailed dialogues. This is indeed what a basic format like this needs. I will add an exhaustive set of use cases for en_NL to GitHub, with comparisons to en_US and nl_NL, and let you know when it is there. I will also include all your remarks and have it reviewed by some experienced Dutch users and the Dutch Linux user group. Shall we postpone the process of including the locale until then?
  
Florian Weimer April 23, 2016, 3:03 p.m. UTC | #16
> Thanks for your very detailed dialogues. This is what indeed a basic
> format like this needs. I will add an exhaustive set of use cases for
> en_NL to GitHub, with comparisons to en_US and nl_NL, and let you know
> when it is there. I will also include all your remarks and have if
> reviewed by some Dutch experienced users and the Dutch Linux user
> group. Shall we postpone the process of including the locale until
> then?

I don't know.  I didn't see the Github URL posted to this thread.
  
Pander April 23, 2016, 7:51 p.m. UTC | #17
On 04/23/2016 05:03 PM, Florian Weimer wrote:
>> Thanks for your very detailed dialogues. This is what indeed a basic
>> format like this needs. I will add an exhaustive set of use cases for
>> en_NL to GitHub, with comparisons to en_US and nl_NL, and let you know
>> when it is there. I will also include all your remarks and have if
>> reviewed by some Dutch experienced users and the Dutch Linux user
>> group. Shall we postpone the process of including the locale until
>> then?
> 
> I don't know.  I didn't see the Github URL posted to this thread.
> 

Updated version is at https://github.com/PanderMusubi/locale-en-nl
  
Marko Myllynen April 25, 2016, 1:13 p.m. UTC | #18
Hi,

On 2016-04-22 23:27, Pander wrote:
> 
> The rational behind this is the following. Most developers in the
> Netherlands use the English language while working with Linux et al.
> but need the local settings for date, hour, paper size, currency,
> etc. The mixing for this takes place within the LC sections. Compared
> to Germany or France, in the Netherlands even comments in code are
> usually/more in English.

The situation you describe is in no way specific to the Netherlands and
in fact there have been similar requests for other en_* variants in the
past already:

https://sourceware.org/bugzilla/show_bug.cgi?id=3175

> Custom mixing is not that easy for the average+advanced user and
> impossible to get this particular setup. This local will be widely
> used once available. Users are, at the moment, stuck with either
> Dutch, US or Irish locale that don't offer what they need. In the
> Dutch industry, English is often the defacto language.

Again, the Netherlands is not any kind of exception here. If we start
adding en_* variants, will English be the only exception or are we
willing to add, say, fr_* variants as well? If not, why?

Accepting en_* variants but rejecting others could be seen as a certain
kind of statement by the project; I'm not sure that would be wanted.

> Adding this locale will not result in an explosion of additional
> locales, as no other languages than English are linga franca. There
> is also Danmark English one, I believe.

At least earlier en_DK has been considered a mistake which should not be
repeated (see e.g. the BZ link above.)

In general I think the right answer is not to add arbitrary locale
variants to glibc but to make it easier for users to customize their own
environment as they see fit (see e.g.
https://sourceware.org/bugzilla/show_bug.cgi?id=18408).

Thanks,
  

Patch

diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
index 732d28b..bf60757 100644
--- a/localedata/SUPPORTED
+++ b/localedata/SUPPORTED
@@ -139,6 +139,7 @@  en_IE@euro/ISO-8859-15 \
 en_IL/UTF-8 \
 en_IN/UTF-8 \
 en_NG/UTF-8 \
+en_NL/UTF-8 \
 en_NZ.UTF-8/UTF-8 \
 en_NZ/ISO-8859-1 \
 en_PH.UTF-8/UTF-8 \
diff --git a/localedata/locales/en_NL b/localedata/locales/en_NL
new file mode 100644
index 0000000..5707ca0
--- /dev/null
+++ b/localedata/locales/en_NL
@@ -0,0 +1,163 @@ 
+comment_char %
+escape_char /
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+% English language locale for the Netherlands.
+% Internationally oriented users who are physically located in the Netherlands
+% use software mainly in the English language.  Therefore they have their
+% systems usually configured to US English International.  However, due to the
+% geographic location, it can be desirable for certain data to be represented
+% according to the local Dutch notation while the rest remains in English.
+
+LC_IDENTIFICATION
+title      "English locale for the Netherlands"
+source     ""
+address    "http:////www.gnu.org//software//libc//"
+contact    ""
+email      "bug-glibc-locales@gnu.org"
+tel        ""
+fax        ""
+language   "English"
+territory  "Netherlands"
+revision   "1.7"
+date       "2015-11-02"
+
+category "i18n:2012";LC_IDENTIFICATION
+category "i18n:2012";LC_CTYPE
+category "i18n:2012";LC_COLLATE
+category "i18n:2012";LC_TIME
+category "i18n:2012";LC_NUMERIC
+category "i18n:2012";LC_MONETARY
+category "i18n:2012";LC_MESSAGES
+category "i18n:2012";LC_PAPER
+category "i18n:2012";LC_NAME
+category "i18n:2012";LC_ADDRESS
+category "i18n:2012";LC_TELEPHONE
+category "i18n:2012";LC_MEASUREMENT
+END LC_IDENTIFICATION
+
+LC_CTYPE
+copy "en_GB"
+END LC_CTYPE
+
+LC_COLLATE
+copy "en_GB"
+END LC_COLLATE
+
+LC_MONETARY
+int_curr_symbol     "<U0045><U0055><U0052><U0020>"
+currency_symbol     "<U20AC>"
+% Use English because of international language and unambiguous processing.
+mon_decimal_point   "<U002E>"
+mon_thousands_sep   "<U002C>"
+mon_grouping        3;3
+positive_sign       ""
+negative_sign       "<U002D>"
+int_frac_digits     2
+frac_digits         2
+p_cs_precedes       1
+int_p_sep_by_space  1
+p_sep_by_space      0
+n_cs_precedes       1
+int_n_sep_by_space  1
+n_sep_by_space      0
+p_sign_posn         1
+n_sign_posn         1
+END LC_MONETARY
+
+LC_NUMERIC
+% Use English because of international language and unambiguous processing.
+decimal_point "<U002E>"
+thousands_sep "<U002C>"
+grouping      3;3
+END LC_NUMERIC
+
+LC_TIME
+abday   "<U0053><U0075><U006E>";"<U004D><U006F><U006E>";/
+        "<U0054><U0075><U0065>";"<U0057><U0065><U0064>";/
+        "<U0054><U0068><U0075>";"<U0046><U0072><U0069>";/
+        "<U0053><U0061><U0074>"
+day     "<U0053><U0075><U006E><U0064><U0061><U0079>";/
+        "<U004D><U006F><U006E><U0064><U0061><U0079>";/
+        "<U0054><U0075><U0065><U0073><U0064><U0061><U0079>";/
+        "<U0057><U0065><U0064><U006E><U0065><U0073><U0064><U0061><U0079>";/
+        "<U0054><U0068><U0075><U0072><U0073><U0064><U0061><U0079>";/
+        "<U0046><U0072><U0069><U0064><U0061><U0079>";/
+        "<U0053><U0061><U0074><U0075><U0072><U0064><U0061><U0079>"
+abmon   "<U004A><U0061><U006E>";"<U0046><U0065><U0062>";/
+        "<U004D><U0061><U0072>";"<U0041><U0070><U0072>";/
+        "<U004D><U0061><U0079>";"<U004A><U0075><U006E>";/
+        "<U004A><U0075><U006C>";"<U0041><U0075><U0067>";/
+        "<U0053><U0065><U0070>";"<U004F><U0063><U0074>";/
+        "<U004E><U006F><U0076>";"<U0044><U0065><U0063>"
+mon     "<U004A><U0061><U006E><U0075><U0061><U0072><U0079>";/
+        "<U0046><U0065><U0062><U0072><U0075><U0061><U0072><U0079>";/
+        "<U004D><U0061><U0072><U0063><U0068>";/
+        "<U0041><U0070><U0072><U0069><U006C>";/
+        "<U004D><U0061><U0079>";/
+        "<U004A><U0075><U006E><U0065>";/
+        "<U004A><U0075><U006C><U0079>";/
+        "<U0041><U0075><U0067><U0075><U0073><U0074>";/
+        "<U0053><U0065><U0070><U0074><U0065><U006D><U0062><U0065><U0072>";/
+        "<U004F><U0063><U0074><U006F><U0062><U0065><U0072>";/
+        "<U004E><U006F><U0076><U0065><U006D><U0062><U0065><U0072>";/
+        "<U0044><U0065><U0063><U0065><U006D><U0062><U0065><U0072>"
+
+% Use mix of English because of international language and Dutch 24-hour clocks.
+d_t_fmt "<U0025><U0061><U0020><U0025><U0064><U0020><U0025><U0062><U0020><U0025>/
+<U0059><U0020><U0025><U0052><U0020><U0025><U005A>"
+% Use English because of international language, this is also sortable.
+d_fmt   "<U0025><U0059><U002D><U0025><U006D><U002D><U0025><U0064>"
+% Use mix of English because of international language and Dutch 24-hour clocks.
+t_fmt   "<U0025><U0052>"
+t_fmt_ampm "<U0025><U0048><U003A><U0025><U004D><U003A><U0025><U0053>"
+am_pm	"";""
+week 7;19971130;4
+first_weekday 2
+END LC_TIME
+
+LC_MESSAGES
+copy "en_US"
+END LC_MESSAGES
+
+LC_PAPER
+copy "nl_NL"
+END LC_PAPER
+
+LC_NAME
+copy "en_US"
+END LC_NAME
+
+LC_ADDRESS
+% use Dutch format because of local infrastructure.
+postal_fmt    "<U0025><U0066><U0025><U004E><U0025><U0061><U0025><U004E>/
+<U0025><U0064><U0025><U004E><U0025><U0062><U0025><U004E><U0025><U0073>/
+<U0020><U0025><U0068><U0020><U0025><U0065><U0020><U0025><U0072><U0025>/
+<U004E><U0025><U007A><U0020><U0025><U0054><U0025>/
+<U004E><U0025><U0063><U0025><U004E>"
+country_name "<U004E><U0065><U0074><U0068><U0065><U0072><U006C><U0061>/
+<U006E><U0064><U0073>"
+country_post "<U004E><U004C>"
+country_ab2  "<U004E><U004C>"
+country_ab3  "<U004E><U004C><U0044>"
+country_num  528
+country_car  "<U004E><U004C>"
+lang_name    "<U0045><U006E><U0067><U006C><U0069><U0073><U0068>"
+lang_ab      "<U0065><U006E>"
+lang_term    "<U0065><U006E><U0067>"
+lang_lib     "<U0065><U006E><U0067>"
+END LC_ADDRESS
+
+LC_TELEPHONE
+copy "nl_NL"
+END LC_TELEPHONE
+
+LC_MEASUREMENT
+copy "nl_NL"
+END LC_MEASUREMENT