Patchwork [v2,5/14,BZ,#14095] update collation data from Unicode / ISO 14651

login
register
mail settings
Submitter Mike Fabian
Date Feb. 5, 2018, 4:06 p.m.
Message ID <s9d372fbeu9.fsf@taka.site>
Download mbox | patch
Permalink /patch/25813/
State New
Headers show

Comments

Patch

From 7581e8ec429a97f22f845a1e3d566d790d5e8bb6 Mon Sep 17 00:00:00 2001
From: Mike FABIAN <mfabian@redhat.com>
Date: Mon, 11 Dec 2017 20:00:24 +0100
Subject: [PATCH 05/14] Add convenience symbols like <AFTER-A>, <BEFORE-A> to
 iso14651_t1_common

	* localedata/locales/iso14651_t1_common: Add some convenient collation
	symbols like <AFTER-A>, <BEFORE-A> to make tailoring easier using
	rules similar to those in CLDR.
---
 localedata/locales/iso14651_t1_common | 120 ++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)

diff --git a/localedata/locales/iso14651_t1_common b/localedata/locales/iso14651_t1_common
index c976136e1c..443e689da0 100644
--- a/localedata/locales/iso14651_t1_common
+++ b/localedata/locales/iso14651_t1_common
@@ -339,6 +339,68 @@  collating-symbol <S1F000>..<S1F9FF> % Symbols from SMP
 
 collating-symbol <SFFFF> % Guaranteed largest symbol value. Keep at end of this list
 
+% Convenience first-level collation symbols added here
+% to make tailoring easier using rules similar to those in CLDR.
+
+collating-symbol <BEFORE-A>
+collating-symbol <AFTER-A>
+collating-symbol <BEFORE-B>
+collating-symbol <AFTER-B>
+collating-symbol <BEFORE-C>
+collating-symbol <AFTER-C>
+collating-symbol <BEFORE-D>
+collating-symbol <AFTER-D>
+collating-symbol <BEFORE-E>
+collating-symbol <AFTER-E>
+collating-symbol <BEFORE-F>
+collating-symbol <AFTER-F>
+collating-symbol <BEFORE-G>
+collating-symbol <AFTER-G>
+collating-symbol <BEFORE-H>
+collating-symbol <AFTER-H>
+collating-symbol <BEFORE-I>
+collating-symbol <AFTER-I>
+collating-symbol <BEFORE-J>
+collating-symbol <AFTER-J>
+collating-symbol <BEFORE-K>
+collating-symbol <AFTER-K>
+collating-symbol <BEFORE-L>
+collating-symbol <AFTER-L>
+collating-symbol <BEFORE-M>
+collating-symbol <AFTER-M>
+collating-symbol <BEFORE-N>
+collating-symbol <AFTER-N>
+collating-symbol <BEFORE-O>
+collating-symbol <AFTER-O>
+collating-symbol <BEFORE-P>
+collating-symbol <AFTER-P>
+collating-symbol <BEFORE-Q>
+collating-symbol <AFTER-Q>
+collating-symbol <BEFORE-R>
+collating-symbol <AFTER-R>
+collating-symbol <BEFORE-S>
+collating-symbol <AFTER-S>
+collating-symbol <BEFORE-T>
+collating-symbol <AFTER-T>
+collating-symbol <BEFORE-U>
+collating-symbol <AFTER-U>
+collating-symbol <BEFORE-V>
+collating-symbol <AFTER-V>
+collating-symbol <BEFORE-W>
+collating-symbol <AFTER-W>
+collating-symbol <BEFORE-X>
+collating-symbol <AFTER-X>
+collating-symbol <BEFORE-Y>
+collating-symbol <AFTER-Y>
+collating-symbol <BEFORE-Z>
+collating-symbol <AFTER-Z>
+collating-symbol <BEFORE-THORN>
+collating-symbol <AFTER-THORN>
+collating-symbol <BEFORE-EZH>
+collating-symbol <AFTER-EZH>
+collating-symbol <BEFORE-LATIN>
+collating-symbol <AFTER-LATIN>
+
 % Special fourth-level collating symbol
 
 collating-symbol <PLAIN> % Maximal level 4 weight
@@ -8225,6 +8287,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0037> % DIGIT SEVEN
 <S0038> % DIGIT EIGHT
 <S0039> % DIGIT NINE
+<BEFORE-LATIN>
+<BEFORE-A>
 <S0061> % LATIN SMALL LETTER A
 <S1D00> % LATIN LETTER SMALL CAPITAL A
 <S2C65> % LATIN SMALL LETTER A WITH STROKE
@@ -8238,6 +8302,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S1D90> % LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK
 <S0252> % LATIN SMALL LETTER TURNED ALPHA
 <SAB64> % LATIN SMALL LETTER INVERTED ALPHA
+<AFTER-A>
+<BEFORE-B>
 <S0062> % LATIN SMALL LETTER B
 <S0299> % LATIN LETTER SMALL CAPITAL B
 <S0180> % LATIN SMALL LETTER B WITH STROKE
@@ -8249,6 +8315,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0253> % LATIN SMALL LETTER B WITH HOOK
 <S0183> % LATIN SMALL LETTER B WITH TOPBAR
 <SA7B5> % LATIN SMALL LETTER BETA
+<AFTER-B>
+<BEFORE-C>
 <S0063> % LATIN SMALL LETTER C
 <S1D04> % LATIN LETTER SMALL CAPITAL C
 <S023C> % LATIN SMALL LETTER C WITH STROKE
@@ -8258,6 +8326,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0255> % LATIN SMALL LETTER C WITH CURL
 <S2184> % LATIN SMALL LETTER REVERSED C
 <SA73F> % LATIN SMALL LETTER REVERSED C WITH DOT
+<AFTER-C>
+<BEFORE-D>
 <S0064> % LATIN SMALL LETTER D
 <S1D05> % LATIN LETTER SMALL CAPITAL D
 <S1D06> % LATIN LETTER SMALL CAPITAL ETH
@@ -8270,6 +8340,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0221> % LATIN SMALL LETTER D WITH CURL
 <SA771> % LATIN SMALL LETTER DUM
 <S1E9F> % LATIN SMALL LETTER DELTA
+<AFTER-D>
+<BEFORE-E>
 <S0065> % LATIN SMALL LETTER E
 <S1D07> % LATIN LETTER SMALL CAPITAL E
 <SAB32> % LATIN SMALL LETTER BLACKLETTER E
@@ -8293,6 +8365,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S025E> % LATIN SMALL LETTER CLOSED REVERSED OPEN E
 <S029A> % LATIN SMALL LETTER CLOSED OPEN E
 <S0264> % LATIN SMALL LETTER RAMS HORN
+<AFTER-E>
+<BEFORE-F>
 <S0066> % LATIN SMALL LETTER F
 <SA730> % LATIN LETTER SMALL CAPITAL F
 <SAB35> % LATIN SMALL LETTER LENIS F
@@ -8302,6 +8376,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0192> % LATIN SMALL LETTER F WITH HOOK
 <S214E> % TURNED SMALL F
 <SA7FB> % LATIN EPIGRAPHIC LETTER REVERSED F
+<AFTER-F>
+<BEFORE-G>
 <S0067> % LATIN SMALL LETTER G
 <S0261> % LATIN SMALL LETTER SCRIPT G
 <SAB36> % LATIN SMALL LETTER SCRIPT G WITH CROSSED-TAIL
@@ -8314,6 +8390,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA77F> % LATIN SMALL LETTER TURNED INSULAR G
 <S0263> % LATIN SMALL LETTER GAMMA
 <S01A3> % LATIN SMALL LETTER OI
+<AFTER-G>
+<BEFORE-H>
 <S0068> % LATIN SMALL LETTER H
 <S029C> % LATIN LETTER SMALL CAPITAL H
 <S0195> % LATIN SMALL LETTER HV
@@ -8325,6 +8403,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0267> % LATIN SMALL LETTER HENG WITH HOOK
 <S02BB> % MODIFIER LETTER TURNED COMMA
 <S02BD> % MODIFIER LETTER REVERSED COMMA
+<AFTER-H>
+<BEFORE-I>
 <S0069> % LATIN SMALL LETTER I
 <S0131> % LATIN SMALL LETTER DOTLESS I
 <S026A> % LATIN LETTER SMALL CAPITAL I
@@ -8336,6 +8416,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S1D96> % LATIN SMALL LETTER I WITH RETROFLEX HOOK
 <S0269> % LATIN SMALL LETTER IOTA
 <S1D7C> % LATIN SMALL LETTER IOTA WITH STROKE
+<AFTER-I>
+<BEFORE-J>
 <S006A> % LATIN SMALL LETTER J
 <S0237> % LATIN SMALL LETTER DOTLESS J
 <S1D0A> % LATIN LETTER SMALL CAPITAL J
@@ -8343,6 +8425,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S029D> % LATIN SMALL LETTER J WITH CROSSED-TAIL
 <S025F> % LATIN SMALL LETTER DOTLESS J WITH STROKE
 <S0284> % LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
+<AFTER-J>
+<BEFORE-K>
 <S006B> % LATIN SMALL LETTER K
 <S1D0B> % LATIN LETTER SMALL CAPITAL K
 <S1D84> % LATIN SMALL LETTER K WITH PALATAL HOOK
@@ -8352,6 +8436,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA743> % LATIN SMALL LETTER K WITH DIAGONAL STROKE
 <SA745> % LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
 <S029E> % LATIN SMALL LETTER TURNED K
+<AFTER-K>
+<BEFORE-L>
 <S006C> % LATIN SMALL LETTER L
 <S029F> % LATIN LETTER SMALL CAPITAL L
 <SA747> % LATIN SMALL LETTER BROKEN L
@@ -8373,6 +8459,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA781> % LATIN SMALL LETTER TURNED L
 <S019B> % LATIN SMALL LETTER LAMBDA WITH STROKE
 <S028E> % LATIN SMALL LETTER TURNED Y
+<AFTER-L>
+<BEFORE-M>
 <S006D> % LATIN SMALL LETTER M
 <S1D0D> % LATIN LETTER SMALL CAPITAL M
 <S1D6F> % LATIN SMALL LETTER M WITH MIDDLE TILDE
@@ -8382,6 +8470,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA7FD> % LATIN EPIGRAPHIC LETTER INVERTED M
 <SA7FF> % LATIN EPIGRAPHIC LETTER ARCHAIC M
 <SA773> % LATIN SMALL LETTER MUM
+<AFTER-M>
+<BEFORE-N>
 <S006E> % LATIN SMALL LETTER N
 <S0274> % LATIN LETTER SMALL CAPITAL N
 <S1D3B> % MODIFIER LETTER CAPITAL REVERSED N
@@ -8397,6 +8487,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA774> % LATIN SMALL LETTER NUM
 <S014B> % LATIN SMALL LETTER ENG
 <SAB3C> % LATIN SMALL LETTER ENG WITH CROSSED-TAIL
+<AFTER-N>
+<BEFORE-O>
 <S006F> % LATIN SMALL LETTER O
 <S1D0F> % LATIN LETTER SMALL CAPITAL O
 <S1D11> % LATIN SMALL LETTER SIDEWAYS O
@@ -8426,6 +8518,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA7B7> % LATIN SMALL LETTER OMEGA
 <S0223> % LATIN SMALL LETTER OU
 <S1D15> % LATIN LETTER SMALL CAPITAL OU
+<AFTER-O>
+<BEFORE-P>
 <S0070> % LATIN SMALL LETTER P
 <S1D18> % LATIN LETTER SMALL CAPITAL P
 <S1D7D> % LATIN SMALL LETTER P WITH STROKE
@@ -8438,12 +8532,16 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA7FC> % LATIN EPIGRAPHIC LETTER REVERSED P
 <S0278> % LATIN SMALL LETTER PHI
 <S2C77> % LATIN SMALL LETTER TAILLESS PHI
+<AFTER-P>
+<BEFORE-Q>
 <S0071> % LATIN SMALL LETTER Q
 <SA757> % LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
 <SA759> % LATIN SMALL LETTER Q WITH DIAGONAL STROKE
 <S02A0> % LATIN SMALL LETTER Q WITH HOOK
 <S024B> % LATIN SMALL LETTER Q WITH HOOK TAIL
 <S0138> % LATIN SMALL LETTER KRA
+<AFTER-Q>
+<BEFORE-R>
 <S0072> % LATIN SMALL LETTER R
 <SAB45> % LATIN SMALL LETTER STIRRUP R
 <S0280> % LATIN LETTER SMALL CAPITAL R
@@ -8473,6 +8571,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SA775> % LATIN SMALL LETTER RUM
 <SA776> % LATIN LETTER SMALL CAPITAL RUM
 <SA75D> % LATIN SMALL LETTER RUM ROTUNDA
+<AFTER-R>
+<BEFORE-S>
 <S0073> % LATIN SMALL LETTER S
 <SA731> % LATIN LETTER SMALL CAPITAL S
 <S1D74> % LATIN SMALL LETTER S WITH MIDDLE TILDE
@@ -8488,6 +8588,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0285> % LATIN SMALL LETTER SQUAT REVERSED ESH
 <S1D98> % LATIN SMALL LETTER ESH WITH RETROFLEX HOOK
 <S0286> % LATIN SMALL LETTER ESH WITH CURL
+<AFTER-S>
+<BEFORE-T>
 <S0074> % LATIN SMALL LETTER T
 <S1D1B> % LATIN LETTER SMALL CAPITAL T
 <S0167> % LATIN SMALL LETTER T WITH STROKE
@@ -8499,6 +8601,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0236> % LATIN SMALL LETTER T WITH CURL
 <SA777> % LATIN SMALL LETTER TUM
 <S0287> % LATIN SMALL LETTER TURNED T
+<AFTER-T>
+<BEFORE-U>
 <S0075> % LATIN SMALL LETTER U
 <S1D1C> % LATIN LETTER SMALL CAPITAL U
 <SAB4E> % LATIN SMALL LETTER U WITH SHORT RIGHT LEG
@@ -8521,6 +8625,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0270> % LATIN SMALL LETTER TURNED M WITH LONG LEG
 <S028A> % LATIN SMALL LETTER UPSILON
 <S1D7F> % LATIN SMALL LETTER UPSILON WITH STROKE
+<AFTER-U>
+<BEFORE-V>
 <S0076> % LATIN SMALL LETTER V
 <S1D20> % LATIN LETTER SMALL CAPITAL V
 <SA75F> % LATIN SMALL LETTER V WITH DIAGONAL STROKE
@@ -8530,10 +8636,14 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S2C74> % LATIN SMALL LETTER V WITH CURL
 <S1EFD> % LATIN SMALL LETTER MIDDLE-WELSH V
 <S028C> % LATIN SMALL LETTER TURNED V
+<AFTER-V>
+<BEFORE-W>
 <S0077> % LATIN SMALL LETTER W
 <S1D21> % LATIN LETTER SMALL CAPITAL W
 <S2C73> % LATIN SMALL LETTER W WITH HOOK
 <S028D> % LATIN SMALL LETTER TURNED W
+<AFTER-W>
+<BEFORE-X>
 <S0078> % LATIN SMALL LETTER X
 <S1D8D> % LATIN SMALL LETTER X WITH PALATAL HOOK
 <SAB56> % LATIN SMALL LETTER X WITH LOW RIGHT RING
@@ -8543,6 +8653,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <SAB53> % LATIN SMALL LETTER CHI
 <SAB54> % LATIN SMALL LETTER CHI WITH LOW RIGHT RING
 <SAB55> % LATIN SMALL LETTER CHI WITH LOW LEFT SERIF
+<AFTER-X>
+<BEFORE-Y>
 <S0079> % LATIN SMALL LETTER Y
 <S028F> % LATIN LETTER SMALL CAPITAL Y
 <S024F> % LATIN SMALL LETTER Y WITH STROKE
@@ -8550,6 +8662,8 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S1EFF> % LATIN SMALL LETTER Y WITH LOOP
 <SAB5A> % LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
 <S021D> % LATIN SMALL LETTER YOGH
+<AFTER-Y>
+<BEFORE-Z>
 <S007A> % LATIN SMALL LETTER Z
 <S1D22> % LATIN LETTER SMALL CAPITAL Z
 <S01B6> % LATIN SMALL LETTER Z WITH STROKE
@@ -8561,15 +8675,20 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0240> % LATIN SMALL LETTER Z WITH SWASH TAIL
 <S2C6C> % LATIN SMALL LETTER Z WITH DESCENDER
 <SA763> % LATIN SMALL LETTER VISIGOTHIC Z
+<AFTER-Z>
+<BEFORE-EZH>
 <S0292> % LATIN SMALL LETTER EZH
 <S1D23> % LATIN LETTER SMALL CAPITAL EZH
 <S01B9> % LATIN SMALL LETTER EZH REVERSED
 <S1D9A> % LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
 <S01BA> % LATIN SMALL LETTER EZH WITH TAIL
 <S0293> % LATIN SMALL LETTER EZH WITH CURL
+<AFTER-EZH>
+<BEFORE-THORN>
 <S00FE> % LATIN SMALL LETTER THORN
 <SA765> % LATIN SMALL LETTER THORN WITH STROKE
 <SA767> % LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
+<AFTER-THORN>
 <S01BF> % LATIN LETTER WYNN
 <SA769> % LATIN SMALL LETTER VEND
 <SAB60> % LATIN SMALL LETTER SAKHA YAT
@@ -8612,6 +8731,7 @@  collating-element <UAABC_AAAF> from "<UAABC><UAAAF>" % collation-element for reo
 <S0298> % LATIN LETTER BILABIAL CLICK
 <S02AC> % LATIN LETTER BILABIAL PERCUSSIVE
 <S02AD> % LATIN LETTER BIDENTAL PERCUSSIVE
+<AFTER-LATIN>
 <S03B1> % GREEK SMALL LETTER ALPHA
 <S03B2> % GREEK SMALL LETTER BETA
 <S03B3> % GREEK SMALL LETTER GAMMA
-- 
2.14.3