From patchwork Fri Jul 20 18:49:07 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Carlos O'Donell X-Patchwork-Id: 28536 Received: (qmail 108897 invoked by alias); 20 Jul 2018 18:49:25 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 108879 invoked by uid 89); 20 Jul 2018 18:49:24 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-22.2 required=5.0 tests=AWL, BAYES_50, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_MANYTO, RCVD_IN_DNSWL_NONE, UPPERCASE_50_75 autolearn=ham version=3.3.2 spammy=fifty, nine, HUNDRED, FIVE X-HELO: mail-qk0-f195.google.com Return-Path: Subject: Re: [PATCH] Keep expected behaviour for [a-z] and [A-z] (Bug 23393). To: Florian Weimer , GNU C Library , Rich Felker , Mike Fabian , Zorro Lang , "Joseph S. Myers" References: <9d6f47ec-f9eb-ead0-889c-3b9aae66551c@redhat.com> From: Carlos O'Donell Openpgp: preference=signencrypt Message-ID: Date: Fri, 20 Jul 2018 14:49:07 -0400 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.8.0 MIME-Version: 1.0 In-Reply-To: On 07/19/2018 04:39 PM, Florian Weimer wrote: > On 07/19/2018 09:43 PM, Carlos O'Donell wrote: >> * Add back tests to tst-fnmatch.input and tst-regexloc.c which >> exercise that [a-z] does not match A or Z. 
> > [a-z] still matches ñ, diff --git a/localedata/locales/iso14651_t1_common b/localedata/locales/iso14651_t1_common index 227400cc4e..7248074a8b 100644 --- a/localedata/locales/iso14651_t1_common +++ b/localedata/locales/iso14651_t1_common @@ -63177,7 +63177,19 @@ order_start ;forward;backward;forward;forward,position ;;; % MANAT SIGN ;;; % RUBLE SIGN ;;; % LARI SIGN +% Implement rational range for [0-9] in regular expressions. +% We order the collation element order to support rational ranges. +% Collation is unaffected because the 4-level weights remain the same. ;;; % DIGIT ZERO + ;;; % DIGIT ONE + ;;; % DIGIT TWO + ;;; % DIGIT THREE + ;;; % DIGIT FOUR + ;;; % DIGIT FIVE + ;;; % DIGIT SIX + ;;; % DIGIT SEVEN + ;;; % DIGIT EIGHT + ;;; % DIGIT NINE ;;; % ARABIC-INDIC DIGIT ZERO ;;; % EXTENDED ARABIC-INDIC DIGIT ZERO ;;; % NKO DIGIT ZERO @@ -63250,7 +63262,6 @@ order_start ;forward;backward;forward;forward,position ;;; % SUBSCRIPT ZERO "";"";""; % VULGAR FRACTION ZERO THIRDS "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO - ;;; % DIGIT ONE ;;; % ARABIC-INDIC DIGIT ONE ;;; % EXTENDED ARABIC-INDIC DIGIT ONE ;;; % NKO DIGIT ONE @@ -63440,7 +63451,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ONE - ;;; % DIGIT TWO ;;; % ARABIC-INDIC DIGIT TWO ;;; % EXTENDED ARABIC-INDIC DIGIT TWO ;;; % NKO DIGIT TWO @@ -63583,7 +63593,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY TWO "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR FEBRUARY "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWO - ;;; % DIGIT THREE ;;; % ARABIC-INDIC DIGIT THREE ;;; % EXTENDED ARABIC-INDIC DIGIT THREE ;;; % NKO DIGIT THREE @@ -63709,7 +63718,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THREE "";"";""; % 
IDEOGRAPHIC TELEGRAPH SYMBOL FOR MARCH "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR THREE - ;;; % DIGIT FOUR ;;; % ARABIC-INDIC DIGIT FOUR ;;; % EXTENDED ARABIC-INDIC DIGIT FOUR ;;; % NKO DIGIT FOUR @@ -63829,7 +63837,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FOUR "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR APRIL "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FOUR - ;;; % DIGIT FIVE ;;; % ARABIC-INDIC DIGIT FIVE ;;; % EXTENDED ARABIC-INDIC DIGIT FIVE ;;; % NKO DIGIT FIVE @@ -63941,7 +63948,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY FIVE "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR MAY "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR FIVE - ;;; % DIGIT SIX ;;; % ARABIC-INDIC DIGIT SIX ;;; % EXTENDED ARABIC-INDIC DIGIT SIX ;;; % NKO DIGIT SIX @@ -64036,7 +64042,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SIX "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR JUNE "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SIX - ;;; % DIGIT SEVEN ;;; % ARABIC-INDIC DIGIT SEVEN ;;; % EXTENDED ARABIC-INDIC DIGIT SEVEN ;;; % NKO DIGIT SEVEN @@ -64132,7 +64137,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY SEVEN "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR JULY "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR SEVEN - ;;; % DIGIT EIGHT ;;; % ARABIC-INDIC DIGIT EIGHT ;;; % EXTENDED ARABIC-INDIC DIGIT EIGHT ;;; % NKO DIGIT EIGHT @@ -64226,7 +64230,6 @@ order_start ;forward;backward;forward;forward,position "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY EIGHT "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR AUGUST "";"";""; % IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR EIGHT - ;;; % DIGIT NINE ;;; % ARABIC-INDIC DIGIT NINE ;;; % EXTENDED ARABIC-INDIC DIGIT NINE ;;; % NKO DIGIT NINE @@ -64326,7 +64329,35 @@ order_start 
;forward;backward;forward;forward,position else order_start ;forward;forward;forward;forward,position endif +% Implement rational range for [a-z] in regular expressions. +% We order the collation element order to support rational ranges. +% Collation is unaffected because the 4-level weights remain the same. ;;; % LATIN SMALL LETTER A + ;;; % LATIN SMALL LETTER B + ;;; % LATIN SMALL LETTER C + ;;; % LATIN SMALL LETTER D + ;;; % LATIN SMALL LETTER E + ;;; % LATIN SMALL LETTER F + ;;; % LATIN SMALL LETTER G + ;;; % LATIN SMALL LETTER H + ;;; % LATIN SMALL LETTER I + ;;; % LATIN SMALL LETTER J + ;;; % LATIN SMALL LETTER K + ;;; % LATIN SMALL LETTER L + ;;; % LATIN SMALL LETTER M + ;;; % LATIN SMALL LETTER N + ;;; % LATIN SMALL LETTER O + ;;; % LATIN SMALL LETTER P + ;;; % LATIN SMALL LETTER Q + ;;; % LATIN SMALL LETTER R + ;;; % LATIN SMALL LETTER S + ;;; % LATIN SMALL LETTER T + ;;; % LATIN SMALL LETTER U + ;;; % LATIN SMALL LETTER V + ;;; % LATIN SMALL LETTER W + ;;; % LATIN SMALL LETTER X + ;;; % LATIN SMALL LETTER Y + ;;; % LATIN SMALL LETTER Z ;;; % FULLWIDTH LATIN SMALL LETTER A ;;; % COMBINING LATIN SMALL LETTER A ;;; % PARENTHESIZED LATIN SMALL LETTER A @@ -64418,7 +64449,6 @@ endif ;;; % LATIN SMALL LETTER TURNED ALPHA ;;; % MODIFIER LETTER SMALL TURNED ALPHA ;;; % LATIN SMALL LETTER INVERTED ALPHA - ;;; % LATIN SMALL LETTER B ;;; % FULLWIDTH LATIN SMALL LETTER B ;;; % COMBINING LATIN SMALL LETTER B ;;; % PARENTHESIZED LATIN SMALL LETTER B @@ -64454,7 +64484,6 @@ endif ;;; % LATIN SMALL LETTER B WITH TOPBAR ;;; % LATIN SMALL LETTER BETA ;;; % COMBINING LATIN SMALL LETTER BETA - ;;; % LATIN SMALL LETTER C ;;; % FULLWIDTH LATIN SMALL LETTER C ;;; % COMBINING LATIN SMALL LETTER C ;;; % SMALL ROMAN NUMERAL ONE HUNDRED @@ -64504,7 +64533,6 @@ endif ;;; % MODIFIER LETTER SMALL C WITH CURL ;;; % LATIN SMALL LETTER REVERSED C ;;; % LATIN SMALL LETTER REVERSED C WITH DOT - ;;; % LATIN SMALL LETTER D ;;; % FULLWIDTH LATIN SMALL LETTER D ;;; % COMBINING LATIN SMALL 
LETTER D ;;; % SMALL ROMAN NUMERAL FIVE HUNDRED @@ -64563,7 +64591,6 @@ endif ;;; % LATIN SMALL LETTER D WITH CURL ;;; % LATIN SMALL LETTER DUM ;;; % LATIN SMALL LETTER DELTA - ;;; % LATIN SMALL LETTER E ;;; % FULLWIDTH LATIN SMALL LETTER E ;;; % COMBINING LATIN SMALL LETTER E ;;; % PARENTHESIZED LATIN SMALL LETTER E @@ -64641,7 +64668,6 @@ endif ;;; % LATIN SMALL LETTER CLOSED REVERSED OPEN E ;;; % LATIN SMALL LETTER CLOSED OPEN E ;;; % LATIN SMALL LETTER RAMS HORN - ;;; % LATIN SMALL LETTER F ;;; % FULLWIDTH LATIN SMALL LETTER F ;;; % COMBINING LATIN SMALL LETTER F ;;; % PARENTHESIZED LATIN SMALL LETTER F @@ -64680,7 +64706,6 @@ endif ;;; % LATIN SMALL LETTER F WITH HOOK ;;; % TURNED SMALL F ;;; % LATIN EPIGRAPHIC LETTER REVERSED F - ;;; % LATIN SMALL LETTER G ;;; % FULLWIDTH LATIN SMALL LETTER G ;;; % COMBINING LATIN SMALL LETTER G ;;; % PARENTHESIZED LATIN SMALL LETTER G @@ -64727,7 +64752,6 @@ endif ;;; % LATIN SMALL LETTER GAMMA ;;; % MODIFIER LETTER SMALL GAMMA ;;; % LATIN SMALL LETTER OI - ;;; % LATIN SMALL LETTER H ;;; % FULLWIDTH LATIN SMALL LETTER H ;;; % COMBINING LATIN SMALL LETTER H ;;; % PARENTHESIZED LATIN SMALL LETTER H @@ -64780,7 +64804,6 @@ endif ;;; % LATIN SMALL LETTER HENG WITH HOOK ;;; % MODIFIER LETTER TURNED COMMA ;;; % MODIFIER LETTER REVERSED COMMA - ;;; % LATIN SMALL LETTER I ;;; % FULLWIDTH LATIN SMALL LETTER I ;;; % COMBINING LATIN SMALL LETTER I ;;; % SMALL ROMAN NUMERAL ONE @@ -64844,7 +64867,6 @@ endif ;;; % LATIN SMALL LETTER IOTA ;;; % MODIFIER LETTER SMALL IOTA ;;; % LATIN SMALL LETTER IOTA WITH STROKE - ;;; % LATIN SMALL LETTER J ;;; % FULLWIDTH LATIN SMALL LETTER J ;;; % PARENTHESIZED LATIN SMALL LETTER J ;;; % DOUBLE-STRUCK ITALIC SMALL J @@ -64876,7 +64898,6 @@ endif ;;; % LATIN SMALL LETTER DOTLESS J WITH STROKE ;;; % MODIFIER LETTER SMALL DOTLESS J WITH STROKE ;;; % LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK - ;;; % LATIN SMALL LETTER K ;;; % FULLWIDTH LATIN SMALL LETTER K ;;; % COMBINING LATIN SMALL LETTER K ;;; % 
PARENTHESIZED LATIN SMALL LETTER K @@ -64926,7 +64947,6 @@ endif ;;; % LATIN SMALL LETTER K WITH DIAGONAL STROKE ;;; % LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE ;;; % LATIN SMALL LETTER TURNED K - ;;; % LATIN SMALL LETTER L ;;; % FULLWIDTH LATIN SMALL LETTER L ;;; % COMBINING LATIN SMALL LETTER L ;;; % SMALL ROMAN NUMERAL FIFTY @@ -64996,7 +65016,6 @@ endif ;;; % LATIN SMALL LETTER TURNED L ;;; % LATIN SMALL LETTER LAMBDA WITH STROKE ;;; % LATIN SMALL LETTER TURNED Y - ;;; % LATIN SMALL LETTER M ;;; % FULLWIDTH LATIN SMALL LETTER M ;;; % COMBINING LATIN SMALL LETTER M ;;; % SMALL ROMAN NUMERAL ONE THOUSAND @@ -65055,7 +65074,6 @@ endif ;;; % LATIN EPIGRAPHIC LETTER INVERTED M ;;; % LATIN EPIGRAPHIC LETTER ARCHAIC M ;;; % LATIN SMALL LETTER MUM - ;;; % LATIN SMALL LETTER N ;;; % FULLWIDTH LATIN SMALL LETTER N ;;; % COMBINING LATIN SMALL LETTER N ;;; % PARENTHESIZED LATIN SMALL LETTER N @@ -65114,7 +65132,6 @@ endif ;;; % LATIN SMALL LETTER ENG ;;; % MODIFIER LETTER SMALL ENG ;;; % LATIN SMALL LETTER ENG WITH CROSSED-TAIL - ;;; % LATIN SMALL LETTER O ;;; % FULLWIDTH LATIN SMALL LETTER O ;;; % COMBINING LATIN SMALL LETTER O ;;; % PARENTHESIZED LATIN SMALL LETTER O @@ -65213,7 +65230,6 @@ endif ;;; % LATIN SMALL LETTER OU ;;; % MODIFIER LETTER CAPITAL OU ;;; % LATIN LETTER SMALL CAPITAL OU - ;;; % LATIN SMALL LETTER P ;;; % FULLWIDTH LATIN SMALL LETTER P ;;; % COMBINING LATIN SMALL LETTER P ;;; % PARENTHESIZED LATIN SMALL LETTER P @@ -65262,7 +65278,6 @@ endif ;;; % LATIN SMALL LETTER PHI ;;; % MODIFIER LETTER SMALL PHI ;;; % LATIN SMALL LETTER TAILLESS PHI - ;;; % LATIN SMALL LETTER Q ;;; % FULLWIDTH LATIN SMALL LETTER Q ;;; % PARENTHESIZED LATIN SMALL LETTER Q ;;; % MATHEMATICAL BOLD SMALL Q @@ -65285,7 +65300,6 @@ endif ;;; % LATIN SMALL LETTER Q WITH HOOK ;;; % LATIN SMALL LETTER Q WITH HOOK TAIL ;;; % LATIN SMALL LETTER KRA - ;;; % LATIN SMALL LETTER R ;;; % FULLWIDTH LATIN SMALL LETTER R ;;; % COMBINING LATIN SMALL LETTER R ;;; % COMBINING LATIN SMALL 
LETTER R BELOW @@ -65354,7 +65368,6 @@ endif ;;; % LATIN SMALL LETTER RUM ;;; % LATIN LETTER SMALL CAPITAL RUM ;;; % LATIN SMALL LETTER RUM ROTUNDA - ;;; % LATIN SMALL LETTER S ;;; % FULLWIDTH LATIN SMALL LETTER S ;;; % COMBINING LATIN SMALL LETTER S ;;; % PARENTHESIZED LATIN SMALL LETTER S @@ -65417,7 +65430,6 @@ endif ;;; % LATIN SMALL LETTER SQUAT REVERSED ESH ;;; % LATIN SMALL LETTER ESH WITH RETROFLEX HOOK ;;; % LATIN SMALL LETTER ESH WITH CURL - ;;; % LATIN SMALL LETTER T ;;; % FULLWIDTH LATIN SMALL LETTER T ;;; % COMBINING LATIN SMALL LETTER T ;;; % PARENTHESIZED LATIN SMALL LETTER T @@ -65467,7 +65479,6 @@ endif ;;; % LATIN SMALL LETTER T WITH CURL ;;; % LATIN SMALL LETTER TUM ;;; % LATIN SMALL LETTER TURNED T - ;;; % LATIN SMALL LETTER U ;;; % FULLWIDTH LATIN SMALL LETTER U ;;; % COMBINING LATIN SMALL LETTER U ;;; % PARENTHESIZED LATIN SMALL LETTER U @@ -65552,7 +65563,6 @@ endif ;;; % LATIN SMALL LETTER UPSILON ;;; % MODIFIER LETTER SMALL UPSILON ;;; % LATIN SMALL LETTER UPSILON WITH STROKE - ;;; % LATIN SMALL LETTER V ;;; % FULLWIDTH LATIN SMALL LETTER V ;;; % COMBINING LATIN SMALL LETTER V ;;; % SMALL ROMAN NUMERAL FIVE @@ -65593,7 +65603,6 @@ endif ;;; % LATIN SMALL LETTER MIDDLE-WELSH V ;;; % LATIN SMALL LETTER TURNED V ;;; % MODIFIER LETTER SMALL TURNED V - ;;; % LATIN SMALL LETTER W ;;; % FULLWIDTH LATIN SMALL LETTER W ;;; % COMBINING LATIN SMALL LETTER W ;;; % PARENTHESIZED LATIN SMALL LETTER W @@ -65627,7 +65636,6 @@ endif ;;; % LATIN LETTER SMALL CAPITAL W ;;; % LATIN SMALL LETTER W WITH HOOK ;;; % LATIN SMALL LETTER TURNED W - ;;; % LATIN SMALL LETTER X ;;; % FULLWIDTH LATIN SMALL LETTER X ;;; % COMBINING LATIN SMALL LETTER X ;;; % SMALL ROMAN NUMERAL TEN @@ -65660,7 +65668,6 @@ endif ;;; % LATIN SMALL LETTER CHI ;;; % LATIN SMALL LETTER CHI WITH LOW RIGHT RING ;;; % LATIN SMALL LETTER CHI WITH LOW LEFT SERIF - ;;; % LATIN SMALL LETTER Y ;;; % FULLWIDTH LATIN SMALL LETTER Y ;;; % PARENTHESIZED LATIN SMALL LETTER Y ;;; % MATHEMATICAL BOLD SMALL Y 
@@ -65694,7 +65701,6 @@ endif ;;; % LATIN SMALL LETTER Y WITH LOOP ;;; % LATIN SMALL LETTER Y WITH SHORT RIGHT LEG ;;; % LATIN SMALL LETTER YOGH - ;;; % LATIN SMALL LETTER Z ;;; % FULLWIDTH LATIN SMALL LETTER Z ;;; % COMBINING LATIN SMALL LETTER Z ;;; % PARENTHESIZED LATIN SMALL LETTER Z @@ -65796,7 +65802,35 @@ endif ;;; % MATHEMATICAL BOLD ITALIC SMALL ALPHA ;;; % MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA ;;; % MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA +% Implement rational range for [A-Z] in regular expressions. +% We order the collation element order to support rational ranges. +% Collation is unaffected because the 4-level weights remain the same. ;;; % LATIN CAPITAL LETTER A + ;;; % LATIN CAPITAL LETTER B + ;;; % LATIN CAPITAL LETTER C + ;;; % LATIN CAPITAL LETTER D + ;;; % LATIN CAPITAL LETTER E + ;;; % LATIN CAPITAL LETTER F + ;;; % LATIN CAPITAL LETTER G + ;;; % LATIN CAPITAL LETTER H + ;;; % LATIN CAPITAL LETTER I + ;;; % LATIN CAPITAL LETTER J + ;;; % LATIN CAPITAL LETTER K + ;;; % LATIN CAPITAL LETTER L + ;;; % LATIN CAPITAL LETTER M + ;;; % LATIN CAPITAL LETTER N + ;;; % LATIN CAPITAL LETTER O + ;;; % LATIN CAPITAL LETTER P + ;;; % LATIN CAPITAL LETTER Q + ;;; % LATIN CAPITAL LETTER R + ;;; % LATIN CAPITAL LETTER S + ;;; % LATIN CAPITAL LETTER T + ;;; % LATIN CAPITAL LETTER U + ;;; % LATIN CAPITAL LETTER V + ;;; % LATIN CAPITAL LETTER W + ;;; % LATIN CAPITAL LETTER X + ;;; % LATIN CAPITAL LETTER Y + ;;; % LATIN CAPITAL LETTER Z ;;; % FULLWIDTH LATIN CAPITAL LETTER A ;;; % PARENTHESIZED LATIN CAPITAL LETTER A ;;; % MATHEMATICAL BOLD CAPITAL A @@ -65860,7 +65894,6 @@ endif ;;; % LATIN CAPITAL LETTER TURNED A ;;; % LATIN CAPITAL LETTER ALPHA ;;; % LATIN CAPITAL LETTER TURNED ALPHA - ;;; % LATIN CAPITAL LETTER B ;;; % FULLWIDTH LATIN CAPITAL LETTER B ;;; % PARENTHESIZED LATIN CAPITAL LETTER B ;;; % SCRIPT CAPITAL B @@ -65888,7 +65921,6 @@ endif ;;; % LATIN CAPITAL LETTER B WITH HOOK ;;; % LATIN CAPITAL LETTER B WITH TOPBAR ;;; % LATIN CAPITAL LETTER 
BETA - ;;; % LATIN CAPITAL LETTER C ;;; % FULLWIDTH LATIN CAPITAL LETTER C ;;; % ROMAN NUMERAL ONE HUNDRED ;;; % PARENTHESIZED LATIN CAPITAL LETTER C @@ -65921,7 +65953,6 @@ endif ;;; % LATIN CAPITAL LETTER C WITH HOOK ;;; % ROMAN NUMERAL REVERSED ONE HUNDRED ;;; % LATIN CAPITAL LETTER REVERSED C WITH DOT - ;;; % LATIN CAPITAL LETTER D ;;; % FULLWIDTH LATIN CAPITAL LETTER D ;;; % ROMAN NUMERAL FIVE HUNDRED ;;; % PARENTHESIZED LATIN CAPITAL LETTER D @@ -65959,7 +65990,6 @@ endif ;;; % LATIN CAPITAL LETTER AFRICAN D ;;; % LATIN CAPITAL LETTER D WITH HOOK ;;; % LATIN CAPITAL LETTER D WITH TOPBAR - ;;; % LATIN CAPITAL LETTER E ;;; % FULLWIDTH LATIN CAPITAL LETTER E ;;; % PARENTHESIZED LATIN CAPITAL LETTER E ;;; % SCRIPT CAPITAL E @@ -66010,7 +66040,6 @@ endif ;;; % LATIN CAPITAL LETTER OPEN E ;;; % EULER CONSTANT ;;; % LATIN CAPITAL LETTER REVERSED OPEN E - ;;; % LATIN CAPITAL LETTER F ;;; % FULLWIDTH LATIN CAPITAL LETTER F ;;; % PARENTHESIZED LATIN CAPITAL LETTER F ;;; % SCRIPT CAPITAL F @@ -66035,7 +66064,6 @@ endif ;;; % LATIN CAPITAL LETTER F WITH STROKE ;;; % LATIN CAPITAL LETTER F WITH HOOK ;;; % TURNED CAPITAL F - ;;; % LATIN CAPITAL LETTER G ;;; % FULLWIDTH LATIN CAPITAL LETTER G ;;; % PARENTHESIZED LATIN CAPITAL LETTER G ;;; % MATHEMATICAL BOLD CAPITAL G @@ -66071,7 +66099,6 @@ endif ;;; % LATIN CAPITAL LETTER TURNED INSULAR G ;;; % LATIN CAPITAL LETTER GAMMA ;;; % LATIN CAPITAL LETTER OI - ;;; % LATIN CAPITAL LETTER H ;;; % FULLWIDTH LATIN CAPITAL LETTER H ;;; % PARENTHESIZED LATIN CAPITAL LETTER H ;;; % SCRIPT CAPITAL H @@ -66104,7 +66131,6 @@ endif ;;; % LATIN CAPITAL LETTER H WITH DESCENDER ;;; % LATIN CAPITAL LETTER HALF H ;;; % LATIN CAPITAL LETTER HENG - ;;; % LATIN CAPITAL LETTER I ;;; % FULLWIDTH LATIN CAPITAL LETTER I ;;; % ROMAN NUMERAL ONE ;;; % PARENTHESIZED LATIN CAPITAL LETTER I @@ -66149,7 +66175,6 @@ endif ;;; % LATIN CAPITAL LETTER SMALL CAPITAL I ;;; % LATIN CAPITAL LETTER I WITH STROKE ;;; % LATIN CAPITAL LETTER IOTA - ;;; % LATIN CAPITAL 
LETTER J ;;; % FULLWIDTH LATIN CAPITAL LETTER J ;;; % PARENTHESIZED LATIN CAPITAL LETTER J ;;; % MATHEMATICAL BOLD CAPITAL J @@ -66172,7 +66197,6 @@ endif ;"";""; % LATIN CAPITAL LETTER J WITH CIRCUMFLEX ;;; % LATIN CAPITAL LETTER J WITH STROKE ;;; % LATIN CAPITAL LETTER J WITH CROSSED-TAIL - ;;; % LATIN CAPITAL LETTER K ;;; % KELVIN SIGN ;;; % FULLWIDTH LATIN CAPITAL LETTER K ;;; % PARENTHESIZED LATIN CAPITAL LETTER K @@ -66206,7 +66230,6 @@ endif ;;; % LATIN CAPITAL LETTER K WITH DIAGONAL STROKE ;;; % LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE ;;; % LATIN CAPITAL LETTER TURNED K - ;;; % LATIN CAPITAL LETTER L ;;; % FULLWIDTH LATIN CAPITAL LETTER L ;;; % ROMAN NUMERAL FIFTY ;;; % PARENTHESIZED LATIN CAPITAL LETTER L @@ -66249,7 +66272,6 @@ endif ;;; % LATIN CAPITAL LETTER L WITH MIDDLE TILDE ;;; % LATIN CAPITAL LETTER L WITH BELT ;;; % LATIN CAPITAL LETTER TURNED L - ;;; % LATIN CAPITAL LETTER M ;;; % FULLWIDTH LATIN CAPITAL LETTER M ;;; % ROMAN NUMERAL ONE THOUSAND ;;; % PARENTHESIZED LATIN CAPITAL LETTER M @@ -66275,7 +66297,6 @@ endif ;"";""; % LATIN CAPITAL LETTER M WITH DOT BELOW ;;; % COMBINING LATIN LETTER SMALL CAPITAL M ;;; % LATIN CAPITAL LETTER M WITH HOOK - ;;; % LATIN CAPITAL LETTER N ;;; % FULLWIDTH LATIN CAPITAL LETTER N ;;; % PARENTHESIZED LATIN CAPITAL LETTER N ;;; % DOUBLE-STRUCK CAPITAL N @@ -66312,7 +66333,6 @@ endif ;;; % LATIN CAPITAL LETTER N WITH LONG RIGHT LEG ;;; % LATIN CAPITAL LETTER N WITH DESCENDER ;;; % LATIN CAPITAL LETTER ENG - ;;; % LATIN CAPITAL LETTER O ;;; % FULLWIDTH LATIN CAPITAL LETTER O ;;; % PARENTHESIZED LATIN CAPITAL LETTER O ;;; % MATHEMATICAL BOLD CAPITAL O @@ -66377,7 +66397,6 @@ endif ;;; % LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY ;;; % LATIN CAPITAL LETTER OMEGA ;;; % LATIN CAPITAL LETTER OU - ;;; % LATIN CAPITAL LETTER P ;;; % FULLWIDTH LATIN CAPITAL LETTER P ;;; % PARENTHESIZED LATIN CAPITAL LETTER P ;;; % DOUBLE-STRUCK CAPITAL P @@ -66405,7 +66424,6 @@ endif ;;; % LATIN CAPITAL LETTER P WITH 
HOOK ;;; % LATIN CAPITAL LETTER P WITH FLOURISH ;;; % LATIN CAPITAL LETTER P WITH SQUIRREL TAIL - ;;; % LATIN CAPITAL LETTER Q ;;; % FULLWIDTH LATIN CAPITAL LETTER Q ;;; % PARENTHESIZED LATIN CAPITAL LETTER Q ;;; % DOUBLE-STRUCK CAPITAL Q @@ -66428,7 +66446,6 @@ endif ;;; % LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER ;;; % LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE ;;; % LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL - ;;; % LATIN CAPITAL LETTER R ;;; % FULLWIDTH LATIN CAPITAL LETTER R ;;; % PARENTHESIZED LATIN CAPITAL LETTER R ;;; % SCRIPT CAPITAL R @@ -66466,7 +66483,6 @@ endif ;;; % LATIN CAPITAL LETTER R WITH STROKE ;;; % LATIN CAPITAL LETTER R WITH TAIL ;;; % LATIN CAPITAL LETTER RUM ROTUNDA - ;;; % LATIN CAPITAL LETTER S ;;; % FULLWIDTH LATIN CAPITAL LETTER S ;;; % PARENTHESIZED LATIN CAPITAL LETTER S ;;; % TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S @@ -66502,7 +66518,6 @@ endif "";"";""; % LATIN CAPITAL LETTER SHARP S ;;; % LATIN CAPITAL LETTER S WITH SWASH TAIL ;;; % LATIN CAPITAL LETTER ESH - ;;; % LATIN CAPITAL LETTER T ;;; % FULLWIDTH LATIN CAPITAL LETTER T ;;; % PARENTHESIZED LATIN CAPITAL LETTER T ;;; % MATHEMATICAL BOLD CAPITAL T @@ -66536,7 +66551,6 @@ endif ;;; % LATIN CAPITAL LETTER T WITH HOOK ;;; % LATIN CAPITAL LETTER T WITH RETROFLEX HOOK ;;; % LATIN CAPITAL LETTER TURNED T - ;;; % LATIN CAPITAL LETTER U ;;; % FULLWIDTH LATIN CAPITAL LETTER U ;;; % PARENTHESIZED LATIN CAPITAL LETTER U ;;; % MATHEMATICAL BOLD CAPITAL U @@ -66591,7 +66605,6 @@ endif ;;; % LATIN CAPITAL LETTER TURNED H ;;; % LATIN CAPITAL LETTER TURNED M ;;; % LATIN CAPITAL LETTER UPSILON - ;;; % LATIN CAPITAL LETTER V ;;; % FULLWIDTH LATIN CAPITAL LETTER V ;;; % ROMAN NUMERAL FIVE ;;; % PARENTHESIZED LATIN CAPITAL LETTER V @@ -66622,7 +66635,6 @@ endif ;;; % LATIN CAPITAL LETTER V WITH HOOK ;;; % LATIN CAPITAL LETTER MIDDLE-WELSH V ;;; % LATIN CAPITAL LETTER TURNED V - ;;; % LATIN CAPITAL LETTER W ;;; % FULLWIDTH LATIN CAPITAL LETTER W ;;; % PARENTHESIZED LATIN 
CAPITAL LETTER W ;;; % MATHEMATICAL BOLD CAPITAL W @@ -66649,7 +66661,6 @@ endif ;"";""; % LATIN CAPITAL LETTER W WITH DOT ABOVE ;"";""; % LATIN CAPITAL LETTER W WITH DOT BELOW ;;; % LATIN CAPITAL LETTER W WITH HOOK - ;;; % LATIN CAPITAL LETTER X ;;; % FULLWIDTH LATIN CAPITAL LETTER X ;;; % ROMAN NUMERAL TEN ;;; % PARENTHESIZED LATIN CAPITAL LETTER X @@ -66675,7 +66686,6 @@ endif "";"";""; % ROMAN NUMERAL ELEVEN "";"";""; % ROMAN NUMERAL TWELVE ;;; % LATIN CAPITAL LETTER CHI - ;;; % LATIN CAPITAL LETTER Y ;;; % FULLWIDTH LATIN CAPITAL LETTER Y ;;; % PARENTHESIZED LATIN CAPITAL LETTER Y ;;; % MATHEMATICAL BOLD CAPITAL Y @@ -66708,7 +66718,6 @@ endif ;;; % LATIN CAPITAL LETTER Y WITH HOOK ;;; % LATIN CAPITAL LETTER Y WITH LOOP ;;; % LATIN CAPITAL LETTER YOGH - ;;; % LATIN CAPITAL LETTER Z ;;; % FULLWIDTH LATIN CAPITAL LETTER Z ;;; % PARENTHESIZED LATIN CAPITAL LETTER Z ;;; % DOUBLE-STRUCK CAPITAL Z diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input index dc2ca8d01a..0b3c78fd1c 100644 --- a/posix/tst-fnmatch.input +++ b/posix/tst-fnmatch.input @@ -67,9 +67,11 @@ # https://sourceware.org/bugzilla/show_bug.cgi?id=23393 # https://sourceware.org/bugzilla/show_bug.cgi?id=23420 # -# No consensus exists on how best to handle the changes so the -# iso14651_t1_common collation element order (CEO) has been changed to -# deinterlace the a-z and A-Z regions. +# The solution was to implement rational ranges by moving the collation +# element order to fix this for [a-z], [A-Z], and [0-9]. Likewise the +# upper and lower case letters are deinterlaced to allow for accented +# ranges that don't include uppercase e.g. [a-ñ] should not include +# any uppercase letters but may include a-z and more. # # With the deinterlacing commit ac3a3b4b0d561d776b60317d6a926050c8541655 # could be reverted to re-test the correct non-interleaved expectations. @@ -77,9 +79,7 @@ # Please note that despite the region being deinterlaced, the ordering # of collation remains the same. 
In glibc we implement CEO and because of # that we can reorder the elements to reorder ranges without impacting -# collation which depends on weights. The collation element ordering -# could have been changed to include just a-z, A-Z, and 0-9 in three -# distinct blocks, but this needs more discussion by the community. +# collation which depends on weights. # B.6 004(C) C "!#%+,-./01234567889" "!#%+,-./01234567889" 0 @@ -477,9 +477,9 @@ C "-" "[Z-\\]]" NOMATCH # handling of ranges and the recognition of character (vs bytes). de_DE.ISO-8859-1 "a" "[a-z]" 0 de_DE.ISO-8859-1 "z" "[a-z]" 0 -de_DE.ISO-8859-1 "ä" "[a-z]" 0 -de_DE.ISO-8859-1 "ö" "[a-z]" 0 -de_DE.ISO-8859-1 "ü" "[a-z]" 0 +de_DE.ISO-8859-1 "ä" "[a-z]" NOMATCH +de_DE.ISO-8859-1 "ö" "[a-z]" NOMATCH +de_DE.ISO-8859-1 "ü" "[a-z]" NOMATCH de_DE.ISO-8859-1 "A" "[a-z]" NOMATCH de_DE.ISO-8859-1 "Z" "[a-z]" NOMATCH de_DE.ISO-8859-1 "Ä" "[a-z]" NOMATCH @@ -492,9 +492,9 @@ de_DE.ISO-8859-1 " de_DE.ISO-8859-1 "ü" "[A-Z]" NOMATCH de_DE.ISO-8859-1 "A" "[A-Z]" 0 de_DE.ISO-8859-1 "Z" "[A-Z]" 0 -de_DE.ISO-8859-1 "Ä" "[A-Z]" 0 -de_DE.ISO-8859-1 "Ö" "[A-Z]" 0 -de_DE.ISO-8859-1 "Ü" "[A-Z]" 0 +de_DE.ISO-8859-1 "Ä" "[A-Z]" NOMATCH +de_DE.ISO-8859-1 "Ö" "[A-Z]" NOMATCH +de_DE.ISO-8859-1 "Ü" "[A-Z]" NOMATCH de_DE.ISO-8859-1 "a" "[[:lower:]]" 0 de_DE.ISO-8859-1 "z" "[[:lower:]]" 0 de_DE.ISO-8859-1 "ä" "[[:lower:]]" 0 @@ -568,20 +568,34 @@ de_DE.ISO-8859-1 "ba" "[[.a.]]a" NOMATCH # And with a multibyte character set. en_US.UTF-8 "a" "[a-z]" 0 +# Test that LATIN SMALL LETTER N WITH TILDE is not in [a-z]. +en_US.UTF-8 "ñ" "[a-z]" NOMATCH en_US.UTF-8 "z" "[a-z]" 0 en_US.UTF-8 "A" "[a-z]" NOMATCH +# Test that LATIN CAPITAL LETTER N WITH TILDE is not in [a-z]. +en_US.UTF-8 "Ñ" "[a-z]" NOMATCH en_US.UTF-8 "Z" "[a-z]" NOMATCH en_US.UTF-8 "a" "[A-Z]" NOMATCH +# Test that LATIN SMALL LETTER N WITH TILDE is not in [A-Z].
+en_US.UTF-8 "ñ" "[A-Z]" NOMATCH en_US.UTF-8 "z" "[A-Z]" NOMATCH en_US.UTF-8 "A" "[A-Z]" 0 +# Test that LATIN CAPITAL LETTER N WITH TILDE is not in [A-Z]. +en_US.UTF-8 "Ñ" "[A-Z]" NOMATCH en_US.UTF-8 "Z" "[A-Z]" 0 en_US.UTF-8 "0" "[0-9]" 0 +# Test that FULLWIDTH DIGIT ZERO is not in [0-9]. +en_US.UTF-8 "０" "[0-9]" NOMATCH +# Test that VULGAR FRACTION ONE HALF is not in [0-9]. +en_US.UTF-8 "½" "[0-9]" NOMATCH en_US.UTF-8 "9" "[0-9]" 0 +# Test that FULLWIDTH DIGIT NINE is not in [0-9]. +en_US.UTF-8 "９" "[0-9]" NOMATCH de_DE.UTF-8 "a" "[a-z]" 0 de_DE.UTF-8 "z" "[a-z]" 0 -de_DE.UTF-8 "ä" "[a-z]" 0 -de_DE.UTF-8 "ö" "[a-z]" 0 -de_DE.UTF-8 "ü" "[a-z]" 0 +de_DE.UTF-8 "ä" "[a-z]" NOMATCH +de_DE.UTF-8 "ö" "[a-z]" NOMATCH +de_DE.UTF-8 "ü" "[a-z]" NOMATCH de_DE.UTF-8 "A" "[a-z]" NOMATCH de_DE.UTF-8 "Z" "[a-z]" NOMATCH de_DE.UTF-8 "Ä" "[a-z]" NOMATCH @@ -594,9 +608,9 @@ de_DE.UTF-8 "ö" "[A-Z]" NOMATCH de_DE.UTF-8 "ü" "[A-Z]" NOMATCH de_DE.UTF-8 "A" "[A-Z]" 0 de_DE.UTF-8 "Z" "[A-Z]" 0 -de_DE.UTF-8 "Ä" "[A-Z]" 0 -de_DE.UTF-8 "Ö" "[A-Z]" 0 -de_DE.UTF-8 "Ü" "[A-Z]" 0 +de_DE.UTF-8 "Ä" "[A-Z]" NOMATCH +de_DE.UTF-8 "Ö" "[A-Z]" NOMATCH +de_DE.UTF-8 "Ü" "[A-Z]" NOMATCH de_DE.UTF-8 "a" "[[:lower:]]" 0 de_DE.UTF-8 "z" "[[:lower:]]" 0 de_DE.UTF-8 "ä" "[[:lower:]]" 0