Unicode 17.0 updates: build scripts and data tables
Commit Message
Patch 0002 updates newlib data tables to Unicode 17.0.
Two of the build scripts had to be updated first because the layout of
the Unicode data files changed slightly.
For this purpose, they were synchronized with the corresponding scripts
from mintty. This is patch 0001.
Thomas
From c35d2803654e8c96ef0ef6d11d9b7a3604f3163f Mon Sep 17 00:00:00 2001
From: Thomas Wolff <towo@towo.net>
Date: Sat, 20 Dec 2025 00:00:00 +0000
Subject: [PATCH 1/2] Unicode table build: update scripts for generation of
width data
This is needed to handle recent changes in the Unicode.org data file layout.
---
newlib/libc/string/mkwide | 32 ++++++++++++++++----------------
newlib/libc/string/mkwidthA | 19 ++++++++++---------
2 files changed, 26 insertions(+), 25 deletions(-)
Comments
On Dec 20 14:59, Thomas Wolff wrote:
> Patch 0002 updates newlib data tables to Unicode 17.0.
> Two of the build scripts needed an update first as the Unicode file layout
> slightly changed.
> They were synchronized with the same respective scripts from mintty for this
> purpose. This is patch 0001.
> Thomas
Thank you! Pushed to main and the cygwin-3_6-branch.
Corinna
@@ -4,20 +4,24 @@
skipcheck=false
-if [ ! -r EastAsianWidth.txt ]
-then ln -s /usr/share/unicode/ucd/EastAsianWidth.txt . || exit 1
+if $skipcheck || make Blocks.txt >&2
+then true
+else echo Could not acquire Unicode data file Blocks.txt >&2
+ exit 1
fi
-if [ ! -r UnicodeData.txt ]
-then ln -s /usr/share/unicode/ucd/UnicodeData.txt . || exit 1
+if $skipcheck || make EastAsianWidth.txt >&2
+then true
+else echo Could not acquire Unicode data file EastAsianWidth.txt >&2
+ exit 1
fi
-if [ ! -r Blocks.txt ]
-then ln -s /usr/share/unicode/ucd/Blocks.txt . || exit 1
+if $skipcheck || make UnicodeData.txt >&2
+then true
+else echo Could not acquire Unicode data file UnicodeData.txt >&2
+ exit 1
fi
-sed -e "s,^\([^;]*\);[NAH],\1," -e t -e d EastAsianWidth.txt > wide.na
-sed -e "s,^\([^;]*\);[WF],\1," -e t -e d EastAsianWidth.txt > wide.fw
-
-PATH="$PATH:." # for uniset
+sed -e "s,^\([^;]*\) *; *[NAH],\1," -e t -e d EastAsianWidth.txt > wide.na
+sed -e "s,^\([^;]*\) *; *[WF],\1," -e t -e d EastAsianWidth.txt > wide.fw
nrfw=`uniset +wide.fw nr | sed -e 's,.*:,,'`
echo FW $nrfw
@@ -31,7 +35,7 @@ includes () {
nr=`uniset +wide.$2 -$1 nr | sed -e 's,.*:,,'`
test $nr != $3
}
-echo "adding compact closure of wide ranges, this may take ~10min"
+echo "adding compact closure of wide ranges, this may take a few minutes"
for b in $extrablocks `sed -e 's,^\([0-9A-F]*\)\.\.\([0-9A-F]*\).*,\1-\2,' -e t -e d Blocks.txt`
do range=$b
echo checking $range $* >&2
@@ -40,10 +44,6 @@ do range=$b
fi
done > wide.blocks
-(
-sed -e "s,^,//," -e 1q EastAsianWidth.txt
-sed -e "s,^,//," -e 1q Blocks.txt
-uniset `sed -e 's,^,+,' wide.blocks` +wide.fw c
-) > wide.t
+uniset `sed -e 's,^,+,' wide.blocks` +wide.fw c > wide.t
rm -f wide.na wide.fw wide.blocks
@@ -3,18 +3,19 @@
# generate WIDTH-A file, listing Unicode characters with width property
# Ambiguous, from EastAsianWidth.txt
-if [ ! -r EastAsianWidth.txt ]
-then ln -s /usr/share/unicode/ucd/EastAsianWidth.txt . || exit 1
+if make EastAsianWidth.txt >&2
+then true
+else echo Could not acquire Unicode data file EastAsianWidth.txt >&2
+ exit 1
fi
-if [ ! -r UnicodeData.txt ]
-then ln -s /usr/share/unicode/ucd/UnicodeData.txt . || exit 1
-fi
-if [ ! -r Blocks.txt ]
-then ln -s /usr/share/unicode/ucd/Blocks.txt . || exit 1
+if make UnicodeData.txt >&2
+then true
+else echo Could not acquire Unicode data file UnicodeData.txt >&2
+ exit 1
fi
-sed -e "s,^\([^;]*\);A,\1," -e t -e d EastAsianWidth.txt > width-a-new
+sed -e "s,^\([^;]*\) *; *A,\1," -e t -e d EastAsianWidth.txt > width-a-new
rm -f WIDTH-A
echo "# UAX #11: East Asian Ambiguous" > WIDTH-A
-PATH="$PATH:." uniset +width-a-new compact >> WIDTH-A
+uniset +width-a-new compact >> WIDTH-A
rm -f width-a-new