[v5,1/2] Port shared code information from the wiki
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
Since the shared code now has special status with respect to
copyrights, port the information about it from the wiki into a more
structured format in the source tree and add a Python function that
parses it and returns a dictionary with the information.
I need this to exclude these files from the Contributed-by changes and
I reckon it would be useful to know these files for future tooling.
---
SHARED-FILES | 207 +++++++++++++++++++++++++++++++++++
scripts/glibc_shared_code.py | 70 ++++++++++++
2 files changed, 277 insertions(+)
create mode 100644 SHARED-FILES
create mode 100644 scripts/glibc_shared_code.py
Comments
On 8/30/21 8:08 PM, Siddhesh Poyarekar wrote:
> Since the shared code now has special status with respect to
> copyrights, port the information about it from the wiki into a more
> structured format in the source tree and add a Python function that
> parses it and returns a dictionary with the information.
>
> I need this to exclude these files from the Contributed-by changes and
> I reckon it would be useful to know these files for future tooling.
LGTM.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
> ---
> SHARED-FILES | 207 +++++++++++++++++++++++++++++++++++
> scripts/glibc_shared_code.py | 70 ++++++++++++
> 2 files changed, 277 insertions(+)
> create mode 100644 SHARED-FILES
> create mode 100644 scripts/glibc_shared_code.py
>
> diff --git a/SHARED-FILES b/SHARED-FILES
> new file mode 100644
> index 0000000000..38f189727c
> --- /dev/null
> +++ b/SHARED-FILES
> @@ -0,0 +1,207 @@
> +# Files shared with other projects. Pass a file path to the
> +# get_glibc_shared_code() function in the python library
> +# scripts/glibc_shared_code.py to get a dict object with this information. See
> +# the library sources for more information.
> +
> +# The headers on most of these files indicate that glibc is the canonical
> +# source for these files, although in many cases there seem to be useful
> +# changes in the gnulib versions that could be merged back in. Not all gnulib
> +# files contain such a header and it is not always consistent in its format, so
> +# it would be useful to make sure that all gnulib files that are using glibc as
> +# upstream have a greppable header.
> +#
> +# These files are quite hard to find without a header to grep for and each file
> +# has to be compared manually so this list is likely incomplete or may contain
> +# errors.
> +gnulib:
> + argp/argp-ba.c
> + argp/argp-ba.c
> + argp/argp-eexst.c
> + argp/argp-fmtstream.c
> + argp/argp-fmtstream.h
> + argp/argp-fs-xinl.c
> + argp/argp-help.c
> + argp/argp-namefrob.h
> + argp/argp-parse.c
> + argp/argp-pv.c
> + argp/argp-pvh.c
> + argp/argp-xinl.c
> + argp/argp.h
> + crypt/md5.c
> + crypt/md5.h
> + dirent/alphasort.c
> + dirent/scandir.c
> + locale/programs/3level.h
> + # Merged from gnulib 2014-6-23
> + malloc/obstack.c
> + # Merged from gnulib 2014-6-23
> + malloc/obstack.h
> + # Merged from gnulib 2014-07-10
> + misc/error.c
> + misc/error.h
> + misc/getpass.c
> + misc/mkdtemp.c
> + posix/fnmatch_loop.c
> + # Intended to be the same. Gnulib copy contains glibc changes.
> + posix/getopt.c
> + # Intended to be the same. Gnulib copy contains glibc changes.
> + posix/getopt1.c
> + # Intended to be the same. Gnulib copy contains glibc changes.
> + posix/getopt_int.h
> + posix/glob.c
> + posix/regcomp.c
> + posix/regex.c
> + posix/regex.h
> + posix/regex_internal.c
> + posix/regex_internal.h
> + posix/regexec.c
> + posix/spawn.c
> + posix/spawn_faction_addclose.c
> + posix/spawn_faction_adddup2.c
> + posix/spawn_faction_addopen.c
> + posix/spawn_faction_destroy.c
> + posix/spawn_faction_init.c
> + posix/spawn_int.h
> + posix/spawnattr_destroy.c
> + posix/spawnattr_getdefault.c
> + posix/spawnattr_getflags.c
> + posix/spawnattr_getpgroup.c
> + posix/spawnattr_getschedparam.c
> + posix/spawnattr_getschedpolicy.c
> + posix/spawnattr_getsigmask.c
> + posix/spawnattr_init.c
> + posix/spawnattr_setdefault.c
> + posix/spawnattr_setflags.c
> + posix/spawnattr_setpgroup.c
> + posix/spawnattr_setschedparam.c
> + posix/spawnattr_setschedpolicy.c
> + posix/spawnattr_setsigmask.c
> + posix/spawnp.c
> + stdlib/atoll.c
> + stdlib/getsubopt.c
> + stdlib/setenv.c
> + stdlib/strtoll.c
> + stdlib/strtoul.c
> + # Merged from gnulib 2014-6-26, needs merge back
> + string/memchr.c
> + string/memcmp.c
> + string/memmem.c
> + string/mempcpy.c
> + string/memrchr.c
> + string/rawmemchr.c
> + string/stpcpy.c
> + string/stpncpy.c
> + string/str-two-way.h
> + string/strcasestr.c
> + string/strcspn.c
> + string/strdup.c
> + string/strndup.c
> + string/strpbrk.c
> + string/strsignal.c
> + string/strstr.c
> + string/strtok_r.c
> + string/strverscmp.c
> + sysdeps/generic/pty-private.h
> + sysdeps/generic/siglist.h
> + sysdeps/posix/euidaccess.c
> + sysdeps/posix/gai_strerror.c
> + sysdeps/posix/getcwd.c
> + sysdeps/posix/pwrite.c
> + sysdeps/posix/spawni.c
> + # Merged from gnulib 2014-6-23
> + sysdeps/posix/tempname.c
> + # Merged from gnulib 2014-6-27
> + time/mktime.c
> + time/mktime-internal.h
> + time/strptime.c
> + time/timegm.c
> +
> +# The last merge was 2014-12-11 and merged gettext 0.19.3 into glibc with a
> +# patch submitted to the gettext mailing list for changes that could be merged
> +# back.
> +#
> +# This commit was omitted from the merge as it does not appear to be compatible
> +# with how glibc expects things to work:
> +#
> +# commit 279b57fc367251666f00e8e2b599b83703451afb
> +# Author: Bruno Haible <bruno@clisp.org>
> +# Date: Fri Jun 14 12:03:49 2002 +0000
> +#
> +# Make absolute pathnames inside $LANGUAGE work.
> +gettext:
> + intl/bindtextdom.c
> + intl/dcgettext.c
> + intl/dcigettext.c
> + intl/dcngettext.c
> + intl/dgettext.c
> + intl/dngettext.c
> + intl/explodename.c
> + intl/finddomain.c
> + intl/gettext.c
> + intl/gettextP.h
> + intl/gmo.h
> + intl/hash-string.c
> + intl/hash-string.h
> + intl/l10nflist.c
> + intl/loadinfo.h
> + intl/loadmsgcat.c
> + intl/locale.alias
> + intl/localealias.c
> + intl/ngettext.c
> + intl/plural-exp.c
> + intl/plural-exp.h
> + intl/plural.y
> + intl/textdomain.c
> +
> +# The following files are shared with the upstream Unicode project and must be
> +# updated regularly to stay in sync with the upstream unicode releases.
> +#
> +# Merged from Unicode 13.0.0 release.
> +unicode:
> + localedata/unicode-gen/UnicodeData.txt
> + localedata/unicode-gen/unicode-license.txt
> + localedata/unicode-gen/DerivedCoreProperties.txt
> + localedata/unicode-gen/EastAsianWidth.txt
> + localedata/unicode-gen/PropList.txt
> +
> +# The following files are shared with the upstream tzcode project and must be
> +# updated regularly to stay in sync with the upstream releases.
> +#
> +# Update from tzcode 2017b.
> +# Latest is 2018g:
> +# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html
> +tzcode:
> + timezone/private.h
> + timezone/tzfile.h
> + timezone/zdump.c
> + timezone/zic.c
> + timezone/tzselect.ksh
> +
> +# The following files are shared with the upstream tzdata project but are not
> +# synchronized regularly. The data files themselves are used only for testing
> +# purposes and their data is never used to generate any output. We synchronize
> +# them only to stay on top of newer data that might help with testing.
> +#
> +# Currently synced to 2009i. Latest is 2018g.
> +# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html
> +tzdata:
> + timezone/africa
> + timezone/antarctica
> + timezone/asia
> + timezone/australasia
> + timezone/europe
> + timezone/northamerica
> + timezone/southamerica
> + timezone/pacificnew
> + timezone/etcetera
> + timezone/factory
> + timezone/backward
> + timezone/systemv
> + timezone/solar87
> + timezone/solar88
> + timezone/solar89
> + timezone/iso3166.tab
> + timezone/zone.tab
> + timezone/leapseconds
> + # This is yearistype.sh in the parent project
> + timezone/yearistype
> diff --git a/scripts/glibc_shared_code.py b/scripts/glibc_shared_code.py
> new file mode 100644
> index 0000000000..873a26117f
> --- /dev/null
> +++ b/scripts/glibc_shared_code.py
> @@ -0,0 +1,70 @@
> +#!/usr/bin/python
> +# Copyright (C) 2021 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +#
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <https://www.gnu.org/licenses/>.
> +
> +def get_glibc_shared_code(path):
> + """ Get glibc shared code information from a file
> +
> + The input file must have each project name on its own line, ending with a
> + colon, followed by the project's shared files, one per line. Leading and
> + trailing whitespace is ignored, as are blank lines and lines whose first
> + non-whitespace character is #.
> +
> + Args:
> + path: The path to the file containing shared code information.
> +
> + Returns:
> + A dictionary with project names as keys and lists of files as values.
> + """
> +
> + projects = {}
> + with open(path, 'r') as f:
> + for line in f.readlines():
> + line = line.strip()
> + if len(line) == 0 or line[0] == '#':
> + continue
> + if line[-1] == ':':
> + cur = line[:-1]
> + projects[cur] = []
> + else:
> + projects[cur].append(line)
> +
> + return projects
> +
> +# Function testing.
> +import sys
> +from os import EX_NOINPUT
> +from os.path import exists
> +from pprint import *
> +
> +if __name__ == '__main__':
> + if len(sys.argv) != 2:
> + print('Usage: %s <file name>' % sys.argv[0])
> + print('Run this script from the base glibc source directory')
> + sys.exit(EX_NOINPUT)
> +
> + print('Testing get_glibc_shared_code with %s:\n' % sys.argv[1])
> + r = get_glibc_shared_code(sys.argv[1])
> + errors = False
> + for k in r.keys():
> + for f in r[k]:
> + if not exists(f):
> + print('%s does not exist' % f)
> + errors = True
> +
> + if not errors:
> + pprint(r)
>
new file mode 100644
@@ -0,0 +1,207 @@
+# Files shared with other projects. Pass a file path to the
+# get_glibc_shared_code() function in the python library
+# scripts/glibc_shared_code.py to get a dict object with this information. See
+# the library sources for more information.
+
+# The headers on most of these files indicate that glibc is the canonical
+# source for these files, although in many cases there seem to be useful
+# changes in the gnulib versions that could be merged back in. Not all gnulib
+# files contain such a header and it is not always consistent in its format, so
+# it would be useful to make sure that all gnulib files that are using glibc as
+# upstream have a greppable header.
+#
+# These files are quite hard to find without a header to grep for and each file
+# has to be compared manually so this list is likely incomplete or may contain
+# errors.
+gnulib:
+ argp/argp-ba.c
+ argp/argp-ba.c
+ argp/argp-eexst.c
+ argp/argp-fmtstream.c
+ argp/argp-fmtstream.h
+ argp/argp-fs-xinl.c
+ argp/argp-help.c
+ argp/argp-namefrob.h
+ argp/argp-parse.c
+ argp/argp-pv.c
+ argp/argp-pvh.c
+ argp/argp-xinl.c
+ argp/argp.h
+ crypt/md5.c
+ crypt/md5.h
+ dirent/alphasort.c
+ dirent/scandir.c
+ locale/programs/3level.h
+ # Merged from gnulib 2014-6-23
+ malloc/obstack.c
+ # Merged from gnulib 2014-6-23
+ malloc/obstack.h
+ # Merged from gnulib 2014-07-10
+ misc/error.c
+ misc/error.h
+ misc/getpass.c
+ misc/mkdtemp.c
+ posix/fnmatch_loop.c
+ # Intended to be the same. Gnulib copy contains glibc changes.
+ posix/getopt.c
+ # Intended to be the same. Gnulib copy contains glibc changes.
+ posix/getopt1.c
+ # Intended to be the same. Gnulib copy contains glibc changes.
+ posix/getopt_int.h
+ posix/glob.c
+ posix/regcomp.c
+ posix/regex.c
+ posix/regex.h
+ posix/regex_internal.c
+ posix/regex_internal.h
+ posix/regexec.c
+ posix/spawn.c
+ posix/spawn_faction_addclose.c
+ posix/spawn_faction_adddup2.c
+ posix/spawn_faction_addopen.c
+ posix/spawn_faction_destroy.c
+ posix/spawn_faction_init.c
+ posix/spawn_int.h
+ posix/spawnattr_destroy.c
+ posix/spawnattr_getdefault.c
+ posix/spawnattr_getflags.c
+ posix/spawnattr_getpgroup.c
+ posix/spawnattr_getschedparam.c
+ posix/spawnattr_getschedpolicy.c
+ posix/spawnattr_getsigmask.c
+ posix/spawnattr_init.c
+ posix/spawnattr_setdefault.c
+ posix/spawnattr_setflags.c
+ posix/spawnattr_setpgroup.c
+ posix/spawnattr_setschedparam.c
+ posix/spawnattr_setschedpolicy.c
+ posix/spawnattr_setsigmask.c
+ posix/spawnp.c
+ stdlib/atoll.c
+ stdlib/getsubopt.c
+ stdlib/setenv.c
+ stdlib/strtoll.c
+ stdlib/strtoul.c
+ # Merged from gnulib 2014-6-26, needs merge back
+ string/memchr.c
+ string/memcmp.c
+ string/memmem.c
+ string/mempcpy.c
+ string/memrchr.c
+ string/rawmemchr.c
+ string/stpcpy.c
+ string/stpncpy.c
+ string/str-two-way.h
+ string/strcasestr.c
+ string/strcspn.c
+ string/strdup.c
+ string/strndup.c
+ string/strpbrk.c
+ string/strsignal.c
+ string/strstr.c
+ string/strtok_r.c
+ string/strverscmp.c
+ sysdeps/generic/pty-private.h
+ sysdeps/generic/siglist.h
+ sysdeps/posix/euidaccess.c
+ sysdeps/posix/gai_strerror.c
+ sysdeps/posix/getcwd.c
+ sysdeps/posix/pwrite.c
+ sysdeps/posix/spawni.c
+ # Merged from gnulib 2014-6-23
+ sysdeps/posix/tempname.c
+ # Merged from gnulib 2014-6-27
+ time/mktime.c
+ time/mktime-internal.h
+ time/strptime.c
+ time/timegm.c
+
+# The last merge was 2014-12-11 and merged gettext 0.19.3 into glibc with a
+# patch submitted to the gettext mailing list for changes that could be merged
+# back.
+#
+# This commit was omitted from the merge as it does not appear to be compatible
+# with how glibc expects things to work:
+#
+# commit 279b57fc367251666f00e8e2b599b83703451afb
+# Author: Bruno Haible <bruno@clisp.org>
+# Date: Fri Jun 14 12:03:49 2002 +0000
+#
+# Make absolute pathnames inside $LANGUAGE work.
+gettext:
+ intl/bindtextdom.c
+ intl/dcgettext.c
+ intl/dcigettext.c
+ intl/dcngettext.c
+ intl/dgettext.c
+ intl/dngettext.c
+ intl/explodename.c
+ intl/finddomain.c
+ intl/gettext.c
+ intl/gettextP.h
+ intl/gmo.h
+ intl/hash-string.c
+ intl/hash-string.h
+ intl/l10nflist.c
+ intl/loadinfo.h
+ intl/loadmsgcat.c
+ intl/locale.alias
+ intl/localealias.c
+ intl/ngettext.c
+ intl/plural-exp.c
+ intl/plural-exp.h
+ intl/plural.y
+ intl/textdomain.c
+
+# The following files are shared with the upstream Unicode project and must be
+# updated regularly to stay in sync with the upstream unicode releases.
+#
+# Merged from Unicode 13.0.0 release.
+unicode:
+ localedata/unicode-gen/UnicodeData.txt
+ localedata/unicode-gen/unicode-license.txt
+ localedata/unicode-gen/DerivedCoreProperties.txt
+ localedata/unicode-gen/EastAsianWidth.txt
+ localedata/unicode-gen/PropList.txt
+
+# The following files are shared with the upstream tzcode project and must be
+# updated regularly to stay in sync with the upstream releases.
+#
+# Update from tzcode 2017b.
+# Latest is 2018g:
+# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html
+tzcode:
+ timezone/private.h
+ timezone/tzfile.h
+ timezone/zdump.c
+ timezone/zic.c
+ timezone/tzselect.ksh
+
+# The following files are shared with the upstream tzdata project but are not
+# synchronized regularly. The data files themselves are used only for testing
+# purposes and their data is never used to generate any output. We synchronize
+# them only to stay on top of newer data that might help with testing.
+#
+# Currently synced to 2009i. Latest is 2018g.
+# https://mm.icann.org/pipermail/tz-announce/2018-October/000052.html
+tzdata:
+ timezone/africa
+ timezone/antarctica
+ timezone/asia
+ timezone/australasia
+ timezone/europe
+ timezone/northamerica
+ timezone/southamerica
+ timezone/pacificnew
+ timezone/etcetera
+ timezone/factory
+ timezone/backward
+ timezone/systemv
+ timezone/solar87
+ timezone/solar88
+ timezone/solar89
+ timezone/iso3166.tab
+ timezone/zone.tab
+ timezone/leapseconds
+ # This is yearistype.sh in the parent project
+ timezone/yearistype
new file mode 100644
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+# Copyright (C) 2021 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+def get_glibc_shared_code(path):
+ """ Get glibc shared code information from a file
+
+ The input file must have each project name on its own line, ending with a
+ colon, followed by the project's shared files, one per line. Leading and
+ trailing whitespace is ignored, as are blank lines and lines whose first
+ non-whitespace character is #.
+
+ Args:
+ path: The path to the file containing shared code information.
+
+ Returns:
+ A dictionary with project names as keys and lists of files as values.
+ """
+
+ projects = {}
+ with open(path, 'r') as f:
+ for line in f.readlines():
+ line = line.strip()
+ if len(line) == 0 or line[0] == '#':
+ continue
+ if line[-1] == ':':
+ cur = line[:-1]
+ projects[cur] = []
+ else:
+ projects[cur].append(line)
+
+ return projects
+
+# Function testing.
+import sys
+from os import EX_NOINPUT
+from os.path import exists
+from pprint import *
+
+if __name__ == '__main__':
+ if len(sys.argv) != 2:
+ print('Usage: %s <file name>' % sys.argv[0])
+ print('Run this script from the base glibc source directory')
+ sys.exit(EX_NOINPUT)
+
+ print('Testing get_glibc_shared_code with %s:\n' % sys.argv[1])
+ r = get_glibc_shared_code(sys.argv[1])
+ errors = False
+ for k in r.keys():
+ for f in r[k]:
+ if not exists(f):
+ print('%s does not exist' % f)
+ errors = True
+
+ if not errors:
+ pprint(r)