[WIP] locale python scripts for cldr updates

Message ID 20160413235624.GV6588@vapier.lan
State RFC, archived
Delegated to: Mike Frysinger
Headers

Commit Message

Mike Frysinger April 13, 2016, 11:56 p.m. UTC
  The code is stabilizing a lot more now.  The last big obstacle before
considering it for serious integration is cldr.py:Locale.update_cldr.
But otherwise, if people want to bang on it or review it a bit, now
would be an OK time.  I know there are a few style issues, so please
don't bother noting things that pylint itself catches -- I'll make
sure the code is lint clean before the final version for merging.

The filter.py script is more for quick hacking.  I'm not set on it
being merged.

Hopefully the message comes across ... it seems like when I attach
Python scripts here, the e-mails get stuck/thrown away somewhere.
-mike
  

Patch

--- /dev/null
+++ locales.py
@@ -0,0 +1,568 @@ 
+# -*- coding: utf-8 -*-
+# Written by Mike Frysinger <vapier@gentoo.org> for much great glory.
+#
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Helper library for working with locale datafiles."""
+
+from __future__ import print_function
+
+import os
+import re
+import sys
+
+
+# Whether to normalize blank lines, comments and the file header on output.
+REWRITE_STYLE = False
+
+# Comment block that should be at the top of all files.
+FILE_HEADER = """\
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+"""
+
+# The order in which the categories appear in the data files.
+CATEGORY_ORDER = (
+    'LC_IDENTIFICATION',
+    'LC_CTYPE',
+    'LC_COLLATE',
+    'LC_MONETARY',
+    'LC_NUMERIC',
+    'LC_TIME',
+    'LC_MESSAGES',
+    'LC_PAPER',
+    'LC_NAME',
+    'LC_ADDRESS',
+    'LC_TELEPHONE',
+    'LC_MEASUREMENT',
+)
+
+
+def u_encode(text):
+    """Spell out each character of unicode |text| as a <U####> escape."""
+    return ''.join(['<U%04X>' % ord(char) for char in text])
+
+
+_U_MATCH = re.compile(r'<U([0-9A-Fa-f]+)>')
+def u_decode(text):
+    """Replace every <U####> escape in |text| with the character it names."""
+    # The capture group holds the code point in hexadecimal.
+    return _U_MATCH.sub(lambda hit: chr(int(hit.group(1), 16)), text)
+
+
+def dequote(text):
+    """Remove the leading/trailing quotes from |text| if it starts with one.
+
+    Empty and unquoted strings are returned unchanged (no IndexError).
+    """
+    return text[1:-1] if text[:1] == '"' else text
+
+
+class LocaleError(Exception):
+    """Raised when a locale datafile cannot be processed."""
+
+
+class LocaleName(object):
+    """A locale name, parsed from POSIX form and convertible to CLDR form."""
+
+    # We support the POSIX format: <lang>_<territory>@<alt>
+    # The _<territory> and @<alt> parts are each optional, so "sr@latin"
+    # parses as lang "sr" + alt "latin" rather than as one long language.
+    _POSIX_RE = re.compile(
+        r'(?P<lang>[^_@]*)'
+        r'(_(?P<territory>[^@]*))?'
+        r'(@(?P<alt>.*))?')
+
+    # Script mappings between POSIX & CLDR.
+    _SCRIPT_MAP_PC = {
+        'cyrillic': 'Cyrl',
+        'latin': 'Latn',
+    }
+    # Handle languages that default to a specific script.
+    _SCRIPT_MAP_LANG_PC = {
+        # XXX: Is this correct for zh_HK ?  What about Hant ?
+        # What about the cmn language (e.g. cmn_TW) ?
+        'zh': 'Hans',
+    }
+
+    def __init__(self, name):
+        """Parse |name|, a locale name in POSIX format."""
+        self.name = name
+        m = self._POSIX_RE.match(name)
+        self.lang = m.group('lang')
+        self.territory = m.group('territory')
+        self.alt = m.group('alt')
+
+    def __str__(self):
+        return self.posix
+
+    @property
+    def posix(self):
+        """Name of locale as POSIX uses it."""
+        ret = self.lang
+        if self.territory:
+            ret += '_' + self.territory
+        if self.alt:
+            ret += '@' + self.alt
+        return ret
+
+    @property
+    def cldr_lang(self):
+        """Name of language as CLDR uses it."""
+        ret = self.lang
+
+        # An explicit @alt script wins over the language's default script.
+        script = (self._SCRIPT_MAP_PC.get(self.alt) or
+                  self._SCRIPT_MAP_LANG_PC.get(self.lang))
+        if script:
+            ret += '_' + script
+        return ret
+
+    @property
+    def cldr(self):
+        """Name of locale as CLDR uses it."""
+        # First deal with ugly variants.
+        if (self.lang, self.territory, self.alt) == ('ca', 'ES', 'valencia'):
+            return 'ca_ES_VALENCIA'
+
+        ret = self.cldr_lang
+        if self.territory:
+            ret += '_' + self.territory
+        return ret
+
+
+class LocaleCategory(object):
+    """Content for a single locale category.
+
+    |fields| maps each keyword in FIELDS to its decoded value (None if unset).
+    """
+    NAME = ''
+    FIELDS = ()
+
+    def __init__(self, name='', content=(), header=(), comment_char=None,
+                 copies=None):
+        self.name = name.lower()
+        self.content = content
+        self.header = header
+        self.fields = dict((k, None) for k in self.FIELDS)
+        # None (not a shared mutable default) means "no other locales known".
+        self._merge_content(content, comment_char, copies or {})
+
+    def _merge_content(self, content, comment_char, copies):
+        """Fill |fields| from |content|; "copy" pulls from loaded |copies|."""
+        for line in content:
+            line = line.split(None, 1)
+            if len(line) != 2 or (line[0] != 'copy' and
+                                  line[0] not in self.FIELDS):
+                continue
+            k, v = line
+            if comment_char:
+                v = v.rsplit(comment_char, 1)[0].rstrip()
+            v = u_decode(dequote(v))
+            if k in self.FIELDS:
+                self.fields[k] = v
+            if k == 'copy':
+                copy = copies.get(v)
+                if isinstance(copy, Locale) and self.NAME in copy.categories:
+                    self._merge_content(getattr(copy, self.name).content,
+                                        copy.comment_char, copies)
+
+    def __str__(self):
+        """Return the category as datafile text, header comments included."""
+        padding = '\n' if REWRITE_STYLE else ''
+        lc_name = self.name.upper()
+        body = [lc_name] + list(self.content) + ['END %s' % lc_name]
+        head = (padding + '\n'.join(self.header) + '\n') if self.header else ''
+        return head + padding + '\n'.join(body) + '\n'
+
+
+class LCIdentification(LocaleCategory):
+    """LC_IDENTIFICATION category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_IDENTIFICATION'
+    FIELDS = (
+        'title',
+        'source',
+        'address',
+        'contact',
+        'email',
+        'tel',
+        'fax',
+        'language',
+        'territory',
+        'audience',
+        'application',
+        'abbreviation',
+        'revision',
+        'date',
+        'category',
+    )
+
+
+class LCCtype(LocaleCategory):
+    """LC_CTYPE category; no individual keywords are tracked."""
+
+    NAME = 'LC_CTYPE'
+    FIELDS = (
+    )
+
+
+class LCCollate(LocaleCategory):
+    """LC_COLLATE category; no individual keywords are tracked."""
+
+    NAME = 'LC_COLLATE'
+    FIELDS = (
+    )
+
+
+class LCMonetary(LocaleCategory):
+    """LC_MONETARY category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_MONETARY'
+    FIELDS = (
+        'int_curr_symbol',
+        'currency_symbol',
+        'mon_decimal_point',
+        'mon_thousands_sep',
+        'mon_grouping',
+        'positive_sign',
+        'negative_sign',
+        'int_frac_digits',
+        'frac_digits',
+        'p_cs_precedes',
+        'p_sep_by_space',
+        'n_cs_precedes',
+        'n_sep_by_space',
+        'p_sign_posn',
+        'n_sign_posn',
+        'int_p_cs_precedes',
+        'int_n_cs_precedes',
+        'int_p_sep_by_space',
+        'int_n_sep_by_space',
+        'int_p_sign_posn',
+        'int_n_sign_posn',
+    )
+
+
+class LCNumeric(LocaleCategory):
+    """LC_NUMERIC category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_NUMERIC'
+    FIELDS = (
+        'decimal_point',
+        'thousands_sep',
+        'grouping',
+    )
+
+
+class LCTime(LocaleCategory):
+    """LC_TIME category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_TIME'
+    FIELDS = (
+        'abday',
+        'day',
+        'abmon',
+        'mon',
+        'am_pm',
+        'd_t_fmt',
+        'd_fmt',
+        't_fmt',
+        't_fmt_ampm',
+        'era',
+        'era_year',
+        'era_d_fmt',
+        'alt_digits',
+        'era_d_t_fmt',
+        'era_t_fmt',
+        'week',
+        'first_weekday',
+        'first_workday',
+        'cal_direction',
+        'date_fmt',
+    )
+
+
+class LCMessages(LocaleCategory):
+    """LC_MESSAGES category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_MESSAGES'
+    FIELDS = (
+        'yesexpr',
+        'noexpr',
+        'yesstr',
+        'nostr',
+    )
+
+
+class LCPaper(LocaleCategory):
+    """LC_PAPER category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_PAPER'
+    FIELDS = (
+        'height',
+        'width',
+    )
+
+
+class LCName(LocaleCategory):
+    """LC_NAME category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_NAME'
+    FIELDS = (
+        'name_fmt',
+        'name_gen',
+        'name_mr',
+        'name_mrs',
+        'name_miss',
+        'name_ms',
+    )
+
+
+class LCAddress(LocaleCategory):
+    """LC_ADDRESS category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_ADDRESS'
+    FIELDS = (
+        'postal_fmt',
+        'country_name',
+        'country_post',
+        'country_ab2',
+        'country_ab3',
+        'country_car',
+        'country_num',
+        'country_isbn',
+        'lang_name',
+        'lang_ab',
+        'lang_term',
+        'lang_lib',
+    )
+
+
+class LCTelephone(LocaleCategory):
+    """LC_TELEPHONE category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_TELEPHONE'
+    FIELDS = (
+        'tel_int_fmt',
+        'tel_dom_fmt',
+        'int_select',
+        'int_prefix',
+    )
+
+
+class LCMeasurement(LocaleCategory):
+    """LC_MEASUREMENT category; FIELDS lists the keywords it tracks."""
+
+    NAME = 'LC_MEASUREMENT'
+    FIELDS = (
+        'measurement',
+    )
+
+
+class Locale(object):
+    """Content for a locale file itself."""
+
+    # Locale name -> Locale object ('loading' while it is being read).
+    _COPY_CACHE = {}
+
+    def __init__(self, name=None, path=None):
+        """Create locale |name| (default: basename of |path|); load |path|."""
+        if name is None:
+            name = os.path.basename(path) if path else ''
+        self.name, self.path, self.locale = name, path, LocaleName(name)
+        self.header = [] #FILE_HEADER.splitlines()
+        for cat in CATEGORY_ORDER:
+            setattr(self, cat.lower(), None)
+        self.categories = []
+        self.cldr = None
+        self.escape_char, self.comment_char = '\\', '#'
+
+        if path is not None:
+            self.read(path)
+
+    @staticmethod
+    def _trim_extra_lines(lines, leading=True, trailing=True,
+                          consecutive=True, comments=False):
+        """Tidy |lines| in place and return it (no-op unless REWRITE_STYLE)."""
+        if not REWRITE_STYLE:
+            return lines
+
+        # Clear leading blank lines.
+        if leading:
+            while lines and not lines[0]:
+                lines.pop(0)
+
+        # Clear trailing blank lines.
+        if trailing:
+            while lines and not lines[-1]:
+                lines.pop(-1)
+
+        # Clear consecutive blank lines.
+        if consecutive:
+            i = 0
+            while i < len(lines) - 1:
+                if not lines[i] and not lines[i + 1]:
+                    lines.pop(i)
+                else:
+                    i += 1
+
+        # Trim blank comment lines that start/end a section.
+        if comments:
+            i = 0
+            while i < len(lines):
+                if (lines[i] == '%' and
+                        (i == 0 or not lines[i - 1] or lines[i - 1][0] != '%')):
+                    lines.pop(i)
+                elif (lines[i] == '%' and
+                      (i == len(lines) - 1 or not lines[i + 1] or
+                       lines[i + 1][0] != '%')):
+                    lines.pop(i)
+                else:
+                    i += 1
+
+        return lines
+
+    def readfp(self, fp):
+        """Load the locale content from |fp|"""
+        # Key by name: "copy" lookups use the plain locale name as the key.
+        Locale._COPY_CACHE[self.name] = self
+
+        lines = [x.rstrip() for x in fp.readlines()]
+        self._trim_extra_lines(lines)
+
+        # Process the leading few lines.
+        expected = (('first', 'comment_char %'), ('second', 'escape_char /'))
+        for (pos, want), got in zip(expected, lines):
+            if got != want:
+                print('%s: warning: %s line should be: "%s", not "%s"' %
+                      (self.locale, pos, want, got))
+
+        # Now walk each locale category.
+        while lines:
+            # Extract any leading comments.
+            header = []
+            while lines:
+                line = lines[0]
+                if line.startswith('LC_'):
+                    break
+                elif not line or line[0] == self.comment_char:
+                    header.append(line)
+                    lines.pop(0)
+                    continue
+                elif line.startswith('comment_char'):
+                    self.comment_char = line.split()[1]
+                    if self.comment_char != '%':
+                        raise LocaleError('%s: bad comment_char: %s' %
+                                          (self.locale, line))
+                    header.append(line)
+                    lines.pop(0)
+                    continue
+                elif line.startswith('escape_char'):
+                    self.escape_char = line.split()[1]
+                    if self.escape_char != '/':
+                        raise LocaleError('%s: bad escape_char: %s' %
+                                          (self.locale, line))
+                    header.append(line)
+                    lines.pop(0)
+                    continue
+                else:
+                    break
+            self._trim_extra_lines(header)
+
+            if not lines:
+                if header:
+                    print('%s: throwing away trailing lines: %r' %
+                          (self.name, header), file=sys.stderr)
+                return
+
+            line = lines.pop(0)
+            if line[0:3] != 'LC_':
+                raise LocaleError('%s: bad line state: %s' % (self.name, line))
+
+            cat = line.split()[0]
+            if cat not in CATEGORY_ORDER:
+                raise LocaleError('%s: unknown category: %s' % (self.name, cat))
+
+            cat_lines = []
+            full_line = ''
+            while lines:
+                # Accumulate multilines.
+                line = lines.pop(0)
+                if line.endswith(self.escape_char):
+                    full_line += line[:-1]
+                    continue
+                elif full_line:
+                    line = full_line + line.lstrip()
+                    full_line = ''
+
+                # Halt when we get to the end of this category.
+                if line.split()[0:2] == ['END', cat]:
+                    break
+                cat_lines.append(line)
+
+                # Deal with loading other locales.
+                if line.startswith('copy '):
+                    copy = u_decode(dequote(line.split()[1]))
+                    self._load_copy(copy)
+
+            self._trim_extra_lines(cat_lines)
+            lc_obj_name = 'LC%s%s' % (cat[3], cat[4:].lower())
+            lc_obj = getattr(sys.modules[__name__], lc_obj_name)
+            lc = lc_obj(name=cat, content=cat_lines, header=header,
+                        comment_char=self.comment_char, copies=self._COPY_CACHE)
+            setattr(self, cat.lower(), lc)
+            self.categories.append(cat)
+
+    def read(self, path):
+        """Load the locale file from |path|"""
+        with open(path) as fp:
+            self.readfp(fp)
+
+    def _load_copy(self, copy):
+        """Load the locale named by |copy|"""
+        # Siblings are found next to our own file; skip ones already known.
+        if not self.path or copy in Locale._COPY_CACHE:
+            return
+        path = os.path.join(os.path.dirname(self.path), copy)
+        # Flag it as in progress to avoid loops.
+        Locale._COPY_CACHE[copy] = 'loading'
+        Locale._COPY_CACHE[copy] = Locale(name=copy, path=path)
+
+    def writefp(self, fp):
+        """Write the locale content to |fp|"""
+        if REWRITE_STYLE:
+            header = ['comment_char %', 'escape_char /']
+        else:
+            header = self.header
+        if header:
+            fp.write('\n'.join(header) + '\n')
+
+        for category in self.categories:
+            lc = getattr(self, category.lower())
+            fp.write(str(lc))
+
+    def write(self, path):
+        """Write the locale content to |path|"""
+        with open(path, 'w') as fp:
+            self.writefp(fp)
--- /dev/null
+++ locale_lint.py
@@ -0,0 +1,446 @@ 
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# Written by Mike Frysinger <vapier@gentoo.org> for much great glory.
+#
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Linting tool for locale datafiles."""
+
+# TODO: Validate set of locale data files and SUPPORTED file.
+
+from __future__ import print_function
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+
+import locales
+
+
+def get_parser():
+    """Build the command line parser (accepts zero or more locales)."""
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument('locales', help='Locales to lint', nargs='*')
+    return cli
+
+
+class Check(object):
+    """Check class for locale problems."""
+
+    def __init__(self, locale, lc):
+        self.locale = locale
+        self.lc = lc
+        self.failed = False
+
+    def fail(self, key, msg):
+        """Set state to failed and show |msg| for |key|."""
+        self.failed = True
+        print('ERROR: %s: %s.%s: %s' % (self.locale.name, self.lc.NAME, key, msg))
+
+    def assertTrue(self, key, value, msg):
+        """Verify |value| is a boolean True value."""
+        if not value:
+            self.fail(key, msg)
+
+    def assertIn(self, key, value, exp_set, msg):
+        """Verify |value| is in |exp_set|."""
+        self.assertTrue(key, value in exp_set, msg)
+
+    def assertEqual(self, key, value, exp_value, msg):
+        """Verify |value| is equal to |exp_value|."""
+        self.assertTrue(key, value == exp_value, msg)
+
+    def assertNotEqual(self, key, value, exp_value, msg):
+        """Verify |value| is not equal to |exp_value|."""
+        self.assertTrue(key, value != exp_value, msg)
+
+    def assertDefined(self, key, value):
+        """Verify |value| is set to something (allows empty string)."""
+        if value is None:
+            self.fail(key, 'missing definition')
+
+    def assertNonEmpty(self, key, value):
+        """Verify |value| is set to a non-empty value."""
+        if not value:
+            self.fail(key, 'missing value')
+
+    def assertEmpty(self, key, value):
+        """Verify |value| is set to an empty value."""
+        if value:
+            self.fail(key, 'value should be left empty')
+
+    def assertFormat(self, key, value, exp_formats, msg):
+        """Verify every %<char> conversion in |value| is in |exp_formats|."""
+        if any(c not in exp_formats for c in re.findall(r'%(.)', value)):
+            self.fail(key, msg)
+
+
+class CheckLCIdentification(Check):
+    """Lint the contact details recorded in LC_IDENTIFICATION."""
+
+    def run(self):
+        """Require an e-mail address; tel and fax must be left empty."""
+        fields = self.lc.fields
+
+        self.assertNonEmpty('email', fields['email'])
+
+        for key in ('tel', 'fax'):
+            self.assertEmpty(key, fields[key])
+
+        # TODO: Check language & territory.
+        # TODO: Check category fields are one of:
+        # i18n:2002  posix:1993
+
+class CheckLCCtype(Check):
+    """Check LC_CTYPE object for problems."""
+
+    def run(self):
+        """Placeholder: no LC_CTYPE checks are performed."""
+
+
+class CheckLCCollate(Check):
+    """Check LC_COLLATE object for problems."""
+
+    def run(self):
+        """Placeholder: no LC_COLLATE checks are performed."""
+
+
+class CheckLCMonetary(Check):
+    """Check LC_MONETARY object for problems."""
+
+    def run(self):
+        """Validate int_curr_symbol and the other monetary fields."""
+        lc = self.lc
+        k = 'int_curr_symbol'
+        v = lc.fields[k]
+        self.assertDefined(k, v)
+        if v:
+            self.assertEqual(k, len(v), 4,
+                             'symbol should be 4 characters, not %s' % (v,))
+            # TODO: We can validate the value against ISO 4217.
+            self.assertEqual(k, v[-1], ' ',
+                             'symbol must end with a space, not %s' % (v[-1],))
+
+        for k in ('currency_symbol', 'mon_decimal_point', 'mon_thousands_sep',
+                  'positive_sign', 'negative_sign', 'mon_grouping',
+                  'int_frac_digits', 'frac_digits'):
+            self.assertDefined(k, lc.fields[k])
+
+        # XXX: The value of -1 is permitted for the POSIX locale.
+
+        valid_values = (None, '0', '1')
+        for k in ('p_cs_precedes', 'n_cs_precedes', 'int_p_cs_precedes', 'int_n_cs_precedes'):
+            v = lc.fields[k]
+            self.assertIn(k, v, valid_values,
+                          'should be 0 or 1, not %s' % (v,))
+
+        valid_values = (None, '0', '1', '2')
+        for k in ('p_sep_by_space', 'n_sep_by_space', 'int_p_sep_by_space', 'int_n_sep_by_space'):
+            v = lc.fields[k]
+            self.assertIn(k, v, valid_values,
+                          'should be between [0, 2], not %s' % (v,))
+
+        valid_values = (None, '0', '1', '2', '3', '4')
+        for k in ('p_sign_posn', 'n_sign_posn', 'int_p_sign_posn', 'int_n_sign_posn'):
+            v = lc.fields[k]
+            self.assertIn(k, v, valid_values,
+                          'should be between [0, 4], not %s' % (v,))
+
+
+class CheckLCNumeric(Check):
+    """Check LC_NUMERIC object for problems."""
+
+    def run(self):
+        """grouping must be defined; decimal_point must be non-empty."""
+        fields = self.lc.fields
+
+        # TODO: grouping: Verify it's a list of positive ints (and -1).
+        self.assertDefined('grouping', fields['grouping'])
+
+        # An unset or empty decimal_point is reported as a missing value.
+        self.assertNonEmpty('decimal_point', fields['decimal_point'])
+
+
+class CheckLCTime(Check):
+    """Check LC_TIME object for problems."""
+
+    def run(self):
+        """Check list lengths, strftime formats, and week-related defaults."""
+        lc = self.lc
+        valid_len = 7
+        for k in ('abday', 'day'):
+            v = lc.fields[k]
+            if v:
+                v = v.split(';')
+                self.assertEqual(k, len(v), valid_len,
+                                 'need %s elements: %s' % (valid_len, v))
+
+        valid_len = 12
+        for k in ('abmon', 'mon'):
+            v = lc.fields[k]
+            if v:
+                v = v.split(';')
+                self.assertEqual(k, len(v), valid_len,
+                                 'need %s elements: %s' % (valid_len, v))
+
+        k = 'date_fmt'
+        default_value = '%a %b %e %H:%M:%S %Z %Y'
+        v = lc.fields[k]
+        self.assertNotEqual(k, v, default_value,
+                            'value (%s) is same as the default; delete it' % (v,))
+
+        # Should we filter out date/time fields rather than allow each one full
+        # access to the strftime api?
+        valid_values = '-aAbBcCdDeEFgGhHIjklmMnOpPrRsStTuUVwWxXyYzZ'
+        for k in ('d_t_fmt', 'd_fmt', 't_fmt'):
+            v = lc.fields[k]
+            if v:
+                self.assertFormat(k, v, valid_values,
+                                  'only %s formats are accepted, not %s' % (valid_values, v))
+
+        # TODO: am_pm: Verify it has 2 entries.
+
+        k = 'week'
+        v = lc.fields[k]
+        if v:
+            default_week = '7;19971130;4'
+            if v == default_week:
+                self.fail(k, 'value (%s) is same as the default; delete it' % (default_week,))
+
+                k = 'first_weekday'
+                self.assertNotEqual(k, lc.fields[k], '1', 'value (%s) is same as the default; delete it' % (lc.fields[k],))
+
+                k = 'first_workday'
+                self.assertNotEqual(k, lc.fields[k], '2', 'value (%s) is same as the default; delete it' % (lc.fields[k],))
+            else:
+                va = v.split(';')
+                if len(va) != 3:
+                    self.fail(k, 'value should have 3 fields, not %s' % (v,))
+                else:
+                    default_start = default_week.split(';')[1]
+                    self.assertEqual(k, va[1], default_start,
+                                     'should be %s, not %s (remember to adjust other fields too)' % (default_start, va[1]))
+
+        k = 'first_weekday'
+        v = lc.fields[k]
+        valid_values = (None, '1', '2')
+        self.assertIn(k, v, valid_values,
+                      'should be 1 or 2, not %s' % (v,))
+
+        k = 'first_workday'
+        v = lc.fields[k]
+        valid_values = (None, '1', '2')
+        self.assertIn(k, v, valid_values,
+                      'should be 1 or 2, not %s' % (v,))
+
+        k = 'cal_direction'
+        v = lc.fields[k]
+        valid_values = (None, '1', '2', '3')
+        self.assertIn(k, v, valid_values,
+                      'should be between [1, 3], not %s' % (v,))
+
+
+class CheckLCMessages(Check):
+    """Check LC_MESSAGES object for problems."""
+
+    def run(self):
+        """Make sure yesexpr/noexpr, when set, compile with Python's re."""
+        for key in ('yesexpr', 'noexpr'):
+            pattern = self.lc.fields[key]
+            if not pattern:
+                continue
+            try:
+                re.compile(pattern)
+            except re.error:
+                self.fail(key, 'invalid regular expression: %s' % (pattern,))
+
+
+class CheckLCPaper(Check):
+    """Check LC_PAPER object for problems."""
+
+    def run(self):
+        """Accept only the known (height, width) pairs, or both unset."""
+        known_sizes = (
+            ('279', '216'),  # US-Letter.
+            ('297', '210'),  # A4.
+            # XXX: Drop this?  Need to implement copy directives.
+            (None, None),    # Not set.
+        )
+        fields = self.lc.fields
+        size = (fields['height'], fields['width'])
+        self.assertIn('(height, width)', size, known_sizes,
+                      '%r' % (size,))
+
+
+class CheckLCName(Check):
+    """Check LC_NAME object for problems."""
+
+    def run(self):
+        """name_fmt is required; the salutation fields must be defined."""
+        fields = self.lc.fields
+        # Same value as ld-name.c.
+        allowed = 'dfFgGlomMpsSt'
+
+        name_fmt = fields['name_fmt']
+        self.assertNonEmpty('name_fmt', name_fmt)
+        if name_fmt:
+            self.assertFormat('name_fmt', name_fmt, allowed,
+                              'only %s formats are accepted, not %s' % (allowed, name_fmt))
+
+        for key in ('name_gen', 'name_mr', 'name_mrs', 'name_miss', 'name_ms'):
+            self.assertDefined(key, fields[key])
+
+
+class CheckLCAddress(Check):
+    """Check LC_ADDRESS object for problems."""
+
+    def run(self):
+        """Check postal_fmt, the country/language codes and country_num."""
+        lc = self.lc
+        k = 'postal_fmt'
+        v = lc.fields[k]
+        self.assertNonEmpty(k, v)
+        # Same value as ld-address.c.
+        valid_values = 'afdbshNtreCzTSc%'
+        if v:
+            self.assertFormat(k, v, valid_values,
+                              'only %s formats are accepted, not %s' % (valid_values, v))
+
+        k = 'country_ab2'
+        v = lc.fields[k]
+        self.assertDefined(k, v)
+        if v:
+            self.assertEqual(k, len(v), 2, 'must be 2 letters, not %s' % (v,))
+
+        # XXX: We can validate lang_ab more.
+        k = 'lang_ab'
+        v = lc.fields[k]
+        if len(self.locale.locale.lang) == 2:
+            self.assertDefined(k, v)
+            if v:
+                self.assertEqual(k, len(v), 2, 'must be 2 letters, not %s' % (v,))
+                self.assertEqual(k, v, v.lower(), 'must be lowercase, not %s' % (v,))
+
+        for k in ('country_ab3', 'lang_term', 'lang_lib'):
+            v = lc.fields[k]
+            self.assertDefined(k, v)
+            if v:
+                self.assertEqual(k, len(v), 3, 'must be 3 letters, not %s' % (v,))
+
+        # TODO: We can validate country_post, country_car, country_isbn.
+        for k in ('country_name', 'country_post', 'country_car', 'country_isbn',
+                  'lang_name'):
+            self.assertDefined(k, lc.fields[k])
+
+        # TODO: We can validate this value more.
+        k = 'country_num'
+        v = lc.fields[k]
+        self.assertNonEmpty(k, v)
+        if v:
+            if isinstance(v, int):
+                v = '%03i' % v
+            self.assertEqual(k, '', re.sub(r'[0-9]', '', v),
+                             'must be 3 numbers, not %s' % (v,))
+            self.assertEqual(k, len(v), 3, 'must be 3 numbers, not %s' % (v,))
+
+
+class CheckLCTelephone(Check):
+    """Check LC_TELEPHONE object for problems."""
+
+    def run(self):
+        """Require both number formats; int_* fields must be defined."""
+        fields = self.lc.fields
+        # XXX: ld-telephone.c is more restrictive.
+        allowed = 'aAcCelt'
+        for key in ('tel_int_fmt', 'tel_dom_fmt'):
+            fmt = fields[key]
+            self.assertNonEmpty(key, fmt)
+            if fmt:
+                self.assertFormat(key, fmt, allowed,
+                                  'only %s formats are accepted, not %s' % (allowed, fmt))
+
+        for key in ('int_select', 'int_prefix'):
+            self.assertDefined(key, fields[key])
+
+
+class CheckLCMeasurement(Check):
+    """Check LC_MEASUREMENT object for problems."""
+
+    def run(self):
+        """measurement must be 1, 2, or left unset."""
+        allowed = (
+            '1',   # Metric units.
+            '2',   # US customary units.
+            # XXX: Drop this?  Need to implement copy directives.
+            None,  # Not set.
+        )
+
+        value = self.lc.fields['measurement']
+        self.assertIn('measurement', value, allowed,
+                      'should be 1 or 2, not %s' % (value,))
+
+
+def check(loc):
+    """Run every category checker on |loc|; True means nothing failed."""
+    module = sys.modules[__name__]
+    passed = True
+    for cat in locales.CATEGORY_ORDER:
+        # TODO: We should throw an error if |cat| is missing.
+        if cat not in loc.categories:
+            continue
+        klass = getattr(module, 'CheckLC%s%s' % (cat[3], cat[4:].lower()),
+                        None)
+        linter = klass(loc, getattr(loc, cat.lower()))
+        linter.run()
+        passed = passed and not linter.failed
+    return passed
+
+
+def main(argv):
+    """Lint each locale file in |argv|; return 1 if any check failed."""
+    opts = get_parser().parse_args(argv)
+
+    # These are not "real" locales, so skip them.
+    SKIP_LOCALES = () #'i18n', 'iso14651', 'translit', 'C', 'POSIX')
+
+    # Process all the locales the user told us to.
+    ret = 0
+    for locale in opts.locales:
+        name = os.path.basename(locale)
+        if name.split('_', 1)[0] in SKIP_LOCALES:
+            continue
+
+        try:
+            loc = locales.Locale(name=name, path=locale)
+        except UnicodeDecodeError:
+            print('%s: bad encodings' % (locale,))
+            subprocess.check_call(['file', locale])
+            # Nothing was loaded, so there is no |loc| to check.
+            continue
+        except locales.LocaleError as e:
+            print('%s: %s' % (name, e))
+            continue
+        if not check(loc):
+            ret = 1
+    return ret
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))
--- /dev/null
+++ cldr.py
@@ -0,0 +1,1204 @@ 
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# Written by Mike Frysinger <vapier@gentoo.org> for much great glory.
+#
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Helper tool for importing current CLDR data.
+
+See http://cldr.unicode.org/ for more details."""
+
+# TODO: Need to handle copy directives better so we can see when a value
+# has changed for a specific locale, but it's copying the (wrong) values
+# from others.
+# TODO: Add missing fields.
+# TODO: Add support for updating locale/iso-3166.def via supplementalData.xml.
+# TODO: Add support for updating locale/iso-4217.def.
+# TODO: In cases where a locale & lang do not exist in the CLDR, we should
+# still be able to update English names in the description and aspects that
+# are territory specific (and lang independent).
+# TODO: To address the previous case, we should split CldrLocale up into a
+# base class and CldrLanguage and CldrTerritory children.  Then the CldrLocale
+# object would take care of blending those into its own results.
+# TODO: Add ISBN support: https://www.isbn-international.org/range_file_generation
+
+from __future__ import print_function
+
+import argparse
+import datetime
+import errno
+import logging
+import os
+import re
+import subprocess
+import sys
+import time
+from xml.etree import ElementTree
+
+import locales
+u_encode = locales.u_encode
+u_decode = locales.u_decode
+
+
+# Where to store CLDR/etc... data files we fetch.
+DEFAULT_WORKING_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                   'cldr-%(version)s')
+
+
+def get_parser():
+    """Return an argument parser for this module."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--working-dir', default=DEFAULT_WORKING_DIR,
+                        help='Where to download files (default: %(default)s)')
+    parser.add_argument('-v', '--version', default=Cldr.CURR_VERSION,
+                        help='Version of CLDR to use (default: %(default)s)')
+    parser.add_argument('locales', nargs='*', help='Locales to generate')
+    return parser
+
+
+def logging_init(debug=False):
+    """Set up the logging module."""
+    fmt = '%(asctime)s: %(levelname)-7s: '
+    fmt += '%(message)s'
+    # 'Sat, 05 Oct 2013 18:58:50 -0400 (EST)'
+    tzname = time.strftime('%Z', time.localtime())
+    datefmt = '%a, %d %b %Y %H:%M:%S ' + tzname
+    level = logging.DEBUG if debug else logging.INFO
+    handler = logging.StreamHandler(stream=sys.stdout)
+    formatter = logging.Formatter(fmt, datefmt)
+
+    handler.setFormatter(formatter)
+
+    logger = logging.getLogger()
+    logger.addHandler(handler)
+    logger.setLevel(level)
+
+
+class cached_property(object):  # pylint: disable=invalid-name
+    """Like @property but cached"""
+
+    def __init__(self, func):
+        self.func = func
+
+    def __get__(self, instance, _owner):
+        if instance is None:
+            return self
+        value = instance.__dict__[self.func.__name__] = self.func(instance)
+        return value
+
+
+class Iso639(object):
+    """Content for the ISO-639 database."""
+
+    # Link to upstream ISO-639-2 database.
+    ISO639_2_URI = 'http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt'
+
+    # Path to our local copy of the ISO-639 database.
+    PATH = os.path.join(os.path.dirname(os.path.dirname(
+        os.path.realpath(__file__))), 'locale', 'iso-639.def')
+
+    # Regex to process our local ISO-639 def file.
+    _LINE_MATCH = re.compile(
+        r'^(DEFINE_LANGUAGE_CODE \("([^"]*)", ([^,]*), ([^,]*), ([^,]*)\)'
+        r'|DEFINE_LANGUAGE_CODE3 \("([^"]*)", ([^,]*), ([^,]*)\))$')
+
+    def __init__(self):
+        self.db = {}
+        with open(self.PATH) as fp:
+            for line in fp:
+                m = self._LINE_MATCH.match(line)
+                if m:
+                    if m.group(1) is None:
+                        # DEFINE_LANGUAGE_CODE3 form.
+                        self.db[m.group(6)] = (m.group(5), m.group(7))
+                    else:
+                        # DEFINE_LANGUAGE_CODE form.
+                        self.db[m.group(2)] = (m.group(1), m.group(3),
+                                               m.group(4))
+
+    def get_term(self, lang):
+        """Return the ISO 639-2/T (Terminology) code."""
+        entry = self.db.get(lang, ())
+        if len(entry) == 3:
+            return entry[1]
+
+    def get_bib(self, lang):
+        """Return the ISO 639-2/B (Bibliographic) code."""
+        entry = self.db.get(lang, ())
+        if len(entry) == 3:
+            return entry[2]
+
+    def _download_uri(self, path):
+        """Download the ISO-639-2 db."""
+        iso639 = os.path.join(path, os.path.basename(self.ISO639_2_URI))
+        if not os.path.exists(iso639):
+            subprocess.check_call(['wget', '-O', iso639, self.ISO639_2_URI])
+        self._load_iso639(iso639)
+
+    @staticmethod
+    def _load_iso639(db):
+        """Load ISO-639-2 database.
+
+        http://www.loc.gov/standards/iso639-2/ascii_8bits.html
+
+        An alpha-3 (bibliographic) code,
+        an alpha-3 (terminologic) code (when given),
+        an alpha-2 code (when given),
+        an English name, and
+        a French name of a language are all separated by pipe (|) characters.
+        """
+        db = {}
+        with open(db) as fp:
+            for line in fp:
+                bcode, tcode, code, _en, _fr = line.rstrip().split('|')
+                if code:
+                    db[code] = (bcode, tcode)
+        return db
+
+
+class CarDatabase(object):
+    """Content for international licence plate country code."""
+
+    # Path to our local copy of the database.
+    PATH = os.path.join(os.path.dirname(os.path.dirname(
+        os.path.realpath(__file__))), 'locale', 'car.def')
+
+    def __init__(self):
+        lines = [x.strip() for x in open(self.PATH).readlines() if '|' in x]
+        self.db = dict(x.split('|') for x in lines)
+
+    def get(self, territory):
+        return self.db.get(territory)
+
+
+class CldrLocale(object):
+    """Content for a single locale in the cldr database."""
+
+    _DAY_KEYS = ('sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat')
+
+    def __init__(self, cldr, locale, iso639, cardb):
+        self._lang = None
+        self._territory = None
+
+        self.cldr = cldr
+        self.locale = locale
+        self.iso639 = iso639
+        self.cardb = cardb
+
+        # Try a few variations to try and find a suitable data source.
+
+        # Try the original locale name.
+        try:
+            self.locale_root = cldr.load_lang(locale.cldr)
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                raise
+
+            # See if there is a "world" locale for this lang.
+            try:
+                self.locale_root = cldr.load_lang(locale.lang + '_001')
+                # Override the territory though so it isn't "world".
+                self._territory = locale.territory
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+
+                # Generate the locale ourselves.
+                self.locale_root = self.generate_locale(locale.lang, locale.territory)
+
+        # We might have languages that are not in CLDR.
+
+        # Try the language w/script name details.
+        try:
+            self.lang_root = cldr.load_lang(locale.cldr_lang)
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                raise
+
+            # Try the plain language then.
+            try:
+                self.lang_root = cldr.load_lang(locale.cldr_lang)
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+
+                # Stub out the lang.
+                #self._lang = locale.lang
+                self.lang_root = None
+
+    @staticmethod
+    def generate_locale(lang, territory):
+        """Generate a simple locale XML for this lang/territory.
+
+        Used when we have a locale that isn't in CLDR, but CLDR does have
+        the lang and we know the territory.
+        """
+        return ElementTree.fromstring(
+            '<ldml><identity>' +
+            ('<language type="%s"/>' % lang) +
+            ('<territory type="%s"/>' % territory) +
+            '</identity></ldml>'
+        )
+
+    @cached_property
+    def lang(self):
+        """The locale's short language code."""
+        root = self.locale_root.find('identity/language')
+        return root.get('type')
+
+    @cached_property
+    def territory(self):
+        """The locale's short territory code."""
+        if self._territory:
+            return self._territory
+        root = self.locale_root.find('identity/territory')
+        return root.get('type')
+
+    @cached_property
+    def en_lang(self):
+        """The name of the language in English."""
+        root = self.cldr.load_lang('en')
+        names = root.find('localeDisplayNames')
+        # First see if the locale has a name before we fall back to the lang.
+        langs_root = names.find('languages')
+        lang_root = langs_root.find('language[@type="%s"]' % self.locale)
+        if lang_root is None:
+            lang_root = langs_root.find('language[@type="%s"]' % self.lang)
+        # The CLDR is missing some languages.
+        if lang_root is None:
+            logging.warning('%s: en_lang: CLDR is missing english name for '
+                            'the language', self.locale)
+            return None
+        return lang_root.findtext('.')
+
+    @cached_property
+    def en_territory(self):
+        """The name of the territory in English."""
+        root = self.cldr.load_lang('en')
+        names = root.find('localeDisplayNames')
+        return names.find('territories/territory[@type="%s"]' %
+                          self.territory).findtext('.')
+
+    @cached_property
+    def country_ab2(self):
+        """Two-letter ISO-3166 country code."""
+        # TODO: Implement this.
+
+    @cached_property
+    def country_ab3(self):
+        """Three-letter ISO-3166 country code."""
+        # TODO: Implement this.
+
+    @cached_property
+    def lang_name(self):
+        """The localized name for the language."""
+        for root in (self.locale_root, self.lang_root):
+            if root is None:
+                continue
+
+            names = root.find('localeDisplayNames')
+            if names is not None:
+                langs_root = names.find('languages')
+                if langs_root is not None:
+                    lang_root = langs_root.find('language[@type="%s"]' %
+                                                self.lang)
+                    if lang_root is not None:
+                        return lang_root.findtext('.')
+
+    @cached_property
+    def unicode_language_subtag(self):
+        """Two-letter ISO 639-1 code"""
+        #root = self.cldr.load_supp('supplementalMetadata')
+        #alias = root.find('metadata/alias/languageAlias[@type="%s"]' % self.lang)
+        return self.lang if len(self.lang) == 2 else ''
+
+    @cached_property
+    def lang_term(self):
+        """Three-letter ISO 639-2/T (Terminology) code"""
+        return self.iso639.get_term(self.lang)
+
+    @cached_property
+    def lang_lib(self):
+        """Three-letter ISO 639-2/B (Bibliographic) code"""
+        return self.iso639.get_bib(self.lang)
+
+    @cached_property
+    def country_name(self):
+        """The localiezd name for the territory."""
+        for root in (self.locale_root, self.lang_root):
+            if root is None:
+                continue
+
+            names = root.find('localeDisplayNames')
+            if names is not None:
+                name = names.find('territories/territory[@type="%s"]' %
+                                  self.territory)
+                if name is not None:
+                    return name.findtext('.')
+
+    @cached_property
+    def country_num(self):
+        """ISO 3166-1 numeric code."""
+        root = self.cldr.load_supp('supplementalData')
+        codes = root.find('codeMappings/territoryCodes[@type="%s"]' %
+                          self.territory)
+        return int(codes.get('numeric'))
+
+    @cached_property
+    def country_car(self):
+        """International licence plate country code."""
+        return self.cardb.get(self.territory)
+
+    @cached_property
+    def country_term(self):
+        """ISO 3166-1 alpha-3 code"""
+        root = self.cldr.load_supp('supplementalData')
+        codes = root.find('codeMappings/territoryCodes[@type="%s"]' %
+                          self.territory)
+        return codes.get('alpha3').lower()
+
+    @cached_property
+    def tel_int_fmt(self):
+        """Telephone format for international calling."""
+        # TODO: Implement this.
+
+    @cached_property
+    def tel_dom_fmt(self):
+        """Telephone format for domestic calling."""
+        # TODO: Implement this.
+
+    @cached_property
+    def int_select(self):
+        """Telephone prefix for calling international numbers."""
+        # TODO: Implement this.
+
+    @cached_property
+    def int_prefix(self):
+        """Telephone international country code prefix."""
+        root = self.cldr.load_supp('telephoneCodeData')
+        code = root.find('telephoneCodeData/codesByTerritory[@territory="%s"]'
+                         '/telephoneCountryCode' % self.territory)
+        # The CLDR is missing some territories.
+        if code is None:
+            logging.warning('%s: int_prefix: CLDR is missing country code; '
+                            'try https://countrycode.org/%s',
+                            self.locale, self.territory)
+            return None
+        return code.get('code')
+
+    @cached_property
+    def int_curr_symbol(self):
+        """Need to rectify w/locale/iso-4217.def."""
+        # The xmlpath support in python is not complete, so we need to search
+        # for the currency w/missing @to attribute ourselves.
+        root = self.cldr.load_supp('supplementalData')
+        currencies = root.find('currencyData/region[@iso3166="%s"]' %
+                               self.territory)
+        for currency in currencies.getchildren():
+            if 'to' not in currency.keys():
+                return currency.get('iso4217')
+
+        raise ValueError('Could not find a currency for %s' % (self.territory,))
+
+    @cached_property
+    def currency_symbol(self):
+        """Need to rectify w/locale/iso-4217.def."""
+        def filter_markers(sym):
+            """Strip out some content we don't care about like the RTL marker."""
+            return sym.replace(u'\u200f', '')
+
+        # First search the locale, then the lang dbs.
+        for root in (self.locale_root, self.lang_root):
+            if root is None:
+                continue
+
+            numbers_root = root.find('numbers')
+            if numbers_root is None:
+                continue
+            symbol_ele = numbers_root.find('currencies/currency[@type="%s"]'
+                                           '/symbol' % self.int_curr_symbol)
+            if symbol_ele is not None:
+                return filter_markers(symbol_ele.findtext('.'))
+
+        # Try the common currency database.
+        chars_root = self.cldr.load_supp('characters')
+        for symbol_ele in chars_root.find('characters'
+                                          '/character-fallback').getchildren():
+            if symbol_ele.findtext('substitute') == self.int_curr_symbol:
+                return filter_markers(symbol_ele.get('value'))
+
+        # A few symbols have no translation.
+        return None #self.int_curr_symbol
+
+    @cached_property
+    def number_system(self):
+        """Get the active number system for this locale."""
+        for root in (self.locale_root, self.lang_root):
+            if root is None:
+                continue
+
+            numbers_root = root.find('numbers')
+            if numbers_root is None:
+                continue
+
+            # If there's a default labeled, use it.  Otherwise just go with
+            # the first one found.  It should be the only one.
+            num_sys_ele = root.find('defaultNumberingSystem')
+            if num_sys_ele is None:
+                return numbers_root.find('symbols')
+            else:
+                return numbers_root.find('symbols[@numberSystem="%s"]' %
+                                         num_sys_ele.findtext('.'))
+
+    @cached_property
+    def decimal_point(self):
+        """The symbol used to denote decimal points."""
+        num_symbols_root = self.number_system
+        try:
+            return num_symbols_root.find('decimal').findtext('.')
+        except AttributeError:
+            return None
+
+    @cached_property
+    def thousands_sep(self):
+        """The symbol used to group thousands digits."""
+        num_symbols_root = self.number_system
+        try:
+            return num_symbols_root.find('group').findtext('.')
+        except AttributeError:
+            return None
+
+    @cached_property
+    def grouping(self):
+        # TODO: Implement this.
+        pass
+
+    def _lookup_day_mon(self, cal_field, cal_type, cal_idxs):
+        """Look up various calendar fields."""
+        for root in (self.locale_root, self.lang_root):
+            if root is None:
+                continue
+
+            dates_root = root.find('dates')
+            if dates_root is None:
+                continue
+            calendars_root = dates_root.find('calendars')
+            if calendars_root is None:
+                continue
+            # XXX: Look up type in calendarPreference ?
+            calendar_root = calendars_root.find('calendar[@type="gregorian"]')
+            if calendar_root is None:
+                continue
+
+            dm_root = None
+            for key in ('stand-alone', 'format', 'narrow'):
+                ctx_root = calendar_root.find('%ss/%sContext[@type="%s"]' %
+                                              (cal_field, cal_field, key))
+                if ctx_root is None:
+                    continue
+                dm_root = ctx_root.find('%sWidth[@type="%s"]' %
+                                        (cal_field, cal_type))
+                if dm_root is None:
+                    continue
+
+                ret = [dm_root.find('%s[@type="%s"]' % (cal_field, x))
+                       for x in cal_idxs]
+                if None not in ret:
+                    return [x.findtext('.') for x in ret]
+
+    def _lookup_day(self, width_type):
+        """Internal helper for abday/day lookups."""
+        return self._lookup_day_mon('day', width_type, self._DAY_KEYS)
+
+    def _lookup_mon(self, width_type):
+        """Internal helper for abmon/mon lookups."""
+        return self._lookup_day_mon('month', width_type, range(1, 13))
+
+    @cached_property
+    def abday(self):
+        """Abbreviated localized names for the days of the week."""
+        return self._lookup_day('abbreviated')
+
+    @cached_property
+    def day(self):
+        """Full localized names for the days of the week."""
+        return self._lookup_day('wide')
+
+    @cached_property
+    def abmon(self):
+        """Abbreviated localized names for the months."""
+        return self._lookup_mon('abbreviated')
+
+    @cached_property
+    def mon(self):
+        """Full localized names for the months."""
+        return self._lookup_mon('wide')
+
+    # http://www.unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns
+    _CLDR_TO_POSIX_FMT = {
+        # year
+        'y':    '%%-y',
+        'yy':   '%%y',
+        'yyy':  '%%-Y',
+        'yyyy': '%%Y',
+        # month
+        'M':    '%%-m',
+        'MM':   '%%m',
+        'MMM':  '%%b',
+        'MMMM': '%%B',
+        # day
+        'd':    '%%-d',
+        'dd':   '%%d',
+        # period
+        'a':    '%%p',
+        # hour
+        'h':    '%%-I',
+        'hh':   '%%I',
+        'H':    '%%-H',
+        'HH':   '%%H',
+        # minute
+        'm':    '%%-M',
+        'mm':   '%%M',
+        # second
+        's':    '%%-S',
+        'ss':   '%%S',
+    }
+
+    @classmethod
+    def _to_posix_fmt(cls, fmt):
+        """Convert the CLDR notation to what POSIX uses."""
+        lookup = lambda m: cls._CLDR_TO_POSIX_FMT[m.group(1)]
+        return re.sub(r'\b(' + '|'.join(cls._CLDR_TO_POSIX_FMT.keys()) + r')\b',
+                      lookup, fmt)
+
+    @cached_property
+    def hours_format(self):
+        """Return 24 or 12 depending on preferred %H or %h format"""
+        root = self.cldr.load_supp('supplementalData')
+        datasets = root.find('timeData')
+        pref = None
+        for dataset in datasets.findall('hours'):
+            territories = dataset.get('regions')
+            value = dataset.get('preferred')
+
+            # TODO: Make this walk logic more robust/common.
+            territories = territories.split()
+            if '001' in territories:
+                if pref is None:
+                    # The allowed field makes this tricky.
+                    #pref = value
+                    pass
+            if self.territory in territories:
+                pref = value
+
+        if pref == 'H':
+            return '24'
+        elif pref == 'h':
+            return '12'
+        elif pref is None:
+            return None
+        else:
+            raise ValueError('Unknown hour value: %s' % pref)
+
+    @cached_property
+    def am_pm(self):
+        """Localized AM/PM time fields when 12 hour clocks are used."""
+        if self.hours_format == '24':
+            return ['', '']
+        elif self.hours_format is None:
+            return None
+
+        return self._lookup_day_mon('dayPeriod', 'abbreviated', ('am', 'pm'))
+
+    def _lookup_d_t_fmt(self, dt, dt_type='medium'):
+        """Internal helper for various fmt lookups."""
+        for root in (self.locale_root, self.lang_root):
+            if root is None:
+                continue
+
+            dates_root = root.find('dates')
+            if dates_root is None:
+                continue
+            calendars_root = dates_root.find('calendars')
+            # XXX: Look up type in calendarPreference ?
+            calendar_root = calendars_root.find('calendar[@type="gregorian"]')
+
+            fmts = calendar_root.find('%sFormats/%sFormatLength[@type="%s"]'
+                                      '/%sFormat/pattern' %
+                                      (dt, dt, dt_type, dt))
+            if fmts is not None:
+                return fmts.findtext('.')
+
+    @cached_property
+    def d_t_fmt(self):
+        """Appropriate date and time representation (%c)
+
+        Example:
+        $ date +'%a %d %b %Y %r %Z'
+        Tue 09 Feb 2016 06:39:48 PM EST
+        """
+        return self._to_posix_fmt(
+            self._lookup_d_t_fmt('dateTime').replace(
+                '{0}', self._t_fmt).replace(
+                    '{1}', self._d_fmt))
+
+    @cached_property
+    def _d_fmt(self):
+        """Internal helper for the raw d_fmt field."""
+        return self._lookup_d_t_fmt('date')
+
+    @cached_property
+    def d_fmt(self):
+        """Appropriate date representation (%x)
+
+        Example:
+        $ date +'%m/%d/%Y'
+        02/09/2016
+        """
+        return self._to_posix_fmt(self._d_fmt)
+
+    @cached_property
+    def _t_fmt(self):
+        """Internal helper for the raw t_fmt field."""
+        return self._lookup_d_t_fmt('time')
+
+    @cached_property
+    def t_fmt(self):
+        """Appropriate time representation (%X)
+
+        Example:
+        $ date +%r
+        06:41:21 PM
+        """
+        return self._to_posix_fmt(self._t_fmt)
+
+    @cached_property
+    def t_fmt_ampm(self):
+        """Appropriate AM/PM time representation (%r)
+
+        Example:
+        $ date +'%I:%M:%S %p'
+        06:41:21 PM
+        """
+        if self.hours_format == '24':
+            return ''
+        elif self.hours_format is None:
+            return None
+
+        return None
+
+    @cached_property
+    def date_fmt(self):
+        """Appropriate date representation (date(1))
+
+        $ date +'%a %b %e %H:%M:%S %Z %Y'
+        Tue Feb  9 06:39:48 EST 2016
+        """
+        pass
+
+    @cached_property
+    def week(self):
+        """DAYSINWEEK;WEEKSTARTDATE;MINWEEKLEN field"""
+        root = self.cldr.load_supp('supplementalData')
+        data = root.find('weekData')
+        ret = None
+        for start in data.findall('minDays'):
+            territories = start.get('territories')
+            value = start.get('count')
+
+            # TODO: Make this walk logic more robust/common.
+            territories = territories.split()
+            if '001' in territories:
+                if ret is None:
+                    ret = value
+            if self.territory in territories:
+                ret = value
+
+        # Just hardcode this as no one changes it.
+        daysinweek = 7
+
+        # Hardcode this as well as there's no advantage to it otherwise.
+        # It's also what CLDR bases things on.
+        weekstartdate = 19971130
+
+        minweeklen = int(ret)
+
+        return (daysinweek, weekstartdate, minweeklen)
+
+    @cached_property
+    def first_weekday(self):
+        """Number of day in the week for the first column in the calendar.
+
+        Sunday = 1, Monday = 2, ...
+        """
+        root = self.cldr.load_supp('supplementalData')
+        data = root.find('weekData')
+        first = None
+        for start in data.findall('firstDay'):
+            territories = start.get('territories')
+            day = start.get('day')
+
+            # Throw out ones we don't care about.
+            if start.get('alt') is not None:
+                continue
+
+            # TODO: Make this walk logic more robust/common.
+            territories = territories.split()
+            if '001' in territories:
+                if first is None:
+                    first = day
+            if self.territory in territories:
+                first = day
+
+        # We add +1 for index->day-of-week adjustment,
+        return self._DAY_KEYS.index(first) + 1
+
+    @cached_property
+    def first_workday(self):
+        """Number of day in the week for the first working day.
+
+        Sunday = 1, Monday = 2, ...
+        """
+        root = self.cldr.load_supp('supplementalData')
+        data = root.find('weekData')
+        first = None
+        for start in data.findall('weekendEnd'):
+            territories = start.get('territories')
+            day = start.get('day')
+
+            # TODO: Make this walk logic more robust/common.
+            territories = territories.split()
+            if '001' in territories:
+                if first is None:
+                    first = day
+            if self.territory in territories:
+                first = day
+
+        # We add +1 for index->day-of-week adjustment,
+        # and we add +1 for weekendEnd->workdayStart.
+        # We do the % to handle sat->sun wrapping.
+        return ((self._DAY_KEYS.index(first) + 1) % 7) + 1
+
+    @cached_property
+    def measurement(self):
+        """Return 1 for metric and 2 for imperial"""
+        root = self.cldr.load_supp('supplementalData')
+        measurement = None
+        for system in root.findall('measurementData/measurementSystem'):
+            territories = system.get('territories')
+            stype = system.get('type')
+
+            # Throw out ones we don't care about.
+            if system.get('category') == 'temperature' or stype == 'UK':
+                continue
+
+            # TODO: Make this walk logic more robust/common.
+            territories = territories.split()
+            if '001' in territories:
+                if measurement is None:
+                    measurement = stype
+            if self.territory in territories:
+                measurement = stype
+
+        # We don't use imperial settings for Myanmar even though CLDR does.
+        # https://en.wikipedia.org/wiki/Myanmar_units_of_measurement
+        if self.territory == 'MM':
+            if measurement == 'US':
+                measurement = 'metric'
+            else:
+                raise ValueError('CLDR is updated; drop this hack')
+
+        if measurement == 'metric':
+            return 1
+        elif measurement == 'US':
+            return 2
+        else:
+            raise ValueError('Do not understand type %s' % measurement)
+
+    @cached_property
+    def measurement_copy(self):
+        """We copy other locales for most"""
+        if self.locale in ('en_US', 'i18n'):
+            return None
+        elif self.measurement == 1:
+            return 'i18n'
+        elif self.measurement == 2:
+            return 'en_US'
+        else:
+            raise ValueError('Unknown measurement %s' % self.measurement)
+
+    @cached_property
+    def paper(self):
+        """Return the paper type"""
+        root = self.cldr.load_supp('supplementalData')
+        paper = None
+        for system in root.findall('measurementData/paperSize'):
+            territories = system.get('territories')
+            stype = system.get('type')
+
+            # TODO: Make this walk logic more robust/common.
+            territories = territories.split()
+            if '001' in territories:
+                if paper is None:
+                    paper = stype
+            if self.territory in territories:
+                paper = stype
+
+        return paper
+
+    @cached_property
+    def paper_height(self):
+        """Return the height of paper (in mm)"""
+        return {'A4': 297, 'US-Letter': 279}.get(self.paper)
+
+    @cached_property
+    def paper_width(self):
+        """Return the width of paper (in mm)"""
+        return {'A4': 210, 'US-Letter': 216}.get(self.paper)
+
+    @cached_property
+    def paper_copy(self):
+        """We copy other locales for most"""
+        if self.locale in ('en_US', 'i18n'):
+            return None
+        elif self.paper == 'A4':
+            return 'i18n'
+        elif self.paper == 'US-Letter':
+            return 'en_US'
+        else:
+            raise ValueError('Unknown paper %s' % self.paper)
+
+
+class Cldr(object):
+    """Content for the cldr database."""
+
+    # The current release version that we use.
+    CURR_VERSION = '29'
+
+    # Where to find the CLDR data.
+    URI = 'http://unicode.org/Public/cldr/%(version)s/core.zip'
+
+    def __init__(self, path, version):
+        """Set up the database for release |version| stored under |path|.
+
+        Both |path| and the download URI are expanded with %(version)s.
+        The working directory is created if it does not exist yet.
+        """
+        fields = {'version': version}
+        self.dir = path % fields
+        self.uri = self.URI % fields
+        self.version = version
+        # Filled in by download() from the archive's modification time.
+        self.date = None
+        # Parsed XML roots, keyed by database name, so each file is only
+        # parsed once.
+        self.main_dbs = {}
+        self.supp_dbs = {}
+        self.iso639 = Iso639()
+        self.cardb = CarDatabase()
+
+        # Set up the working dir.
+        if not os.path.exists(self.dir):
+            os.makedirs(self.dir)
+
+    def download(self):
+        """Download (if needed) and unpack the current cldr database."""
+        # Download the CLDR data.  Fetch to a temporary name and rename it
+        # in to place only on success; otherwise a failed or interrupted
+        # wget would leave a truncated core.zip behind that later runs
+        # would mistake for a complete download.
+        archive = os.path.join(self.dir, 'core.zip')
+        if not os.path.exists(archive):
+            partial = archive + '.part'
+            try:
+                subprocess.check_call(['wget', '-O', partial, self.uri])
+            except subprocess.CalledProcessError:
+                if os.path.exists(partial):
+                    os.unlink(partial)
+                raise
+            os.rename(partial, archive)
+        self.date = datetime.datetime.fromtimestamp(os.path.getmtime(archive))
+
+        # Unpack the CLDR data.
+        common_dir = os.path.join(self.dir, 'common')
+        if not os.path.exists(common_dir):
+            subprocess.check_call(['unzip', '-u', 'core.zip'], cwd=self.dir)
+
+    def _load_db(self, db, subdir, cache):
+        """Load the database |db| out of |subdir| using |cache|.
+
+        Returns the root element of common/<subdir>/<db>.xml; the parsed
+        root is stored in |cache| so later calls skip the parsing.
+        """
+        if db not in cache:
+            db_path = os.path.join(self.dir, 'common', subdir, '%s.xml' % db)
+            tree = ElementTree.parse(db_path)
+            cache[db] = tree.getroot()
+        return cache[db]
+
+    def _load_main(self, db):
+        """Load database |db| from the main repo."""
+        return self._load_db(db, 'main', self.main_dbs)
+
+    def load_lang(self, lang):
+        """Load the language |lang| database."""
+        return self._load_main(lang)
+
+    def load_supp(self, db):
+        """Load database |db| from the supplemental repo."""
+        return self._load_db(db, 'supplemental', self.supp_dbs)
+
+    def locale(self, locale):
+        """Get an object for a specific cldr |locale|."""
+        return CldrLocale(self, locale, self.iso639, self.cardb)
+
+
+class Locale(locales.Locale):
+    """An updated locale datafile."""
+
+    def update_cldr(self, cldr):
+        """Merge CLDR updates from |cldr| in to this locale.
+
+        For every category that has replacement values, lines whose first
+        word is a known key are rewritten in place (wrapped lines are joined
+        before comparing), and keys that never showed up are appended to the
+        category.  Values that are None or an empty sequence are treated as
+        "no data" and never written.
+        """
+        # NOTE(review): Cldr.locale() as visible here always returns an
+        # object, so this guard looks unreachable -- confirm before relying
+        # on it.
+        cldr_locale = cldr.locale(self.locale)
+        if cldr_locale is None:
+            logging.warning('%s: no CLDR entry found for %s',
+                            self.name, self.locale)
+            return
+
+        # Start updating the actual data.  These are the fields that the
+        # %(...)s templates in the per-category values below expand to.
+        cldr_values = {
+            'generator': os.path.basename(__file__),
+            'english_territory_name': cldr_locale.en_territory,
+            'source_name': 'Unicode Common Locale Data Repository (CLDR)',
+            'source_version': cldr.version,
+            'source_uri': cldr.uri.replace('/', '//'),
+            'source_date': cldr.date.strftime('%Y-%m-%d'),
+            'lang': cldr_locale.lang,
+            'territory': cldr_locale.territory,
+            'locale': cldr_locale.locale,
+        }
+        if cldr_locale.en_lang:
+            cldr_values.update({
+                'english_lang_name': u_decode(cldr_locale.en_lang),
+            })
+
+        all_values = {}
+        all_values['LC_IDENTIFICATION'] = {
+            #'source':    'Based on %(source_name)s',
+            #'address':   '%(source_uri)s',
+            #'contact':   'http:////cldr.unicode.org//index//process',
+            #'email':     'bug-glibc-locales@gnu.org',
+            'tel':       '',
+            'fax':       '',
+            'territory': '%(english_territory_name)s',
+            #'revision':  '%(source_version)s',
+            #'date':      '%(source_date)s',
+        }
+        if cldr_locale.en_lang:
+            all_values['LC_IDENTIFICATION'].update({
+                'title':     ('%(english_lang_name)s language locale for '
+                              '%(english_territory_name)s'),
+                'language':  '%(english_lang_name)s',
+            })
+
+        # These are based on the charset, not the locale.
+        all_values['LC_CTYPE'] = {}
+        all_values['LC_COLLATE'] = {}
+        all_values['LC_TIME'] = {
+            #'abday': cldr_locale.abday,
+            #'day': cldr_locale.day,
+            #'abmon': cldr_locale.abmon,
+            #'mon': cldr_locale.mon,
+            #'am_pm': cldr_locale.am_pm,
+            #'d_t_fmt': cldr_locale.d_t_fmt,
+            #'d_fmt': cldr_locale.d_fmt,
+            #'t_fmt': cldr_locale.t_fmt,
+            #'t_fmt_ampm': cldr_locale.t_fmt_ampm,
+            #'date_fmt': cldr_locale.date_fmt,
+            #'week': cldr_locale.week,
+            #'first_weekday': cldr_locale.first_weekday,
+            #'first_workday': cldr_locale.first_workday,
+        }
+        all_values['LC_NUMERIC'] = {
+            #'decimal_point': cldr_locale.decimal_point,
+            #'thousands_sep': cldr_locale.thousands_sep,
+            #'grouping': cldr_locale.grouping,
+        }
+        all_values['LC_MONETARY'] = {
+            'int_curr_symbol': cldr_locale.int_curr_symbol + ' ',
+            'currency_symbol': cldr_locale.currency_symbol,
+        }
+        # See lang/posix/messages/{yes,no}str.
+        all_values['LC_MESSAGES'] = {
+            #'yesexpr': cldr_locale.yesexpr,
+            #'noexpr': cldr_locale.noexpr,
+        }
+        all_values['LC_PAPER'] = {
+            'height': cldr_locale.paper_height,
+            'width': cldr_locale.paper_width,
+            #'copy': cldr_locale.paper_copy,
+        }
+        # XXX: Need a data source for this.
+        all_values['LC_NAME'] = {
+        }
+        all_values['LC_ADDRESS'] = {
+            #'postal_fmt':
+            'country_name': cldr_locale.country_name,
+            #'country_post':
+            'country_ab2': cldr_locale.country_ab2,
+            'country_ab3': cldr_locale.country_ab3,
+            'country_num': cldr_locale.country_num,
+            'country_car': cldr_locale.country_car,
+            #'country_isbn':
+            'lang_name': cldr_locale.lang_name,
+            'lang_ab': cldr_locale.unicode_language_subtag,
+            'lang_term': cldr_locale.lang_term,
+            'lang_lib': cldr_locale.lang_lib,
+        }
+        all_values['LC_TELEPHONE'] = {
+            #'tel_int_fmt': cldr_locale.tel_int_fmt,
+            #'tel_dom_fmt': cldr_locale.tel_dom_fmt,
+            #'int_select': cldr_locale.int_select,
+        }
+        if cldr_locale.int_prefix:
+            all_values['LC_TELEPHONE'].update({
+                'int_prefix': cldr_locale.int_prefix,
+            })
+        all_values['LC_MEASUREMENT'] = {
+            'measurement': cldr_locale.measurement,
+            #'copy': cldr_locale.measurement_copy,
+        }
+
+        # Report when the language CLDR derived disagrees with the lang_ab
+        # already in the file.  Use .get() so a file without lang_ab does
+        # not abort the whole update.
+        cldr_lang = str(cldr_locale.lang)
+        file_lang = self.lc_address.fields.get('lang_ab')
+        if file_lang and cldr_lang != file_lang:
+            print('%s: mismatch: %s %s' % (self.name, cldr_lang, file_lang))
+
+        # Walk all the categories.
+        for category in self.categories:
+            lc = getattr(self, category.lower())
+            values = all_values[category]
+            if not values:
+                continue
+
+            # Walk each line in this locale category.
+            start_of_line = None
+            full_line = ''
+            i = 0
+            seen_keys = set()
+            while i < len(lc.content):
+                line = lc.content[i]
+                if not line:
+                    i += 1
+                    continue
+
+                # If the line ends with an escape it is wrapped, so unwrap it
+                # before we check for updates to the value.
+                if (not line.startswith(self.comment_char) and
+                    line.endswith(self.escape_char)):
+                    if not full_line:
+                        start_of_line = i
+                    full_line += line[:-1].lstrip()
+                    i += 1
+                    continue
+                elif full_line:
+                    line = full_line + line.lstrip()
+                    full_line = ''
+                else:
+                    start_of_line = None
+
+                # Process this line.  A whitespace-only line has no key.
+                words = line.split()
+                if not words:
+                    i += 1
+                    continue
+                key = words[0]
+                new_value = values.get(key)
+                seen_keys.add(key)
+
+                is_list = isinstance(new_value, (tuple, list, set))
+                if is_list:
+                    # Sets cannot be indexed, so work on a list copy.  An
+                    # empty sequence carries no data; treat it like None.
+                    new_value = list(new_value) or None
+
+                if new_value is not None:
+                    if is_list:
+                        is_int = isinstance(new_value[0], int)
+                    else:
+                        is_int = isinstance(new_value, int)
+                    if is_int:
+                        if is_list:
+                            new_value = ';'.join(str(x) for x in new_value)
+                        else:
+                            new_value = str(new_value)
+                        m = re.match(r'\s*(.*?)\s+([0-9;]+)$', line)
+                    else:
+                        if is_list:
+                            new_value = '";"'.join(u_encode(x % cldr_values)
+                                                   for x in new_value)
+                        elif key != 'copy':
+                            new_value %= cldr_values
+                            if category != 'LC_IDENTIFICATION':
+                                new_value = u_encode(new_value)
+                        m = re.match(r'\s*([^"]*)"(.*)"$', line)
+
+                    # We should standardize case at some point.
+                    if m and new_value.lower() != m.group(2).lower():
+                        disp_key = ('%s:%s' % (category.upper(), key)
+                                    if key == 'copy' else key)
+                        logging.info('%s: %s: changing {%s} to {%s}',
+                                     self.name, disp_key,
+                                     u_decode(m.group(2)),
+                                     u_decode(new_value))
+                        leading_line = m.group(1)
+
+                        # This is tricky as we have to delete most of the
+                        # multiline, then update the one remaining.
+                        if start_of_line is not None:
+                            del lc.content[start_of_line:i]
+                            i = start_of_line
+                            if '";"' in new_value:
+                                # Rewrap one element per line, using the
+                                # same escape char that the unwrapping
+                                # above looks for.
+                                leading_line = leading_line.rstrip() + '\t'
+                                num_tabs = (len(leading_line) // 8) + 1
+                                new_value = new_value.replace(
+                                    '";"',
+                                    '";' + self.escape_char + '\n' +
+                                    ('\t' * num_tabs) + '"')
+
+                        # Finally deploy the updated line.
+                        fmt = '%s %s' if is_int else '%s"%s"'
+                        lc.content[i] = fmt % (leading_line, new_value)
+
+                i += 1
+
+            # Append the keys that never appeared in the category, in sorted
+            # order so the output is reproducible.
+            for key in sorted(set(values) - seen_keys):
+                # TODO: Merge with the logic above.
+                raw_value = values[key]
+                # Check for None before stringifying: str(None) is the
+                # string 'None', which would be written to the file.
+                if raw_value is None:
+                    continue
+                new_value = str(raw_value)
+                old_value = str(lc.fields.get(key))
+                if new_value != old_value:
+                    logging.info('%s: %s: changing {%s} to {%s}',
+                                 self.name, key, old_value, new_value)
+                    lc.content.append('%s "%s"' % (key, new_value))
+
+
+def main(argv):
+    """Update each locale file named in |argv| from the CLDR database.
+
+    Every file is rewritten via a '.new' sibling that is renamed over the
+    original.  Failures are logged per locale and do not stop the run.
+    """
+    opts = get_parser().parse_args(argv)
+    logging_init(opts)
+
+    # Get a handle to the cldr database.
+    cldr = Cldr(opts.working_dir, opts.version)
+    cldr.download()
+
+    # These are not "real" locales, so skip them.  Only the text before the
+    # first underscore of the file name is compared.
+    not_real_locales = ('i18n', 'iso14651', 'translit', 'C', 'POSIX')
+
+    # Process all the locales the user told us to.
+    for path in opts.locales:
+        basename = os.path.basename(path)
+        prefix = basename.split('_', 1)[0]
+        if prefix in not_real_locales:
+            continue
+
+        logging.info('Updating %s', path)
+        staged_path = path + '.new'
+        try:
+            datafile = Locale(name=basename, path=path)
+            try:
+                datafile.update_cldr(cldr)
+            except Exception:
+                # The datafile is still written out below after a failed
+                # merge.
+                logging.error('%s: updating failed', path, exc_info=True)
+            datafile.write(staged_path)
+            os.rename(staged_path, path)
+        except UnicodeDecodeError:
+            logging.error('%s: bad encodings', path, exc_info=True)
+            subprocess.check_call(['file', path])
+        except (IndexError, locales.LocaleError):
+            logging.error('%s: loading failed', path, exc_info=True)
+
+if __name__ == '__main__':
+    # Use sys.exit(): the bare exit() builtin is only injected by the site
+    # module and is missing when the interpreter runs without it.
+    sys.exit(main(sys.argv[1:]))
--- /dev/null
+++ filter.py
@@ -0,0 +1,69 @@ 
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+# Written by Mike Frysinger <vapier@gentoo.org> for much great glory.
+#
+# Copyright (C) 2016 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+"""Simple script to quickly make locale files readable.
+
+Example: %(prog)s locales/en_US | less
+"""
+
+from __future__ import print_function
+
+import argparse
+import sys
+
+import locales
+
+
+def process(_opts, fp):
+    """Print every line of |fp| after running it through locales.u_decode.
+
+    A line that u_decode rejects with ValueError is reported on its own
+    line and then printed unchanged.  |_opts| is unused.
+    """
+    for raw in fp:
+        try:
+            text = locales.u_decode(raw)
+        except ValueError as e:
+            # Python's chr() does not support the full UTF-8 codepoint
+            # range.  Just use the line as-is if it fails.
+            print('FILTER ERROR: %s' % e)
+            text = raw
+        print(text, end='')
+
+
+def process_path(opts, path):
+    """Run process() over the file at |path| and return its result.
+
+    The file is closed once processing finishes, even when it raises.
+    """
+    with open(path) as fp:
+        return process(opts, fp)
+
+
+def get_parser():
+    """Build the command line parser; it accepts zero or more file paths.
+
+    The module docstring is used verbatim as the help description.
+    """
+    parser_settings = {
+        'description': __doc__,
+        'formatter_class': argparse.RawDescriptionHelpFormatter,
+    }
+    parser = argparse.ArgumentParser(**parser_settings)
+    parser.add_argument('files', nargs='*')
+    return parser
+
+
+def main(argv):
+    """Filter the files named in |argv|, or stdin when none are given."""
+    opts = get_parser().parse_args(argv)
+    if opts.files:
+        for path in opts.files:
+            process_path(opts, path)
+    else:
+        process(opts, sys.stdin)
+
+
+if __name__ == '__main__':
+    # Use sys.exit(): the bare exit() builtin is only injected by the site
+    # module and is missing when the interpreter runs without it.
+    sys.exit(main(sys.argv[1:]))