pycodestyle (PEP 8) cleanup in Python scripts

These are mainly whitespace changes. I didn't fix "E501 line too long", which would require more significant surgery.
2022-03-09 10:51:41 +01:00 · 2022-03-09 10:51:41 +01:00 · ddf590b811
parent e80a7a1f3d
commit ddf590b811
2 changed files with 40 additions and 24 deletions
--- a/contrib/unaccent/generate_unaccent_rules.py
+++ b/contrib/unaccent/generate_unaccent_rules.py
@@ -55,6 +55,7 @@ COMBINING_MARK_RANGES = ((0x0300, 0x0362),  # Mn: Accents, IPA
                         (0x20dd, 0x20E0),   # Me: Symbols
                         (0x20e2, 0x20e4),)  # Me: Screen, keycap, triangle
 def print_record(codepoint, letter):
    if letter:
        output = chr(codepoint) + "\t" + letter
@@ -63,12 +64,14 @@ def print_record(codepoint, letter):
    print(output)
 class Codepoint:
    def __init__(self, id, general_category, combining_ids):
        self.id = id
        self.general_category = general_category
        self.combining_ids = combining_ids
 def is_mark_to_remove(codepoint):
    """Return true if this is a combining mark to remove."""
    if not is_mark(codepoint):
@@ -79,6 +82,7 @@ def is_mark_to_remove(codepoint):
            return True
    return False
 def is_plain_letter(codepoint):
    """Return true if codepoint represents a "plain letter"."""
    for begin, end in PLAIN_LETTER_RANGES:
@@ -86,10 +90,12 @@ def is_plain_letter(codepoint):
            return True
    return False
 def is_mark(codepoint):
    """Returns true for diacritical marks (combining codepoints)."""
    return codepoint.general_category in ("Mn", "Me", "Mc")
 def is_letter_with_marks(codepoint, table):
    """Returns true for letters combined with one or more marks."""
    # See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
@@ -105,16 +111,18 @@ def is_letter_with_marks(codepoint, table):
    # Check if the base letter of this letter has marks.
    codepoint_base = codepoint.combining_ids[0]
-    if (is_plain_letter(table[codepoint_base]) is False and \
+    if is_plain_letter(table[codepoint_base]) is False and \
-        is_letter_with_marks(table[codepoint_base], table) is False):
+       is_letter_with_marks(table[codepoint_base], table) is False:
        return False
    return True
 def is_letter(codepoint, table):
    """Return true for letter with or without diacritical marks."""
    return is_plain_letter(codepoint) or is_letter_with_marks(codepoint, table)
 def get_plain_letter(codepoint, table):
    """Return the base codepoint without marks. If this codepoint has more
    than one combining character, do a recursive lookup on the table to
@@ -133,15 +141,18 @@ def get_plain_letter(codepoint, table):
    # Should not come here
    assert(False)
 def is_ligature(codepoint, table):
    """Return true for letters combined with letters."""
    return all(is_letter(table[i], table) for i in codepoint.combining_ids)
 def get_plain_letters(codepoint, table):
    """Return a list of plain letters from a ligature."""
    assert(is_ligature(codepoint, table))
    return [get_plain_letter(table[id], table) for id in codepoint.combining_ids]
 def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
    """Parse the XML file and return a set of tuples (src, trg), where "src"
    is the original character and "trg" the substitute."""
@@ -189,6 +200,7 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
    return charactersSet
 def special_cases():
    """Returns the special cases which are not handled by other methods"""
    charactersSet = set()
@@ -204,6 +216,7 @@ def special_cases():
    return charactersSet
 def main(args):
    # https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
    decomposition_type_pattern = re.compile(" *<[^>]*> *")
@@ -242,7 +255,7 @@ def main(args):
            elif args.noLigaturesExpansion is False and is_ligature(codepoint, table):
                charactersSet.add((codepoint.id,
                                   "".join(chr(combining_codepoint.id)
-                                     for combining_codepoint \
+                                           for combining_codepoint
                                           in get_plain_letters(codepoint, table))))
        elif is_mark_to_remove(codepoint):
            charactersSet.add((codepoint.id, None))
@@ -258,6 +271,7 @@ def main(args):
    for characterPair in charactersList:
        print_record(characterPair[0], characterPair[1])
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
    parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')
--- a/src/test/locale/sort-test.py
+++ b/src/test/locale/sort-test.py
@@ -1,6 +1,8 @@
 #! /usr/bin/env python
-import sys, string, locale
+import locale
 import sys
 locale.setlocale(locale.LC_ALL, "")
 if len(sys.argv) != 2: