pycodestyle (PEP 8) cleanup in Python scripts
These are mainly whitespace changes. I didn't fix "E501 line too long", which would require more significant surgery.
This commit is contained in:
parent
e80a7a1f3d
commit
ddf590b811
|
@ -55,6 +55,7 @@ COMBINING_MARK_RANGES = ((0x0300, 0x0362), # Mn: Accents, IPA
|
||||||
(0x20dd, 0x20E0), # Me: Symbols
|
(0x20dd, 0x20E0), # Me: Symbols
|
||||||
(0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
|
(0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
|
||||||
|
|
||||||
|
|
||||||
def print_record(codepoint, letter):
|
def print_record(codepoint, letter):
|
||||||
if letter:
|
if letter:
|
||||||
output = chr(codepoint) + "\t" + letter
|
output = chr(codepoint) + "\t" + letter
|
||||||
|
@ -63,12 +64,14 @@ def print_record(codepoint, letter):
|
||||||
|
|
||||||
print(output)
|
print(output)
|
||||||
|
|
||||||
|
|
||||||
class Codepoint:
|
class Codepoint:
|
||||||
def __init__(self, id, general_category, combining_ids):
|
def __init__(self, id, general_category, combining_ids):
|
||||||
self.id = id
|
self.id = id
|
||||||
self.general_category = general_category
|
self.general_category = general_category
|
||||||
self.combining_ids = combining_ids
|
self.combining_ids = combining_ids
|
||||||
|
|
||||||
|
|
||||||
def is_mark_to_remove(codepoint):
|
def is_mark_to_remove(codepoint):
|
||||||
"""Return true if this is a combining mark to remove."""
|
"""Return true if this is a combining mark to remove."""
|
||||||
if not is_mark(codepoint):
|
if not is_mark(codepoint):
|
||||||
|
@ -79,6 +82,7 @@ def is_mark_to_remove(codepoint):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def is_plain_letter(codepoint):
|
def is_plain_letter(codepoint):
|
||||||
"""Return true if codepoint represents a "plain letter"."""
|
"""Return true if codepoint represents a "plain letter"."""
|
||||||
for begin, end in PLAIN_LETTER_RANGES:
|
for begin, end in PLAIN_LETTER_RANGES:
|
||||||
|
@ -86,10 +90,12 @@ def is_plain_letter(codepoint):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def is_mark(codepoint):
|
def is_mark(codepoint):
|
||||||
"""Returns true for diacritical marks (combining codepoints)."""
|
"""Returns true for diacritical marks (combining codepoints)."""
|
||||||
return codepoint.general_category in ("Mn", "Me", "Mc")
|
return codepoint.general_category in ("Mn", "Me", "Mc")
|
||||||
|
|
||||||
|
|
||||||
def is_letter_with_marks(codepoint, table):
|
def is_letter_with_marks(codepoint, table):
|
||||||
"""Returns true for letters combined with one or more marks."""
|
"""Returns true for letters combined with one or more marks."""
|
||||||
# See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
|
# See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
|
||||||
|
@ -105,16 +111,18 @@ def is_letter_with_marks(codepoint, table):
|
||||||
|
|
||||||
# Check if the base letter of this letter has marks.
|
# Check if the base letter of this letter has marks.
|
||||||
codepoint_base = codepoint.combining_ids[0]
|
codepoint_base = codepoint.combining_ids[0]
|
||||||
if (is_plain_letter(table[codepoint_base]) is False and \
|
if is_plain_letter(table[codepoint_base]) is False and \
|
||||||
is_letter_with_marks(table[codepoint_base], table) is False):
|
is_letter_with_marks(table[codepoint_base], table) is False:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def is_letter(codepoint, table):
|
def is_letter(codepoint, table):
|
||||||
"""Return true for letter with or without diacritical marks."""
|
"""Return true for letter with or without diacritical marks."""
|
||||||
return is_plain_letter(codepoint) or is_letter_with_marks(codepoint, table)
|
return is_plain_letter(codepoint) or is_letter_with_marks(codepoint, table)
|
||||||
|
|
||||||
|
|
||||||
def get_plain_letter(codepoint, table):
|
def get_plain_letter(codepoint, table):
|
||||||
"""Return the base codepoint without marks. If this codepoint has more
|
"""Return the base codepoint without marks. If this codepoint has more
|
||||||
than one combining character, do a recursive lookup on the table to
|
than one combining character, do a recursive lookup on the table to
|
||||||
|
@ -133,15 +141,18 @@ def get_plain_letter(codepoint, table):
|
||||||
# Should not come here
|
# Should not come here
|
||||||
assert(False)
|
assert(False)
|
||||||
|
|
||||||
|
|
||||||
def is_ligature(codepoint, table):
|
def is_ligature(codepoint, table):
|
||||||
"""Return true for letters combined with letters."""
|
"""Return true for letters combined with letters."""
|
||||||
return all(is_letter(table[i], table) for i in codepoint.combining_ids)
|
return all(is_letter(table[i], table) for i in codepoint.combining_ids)
|
||||||
|
|
||||||
|
|
||||||
def get_plain_letters(codepoint, table):
|
def get_plain_letters(codepoint, table):
|
||||||
"""Return a list of plain letters from a ligature."""
|
"""Return a list of plain letters from a ligature."""
|
||||||
assert(is_ligature(codepoint, table))
|
assert(is_ligature(codepoint, table))
|
||||||
return [get_plain_letter(table[id], table) for id in codepoint.combining_ids]
|
return [get_plain_letter(table[id], table) for id in codepoint.combining_ids]
|
||||||
|
|
||||||
|
|
||||||
def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
|
def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
|
||||||
"""Parse the XML file and return a set of tuples (src, trg), where "src"
|
"""Parse the XML file and return a set of tuples (src, trg), where "src"
|
||||||
is the original character and "trg" the substitute."""
|
is the original character and "trg" the substitute."""
|
||||||
|
@ -189,6 +200,7 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
|
||||||
|
|
||||||
return charactersSet
|
return charactersSet
|
||||||
|
|
||||||
|
|
||||||
def special_cases():
|
def special_cases():
|
||||||
"""Returns the special cases which are not handled by other methods"""
|
"""Returns the special cases which are not handled by other methods"""
|
||||||
charactersSet = set()
|
charactersSet = set()
|
||||||
|
@ -204,6 +216,7 @@ def special_cases():
|
||||||
|
|
||||||
return charactersSet
|
return charactersSet
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
# https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
|
# https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
|
||||||
decomposition_type_pattern = re.compile(" *<[^>]*> *")
|
decomposition_type_pattern = re.compile(" *<[^>]*> *")
|
||||||
|
@ -242,7 +255,7 @@ def main(args):
|
||||||
elif args.noLigaturesExpansion is False and is_ligature(codepoint, table):
|
elif args.noLigaturesExpansion is False and is_ligature(codepoint, table):
|
||||||
charactersSet.add((codepoint.id,
|
charactersSet.add((codepoint.id,
|
||||||
"".join(chr(combining_codepoint.id)
|
"".join(chr(combining_codepoint.id)
|
||||||
for combining_codepoint \
|
for combining_codepoint
|
||||||
in get_plain_letters(codepoint, table))))
|
in get_plain_letters(codepoint, table))))
|
||||||
elif is_mark_to_remove(codepoint):
|
elif is_mark_to_remove(codepoint):
|
||||||
charactersSet.add((codepoint.id, None))
|
charactersSet.add((codepoint.id, None))
|
||||||
|
@ -258,6 +271,7 @@ def main(args):
|
||||||
for characterPair in charactersList:
|
for characterPair in charactersList:
|
||||||
print_record(characterPair[0], characterPair[1])
|
print_record(characterPair[0], characterPair[1])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
|
parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
|
||||||
parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')
|
parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
|
|
||||||
import sys, string, locale
|
import locale
|
||||||
|
import sys
|
||||||
|
|
||||||
locale.setlocale(locale.LC_ALL, "")
|
locale.setlocale(locale.LC_ALL, "")
|
||||||
|
|
||||||
if len(sys.argv) != 2:
|
if len(sys.argv) != 2:
|
||||||
|
|
Loading…
Reference in New Issue