import re


class PathFilter:
    """Filter path components for safe storage on file systems.

    Every enabled filter substitutes the offending characters with an
    underscore; nothing is removed, so the length of the input is preserved.
    The constructor flags share their names with the ``path_filter_<flag>``
    boolean options of the ``[main]`` configuration section.
    """

    def __init__(self, dot=True, posix=True, vfat=False, whitespace=False,
                 printable=False):
        """
        Init PathFilter.

        :param dot: whether to replace a leading dot with ``_``
        :type dot: bool
        :param posix: whether to replace chars illegal in *nix OSes
                      (slash and NUL) with ``_``
        :type posix: bool
        :param vfat: whether to replace chars illegal in VFAT filesystems
                     with ``_``
        :type vfat: bool
        :param whitespace: whether to replace all whitespace chars with ``_``
        :type whitespace: bool
        :param printable: whether to replace every char outside the printable
                          ASCII range (0x20-0x7E) with ``_``
        :type printable: bool
        """
        self._dot = dot
        self._posix = posix
        self._vfat = vfat
        self._whitespace = whitespace
        self._printable = printable

    def filter(self, path):
        """
        Return ``path`` with every enabled filter applied.

        Filters run in a fixed order: dot, posix, vfat, whitespace,
        printable. The posix and vfat filters replace ``/`` as well, so this
        is meant for a single path component rather than a full path.

        :param path: path component to sanitize; may be empty
        :type path: str
        :returns: the sanitized path component
        :rtype: str
        """
        R_CH = '_'
        # Slice instead of indexing: path[0] raises IndexError on ''.
        if self._dot and path[:1] == '.':
            path = R_CH + path[1:]
        if self._posix:
            # Slash and NUL are the only chars a *nix file name cannot hold.
            path = re.sub(r'[\/\x00]', R_CH, path)
        if self._vfat:
            # Control chars, DEL and the VFAT reserved punctuation.
            path = re.sub(r'[\x00-\x1F\x7F\"\*\/\:\<\>\?\\\|]', R_CH, path)
        if self._whitespace:
            path = re.sub(r'\s', R_CH, path)
        if self._printable:
            # Anything outside space..tilde, including tabs and newlines.
            path = re.sub(r'[^\x20-\x7E]', R_CH, path)
        return path
# vi:si:et:sw=4:sts=4:ts=4

from whipper.common import path
from whipper.test import common


# TODO: Right now you're testing different strings for different functions.
# I think it'd make more sense to come up with a selection of strings to test
# and then test that set of strings for the entire matrix to make sure that
# they all behave correctly in all instances.
#
class FilterTestCase(common.TestCase):
    """Exercise each PathFilter option in isolation, then all together."""

    def setUp(self):
        """Build one filter per option, plus all-disabled and all-enabled."""
        self._filter_none = path.PathFilter(dot=False, posix=False,
                                            vfat=False, whitespace=False,
                                            printable=False)
        self._filter_dot = path.PathFilter(dot=True, posix=False,
                                           vfat=False, whitespace=False,
                                           printable=False)
        self._filter_posix = path.PathFilter(dot=False, posix=True,
                                             vfat=False, whitespace=False,
                                             printable=False)
        self._filter_vfat = path.PathFilter(dot=False, posix=False,
                                            vfat=True, whitespace=False,
                                            printable=False)
        self._filter_whitespace = path.PathFilter(dot=False, posix=False,
                                                  vfat=False, whitespace=True,
                                                  printable=False)
        self._filter_printable = path.PathFilter(dot=False, posix=False,
                                                 vfat=False, whitespace=False,
                                                 printable=True)
        self._filter_all = path.PathFilter(dot=True, posix=True, vfat=True,
                                           whitespace=True, printable=True)

    def testNone(self):
        """With every option disabled the input comes back untouched."""
        part = '<<< $&*!\' "()`{}[]spaceship>>>'
        # Use the all-disabled filter; the posix one only passed here because
        # the sample happens to contain neither a slash nor a NUL.
        self.assertEqual(self._filter_none.filter(part), part)

    def testDot(self):
        """Only the leading dot is replaced."""
        part = '.弐'
        self.assertEqual(self._filter_dot.filter(part), '_弐')

    def testPosix(self):
        """Slash and NUL are replaced."""
        part = 'A Charm/A \x00Blade'
        self.assertEqual(self._filter_posix.filter(part), 'A Charm_A _Blade')

    def testVfat(self):
        """Characters reserved on VFAT are replaced."""
        part = 'A Word: F**k you?'
        self.assertEqual(self._filter_vfat.filter(part), 'A Word_ F__k you_')

    def testWhitespace(self):
        """Every whitespace character is replaced."""
        part = 'This is just a test!'
        self.assertEqual(self._filter_whitespace.filter(part),
                         'This_is_just_a_test!')

    def testPrintable(self):
        """Every character outside printable ASCII is replaced."""
        part = 'Supper’s Ready† 😽'
        self.assertEqual(self._filter_printable.filter(part),
                         'Supper_s Ready_ _')

    def testAll(self):
        """All filters enabled at once."""
        part = 'Greatest Ever! Soul: The Definitive Collection'
        self.assertEqual(self._filter_all.filter(part),
                         'Greatest_Ever!_Soul__The_Definitive_Collection')