Merge pull request #324 from whipper-team/feature/issue-313-pathfilter-questions

Rewrite PathFilter
This commit is contained in:
JoeLametta
2020-02-22 19:10:35 +01:00
committed by GitHub
4 changed files with 86 additions and 57 deletions

View File

@@ -241,8 +241,11 @@ options:
```INI
[main]
path_filter_fat = True ; replace FAT file system unsafe characters in filenames with _
path_filter_special = False ; replace special characters in filenames with _
path_filter_dot = True ; replace leading dot with _
path_filter_posix = True ; replace illegal chars in *nix OSes with _
path_filter_vfat = False ; replace illegal chars in VFAT filesystems with _
path_filter_whitespace = False ; replace all whitespace chars with _
path_filter_printable = False ; replace every char that is not printable ASCII (including all non-ASCII chars) with _
[musicbrainz]
server = https://musicbrainz.org ; use MusicBrainz server at host[:port]

View File

@@ -24,46 +24,34 @@ import re
class PathFilter:
    """Filter path components for safe storage on file systems."""

    def __init__(self, dot=True, posix=True, vfat=False, whitespace=False,
                 printable=False):
        """
        Init PathFilter.

        Every enabled option replaces the characters it targets with an
        underscore; nothing is ever removed, so the length of the filtered
        component always equals the length of the input.

        :param dot: whether to replace a leading dot
        :type  dot: bool
        :param posix: whether to replace chars illegal in *nix OSes
                      (the slash and the NUL character)
        :type  posix: bool
        :param vfat: whether to replace chars illegal in VFAT filesystems
        :type  vfat: bool
        :param whitespace: whether to replace all whitespace chars
        :type  whitespace: bool
        :param printable: whether to replace every char that is not
                          printable ASCII (this includes all non-ASCII chars)
        :type  printable: bool
        """
        self._dot = dot
        self._posix = posix
        self._vfat = vfat
        self._whitespace = whitespace
        self._printable = printable

    def filter(self, path):
        """
        Return ``path`` with the characters selected at init replaced.

        The enabled filters are applied in a fixed order: leading dot,
        POSIX, VFAT, whitespace, printable ASCII.

        :param path: a single path component (not a full path: with the
                     ``posix`` or ``vfat`` option slashes are replaced too)
        :type  path: str
        :returns: the filtered path component
        :rtype: str
        """
        R_CH = '_'
        # startswith() instead of path[0]: an empty component must pass
        # through unchanged rather than raise IndexError.
        if self._dot and path.startswith('.'):
            path = R_CH + path[1:]
        if self._posix:
            path = re.sub(r'[\/\x00]', R_CH, path)
        if self._vfat:
            # control chars (0x00-0x1F), DEL and " * / : < > ? \ |
            path = re.sub(r'[\x00-\x1F\x7F\"\*\/\:\<\>\?\\\|]', R_CH, path)
        if self._whitespace:
            path = re.sub(r'\s', R_CH, path)
        if self._printable:
            # anything outside the range space (0x20) .. tilde (0x7E)
            path = re.sub(r'[^\x20-\x7E]', R_CH, path)
        return path

View File

@@ -70,8 +70,11 @@ class Program:
d = {}
for key, default in list({
'fat': True,
'special': False
'dot': True,
'posix': True,
'vfat': False,
'whitespace': False,
'printable': False
}.items()):
value = None
value = self._config.getboolean('main', 'path_filter_' + key)

View File

@@ -2,29 +2,64 @@
# vi:si:et:sw=4:sts=4:ts=4
from whipper.common import path
from whipper.test import common
# TODO: Right now you're testing different strings for different functions.
# I think it'd make more sense to come up with a selection of strings to test
# and then test that set of strings for the entire matrix to make sure that
# they all behave correctly in all instances.
# <Freso 2018-11-04, GitHub comment>
class FilterTestCase(common.TestCase):
    """Check every PathFilter option on its own and all of them combined."""

    def setUp(self):
        # One filter per option, each with only that option enabled, plus
        # the two extremes: nothing enabled and everything enabled.
        self._filter_none = path.PathFilter(dot=False, posix=False,
                                            vfat=False, whitespace=False,
                                            printable=False)
        self._filter_dot = path.PathFilter(dot=True, posix=False,
                                           vfat=False, whitespace=False,
                                           printable=False)
        self._filter_posix = path.PathFilter(dot=False, posix=True,
                                             vfat=False, whitespace=False,
                                             printable=False)
        self._filter_vfat = path.PathFilter(dot=False, posix=False,
                                            vfat=True, whitespace=False,
                                            printable=False)
        self._filter_whitespace = path.PathFilter(dot=False, posix=False,
                                                  vfat=False, whitespace=True,
                                                  printable=False)
        self._filter_printable = path.PathFilter(dot=False, posix=False,
                                                 vfat=False, whitespace=False,
                                                 printable=True)
        self._filter_all = path.PathFilter(dot=True, posix=True, vfat=True,
                                           whitespace=True, printable=True)

    def testNone(self):
        # With every option disabled the input must come back untouched.
        part = '<<< $&*!\' "()`{}[]spaceship>>>'
        self.assertEqual(self._filter_none.filter(part), part)

    def testDot(self):
        part = '.弐'
        self.assertEqual(self._filter_dot.filter(part), '_弐')

    def testPosix(self):
        part = 'A Charm/A \x00Blade'
        self.assertEqual(self._filter_posix.filter(part), 'A Charm_A _Blade')

    def testVfat(self):
        part = 'A Word: F**k you?'
        self.assertEqual(self._filter_vfat.filter(part), 'A Word_ F__k you_')

    def testWhitespace(self):
        part = 'This is just a test!'
        self.assertEqual(self._filter_whitespace.filter(part),
                         'This_is_just_a_test!')

    def testPrintable(self):
        # The apostrophe is U+2019 (right single quotation mark), not the
        # ASCII one: together with the dagger and the emoji it gives three
        # characters outside printable ASCII.
        part = 'Supper’s Ready† 😽'
        self.assertEqual(self._filter_printable.filter(part),
                         'Supper_s Ready_ _')

    def testAll(self):
        part = 'Greatest Ever! Soul: The Definitive Collection'
        self.assertEqual(self._filter_all.filter(part),
                         'Greatest_Ever!_Soul__The_Definitive_Collection')