From e6f13ccf84db59d6e7f0ca9f95fe0f784ae31671 Mon Sep 17 00:00:00 2001 From: Thomas Vander Stichele Date: Fri, 11 Sep 2009 15:40:44 +0000 Subject: [PATCH] * HACKING: Note unicode handling. * morituri/test/test_image_image.py: * morituri/image/table.py: * morituri/program/cdparanoia.py: * morituri/common/checksum.py: Use unicode for paths. Use repr for path representation. * morituri/test/test_common_checksum.py: Add test for unicode audio file name. --- ChangeLog | 13 +++++++++++++ HACKING | 11 +++++++++++ morituri/common/checksum.py | 20 +++++++++++++------- morituri/image/table.py | 15 +++++++++++++-- morituri/program/cdparanoia.py | 2 ++ morituri/test/test_common_checksum.py | 14 +++++++++++++- morituri/test/test_image_image.py | 6 +++--- 7 files changed, 68 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9b1202c..85dc4f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2009-09-11 Thomas Vander Stichele + + * HACKING: + Note unicode handling. + * morituri/test/test_image_image.py: + * morituri/image/table.py: + * morituri/program/cdparanoia.py: + * morituri/common/checksum.py: + Use unicode for paths. + Use repr for path representation. + * morituri/test/test_common_checksum.py: + Add test for unicode audio file name. + 2009-09-11 Thomas Vander Stichele * morituri/image/cue.py: diff --git a/HACKING b/HACKING index e656a1e..d71e7d1 100644 --- a/HACKING +++ b/HACKING @@ -17,3 +17,14 @@ track 2: frame start 17811, 18481 CD frames track 11: frame start 166858, 25103 CD frames (14760564 audio frames) 191961 total CD frames + +unicode +------- +- All text files should be read and written as unicode. +- All strings that came from the outside should be converted to unicode objects. +- Use asserts liberally to ensure this so we catch problems earlier. +- All gst.parse_launch() pipelines should be passed as utf-8; use + encode('utf-8') +- morituri.extern.log.log is not unicode-safe; don't pass it unicode objects; + for example, always use %r to log paths +- run with RIP_DEBUG=5 once in a while to catch unicode/logging errors. diff --git a/morituri/common/checksum.py b/morituri/common/checksum.py index 4938af3..59eeafe 100644 --- a/morituri/common/checksum.py +++ b/morituri/common/checksum.py @@ -46,13 +46,17 @@ class ChecksumTask(task.Task): ie 16 bit stereo is 4 bytes per frame. If frameLength < 0 it is treated as 'unknown' and calculated. + @type path: unicode @type frameStart: int @param frameStart: the frame to start at """ - self.debug('Creating checksum task on %s from %d to %d', + assert type(path) is unicode, "%r is not unicode" % path + + # use repr/%r because path can be unicode + self.debug('Creating checksum task on %r from %d to %d', path, frameStart, frameLength) if not os.path.exists(path): - raise IndexError, '%s does not exist' % path + raise IndexError, '%r does not exist' % path self._path = path self._frameStart = frameStart @@ -71,7 +75,8 @@ class ChecksumTask(task.Task): self._pipeline = gst.parse_launch(''' filesrc location="%s" ! decodebin ! audio/x-raw-int ! - appsink name=sink sync=False emit-signals=True''' % self._path) + appsink name=sink sync=False emit-signals=True''' % + self._path.encode('utf-8')) self.debug('pausing pipeline') self._pipeline.set_state(gst.STATE_PAUSED) @@ -108,8 +113,9 @@ class ChecksumTask(task.Task): gst.SEEK_FLAG_FLUSH, gst.SEEK_TYPE_SET, self._frameStart, gst.SEEK_TYPE_SET, self._frameEnd + 1) # half-inclusive interval - gst.debug('CRCing %s from sector %d to sector %d' % ( - self._path, self._frameStart / common.SAMPLES_PER_FRAME, + gst.debug('CRCing %r from sector %d to sector %d' % ( + self._path, + self._frameStart / common.SAMPLES_PER_FRAME, (self._frameEnd + 1) / common.SAMPLES_PER_FRAME)) # FIXME: sending it with frameEnd set screws up the seek, we don't get # everything for flac; fixed in recent -good @@ -182,7 +188,7 @@ class ChecksumTask(task.Task): if not self._last: # see http://bugzilla.gnome.org/show_bug.cgi?id=578612 print 'ERROR: not a single buffer gotten' - #raise + # FIXME: instead of print, do something useful else: self._checksum = self._checksum % 2 ** 32 self.debug("last offset %r", self._last.offset) @@ -226,7 +232,7 @@ class AccurateRipChecksumTask(ChecksumTask): self._discFrameCounter = 0 # 1-based def __repr__(self): - return "" % ( + return "" % ( self._trackNumber, self._path) def do_checksum_buffer(self, buf, checksum): diff --git a/morituri/image/table.py b/morituri/image/table.py index 217c5b3..655fb75 100644 --- a/morituri/image/table.py +++ b/morituri/image/table.py @@ -79,6 +79,12 @@ class Track: self.cdtext = {} def index(self, number, absolute=None, path=None, relative=None, counter=None): + """ + @type path: unicode or None + """ + if path is not None: + assert type(path) is unicode, "%r is not unicode" % path + i = Index(number, absolute, path, relative, counter) self.indexes[number] = i @@ -111,6 +117,7 @@ class Index: """ @ivar counter: counter for the index source; distinguishes between the matching FILE lines in .cue files for example + @type path: unicode or None """ number = None absolute = None @@ -119,6 +126,10 @@ class Index: counter = None def __init__(self, number, absolute=None, path=None, relative=None, counter=None): + + if path is not None: + assert type(path) is unicode, "%r is not unicode" % path + self.number = number self.absolute = absolute self.path = path @@ -528,7 +539,7 @@ class Table(object, log.Loggable): Assumes all indexes have an absolute offset and will raise if not. """ - self.debug('setFile: track %d, index %d, path %s, ' + self.debug('setFile: track %d, index %d, path %r, ' 'length %r, counter %r', track, index, path, length, counter) t = self.tracks[track - 1] @@ -542,7 +553,7 @@ class Table(object, log.Loggable): i.path = path i.relative = i.absolute - start i.counter = counter - self.debug('Setting path %s, relative %r on ' + self.debug('Setting path %r, relative %r on ' 'track %d, index %d, counter %r', path, i.relative, track, index, counter) try: diff --git a/morituri/program/cdparanoia.py b/morituri/program/cdparanoia.py index 2bc0d89..c064b3b 100644 --- a/morituri/program/cdparanoia.py +++ b/morituri/program/cdparanoia.py @@ -362,6 +362,7 @@ class ReadVerifyTrackTask(task.MultiSeparateTask): self.debug('read and verify with taglist %r', taglist) # FIXME: choose a dir on the same disk/dir as the final path fd, tmppath = tempfile.mkstemp(suffix='.morituri.wav') + tmppath = unicode(tmppath) os.close(fd) self._tmpwavpath = tmppath @@ -376,6 +377,7 @@ class ReadVerifyTrackTask(task.MultiSeparateTask): fd, tmpoutpath = tempfile.mkstemp(suffix='.morituri.%s' % profile.extension) + tmpoutpath = unicode(tmpoutpath) os.close(fd) self._tmppath = tmpoutpath self.tasks.append(encode.EncodeTask(tmppath, tmpoutpath, profile, diff --git a/morituri/test/test_common_checksum.py b/morituri/test/test_common_checksum.py index dd720a8..e458f9a 100644 --- a/morituri/test/test_common_checksum.py +++ b/morituri/test/test_common_checksum.py @@ -12,6 +12,8 @@ import gst from morituri.common import task, checksum +from morituri.test import common + def h(i): return "0x%08x" % i @@ -19,9 +21,19 @@ class EmptyTestCase(unittest.TestCase): def testEmpty(self): # this test makes sure that checksumming empty files doesn't hang self.runner = task.SyncRunner(verbose=False) - fd, path = tempfile.mkstemp(suffix='morituri.test.empty') + fd, path = tempfile.mkstemp(suffix=u'morituri.test.empty') checksumtask = checksum.ChecksumTask(path) # FIXME: do we want a specific error for this ? self.assertRaises(gst.QueryError, self.runner.run, checksumtask, verbose=False) os.unlink(path) + + def testUnicodePath(self): + # this test makes sure we can checksum a unicode path + self.runner = task.SyncRunner(verbose=False) + fd, path = tempfile.mkstemp( + suffix=u'morituri.test.B\xeate Noire.empty') + checksumtask = checksum.ChecksumTask(path) + self.assertRaises(gst.QueryError, self.runner.run, + checksumtask, verbose=False) + os.unlink(path) diff --git a/morituri/test/test_image_image.py b/morituri/test/test_image_image.py index 7dd7340..a35bf4f 100644 --- a/morituri/test/test_image_image.py +++ b/morituri/test/test_image_image.py @@ -16,7 +16,7 @@ def h(i): class TrackSingleTestCase(unittest.TestCase): def setUp(self): self.image = image.Image(os.path.join(os.path.dirname(__file__), - 'track-single.cue')) + u'track-single.cue')) self.runner = task.SyncRunner(verbose=False) self.image.setup(self.runner) @@ -46,7 +46,7 @@ class TrackSingleTestCase(unittest.TestCase): class TrackSeparateTestCase(unittest.TestCase): def setUp(self): self.image = image.Image(os.path.join(os.path.dirname(__file__), - 'track-separate.cue')) + u'track-separate.cue')) self.runner = task.SyncRunner(verbose=False) self.image.setup(self.runner) @@ -75,7 +75,7 @@ class TrackSeparateTestCase(unittest.TestCase): class AudioLengthTestCase(unittest.TestCase): def testLength(self): - path = os.path.join(os.path.dirname(__file__), 'track.flac') + path = os.path.join(os.path.dirname(__file__), u'track.flac') t = image.AudioLengthTask(path) runner = task.SyncRunner() runner.run(t, verbose=False)