From dfa3efe662874969bc07bc395729e4c0af9bbc5e Mon Sep 17 00:00:00 2001 From: Thomas Vander Stichele Date: Fri, 11 Sep 2009 22:16:46 +0000 Subject: [PATCH] * HACKING: More unicode notes. * morituri/common/program.py: * morituri/image/table.py: * morituri/image/toc.py: * morituri/result/result.py: * morituri/rip/cd.py: * morituri/test/test_image_toc.py: Further unicode fixes, for options, CD-Text, paths, ... --- ChangeLog | 12 ++++++++++++ HACKING | 6 ++++++ morituri/common/program.py | 9 +++++++-- morituri/image/table.py | 2 ++ morituri/image/toc.py | 9 +++++++-- morituri/result/result.py | 1 + morituri/rip/cd.py | 28 ++++++++++++++++++---------- morituri/test/test_image_toc.py | 19 +++++++++++++++++++ 8 files changed, 72 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8dc94bb..c257737 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2009-09-12 Thomas Vander Stichele + + * HACKING: + More unicode notes. + * morituri/common/program.py: + * morituri/image/table.py: + * morituri/image/toc.py: + * morituri/result/result.py: + * morituri/rip/cd.py: + * morituri/test/test_image_toc.py: + Further unicode fixes, for options, CD-Text, paths, ... + 2009-09-11 Thomas Vander Stichele * morituri/program/cdrdao.py: diff --git a/HACKING b/HACKING index 4def318..123a08d 100644 --- a/HACKING +++ b/HACKING @@ -29,3 +29,9 @@ unicode for example, always use %r to log paths - run with RIP_DEBUG=5 once in a while to catch unicode/logging errors. - Also use unicode prefix/suffix in tempfile.* methods; to force unicode. +- filesystems on Unix do not have an encoding. file names are bytes. + However, most distros default to a utf-8 interpretation +- You can either treat paths as byte strings all the way without interpreting + (even when writing them to other files), or assume utf-8 on in and out. +- also direct output to a file; redirection sets codec to ASCII and brings out + unicode bugs diff --git a/morituri/common/program.py b/morituri/common/program.py index ff5b12c..638c506 100644 --- a/morituri/common/program.py +++ b/morituri/common/program.py @@ -213,12 +213,17 @@ class Program(log.Loggable): Also works for the disc name, using disc variables for the template. @param outdir: the directory where to write the files - @type outdir: str + @type outdir: unicode @param template: the template for writing the file - @type template: str + @type template: unicode @param i: track number (0 for HTOA) @type i: int + + @rtype: unicode """ + assert type(outdir) is unicode, "%r is not unicode" % outdir + assert type(template) is unicode, "%r is not unicode" % template + # returns without extension v = {} diff --git a/morituri/image/table.py b/morituri/image/table.py index 655fb75..1fc9dfd 100644 --- a/morituri/image/table.py +++ b/morituri/image/table.py @@ -60,6 +60,8 @@ class Track: @type indexes: dict of number -> L{Index} @ivar isrc: ISRC code (12 alphanumeric characters) @type isrc: str + @ivar cdtext: dictionary of CD Text information; see L{CDTEXT_KEYS}. + @type cdtext: str -> unicode """ number = None diff --git a/morituri/image/toc.py b/morituri/image/toc.py index c86fdcf..0fff6c1 100644 --- a/morituri/image/toc.py +++ b/morituri/image/toc.py @@ -128,16 +128,21 @@ class TocFile(object, log.Loggable): if m: key = m.group('key') value = m.group('value') + # usually, value is encoded with octal escapes and in latin-1 + # FIXME: other encodings are possible, does cdrdao handle + # them ? + value = value.decode('string-escape').decode('latin-1') if key in table.CDTEXT_FIELDS: # FIXME: consider ISRC separate for now, but this # is a limitation of our parser approach if state == 'HEADER': self.table.cdtext[key] = value - self.debug('Found disc CD-Text %s: %s', key, value) + self.debug('Found disc CD-Text %s: %r', key, value) elif state == 'TRACK': if key != 'ISRC' or not currentTrack \ or currentTrack.isrc is not None: - self.debug('Found track CD-Text %s: %s', key, value) + self.debug('Found track CD-Text %s: %r', + key, value) currentTrack.cdtext[key] = value # look for header elements diff --git a/morituri/result/result.py b/morituri/result/result.py index db0dd02..b5d3b7a 100644 --- a/morituri/result/result.py +++ b/morituri/result/result.py @@ -26,6 +26,7 @@ from morituri.result import logger class TrackResult: """ + @type filename: unicode @ivar testcrc: 4-byte CRC for the test read @type testcrc: int @ivar copycrc: 4-byte CRC for the copy read diff --git a/morituri/rip/cd.py b/morituri/rip/cd.py index 238ad5e..71cac1c 100644 --- a/morituri/rip/cd.py +++ b/morituri/rip/cd.py @@ -69,6 +69,9 @@ class Rip(logcommand.LogCommand): default, "', '".join(encode.PROFILES.keys())), default=default) + def handleOptions(self, options): + options.track_template = options.track_template.decode('utf-8') + options.disc_template = options.disc_template.decode('utf-8') def do(self, args): prog = program.Program() @@ -117,8 +120,8 @@ See http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1 if metadatas: print 'Matching releases:' for metadata in metadatas: - print 'Artist :', metadata.artist - print 'Title :', metadata.title + print 'Artist : %s' % metadata.artist.encode('utf-8') + print 'Title : %s' % metadata.title.encode('utf-8') # Select one of the returned releases. We just pick the first one. prog.metadata = metadatas[0] @@ -140,7 +143,8 @@ See http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1 "full table's AR URL %s differs from toc AR URL %s" % ( itable.getAccurateRipURL(), ittoc.getAccurateRipURL()) - prog.outdir = self.options.output_directory or os.getcwd() + prog.outdir = (self.options.output_directory or os.getcwd()) + prog.outdir = prog.outdir.decode('utf-8') profile = encode.PROFILES[self.options.profile]() # result @@ -168,6 +172,8 @@ See http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1 path = prog.getPath(prog.outdir, self.options.track_template, mbdiscid, number) + '.' + profile.extension trackResult.number = number + + assert type(path) is unicode, "%r is not unicode" % path trackResult.filename = path if number > 0: trackResult.pregap = itable.tracks[number - 1].getPregap() @@ -175,14 +181,16 @@ See http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1 # FIXME: optionally allow overriding reripping if os.path.exists(path): print 'Verifying track %d of %d: %s' % ( - number, len(itable.tracks), os.path.basename(path)) + number, len(itable.tracks), + os.path.basename(path).encode('utf-8')) if not prog.verifyTrack(runner, trackResult): print 'Verification failed, reripping...' os.unlink(path) if not os.path.exists(path): print 'Ripping track %d of %d: %s' % ( - number, len(itable.tracks), os.path.basename(path)) + number, len(itable.tracks), + os.path.basename(path).encode('utf-8')) prog.ripTrack(runner, trackResult, offset=int(self.options.offset), device=self.parentCommand.options.device, @@ -239,18 +247,18 @@ See http://sourceforge.net/tracker/?func=detail&aid=604751&group_id=2171&atid=1 if not os.path.exists(dirname): os.makedirs(dirname) - self.debug('writing cue file for %s', discName) + self.debug('writing cue file for %r', discName) prog.writeCue(discName) # write .m3u file - m3uPath = '%s.m3u' % discName + m3uPath = u'%s.m3u' % discName handle = open(m3uPath, 'w') - handle.write('#EXTM3U\n') + handle.write(u'#EXTM3U\n') if htoapath: - handle.write('#EXTINF:%d,%s\n' % ( + handle.write(u'#EXTINF:%d,%s\n' % ( itable.getTrackStart(1) / common.FRAMES_PER_SECOND, os.path.basename(htoapath[:-4]))) - handle.write('%s\n' % os.path.basename(htoapath)) + handle.write(u'%s\n' % os.path.basename(htoapath)) for i, track in enumerate(itable.tracks): if not track.audio: diff --git a/morituri/test/test_image_toc.py b/morituri/test/test_image_toc.py index 5efadf5..d740089 100644 --- a/morituri/test/test_image_toc.py +++ b/morituri/test/test_image_toc.py @@ -208,3 +208,22 @@ class CapitalMergeTestCase(unittest.TestCase): self.assertEquals(self.table.getMusicBrainzDiscId(), "MAj3xXf6QMy7G.BIFOyHyq4MySE-") +class UnicodeTestCase(unittest.TestCase): + def setUp(self): + self._performer = u'Jos\xe9 Gonz\xe1lez' + self.toc = toc.TocFile(os.path.join(os.path.dirname(__file__), + '%s.toc' % self._performer)) + self.toc.parse() + self.assertEquals(len(self.toc.table.tracks), 10) + + def testGetTrackLength(self): + t = self.toc.table.tracks[0] + # first track has known length because the .toc is a single file + self.assertEquals(self.toc.getTrackLength(t), 12001) + # last track has unknown length + t = self.toc.table.tracks[-1] + self.assertEquals(self.toc.getTrackLength(t), -1) + + def testGetTrackPerformer(self): + t = self.toc.table.tracks[0] + self.assertEquals(t.cdtext['PERFORMER'], self._performer)