More unicode notes. * morituri/common/program.py: * morituri/image/table.py: * morituri/image/toc.py: * morituri/result/result.py: * morituri/rip/cd.py: * morituri/test/test_image_toc.py: Further unicode fixes, for options, CD-Text, paths, ...
374 lines
14 KiB
Python
374 lines
14 KiB
Python
# -*- Mode: Python; test-case-name: morituri.test.test_image_toc -*-
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
|
|
# Morituri - for those about to RIP
|
|
|
|
# Copyright (C) 2009 Thomas Vander Stichele
|
|
|
|
# This file is part of morituri.
|
|
#
|
|
# morituri is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# morituri is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with morituri. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
"""
|
|
Reading .toc files
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import codecs
|
|
|
|
from morituri.common import common, log
|
|
from morituri.image import table
|
|
|
|
# shared
# Matches any `KEY "value"` pair anywhere on a line.  parse() checks the key
# against table.CDTEXT_FIELDS to decide whether it is CD-Text.  Because the
# pattern is unanchored it also matches other quoted records such as
# CATALOG "..." and FILE "...".
_CDTEXT_CANDIDATE_RE = re.compile(r'(?P<key>\w+) "(?P<value>.+)"')

# header
# CATALOG "<digits>": stored by parse() as the table's catalog.
_CATALOG_RE = re.compile(r'^CATALOG "(?P<catalog>\d+)"$')

# records
# TRACK <mode>: starts a new track; parse() treats mode 'AUDIO' as audio.
_TRACK_RE = re.compile(r"""
    ^TRACK # TRACK
    \s(?P<mode>.+)$ # mode (AUDIO, MODE2_FORM_MIX, MODEx/2xxx, ...)
""", re.VERBOSE)

# ISRC "<code>": stored by parse() on the current track.
_ISRC_RE = re.compile(r'^ISRC "(?P<isrc>\w+)"$')

# a HTOA is marked in the cdrdao's TOC as SILENCE
# The length group is an MSF string; parse() converts it with
# common.msfToFrames() and adds it to the current track's length.
_SILENCE_RE = re.compile(r"""
    ^SILENCE # SILENCE
    \s(?P<length>.*)$ # pre-gap length
""", re.VERBOSE)

# ZERO is used as pre-gap source when switching mode
# parse() only uses the length group; the mode group is matched but ignored.
_ZERO_RE = re.compile(r"""
    ^ZERO # ZERO
    \s(?P<mode>.+) # mode (AUDIO, MODEx/2xxx, ...)
    \s(?P<length>.*)$ # zero length
""", re.VERBOSE)


# FILE "<name>" <start> <length>
# NOTE(review): the last group is named 'length' and parse() adds it to the
# track length, although the remark inside the pattern calls it a stop
# offset -- confirm which one the .toc format means.
_FILE_RE = re.compile(r"""
    ^FILE # FILE
    \s+"(?P<name>.*)" # 'file name' in quotes
    \s+(?P<start>.+) # start offset
    \s(?P<length>.+)$ # stop offset
""", re.VERBOSE)

# DATAFILE "<name>" <length>, optionally followed by more text.
# NOTE(review): the group is named 'length' and is used as a length by
# parse(); the 'start offset' remark inside the pattern looks like a
# copy/paste leftover.  Unlike the other record patterns this one is not
# anchored with $, so trailing text is accepted.
_DATAFILE_RE = re.compile(r"""
    ^DATAFILE # DATA FILE
    \s+"(?P<name>.*)" # 'file name' in quotes
    \s+(?P<length>\S+) # start offset
    \s*.* # possible // comment
""", re.VERBOSE)


# FIXME: start can be 0
# START <length>: parse() records index 0 for the current track and keeps
# the length as the pre-gap to add when index 1 is set.
_START_RE = re.compile(r"""
    ^START # START
    \s(?P<length>.*)$ # pre-gap length
""", re.VERBOSE)


# INDEX <offset>: an additional index in the current track; parse() numbers
# these from 2 upwards and stores the offset as the relative offset.
_INDEX_RE = re.compile(r"""
    ^INDEX # INDEX
    \s(?P<offset>.+)$ # start offset
""", re.VERBOSE)
|
|
|
|
class TocFile(object, log.Loggable):
    """
    I parse a cdrdao .toc file and store the result in a table.

    @ivar table: the table that L{parse} fills in.
    @type table: L{table.Table}
    """

    def __init__(self, path):
        """
        @param path: path to the .toc file to parse.
        @type  path: unicode
        """
        assert type(path) is unicode, "%r is not unicode" % path
        self._path = path
        # (1-based line number, text) tuples added through message()
        self._messages = []
        self.table = table.Table()
        self.logName = '<TocFile %08x>' % id(self)

    def parse(self):
        """
        Parse the .toc file given to the constructor.

        Fills in self.table with the catalog number, disc and track CD-Text,
        the tracks with their ISRC codes and indexes, and the leadout.
        Records found before any TRACK line that need a track are reported
        through L{message} and skipped.
        """
        # these two objects start as None then get set as real objects,
        # so no need to complain about them here
        __pychecker__ = 'no-objattrs'
        currentFile = None
        currentTrack = None

        state = 'HEADER'
        counter = 0
        trackNumber = 0
        indexNumber = 0
        absoluteOffset = 0 # running absolute offset of where each track starts
        relativeOffset = 0 # running relative offset, relative to counter source
        currentLength = 0 # accrued during TRACK record parsing, current track
        totalLength = 0 # accrued during TRACK record parsing, total disc
        pregapLength = 0 # length of the pre-gap, current track

        # read everything up front and always close the handle, even when
        # decoding fails half way through the file
        handle = codecs.open(self._path, "r", "utf-8")
        try:
            lines = handle.readlines()
        finally:
            handle.close()

        # the first track's INDEX 1 can only be gotten from the .toc
        # file once the first pregap is calculated; so we add INDEX 1
        # at the end of each parsed TRACK record
        for number, line in enumerate(lines):
            line = line.rstrip()

            # look for CDTEXT stuff in either header or tracks
            m = _CDTEXT_CANDIDATE_RE.search(line)
            if m:
                key = m.group('key')
                if key in table.CDTEXT_FIELDS:
                    # only decode values we actually keep: the candidate
                    # pattern also matches records such as FILE "...",
                    # whose names are not escaped CD-Text and could make
                    # the decode below fail for no reason
                    # usually, value is encoded with octal escapes and in
                    # latin-1
                    # FIXME: other encodings are possible, does cdrdao handle
                    # them ?
                    value = m.group('value')
                    value = value.decode('string-escape').decode('latin-1')
                    # FIXME: consider ISRC separate for now, but this
                    # is a limitation of our parser approach
                    if state == 'HEADER':
                        self.table.cdtext[key] = value
                        self.debug('Found disc CD-Text %s: %r', key, value)
                    elif state == 'TRACK':
                        if key != 'ISRC' or not currentTrack \
                            or currentTrack.isrc is not None:
                            self.debug('Found track CD-Text %s: %r',
                                key, value)
                            currentTrack.cdtext[key] = value

            # look for header elements
            m = _CATALOG_RE.search(line)
            if m:
                self.table.catalog = m.group('catalog')
                self.debug("Found catalog number %s", self.table.catalog)

            # look for TRACK lines
            m = _TRACK_RE.search(line)
            if m:
                state = 'TRACK'

                # set index 1 of previous track if there was one, using
                # pregapLength if applicable
                if currentTrack:
                    # FIXME: why not set absolute offsets too ?
                    # NOTE(review): currentFile is reset to None by SILENCE
                    # and ZERO records, so .path raises AttributeError when
                    # no FILE/DATAFILE followed them; the same holds for the
                    # other currentFile.path uses below -- confirm such
                    # input cannot occur
                    currentTrack.index(1, path=currentFile.path,
                        absolute=absoluteOffset + pregapLength,
                        relative=relativeOffset + pregapLength, counter=counter)
                    self.debug('track %d, added index %r',
                        currentTrack.number, currentTrack.getIndex(1))

                trackNumber += 1
                absoluteOffset += currentLength
                relativeOffset += currentLength
                totalLength += currentLength
                currentLength = 0
                indexNumber = 1
                trackMode = m.group('mode')
                pregapLength = 0

                # FIXME: track mode
                self.debug('found track %d, mode %s', trackNumber, trackMode)
                audio = trackMode == 'AUDIO'
                currentTrack = table.Track(trackNumber, audio=audio)
                self.table.tracks.append(currentTrack)
                continue

            # look for ISRC lines
            m = _ISRC_RE.search(line)
            if m:
                if not currentTrack:
                    # same handling as START and INDEX below, instead of
                    # failing on an attribute of None
                    self.message(number, 'ISRC without preceding TRACK')
                    self.debug('line %d: ISRC without preceding TRACK',
                        number + 1)
                    continue

                isrc = m.group('isrc')
                currentTrack.isrc = isrc
                self.debug('Found ISRC code %s', isrc)

            # look for SILENCE lines
            m = _SILENCE_RE.search(line)
            if m:
                length = m.group('length')
                self.debug('SILENCE of %r', length)
                if currentFile is not None:
                    self.debug('SILENCE after FILE, increasing counter')
                    counter += 1
                    currentFile = None
                currentLength += common.msfToFrames(length)

            # look for ZERO lines
            m = _ZERO_RE.search(line)
            if m:
                if currentFile is not None:
                    self.debug('ZERO after FILE, increasing counter')
                    counter += 1
                    currentFile = None
                length = m.group('length')
                currentLength += common.msfToFrames(length)

            # look for FILE lines
            m = _FILE_RE.search(line)
            if m:
                filePath = m.group('name')
                start = m.group('start')
                length = m.group('length')
                self.debug('FILE %s, start %r, length %r',
                    filePath, common.msfToFrames(start),
                    common.msfToFrames(length))
                if not currentFile or filePath != currentFile.path:
                    counter += 1
                    relativeOffset = 0
                    self.debug('track %d, switched to new FILE, increased counter to %d',
                        trackNumber, counter)
                currentFile = File(filePath, start, length)
                #absoluteOffset += common.msfToFrames(start)
                currentLength += common.msfToFrames(length)

            # look for DATAFILE lines
            m = _DATAFILE_RE.search(line)
            if m:
                filePath = m.group('name')
                length = m.group('length')
                self.debug('FILE %s, length %r',
                    filePath, common.msfToFrames(length))
                if not currentFile or filePath != currentFile.path:
                    counter += 1
                    relativeOffset = 0
                    self.debug('track %d, switched to new FILE, increased counter to %d',
                        trackNumber, counter)
                # FIXME: assume that a MODE2_FORM_MIX track always starts at 0
                currentFile = File(filePath, 0, length)
                #absoluteOffset += common.msfToFrames(start)
                currentLength += common.msfToFrames(length)

            # look for START lines
            m = _START_RE.search(line)
            if m:
                if not currentTrack:
                    self.message(number, 'START without preceding TRACK')
                    self.debug('line %d: START without preceding TRACK',
                        number + 1)
                    continue

                length = common.msfToFrames(m.group('length'))
                currentTrack.index(0, path=currentFile.path,
                    absolute=absoluteOffset,
                    relative=relativeOffset, counter=counter)
                self.debug('track %d, added index %r',
                    currentTrack.number, currentTrack.getIndex(0))
                # store the pregapLength to add it when we index 1 for this
                # track on the next iteration
                pregapLength = length

            # look for INDEX lines
            m = _INDEX_RE.search(line)
            if m:
                if not currentTrack:
                    self.message(number, 'INDEX without preceding TRACK')
                    self.debug('line %d: INDEX without preceding TRACK',
                        number + 1)
                    continue

                indexNumber += 1
                offset = common.msfToFrames(m.group('offset'))
                currentTrack.index(indexNumber, path=currentFile.path,
                    relative=offset, counter=counter)
                self.debug('track %d, added index %r',
                    currentTrack.number, currentTrack.getIndex(indexNumber))

        # handle index 1 of final track, if any
        if currentTrack:
            currentTrack.index(1, path=currentFile.path,
                absolute=absoluteOffset + pregapLength,
                relative=relativeOffset + pregapLength, counter=counter)
            self.debug('track %d, added index %r',
                currentTrack.number, currentTrack.getIndex(1))

        # totalLength was added up to the penultimate track
        self.table.leadout = totalLength + currentLength
        self.debug('leadout: %r', self.table.leadout)

    def message(self, number, message):
        """
        Add a message about a given line in the toc file.

        The message is stored with a line number counting from 1.

        @param number:  line number, counting from 0.
        @param message: the text of the message.
        """
        self._messages.append((number + 1, message))

    def getTrackLength(self, track):
        """
        Get the length of the given track.

        The length is only known when index 1 of this track and index 1 of
        the next track have the same, non-None, counter; it is then the
        difference between their relative offsets.

        @param track: a track from self.table.tracks.

        @returns: the track length in frames, or -1 if it can't be
                  determined and the complete file should be assumed.
        """
        # FIXME: this assumes a track can only be in one file; is this true ?
        i = self.table.tracks.index(track)
        if i == len(self.table.tracks) - 1:
            # last track, so no length known
            return -1

        thisIndex = track.indexes[1] # FIXME: could be more
        nextIndex = self.table.tracks[i + 1].indexes[1] # FIXME: could be 0

        c = thisIndex.counter
        if c is not None and c == nextIndex.counter:
            # they belong to the same source, so their relative delta is length
            return nextIndex.relative - thisIndex.relative

        # FIXME: more logic
        return -1

    def getRealPath(self, path):
        """
        Translate a FILE path from the .toc file to an existing path.

        The path is returned unchanged when it exists.  Otherwise
        backslashes in it are treated as path separators and the result is
        looked up with its extension replaced by wav, then flac: as given
        when it is absolute, else relative to the directory of the .toc
        file, first with and then without its own directory part.

        @param path: the path as written in the .toc file.
        @type  path: unicode

        @returns: the first path found to exist.

        @raise KeyError: when no existing file can be found.
        """
        assert type(path) is unicode, "%r is not unicode" % path

        if os.path.exists(path):
            return path

        # FILE statements can have Windows-style path separators, so convert
        tpath = os.path.join(*path.split('\\'))
        candidatePaths = []

        # if the path is relative:
        # - check relatively to the toc file
        # - check only the filename part relative to the toc file
        if tpath == os.path.abspath(tpath):
            candidatePaths.append(tpath)
        else:
            candidatePaths.append(os.path.join(
                os.path.dirname(self._path), tpath))
            candidatePaths.append(os.path.join(
                os.path.dirname(self._path), os.path.basename(tpath)))

        for candidate in candidatePaths:
            noext, _ = os.path.splitext(candidate)
            for ext in ['wav', 'flac']:
                cpath = '%s.%s' % (noext, ext)
                if os.path.exists(cpath):
                    return cpath

        # call form instead of the old "raise KeyError, msg" statement;
        # the exception raised is the same
        raise KeyError("Cannot find file for %r" % path)
|
|
|
|
class File(object):
    """
    I represent a FILE line in a .toc file.

    Only the path is kept; the start and length given to the constructor
    are accepted but not stored.

    @ivar path: the file name from the FILE line.
    @type path: unicode
    """

    def __init__(self, path, start, length):
        """
        @param path:   the file name from the FILE line.
        @type  path:   unicode
        @param start:  start offset from the FILE line; currently unused.
        @param length: length from the FILE line; currently unused.
        """
        assert type(path) is unicode, "%r is not unicode" % path

        self.path = path

    def __repr__(self):
        return '<File %r>' % (self.path, )
|