* Add encoding using Xiph.org 'flac' program. This adds a FlacEncodeTask that encodes wave files to flac files. This commit also replaces morituri's EncodeTask with FlacEncodeTask, however, in morituri, EncodeTask also does the tagging. FlacEncodeTask will not perform the tagging. So we will need an extra task for the tagging - this will be added soon. Meanwhile, do not merge this commit to master yet. * Add tagging using mutagen. Replace the gstreamer tagging code with mutagen tagging code. getTagList is rewritten to return a dictionary of tags, which are then simply passed to mutagen. The way it is set up right now is not the best - I don't think it makes sense for tagging to take place in program/cdparanoia.py ; but this is how the current code did it. I suggest that, when we rip out all the gstreamer code, we also move the tagging to a more sensible place; and then also make the tagging not be an actual 'task.Task'. * Add gstreamer-less CRC32 version Only works on wave files at this point. Should not be a problem, I think. * Use proper musicbrainz tags and ALBUM tag. * Add mutagen to .travis.yml
457 lines
15 KiB
Python
457 lines
15 KiB
Python
# -*- Mode: Python; test-case-name: morituri.test.test_common_checksum -*-
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
|
|
# Morituri - for those about to RIP
|
|
|
|
# Copyright (C) 2009 Thomas Vander Stichele
|
|
|
|
# This file is part of morituri.
|
|
#
|
|
# morituri is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# morituri is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with morituri. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import os
|
|
import struct
|
|
import zlib
|
|
import binascii
|
|
import wave
|
|
|
|
import gst
|
|
|
|
from morituri.common import common, task
|
|
from morituri.common import gstreamer as cgstreamer
|
|
|
|
from morituri.extern.task import gstreamer
|
|
from morituri.extern.task import task as etask
|
|
|
|
from morituri.program.arc import accuraterip_checksum
|
|
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# checksums are not CRC's. a CRC is a specific type of checksum.
|
|
|
|
|
|
class ChecksumTask(gstreamer.GstPipelineTask):
|
|
"""
|
|
I am a task that calculates a checksum of the decoded audio data.
|
|
|
|
@ivar checksum: the resulting checksum
|
|
"""
|
|
|
|
logCategory = 'ChecksumTask'
|
|
|
|
# this object needs a main loop to stop
|
|
description = 'Calculating checksum'
|
|
|
|
def __init__(self, path, sampleStart=0, sampleLength=-1):
|
|
"""
|
|
A sample is considered a set of samples for each channel;
|
|
ie 16 bit stereo is 4 bytes per sample.
|
|
If sampleLength < 0 it is treated as 'unknown' and calculated.
|
|
|
|
@type path: unicode
|
|
@type sampleStart: int
|
|
@param sampleStart: the sample to start at
|
|
"""
|
|
|
|
# sampleLength can be e.g. -588 when it is -1 * SAMPLES_PER_FRAME
|
|
|
|
assert type(path) is unicode, "%r is not unicode" % path
|
|
|
|
self.logName = "ChecksumTask 0x%x" % id(self)
|
|
|
|
# use repr/%r because path can be unicode
|
|
if sampleLength < 0:
|
|
logger.debug(
|
|
'Creating checksum task on %r from sample %d until the end',
|
|
path, sampleStart)
|
|
else:
|
|
logger.debug(
|
|
'Creating checksum task on %r from sample %d for %d samples',
|
|
path, sampleStart, sampleLength)
|
|
|
|
if not os.path.exists(path):
|
|
raise IndexError('%r does not exist' % path)
|
|
|
|
self._path = path
|
|
self._sampleStart = sampleStart
|
|
self._sampleLength = sampleLength
|
|
self._sampleEnd = None
|
|
self._checksum = 0
|
|
self._bytes = 0 # number of bytes received
|
|
self._first = None
|
|
self._last = None
|
|
self._adapter = gst.Adapter()
|
|
|
|
self.checksum = None # result
|
|
|
|
cgstreamer.removeAudioParsers()
|
|
|
|
### gstreamer.GstPipelineTask implementations
|
|
|
|
def getPipelineDesc(self):
|
|
return '''
|
|
filesrc location="%s" !
|
|
decodebin name=decode ! audio/x-raw-int !
|
|
appsink name=sink sync=False emit-signals=True
|
|
''' % gstreamer.quoteParse(self._path).encode('utf-8')
|
|
|
|
def _getSampleLength(self):
|
|
# get length in samples of file
|
|
sink = self.pipeline.get_by_name('sink')
|
|
|
|
logger.debug('query duration')
|
|
try:
|
|
length, qformat = sink.query_duration(gst.FORMAT_DEFAULT)
|
|
except gst.QueryError, e:
|
|
self.setException(e)
|
|
return None
|
|
|
|
# wavparse 0.10.14 returns in bytes
|
|
if qformat == gst.FORMAT_BYTES:
|
|
logger.debug('query returned in BYTES format')
|
|
length /= 4
|
|
logger.debug('total sample length of file: %r', length)
|
|
|
|
return length
|
|
|
|
|
|
def paused(self):
|
|
sink = self.pipeline.get_by_name('sink')
|
|
|
|
length = self._getSampleLength()
|
|
if length is None:
|
|
return
|
|
|
|
if self._sampleLength < 0:
|
|
self._sampleLength = length - self._sampleStart
|
|
logger.debug('sampleLength is queried as %d samples',
|
|
self._sampleLength)
|
|
else:
|
|
logger.debug('sampleLength is known, and is %d samples' %
|
|
self._sampleLength)
|
|
|
|
self._sampleEnd = self._sampleStart + self._sampleLength - 1
|
|
logger.debug('sampleEnd is sample %d' % self._sampleEnd)
|
|
|
|
logger.debug('event')
|
|
|
|
|
|
if self._sampleStart == 0 and self._sampleEnd + 1 == length:
|
|
logger.debug('No need to seek, crcing full file')
|
|
else:
|
|
# the segment end only is respected since -good 0.10.14.1
|
|
event = gst.event_new_seek(1.0, gst.FORMAT_DEFAULT,
|
|
gst.SEEK_FLAG_FLUSH,
|
|
gst.SEEK_TYPE_SET, self._sampleStart,
|
|
gst.SEEK_TYPE_SET, self._sampleEnd + 1) # half-inclusive
|
|
logger.debug('CRCing %r from frame %d to frame %d (excluded)' % (
|
|
self._path,
|
|
self._sampleStart / common.SAMPLES_PER_FRAME,
|
|
(self._sampleEnd + 1) / common.SAMPLES_PER_FRAME))
|
|
# FIXME: sending it with sampleEnd set screws up the seek, we
|
|
# don't get # everything for flac; fixed in recent -good
|
|
result = sink.send_event(event)
|
|
logger.debug('event sent, result %r', result)
|
|
if not result:
|
|
msg = 'Failed to select samples with GStreamer seek event'
|
|
logger.critical(msg)
|
|
raise Exception(msg)
|
|
sink.connect('new-buffer', self._new_buffer_cb)
|
|
sink.connect('eos', self._eos_cb)
|
|
|
|
logger.debug('scheduling setting to play')
|
|
# since set_state returns non-False, adding it as timeout_add
|
|
# will repeatedly call it, and block the main loop; so
|
|
# gobject.timeout_add(0L, self.pipeline.set_state, gst.STATE_PLAYING)
|
|
# would not work.
|
|
|
|
def play():
|
|
self.pipeline.set_state(gst.STATE_PLAYING)
|
|
return False
|
|
self.schedule(0, play)
|
|
|
|
#self.pipeline.set_state(gst.STATE_PLAYING)
|
|
logger.debug('scheduled setting to play')
|
|
|
|
def stopped(self):
|
|
logger.debug('stopped')
|
|
if not self._last:
|
|
# see http://bugzilla.gnome.org/show_bug.cgi?id=578612
|
|
logger.debug(
|
|
'not a single buffer gotten, setting exception EmptyError')
|
|
self.setException(common.EmptyError('not a single buffer gotten'))
|
|
return
|
|
else:
|
|
self._checksum = self._checksum % 2 ** 32
|
|
logger.debug("last buffer's sample offset %r", self._last.offset)
|
|
logger.debug("last buffer's sample size %r", len(self._last) / 4)
|
|
last = self._last.offset + len(self._last) / 4 - 1
|
|
logger.debug("last sample offset in buffer: %r", last)
|
|
logger.debug("requested sample end: %r", self._sampleEnd)
|
|
logger.debug("requested sample length: %r", self._sampleLength)
|
|
logger.debug("checksum: %08X", self._checksum)
|
|
logger.debug("bytes: %d", self._bytes)
|
|
if self._sampleEnd != last:
|
|
msg = 'did not get all samples, %d of %d missing' % (
|
|
self._sampleEnd - last, self._sampleEnd)
|
|
logger.warning(msg)
|
|
self.setExceptionAndTraceback(common.MissingFrames(msg))
|
|
return
|
|
|
|
self.checksum = self._checksum
|
|
|
|
### subclass methods
|
|
|
|
def do_checksum_buffer(self, buf, checksum):
|
|
"""
|
|
Subclasses should implement this.
|
|
|
|
@param buf: a byte buffer containing two 16-bit samples per
|
|
channel.
|
|
@type buf: C{str}
|
|
@param checksum: the checksum so far, as returned by the
|
|
previous call.
|
|
@type checksum: C{int}
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
### private methods
|
|
|
|
def _new_buffer_cb(self, sink):
|
|
buf = sink.emit('pull-buffer')
|
|
gst.log('received new buffer at offset %r with length %r' % (
|
|
buf.offset, buf.size))
|
|
if self._first is None:
|
|
self._first = buf.offset
|
|
logger.debug('first sample is sample offset %r', self._first)
|
|
self._last = buf
|
|
|
|
assert len(buf) % 4 == 0, "buffer is not a multiple of 4 bytes"
|
|
|
|
# FIXME: gst-python 0.10.14.1 doesn't have adapter_peek/_take wrapped
|
|
# see http://bugzilla.gnome.org/show_bug.cgi?id=576505
|
|
self._adapter.push(buf)
|
|
|
|
while self._adapter.available() >= common.BYTES_PER_FRAME:
|
|
# FIXME: in 0.10.14.1, take_buffer leaks a ref
|
|
buf = self._adapter.take_buffer(common.BYTES_PER_FRAME)
|
|
|
|
self._checksum = self.do_checksum_buffer(buf, self._checksum)
|
|
self._bytes += len(buf)
|
|
|
|
# update progress
|
|
sample = self._first + self._bytes / 4
|
|
samplesDone = sample - self._sampleStart
|
|
progress = float(samplesDone) / float((self._sampleLength))
|
|
# marshal to the main thread
|
|
self.schedule(0, self.setProgress, progress)
|
|
|
|
def _eos_cb(self, sink):
|
|
# get the last one; FIXME: why does this not get to us before ?
|
|
#self._new_buffer_cb(sink)
|
|
logger.debug('eos, scheduling stop')
|
|
self.schedule(0, self.stop)
|
|
|
|
class CRC32TaskOld(ChecksumTask):
|
|
"""
|
|
I do a simple CRC32 check.
|
|
"""
|
|
|
|
description = 'Calculating CRC'
|
|
|
|
def do_checksum_buffer(self, buf, checksum):
|
|
return zlib.crc32(buf, checksum)
|
|
|
|
class CRC32Task(etask.Task):
|
|
# TODO: Support sampleStart, sampleLength later on (should be trivial, just
|
|
# add change the read part in _crc32 to skip some samples and/or not
|
|
# read too far)
|
|
def __init__(self, path, sampleStart=0, sampleLength=-1):
|
|
self.path = path
|
|
|
|
def start(self, runner):
|
|
etask.Task.start(self, runner)
|
|
self.schedule(0.0, self._crc32)
|
|
|
|
def _crc32(self):
|
|
w = wave.open(self.path)
|
|
d = w._data_chunk.read()
|
|
|
|
self.checksum = binascii.crc32(d) & 0xffffffff
|
|
self.stop()
|
|
|
|
|
|
class FastAccurateRipChecksumTask(etask.Task):
|
|
description = 'Calculating (Fast) AccurateRip checksum'
|
|
|
|
def __init__(self, path, trackNumber, trackCount, wave, v2=False):
|
|
self.path = path
|
|
self.trackNumber = trackNumber
|
|
self.trackCount = trackCount
|
|
self._wave = wave
|
|
self._v2 = v2
|
|
self.checksum = None
|
|
|
|
def start(self, runner):
|
|
etask.Task.start(self, runner)
|
|
self.schedule(0.0, self._arc)
|
|
|
|
def _arc(self):
|
|
arc = accuraterip_checksum(self.path, self.trackNumber, self.trackCount,
|
|
self._wave, self._v2)
|
|
self.checksum = arc
|
|
|
|
self.stop()
|
|
|
|
|
|
class AccurateRipChecksumTask(ChecksumTask):
|
|
"""
|
|
I implement the AccurateRip checksum.
|
|
|
|
See http://www.accuraterip.com/
|
|
"""
|
|
|
|
description = 'Calculating AccurateRip checksum'
|
|
|
|
def __init__(self, path, trackNumber, trackCount, sampleStart=0,
|
|
sampleLength=-1):
|
|
ChecksumTask.__init__(self, path, sampleStart, sampleLength)
|
|
self._trackNumber = trackNumber
|
|
self._trackCount = trackCount
|
|
self._discFrameCounter = 0 # 1-based
|
|
|
|
def __repr__(self):
|
|
return "<AccurateRipCheckSumTask of track %d in %r>" % (
|
|
self._trackNumber, self._path)
|
|
|
|
def do_checksum_buffer(self, buf, checksum):
|
|
self._discFrameCounter += 1
|
|
|
|
# on first track ...
|
|
if self._trackNumber == 1:
|
|
# ... skip first 4 CD frames
|
|
if self._discFrameCounter <= 4:
|
|
gst.debug('skipping frame %d' % self._discFrameCounter)
|
|
return checksum
|
|
# ... on 5th frame, only use last value
|
|
elif self._discFrameCounter == 5:
|
|
values = struct.unpack("<I", buf[-4:])
|
|
checksum += common.SAMPLES_PER_FRAME * 5 * values[0]
|
|
checksum &= 0xFFFFFFFF
|
|
return checksum
|
|
|
|
# on last track, skip last 5 CD frames
|
|
if self._trackNumber == self._trackCount:
|
|
discFrameLength = self._sampleLength / common.SAMPLES_PER_FRAME
|
|
if self._discFrameCounter > discFrameLength - 5:
|
|
logger.debug('skipping frame %d', self._discFrameCounter)
|
|
return checksum
|
|
|
|
# self._bytes is updated after do_checksum_buffer
|
|
factor = self._bytes / 4 + 1
|
|
values = struct.unpack("<%dI" % (len(buf) / 4), buf)
|
|
for value in values:
|
|
checksum += factor * value
|
|
factor += 1
|
|
# offset = self._bytes / 4 + i + 1
|
|
# if offset % common.SAMPLES_PER_FRAME == 0:
|
|
# print 'frame %d, ends before %d, last value %08x, CRC %08x' % (
|
|
# offset / common.SAMPLES_PER_FRAME, offset, value, sum)
|
|
|
|
checksum &= 0xFFFFFFFF
|
|
return checksum
|
|
|
|
|
|
class TRMTask(task.GstPipelineTask):
|
|
"""
|
|
I calculate a MusicBrainz TRM fingerprint.
|
|
|
|
@ivar trm: the resulting trm
|
|
"""
|
|
|
|
trm = None
|
|
description = 'Calculating fingerprint'
|
|
|
|
def __init__(self, path):
|
|
if not os.path.exists(path):
|
|
raise IndexError('%s does not exist' % path)
|
|
|
|
self.path = path
|
|
self._trm = None
|
|
self._bus = None
|
|
|
|
def getPipelineDesc(self):
|
|
return '''
|
|
filesrc location="%s" !
|
|
decodebin ! audioconvert ! audio/x-raw-int !
|
|
trm name=trm !
|
|
appsink name=sink sync=False emit-signals=True''' % self.path
|
|
|
|
def parsed(self):
|
|
sink = self.pipeline.get_by_name('sink')
|
|
sink.connect('new-buffer', self._new_buffer_cb)
|
|
|
|
def paused(self):
|
|
gst.debug('query duration')
|
|
|
|
self._length, qformat = self.pipeline.query_duration(gst.FORMAT_TIME)
|
|
gst.debug('total length: %r' % self._length)
|
|
gst.debug('scheduling setting to play')
|
|
# since set_state returns non-False, adding it as timeout_add
|
|
# will repeatedly call it, and block the main loop; so
|
|
# gobject.timeout_add(0L, self.pipeline.set_state, gst.STATE_PLAYING)
|
|
# would not work.
|
|
|
|
|
|
# FIXME: can't move this to base class because it triggers too soon
|
|
# in the case of checksum
|
|
|
|
def bus_eos_cb(self, bus, message):
|
|
gst.debug('eos, scheduling stop')
|
|
self.schedule(0, self.stop)
|
|
|
|
def bus_tag_cb(self, bus, message):
|
|
taglist = message.parse_tag()
|
|
if 'musicbrainz-trmid' in taglist.keys():
|
|
self._trm = taglist['musicbrainz-trmid']
|
|
|
|
def _new_buffer_cb(self, sink):
|
|
# this is just for counting progress
|
|
buf = sink.emit('pull-buffer')
|
|
position = buf.timestamp
|
|
if buf.duration != gst.CLOCK_TIME_NONE:
|
|
position += buf.duration
|
|
self.setProgress(float(position) / self._length)
|
|
|
|
def stopped(self):
|
|
self.trm = self._trm
|
|
|
|
class MaxSampleTask(ChecksumTask):
|
|
"""
|
|
I check for the biggest sample value.
|
|
"""
|
|
|
|
description = 'Finding highest sample value'
|
|
|
|
def do_checksum_buffer(self, buf, checksum):
|
|
values = struct.unpack("<%dh" % (len(buf) / 2), buf)
|
|
absvalues = [abs(v) for v in values]
|
|
m = max(absvalues)
|
|
if checksum < m:
|
|
checksum = m
|
|
|
|
return checksum
|
|
|