Files
whipper-gui/morituri/common/checksum.py
Merlijn Wajer 6ddb5d0114 Add gstreamer-less flac encoder and tagging (#121)
* Add encoding using Xiph.org 'flac' program.

This adds a FlacEncodeTask that encodes wave files to flac files.
This commit also replaces morituri's EncodeTask with FlacEncodeTask, however, in
morituri, EncodeTask also does the tagging.

FlacEncodeTask will not perform the tagging.
So we will need an extra task for the tagging - this will be added soon.

Meanwhile, do not merge this commit to master yet.

* Add tagging using mutagen.

Replace the gstreamer tagging code with mutagen tagging code.
getTagList is rewritten to return a dictionary of tags, which are then simply
passed to mutagen.

The way it is set up right now is not the best - I don't think it makes sense
for tagging to take place in program/cdparanoia.py ; but this is how the current
code did it.

I suggest that, when we rip out all the gstreamer code, we also move the tagging
to a more sensible place; and then also make the tagging not be an actual
'task.Task'.

* Add gstreamer-less CRC32 version

Only works on wave files at this point. Should not be a problem, I think.

* Use proper musicbrainz tags and ALBUM tag.

* Add mutagen to .travis.yml
2017-02-02 21:50:47 +01:00

457 lines
15 KiB
Python

# -*- Mode: Python; test-case-name: morituri.test.test_common_checksum -*-
# vi:si:et:sw=4:sts=4:ts=4
# Morituri - for those about to RIP
# Copyright (C) 2009 Thomas Vander Stichele
# This file is part of morituri.
#
# morituri is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# morituri is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with morituri. If not, see <http://www.gnu.org/licenses/>.
import os
import struct
import zlib
import binascii
import wave
import gst
from morituri.common import common, task
from morituri.common import gstreamer as cgstreamer
from morituri.extern.task import gstreamer
from morituri.extern.task import task as etask
from morituri.program.arc import accuraterip_checksum
import logging
logger = logging.getLogger(__name__)
# checksums are not CRC's. a CRC is a specific type of checksum.
class ChecksumTask(gstreamer.GstPipelineTask):
    """
    I am a task that calculates a checksum of the decoded audio data.

    The decoded audio is handed to L{do_checksum_buffer} in chunks of
    common.BYTES_PER_FRAME bytes; subclasses implement the actual checksum.

    @ivar checksum: the resulting checksum; stays None unless the task
                    stopped after receiving every requested sample.
    """

    logCategory = 'ChecksumTask'

    # this object needs a main loop to stop
    description = 'Calculating checksum'

    def __init__(self, path, sampleStart=0, sampleLength=-1):
        """
        A sample is considered a set of samples for each channel;
        ie 16 bit stereo is 4 bytes per sample.
        If sampleLength < 0 it is treated as 'unknown' and calculated.

        @type  path:         unicode
        @param path:         the audio file to checksum
        @type  sampleStart:  int
        @param sampleStart:  the sample to start at
        @type  sampleLength: int
        @param sampleLength: the number of samples to checksum; a negative
                             value means until the end of the file
        @raise IndexError:   if path does not exist
        """
        # sampleLength can be e.g. -588 when it is -1 * SAMPLES_PER_FRAME
        assert type(path) is unicode, "%r is not unicode" % path

        self.logName = "ChecksumTask 0x%x" % id(self)

        # use repr/%r because path can be unicode
        if sampleLength < 0:
            logger.debug(
                'Creating checksum task on %r from sample %d until the end',
                path, sampleStart)
        else:
            logger.debug(
                'Creating checksum task on %r from sample %d for %d samples',
                path, sampleStart, sampleLength)

        if not os.path.exists(path):
            raise IndexError('%r does not exist' % path)

        self._path = path
        self._sampleStart = sampleStart
        self._sampleLength = sampleLength
        self._sampleEnd = None  # last requested sample (inclusive)
        self._checksum = 0  # running value passed to do_checksum_buffer
        self._bytes = 0  # number of bytes received
        self._first = None  # sample offset of the first buffer received
        self._last = None  # last buffer pulled from the sink
        self._adapter = gst.Adapter()

        self.checksum = None  # result

        cgstreamer.removeAudioParsers()

    ### gstreamer.GstPipelineTask implementations

    def getPipelineDesc(self):
        """
        @return: the pipeline description that decodes the file into an
                 appsink named 'sink'.
        """
        return '''
filesrc location="%s" !
decodebin name=decode ! audio/x-raw-int !
appsink name=sink sync=False emit-signals=True
''' % gstreamer.quoteParse(self._path).encode('utf-8')

    def _getSampleLength(self):
        """
        Query the sink for the length of the file.

        @return: the length of the file in samples, or None when the
                 duration query failed (the exception is set on the task).
        """
        # get length in samples of file
        sink = self.pipeline.get_by_name('sink')

        logger.debug('query duration')
        try:
            length, qformat = sink.query_duration(gst.FORMAT_DEFAULT)
        except gst.QueryError as e:
            self.setException(e)
            return None

        # wavparse 0.10.14 returns in bytes
        if qformat == gst.FORMAT_BYTES:
            logger.debug('query returned in BYTES format')
            # 4 bytes per sample; floor division keeps the count integral
            length //= 4
        logger.debug('total sample length of file: %r', length)

        return length

    def paused(self):
        """
        Work out the sample range to checksum, seek to it when it is not
        the whole file, hook up the appsink callbacks and schedule the
        switch to PLAYING.

        @raise Exception: if the seek event is refused by the sink
        """
        sink = self.pipeline.get_by_name('sink')

        length = self._getSampleLength()
        if length is None:
            # the query failed; the exception was already set on the task
            return

        if self._sampleLength < 0:
            self._sampleLength = length - self._sampleStart
            logger.debug('sampleLength is queried as %d samples',
                         self._sampleLength)
        else:
            logger.debug('sampleLength is known, and is %d samples',
                         self._sampleLength)

        self._sampleEnd = self._sampleStart + self._sampleLength - 1
        logger.debug('sampleEnd is sample %d', self._sampleEnd)

        logger.debug('event')

        if self._sampleStart == 0 and self._sampleEnd + 1 == length:
            logger.debug('No need to seek, crcing full file')
        else:
            # the segment end only is respected since -good 0.10.14.1
            event = gst.event_new_seek(
                1.0, gst.FORMAT_DEFAULT,
                gst.SEEK_FLAG_FLUSH,
                gst.SEEK_TYPE_SET, self._sampleStart,
                gst.SEEK_TYPE_SET, self._sampleEnd + 1)  # half-inclusive
            logger.debug(
                'CRCing %r from frame %d to frame %d (excluded)',
                self._path,
                self._sampleStart // common.SAMPLES_PER_FRAME,
                (self._sampleEnd + 1) // common.SAMPLES_PER_FRAME)
            # FIXME: sending it with sampleEnd set screws up the seek, we
            # don't get # everything for flac; fixed in recent -good
            result = sink.send_event(event)
            logger.debug('event sent, result %r', result)
            if not result:
                msg = 'Failed to select samples with GStreamer seek event'
                logger.critical(msg)
                raise Exception(msg)

        sink.connect('new-buffer', self._new_buffer_cb)
        sink.connect('eos', self._eos_cb)

        logger.debug('scheduling setting to play')
        # since set_state returns non-False, adding it as timeout_add
        # will repeatedly call it, and block the main loop; so
        #   gobject.timeout_add(0L, self.pipeline.set_state, gst.STATE_PLAYING)
        # would not work.

        def play():
            self.pipeline.set_state(gst.STATE_PLAYING)
            return False
        self.schedule(0, play)

        #self.pipeline.set_state(gst.STATE_PLAYING)
        logger.debug('scheduled setting to play')

    def stopped(self):
        """
        Check that every requested sample was received and, if so, publish
        the result in L{checksum}.

        Sets EmptyError on the task when no buffer was received and
        MissingFrames when the last received sample is not the requested
        end; in both cases L{checksum} is left as None.
        """
        logger.debug('stopped')
        # NOTE(review): this is a truthiness test, so a last buffer that
        # evaluates false would take this path too - confirm intended.
        if not self._last:
            # see http://bugzilla.gnome.org/show_bug.cgi?id=578612
            logger.debug(
                'not a single buffer gotten, setting exception EmptyError')
            self.setException(common.EmptyError('not a single buffer gotten'))
            return

        # reduce to an unsigned 32 bit value
        self._checksum = self._checksum % 2 ** 32
        logger.debug("last buffer's sample offset %r", self._last.offset)
        logger.debug("last buffer's sample size %r", len(self._last) // 4)
        last = self._last.offset + len(self._last) // 4 - 1
        logger.debug("last sample offset in buffer: %r", last)
        logger.debug("requested sample end: %r", self._sampleEnd)
        logger.debug("requested sample length: %r", self._sampleLength)
        logger.debug("checksum: %08X", self._checksum)
        logger.debug("bytes: %d", self._bytes)
        if self._sampleEnd != last:
            msg = 'did not get all samples, %d of %d missing' % (
                self._sampleEnd - last, self._sampleEnd)
            logger.warning(msg)
            self.setExceptionAndTraceback(common.MissingFrames(msg))
            return

        self.checksum = self._checksum

    ### subclass methods

    def do_checksum_buffer(self, buf, checksum):
        """
        Subclasses should implement this.

        @param buf:      a byte buffer containing two 16-bit samples per
                         channel.
        @type  buf:      C{str}
        @param checksum: the checksum so far, as returned by the
                         previous call.
        @type  checksum: C{int}

        @return: the updated checksum, handed back in on the next call.
        """
        raise NotImplementedError

    ### private methods

    def _new_buffer_cb(self, sink):
        """
        Handle the appsink 'new-buffer' signal: pull the buffer, feed the
        accumulated data to L{do_checksum_buffer} one
        common.BYTES_PER_FRAME chunk at a time and report progress.
        """
        buf = sink.emit('pull-buffer')
        gst.log('received new buffer at offset %r with length %r' % (
            buf.offset, buf.size))
        if self._first is None:
            self._first = buf.offset
            logger.debug('first sample is sample offset %r', self._first)
        self._last = buf

        assert len(buf) % 4 == 0, "buffer is not a multiple of 4 bytes"

        # FIXME: gst-python 0.10.14.1 doesn't have adapter_peek/_take wrapped
        # see http://bugzilla.gnome.org/show_bug.cgi?id=576505
        self._adapter.push(buf)

        while self._adapter.available() >= common.BYTES_PER_FRAME:
            # FIXME: in 0.10.14.1, take_buffer leaks a ref
            buf = self._adapter.take_buffer(common.BYTES_PER_FRAME)

            self._checksum = self.do_checksum_buffer(buf, self._checksum)
            self._bytes += len(buf)

            # update progress
            sample = self._first + self._bytes // 4
            samplesDone = sample - self._sampleStart
            progress = float(samplesDone) / float(self._sampleLength)
            # marshal to the main thread
            self.schedule(0, self.setProgress, progress)

    def _eos_cb(self, sink):
        """
        Handle the appsink 'eos' signal by scheduling the task to stop.
        """
        # get the last one; FIXME: why does this not get to us before ?
        #self._new_buffer_cb(sink)
        logger.debug('eos, scheduling stop')
        self.schedule(0, self.stop)
class CRC32TaskOld(ChecksumTask):
    """
    I do a simple CRC32 check.

    I am the ChecksumTask-based CRC32 implementation: every buffer handed
    to me is folded into the running value with zlib.crc32.
    """

    description = 'Calculating CRC'

    def do_checksum_buffer(self, buf, checksum):
        """
        @param buf:      the next chunk of decoded audio data
        @param checksum: the CRC so far, as returned by the previous call

        @return: the CRC of everything seen so far, including buf
        """
        running = zlib.crc32(buf, checksum)
        return running
class CRC32Task(etask.Task):
    """
    I calculate the CRC32 of the audio data of a wave file, using only
    the standard library (wave and binascii).

    @ivar checksum: the resulting CRC32 as an unsigned 32 bit value;
                    None until the calculation has run.
    """

    description = 'Calculating CRC'

    # TODO: Support sampleStart, sampleLength later on (should be trivial,
    # just change the read part in _crc32 to skip some samples and/or not
    # read too far)

    def __init__(self, path, sampleStart=0, sampleLength=-1):
        """
        @param path:         the wave file to checksum
        @param sampleStart:  accepted for interface compatibility;
                             currently ignored (see TODO above)
        @param sampleLength: accepted for interface compatibility;
                             currently ignored (see TODO above)
        """
        self.path = path
        # initialised so the attribute exists before the task has run
        self.checksum = None

    def start(self, runner):
        """
        Start the task and schedule the CRC calculation.
        """
        etask.Task.start(self, runner)
        self.schedule(0.0, self._crc32)

    def _crc32(self):
        """
        Read the whole data chunk of the wave file, store its CRC32 in
        L{checksum} and stop the task.
        """
        w = wave.open(self.path)
        try:
            # read the raw data chunk as stored in the file
            d = w._data_chunk.read()
        finally:
            # always release the file handle, also when reading fails
            w.close()

        # mask so the result is unsigned regardless of platform/version
        self.checksum = binascii.crc32(d) & 0xffffffff
        self.stop()
class FastAccurateRipChecksumTask(etask.Task):
    """
    I calculate an AccurateRip checksum by delegating to
    accuraterip_checksum.

    @ivar checksum: the value returned by accuraterip_checksum; None until
                    the calculation has run.
    """

    description = 'Calculating (Fast) AccurateRip checksum'

    def __init__(self, path, trackNumber, trackCount, wave, v2=False):
        """
        All arguments are stored and passed on unchanged to
        accuraterip_checksum when the task runs.

        @param path:        the file to checksum
        @param trackNumber: number of this track
        @param trackCount:  total number of tracks
        @param wave:        passed through as-is
                            (NOTE(review): meaning not visible here)
        @param v2:          passed through as-is; presumably selects the
                            AccurateRip v2 algorithm - confirm
        """
        self.checksum = None
        self.path = path
        self.trackNumber = trackNumber
        self.trackCount = trackCount
        self._wave = wave
        self._v2 = v2

    def start(self, runner):
        """
        Start the task and schedule the checksum calculation.
        """
        etask.Task.start(self, runner)
        self.schedule(0.0, self._arc)

    def _arc(self):
        """
        Run accuraterip_checksum, store its result and stop the task.
        """
        self.checksum = accuraterip_checksum(
            self.path, self.trackNumber, self.trackCount,
            self._wave, self._v2)
        self.stop()
class AccurateRipChecksumTask(ChecksumTask):
    """
    I implement the AccurateRip checksum.

    Every 32 bit little-endian value of the track is multiplied by its
    1-based position in the track and added up, keeping the low 32 bits.
    The first track skips its leading frames and the last track its
    trailing frames, as detailed in L{do_checksum_buffer}.

    See http://www.accuraterip.com/
    """

    description = 'Calculating AccurateRip checksum'

    def __init__(self, path, trackNumber, trackCount, sampleStart=0,
                 sampleLength=-1):
        """
        @param path:         the audio file to checksum
        @param trackNumber:  number of this track; 1 is the first track
        @param trackCount:   total number of tracks; a track whose number
                             equals this is treated as the last track
        @param sampleStart:  see L{ChecksumTask.__init__}
        @param sampleLength: see L{ChecksumTask.__init__}
        """
        ChecksumTask.__init__(self, path, sampleStart, sampleLength)
        self._trackCount = trackCount
        self._trackNumber = trackNumber
        self._discFrameCounter = 0  # 1-based

    def __repr__(self):
        details = (self._trackNumber, self._path)
        return "<AccurateRipCheckSumTask of track %d in %r>" % details

    def do_checksum_buffer(self, buf, checksum):
        """
        Fold one frame of audio data into the checksum.

        @param buf:      the next frame of decoded audio data
        @param checksum: the checksum so far

        @return: the updated checksum, limited to 32 bits
        """
        self._discFrameCounter += 1
        frameNumber = self._discFrameCounter

        # on first track ...
        if self._trackNumber == 1:
            # ... skip first 4 CD frames
            if frameNumber <= 4:
                gst.debug('skipping frame %d' % self._discFrameCounter)
                return checksum
            # ... on 5th frame, only use last value
            if frameNumber == 5:
                (lastValue, ) = struct.unpack("<I", buf[-4:])
                weighted = common.SAMPLES_PER_FRAME * 5 * lastValue
                return (checksum + weighted) & 0xFFFFFFFF

        # on last track, skip last 5 CD frames
        if self._trackNumber == self._trackCount:
            discFrameLength = self._sampleLength / common.SAMPLES_PER_FRAME
            if frameNumber > discFrameLength - 5:
                logger.debug('skipping frame %d', self._discFrameCounter)
                return checksum

        # self._bytes is updated after do_checksum_buffer, so it still
        # counts only the data before this buffer
        firstFactor = self._bytes / 4 + 1
        values = struct.unpack("<%dI" % (len(buf) / 4), buf)
        # each value is weighted by its 1-based position in the track
        weightedSum = sum(
            (firstFactor + index) * value
            for index, value in enumerate(values))

        return (checksum + weightedSum) & 0xFFFFFFFF
class TRMTask(task.GstPipelineTask):
    """
    I calculate a MusicBrainz TRM fingerprint.

    @ivar trm: the resulting trm
    """

    trm = None
    description = 'Calculating fingerprint'

    def __init__(self, path):
        """
        @param path: the audio file to fingerprint

        @raise IndexError: if path does not exist
        """
        if not os.path.exists(path):
            raise IndexError('%s does not exist' % path)

        self._bus = None
        self._trm = None  # filled in by bus_tag_cb
        self.path = path

    def getPipelineDesc(self):
        """
        @return: the pipeline description that decodes the file through
                 the trm element into an appsink named 'sink'.
        """
        # NOTE(review): the path is interpolated as-is, without the
        # quoteParse step ChecksumTask applies - confirm that is intended.
        template = '''
filesrc location="%s" !
decodebin ! audioconvert ! audio/x-raw-int !
trm name=trm !
appsink name=sink sync=False emit-signals=True'''
        return template % self.path

    def parsed(self):
        """
        Connect the progress callback to the appsink.
        """
        appsink = self.pipeline.get_by_name('sink')
        appsink.connect('new-buffer', self._new_buffer_cb)

    def paused(self):
        """
        Query and remember the total duration, used for progress reports.
        """
        gst.debug('query duration')

        duration, _qformat = self.pipeline.query_duration(gst.FORMAT_TIME)
        self._length = duration
        gst.debug('total length: %r' % self._length)
        gst.debug('scheduling setting to play')
        # since set_state returns non-False, adding it as timeout_add
        # will repeatedly call it, and block the main loop; so
        #   gobject.timeout_add(0L, self.pipeline.set_state, gst.STATE_PLAYING)
        # would not work.
        # NOTE(review): nothing in this method actually schedules the state
        # change; presumably the base class takes care of it - confirm.

    # FIXME: can't move this to base class because it triggers too soon
    # in the case of checksum

    def bus_eos_cb(self, bus, message):
        """
        Schedule the task to stop on end of stream.
        """
        gst.debug('eos, scheduling stop')
        self.schedule(0, self.stop)

    def bus_tag_cb(self, bus, message):
        """
        Remember the 'musicbrainz-trmid' tag when the message carries it.
        """
        tags = message.parse_tag()
        if 'musicbrainz-trmid' not in tags.keys():
            return
        self._trm = tags['musicbrainz-trmid']

    def _new_buffer_cb(self, sink):
        """
        Report progress based on the end position of the pulled buffer.
        """
        # this is just for counting progress
        buf = sink.emit('pull-buffer')
        endPosition = buf.timestamp
        if buf.duration != gst.CLOCK_TIME_NONE:
            endPosition = endPosition + buf.duration
        self.setProgress(float(endPosition) / self._length)

    def stopped(self):
        """
        Publish the fingerprint collected from the tags, if any.
        """
        self.trm = self._trm
class MaxSampleTask(ChecksumTask):
    """
    I check for the biggest sample value.

    My "checksum" is the largest absolute value seen among the signed
    16 bit little-endian values of the audio data.
    """

    description = 'Finding highest sample value'

    def do_checksum_buffer(self, buf, checksum):
        """
        @param buf:      the next chunk of decoded audio data
        @param checksum: the biggest absolute value seen so far

        @return: the biggest absolute value seen so far, including buf
        """
        samples = struct.unpack("<%dh" % (len(buf) / 2), buf)
        peak = max(abs(sample) for sample in samples)

        # only a strictly bigger peak replaces the running maximum
        if checksum < peak:
            return peak
        return checksum