From 6ddb5d0114a480a8f65404ea3ea9216f628f633f Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Thu, 2 Feb 2017 21:50:47 +0100 Subject: [PATCH] Add gstreamer-less flac encoder and tagging (#121) * Add encoding using Xiph.org 'flac' program. This adds a FlacEncodeTask that encodes wave files to flac files. This commit also replaces morituri's EncodeTask with FlacEncodeTask, however, in morituri, EncodeTask also does the tagging. FlacEncodeTask will not perform the tagging. So we will need an extra task for the tagging - this will be added soon. Meanwhile, do not merge this commit to master yet. * Add tagging using mutagen. Replace the gstreamer tagging code with mutagen tagging code. getTagList is rewritten to return a dictionary of tags, which are then simply passed to mutagen. The way it is set up right now is not the best - I don't think it makes sense for tagging to take place in program/cdparanoia.py ; but this is how the current code did it. I suggest that, when we rip out all the gstreamer code, we also move the tagging to a more sensible place; and then also make the tagging not be an actual 'task.Task'. * Add gstreamer-less CRC32 version Only works on wave files at this point. Should not be a problem, I think. * Use proper musicbrainz tags and ALBUM tag. * Add mutagen to .travis.yml --- .travis.yml | 2 +- morituri/command/debug.py | 2 +- morituri/common/checksum.py | 23 +++++++++++-- morituri/common/encode.py | 45 ++++++++++++++++++++++++++ morituri/common/program.py | 59 +++++++++------------------------- morituri/image/image.py | 4 +-- morituri/program/cdparanoia.py | 12 +++++-- morituri/program/flac.py | 18 +++++++++++ 8 files changed, 112 insertions(+), 53 deletions(-) create mode 100644 morituri/program/flac.py diff --git a/.travis.yml b/.travis.yml index ef3f8c3..f0ad5c5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ install: # Testing dependencies - sudo apt-get install -qq gstreamer0.10-tools python-gst0.10 - - sudo pip install twisted + - sudo pip install twisted mutagen # Build bundled C utils - cd src diff --git a/morituri/command/debug.py b/morituri/command/debug.py index d180016..8271486 100644 --- a/morituri/command/debug.py +++ b/morituri/command/debug.py @@ -187,7 +187,7 @@ class Encode(BaseCommand): logger.debug('Encoding %s to %s', fromPath.encode('utf-8'), toPath.encode('utf-8')) - encodetask = encode.EncodeTask(fromPath, toPath, profile) + encodetask = encode.FlacEncodeTask(fromPath, toPath) runner.run(encodetask) diff --git a/morituri/common/checksum.py b/morituri/common/checksum.py index 8786d15..0e89dab 100644 --- a/morituri/common/checksum.py +++ b/morituri/common/checksum.py @@ -23,6 +23,8 @@ import os import struct import zlib +import binascii +import wave import gst @@ -262,8 +264,7 @@ class ChecksumTask(gstreamer.GstPipelineTask): logger.debug('eos, scheduling stop') self.schedule(0, self.stop) - -class CRC32Task(ChecksumTask): +class CRC32TaskOld(ChecksumTask): """ I do a simple CRC32 check. """ @@ -273,6 +274,24 @@ class CRC32Task(ChecksumTask): def do_checksum_buffer(self, buf, checksum): return zlib.crc32(buf, checksum) +class CRC32Task(etask.Task): + # TODO: Support sampleStart, sampleLength later on (should be trivial, just + # add change the read part in _crc32 to skip some samples and/or not + # read too far) + def __init__(self, path, sampleStart=0, sampleLength=-1): + self.path = path + + def start(self, runner): + etask.Task.start(self, runner) + self.schedule(0.0, self._crc32) + + def _crc32(self): + w = wave.open(self.path) + d = w._data_chunk.read() + + self.checksum = binascii.crc32(d) & 0xffffffff + self.stop() + class FastAccurateRipChecksumTask(etask.Task): description = 'Calculating (Fast) AccurateRip checksum' diff --git a/morituri/common/encode.py b/morituri/common/encode.py index 1203fe7..9bf2e02 100644 --- a/morituri/common/encode.py +++ b/morituri/common/encode.py @@ -25,12 +25,15 @@ import os import shutil import tempfile +from mutagen.flac import FLAC + from morituri.common import common from morituri.common import gstreamer as cgstreamer from morituri.common import task as ctask from morituri.extern.task import task, gstreamer from morituri.program import sox +from morituri.program import flac import logging logger = logging.getLogger(__name__) @@ -182,6 +185,48 @@ class SoxPeakTask(task.Task): self.peak = sox.peak_level(self.track_path) self.stop() +class FlacEncodeTask(task.Task): + description = 'Encoding to FLAC' + + def __init__(self, track_path, track_out_path, what="track"): + self.track_path = track_path + self.track_out_path = track_out_path + self.new_path = None + self.description = 'Encoding %s to FLAC' % what + + def start(self, runner): + task.Task.start(self, runner) + self.schedule(0.0, self._flac_encode) + + def _flac_encode(self): + self.new_path = flac.encode(self.track_path, self.track_out_path) + self.stop() + +# TODO: Wizzup: Do we really want this as 'Task'...? +# I only made it a task for now because that it's easier to integrate in +# program/cdparanoia.py - where morituri currently does the tagging. +# We should just move the tagging to a more sensible place. +class TaggingTask(task.Task): + description = 'Writing tags to FLAC' + + def __init__(self, track_path, tags): + self.track_path = track_path + self.tags = tags + + def start(self, runner): + task.Task.start(self, runner) + self.schedule(0.0, self._tag) + + def _tag(self): + w = FLAC(self.track_path) + + for k, v in self.tags.items(): + w[k] = v + + w.save() + + self.stop() + class EncodeTask(ctask.GstPipelineTask): """ I am a task that encodes a .wav file. diff --git a/morituri/common/program.py b/morituri/common/program.py index 9f65827..2729507 100644 --- a/morituri/common/program.py +++ b/morituri/common/program.py @@ -450,58 +450,29 @@ class Program: # htoa defaults to disc's artist title = 'Hidden Track One Audio' - # here to avoid import gst eating our options - import gst + tags = {} - ret = gst.TagList() - - # gst-python 0.10.15.1 does not handle unicode -> utf8 string - # conversion - # see http://bugzilla.gnome.org/show_bug.cgi?id=584445 if self.metadata and not self.metadata.various: - ret["album-artist"] = albumArtist.encode('utf-8') - ret[gst.TAG_ARTIST] = trackArtist.encode('utf-8') - ret[gst.TAG_TITLE] = title.encode('utf-8') - ret[gst.TAG_ALBUM] = disc.encode('utf-8') + tags['ALBUMARTIST'] = albumArtist + tags['ARTIST'] = trackArtist + tags['TITLE'] = title + tags['ALBUM'] = disc + + tags['TRACKNUMBER'] = u'%s' % number - # gst-python 0.10.15.1 does not handle tags that are UINT - # see gst-python commit 26fa6dd184a8d6d103eaddf5f12bd7e5144413fb - # FIXME: no way to compare against 'master' version after 0.10.15 - if gst.pygst_version >= (0, 10, 15): - ret[gst.TAG_TRACK_NUMBER] = number if self.metadata: - # works, but not sure we want this - # if gst.pygst_version >= (0, 10, 15): - # ret[gst.TAG_TRACK_COUNT] = len(self.metadata.tracks) - # hack to get a GstDate which we cannot instantiate directly in - # 0.10.15.1 - # FIXME: The dates are strings and must have the format 'YYYY', - # 'YYYY-MM' or 'YYYY-MM-DD'. - # GstDate expects a full date, so default to - # Jan and 1st if MM and DD are missing - date = self.metadata.release - if date: - logger.debug('Converting release date %r to structure', date) - if len(date) == 4: - date += '-01' - if len(date) == 7: - date += '-01' + tags['DATE'] = self.metadata.release - s = gst.structure_from_string('hi,date=(GstDate)%s' % - str(date)) - ret[gst.TAG_DATE] = s['date'] - - # no musicbrainz info for htoa tracks if number > 0: - ret["musicbrainz-trackid"] = mbidTrack - ret["musicbrainz-artistid"] = mbidTrackArtist - ret["musicbrainz-albumid"] = mbidAlbum - ret["musicbrainz-albumartistid"] = mbidTrackAlbum - ret["musicbrainz-discid"] = mbDiscId + tags['MUSICBRAINZ_TRACKID'] = mbidTrack + tags['MUSICBRAINZ_ARTISTID'] = mbidTrackArtist + tags['MUSICBRAINZ_ALBUMID'] = mbidAlbum + tags['MUSICBRAINZ_ALBUMARTISTID'] = mbidTrackAlbum + tags['MUSICBRAINZ_DISCID'] = mbDiscId - # FIXME: gst.TAG_ISRC + # TODO/FIXME: ISRC tag - return ret + return tags def getHTOA(self): """ diff --git a/morituri/image/image.py b/morituri/image/image.py index be2de71..42c692c 100644 --- a/morituri/image/image.py +++ b/morituri/image/image.py @@ -240,8 +240,8 @@ class ImageEncodeTask(task.MultiSeparateTask): root, ext = os.path.splitext(os.path.basename(path)) outpath = os.path.join(outdir, root + '.' + profile.extension) logger.debug('schedule encode to %r', outpath) - taskk = encode.EncodeTask(path, os.path.join(outdir, - root + '.' + profile.extension), profile) + taskk = encode.EncodeTaskFlac(path, os.path.join(outdir, + root + '.' + profile.extension)) self.addTask(taskk) try: diff --git a/morituri/program/cdparanoia.py b/morituri/program/cdparanoia.py index 2bd6162..6daa672 100644 --- a/morituri/program/cdparanoia.py +++ b/morituri/program/cdparanoia.py @@ -490,12 +490,18 @@ class ReadVerifyTrackTask(task.MultiSeparateTask): # here to avoid import gst eating our options from morituri.common import encode - self.tasks.append(encode.EncodeTask(tmppath, tmpoutpath, profile, - taglist=taglist, what=what)) + self.tasks.append(encode.FlacEncodeTask(tmppath, tmpoutpath)) + + # MerlijnWajer: XXX: We run the CRC32Task on the wav file, because it's + # in general stupid to run the CRC32 on the flac file since it already + # has --verify. We should just get rid of this CRC32 step. # make sure our encoding is accurate - self.tasks.append(checksum.CRC32Task(tmpoutpath)) + self.tasks.append(checksum.CRC32Task(tmppath)) self.tasks.append(encode.SoxPeakTask(tmppath)) + # TODO: Move tagging outside of cdparanoia + self.tasks.append(encode.TaggingTask(tmpoutpath, taglist)) + self.checksum = None def stop(self): diff --git a/morituri/program/flac.py b/morituri/program/flac.py new file mode 100644 index 0000000..d83162b --- /dev/null +++ b/morituri/program/flac.py @@ -0,0 +1,18 @@ +from subprocess import check_call, CalledProcessError + +import logging +logger = logging.getLogger(__name__) + +def encode(infile, outfile): + """ + Encodes infile to outfile, with flac. + Uses '-f' because morituri already creates the file. + """ + try: + # TODO: Replace with Popen so that we can catch stderr and write it to + # logging + check_call(['flac', '--silent', '--verify', '-o', outfile, + '-f', infile]) + except CalledProcessError: + logger.exception('flac failed') + raise