Add gstreamer-less flac encoder and tagging (#121)

* Add encoding using Xiph.org 'flac' program.

This adds a FlacEncodeTask that encodes wave files to flac files.
This commit also replaces morituri's EncodeTask with FlacEncodeTask, however, in
morituri, EncodeTask also does the tagging.

FlacEncodeTask will not perform the tagging.
So we will need an extra task for the tagging - this will be added soon.

Meanwhile, do not merge this commit to master yet.

* Add tagging using mutagen.

Replace the gstreamer tagging code with mutagen tagging code.
getTagList is rewritten to return a dictionary of tags, which are then simply
passed to mutagen.

The way it is set up right now is not the best - I don't think it makes sense
for tagging to take place in program/cdparanoia.py ; but this is how the current
code did it.

I suggest that, when we rip out all the gstreamer code, we also move the tagging
to a more sensible place; and then also make the tagging not be an actual
'task.Task'.

* Add gstreamer-less CRC32 version

Only works on wave files at this point. Should not be a problem, I think.

* Use proper musicbrainz tags and ALBUM tag.

* Add mutagen to .travis.yml
This commit is contained in:
Merlijn Wajer
2017-02-02 21:50:47 +01:00
committed by JoeLametta
parent 4dc7a3cf77
commit 6ddb5d0114
8 changed files with 112 additions and 53 deletions

View File

@@ -11,7 +11,7 @@ install:
# Testing dependencies
- sudo apt-get install -qq gstreamer0.10-tools python-gst0.10
- sudo pip install twisted
- sudo pip install twisted mutagen
# Build bundled C utils
- cd src

View File

@@ -187,7 +187,7 @@ class Encode(BaseCommand):
logger.debug('Encoding %s to %s',
fromPath.encode('utf-8'),
toPath.encode('utf-8'))
encodetask = encode.EncodeTask(fromPath, toPath, profile)
encodetask = encode.FlacEncodeTask(fromPath, toPath)
runner.run(encodetask)

View File

@@ -23,6 +23,8 @@
import os
import struct
import zlib
import binascii
import wave
import gst
@@ -262,8 +264,7 @@ class ChecksumTask(gstreamer.GstPipelineTask):
logger.debug('eos, scheduling stop')
self.schedule(0, self.stop)
class CRC32Task(ChecksumTask):
class CRC32TaskOld(ChecksumTask):
"""
I do a simple CRC32 check.
"""
@@ -273,6 +274,24 @@ class CRC32Task(ChecksumTask):
def do_checksum_buffer(self, buf, checksum):
return zlib.crc32(buf, checksum)
class CRC32Task(etask.Task):
# TODO: Support sampleStart, sampleLength later on (should be trivial, just
# add change the read part in _crc32 to skip some samples and/or not
# read too far)
def __init__(self, path, sampleStart=0, sampleLength=-1):
self.path = path
def start(self, runner):
etask.Task.start(self, runner)
self.schedule(0.0, self._crc32)
def _crc32(self):
w = wave.open(self.path)
d = w._data_chunk.read()
self.checksum = binascii.crc32(d) & 0xffffffff
self.stop()
class FastAccurateRipChecksumTask(etask.Task):
description = 'Calculating (Fast) AccurateRip checksum'

View File

@@ -25,12 +25,15 @@ import os
import shutil
import tempfile
from mutagen.flac import FLAC
from morituri.common import common
from morituri.common import gstreamer as cgstreamer
from morituri.common import task as ctask
from morituri.extern.task import task, gstreamer
from morituri.program import sox
from morituri.program import flac
import logging
logger = logging.getLogger(__name__)
@@ -182,6 +185,48 @@ class SoxPeakTask(task.Task):
self.peak = sox.peak_level(self.track_path)
self.stop()
class FlacEncodeTask(task.Task):
description = 'Encoding to FLAC'
def __init__(self, track_path, track_out_path, what="track"):
self.track_path = track_path
self.track_out_path = track_out_path
self.new_path = None
self.description = 'Encoding %s to FLAC' % what
def start(self, runner):
task.Task.start(self, runner)
self.schedule(0.0, self._flac_encode)
def _flac_encode(self):
self.new_path = flac.encode(self.track_path, self.track_out_path)
self.stop()
# TODO: Wizzup: Do we really want this as 'Task'...?
# I only made it a task for now because that it's easier to integrate in
# program/cdparanoia.py - where morituri currently does the tagging.
# We should just move the tagging to a more sensible place.
class TaggingTask(task.Task):
description = 'Writing tags to FLAC'
def __init__(self, track_path, tags):
self.track_path = track_path
self.tags = tags
def start(self, runner):
task.Task.start(self, runner)
self.schedule(0.0, self._tag)
def _tag(self):
w = FLAC(self.track_path)
for k, v in self.tags.items():
w[k] = v
w.save()
self.stop()
class EncodeTask(ctask.GstPipelineTask):
"""
I am a task that encodes a .wav file.

View File

@@ -450,58 +450,29 @@ class Program:
# htoa defaults to disc's artist
title = 'Hidden Track One Audio'
# here to avoid import gst eating our options
import gst
tags = {}
ret = gst.TagList()
# gst-python 0.10.15.1 does not handle unicode -> utf8 string
# conversion
# see http://bugzilla.gnome.org/show_bug.cgi?id=584445
if self.metadata and not self.metadata.various:
ret["album-artist"] = albumArtist.encode('utf-8')
ret[gst.TAG_ARTIST] = trackArtist.encode('utf-8')
ret[gst.TAG_TITLE] = title.encode('utf-8')
ret[gst.TAG_ALBUM] = disc.encode('utf-8')
tags['ALBUMARTIST'] = albumArtist
tags['ARTIST'] = trackArtist
tags['TITLE'] = title
tags['ALBUM'] = disc
tags['TRACKNUMBER'] = u'%s' % number
# gst-python 0.10.15.1 does not handle tags that are UINT
# see gst-python commit 26fa6dd184a8d6d103eaddf5f12bd7e5144413fb
# FIXME: no way to compare against 'master' version after 0.10.15
if gst.pygst_version >= (0, 10, 15):
ret[gst.TAG_TRACK_NUMBER] = number
if self.metadata:
# works, but not sure we want this
# if gst.pygst_version >= (0, 10, 15):
# ret[gst.TAG_TRACK_COUNT] = len(self.metadata.tracks)
# hack to get a GstDate which we cannot instantiate directly in
# 0.10.15.1
# FIXME: The dates are strings and must have the format 'YYYY',
# 'YYYY-MM' or 'YYYY-MM-DD'.
# GstDate expects a full date, so default to
# Jan and 1st if MM and DD are missing
date = self.metadata.release
if date:
logger.debug('Converting release date %r to structure', date)
if len(date) == 4:
date += '-01'
if len(date) == 7:
date += '-01'
tags['DATE'] = self.metadata.release
s = gst.structure_from_string('hi,date=(GstDate)%s' %
str(date))
ret[gst.TAG_DATE] = s['date']
# no musicbrainz info for htoa tracks
if number > 0:
ret["musicbrainz-trackid"] = mbidTrack
ret["musicbrainz-artistid"] = mbidTrackArtist
ret["musicbrainz-albumid"] = mbidAlbum
ret["musicbrainz-albumartistid"] = mbidTrackAlbum
ret["musicbrainz-discid"] = mbDiscId
tags['MUSICBRAINZ_TRACKID'] = mbidTrack
tags['MUSICBRAINZ_ARTISTID'] = mbidTrackArtist
tags['MUSICBRAINZ_ALBUMID'] = mbidAlbum
tags['MUSICBRAINZ_ALBUMARTISTID'] = mbidTrackAlbum
tags['MUSICBRAINZ_DISCID'] = mbDiscId
# FIXME: gst.TAG_ISRC
# TODO/FIXME: ISRC tag
return ret
return tags
def getHTOA(self):
"""

View File

@@ -240,8 +240,8 @@ class ImageEncodeTask(task.MultiSeparateTask):
root, ext = os.path.splitext(os.path.basename(path))
outpath = os.path.join(outdir, root + '.' + profile.extension)
logger.debug('schedule encode to %r', outpath)
taskk = encode.EncodeTask(path, os.path.join(outdir,
root + '.' + profile.extension), profile)
taskk = encode.EncodeTaskFlac(path, os.path.join(outdir,
root + '.' + profile.extension))
self.addTask(taskk)
try:

View File

@@ -490,12 +490,18 @@ class ReadVerifyTrackTask(task.MultiSeparateTask):
# here to avoid import gst eating our options
from morituri.common import encode
self.tasks.append(encode.EncodeTask(tmppath, tmpoutpath, profile,
taglist=taglist, what=what))
self.tasks.append(encode.FlacEncodeTask(tmppath, tmpoutpath))
# MerlijnWajer: XXX: We run the CRC32Task on the wav file, because it's
# in general stupid to run the CRC32 on the flac file since it already
# has --verify. We should just get rid of this CRC32 step.
# make sure our encoding is accurate
self.tasks.append(checksum.CRC32Task(tmpoutpath))
self.tasks.append(checksum.CRC32Task(tmppath))
self.tasks.append(encode.SoxPeakTask(tmppath))
# TODO: Move tagging outside of cdparanoia
self.tasks.append(encode.TaggingTask(tmpoutpath, taglist))
self.checksum = None
def stop(self):

18
morituri/program/flac.py Normal file
View File

@@ -0,0 +1,18 @@
from subprocess import check_call, CalledProcessError
import logging
logger = logging.getLogger(__name__)
def encode(infile, outfile):
"""
Encodes infile to outfile, with flac.
Uses '-f' because morituri already creates the file.
"""
try:
# TODO: Replace with Popen so that we can catch stderr and write it to
# logging
check_call(['flac', '--silent', '--verify', '-o', outfile,
'-f', infile])
except CalledProcessError:
logger.exception('flac failed')
raise