Merge pull request #187 from RecursiveForest/rewrite-accuraterip

AccurateRip V2 support
This commit is contained in:
JoeLametta
2017-09-15 23:12:40 +02:00
committed by GitHub
20 changed files with 836 additions and 864 deletions

View File

@@ -1,6 +1,7 @@
# -*- Mode: Python; test-case-name: whipper.test.test_common_accurip -*-
# vi:si:et:sw=4:sts=4:ts=4
# Copyright (C) 2017 Samantha Baldwin
# Copyright (C) 2009 Thomas Vander Stichele
# This file is part of whipper.
@@ -18,128 +19,261 @@
# You should have received a copy of the GNU General Public License
# along with whipper. If not, see <http://www.gnu.org/licenses/>.
import errno
import os
import requests
import struct
import urlparse
import urllib2
from errno import EEXIST
from os import makedirs
from os.path import dirname, exists, join
from whipper.common import directory
from whipper.program.arc import accuraterip_checksum
import logging
logger = logging.getLogger(__name__)
_CACHE_DIR = directory.cache_path()
ACCURATERIP_URL = "http://www.accuraterip.com/accuraterip/"
_CACHE_DIR = join(directory.cache_path(), 'accurip')
class AccuCache:
    """
    I cache raw AccurateRip responses on disk, below _CACHE_DIR.

    The cache file for a URL is located by the path component of that URL.
    """

    def __init__(self):
        if not os.path.exists(_CACHE_DIR):
            logger.debug('Creating cache directory %s', _CACHE_DIR)
            os.makedirs(_CACHE_DIR)

    def _getPath(self, url):
        """
        Map an AccurateRip URL to its cache file path.

        @rtype: str
        """
        # split path starts with /
        return os.path.join(_CACHE_DIR, urlparse.urlparse(url)[2][1:])

    def retrieve(self, url, force=False):
        """
        Return the parsed responses for the given URL, downloading them
        first when they are not cached yet or when force is set.

        @param url:   AccurateRip URL to retrieve
        @param force: download even if a cached copy exists
        @returns: the result of getAccurateRipResponses() on the cached
                  data, or None if nothing is cached after the download
                  attempt.
        """
        logger.debug("Retrieving AccurateRip URL %s", url)
        path = self._getPath(url)
        logger.debug("Cached path: %s", path)
        if force:
            logger.debug("forced to download")
            self.download(url)
        elif not os.path.exists(path):
            logger.debug("%s does not exist, downloading", path)
            self.download(url)

        # download() caches nothing on a 404, so the file can still be absent
        if not os.path.exists(path):
            logger.debug("%s does not exist, not in database", path)
            return None

        data = self._read(url)
        return getAccurateRipResponses(data)

    def download(self, url):
        """
        Download the URL and store the body in the cache.

        @returns: the downloaded data, or None on an HTTP 404.
        @raises urllib2.HTTPError: for any HTTP error other than 404.
        """
        # FIXME: download url as a task too
        try:
            handle = urllib2.urlopen(url)
            try:
                data = handle.read()
            finally:
                # release the connection even if reading fails
                handle.close()
        except urllib2.HTTPError as e:
            if e.code == 404:
                return None
            raise

        self._cache(url, data)
        return data

    def _cache(self, url, data):
        """
        Write data to the cache file for url, creating parent directories.

        @raises OSError: if the directory cannot be created for a reason
                         other than already existing.
        """
        path = self._getPath(url)
        try:
            os.makedirs(os.path.dirname(path))
        except OSError as e:
            # an already existing directory is the normal case, not a failure
            if e.errno != errno.EEXIST:
                logger.debug('Could not make dir %s: %r' % (
                    path, str(e)))
                raise

        with open(path, 'wb') as handle:
            handle.write(data)

    def _read(self, url):
        """
        Return the cached bytes for url.
        """
        logger.debug("Reading %s from cache", url)
        path = self._getPath(url)
        with open(path, 'rb') as handle:
            return handle.read()
class EntryNotFound(Exception):
    """
    Raised by get_db_entry() when a disc entry is neither in the local cache
    nor retrievable from the AccurateRip database.
    """
def getAccurateRipResponses(data):
    """
    Split a raw AccurateRip reply into individual responses.

    The reply is a concatenation of records. Each record starts with a
    one-byte track count and is 1 + 12 + trackCount * 9 bytes long.

    @param data: the raw reply; empty or None yields an empty list
    @rtype: list of AccurateRipResponse
    """
    ret = []

    while data:
        # Slice instead of index: indexing bytes gives an int on Python 3,
        # which struct.unpack rejects; a slice behaves the same on Python 2.
        trackCount = struct.unpack("B", data[:1])[0]
        nbytes = 1 + 12 + trackCount * (1 + 8)

        ret.append(AccurateRipResponse(data[:nbytes]))
        data = data[nbytes:]

    return ret
class _AccurateRipResponse(object):
    """
    I represent the response of the AccurateRip online database.

    An AccurateRip response contains a collection of metadata identifying a
    particular digital audio compact disc.

    For disc level metadata it contains the track count, two internal disc
    IDs, and the CDDB disc ID.

    A checksum and a confidence score is stored sequentially for each track in
    the disc index, which excludes any audio hidden in track pre-gaps (such as
    HTOA).

    The response is stored as a packed binary structure.

    @ivar num_tracks:  number of tracks in the response
    @ivar trackCount:  legacy name for num_tracks
    @ivar discId1:     first disc ID, as 8 lowercase hex digits
    @ivar discId2:     second disc ID, as 8 lowercase hex digits
    @ivar cddbDiscId:  CDDB disc ID, as 8 lowercase hex digits
    @type confidences: list of int
    @type checksums:   list of str
    """

    def __init__(self, data):
        """
        The checksums and confidences arrays are indexed by relative track
        position, so track 1 will have array index 0, track 2 will have array
        index 1, and so forth. HTOA and other hidden tracks are not included.

        @param data: one packed response record
        @raises struct.error: if data is shorter than the record it announces
        """
        # One-byte fields are read through slices: indexing bytes gives an
        # int on Python 3, which struct.unpack does not accept.
        self.num_tracks = struct.unpack("B", data[0:1])[0]
        # keep the older attribute name readable for existing callers
        self.trackCount = self.num_tracks

        self.discId1 = "%08x" % struct.unpack("<L", data[1:5])[0]
        self.discId2 = "%08x" % struct.unpack("<L", data[5:9])[0]
        self.cddbDiscId = "%08x" % struct.unpack("<L", data[9:13])[0]

        self.confidences = []
        self.checksums = []

        # Each track record is 9 bytes: confidence (1), checksum (4) and
        # 4 further bytes that are not read here.
        pos = 13
        for _ in range(self.num_tracks):
            confidence = struct.unpack("B", data[pos:pos + 1])[0]
            checksum = "%08x" % struct.unpack("<L", data[pos + 1:pos + 5])[0]
            self.confidences.append(confidence)
            self.checksums.append(checksum)
            pos += 9

    def _fields(self):
        # the values that define equality between two responses
        return [
            self.num_tracks, self.discId1, self.discId2, self.cddbDiscId,
            self.confidences, self.checksums
        ]

    def __eq__(self, other):
        try:
            theirs = [
                other.num_tracks, other.discId1, other.discId2,
                other.cddbDiscId, other.confidences, other.checksums
            ]
        except AttributeError:
            # not response-like: let Python fall back to its default
            return NotImplemented
        return self._fields() == theirs

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal


# legacy public name, still referenced by getAccurateRipResponses()
AccurateRipResponse = _AccurateRipResponse
def _split_responses(raw_entry):
responses = []
while raw_entry:
track_count = struct.unpack("B", raw_entry[0])[0]
nbytes = 1 + 12 + track_count * (1 + 8)
responses.append(_AccurateRipResponse(raw_entry[:nbytes]))
raw_entry = raw_entry[nbytes:]
return responses
def calculate_checksums(track_paths):
    """
    Compute the AccurateRip v1 and v2 checksum of every given track file.

    The result is a dictionary of two lists of 8-digit hex strings, in track
    order: {'v1': ['deadbeef', ...], 'v2': [...]}

    A track whose checksum could not be computed gets None in place of the
    string, and an error is logged for it.

    HTOA checksums are not included in the database and are not calculated.
    """
    total = len(track_paths)
    logger.debug('checksumming %d tracks' % total)

    # (result key, v2 flag, error message) for each AccurateRip version,
    # in the order they are computed for every track
    versions = (
        ('v1', False,
         'could not calculate AccurateRip v1 checksum for track %d %r'),
        ('v2', True,
         'could not calculate AccurateRip v2 checksum for track %d %r'),
    )
    sums = {'v1': [], 'v2': []}

    # This is done sequentially because it is very fast.
    for number, path in enumerate(track_paths, 1):
        for key, use_v2, failure in versions:
            value = accuraterip_checksum(
                path, number, total, wave=True, v2=use_v2
            )
            if value:
                sums[key].append("%08x" % value)
            else:
                logger.error(failure % (number, path))
                sums[key].append(None)

    return sums
def _download_entry(path, timeout=30):
    """
    Download a raw disc entry from the AccurateRip database.

    @param path:    entry path, appended to ACCURATERIP_URL
    @param timeout: seconds to wait for the server before giving up;
                    without one, requests waits indefinitely
    @returns: the response body, or None if the request failed, timed out
              or returned a non-OK status (an error is logged in each case).
    """
    url = ACCURATERIP_URL + path
    logger.debug('downloading AccurateRip entry from %s', url)
    try:
        resp = requests.get(url, timeout=timeout)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout) as e:
        logger.error('error retrieving AccurateRip entry: %r' % e)
        return None
    if not resp.ok:
        logger.error('error retrieving AccurateRip entry: %s %s %r' % (
            resp.status_code, resp.reason, resp
        ))
        return None
    return resp.content
def _save_entry(raw_entry, path):
    """
    Write a raw AccurateRip entry to path, creating parent directories.

    If the directory cannot be created for a reason other than already
    existing, an error is logged and nothing is written.

    @param raw_entry: the bytes to store
    @param path:      destination file
    """
    logger.debug('saving AccurateRip entry to %s', path)
    # XXX: os.makedirs(exist_ok=True) in py3
    try:
        makedirs(dirname(path))
    except OSError as e:
        if e.errno != EEXIST:
            logger.error('could not save entry to %s: %r' % (path, str(e)))
            return
    # close the handle deterministically instead of relying on the GC
    with open(path, 'wb') as handle:
        handle.write(raw_entry)
def get_db_entry(path):
    """
    Retrieve cached AccurateRip disc entry as array of _AccurateRipResponses.
    Downloads entry from accuraterip.com on cache fault.

    `path' is in the format of the output of table.accuraterip_path().

    @raises EntryNotFound: if the entry is empty or could not be retrieved.
    """
    cached_path = join(_CACHE_DIR, path)
    if exists(cached_path):
        logger.debug('found accuraterip entry at %s', cached_path)
        # close the handle deterministically instead of relying on the GC
        with open(cached_path, 'rb') as handle:
            raw_entry = handle.read()
    else:
        raw_entry = _download_entry(path)
        if raw_entry:
            _save_entry(raw_entry, cached_path)
    if not raw_entry:
        logger.warning('entry not found in AccurateRip database')
        raise EntryNotFound
    return _split_responses(raw_entry)
def _assign_checksums_and_confidences(tracks, checksums, responses):
for i, track in enumerate(tracks):
for v in ('v1', 'v2'):
track.AR[v]['CRC'] = checksums[v][i]
track.AR['DBMaxConfidence'], track.AR['DBMaxConfidenceCRC'] = max(
[(r.confidences[i], r.checksums[i]) for r in responses],
key=lambda t: t[0]
)
def _match_responses(tracks, responses):
"""
Match and save track accuraterip response checksums against
all non-hidden tracks.
Returns True if every track has a match for every entry for either
AccurateRip version.
"""
for r in responses:
for i, track in enumerate(tracks):
for v in ('v1', 'v2'):
if track.AR[v]['CRC'] == r.checksums[i]:
if r.confidences[i] > track.AR[v]['DBConfidence']:
track.AR[v]['DBCRC'] = r.checksums[i]
track.AR[v]['DBConfidence'] = r.confidences[i]
logger.debug(
'track %d matched response %s in AccurateRip'
' database: %s crc %s confidence %s' %
(i, r.cddbDiscId, v, track.AR[v]['DBCRC'],
track.AR[v]['DBConfidence'])
)
return any((
all([t.AR['v1']['DBCRC'] for t in tracks]),
all([t.AR['v2']['DBCRC'] for t in tracks])
))
def verify_result(result, responses, checksums):
    """
    Verify track AccurateRip checksums against database responses.

    Stores track checksums and database values on result.

    @returns: False if any argument is empty or result has no non-HTOA
              track; otherwise the result of matching the tracks against
              the responses.
    """
    if not (result and responses and checksums):
        return False
    # exclude HTOA from AccurateRip verification
    # NOTE: if pre-gap hidden audio support is expanded to include
    # tracks other than HTOA, this is invalid.
    # A real list is needed: filter() is a lazy, always-truthy iterator on
    # Python 3, which would defeat the emptiness check below.
    tracks = [t for t in result.tracks if t.number != 0]
    if not tracks:
        return False
    _assign_checksums_and_confidences(tracks, checksums, responses)
    return _match_responses(tracks, responses)
def print_report(result):
    """
    Print AccurateRip verification results to stdout.

    One line is printed per track in result.tracks: the verdict, the
    confidence, both calculated checksums and the database checksum(s).
    """
    for track in result.tracks:
        # htoa tracks do not have an ARCRC; report them before reading any
        # of their AR fields
        if track.number == 0:
            print('track 0: unknown (not tracked)')
            continue

        status = 'rip NOT accurate'
        conf = '(not found)'
        db = 'notfound'
        db_max = track.AR['DBMaxConfidence']
        if db_max is not None:
            db = track.AR['DBMaxConfidenceCRC']
            conf = '(max confidence %3d)' % db_max

        matched = [
            track.AR[v]['DBCRC'] for v in ('v1', 'v2') if track.AR[v]['DBCRC']
        ]
        if matched:
            status = 'rip accurate'
            db = ', '.join(matched)
            # Skip versions without a match: comparing None with an int
            # only works on Python 2.
            known = [
                track.AR[v]['DBConfidence'] for v in ('v1', 'v2')
                if track.AR[v]['DBConfidence'] is not None
            ]
            max_conf = max(known) if known else None
            if max_conf and db_max is not None and max_conf < db_max:
                conf = '(confidence %3d of %3d)' % (max_conf, db_max)

        if not (track.AR['v1']['CRC'] or track.AR['v2']['CRC']):
            logger.error(
                'no track AR CRC on non-HTOA track %d' % track.number
            )
            print('track %2d: unknown (error)' % track.number)
        else:
            print('track %2d: %-16s %-23s v1 [%s], v2 [%s], DB [%s]' % (
                track.number, status, conf,
                track.AR['v1']['CRC'], track.AR['v2']['CRC'], db
            ))

View File

@@ -24,8 +24,6 @@ import wave
from whipper.extern.task import task as etask
from whipper.program.arc import accuraterip_checksum
import logging
logger = logging.getLogger(__name__)
@@ -49,27 +47,3 @@ class CRC32Task(etask.Task):
self.checksum = binascii.crc32(d) & 0xffffffff
self.stop()
class FastAccurateRipChecksumTask(etask.Task):
    """
    I calculate the AccurateRip checksum of a single track file by calling
    accuraterip_checksum, and store the outcome in self.checksum.

    @ivar checksum: the value returned by accuraterip_checksum; None until
                    the task has run.
    """

    description = 'Calculating (Fast) AccurateRip checksum'

    def __init__(self, path, trackNumber, trackCount, wave, v2=False):
        """
        @param path:        path of the track file to checksum
        @param trackNumber: passed through to accuraterip_checksum
        @param trackCount:  passed through to accuraterip_checksum
        @param wave:        passed through to accuraterip_checksum
        @param v2:          passed through to accuraterip_checksum
        """
        self.checksum = None
        self.path = path
        self.trackNumber, self.trackCount = trackNumber, trackCount
        self._wave, self._v2 = wave, v2

    def start(self, runner):
        etask.Task.start(self, runner)
        # hand the actual work to the runner with a delay of 0.0
        self.schedule(0.0, self._arc)

    def _arc(self):
        # compute the checksum, publish it, then end the task
        self.checksum = accuraterip_checksum(
            self.path, self.trackNumber, self.trackCount,
            self._wave, self._v2)
        self.stop()

View File

@@ -23,12 +23,12 @@ Common functionality and class for all programs using whipper.
"""
import musicbrainzngs
import re
import os
import sys
import time
from whipper.common import common, mbngs, cache, path
from whipper.common import checksum
from whipper.common import accurip, cache, checksum, common, mbngs, path
from whipper.program import cdrdao, cdparanoia
from whipper.image import image
from whipper.extern.task import task
@@ -178,34 +178,34 @@ class Program:
template_part += ' (%s)' % metadata.barcode
return template_part
def getPath(self, outdir, template, mbdiscid, i, disambiguate=False):
def getPath(self, outdir, template, mbdiscid, metadata, track_number=None):
"""
Based on the template, get a complete path for the given track,
minus extension.
Also works for the disc name, using disc variables for the template.
Return disc or track path relative to outdir according to
template. Track paths do not include extension.
@param outdir: the directory where to write the files
@type outdir: unicode
@param template: the template for writing the file
@type template: unicode
@param i: track number (0 for HTOA, or for disc)
@type i: int
Tracks are named according to the track template, filling in
the variables and adding the file extension. Variables
exclusive to the track template are:
- %t: track number
- %a: track artist
- %n: track title
- %s: track sort name
@rtype: unicode
Disc files (.cue, .log, .m3u) are named according to the disc
template, filling in the variables and adding the file
extension. Variables for both disc and track template are:
- %A: album artist
- %S: album sort name
- %d: disc title
- %y: release year
- %r: release type, lowercase
- %R: Release type, normal case
- %x: audio extension, lowercase
- %X: audio extension, uppercase
"""
assert type(outdir) is unicode, "%r is not unicode" % outdir
assert type(template) is unicode, "%r is not unicode" % template
# the template is similar to grip, except for %s/%S/%r/%R
# see #gripswitches
# returns without extension
v = {}
v['t'] = '%02d' % i
# default values
v['A'] = 'Unknown Artist'
v['d'] = mbdiscid # fallback for title
v['r'] = 'unknown'
@@ -215,59 +215,38 @@ class Program:
v['x'] = 'flac'
v['X'] = v['x'].upper()
v['y'] = '0000'
if track_number is not None:
v['a'] = v['A']
v['t'] = '%02d' % track_number
if track_number == 0:
v['n'] = 'Hidden Track One Audio'
else:
v['n'] = 'Unknown Track %d' % track_number
v['a'] = v['A']
if i == 0:
v['n'] = 'Hidden Track One Audio'
else:
v['n'] = 'Unknown Track %d' % i
if self.metadata:
release = self.metadata.release or '0000'
if metadata:
release = metadata.release or '0000'
v['y'] = release[:4]
v['A'] = self._filter.filter(self.metadata.artist)
v['S'] = self._filter.filter(self.metadata.sortName)
v['d'] = self._filter.filter(self.metadata.title)
v['B'] = self.metadata.barcode
v['C'] = self.metadata.catalogNumber
if self.metadata.releaseType:
v['R'] = self.metadata.releaseType
v['r'] = self.metadata.releaseType.lower()
if i > 0:
try:
v['a'] = self._filter.filter(
self.metadata.tracks[i - 1].artist)
v['s'] = self._filter.filter(
self.metadata.tracks[i - 1].sortName)
v['n'] = self._filter.filter(
self.metadata.tracks[i - 1].title)
except IndexError, e:
print 'ERROR: no track %d found, %r' % (i, e)
raise
else:
v['A'] = self._filter.filter(metadata.artist)
v['S'] = self._filter.filter(metadata.sortName)
v['d'] = self._filter.filter(metadata.title)
v['B'] = metadata.barcode
v['C'] = metadata.catalogNumber
if metadata.releaseType:
v['R'] = metadata.releaseType
v['r'] = metadata.releaseType.lower()
if track_number > 0:
v['a'] = self._filter.filter(
metadata.tracks[track_number - 1].artist)
v['s'] = self._filter.filter(
metadata.tracks[track_number - 1].sortName)
v['n'] = self._filter.filter(
metadata.tracks[track_number - 1].title)
elif track_number == 0:
# htoa defaults to disc's artist
v['a'] = self._filter.filter(self.metadata.artist)
v['a'] = self._filter.filter(metadata.artist)
# when disambiguating, use catalogNumber then barcode
if disambiguate:
templateParts = template.split(os.sep)
# Find the section of the template with the release name
for i, part in enumerate(templateParts):
if "%d" in part:
templateParts[i] = self.addDisambiguation(part, self.metadata) # noqa: E501
break
else:
# No parts of the template contain the release
templateParts[-1] = self.addDisambiguation(templateParts[-1], self.metadata) # noqa: E501
template = os.path.join(*templateParts)
logger.debug('Disambiguated template to %r' % template)
import re
template = re.sub(r'%(\w)', r'%(\1)s', template)
ret = os.path.join(outdir, template % v)
return ret
return os.path.join(outdir, template % v)
def getCDDB(self, cddbdiscid):
"""
@@ -579,118 +558,55 @@ class Program:
t = image.ImageRetagTask(cueImage, taglists)
runner.run(t)
def verifyImage(self, runner, responses):
def verifyImage(self, runner, table):
"""
verify table against accuraterip and cue_path track lengths
Verify our image against the given AccurateRip responses.
Needs an initialized self.result.
Will set accurip and friends on each TrackResult.
Populates self.result.tracks with above TrackResults.
"""
logger.debug('verifying Image against %d AccurateRip responses',
len(responses or []))
cueImage = image.Image(self.cuePath)
# assigns track lengths
verifytask = image.ImageVerifyTask(cueImage)
cuetask = image.AccurateRipChecksumTask(cueImage)
runner.run(verifytask)
runner.run(cuetask)
if verifytask.exception:
logger.error(verifytask.exceptionMessage)
return False
self._verifyImageWithChecksums(responses, cuetask.checksums)
responses = accurip.get_db_entry(table.accuraterip_path())
logger.info('%d AccurateRip response(s) found' % len(responses))
def _verifyImageWithChecksums(self, responses, checksums):
# loop over tracks to set our calculated AccurateRip CRC's
for i, csum in enumerate(checksums):
trackResult = self.result.getTrackResult(i + 1)
trackResult.ARCRC = csum
checksums = accurip.calculate_checksums([
os.path.join(os.path.dirname(self.cuePath), t.indexes[1].path)
for t in filter(lambda t: t.number != 0, cueImage.cue.table.tracks)
])
if not (checksums and any(checksums['v1']) and any(checksums['v2'])):
return False
return accurip.verify_result(self.result, responses, checksums)
if not responses:
logger.warning('No AccurateRip responses, cannot verify.')
return
def write_m3u(self, discname):
m3uPath = u'%s.m3u' % discname
with open(m3uPath, 'w') as f:
f.write(u'#EXTM3U\n'.encode('utf-8'))
for track in self.result.tracks:
if not track.filename:
# false positive htoa
continue
if track.number == 0:
length = (self.result.table.getTrackStart(1) /
common.FRAMES_PER_SECOND)
else:
length = (self.result.table.getTrackLength(track.number) /
common.FRAMES_PER_SECOND)
# now loop to match responses
for i, csum in enumerate(checksums):
trackResult = self.result.getTrackResult(i + 1)
confidence = None
response = None
# match against each response's checksum for this track
for j, r in enumerate(responses):
if "%08x" % csum == r.checksums[i]:
response = r
logger.debug(
"Track %02d matched response %d of %d in "
"AccurateRip database",
i + 1, j + 1, len(responses))
trackResult.accurip = True
# FIXME: maybe checksums should be ints
trackResult.ARDBCRC = int(r.checksums[i], 16)
# arsum = csum
confidence = r.confidences[i]
trackResult.ARDBConfidence = confidence
if not trackResult.accurip:
logger.warning("Track %02d: not matched in "
"AccurateRip database", i + 1)
# I have seen AccurateRip responses with 0 as confidence
# for example, Best of Luke Haines, disc 1, track 1
maxConfidence = -1
maxResponse = None
for r in responses:
if r.confidences[i] > maxConfidence:
maxConfidence = r.confidences[i]
maxResponse = r
logger.debug('Track %02d: found max confidence %d' % (
i + 1, maxConfidence))
trackResult.ARDBMaxConfidence = maxConfidence
if not response:
logger.warning('Track %02d: none of the responses matched.',
i + 1)
trackResult.ARDBCRC = int(
maxResponse.checksums[i], 16)
else:
trackResult.ARDBCRC = int(response.checksums[i], 16)
# TODO MW: Update this further for ARv2 code
def getAccurateRipResults(self):
"""
@rtype: list of str
"""
res = []
# loop over tracks
for i, trackResult in enumerate(self.result.tracks):
status = 'rip NOT accurate'
if trackResult.accurip:
status = 'rip accurate '
c = "(not found) "
ar = ", DB [notfound]"
if trackResult.ARDBMaxConfidence:
c = "(max confidence %3d)" % trackResult.ARDBMaxConfidence
if trackResult.ARDBConfidence is not None:
if trackResult.ARDBConfidence \
< trackResult.ARDBMaxConfidence:
c = "(confidence %3d of %3d)" % (
trackResult.ARDBConfidence,
trackResult.ARDBMaxConfidence)
ar = ", DB [%08x]" % trackResult.ARDBCRC
# htoa tracks (i == 0) do not have an ARCRC
if trackResult.ARCRC is None:
assert trackResult.number == 0, \
'no trackResult.ARCRC on non-HTOA track %d' % \
trackResult.number
res.append("Track 0: unknown (not tracked)")
else:
res.append("Track %2d: %s %s [%08x]%s" % (
trackResult.number, status, c, trackResult.ARCRC, ar))
return res
target_path = common.getRelativePath(track.filename, m3uPath)
u = u'#EXTINF:%d,%s\n' % (length, target_path)
f.write(u.encode('utf-8'))
u = '%s\n' % target_path
f.write(u.encode('utf-8'))
def writeCue(self, discName):
assert self.result.table.canCue()