Merge pull request #435 from ABCbum/develop

Fix whipper's MusicBrainz Disc ID calculation for CDs with data tracks that are not positioned at the end of the disc
2019-12-28 13:56:14 +01:00
parent 8f5559ebc8 8c41f4ddb3
commit 25104145f0
6 changed files with 50 additions and 65 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ install:
    # Dependencies
    - sudo apt-get -qq update
    - pip install --upgrade -qq pip
-    - sudo apt-get -qq install cdparanoia cdrdao flac gir1.2-glib-2.0 libcdio-dev libgirepository1.0-dev libiso9660-dev libsndfile1-dev sox swig libcdio-utils
+    - sudo apt-get -qq install cdparanoia cdrdao flac gir1.2-glib-2.0 libcdio-dev libgirepository1.0-dev libiso9660-dev libsndfile1-dev sox swig libcdio-utils libdiscid0
    # newer version of pydcio requires newer version of libcdio than travis has
    - pip install pycdio==0.21
    # install rest of dependencies
--- a/3
+++ b/3
@@ -10,6 +10,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
    flac \
    gir1.2-glib-2.0 \
    git \
+    libdiscid0 \
    libiso9660-dev \
    libsndfile1-dev \
    libtool \
@@ -27,7 +28,7 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
    sox \
    swig \
    && apt-get clean && rm -rf /var/lib/apt/lists/* \
-    && pip3 --no-cache-dir install pycdio==2.1.0
+    && pip3 --no-cache-dir install pycdio==2.1.0 discid

 # libcdio-paranoia / libcdio-utils are wrongfully packaged in Debian, thus built manually
 # see https://github.com/whipper-team/whipper/pull/237#issuecomment-367985625
--- a/README.md
+++ b/README.md
@@ -131,6 +131,7 @@ Whipper relies on the following packages in order to run correctly and provide a
 - [setuptools](https://pypi.python.org/pypi/setuptools), for installation, plugins support
 - [requests](https://pypi.python.org/pypi/requests), for retrieving AccurateRip database entries
 - [pycdio](https://pypi.python.org/pypi/pycdio/), for drive identification (required for drive offset and caching behavior to be stored in the configuration file).
+- [discid](https://pypi.org/project/discid/), for calculating Musicbrainz disc id.
  - To avoid bugs it's advised to use the most recent `pycdio` version with the corresponding `libcdio` release or, if stuck to old pycdio versions, **0.20**/**0.21** with `libcdio` ≥ **0.90** ≤ **0.94**. All other combinations won't probably work.
 - [ruamel.yaml](https://pypi.org/project/ruamel.yaml/), for generating well formed YAML report logfiles
 - [libsndfile](http://www.mega-nerd.com/libsndfile/), for reading wav files
@@ -148,6 +149,7 @@ Some dependencies aren't available in the PyPI. They can be probably installed u
 - [flac](https://xiph.org/flac/)
 - [sox](http://sox.sourceforge.net/)
 - [git](https://git-scm.com/) or [mercurial](https://www.mercurial-scm.org/)
+- [libdiscid](https://musicbrainz.org/doc/libdiscid)

 PyPI installable dependencies are listed in the [requirements.txt](https://github.com/whipper-team/whipper/blob/master/requirements.txt) file and can be installed issuing the following command:

--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ PyGObject
 requests
 ruamel.yaml
 setuptools_scm
+discid
--- a/whipper/image/table.py
+++ b/whipper/image/table.py
@@ -340,50 +340,15 @@ class Table:
            logger.debug('getMusicBrainzDiscId: returning cached %r',
                         self.mbdiscid)
            return self.mbdiscid
+
+        from discid import put
+
        values = self._getMusicBrainzValues()

-        # MusicBrainz disc id does not take into account data tracks
-        import base64
-        import hashlib
-        sha1 = hashlib.sha1
-
-        sha = sha1()
-
-        # number of first track
-        sha.update(("%02X" % values[0]).encode())
-
-        # number of last track
-        sha.update(("%02X" % values[1]).encode())
-
-        sha.update(("%08X" % values[2]).encode())
-
-        # offsets of tracks
-        for i in range(1, 100):
-            try:
-                offset = values[2 + i]
-            except IndexError:
-                offset = 0
-            sha.update(("%08X" % offset).encode())
-
-        digest = sha.digest()
-        assert len(digest) == 20, \
-            "digest should be 20 chars, not %d" % len(digest)
-
-        # The RFC822 spec uses +, /, and = characters, all of which are special
-        # HTTP/URL characters. To avoid the problems with dealing with that, I
-        # (Rob) used ., _, and -
-
-        # base64 altchars specify replacements for + and /
-        result = base64.b64encode(digest, b'._').decode()
-
-        # now replace =
-        result = result.replace("=", "-")
-        assert len(result) == 28, \
-            "Result should be 28 characters, not %d" % len(result)
-
-        logger.debug('getMusicBrainzDiscId: returning %r', result)
-        self.mbdiscid = result
-        return result
+        disc = put(values[0], values[1], values[2], values[3:])
+        logger.debug('getMusicBrainzDiscId: returning %r', disc.id)
+        self.mbdiscid = disc.id
+        return disc.id

    def getMusicBrainzSubmitURL(self):
        host = config.Config().get_musicbrainz_server()
@@ -443,30 +408,39 @@ class Table:
        # number of first track
        result.append(1)

-        # number of last audio track
-        result.append(self.getAudioTracks())
+        # number of last audio track (default: number of audio tracks)
+        lastTrack = self.getAudioTracks()
+        result.append(lastTrack)

-        leadout = self.leadout
-        # if the disc is multi-session, last track is the data track,
-        # and we should subtract 11250 + 150 from the last track's offset
-        # for the leadout
-        if self.hasDataTracks():
-            assert not self.tracks[-1].audio
-            leadout = self.tracks[-1].getIndex(1).absolute - 11250 - 150
-
-        # treat leadout offset as track 0 offset
-        result.append(150 + leadout)
+        dataTrackLast = False
+        additional = 0
+        offsets = []

        # offsets of tracks
-        for i in range(1, 100):
-            try:
-                track = self.tracks[i - 1]
-                if not track.audio:
-                    continue
-                offset = track.getIndex(1).absolute + 150
-                result.append(offset)
-            except IndexError:
-                pass
+        for i in range(0, len(self.tracks)):
+            track = self.tracks[i]
+            if not track.audio:
+                # if the data track is not at the end
+                if i < len(self.tracks) - 1:
+                    additional += 1
+                else:
+                    # if the data track is last
+                    dataTrackLast = True
+                    sectors = self.tracks[-1].getIndex(1).absolute - 11400
+                    # treat leadout offset as track 0 offset
+                    sectors += 150
+                continue
+            offset = track.getIndex(1).absolute + 150
+            offsets.append(offset)
+
+        if not dataTrackLast:
+            # the end of the last audio track, +1 since getTrackEnd returned
+            # value is always down by 1 unit. Which means that's actually
+            # offsets[-1] + getTrackLength(lastTrack).
+            sectors = self.getTrackEnd(lastTrack + additional) + 1 + 150
+
+        result.append(sectors)
+        result.extend(offsets)

        logger.debug('MusicBrainz values: %r', result)
        return result
--- a/whipper/test/test_image_toc.py
+++ b/whipper/test/test_image_toc.py
@@ -271,6 +271,13 @@ class CapitalMergeTestCase(common.TestCase):
        self.assertEqual(self.table.getFrameLength(), 173530)
        self.assertEqual(self.table.duration(), 2313733)

+    def testMusicBrainzDataTrackFirst(self):
+        self.table = copy.deepcopy(self.toc2.table)
+        self.table.merge(self.toc1.table)
+        print(self.table.tracks)
+        self.assertEqual(self.table.getMusicBrainzDiscId(),
+                         "QTYYFFAgNK4Np2EHjfPTBavqtw8-")
+

 class UnicodeTestCase(common.TestCase, common.UnicodeTestMixin):