Files
Dopo-Goto-Downloader/download_tracks.py
Miguel Astor 2229498645 Initial commit: Dopo Goto music downloader
Python script to download complete discography from tracks.json data file.
Creates organized album directories with cover images and MP3 tracks.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-04 19:55:01 -04:00

179 lines
6.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Dopo Goto Music Downloader
Downloads music tracks and cover images from tracks.json
"""
####################################################################################################
# Copyright (C) 2026 by WallyHackenslacker wallyhackenslacker@noreply.git.hackenslacker.space #
# #
# Permission to use, copy, modify, and/or distribute this software for any purpose with or without #
# fee is hereby granted. #
# #
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS #
# SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE #
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES #
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, #
# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE #
# OF THIS SOFTWARE. #
####################################################################################################
import os
import re
import json
import time
import random
import requests
from urllib.parse import urlparse, unquote
def parse_js_object(content: str) -> dict:
    """Normalize a JavaScript-style object literal into strict JSON and parse it.

    The tracks data uses unquoted keys, trailing commas, and an unusual
    ``albums: {first}, {second}, ...`` layout where sibling album objects
    simply follow the first one; each quirk is rewritten in turn before
    handing the result to ``json.loads``.
    """
    # Quote bare keys: a word followed by ':' that sits right after '{',
    # '[', ',' or the start of a line becomes a quoted JSON key.
    normalized = re.sub(r'(^|[{\[,])\s*(\w+)(\s*:)', r'\1"\2"\3',
                        content, flags=re.MULTILINE)
    # Drop trailing commas before a closing ']' or '}' (illegal in JSON).
    normalized = re.sub(r',(\s*[\]}])', r'\1', normalized)
    # Peel off the outermost braces so the albums section can be rewritten
    # at the top level.
    normalized = normalized.strip()
    if normalized.startswith('{'):
        normalized = normalized[1:]
    if normalized.endswith('}'):
        normalized = normalized[:-1]
    # Turn the comma-separated run of album objects after "albums": into a
    # proper JSON array: open a '[' at the first object and close it at the
    # very end (after shedding any trailing whitespace/comma).
    if re.search(r'"albums"\s*:\s*\{', normalized):
        normalized = re.sub(r'"albums"\s*:\s*\{', '"albums": [{',
                            normalized, count=1)
        normalized = normalized.rstrip().rstrip(',') + ']'
    # Re-wrap as a single object and parse.
    return json.loads('{' + normalized + '}')
def download_file(url: str, filepath: str) -> bool:
    """Stream-download *url* to *filepath* in 8 KiB chunks.

    Returns True on success, False on any network/HTTP error; errors are
    reported on stdout rather than raised so the caller can continue with
    the remaining downloads.
    """
    try:
        print(f" Downloading: {os.path.basename(filepath)}")
        # With stream=True the response holds its connection open until the
        # body is fully consumed or closed; the context manager guarantees
        # the socket is released even when raise_for_status() trips.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        print(f" Downloaded: {os.path.basename(filepath)}")
        return True
    except requests.RequestException as e:
        print(f" Error downloading {url}: {e}")
        # Remove any partially-written file: callers skip paths that already
        # exist, so a leftover fragment would otherwise be mistaken for a
        # completed download on the next run.
        if os.path.exists(filepath):
            try:
                os.remove(filepath)
            except OSError:
                pass  # best-effort cleanup; the error is already reported
        return False
def get_filename_from_url(url: str) -> str:
    """Return the percent-decoded final path component of *url*."""
    decoded_path = unquote(urlparse(url).path)
    return os.path.basename(decoded_path)
def get_extension_from_url(url: str) -> str:
    """Return the extension (dot included) of the file named by *url*."""
    # Decode the URL path and take its last component, then split off the
    # extension; query strings never reach here because urlparse keeps
    # them out of .path.
    name = os.path.basename(unquote(urlparse(url).path))
    return os.path.splitext(name)[1]
def main():
    """Download the full discography described by tracks.json.

    Reads tracks.json from the script's own directory, parses its
    JavaScript-flavored structure, then for each album creates a
    "Dopo Goto - <album>" folder, downloads the cover image and every
    track (skipping files already on disk), and sleeps a random
    0.5-3.0 s after each actual download to pace requests.
    """
    # Read the tracks.json file
    script_dir = os.path.dirname(os.path.abspath(__file__))
    tracks_file = os.path.join(script_dir, 'tracks.json')
    print(f"Reading {tracks_file}...")
    with open(tracks_file, 'r', encoding='utf-8') as f:
        content = f.read()
    # Parse the JavaScript object notation
    print("Parsing track data...")
    data = parse_js_object(content)
    albums = data.get('albums', [])
    print(f"Found {len(albums)} albums\n")
    # Process each album
    for album_idx, album in enumerate(albums, 1):
        album_name = album.get('name', f'Unknown Album {album_idx}')
        cover_url = album.get('cover', '')
        tracks = album.get('tracks', [])
        # Create album directory (idempotent: exist_ok allows re-runs)
        album_dir = os.path.join(script_dir, f"Dopo Goto - {album_name}")
        os.makedirs(album_dir, exist_ok=True)
        print(f"[{album_idx}/{len(albums)}] Processing: {album_name}")
        print(f" Directory: {album_dir}")
        # Download cover image (saved as "cover.<original extension>")
        if cover_url:
            cover_ext = get_extension_from_url(cover_url)
            cover_filepath = os.path.join(album_dir, f"cover{cover_ext}")
            if os.path.exists(cover_filepath):
                print(f" Cover already exists, skipping")
            else:
                download_file(cover_url, cover_filepath)
                # Random delay after download
                delay = random.uniform(0.5, 3.0)
                print(f" Waiting {delay:.1f}s...")
                time.sleep(delay)
        # Download tracks
        for track_idx, track in enumerate(tracks, 1):
            track_name = track.get('name', f'Track {track_idx}')
            track_url = track.get('file', '')
            if not track_url:
                print(f" Skipping track {track_idx}: no URL")
                continue
            # Get filename from URL (keeps the server's original name)
            track_filename = get_filename_from_url(track_url)
            track_filepath = os.path.join(album_dir, track_filename)
            if os.path.exists(track_filepath):
                print(f" [{track_idx}/{len(tracks)}] Already exists: {track_filename}")
                continue
            print(f" [{track_idx}/{len(tracks)}] {track_name}")
            download_file(track_url, track_filepath)
            # Random delay between downloads
            delay = random.uniform(0.5, 3.0)
            print(f" Waiting {delay:.1f}s...")
            time.sleep(delay)
        print(f" Album complete!\n")
    print("All downloads complete!")
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()