diff --git a/cowboy bebop lofi.url b/cowboy bebop lofi.url
new file mode 100755
index 0000000..5d0aa32
--- /dev/null
+++ b/cowboy bebop lofi.url
@@ -0,0 +1,5 @@
+[{000214A0-0000-0000-C000-000000000046}]
+Prop3=19,11
+[InternetShortcut]
+IDList=
+URL=https://www.youtube.com/watch?v=Z-Rl3LhVakc
diff --git a/download-soundcloud.sh b/download-soundcloud.sh
new file mode 100755
index 0000000..495412f
--- /dev/null
+++ b/download-soundcloud.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Download the SoundCloud URL given as $1 with scdl, then run process_music.sh.
+echo "Downloading $1"
+# NOTE(review): presumably a Python virtualenv that provides scdl -- confirm.
+source .env/bin/activate
+# Quoted so the URL reaches scdl as exactly one argument: an unquoted $1 is
+# word-split and glob-expanded ('?' and '*' are glob characters).
+scdl -l "$1"
+
+./process_music.sh
+
+echo "Done!"
diff --git a/download-spotify.sh b/download-spotify.sh
new file mode 100755
index 0000000..c808b71
--- /dev/null
+++ b/download-spotify.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Download the Spotify URL given as $1 via download_spotify.py, then run process_music.sh.
+echo "Downloading spotify song $1"
+# NOTE(review): presumably a Python virtualenv -- confirm.
+source .env/bin/activate
+# Quoted so the URL reaches the script as exactly one argument: an unquoted $1
+# is word-split and glob-expanded ('?' and '*' are glob characters).
+python3 download_spotify.py "$1"
+
+./process_music.sh
+
+echo "Done!"
diff --git a/download-youtube.sh b/download-youtube.sh
new file mode 100755
index 0000000..b11e219
--- /dev/null
+++ b/download-youtube.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Download the YouTube URL given as $1 via download_youtube.py, then run process_music.sh.
+echo "Downloading youtube song $1"
+# NOTE(review): presumably a Python virtualenv providing yt_dlp -- confirm.
+source .env/bin/activate
+# Quoted so the URL reaches the script as exactly one argument: an unquoted $1
+# is word-split and glob-expanded ('?' and '*' are glob characters).
+python3 download_youtube.py "$1"
+
+./process_music.sh
+
+echo "Done!"
diff --git a/download_soundcloud.py b/download_soundcloud.py
new file mode 100755
index 0000000..95471e4
--- /dev/null
+++ b/download_soundcloud.py
@@ -0,0 +1,32 @@
+import sys
+from subprocess import call
+import subprocess
+
+# Directory the download script must run in (the original code tried to `cd` here).
+MUSIC_DIR = "/home/sem/TODO MUSIC"
+
+
+def main():
+    """Validate the single URL argument and run download-soundcloud.sh on it.
+
+    Calls sys.exit(-1) when the argument count is wrong or the URL does not
+    contain "soundcloud".
+    """
+    if len(sys.argv) != 2:
+        print("Wrong parameters!")
+        sys.exit(-1)
+    url = sys.argv[1].strip()
+    print("passing link " + str(url))
+    if "soundcloud" not in url:
+        print("Url has to be a soundcloud link!")
+        sys.exit(-1)
+    print("executing command")
+    # A `cd` run through its own call(..., shell=True) only affects that
+    # short-lived shell, so the working directory is passed via cwd= instead.
+    # The URL is a separate argv entry (no shell), so shell metacharacters in
+    # it are never interpreted.
+    call(["./download-soundcloud.sh", url], cwd=MUSIC_DIR)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/download_spotify.py b/download_spotify.py
new file mode 100755
index 0000000..17668e0
--- /dev/null
+++ b/download_spotify.py
@@ -0,0 +1,39 @@
+import sys
+from subprocess import call
+import re
+
+url_pattern = "^https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*)$"
+
+# Directory spotdl must run in (the original code tried to `cd` here).
+MUSIC_DIR = "/home/sem/TODO MUSIC"
+
+
+def main():
+    """Validate the single URL argument and download it with spotdl.
+
+    Calls sys.exit(-1) when the argument count is wrong, the argument does not
+    match url_pattern, or it does not contain both "spotify" and "https://".
+    """
+    if len(sys.argv) != 2:
+        print("Wrong parameters!")
+        sys.exit(-1)
+    url = sys.argv[1].strip()
+    print("passing link " + str(url))
+    if re.match(url_pattern, url) is None:
+        print("Url has to be a valid link!")
+        sys.exit(-1)
+
+    if "spotify" not in url or "https://" not in url:
+        print("Url has to be a spotify link!")
+        sys.exit(-1)
+    print("executing command")
+    # A `cd` run through its own call(..., shell=True) only affects that
+    # short-lived shell, so the working directory is passed via cwd= instead.
+    # url_pattern admits '&', '#', '(' and ')', which a shell would interpret,
+    # so the URL is passed as its own argv entry without a shell.
+    call(["spotdl", "download", url], cwd=MUSIC_DIR, user="sem")
+# call("./download-spotify.sh " + url, shell=True, user="sem") # run as user sem to have the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment variables set
+
+
+if __name__ == "__main__":
+    main()
diff --git a/download_youtube.py b/download_youtube.py new file mode 100644 index 0000000..94546fd --- /dev/null +++ b/download_youtube.py @@ -0,0 +1,18 @@ +from __future__ import unicode_literals +import yt_dlp +import sys + +ydl_opts = { + 'format': 'bestaudio/best', + 'postprocessors': [{ + 
'key': 'FFmpegExtractAudio', + 'preferredcodec': 'mp3', + 'preferredquality': '0', # 0 = best quality + }], +} + +url = sys.argv[1] +print("Downloading youtube song " + url) + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([url]) diff --git a/example result.txt b/example result.txt new file mode 100755 index 0000000..ad51798 --- /dev/null +++ b/example result.txt @@ -0,0 +1,888 @@ +[ + { + "album": { + "album_type": "compilation", + "artists": [ + { + "external_urls": { + "spotify": "https://open.spotify.com/artist/7dGJo4pcD2V6oG8kP0tJRR" + }, + "href": "https://api.spotify.com/v1/artists/7dGJo4pcD2V6oG8kP0tJRR", + "id": "7dGJo4pcD2V6oG8kP0tJRR", + "name": "Eminem", + "type": "artist", + "uri": "spotify:artist:7dGJo4pcD2V6oG8kP0tJRR", + } + ], + "available_markets": [ + "AD", + "AE", + "AG", + "AL", + "AM", + "AO", + "AR", + "AT", + "AU", + "AZ", + "BA", + "BB", + "BD", + "BE", + "BF", + "BG", + "BH", + "BI", + "BJ", + "BN", + "BO", + "BR", + "BS", + "BT", + "BW", + "BY", + "BZ", + "CA", + "CD", + "CG", + "CH", + "CI", + "CL", + "CM", + "CO", + "CR", + "CV", + "CW", + "CY", + "CZ", + "DE", + "DJ", + "DK", + "DM", + "DO", + "DZ", + "EC", + "EE", + "EG", + "ES", + "FI", + "FJ", + "FM", + "FR", + "GA", + "GB", + "GD", + "GE", + "GH", + "GM", + "GN", + "GQ", + "GR", + "GT", + "GW", + "GY", + "HK", + "HN", + "HR", + "HT", + "HU", + "ID", + "IE", + "IL", + "IN", + "IQ", + "IS", + "IT", + "JM", + "JO", + "JP", + "KE", + "KG", + "KH", + "KI", + "KM", + "KN", + "KR", + "KW", + "KZ", + "LA", + "LB", + "LC", + "LI", + "LK", + "LR", + "LS", + "LT", + "LU", + "LV", + "LY", + "MA", + "MC", + "MD", + "ME", + "MG", + "MH", + "MK", + "ML", + "MN", + "MO", + "MR", + "MT", + "MU", + "MV", + "MW", + "MX", + "MY", + "MZ", + "NA", + "NE", + "NG", + "NI", + "NL", + "NO", + "NP", + "NR", + "NZ", + "OM", + "PA", + "PE", + "PG", + "PH", + "PK", + "PL", + "PS", + "PT", + "PW", + "PY", + "QA", + "RO", + "RS", + "RW", + "SA", + "SB", + "SC", + "SE", + "SG", + "SI", + "SK", + "SL", + 
"SM", + "SN", + "SR", + "ST", + "SV", + "SZ", + "TD", + "TG", + "TH", + "TJ", + "TL", + "TN", + "TO", + "TR", + "TT", + "TV", + "TW", + "TZ", + "UA", + "UG", + "US", + "UY", + "UZ", + "VC", + "VE", + "VN", + "VU", + "WS", + "XK", + "ZA", + "ZM", + "ZW", + ], + "external_urls": { + "spotify": "https://open.spotify.com/album/5qENHeCSlwWpEzb25peRmQ" + }, + "href": "https://api.spotify.com/v1/albums/5qENHeCSlwWpEzb25peRmQ", + "id": "5qENHeCSlwWpEzb25peRmQ", + "images": [ + { + "height": 640, + "url": "https://i.scdn.co/image/ab67616d0000b273eab40fc794b88b9d1e012578", + "width": 640, + }, + { + "height": 300, + "url": "https://i.scdn.co/image/ab67616d00001e02eab40fc794b88b9d1e012578", + "width": 300, + }, + { + "height": 64, + "url": "https://i.scdn.co/image/ab67616d00004851eab40fc794b88b9d1e012578", + "width": 64, + }, + ], + "name": "Curtain Call: The Hits (Deluxe Edition)", + "release_date": "2005-12-06", + "release_date_precision": "day", + "total_tracks": 24, + "type": "album", + "uri": "spotify:album:5qENHeCSlwWpEzb25peRmQ", + }, + "artists": [ + { + "external_urls": { + "spotify": "https://open.spotify.com/artist/7dGJo4pcD2V6oG8kP0tJRR" + }, + "href": "https://api.spotify.com/v1/artists/7dGJo4pcD2V6oG8kP0tJRR", + "id": "7dGJo4pcD2V6oG8kP0tJRR", + "name": "Eminem", + "type": "artist", + "uri": "spotify:artist:7dGJo4pcD2V6oG8kP0tJRR", + } + ], + "available_markets": [ + "AD", + "AE", + "AG", + "AL", + "AM", + "AO", + "AR", + "AT", + "AU", + "AZ", + "BA", + "BB", + "BD", + "BE", + "BF", + "BG", + "BH", + "BI", + "BJ", + "BN", + "BO", + "BR", + "BS", + "BT", + "BW", + "BY", + "BZ", + "CA", + "CD", + "CG", + "CH", + "CI", + "CL", + "CM", + "CO", + "CR", + "CV", + "CW", + "CY", + "CZ", + "DE", + "DJ", + "DK", + "DM", + "DO", + "DZ", + "EC", + "EE", + "EG", + "ES", + "FI", + "FJ", + "FM", + "FR", + "GA", + "GB", + "GD", + "GE", + "GH", + "GM", + "GN", + "GQ", + "GR", + "GT", + "GW", + "GY", + "HK", + "HN", + "HR", + "HT", + "HU", + "ID", + "IE", + "IL", + "IN", + "IQ", 
+ "IS", + "IT", + "JM", + "JO", + "JP", + "KE", + "KG", + "KH", + "KI", + "KM", + "KN", + "KR", + "KW", + "KZ", + "LA", + "LB", + "LC", + "LI", + "LK", + "LR", + "LS", + "LT", + "LU", + "LV", + "LY", + "MA", + "MC", + "MD", + "ME", + "MG", + "MH", + "MK", + "ML", + "MN", + "MO", + "MR", + "MT", + "MU", + "MV", + "MW", + "MX", + "MY", + "MZ", + "NA", + "NE", + "NG", + "NI", + "NL", + "NO", + "NP", + "NR", + "NZ", + "OM", + "PA", + "PE", + "PG", + "PH", + "PK", + "PL", + "PS", + "PT", + "PW", + "PY", + "QA", + "RO", + "RS", + "RW", + "SA", + "SB", + "SC", + "SE", + "SG", + "SI", + "SK", + "SL", + "SM", + "SN", + "SR", + "ST", + "SV", + "SZ", + "TD", + "TG", + "TH", + "TJ", + "TL", + "TN", + "TO", + "TR", + "TT", + "TV", + "TW", + "TZ", + "UA", + "UG", + "US", + "UY", + "UZ", + "VC", + "VE", + "VN", + "VU", + "WS", + "XK", + "ZA", + "ZM", + "ZW", + ], + "disc_number": 1, + "duration_ms": 205760, + "explicit": True, + "external_ids": {"isrc": "USUM70506387"}, + "external_urls": { + "spotify": "https://open.spotify.com/track/0fMWtRhzOoWoCo7dRHVEEa" + }, + "href": "https://api.spotify.com/v1/tracks/0fMWtRhzOoWoCo7dRHVEEa", + "id": "0fMWtRhzOoWoCo7dRHVEEa", + "is_local": False, + "name": "FACK", + "popularity": 63, + "preview_url": None, + "track_number": 2, + "type": "track", + "uri": "spotify:track:0fMWtRhzOoWoCo7dRHVEEa", + }, + { + "album": { + "album_type": "compilation", + "artists": [ + { + "external_urls": { + "spotify": "https://open.spotify.com/artist/7dGJo4pcD2V6oG8kP0tJRR" + }, + "href": "https://api.spotify.com/v1/artists/7dGJo4pcD2V6oG8kP0tJRR", + "id": "7dGJo4pcD2V6oG8kP0tJRR", + "name": "Eminem", + "type": "artist", + "uri": "spotify:artist:7dGJo4pcD2V6oG8kP0tJRR", + } + ], + "available_markets": [ + "AD", + "AE", + "AG", + "AL", + "AM", + "AO", + "AR", + "AT", + "AU", + "AZ", + "BA", + "BB", + "BD", + "BE", + "BF", + "BG", + "BH", + "BI", + "BJ", + "BN", + "BO", + "BR", + "BS", + "BT", + "BW", + "BY", + "BZ", + "CA", + "CD", + "CG", + "CH", + "CI", + 
"CL", + "CM", + "CO", + "CR", + "CV", + "CW", + "CY", + "CZ", + "DE", + "DJ", + "DK", + "DM", + "DO", + "DZ", + "EC", + "EE", + "EG", + "ES", + "FI", + "FJ", + "FM", + "FR", + "GA", + "GB", + "GD", + "GE", + "GH", + "GM", + "GN", + "GQ", + "GR", + "GT", + "GW", + "GY", + "HK", + "HN", + "HR", + "HT", + "HU", + "ID", + "IE", + "IL", + "IN", + "IQ", + "IS", + "IT", + "JM", + "JO", + "JP", + "KE", + "KG", + "KH", + "KI", + "KM", + "KN", + "KR", + "KW", + "KZ", + "LA", + "LB", + "LC", + "LI", + "LK", + "LR", + "LS", + "LT", + "LU", + "LV", + "LY", + "MA", + "MC", + "MD", + "ME", + "MG", + "MH", + "MK", + "ML", + "MN", + "MO", + "MR", + "MT", + "MU", + "MV", + "MW", + "MX", + "MY", + "MZ", + "NA", + "NE", + "NG", + "NI", + "NL", + "NO", + "NP", + "NR", + "NZ", + "OM", + "PA", + "PE", + "PG", + "PH", + "PK", + "PL", + "PS", + "PT", + "PW", + "PY", + "QA", + "RO", + "RS", + "RW", + "SA", + "SB", + "SC", + "SE", + "SG", + "SI", + "SK", + "SL", + "SM", + "SN", + "SR", + "ST", + "SV", + "SZ", + "TD", + "TG", + "TH", + "TJ", + "TL", + "TN", + "TO", + "TR", + "TT", + "TV", + "TW", + "TZ", + "UA", + "UG", + "US", + "UY", + "UZ", + "VC", + "VE", + "VN", + "VU", + "WS", + "XK", + "ZA", + "ZM", + "ZW", + ], + "external_urls": { + "spotify": "https://open.spotify.com/album/1zUY4PAFrNUOCeaEBrEHzh" + }, + "href": "https://api.spotify.com/v1/albums/1zUY4PAFrNUOCeaEBrEHzh", + "id": "1zUY4PAFrNUOCeaEBrEHzh", + "images": [ + { + "height": 640, + "url": "https://i.scdn.co/image/ab67616d0000b2733686f349e17589993adacddd", + "width": 640, + }, + { + "height": 300, + "url": "https://i.scdn.co/image/ab67616d00001e023686f349e17589993adacddd", + "width": 300, + }, + { + "height": 64, + "url": "https://i.scdn.co/image/ab67616d000048513686f349e17589993adacddd", + "width": 64, + }, + ], + "name": "Curtain Call: The Hits", + "release_date": "2005-12-06", + "release_date_precision": "day", + "total_tracks": 17, + "type": "album", + "uri": "spotify:album:1zUY4PAFrNUOCeaEBrEHzh", + }, + "artists": [ + 
{ + "external_urls": { + "spotify": "https://open.spotify.com/artist/7dGJo4pcD2V6oG8kP0tJRR" + }, + "href": "https://api.spotify.com/v1/artists/7dGJo4pcD2V6oG8kP0tJRR", + "id": "7dGJo4pcD2V6oG8kP0tJRR", + "name": "Eminem", + "type": "artist", + "uri": "spotify:artist:7dGJo4pcD2V6oG8kP0tJRR", + } + ], + "available_markets": [ + "AD", + "AE", + "AG", + "AL", + "AM", + "AO", + "AR", + "AT", + "AU", + "AZ", + "BA", + "BB", + "BD", + "BE", + "BF", + "BG", + "BH", + "BI", + "BJ", + "BN", + "BO", + "BR", + "BS", + "BT", + "BW", + "BY", + "BZ", + "CA", + "CD", + "CG", + "CH", + "CI", + "CL", + "CM", + "CO", + "CR", + "CV", + "CW", + "CY", + "CZ", + "DE", + "DJ", + "DK", + "DM", + "DO", + "DZ", + "EC", + "EE", + "EG", + "ES", + "FI", + "FJ", + "FM", + "FR", + "GA", + "GB", + "GD", + "GE", + "GH", + "GM", + "GN", + "GQ", + "GR", + "GT", + "GW", + "GY", + "HK", + "HN", + "HR", + "HT", + "HU", + "ID", + "IE", + "IL", + "IN", + "IQ", + "IS", + "IT", + "JM", + "JO", + "JP", + "KE", + "KG", + "KH", + "KI", + "KM", + "KN", + "KR", + "KW", + "KZ", + "LA", + "LB", + "LC", + "LI", + "LK", + "LR", + "LS", + "LT", + "LU", + "LV", + "LY", + "MA", + "MC", + "MD", + "ME", + "MG", + "MH", + "MK", + "ML", + "MN", + "MO", + "MR", + "MT", + "MU", + "MV", + "MW", + "MX", + "MY", + "MZ", + "NA", + "NE", + "NG", + "NI", + "NL", + "NO", + "NP", + "NR", + "NZ", + "OM", + "PA", + "PE", + "PG", + "PH", + "PK", + "PL", + "PS", + "PT", + "PW", + "PY", + "QA", + "RO", + "RS", + "RW", + "SA", + "SB", + "SC", + "SE", + "SG", + "SI", + "SK", + "SL", + "SM", + "SN", + "SR", + "ST", + "SV", + "SZ", + "TD", + "TG", + "TH", + "TJ", + "TL", + "TN", + "TO", + "TR", + "TT", + "TV", + "TW", + "TZ", + "UA", + "UG", + "US", + "UY", + "UZ", + "VC", + "VE", + "VN", + "VU", + "WS", + "XK", + "ZA", + "ZM", + "ZW", + ], + "disc_number": 1, + "duration_ms": 205760, + "explicit": True, + "external_ids": {"isrc": "USUM70506387"}, + "external_urls": { + "spotify": "https://open.spotify.com/track/3vllUzZChqpEaAat9brOYV" + }, 
+ "href": "https://api.spotify.com/v1/tracks/3vllUzZChqpEaAat9brOYV", + "id": "3vllUzZChqpEaAat9brOYV", + "is_local": False, + "name": "FACK", + "popularity": 42, + "preview_url": None, + "track_number": 2, + "type": "track", + "uri": "spotify:track:3vllUzZChqpEaAat9brOYV", + }, +] diff --git a/make_folders.py b/make_folders.py new file mode 100755 index 0000000..755be0e --- /dev/null +++ b/make_folders.py @@ -0,0 +1,765 @@ +from ast import keyword +import sys +import mutagen +import eyed3 +from eyed3.id3.frames import ImageFrame +from mutagen.flac import FLAC +from mutagen.mp3 import MP3 +from mutagen.id3 import ID3 +from mutagen.wave import WAVE +from mutagen.oggvorbis import OggVorbis +from mutagen.id3 import ID3, TIT2, TALB, TPE1, TPE2, COMM, TCOM, TCON, TDRC, TRCK, TPUB, POPM, APIC +from mutagen.easyid3 import EasyID3 +import os +from os import listdir +from os.path import isfile, join + +from icrawler.builtin import GoogleImageCrawler + +import spotipy +from spotipy.oauth2 import SpotifyClientCredentials + +from datetime import datetime + +import requests +import time + +import logging +logging.basicConfig( + level=logging.INFO, + format="{asctime} - {levelname} - [{funcName}:{lineno}] - {message}", + style="{", + datefmt="%Y-%m-%d %H:%M", + ) + +# TODO check if title is correct (not like "artist - name") + +def switch_ID3_flag_tag(audio, ID3_tag, flac_tag): + try: + audio[flac_tag] = str(audio[ID3_tag][0].text[0]) + logging.info("switched ID3 tag " + ID3_tag + " to flac tag " + flac_tag + " with value " + str(audio[flac_tag]) + ", type " + str(type(audio[flac_tag]))) + audio.pop(ID3_tag, None) + except: + logging.info("could not switch ID3 tag " + ID3_tag + " to flac tag " + flac_tag) + +def remove_flac_ID3_tags(audio, x: str): + if x.endswith(".flac"): + logging.info("switching ID3 tags to flac tags for file " + x) + + switch_ID3_flag_tag(audio,"TPE2","BAND") + switch_ID3_flag_tag(audio, "TPE1", "ARTIST") + switch_ID3_flag_tag(audio, "TIT2", "TITLE") + 
switch_ID3_flag_tag(audio, "TALB", "ALBUM") + switch_ID3_flag_tag(audio, "COMM", "COMMENT") + switch_ID3_flag_tag(audio, "TCOM", "COMPOSER") + switch_ID3_flag_tag(audio, "TCON", "GENRE") + switch_ID3_flag_tag(audio, "TRCK", "TRACKNUMBER") + switch_ID3_flag_tag(audio, "TDRC", "DATE") + switch_ID3_flag_tag(audio, "TPUB", "PUBLISHER") + + audio.pop("POPM", None) + audio.pop("APIC", None) + + + +def make_folder(foldername): + try: + logging.info("Creating folder " + foldername) + if "/" in foldername: + folders = foldername.split('/') + pos = "." + for fold in folders: + pos = join(pos,fold) + if not os.path.exists(pos): + os.mkdir(pos) + else: + os.mkdir(foldername) + except Exception as err: + logging.error("could not create folder " + foldername) + logging.error(err) + + +def search_google_images_and_save(x: str, audio): + if x.endswith(".flac"): + remove_flac_ID3_tags(audio,x) + + audio.save(x) + + found_image = False + if (check_tag(audio, x, "TALB","album")): + # search for artist and album + if x.endswith(".flac"): + songpath = join(".",str(audio["artist"]),str(audio["album"])) + else: + songpath = join(".",str(audio["TPE2"]),str(audio["TALB"])) + if "\x00" in songpath: + # having null bytes in os.replace will throw an error + songpath = songpath.replace("\x00",", ") + make_folder(songpath) + os.replace(join(".",x),join(songpath,x)) + google_keyword = "" + if x.endswith(".flac"): + google_keyword = str(audio["artist"]) + " " + str(audio["album"]) + " album" + else: + google_keyword = str(audio["TPE2"]) + " " + str(audio["TALB"]) + " album" + logging.info("Moved file! Now searching for album art... 
keyword is " + google_keyword) + google_Crawler = GoogleImageCrawler(storage = {'root_dir': songpath}) + try: + google_Crawler.crawl(keyword = str(audio["TPE2"]) + " " + str(audio["TALB"]) + " album", max_num = 1) + found_image = True + except: + logging.info("could not find Google result by album, searching by track and artist") + if (found_image): + logging.info("changing name of cover art file...") + logging.info(songpath) + for f in listdir(songpath): + logging.info(f) + if (isfile(join(songpath,f)) and f.split(".")[-1].lower() in ["jpg","png"]): + os.replace(join(songpath,f),join(songpath,"Cover." + f.split(".")[-1].lower())) + logging.info("Done!") + #TODO search for song name and artist if album not found on google images + # else: + # songpath = join(".",str(audio["TPE1"]),str(audio["TIT2"])) + # make_folder(songpath) + # os.replace(join(".",x),join(songpath,x)) + # logging.info("Moved file! Now searching for album art... keyword is " + str(audio["TPE1"]) + " " + str(audio["TIT2"])) + # google_Crawler = GoogleImageCrawler(storage = {'root_dir': songpath}) + + + else: + # search for song name and artist + # search for artist and album + # use TPE2 (album artist), because we want to find the album + songpath = join(".",str(audio["TPE2"]),str(audio["TIT2"])) + make_folder(songpath) + os.replace(join(".",x),join(songpath,x)) + logging.info("Moved file! Now searching for album art... 
keyword is " + str(audio["TPE2"]) + " " + str(audio["TIT2"])) + google_Crawler = GoogleImageCrawler(storage = {'root_dir': songpath}) + try: + google_Crawler.crawl(keyword = str(audio["TPE2"]) + " " + str(audio["TIT2"]), max_num = 1) + found_image = True + except: + logging.info("could not find Google result by track and artist") + if (found_image): + logging.info("changing name of cover art file...") + logging.info(songpath) + for f in listdir(songpath): + logging.info(f) + if (isfile(join(songpath,f)) and f.split(".")[-1].lower() in ["jpg","png"]): + os.replace(join(songpath,f),join(songpath,"Cover." + f.split(".")[-1].lower())) + logging.info("Done!") + +# TIT2 = title, +# TPE1 = artist, +# TPE2 = band, +# TALB = album, +# COMM = comment, +# TCOM = composer, +# TCON = genre, +# TRCK = number, +# TDRC = year, +# TPUB = publisher + +def create_ID3_tag(audio, tagname: str, textvalue: str): + logging.info("creating ID3 tag " + tagname + " with value " + textvalue) + if tagname == "TALB": + audio[tagname] = TALB(encoding=3,text=textvalue) + elif tagname == "TIT2": + audio[tagname] = TIT2(encoding=3,text=textvalue) + elif tagname == "TPE1": + audio[tagname] = TPE1(encoding=3,text=textvalue) + elif tagname == "TPE2": + audio[tagname] = TPE2(encoding=3,text=textvalue) + elif tagname == "COMM": + audio[tagname] = COMM(encoding=3,text=textvalue) + elif tagname == "TCOM": + audio[tagname] = TCOM(encoding=3,text=textvalue) + elif tagname == "TCON": + audio[tagname] = TCON(encoding=3,text=textvalue) + elif tagname == "TRCK": + audio[tagname] = TRCK(encoding=3,text=textvalue) + elif tagname == "TDRC": + try: + audio[tagname] = TDRC(encoding=3,text=textvalue) + except: + pass + elif tagname == "TPUB": + audio[tagname] = TPUB(encoding=3,text=textvalue) + +def check_tag(audio, filename: str, ID3_tag: str, normal_tag) -> bool: + res = False + + # check if the ID3 artist tag exists + if (ID3_tag in audio.keys() and len(str(audio[ID3_tag])) != 0): + logging.info(ID3_tag + " ID3 tag 
found! " + str(audio[ID3_tag])) + + # apply it to the general album tag + if audio[ID3_tag] is not str and filename.endswith(".mp3"): + audio[normal_tag] = audio[ID3_tag] + else: + audio[normal_tag] = audio[ID3_tag] + logging.info("Set " + normal_tag + " to " + str(audio[normal_tag])) + res = True + + # check if general album tag exists + elif (normal_tag in audio.keys() and len(str(audio[normal_tag])) != 0): + logging.info(normal_tag + " normal tag found! " + str(audio[normal_tag])) + if audio[normal_tag] is not str: + audio[normal_tag] = audio[normal_tag][0] + logging.info("normal tag is not str, set it to " + str(audio[normal_tag][0])) + if (not filename.endswith(".flac")): + #apply it to the ID3 tag + if audio[normal_tag] is not str: + create_ID3_tag(audio, ID3_tag,audio[normal_tag][0]) + else: + create_ID3_tag(audio, ID3_tag,audio[normal_tag]) + else: + logging.debug(filename + " is a flac file, not creating ID3 tag") + res = True + + return res + +def check_title_songname(x: str, audio): + logging.info("checking title by name " + x) + extension = x.split(".")[-1].lower() + if (extension in ["mp3","flac","ogg","wav","m4a","mp4"]): + x = x.rsplit(".",1)[0] # remove the file extension + logging.info("file has extension " + extension + ". removing it from title. 
New title: " + x) + + items = x.split(" - ") + if (len(items) > 2): + logging.info("song title has more than 1 part after the -: " + str(items)) + if (items[1].count(".mp3") >= 1): + logging.info("setting TIT2 tag to: " + str(items[1].strip().rstrip().rsplit(".")[0])) # get second part of title and remove the file extension + audio["TIT2"] = TIT2(encoding=3,text=str(items[1].strip().rstrip().rsplit(".")[0])) + elif (items[1].count(".flac") >= 1): + logging.info("setting title tag to " + str(items[1].strip().rstrip().rsplit(".")[0])) # get second part of title and remove the file extension + audio["title"] = str(items[1].strip().rstrip().rsplit(".")[0]) + else: + logging.info("title: " + str(items[1].strip().rstrip())) # get second part of title and remove the file extension + audio["TIT2"] = TIT2(encoding=3,text=str(items[1].strip().rstrip())) + audio["title"] = TIT2(encoding=3,text=str(items[1].strip().rstrip())) + else: + logging.info("song title has only 1 part after the -: " + items[1]) + if ("TIT2" not in audio.keys()): + song_title = items[1].strip().rstrip() + logging.info("TIT2 tag not found, creating it. 
Using song title: " + song_title) + audio["TIT2"] = TIT2(encoding=3,text=song_title) + audio["title"] = TIT2(encoding=3,text=song_title) + +def check_for_multiple_artists(audio, filename: str, name: str): + logging.info("checking for multiple artists for name " + name) + artists = [] + if (" x " in name): + artists = name.split(" x ") + elif (" X " in name): + artists = name.split(" X ") + elif ("," in name): + artists = name.split(",") + elif ("/" in name): + artists = name.split("/") + elif ("\x00" in name): + artists = name.split("\x00") + + if (len(artists) > 0): + logging.info("multiple artists: " + str(artists)) + if filename.endswith(".flac"): + audio["artist"] = TPE2(encoding=3,text=["\0".join(artists)]) + else: + audio["TPE1"] = TPE2(encoding=3,text=["\0".join(artists)]) + else: + logging.info("no multiple artists found in name " + name) + + +# checks for any artist from the song name. If it exists it sets the properties of the file +def check_artist_songname(x: str, audio): + items = x.split(" - ") + logging.info("Checking artist by name. items: " + str(items)) + if (len(items) > 2): + check_for_multiple_artists(audio, x, items[0].strip()) + + else: # no multiple artists in name + logging.info("Setting artist tags TPE1 and TPE2 to " + str(items[0].strip().rstrip())) + + audio["TPE1"] = TPE1(encoding=3,text=str(items[0].strip().rstrip())) + audio["TPE2"] = TPE2(encoding=3,text=str(items[0].strip().rstrip())) + audio["artist"] = audio["TPE1"] + +def check_artist(audio, filename: str) -> bool: + res = False + + # check if the ID3 artist tag exists + if ("TPE1" in audio.keys()): + if (len(str(audio["TPE1"])) != 0): + logging.info("TPE1 tag was found! 
" + str(audio["TPE1"])) + + # apply it to general artist tag + audio["TPE2"] = TPE2(encoding=3,text=str(audio["TPE1"])) + audio["artist"] = audio["TPE1"] + logging.info("checking for multiple artists") + check_for_multiple_artists(audio, filename ,str(audio["TPE1"])) + res = True + + + # if no TPE1, check if the ID3 band tag exists + elif ("TPE2" in audio.keys()): + if (len(str(audio["TPE2"])) != 0): + logging.info("TPE2 tag was found! " + str(audio["TPE2"])) + + # apply it to TPE1 and general artist tags + audio["TPE1"] = TPE1(encoding=3,text=str(audio["TPE2"])) + audio["artist"] = audio["TPE1"] + check_for_multiple_artists(audio,filename,str(audio["TPE2"])) + res = True + + # check if artist audio tag exists + elif ("artist" in audio.keys()): + if (len(str(audio["artist"])) != 0): + logging.info("artist tag was found! " + str(audio["artist"])) + + artist = "" + if (audio["artist"] is not str): + artist = audio["artist"][0] + else: + artist = audio["artist"] + + logging.info("artist: " + artist) + # apply to both ID3 artist tags + audio["TPE1"] = TPE1(encoding=3,text=artist) + audio["TPE2"] = TPE2(encoding=3,text=artist) + + if (audio["TPE2"][0].text is not str): + audio["TPE2"][0].text = str(audio["TPE2"][0].text[0]) + + if (audio["TPE1"] is not str): + audio["TPE1"] = audio["TPE1"][0] + + logging.info("Set TPE1 and TPE2 to " + str(audio["TPE1"][0]) + " and " + str(audio["TPE2"][0])) + check_for_multiple_artists(audio,filename,artist) + res = True + + return res + +def check_spotify_genre(genres,audio): + genre = "" + if (len(genres) > 0): + if (len(genres) == 1): + audio["TCON"] = TCON(encoding=3,text=str(genres[0])) + else: + for i in range(len(genres)): + if (i == 0): + genre = str(genres[i]) + else: + genre += "," + str(genres[i]) + + logging.info("genre set to " + genre) + audio["TCON"] = TCON(encoding=3,text=genre) + + audio["genre"] = audio["TCON"] + +def embed_music_file(audiostr: str, coverfile: str): + try: + new_audio = ID3(audiostr) + with 
open(coverfile,'rb') as albumart: + new_audio.add(APIC( + encoding=3, + mime='image/jpeg', + type=3, desc=u'Cover image', + data=albumart.read() + )) + new_audio.save(audiostr) + logging.info("Finished!") + except: + logging.info("could not embed music file") + + +def check_spotify_album_and_save(spotify, audio,x: str) -> bool: + found = False + logging.info("Searching on spotify for album...") + querystring = "" + if x.endswith(".flac"): + querystring = "artist:{0} album:{1}".format(str(audio["artist"]),str(audio["album"])) + else: + querystring = "artist:{0} album:{1}".format(str(audio["TPE2"]),str(audio["TALB"])) + + logging.info("query string: " + querystring) + try: + results = spotify.search(q=querystring,type='album') + except Exception as err: + logging.error("could not search on spotify") + logging.error(err) + logging.info("waiting 30 seconds before trying again") + time.sleep(30) + try: + results = spotify.search(q=querystring,type='album') + except Exception as err: + logging.error("could not search on spotify again") + logging.error(err) + return found + + if (len(results["albums"]["items"]) > 0): + logging.info("album found on spotify!") + found = True + album = results["albums"]["items"][0] + album_artist = album["artists"][0]["name"] + + if (x.endswith(".flac")): + try: + if str(audio["album_artist"]) != album_artist: + audio["album_artist"] = album_artist + except: + audio["album_artist"] = album_artist + else: + if (str(audio["TPE2"]) != album_artist): + audio["TPE2"] = TPE2(encoding=3,text=album_artist) + + album_image_url = album["images"][0]["url"] + album_name = album["name"] + + if x.endswith(".flac"): + if str(audio["album"] != album_name): + audio["album"] = album_name + elif (str(audio["TALB"]) != album_name): + audio["TALB"] = TALB(encoding=3,text=album_name) + + if (x.endswith(".flac")): + audio["year"] = album["release_date"] + else: + audio["TDRC"] = TDRC(encoding=3,text=album["release_date"]) + + artist_search = 
spotify.artist(album['artists'][0]['external_urls']['spotify']) + logging.info("genres: " + str(artist_search['genres'])) + check_spotify_genre(artist_search['genres'],audio) + + comment ="Spotify ID: {0}. Release date precision: {1}, total tracks in album: {2}. This album has {3} version(s)".format(album["id"],album["release_date_precision"], album["total_tracks"],len(results["albums"]["items"])) + logging.info("Comment: " + comment) + if x.endswith(".flac"): + audio["comment"] = comment + else: + audio["COMM"] = COMM(encoding=3,text=comment) + + if x.endswith(".flac"): + remove_flac_ID3_tags(audio,x) + + audio.save(x) + + songpath = "" + + if x.endswith(".flac"): + songpath = join(".",str(audio["artist"][0]),str(audio["album"][0])) + make_folder(join(".",str(audio["artist"][0]))) + else: + if "/" in audio["TPE2"]: + audio["TPE2"] = audio["TPE2"].replace("/","") + songpath = join(".",str(audio["TPE2"]),str(audio["TALB"])) + make_folder(join(".",str(audio["TPE2"]))) + + + + if (not x.endswith(".flac")): + if ("/" in str(audio["TALB"])): + logging.info("album contains /") + folders = str(audio["TALB"]).split('/') + logging.info(folders) + pos = join(".",str(audio["TPE2"])) + for fold in folders: + make_folder(join(pos,fold)) + pos = join(pos,fold) + logging.info(pos) + + make_folder(songpath) + os.replace(join(".",x),join(songpath,x)) + logging.info("moved song file, now downloading cover art") + + img_data = requests.get(str(album_image_url)).content + with open(join(songpath,"Cover.jpg"),'wb') as handler: + handler.write(img_data) + logging.info("done getting cover art!") + + logging.info("now setting cover art..") + embed_music_file(join(songpath,x),join(songpath,"Cover.jpg")) + else: + logging.info("No album found on spotify") + return found + +def check_spotify_and_save(spotify, audio,x: str) -> bool: + found = False + logging.info("Searching spotify for file " + x) + artist = "" + track = "" + if x.endswith(".flac"): + if audio["artist"] is not str: + artist = 
str(audio["artist"][0]) + else: + artist = str(audio["artist"]) + if audio["title"] is not str: + track = str(audio["title"][0]) + else: + track = str(audio["title"]) + else: + if audio["TPE2"] is not str: + logging.info("TPE2 is not str") + artist = str(audio["TPE2"][0]) + else: + artist = str(audio["TPE2"]) + + if audio["TIT2"] is not str: + track = str(audio["TIT2"][0]) + else: + track = str(audio["TIT2"]) + + querystring = "artist:{0} track:{1}".format(artist.split("\00")[0],track) + logging.info("query string: " + querystring) + results = spotify.search(q=querystring,type='track') + if (len(results['tracks']['items']) > 0): + logging.info("track found on spotify!") + found = True + album = results['tracks']['items'][0]["album"] + found_artist = album["artists"][0]["name"] + if (found_artist != artist): + logging.info("Changing album artist from " + artist + " to " + found_artist) + if x.endswith(".flac"): + audio["album_artist"] = found_artist + else: + audio["TPE2"] = TPE2(encoding=3,text=found_artist) + found_album = album["name"] + if (len(found_album) > 0): + if x.endswith(".flac"): + audio["album"] = found_album + else: + audio["TALB"] = TALB(encoding=3,text=found_album) + else: + if (x.endswith(".flac")): + audio["album"] = audio["title"][0] + else: + audio["TALB"] = TALB(encoding=3,text=str(audio["TIT2"])) + + comment ="Spotify ID: {0}. Release date precision: {1}, total tracks in album: {2}. 
This album has {3} version(s)".format(album["id"],album["release_date_precision"], album["total_tracks"],len(results["albums"]["items"])) + logging.info("Comment: " + comment) + if x.endswith(".flac"): + audio["comment"] = comment + else: + audio["COMM"] = COMM(encoding=3,text=comment) + + try: + year = str(datetime.strptime(album["release_date"], '%Y-%m-%d').year) + + if x.endswith(".flac"): + audio["year"] = year + else: + audio["TDRC"] = TDRC(encoding=3,text=year) + + except Exception as err: + logging.info(err) + year = str(album["release_date"]) + audio["year"] = year + + + if x.endswith(".flac"): + audio["TRACKNUMBER"] = str(results['tracks']['items'][0]["track_number"]) +"/" + str(album["total_tracks"]) + else: + audio["TRCK"] = TRCK(encoding=3,text=str(results['tracks']['items'][0]["track_number"]) +"/" + str(album["total_tracks"])) + + if x.endswith(".flac"): + audio["popularity"] = str(results['tracks']['items'][0]["popularity"]) + else: + audio["POPM"] = POPM(encoding=3,text=str(results['tracks']['items'][0]["popularity"])) + + + found_image_url = album["images"][0]["url"] + logging.info("found cover art image at " + str(found_image_url)) + + artist_search = spotify.artist(results['tracks']['items'][0]['artists'][0]['external_urls']['spotify']) + logging.info("genres: " + str(artist_search['genres'])) + check_spotify_genre(artist_search['genres'],audio) + + # remove ID3 tags if it's a flac file, otherwise it will throw an error + remove_flac_ID3_tags(audio,x) + + audio.save(x) + + artist_path = "" + songpath = "" + if x.endswith(".flac"): + artist_path = str(audio["artist"][0]) + else: + if audio["TPE2"] is not str: + artist_path = str(audio["TPE2"][0]) + else: + artist_path = str(audio["TPE2"]) + + logging.info("artist path: " + artist_path) + if x.endswith(".flac"): + songpath = join(".",artist_path,str(audio["ALBUM"][0])) + else: + songpath = join(".",artist_path,str(audio["TALB"])) + logging.info("song path: " + songpath) + + 
def _open_audio(path):
    """Open *path* with mutagen, creating an empty tag container on failure.

    Returns whatever ``mutagen.File`` returns (which may be ``None`` when the
    file type is not recognised), or a freshly tagged object when the first
    attempt raised.
    """
    try:
        return mutagen.File(path)
    except mutagen.mp3.HeaderNotFoundError as err:
        logging.info(err)
        logging.info("header not found")
        audio = mutagen.File(path, easy=True)
        audio.add_tags()
    except mutagen.id3.ID3NoHeaderError:
        logging.info("no header")
        audio = mutagen.File(path, easy=True)
        audio.add_tags()
    except Exception:
        # Last resort for any other failure; KeyboardInterrupt/SystemExit are
        # deliberately no longer swallowed here.
        logging.info("opening as ID3")
        audio = ID3(path)
        audio.add_tags()
    return audio


def main():
    """Tag, enrich and sort every audio file in the current directory.

    For each file the existing tags are validated through the ``check_*``
    helpers, then Spotify is queried for the track; when that does not
    succeed, the artist/album folders are created here and the album lookup
    or the Google Images lookup is used instead.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed copy of this function; verify it against the
    original file before merging.
    """
    # for spotipy to be able to log in, the environment variables SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET have to be set
    # these can be obtained from the spotify developer dashboard
    # they are defined in /etc/profile.d/spotipy.sh
    spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

    extensions = ('mp3', 'mp4', 'ogg', 'wav', 'flac', 'm4a',
                  'MP3', 'FLAC', 'OGG', 'MP4', 'WAV', 'M4A')
    onlyfiles = [f for f in listdir(".")
                 if isfile(join(".", f)) and f.split(".")[-1] in extensions]
    # TIT2 = title,
    # TPE1 = artist,
    # TPE2 = band,
    # TALB = album,
    # COMM = comment,
    # TCOM = composer,
    # TCON = genre,
    # TRCK = number,
    # TDRC = year,
    # TPUB = publisher

    # use: audio["TRCK"] = TRCK(encoding=3, text=u'track_number') and replace the tags with appropriate values
    for x in onlyfiles:
        logging.info("------------------------------------------------")
        logging.info(x)

        # Same case-sensitive test the sibling helpers use, so the FLAC/ID3
        # decision stays consistent across the file.
        is_flac = x.endswith(".flac")

        # try to open tags, if the file has none, create a new ID3 object
        audio = _open_audio(x)
        logging.info(type(audio))
        if audio is None:
            # mutagen.File returns None for files it cannot identify; every
            # later step indexes into audio, so skip instead of crashing.
            logging.info("mutagen could not identify " + x + ", skipping")
            continue

        try:
            if audio.tags is None:
                logging.info("audio has no tags")
                audio.add_tags()
        except Exception as err:
            # Best effort, as before, but no longer silent.
            logging.info("could not ensure tags exist: " + str(err))

        has_valid_artist = check_artist(audio, x)
        has_valid_album = check_tag(audio, x, "TALB", "album")
        has_valid_title = check_tag(audio, x, "TIT2", "title")
        if has_valid_title:
            if is_flac:
                title = audio["TITLE"]
                logging.info("Found valid title in title tag: " + str(title))
                # The original compared with `is not str`, which is always true.
                if isinstance(title, str):
                    check_title_songname(title, audio)
                else:
                    check_title_songname(title[0], audio)
            else:
                title = audio["TIT2"]
                logging.info("Found valid title in TTI2 tag: " + str(title) + ". type: " + str(type(title)))
                if isinstance(title, str):
                    # A plain string has no .text attribute; pass it as is.
                    check_title_songname(title, audio)
                elif isinstance(title[0], str):
                    check_title_songname(title[0], audio)
                else:
                    logging.info(type(title[0]))
                    check_title_songname(title[0].text[0], audio)
        else:
            logging.info("No valid title found in TTI2 tag, using name " + x)
            check_title_songname(x, audio)
            has_valid_title = True

        if not has_valid_artist:
            logging.info("No valid artist found, checking for artist by songname of the file (" + x + ")")
            if " - " in x:
                check_artist_songname(x, audio)
                has_valid_artist = True

        if not has_valid_artist and has_valid_title:
            logging.info("No valid artist found but valid title found, checking for multiple artists")
            # FLAC files keep the title under "title", not "TIT2".
            title_text = str(audio["title"][0]) if is_flac else str(audio["TIT2"])
            check_for_multiple_artists(audio, x, title_text)
            has_valid_artist = check_artist(audio, x)  # check again

        check_tag(audio, x, "COMM", "comment")
        check_tag(audio, x, "TCOM", "composer")

        has_genre = check_tag(audio, x, "TCON", "genre")
        if has_genre:
            # convert genres like Hip-Hop & Rap to Hip-Hop,Rap
            if is_flac:
                # Rewrite each value; str() of the whole list would store
                # the brackets and quotes in the tag.
                audio["genre"] = [str(g).replace(" & ", ",") for g in audio["genre"]]
            else:
                audio["TCON"] = TCON(encoding=3, text=str(audio["TCON"]).replace(" & ", ","))

        check_tag(audio, x, "TRCK", "track")
        check_tag(audio, x, "TDRC", "year")
        check_tag(audio, x, "TPUB", "publisher")

        if has_valid_artist and has_valid_title:
            try:
                found = check_spotify_and_save(spotify, audio, x)
            except Exception as err:
                logging.error("could not find track on spotify: " + str(err))
                found = False

            if not found:
                if is_flac:
                    artist_dir = str(audio["artist"][0])
                else:
                    artist_dir = str(audio["TPE2"])
                    if "/" in artist_dir:
                        # Work on the frame's text, not the frame object, and
                        # write the cleaned name back as a proper TPE2 frame.
                        artist_dir = artist_dir.replace("/", "")
                        audio["TPE2"] = TPE2(encoding=3, text=artist_dir)
                # Log the value the folder is actually created from.
                logging.info("valid artist found. making folder for artist " + artist_dir)
                make_folder(join(".", artist_dir))

                if has_valid_album:
                    if is_flac:
                        make_folder(join(".", str(audio["artist"][0]), str(audio["album"][0])))
                    else:
                        make_folder(join(".", str(audio["TPE2"]), str(audio["TALB"])))

                logging.info("spotify did not find artist and track, searching for album...")
                if has_valid_album:
                    album_found = check_spotify_album_and_save(spotify, audio, x)
                    # Any falsy result counts as "not found".
                    if not album_found:
                        logging.info("Nothing found on spotify, searching Google Images...")
                        search_google_images_and_save(x, audio)
                else:
                    # No album tag: fall back to the title as album name.
                    if is_flac:
                        audio["album"] = audio["title"][0]
                    else:
                        audio["TALB"] = TALB(encoding=3, text=str(audio["TIT2"]))
                    search_google_images_and_save(x, audio)

        logging.info("------------------------------------------------")
./*.mp3 ../temp #move all mp3 files +mv ./*.flac ../temp #move all flac files +mv ./*.m4a ../temp #move all m4a files +rsync -arv ../temp/* ../Music diff --git a/youtube-dl-command.txt b/youtube-dl-command.txt new file mode 100644 index 0000000..88dd503 --- /dev/null +++ b/youtube-dl-command.txt @@ -0,0 +1 @@ +youtube-dl -i -x --prefer-ffmpeg --audio-format mp3