From c04244bd9c2b620f33441242ca5c02118e075373 Mon Sep 17 00:00:00 2001 From: Philipp Rauch Date: Fri, 18 Sep 2020 09:21:54 +0200 Subject: [PATCH] add tvdb support --- .gitignore | 3 +- config.ini | 23 ++---------- main.py | 97 ++++++++++++++++++++++++++++++++++-------------- requirements.txt | 2 + seasons.ini | 25 +++++++++++++ 5 files changed, 103 insertions(+), 47 deletions(-) create mode 100644 seasons.ini diff --git a/.gitignore b/.gitignore index 6711f11..b823a93 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ +.venv .vscode -mediathek.code-workspace \ No newline at end of file +mediathek.code-workspace diff --git a/config.ini b/config.ini index 1332016..929acc1 100644 --- a/config.ini +++ b/config.ini @@ -1,19 +1,4 @@ -[DEFAULT] -save_path = /download -default_language = ger - -[Hubert und Staller] -rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bund%2BStaller%20!ARD -languages = ger -ger = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$ - -[Hubert ohne Staller] -rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bohne%2BStaller%20!ARD -languages = ger -ger = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$ - -[Doctor Who] -rss = https://mediathekviewweb.de/feed?query=%23Doctor%20!ARD -languages = ger,eng -ger = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\)$ -eng = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\).*Originalversion.*$ \ No newline at end of file +[TheTVDB] +apikey = 5446e56f161ea9bb5eff69f7bb791a6c +userkey = 5EFB34A8443702.64312763 +username = smokephil \ No newline at end of file diff --git a/main.py b/main.py index c1c0f7b..8de6bf5 100755 --- a/main.py +++ b/main.py @@ -6,37 +6,68 @@ import os import re import shutil import tempfile -import urllib.request +# import urllib.request from configparser import ConfigParser -from pprint import pp import feedparser import requests +import tvdbsimple as tvdb +from fuzzywuzzy import fuzz from pymkv import MKVFile, MKVTrack logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) 
-config = ConfigParser(interpolation=None) +seasons = ConfigParser(interpolation=None) +seasons.read('seasons.ini') +config = ConfigParser() config.read('config.ini') +tvdb.KEYS.API_KEY = config['TheTVDB']['apikey'] -def build_filename(name, titel_dict): - if titel_dict == []: + +def get_match(query, + choices, + key=lambda x: x, + score_cutoff=0, + scorer=fuzz.ratio): + res = [(item, scorer(query, key(item))) for item in choices] + return sorted([x for x in res if x[1] >= score_cutoff], + key=lambda x: x[1], + reverse=True) + + +def get_episod_info(season_id, name): + episodes = tvdb.Series_Episodes(season_id, 'de').all() + matches = get_match(name, + episodes, + key=lambda x: x['episodeName'], + score_cutoff=90) + + if len(matches) >= 1: + titel_dict = {} + titel_dict['title'] = matches[0][0]['episodeName'] + titel_dict['season'] = str(matches[0][0]['airedSeason']) + titel_dict['episod'] = str(matches[0][0]['airedEpisodeNumber']) + return titel_dict + + +def build_filename(name: str, titel_dict: dict): + if titel_dict == {}: return None - if titel_dict[0]['episod'].isdigit(): - filename = '{name} S{season:>02}E{episod:>02} {titel}'.format( - name=name, **titel_dict[0]) + if titel_dict['episod'].isdigit(): + filename = '{name} S{season:>02}E{episod:>02} {title}'.format( + name=name, **titel_dict) else: - filename = '{name} S{season:>02}.{episod} {titel}'.format( - name=name, **titel_dict[0]) + filename = '{name} S{season:>02}.{episod} {title}'.format(name=name, + **titel_dict) return filename.strip().replace(' ', '.') def make_mkv(data): series_name = data['series'] - default_lang = config[series_name]['default_language'] - lang_sort = config[series_name]['languages'].split(',') + default_lang = seasons[series_name]['default_language'] + lang_sort = seasons[series_name]['languages'].split(',') def add_video(lang, path): logging.debug('[MKV] add video (%s, %s)', lang, path) @@ -63,29 +94,35 @@ def make_mkv(data): def parse_feed(series_name): - d = 
feedparser.parse(config[series_name]['rss']) + d = feedparser.parse(seasons[series_name]['rss']) episodes = dict() for item in d['entries']: + lang = 'ger' # init lang title = item['title'] link = item['link'] - for lang in config[series_name]['languages'].split(','): - pattern = config[series_name][lang] + for lang in seasons[series_name]['languages'].split(','): + pattern = seasons[series_name][lang] + season_id = seasons.get(series_name, 'thetvdb', fallback=None) groups = [x.groupdict() for x in re.finditer(pattern, title, re.M)] if len(groups) == 1: break - filename = build_filename(series_name, groups) - if filename is None: + if groups == []: logging.warning('skip %s', title) continue + + titel_dict = groups[0] + if season_id is not None: + titel_dict = get_episod_info(season_id, titel_dict['title']) + filename = build_filename(series_name, titel_dict) if filename not in episodes: episodes[filename] = dict() episodes[filename]['lang'] = dict() episodes[filename]['lang'][lang] = link - season = '{season:>02}'.format(**groups[0]) - episod = '{episod:>02}'.format(**groups[0]) - basepath = os.path.join(config[series_name]['save_path'], series_name, + season = '{season:>02}'.format(**titel_dict) + episod = '{episod:>02}'.format(**titel_dict) + basepath = os.path.join(seasons[series_name]['save_path'], series_name, f'Season {season:>02}') mkvpath = os.path.join(basepath, filename + '.mkv') @@ -98,7 +135,7 @@ def parse_feed(series_name): return episodes -def download_files(episodes): +def download_files(episodes, dryrun=False): for episod in sorted(episodes.keys()): item = episodes[episod] if os.path.exists(item['mkvpath']): @@ -117,26 +154,32 @@ def download_files(episodes): logging.info('start downloading...') logging.debug('source: %s', link) logging.debug('destination: %s', filepath) + if dryrun: + continue try: - response = urllib.request.urlopen(link) + # response = urllib.request.urlopen(link) with requests.get(link, stream=True) as r: with open(filepath, 
'wb') as f: shutil.copyfileobj(r.raw, f) item['lang'][lang] = filepath except: - logging.error("download") - pass + logging.error("could not download file.") - make_mkv(item) + if dryrun: + continue + try: + make_mkv(item) + except FileNotFoundError: + logging.error("could not build mkv.") temp.cleanup() def run(): logging.info('====== START ======') - for series_name in config.sections(): + for series_name in seasons.sections(): episodes = parse_feed(series_name) - download_files(episodes) + download_files(episodes, dryrun=False) logging.info('====== END ======') diff --git a/requirements.txt b/requirements.txt index fa38469..97758a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ feedparser==5.2.1 requests==2.24.0 pymkv==1.0.5 +fuzzywuzzy==0.18.0 +tvdbsimple==1.0.6 diff --git a/seasons.ini b/seasons.ini new file mode 100644 index 0000000..ac09d6a --- /dev/null +++ b/seasons.ini @@ -0,0 +1,25 @@ +[DEFAULT] +save_path = /download +default_language = ger + +[Hubert und Staller] +rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bund%2BStaller%20!ARD +languages = ger +ger = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$ + +[Hubert ohne Staller] +rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bohne%2BStaller%20!ARD +languages = ger +ger = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$ + +[Doctor Who] +rss = https://mediathekviewweb.de/feed?query=%23Doctor%20!ARD +languages = ger,eng +ger = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\)$ +eng = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\).*Originalversion.*$ + +[Professor T.] +rss = https://mediathekviewweb.de/feed?query=%23Professor%2BT.%20%20%3E40 +languages = ger +ger = ^(?P<title>.*?)( - Staffel \d+)?\s?(\(Audiodeskription\))?$ +thetvdb = 323666