add tvdb support

2020-09-18 09:21:54 +02:00 · 2020-09-18 09:21:54 +02:00 · c04244bd9c
commit c04244bd9c
parent 1f645a75a1
5 changed files with 103 additions and 47 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
+.venv
 .vscode
 mediathek.code-workspace
--- a/config.ini
+++ b/config.ini
@@ -1,19 +1,4 @@
-[DEFAULT]
-save_path = /download
-default_language = ger
-
-[Hubert und Staller]
-rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bund%2BStaller%20!ARD
-languages = ger
-ger =  ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$
-
-[Hubert ohne Staller]
-rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bohne%2BStaller%20!ARD
-languages = ger
-ger = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$
-
-[Doctor Who]
-rss = https://mediathekviewweb.de/feed?query=%23Doctor%20!ARD
-languages = ger,eng
-ger = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\)$
-eng = ^(?P<titel>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\).*Originalversion.*$
+[TheTVDB]
+apikey = 5446e56f161ea9bb5eff69f7bb791a6c
+userkey = 5EFB34A8443702.64312763
+username = smokephil
--- a/main.py
+++ b/main.py
@@ -6,37 +6,68 @@ import os
 import re
 import shutil
 import tempfile
-import urllib.request
+# import urllib.request
 from configparser import ConfigParser
-from pprint import pp

 import feedparser
 import requests
+import tvdbsimple as tvdb
+from fuzzywuzzy import fuzz
 from pymkv import MKVFile, MKVTrack

 logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG)

-config = ConfigParser(interpolation=None)
+seasons = ConfigParser(interpolation=None)
+seasons.read('seasons.ini')
+config = ConfigParser()
 config.read('config.ini')

+tvdb.KEYS.API_KEY = config['TheTVDB']['apikey']

-def build_filename(name, titel_dict):
-    if titel_dict == []:
+
+def get_match(query,
+              choices,
+              key=lambda x: x,
+              score_cutoff=0,
+              scorer=fuzz.ratio):
+    res = [(item, scorer(query, key(item))) for item in choices]
+    return sorted([x for x in res if x[1] >= score_cutoff],
+                  key=lambda x: x[1],
+                  reverse=True)
+
+
+def get_episod_info(season_id, name):
+    episodes = tvdb.Series_Episodes(season_id, 'de').all()
+    matches = get_match(name,
+                        episodes,
+                        key=lambda x: x['episodeName'],
+                        score_cutoff=90)
+
+    if len(matches) >= 1:
+        titel_dict = {}
+        titel_dict['title'] = matches[0][0]['episodeName']
+        titel_dict['season'] = str(matches[0][0]['airedSeason'])
+        titel_dict['episod'] = str(matches[0][0]['airedEpisodeNumber'])
+        return titel_dict
+
+
+def build_filename(name: str, titel_dict: dict):
+    if titel_dict == {}:
        return None
-    if titel_dict[0]['episod'].isdigit():
-        filename = '{name} S{season:>02}E{episod:>02} {titel}'.format(
-            name=name, **titel_dict[0])
+    if titel_dict['episod'].isdigit():
+        filename = '{name} S{season:>02}E{episod:>02} {title}'.format(
+            name=name, **titel_dict)
    else:
-        filename = '{name} S{season:>02}.{episod} {titel}'.format(
-            name=name, **titel_dict[0])
+        filename = '{name} S{season:>02}.{episod} {title}'.format(name=name,
+                                                                  **titel_dict)

    return filename.strip().replace(' ', '.')


 def make_mkv(data):
    series_name = data['series']
-    default_lang = config[series_name]['default_language']
-    lang_sort = config[series_name]['languages'].split(',')
+    default_lang = seasons[series_name]['default_language']
+    lang_sort = seasons[series_name]['languages'].split(',')

    def add_video(lang, path):
        logging.debug('[MKV] add video (%s, %s)', lang, path)
@@ -63,29 +94,35 @@ def make_mkv(data):


 def parse_feed(series_name):
-    d = feedparser.parse(config[series_name]['rss'])
+    d = feedparser.parse(seasons[series_name]['rss'])

    episodes = dict()
    for item in d['entries']:
+        lang = 'ger'  # init lang
        title = item['title']
        link = item['link']
-        for lang in config[series_name]['languages'].split(','):
-            pattern = config[series_name][lang]
+        for lang in seasons[series_name]['languages'].split(','):
+            pattern = seasons[series_name][lang]
+            season_id = seasons.get(series_name, 'thetvdb', fallback=None)
            groups = [x.groupdict() for x in re.finditer(pattern, title, re.M)]
            if len(groups) == 1:
                break

-        filename = build_filename(series_name, groups)
-        if filename is None:
+        if groups == []:
            logging.warning('skip %s', title)
            continue
+
+        titel_dict = groups[0]
+        if season_id is not None:
+            titel_dict = get_episod_info(season_id, titel_dict['title'])
+        filename = build_filename(series_name, titel_dict)
        if filename not in episodes:
            episodes[filename] = dict()
            episodes[filename]['lang'] = dict()
        episodes[filename]['lang'][lang] = link
-        season = '{season:>02}'.format(**groups[0])
-        episod = '{episod:>02}'.format(**groups[0])
-        basepath = os.path.join(config[series_name]['save_path'], series_name,
+        season = '{season:>02}'.format(**titel_dict)
+        episod = '{episod:>02}'.format(**titel_dict)
+        basepath = os.path.join(seasons[series_name]['save_path'], series_name,
                                f'Season {season:>02}')
        mkvpath = os.path.join(basepath, filename + '.mkv')

@@ -98,7 +135,7 @@ def parse_feed(series_name):
    return episodes


-def download_files(episodes):
+def download_files(episodes, dryrun=False):
    for episod in sorted(episodes.keys()):
        item = episodes[episod]
        if os.path.exists(item['mkvpath']):
@@ -117,26 +154,32 @@ def download_files(episodes):
            logging.info('start downloading...')
            logging.debug('source: %s', link)
            logging.debug('destination: %s', filepath)
+            if dryrun:
+                continue
            try:
-                response = urllib.request.urlopen(link)
+                # response = urllib.request.urlopen(link)
                with requests.get(link, stream=True) as r:
                    with open(filepath, 'wb') as f:
                        shutil.copyfileobj(r.raw, f)

                item['lang'][lang] = filepath
            except:
-                logging.error("download")
-                pass
+                logging.error("could not download file.")

-        make_mkv(item)
+        if dryrun:
+            continue
+        try:
+            make_mkv(item)
+        except FileNotFoundError:
+            logging.error("could not build mkv.")
        temp.cleanup()


 def run():
    logging.info('====== START ======')
-    for series_name in config.sections():
+    for series_name in seasons.sections():
        episodes = parse_feed(series_name)
-        download_files(episodes)
+        download_files(episodes, dryrun=False)

    logging.info('====== END ======')

--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 feedparser==5.2.1
 requests==2.24.0
 pymkv==1.0.5
+fuzzywuzzy==0.18.0
+tvdbsimple==1.0.6
--- a/seasons.ini
+++ b/seasons.ini
@@ -0,0 +1,25 @@
+[DEFAULT]
+save_path = /download
+default_language = ger
+
+[Hubert und Staller]
+rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bund%2BStaller%20!ARD
+languages = ger
+ger =  ^(?P<title>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$
+
+[Hubert ohne Staller]
+rss = https://mediathekviewweb.de/feed?query=%23Hubert%2Bohne%2BStaller%20!ARD
+languages = ger
+ger = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E(?P<episod>\d+)\)$
+
+[Doctor Who]
+rss = https://mediathekviewweb.de/feed?query=%23Doctor%20!ARD
+languages = ger,eng
+ger = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\)$
+eng = ^(?P<title>.*)\s+\(S(?P<season>\d+)/E?(?P<episod>\d+|Weihnachtsspecial)\).*Originalversion.*$
+
+[Professor T.]
+rss = https://mediathekviewweb.de/feed?query=%23Professor%2BT.%20%20%3E40
+languages = ger
+ger = ^(?P<title>.*?)( - Staffel \d+)?\s?(\(Audiodeskription\))?$
+thetvdb = 323666