From 3a5be5da2ae31d30ada0dfb461b852138ee4bfcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20D=C3=BCster?=
Date: Sat, 18 Jan 2025 18:00:59 +0100
Subject: [PATCH 1/2] [ie/gotowebinar] Add extractor

---
 yt_dlp/extractor/_extractors.py |  1 +
 yt_dlp/extractor/gotowebinar.py | 57 +++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 yt_dlp/extractor/gotowebinar.py

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 9d3d353683..cdf9a5d1f7 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -764,6 +764,7 @@ from .goplay import GoPlayIE
 from .gopro import GoProIE
 from .goshgay import GoshgayIE
 from .gotostage import GoToStageIE
+from .gotowebinar import GoTo_WebinarIE
 from .gputechconf import GPUTechConfIE
 from .graspop import GraspopIE
 from .gronkh import (
diff --git a/yt_dlp/extractor/gotowebinar.py b/yt_dlp/extractor/gotowebinar.py
new file mode 100644
index 0000000000..1da3e701fd
--- /dev/null
+++ b/yt_dlp/extractor/gotowebinar.py
@@ -0,0 +1,57 @@
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class GoTo_WebinarIE(InfoExtractor):
+    _VALID_URL = r'https?://(register|attendee)\.gotowebinar\.com/recording/viewRecording/(?P<webinar_key>[0-9]+)/(?P<recording_key>[0-9]+)/(?P<email>[^?]+)(?:\?registrantKey=(?P<registrant_key>[0-9]+))?'
+    _TESTS = [
+        {
+            # Source: https://community.intel.com/t5/Processors/Deriving-core-numbering-on-sockets-without-disabled-tiles/m-p/1263389
+            'url': 'https://register.gotowebinar.com/recording/viewRecording/8573274081823101697/1166504161772360449/mfratkin@tacc.utexas.edu?registrantKey=6636963737074316811&type=ATTENDEEEMAILRECORDINGLINK',
+            'info_dict': {
+                'id': '8573274081823101697-1166504161772360449',
+                'title': 'Topology and Cache Coherence in Knights Landing and Skylake Xeon Processors',
+                'description': 'md5:2d673910d31bfb4918a0605ea60561dd',
+                'creators': ['IXPUG Committee'],
+                'ext': 'mp4',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        webinar_key, recording_key, email, registrant_key = self._match_valid_url(url).group('webinar_key', 'recording_key', 'email', 'registrant_key')
+        video_id = f'{webinar_key}-{recording_key}'
+
+        if not registrant_key:
+            registrant_metadata = self._download_json(
+                f'https://globalattspa.gotowebinar.com/api/webinars/{webinar_key}/registrants?email={email}',
+                video_id,
+                note='Downloading registrant metadata',
+                errnote='Unable to download registrant metadata')
+            if not (registrant_key := registrant_metadata.get('registrantKey')):
+                raise ExtractorError('Unable to retrieve registrant key')
+
+        important_metadata = self._download_json(
+            f'https://api.services.gotomeeting.com/registrationservice/api/v1/webinars/{webinar_key}/registrants/{registrant_key}/recordingAssets?type=FOLLOWUPEMAILRECORDINGLINK&client=spa',
+            video_id,
+            note='Downloading important recording metadata',
+            errnote='Unable to important download recording metadata')
+
+        non_important_metadata = self._download_json(
+            f'https://global.gotowebinar.com/api/webinars/{webinar_key}',
+            video_id,
+            note='Downloading non-important recording metadata',
+            errnote='Unable to non-important download recording metadata',
+            fatal=False)
+
+        creator = non_important_metadata.get('organizerName')
+
+        return {
+            'id': video_id,
+            'url': important_metadata.get('cdnLocation'),
+            'ext': 'mp4',
+            'is_live': False,
+            'title': non_important_metadata.get('subject'),
+            'description': non_important_metadata.get('description'),
+            'creators': [creator] if creator else None,
+        }

From a7603c3ecb89b73b2f7d54e55e108d1b1985cb9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20D=C3=BCster?=
Date: Mon, 15 Sep 2025 11:05:33 +0200
Subject: [PATCH 2/2] fixup! [ie/gotowebinar] Add extractor

---
Review notes (commentary only; not part of the commit message or the diff):
- This fixup renames the class to GoToWebinarIE, uses the webinar key alone
  as the video id, accepts registrantKey anywhere in the query string, and
  reads the optional metadata through traverse_obj (adding 'timestamp').
- The errnote texts "Unable to important download recording metadata" and
  "Unable to non-important download recording metadata" have transposed
  words; they are left untouched here so the diff still matches the commits.
- 'url' is taken from recording_data.get('cdnLocation') with no check for a
  missing value; consider raising ExtractorError in that case (TODO: confirm
  the API response shape).
- The `email` URL component is interpolated into the registrants query
  string without escaping.

 yt_dlp/extractor/_extractors.py |  2 +-
 yt_dlp/extractor/gotowebinar.py | 74 +++++++++++++++++++++------------
 2 files changed, 49 insertions(+), 27 deletions(-)

diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index cdf9a5d1f7..85d2931518 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -764,7 +764,7 @@ from .goplay import GoPlayIE
 from .gopro import GoProIE
 from .goshgay import GoshgayIE
 from .gotostage import GoToStageIE
-from .gotowebinar import GoTo_WebinarIE
+from .gotowebinar import GoToWebinarIE
 from .gputechconf import GPUTechConfIE
 from .graspop import GraspopIE
 from .gronkh import (
diff --git a/yt_dlp/extractor/gotowebinar.py b/yt_dlp/extractor/gotowebinar.py
index 1da3e701fd..6e46f5f361 100644
--- a/yt_dlp/extractor/gotowebinar.py
+++ b/yt_dlp/extractor/gotowebinar.py
@@ -1,57 +1,79 @@
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import ExtractorError, parse_iso8601, traverse_obj
 
 
-class GoTo_WebinarIE(InfoExtractor):
-    _VALID_URL = r'https?://(register|attendee)\.gotowebinar\.com/recording/viewRecording/(?P<webinar_key>[0-9]+)/(?P<recording_key>[0-9]+)/(?P<email>[^?]+)(?:\?registrantKey=(?P<registrant_key>[0-9]+))?'
+class GoToWebinarIE(InfoExtractor):
+    _VALID_URL = r'''(?x)https?://
+        (register|attendee)\.gotowebinar\.com/recording/viewRecording/
+        (?P<webinar_key>[0-9]+)/(?P<recording_key>[0-9]+)/(?P<email>[^?#]+)
+        (?:\?(?:[^#]+&)?registrantKey=(?P<registrant_key>[0-9]+))?'''
     _TESTS = [
         {
-            # Source: https://community.intel.com/t5/Processors/Deriving-core-numbering-on-sockets-without-disabled-tiles/m-p/1263389
-            'url': 'https://register.gotowebinar.com/recording/viewRecording/8573274081823101697/1166504161772360449/mfratkin@tacc.utexas.edu?registrantKey=6636963737074316811&type=ATTENDEEEMAILRECORDINGLINK',
+            # Source: https://associationofanaesthetists-publications.onlinelibrary.wiley.com/doi/am-pdf/10.1111/anae.15209
+            'url': 'https://register.gotowebinar.com/recording/viewRecording/8054623469383961613/3917240379133570566/andrewmortimore@anaesthetists.org?registrantKey=2674782344143402252&type=ABSENTEEEMAILRECORDINGLINK',
             'info_dict': {
-                'id': '8573274081823101697-1166504161772360449',
-                'title': 'Topology and Cache Coherence in Knights Landing and Skylake Xeon Processors',
-                'description': 'md5:2d673910d31bfb4918a0605ea60561dd',
-                'creators': ['IXPUG Committee'],
+                'id': '8054623469383961613',
+                'title': 'Webinar: COVID-19: By Trainees, For Trainees ',
+                'description': 'md5:9702e0662f45ee74ff2168de4d6d5d6a',
+                'creators': ['E-education Dept'],
+                'timestamp': 1590824700,
+                'upload_date': '20200530',
+                'ext': 'mp4',
+            },
+        },
+        {
+            'url': 'https://attendee.gotowebinar.com/recording/viewRecording/7594846188203875084/5457693551948244743/stoll@berkeley.edu',
+            'info_dict': {
+                'id': '7594846188203875084',
+                'title': 'Climate change, mental health, and eco-anxiety: How the global pandemic can help us prepare',
+                'description': 'md5:390f0dffd516a53a4728bd755c85def4',
+                'creators': ['Environmental Public Health'],
+                'timestamp': 1586548800,
+                'upload_date': '20200410',
                 'ext': 'mp4',
             },
         },
     ]
 
     def _real_extract(self, url):
-        webinar_key, recording_key, email, registrant_key = self._match_valid_url(url).group('webinar_key', 'recording_key', 'email', 'registrant_key')
-        video_id = f'{webinar_key}-{recording_key}'
+        webinar_key, email, registrant_key = self._match_valid_url(url).group(
+            'webinar_key', 'email', 'registrant_key',
+        )
 
         if not registrant_key:
             registrant_metadata = self._download_json(
                 f'https://globalattspa.gotowebinar.com/api/webinars/{webinar_key}/registrants?email={email}',
-                video_id,
+                webinar_key,
                 note='Downloading registrant metadata',
-                errnote='Unable to download registrant metadata')
+                errnote='Unable to download registrant metadata',
+            )
             if not (registrant_key := registrant_metadata.get('registrantKey')):
                 raise ExtractorError('Unable to retrieve registrant key')
 
-        important_metadata = self._download_json(
+        recording_data = self._download_json(
             f'https://api.services.gotomeeting.com/registrationservice/api/v1/webinars/{webinar_key}/registrants/{registrant_key}/recordingAssets?type=FOLLOWUPEMAILRECORDINGLINK&client=spa',
-            video_id,
+            webinar_key,
             note='Downloading important recording metadata',
-            errnote='Unable to important download recording metadata')
+            errnote='Unable to important download recording metadata',
+        )
 
-        non_important_metadata = self._download_json(
+        metadata = self._download_json(
             f'https://global.gotowebinar.com/api/webinars/{webinar_key}',
-            video_id,
+            webinar_key,
             note='Downloading non-important recording metadata',
             errnote='Unable to non-important download recording metadata',
-            fatal=False)
-
-        creator = non_important_metadata.get('organizerName')
+            fatal=False,
+        )
 
         return {
-            'id': video_id,
-            'url': important_metadata.get('cdnLocation'),
+            'id': webinar_key,
+            'url': recording_data.get('cdnLocation'),
             'ext': 'mp4',
             'is_live': False,
-            'title': non_important_metadata.get('subject'),
-            'description': non_important_metadata.get('description'),
-            'creators': [creator] if creator else None,
+            **traverse_obj(metadata, {
+                'title': ('subject', {str}),
+                'description': ('description', {str}),
+                'creators': ('organizerName', {str}, all),
+                'timestamp': ('times', 0, 'startTime', {parse_iso8601}),
+            }),
         }