diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index a850bdd102..7e2acdd79a 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -212,9 +212,20 @@ class FFmpegPostProcessor(PostProcessor): @staticmethod def stream_copy_opts(copy=True, *, ext=None): yield from ('-map', '0') + + if ext in ('mkv', 'mka'): + # Some streams, such as JSON attachments, are considered of unknown + # type by FFmpeg but we still want to copy them. + yield '-copy_unknown' + else: + # Most containers don't really like unknown streams. Let's make + # sure to get rid of them. + yield '-ignore_unknown' + # Don't copy Apple TV chapters track, bin_data # See https://github.com/yt-dlp/yt-dlp/issues/2, #19042, #19024, https://trac.ffmpeg.org/ticket/6016 - yield from ('-dn', '-ignore_unknown') + yield '-dn' + if copy: yield from ('-c', 'copy') if ext in ('mp4', 'mov', 'm4a'): @@ -386,6 +397,21 @@ class FFmpegPostProcessor(PostProcessor): string = string[1:] if string[0] == "'" else "'" + string return string[:-1] if string[-1] == "'" else string + "'" + def _escape_stream_specifier(self, string): + # Escape `'` and `/` as per the docs: + # https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping + out = string.replace('\\', '\\\\') + out = out.replace("'", "\\'") + + if (is_outdated_version(self._get_version('ffmpeg'), '7.1')): + # Versions older than 7.1 take arguments with colons verbatim. + return out + + # Since 7.1, stream specifier arguments with colons must be escaped or + # they will be interpreted as part of the specifier. This was done to + # reduce ambiguity, but it also broke compatibility. + return out.replace(':', r'\:') + def force_keyframes(self, filename, timestamps): timestamps = orderedSet(timestamps) if timestamps[0] == 0: @@ -549,7 +575,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): @staticmethod def _options(target_ext): - yield from FFmpegPostProcessor.stream_copy_opts(False) + yield from FFmpegPostProcessor.stream_copy_opts(False, ext=target_ext) if target_ext == 'avi': yield from ('-c:v', 'libxvid', '-vtag', 'XVID') @@ -575,7 +601,7 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): @staticmethod def _options(target_ext): - return FFmpegPostProcessor.stream_copy_opts() + return FFmpegPostProcessor.stream_copy_opts(ext=target_ext) class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): @@ -612,13 +638,18 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): webm_vtt_warn = False mp4_ass_warn = False + json_subs = {} + for lang, sub_info in subtitles.items(): if not os.path.exists(sub_info.get('filepath', '')): self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') continue sub_ext = sub_info['ext'] if sub_ext == 'json': - self.report_warning('JSON subtitles cannot be embedded') + if info['ext'] in ('mkv', 'mka'): + json_subs[lang] = sub_info['filepath'] + else: + self.report_warning('JSON subtitles can only be embedded in mkv/mka files.') elif ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'): sub_langs.append(lang) sub_names.append(sub_info.get('name')) @@ -631,31 +662,52 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): mp4_ass_warn = True self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') - if not sub_langs: + if not sub_langs and not json_subs: return [], info input_files = [filename, *sub_filenames] - opts = [ - *self.stream_copy_opts(ext=info['ext']), - # Don't copy the existing subtitles, we may be running the - # postprocessor a second time - '-map', '-0:s', - ] + opts = [*self.stream_copy_opts(ext=info['ext'])] + + if sub_langs and sub_names: + # We have regular subtitles available to embed. Don't copy the + # existing subtitles, we may be running the postprocessor a second + # time. + opts.extend([ + '-map', '-0:s', + ]) + for i, (lang, name) in enumerate(zip(sub_langs, sub_names, strict=True)): - opts.extend(['-map', f'{i + 1}:0']) lang_code = ISO639Utils.short2long(lang) or lang - opts.extend([f'-metadata:s:s:{i}', f'language={lang_code}']) + opts.extend([ + '-map', f'{i + 1}:0', + f'-metadata:s:s:{i}', f'language={lang_code}', + ]) + if name: opts.extend([f'-metadata:s:s:{i}', f'handler_name={name}', f'-metadata:s:s:{i}', f'title={name}']) + for json_lang, json_filename in json_subs.items(): + filename_arg = self._ffmpeg_filename_argument(json_filename) + filename_ss = self._escape_stream_specifier(filename_arg) + # FFmpeg sets the filename tag as the basename of the argument, + # protocol and all, for some reason. + filename_ss = os.path.basename(filename_ss) + + opts.extend([ + '-map', f'-0:m:filename:{json_lang}.json?', + '-attach', filename_arg, + f'-metadata:s:m:filename:{filename_ss}', 'mimetype=application/json', + f'-metadata:s:m:filename:{filename_ss}', f'filename={json_lang}.json', + ]) + temp_filename = prepend_extension(filename, 'temp') self.to_screen(f'Embedding subtitles in "{filename}"') self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.replace(temp_filename, filename) - files_to_delete = [] if self._already_have_subtitle else sub_filenames + files_to_delete = [] if self._already_have_subtitle else [*sub_filenames, *json_subs.values()] return files_to_delete, info @@ -670,7 +722,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): @staticmethod def _options(target_ext): audio_only = target_ext == 'm4a' - yield from FFmpegPostProcessor.stream_copy_opts(not audio_only) + yield from FFmpegPostProcessor.stream_copy_opts(not audio_only, ext=target_ext) if audio_only: yield from ('-vn', '-acodec', 'copy') @@ -798,15 +850,23 @@ class FFmpegMetadataPP(FFmpegPostProcessor): write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn) info['infojson_filename'] = infofn - old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') - if old_stream is not None: - yield ('-map', f'-0:{old_stream}') - new_stream -= 1 + filename_arg = self._ffmpeg_filename_argument(infofn) + filename_ss = self._escape_stream_specifier(filename_arg) + # FFmpeg sets the filename tag as the basename of the argument, protocol + # and all, for some reason. + filename_ss = os.path.basename(filename_ss) yield ( - '-attach', self._ffmpeg_filename_argument(infofn), - f'-metadata:s:{new_stream}', 'mimetype=application/json', - f'-metadata:s:{new_stream}', 'filename=info.json', + # In order to override any old info.json reliably we need to + # instruct FFmpeg to consider valid tracks without a codec id, like + # JSON attachments. + '-copy_unknown', + # This map operation allows us to actually replace any previous + # info.json data. + '-map', '-0:m:filename:info.json?', + '-attach', filename_arg, + f'-metadata:s:m:filename:{filename_ss}', 'mimetype=application/json', + f'-metadata:s:m:filename:{filename_ss}', 'filename=info.json', ) @@ -854,7 +914,7 @@ class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor): stretched_ratio = info.get('stretched_ratio') if stretched_ratio not in (None, 1): self._fixup('Fixing aspect ratio', info['filepath'], [ - *self.stream_copy_opts(), '-aspect', f'{stretched_ratio:f}']) + *self.stream_copy_opts(ext=info['ext']), '-aspect', f'{stretched_ratio:f}']) return [], info @@ -862,7 +922,7 @@ class FFmpegFixupM4aPP(FFmpegFixupPostProcessor): @PostProcessor._restrict_to(images=False, video=False) def run(self, info): if info.get('container') == 'm4a_dash': - self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(), '-f', 'mp4']) + self._fixup('Correcting container', info['filepath'], [*self.stream_copy_opts(ext=info['ext']), '-f', 'mp4']) return [], info @@ -885,7 +945,7 @@ class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor): if self.get_audio_codec(info['filepath']) == 'aac': args.extend(['-bsf:a', 'aac_adtstoasc']) self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [ - *self.stream_copy_opts(), *args]) + *self.stream_copy_opts(ext=info['ext']), *args]) return [], info @@ -906,7 +966,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): opts = ['-vf', 'setpts=PTS-STARTPTS'] else: opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] - self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False), '-ss', self.trim]) + self._fixup('Fixing frame timestamp', info['filepath'], [*opts, *self.stream_copy_opts(False, ext=info['ext']), '-ss', self.trim]) return [], info @@ -915,7 +975,7 @@ class FFmpegCopyStreamPP(FFmpegFixupPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): - self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts()) + self._fixup(self.MESSAGE, info['filepath'], self.stream_copy_opts(ext=info['ext'])) return [], info @@ -1044,7 +1104,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): self.to_screen(f'Splitting video by chapters; {len(chapters)} chapters found') for idx, chapter in enumerate(chapters): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) - self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())]) + self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts(ext=info['ext']))]) if in_file != info['filepath']: self._delete_downloaded_files(in_file, msg=None) return [], info