From b32e6d738cd60b113f8657c4070ebe441a194830 Mon Sep 17 00:00:00 2001 From: justumen Date: Wed, 31 Jul 2024 11:58:55 +0200 Subject: [PATCH] Upgrades for images to video --- README.md | 2 +- images_to_video.py | 90 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 69 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index d582d49..1c36d7f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ - **v0.2 Ollama**: Improve ollama node with system prompt + model selection. - **v0.3 Save Image to Folder**: Add a new node : Save image to a chosen folder. - **v0.3 Save Images**: Add comfyui Metadata / workflow to all my image-related nodes. -- **v0.3 Images to video**: Support transparency with webm format. As well as an audio stream. +- **v0.4 Images to video**: Support a transparency option with the webm format and selectable encoders, as well as an input for an audio stream. # 📝 Nodes descriptions diff --git a/images_to_video.py b/images_to_video.py index 2645b63..901f45f 100644 --- a/images_to_video.py +++ b/images_to_video.py @@ -14,8 +14,12 @@ class imagesToVideo: "images": ("IMAGE",), "fps": ("INT", {"default": 24, "min": 1, "max": 60}), "name_prefix": ("STRING", {"default": "output/imgs2video/me"}), - "format": (["mp4", "webm"],), + "format": (["mp4", "webm"], {"default": "mp4"}), + "mp4_encoder": (["libx264 (H.264)", "h264_nvenc (H.264 / NVIDIA GPU)", "libx265 (H.265)", "hevc_nvenc (H.265 / NVIDIA GPU)"], {"default": "h264_nvenc (H.264 / NVIDIA GPU)"}), + "webm_encoder": (["libvpx-vp9", "libaom-av1 (VERY SLOW)"], {"default": "libvpx-vp9"}), "crf": ("INT", {"default": 19, "min": 0, "max": 63}), + "force_transparency": ("BOOLEAN", {"default": False}), + # "preset": (["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"], {"default": "medium"}), }, "optional": { "audio": ("AUDIO",), @@ -25,10 +29,10 @@ class imagesToVideo: RETURN_TYPES = ("STRING",) RETURN_NAMES = ("comment",) FUNCTION = "image_to_video" - 
OUTPUT_NODE = True + OUTPUT_NODE = False CATEGORY = "Bjornulf" - - def image_to_video(self, images, fps, name_prefix, format, crf, audio=None): + + def image_to_video(self, images, fps, name_prefix, format, crf, force_transparency, mp4_encoder, webm_encoder, audio=None): # Remove any existing extension name_prefix = os.path.splitext(name_prefix)[0] @@ -69,7 +73,7 @@ class imagesToVideo: sample_rate = audio['sample_rate'] sf.write(temp_audio_file, waveform, sample_rate) - # Construct the FFmpeg command based on the selected format + # Construct the FFmpeg command based on the selected format and encoder ffmpeg_cmd = [ "ffmpeg", "-y", @@ -80,27 +84,69 @@ class imagesToVideo: if temp_audio_file: ffmpeg_cmd.extend(["-i", temp_audio_file]) + if force_transparency: + ffmpeg_cmd.extend([ + "-vf", "scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay", + ]) + if format == "mp4": - ffmpeg_cmd.extend([ - "-filter_complex", "[0:v]scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay", - "-crf", str(crf), - "-c:v", "libx264", - "-pix_fmt", "yuv420p", - ]) - comment = "MP4 format: Widely compatible, efficient compression, no transparency support." 
+ if mp4_encoder == "h264_nvenc (H.264 / NVIDIA GPU)": + mp4_encoder = "h264_nvenc" + ffmpeg_cmd.extend([ + "-c:v", mp4_encoder, + # "-preset", "p" + preset, # NVENC uses different preset names + "-cq", str(crf), # NVENC uses -cq instead of -crf + ]) + if mp4_encoder == "hevc_nvenc (H.265 / NVIDIA GPU)": + mp4_encoder = "hevc_nvenc" + ffmpeg_cmd.extend([ + "-c:v", mp4_encoder, + # "-preset", "p" + preset, # NVENC uses different preset names + "-cq", str(crf), # NVENC uses -cq instead of -crf + ]) + elif mp4_encoder == "libx264": + ffmpeg_cmd.extend([ + "-c:v", mp4_encoder, + # "-preset", preset, + "-crf", str(crf), + ]) + elif mp4_encoder == "libx265": + ffmpeg_cmd.extend([ + "-c:v", mp4_encoder, + # "-preset", preset, + "-crf", str(crf), + "-tag:v", "hvc1", # For better compatibility + ]) + ffmpeg_cmd.extend(["-pix_fmt", "yuv420p"]) #No transparency + comment = """MP4 format : Widely compatible, efficient compression, No transparency support. +H.264: Fast encoding, widely compatible, larger file sizes for the same quality. +H.265: More efficient compression, smaller file sizes, better for high-resolution video, slower encoding, BUT less universal support.""" elif format == "webm": - ffmpeg_cmd.extend([ - "-filter_complex", "[0:v]scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay", - "-c:v", "libvpx-vp9", - "-pix_fmt", "yuva420p", - "-b:v", "0", - "-crf", str(crf), - "-auto-alt-ref", "0", - ]) - comment = "WebM format: Supports transparency, open format, smaller file size, but less compatible than MP4." 
+ if webm_encoder == "libvpx-vp9": + # cpu_used = preset_to_cpu_used.get(preset, 3) # Default to 3 if preset not found + ffmpeg_cmd.extend([ + "-c:v", webm_encoder, + # "-cpu-used", str(cpu_used), + "-deadline", "realtime", + "-crf", str(crf), + "-b:v", "0", + "-pix_fmt", "yuva420p", #Transparency + ]) + elif webm_encoder == "libaom-av1 (VERY SLOW)": + # cpu_used = preset_to_cpu_used.get(preset, 3) # Default to 3 if preset not found + webm_encoder = "libaom-av1" + ffmpeg_cmd.extend([ + "-c:v", webm_encoder, + # "-cpu-used", str(cpu_used), + "-deadline", "realtime", + "-crf", str(crf), + "-b:v", "0", + "-pix_fmt", "yuva420p", #Transparency + ]) + comment = """WebM format: Supports transparency, open format, smaller file size, but less compatible than MP4.""" if temp_audio_file: - ffmpeg_cmd.extend(["-c:a", "libvorbis", "-shortest"]) + ffmpeg_cmd.extend(["-c:a", "libvorbis" if format == "webm" else "aac", "-shortest"]) ffmpeg_cmd.append(output_file)