Upgrades for images to video

2026-05-16 09:57:35 -03:00 · 2024-07-31 11:58:55 +02:00
parent 198225dbc3
commit b32e6d738c
2 changed files with 69 additions and 23 deletions
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 - **v0.2 Ollama**: Improve ollama node with system prompt + model selection.
 - **v0.3 Save Image to Folder**: Add a new node : Save image to a chosen folder.
 - **v0.3 Save Images**: Add comfyui Metadata / workflow to all my image-related nodes.
- **v0.3 Images to video**: Support transparency with webm format. As well as an audio stream.
+- **v0.4 Images to video**: Support a transparency option with the webm format, selectable encoders, and an optional audio stream input.
 # 📝 Nodes descriptions
--- a/images_to_video.py
+++ b/images_to_video.py
@@ -14,8 +14,12 @@ class imagesToVideo:
                "images": ("IMAGE",),
                "fps": ("INT", {"default": 24, "min": 1, "max": 60}),
                "name_prefix": ("STRING", {"default": "output/imgs2video/me"}),
-                "format": (["mp4", "webm"],),
+                "format": (["mp4", "webm"], {"default": "mp4"}),
                "mp4_encoder": (["libx264 (H.264)", "h264_nvenc (H.264 / NVIDIA GPU)", "libx265 (H.265)", "hevc_nvenc (H.265 / NVIDIA GPU)"], {"default": "h264_nvenc (H.264 / NVIDIA GPU)"}),
                "webm_encoder": (["libvpx-vp9", "libaom-av1 (VERY SLOW)"], {"default": "libvpx-vp9"}),
                "crf": ("INT", {"default": 19, "min": 0, "max": 63}),
                "force_transparency": ("BOOLEAN", {"default": False}),
                # "preset": (["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"], {"default": "medium"}),
            },
            "optional": {
                "audio": ("AUDIO",),
@@ -25,10 +29,10 @@ class imagesToVideo:
    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("comment",)
    FUNCTION = "image_to_video"
-    OUTPUT_NODE = True
+    OUTPUT_NODE = False
    CATEGORY = "Bjornulf"
-    def image_to_video(self, images, fps, name_prefix, format, crf, audio=None):
+    def image_to_video(self, images, fps, name_prefix, format, crf, force_transparency, mp4_encoder, webm_encoder, audio=None):
        # Remove any existing extension
        name_prefix = os.path.splitext(name_prefix)[0]
@@ -69,7 +73,7 @@ class imagesToVideo:
            sample_rate = audio['sample_rate']
            sf.write(temp_audio_file, waveform, sample_rate)
-        # Construct the FFmpeg command based on the selected format
+        # Construct the FFmpeg command based on the selected format and encoder
        ffmpeg_cmd = [
            "ffmpeg",
            "-y",
@@ -80,27 +84,69 @@ class imagesToVideo:
        if temp_audio_file:
            ffmpeg_cmd.extend(["-i", temp_audio_file])
        if force_transparency:
            ffmpeg_cmd.extend([
                "-vf", "scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay",
            ])
        if format == "mp4":
            if mp4_encoder == "h264_nvenc (H.264 / NVIDIA GPU)":
                mp4_encoder = "h264_nvenc"
                ffmpeg_cmd.extend([
-                "-filter_complex", "[0:v]scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay",
+                    "-c:v", mp4_encoder,
-                "-crf", str(crf),
+                    # "-preset", "p" + preset,  # NVENC uses different preset names
-                "-c:v", "libx264",
+                    "-cq", str(crf),  # NVENC uses -cq instead of -crf
                "-pix_fmt", "yuv420p",
                ])
-            comment = "MP4 format: Widely compatible, efficient compression, no transparency support."
+            if mp4_encoder == "hevc_nvenc (H.265 / NVIDIA GPU)":
                mp4_encoder = "hevc_nvenc"
                ffmpeg_cmd.extend([
                    "-c:v", mp4_encoder,
                    # "-preset", "p" + preset,  # NVENC uses different preset names
                    "-cq", str(crf),  # NVENC uses -cq instead of -crf
                ])
            elif mp4_encoder == "libx264":
                ffmpeg_cmd.extend([
                    "-c:v", mp4_encoder,
                    # "-preset", preset,
                    "-crf", str(crf),
                ])
            elif mp4_encoder == "libx265":
                ffmpeg_cmd.extend([
                    "-c:v", mp4_encoder,
                    # "-preset", preset,
                    "-crf", str(crf),
                    "-tag:v", "hvc1",  # For better compatibility
                ])
            ffmpeg_cmd.extend(["-pix_fmt", "yuv420p"]) #No transparency
            comment = """MP4 format : Widely compatible, efficient compression, No transparency support.
 H.264: Fast encoding, widely compatible, larger file sizes for the same quality.
 H.265: More efficient compression, smaller file sizes, better for high-resolution video, slower encoding, BUT less universal support."""
        elif format == "webm":
            if webm_encoder == "libvpx-vp9":
                # cpu_used = preset_to_cpu_used.get(preset, 3)  # Default to 3 if preset not found
                ffmpeg_cmd.extend([
-                "-filter_complex", "[0:v]scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay",
+                    "-c:v", webm_encoder,
-                "-c:v", "libvpx-vp9",
+                    # "-cpu-used", str(cpu_used),
-                "-pix_fmt", "yuva420p",
+                    "-deadline", "realtime",
                "-b:v", "0",
                    "-crf", str(crf),
-                "-auto-alt-ref", "0",
+                    "-b:v", "0",
                    "-pix_fmt", "yuva420p", #Transparency
                ])
-            comment = "WebM format: Supports transparency, open format, smaller file size, but less compatible than MP4."
+            elif webm_encoder == "libaom-av1 (VERY SLOW)":
                # cpu_used = preset_to_cpu_used.get(preset, 3)  # Default to 3 if preset not found
                webm_encoder = "libaom-av1"
                ffmpeg_cmd.extend([
                    "-c:v", webm_encoder,
                    # "-cpu-used", str(cpu_used),
                    "-deadline", "realtime",
                    "-crf", str(crf),
                    "-b:v", "0",
                    "-pix_fmt", "yuva420p", #Transparency
                ])
            comment = """WebM format: Supports transparency, open format, smaller file size, but less compatible than MP4."""
        if temp_audio_file:
-            ffmpeg_cmd.extend(["-c:a", "libvorbis", "-shortest"])
+            ffmpeg_cmd.extend(["-c:a", "libvorbis" if format == "webm" else "aac", "-shortest"])
        ffmpeg_cmd.append(output_file)