From b32e6d738cd60b113f8657c4070ebe441a194830 Mon Sep 17 00:00:00 2001
From: justumen <anthony.eriksen@gmail.com>
Date: Wed, 31 Jul 2024 11:58:55 +0200
Subject: [PATCH] Upgrades for images to video

---
 README.md          |  2 +-
 images_to_video.py | 90 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 69 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index d582d49..1c36d7f 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 - **v0.2 Ollama**: Improve ollama node with system prompt + model selection.
 - **v0.3 Save Image to Folder**: Add a new node : Save image to a chosen folder.
 - **v0.3 Save Images**: Add comfyui Metadata / workflow to all my image-related nodes.
-- **v0.3 Images to video**: Support transparency with webm format. As well as an audio stream.
+- **v0.4 Images to video**: Optional transparency support with the webm format, selectable encoders, and an input for an audio stream.
 
 # 📝 Nodes descriptions
 
diff --git a/images_to_video.py b/images_to_video.py
index 2645b63..901f45f 100644
--- a/images_to_video.py
+++ b/images_to_video.py
@@ -14,8 +14,12 @@ class imagesToVideo:
                 "images": ("IMAGE",),
                 "fps": ("INT", {"default": 24, "min": 1, "max": 60}),
                 "name_prefix": ("STRING", {"default": "output/imgs2video/me"}),
-                "format": (["mp4", "webm"],),
+                "format": (["mp4", "webm"], {"default": "mp4"}),
+                "mp4_encoder": (["libx264 (H.264)", "h264_nvenc (H.264 / NVIDIA GPU)", "libx265 (H.265)", "hevc_nvenc (H.265 / NVIDIA GPU)"], {"default": "h264_nvenc (H.264 / NVIDIA GPU)"}),
+                "webm_encoder": (["libvpx-vp9", "libaom-av1 (VERY SLOW)"], {"default": "libvpx-vp9"}),
                 "crf": ("INT", {"default": 19, "min": 0, "max": 63}),
+                "force_transparency": ("BOOLEAN", {"default": False}),
+                # "preset": (["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"], {"default": "medium"}),
             },
             "optional": {
                 "audio": ("AUDIO",),
@@ -25,10 +29,10 @@ class imagesToVideo:
     RETURN_TYPES = ("STRING",)
     RETURN_NAMES = ("comment",)
     FUNCTION = "image_to_video"
-    OUTPUT_NODE = True
+    OUTPUT_NODE = False
     CATEGORY = "Bjornulf"
-
-    def image_to_video(self, images, fps, name_prefix, format, crf, audio=None):
+    
+    def image_to_video(self, images, fps, name_prefix, format, crf, force_transparency, mp4_encoder, webm_encoder, audio=None):
         # Remove any existing extension
         name_prefix = os.path.splitext(name_prefix)[0]
         
@@ -69,7 +73,7 @@ class imagesToVideo:
             sample_rate = audio['sample_rate']
             sf.write(temp_audio_file, waveform, sample_rate)
 
-        # Construct the FFmpeg command based on the selected format
+        # Construct the FFmpeg command based on the selected format and encoder
         ffmpeg_cmd = [
             "ffmpeg",
             "-y",
@@ -80,27 +84,69 @@ class imagesToVideo:
         if temp_audio_file:
             ffmpeg_cmd.extend(["-i", temp_audio_file])
 
+        if force_transparency:
+            ffmpeg_cmd.extend([
+                "-vf", "scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay",
+            ])
+
         if format == "mp4":
-            ffmpeg_cmd.extend([
-                "-filter_complex", "[0:v]scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay",
-                "-crf", str(crf),
-                "-c:v", "libx264",
-                "-pix_fmt", "yuv420p",
-            ])
-            comment = "MP4 format: Widely compatible, efficient compression, no transparency support."
+            if mp4_encoder == "h264_nvenc (H.264 / NVIDIA GPU)":
+                mp4_encoder = "h264_nvenc"
+                ffmpeg_cmd.extend([
+                    "-c:v", mp4_encoder,
+                    # "-preset", "p" + preset,  # NVENC uses different preset names
+                    "-cq", str(crf),  # NVENC uses -cq instead of -crf
+                ])
+            if mp4_encoder == "hevc_nvenc (H.265 / NVIDIA GPU)":
+                mp4_encoder = "hevc_nvenc"
+                ffmpeg_cmd.extend([
+                    "-c:v", mp4_encoder,
+                    # "-preset", "p" + preset,  # NVENC uses different preset names
+                    "-cq", str(crf),  # NVENC uses -cq instead of -crf
+                ])
+            elif mp4_encoder == "libx264":
+                ffmpeg_cmd.extend([
+                    "-c:v", mp4_encoder,
+                    # "-preset", preset,
+                    "-crf", str(crf),
+                ])
+            elif mp4_encoder == "libx265":
+                ffmpeg_cmd.extend([
+                    "-c:v", mp4_encoder,
+                    # "-preset", preset,
+                    "-crf", str(crf),
+                    "-tag:v", "hvc1",  # For better compatibility
+                ])
+            ffmpeg_cmd.extend(["-pix_fmt", "yuv420p"]) #No transparency
+            comment = """MP4 format : Widely compatible, efficient compression, No transparency support.
+H.264: Fast encoding, widely compatible, larger file sizes for the same quality.
+H.265: More efficient compression, smaller file sizes, better for high-resolution video, slower encoding, BUT less universal support."""
         elif format == "webm":
-            ffmpeg_cmd.extend([
-                "-filter_complex", "[0:v]scale=iw:ih,format=rgba,split[s0][s1];[s0]lutrgb=r=0:g=0:b=0:a=0[transparent];[transparent][s1]overlay",
-                "-c:v", "libvpx-vp9",
-                "-pix_fmt", "yuva420p",
-                "-b:v", "0",
-                "-crf", str(crf),
-                "-auto-alt-ref", "0",
-            ])
-            comment = "WebM format: Supports transparency, open format, smaller file size, but less compatible than MP4."
+            if webm_encoder == "libvpx-vp9":
+                # cpu_used = preset_to_cpu_used.get(preset, 3)  # Default to 3 if preset not found
+                ffmpeg_cmd.extend([
+                    "-c:v", webm_encoder,
+                    # "-cpu-used", str(cpu_used),
+                    "-deadline", "realtime",
+                    "-crf", str(crf),
+                    "-b:v", "0",
+                    "-pix_fmt", "yuva420p", #Transparency
+                ])
+            elif webm_encoder == "libaom-av1 (VERY SLOW)":
+                # cpu_used = preset_to_cpu_used.get(preset, 3)  # Default to 3 if preset not found
+                webm_encoder = "libaom-av1"
+                ffmpeg_cmd.extend([
+                    "-c:v", webm_encoder,
+                    # "-cpu-used", str(cpu_used),
+                    "-deadline", "realtime",
+                    "-crf", str(crf),
+                    "-b:v", "0",
+                    "-pix_fmt", "yuva420p", #Transparency
+                ])
+            comment = """WebM format: Supports transparency, open format, smaller file size, but less compatible than MP4."""
 
         if temp_audio_file:
-            ffmpeg_cmd.extend(["-c:a", "libvorbis", "-shortest"])
+            ffmpeg_cmd.extend(["-c:a", "libvorbis" if format == "webm" else "aac", "-shortest"])
 
         ffmpeg_cmd.append(output_file)