diff --git a/.gitignore b/.gitignore index a12fe06..a0b43b1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,5 @@ SaveText/ API_example/ clear_vram.py web/js/clear_vram.js -*.txt -speakers \ No newline at end of file +speakers +*.text \ No newline at end of file diff --git a/CUSTOM_STRING.py.txt b/CUSTOM_STRING.py.txt deleted file mode 100644 index a643868..0000000 --- a/CUSTOM_STRING.py.txt +++ /dev/null @@ -1,11 +0,0 @@ -class CustomStringType: - @classmethod - def INPUT_TYPES(s): - return {"required": {"value": ("STRING", {"multiline": True})}} - - RETURN_TYPES = ("CUSTOM_STRING",) - FUNCTION = "passthrough" - CATEGORY = "Bjornulf" - - def passthrough(self, value): - return (value,) \ No newline at end of file diff --git a/README.md b/README.md index 2c23058..17bfa13 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# 🔗 Comfyui : Bjornulf_custom_nodes v0.50 🔗 +# 🔗 Comfyui : Bjornulf_custom_nodes v0.51 🔗 -A list of 59 custom nodes for Comfyui : Display, manipulate, and edit text, images, videos, loras and more. +A list of 61 custom nodes for Comfyui : Display, manipulate, and edit text, images, videos, loras and more. You can manage looping operations, generate randomized content, trigger logical conditions, pause and manually control your workflows and even work with external AI tools, like Ollama or Text To Speech.
# Coffee : ☕☕☕☕☕ 5/5 @@ -82,6 +82,8 @@ You can manage looping operations, generate randomized content, trigger logical `44.` [🖼👈 Select an Image, Pick](#44----select-an-image-pick) `46.` [🖼🔍 Image Details](#46----image-details) `47.` [🖼 Combine Images](#47----combine-images) +`60.` [🖼🖼 Merge Images/Videos 📹📹 (Horizontally)](#60) +`61.` [🖼🖼 Merge Images/Videos 📹📹 (Vertically)](#61) ## 🚀 Load checkpoints 🚀 `40.` [🎲 Random (Model+Clip+Vae) - aka Checkpoint / Model](#40----random-modelclipvae---aka-checkpoint--model) @@ -102,6 +104,8 @@ You can manage looping operations, generate randomized content, trigger logical `52.` [🔊📹 Audio Video Sync](#52----audio-video-sync) `58.` [📹🔗 Concat Videos](#58----concat-videos) `59.` [📹🔊 Combine Video + Audio](#59----combine-video--audio) +`60.` [🖼🖼 Merge Images/Videos 📹📹 (Horizontally)](#60) +`61.` [🖼🖼 Merge Images/Videos 📹📹 (Vertically)](#61) ## 🤖 AI 🤖 `19.` [🦙 Ollama](#19----ollama) @@ -256,6 +260,7 @@ cd /where/you/installed/ComfyUI && python main.py - **v0.48**: Two new nodes for loras : Random Lora Selector and Loop Lora Selector. - **v0.49**: New node : Loop Sequential (Integer) - Loop through a range of integer values. (But once per workflow run), audio sync is smarter and adapt the video duration to the audio duration. add requirements.txt - **v0.50**: allow audio in Images to Video path (tmp video). Add three new nodes : Concat Videos, combine video/audio and Loop Sequential (input Lines). save text changes to write inside COmfyui folder. Fix random line from input outputing LIST. ❗ Breaking change to audio/video sync node, allowing different types as input. +- **v0.51**: Fix some issues with audio/video sync node. Add two new nodes : merge images/videos vertical and horizontal. # 📝 Nodes descriptions @@ -823,6 +828,7 @@ Display the details of an image.
(width, height, has_transparency, orientation, **Description:** Combine multiple images (A single image or a list of images.) +If you want to merge several images into a single image, check node 60 or 61. There are two types of logic to "combine images". With "all_in_one" enabled, it will combine all the images into one tensor. Otherwise it will send the images one by one. (check examples below) : @@ -978,3 +984,21 @@ Video : Use list of images or video path. Audio : Use audio path or audio type. ![combine video audio](screenshots/combine_video_audio.png) + +### 60 - 🖼🖼 Merge Images/Videos 📹📹 (Horizontally) + +**Description:** +Merge images or videos horizontally. + +![merge images](screenshots/merge_images_h.png) + +Here is one possible example for videos with nodes 60 and 61 : + +![merge videos](screenshots/merge_videos.png) + +### 61 - 🖼🖼 Merge Images/Videos 📹📹 (Vertically) + +**Description:** +Merge images or videos vertically. + +![merge images](screenshots/merge_images_v.png) \ No newline at end of file diff --git a/__init__.py b/__init__.py index 193d4a2..d2cd4a7 100644 --- a/__init__.py +++ b/__init__.py @@ -62,9 +62,13 @@ from .loop_sequential_integer import LoopIntegerSequential from .loop_lines_sequential import LoopLinesSequential from .concat_videos import ConcatVideos from .combine_video_audio import CombineVideoAudio +from .images_merger_horizontal import MergeImagesHorizontally +from .images_merger_vertical import MergeImagesVertically NODE_CLASS_MAPPINGS = { "Bjornulf_ollamaLoader": ollamaLoader, + "Bjornulf_MergeImagesHorizontally": MergeImagesHorizontally, + "Bjornulf_MergeImagesVertically": MergeImagesVertically, "Bjornulf_CombineVideoAudio": CombineVideoAudio, "Bjornulf_ConcatVideos": ConcatVideos, "Bjornulf_LoopLinesSequential": LoopLinesSequential, @@ -128,6 +132,8 @@ NODE_CLASS_MAPPINGS = { NODE_DISPLAY_NAME_MAPPINGS = { "Bjornulf_WriteText": "βœ’ Write Text", + "Bjornulf_MergeImagesHorizontally": "πŸ–ΌπŸ–Ό Merge
Images/Videos πŸ“ΉπŸ“Ή (Horizontally)", + "Bjornulf_MergeImagesVertically": "πŸ–ΌπŸ–Ό Merge Images/Videos πŸ“ΉπŸ“Ή (Vertically)", "Bjornulf_CombineVideoAudio": "πŸ“ΉπŸ”Š Combine Video + Audio", "Bjornulf_ConcatVideos": "πŸ“ΉπŸ”— Concat Videos", "Bjornulf_LoopLinesSequential": "β™»πŸ“ Loop Sequential (input Lines)", diff --git a/audio_video_sync.py b/audio_video_sync.py index e12a205..79c06d8 100644 --- a/audio_video_sync.py +++ b/audio_video_sync.py @@ -231,12 +231,13 @@ class AudioVideoSync: def process_audio(self, audio_tensor, sample_rate, target_duration, original_duration, max_speedup, max_slowdown): """Process audio to match video duration.""" - if audio_tensor.dim() == 3: - audio_tensor = audio_tensor.squeeze(0) - elif audio_tensor.dim() == 1: + # Ensure audio tensor has correct dimensions + if audio_tensor.dim() == 2: audio_tensor = audio_tensor.unsqueeze(0) + elif audio_tensor.dim() == 1: + audio_tensor = audio_tensor.unsqueeze(0).unsqueeze(0) - current_duration = audio_tensor.shape[1] / sample_rate + current_duration = audio_tensor.shape[-1] / sample_rate # Calculate synchronized video duration if target_duration > original_duration: @@ -256,17 +257,17 @@ class AudioVideoSync: # Adjust audio length if current_duration < sync_duration: silence_samples = int((sync_duration - current_duration) * sample_rate) - silence = torch.zeros(audio_tensor.shape[0], silence_samples) - processed_audio = torch.cat([audio_tensor, silence], dim=1) + silence = torch.zeros(audio_tensor.shape[0], audio_tensor.shape[1], silence_samples) + processed_audio = torch.cat([audio_tensor, silence], dim=-1) else: required_samples = int(sync_duration * sample_rate) - processed_audio = audio_tensor[:, :required_samples] + processed_audio = audio_tensor[..., :required_samples] return processed_audio, sync_duration def save_audio(self, audio_tensor, sample_rate, target_duration, original_duration, max_speedup, max_slowdown): - """Save processed audio to file.""" + """Save processed audio 
to file and return consistent AUDIO format.""" timestamp = self.generate_timestamp() output_path = os.path.join(self.sync_audio_dir, f"sync_audio_{timestamp}.wav") @@ -275,12 +276,29 @@ class AudioVideoSync: max_speedup, max_slowdown ) - torchaudio.save(output_path, processed_audio, sample_rate) - return os.path.abspath(output_path) + # Save with proper format + torchaudio.save(output_path, processed_audio.squeeze(0), sample_rate) + + # Return consistent AUDIO format + return { + 'waveform': processed_audio, + 'sample_rate': sample_rate + } def load_audio_from_path(self, audio_path): - """Load audio from file path.""" + """Load audio from file path and format it consistently with AUDIO input.""" waveform, sample_rate = torchaudio.load(audio_path) + + # Ensure waveform has 3 dimensions (batch, channels, samples) like AUDIO input + if waveform.dim() == 2: + waveform = waveform.unsqueeze(0) # Add batch dimension + + # Convert to float32 and normalize to range [0, 1] if needed + if waveform.dtype != torch.float32: + waveform = waveform.float() + if waveform.max() > 1.0: + waveform = waveform / 32768.0 # Normalize 16-bit audio + return {'waveform': waveform, 'sample_rate': sample_rate} def extract_frames(self, video_path): @@ -297,7 +315,10 @@ class AudioVideoSync: # Load frames and convert to tensor frames = [] frame_files = sorted(os.listdir(temp_dir)) - transform = transforms.Compose([transforms.ToTensor()]) + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Lambda(lambda x: x * 255) # Scale to 0-255 range + ]) for frame_file in frame_files: image = Image.open(os.path.join(temp_dir, frame_file)) @@ -307,6 +328,13 @@ class AudioVideoSync: # Stack frames into a single tensor frames_tensor = torch.stack(frames) + # Ensure the tensor is in the correct format (B, C, H, W) + if frames_tensor.dim() == 3: + frames_tensor = frames_tensor.unsqueeze(0) + + # Convert to uint8 + frames_tensor = frames_tensor.byte() + # Clean up temporary directory for 
frame_file in frame_files: os.remove(os.path.join(temp_dir, frame_file)) @@ -350,25 +378,35 @@ class AudioVideoSync: sync_video_path = self.create_sync_video( video_path, original_duration, audio_duration, max_speedup, max_slowdown ) - sync_audio_path = self.save_audio( + + # Process and save audio, getting consistent AUDIO format back + sync_audio = self.save_audio( AUDIO['waveform'], AUDIO['sample_rate'], audio_duration, original_duration, max_speedup, max_slowdown ) + + # Get sync_audio_path separately + sync_audio_path = os.path.join(self.sync_audio_dir, f"sync_audio_{self.generate_timestamp()}.wav") + torchaudio.save(sync_audio_path, sync_audio['waveform'].squeeze(0), sync_audio['sample_rate']) # Get final properties sync_video_duration, _, sync_frame_count = self.get_video_info(sync_video_path) - sync_audio_duration = torchaudio.info(sync_audio_path).num_frames / AUDIO['sample_rate'] + sync_audio_duration = sync_audio['waveform'].shape[-1] / sync_audio['sample_rate'] video_frames = self.extract_frames(sync_video_path) + # Convert video_frames to the format expected by ComfyUI + video_frames = video_frames.float() / 255.0 + video_frames = video_frames.permute(0, 2, 3, 1) + return ( video_frames, - AUDIO, + sync_audio, # Now returns consistent AUDIO format sync_audio_path, sync_video_path, - original_duration, # input_video_duration + original_duration, sync_video_duration, - audio_duration, # input_audio_duration + audio_duration, sync_audio_duration, sync_frame_count ) \ No newline at end of file diff --git a/create_seed_from_text.txt b/create_seed_from_text.txt deleted file mode 100644 index 95fa53c..0000000 --- a/create_seed_from_text.txt +++ /dev/null @@ -1,31 +0,0 @@ -import random -import hashlib - -class TextToStringAndSeed: - @classmethod - def INPUT_TYPES(s): - return { - "required": { - "text": ("STRING", {"forceInput": True}), - }, - } - - RETURN_NAMES = ("text","random_seed") - RETURN_TYPES = ("STRING", "INT") - FUNCTION = "process" - CATEGORY = 
"utils" - - def process(self, text): - # Generate a hash from the input text - text_hash = hashlib.md5(text.encode()).hexdigest() - - # Use the hash to seed the random number generator - random.seed(text_hash) - - # Generate a random seed (integer) - random_seed = random.randint(0, 2**32 - 1) - - # Reset the random seed to ensure randomness in subsequent calls - random.seed() - - return (text, random_seed) \ No newline at end of file diff --git a/images_merger_horizontal.py b/images_merger_horizontal.py new file mode 100644 index 0000000..0c9f664 --- /dev/null +++ b/images_merger_horizontal.py @@ -0,0 +1,56 @@ +import torch +import numpy as np +from PIL import Image + +class MergeImagesHorizontally: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image1": ("IMAGE",), + "image2": ("IMAGE",), + }, + "optional": { + "image3": ("IMAGE",), + "image4": ("IMAGE",), + } + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "combine_images" + + CATEGORY = "Bjornulf" + + def combine_images(self, image1, image2, image3=None, image4=None): + # Collect all provided images + images = [image1, image2] + if image3 is not None: + images.append(image3) + if image4 is not None: + images.append(image4) + + # Calculate the total width and maximum height + total_width = sum(img.shape[2] for img in images) # Sum of widths + max_height = max(img.shape[1] for img in images) # Maximum height + + # Create a new tensor for the combined image + combined_image = torch.zeros((images[0].shape[0], max_height, total_width, 3), dtype=images[0].dtype, device=images[0].device) + + # Paste images side by side + current_x = 0 + for img in images: + b, h, w, c = img.shape + combined_image[:, :h, current_x:current_x+w, :] = img + + # Blend the edge pixels if it's not the last image + # if current_x + w < total_width: + # combined_image[:, :h, current_x+w-1:current_x+w+1, :] = torch.mean( + # torch.stack([ + # combined_image[:, :h, current_x+w-1:current_x+w, :], + # combined_image[:, :h, 
current_x+w:current_x+w+1, :] + # ]), dim=0 + # ) + + current_x += w + + return (combined_image,) diff --git a/images_merger_vertical.py b/images_merger_vertical.py new file mode 100644 index 0000000..b3782f1 --- /dev/null +++ b/images_merger_vertical.py @@ -0,0 +1,46 @@ +import torch +import numpy as np +from PIL import Image + +class MergeImagesVertically: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image1": ("IMAGE",), + "image2": ("IMAGE",), + }, + "optional": { + "image3": ("IMAGE",), + "image4": ("IMAGE",), + } + } + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "combine_images" + + CATEGORY = "Bjornulf" + + def combine_images(self, image1, image2, image3=None, image4=None): + # Collect all provided images + images = [image1, image2] + if image3 is not None: + images.append(image3) + if image4 is not None: + images.append(image4) + + # Calculate the total width and maximum height + total_width = sum(img.shape[1] for img in images) + max_height = max(img.shape[2] for img in images) + + # Create a new tensor for the combined image + combined_image = torch.zeros((images[0].shape[0], total_width, max_height, 3), dtype=images[0].dtype, device=images[0].device) + + # Paste images side by side + current_x = 0 + for img in images: + w, h = img.shape[1:3] + combined_image[:, current_x:current_x+w, :h, :] = img[:, :, :, :] + current_x += w + + return (combined_image,) \ No newline at end of file diff --git a/ollama_ip.txt b/ollama_ip.txt new file mode 100644 index 0000000..d8878c2 --- /dev/null +++ b/ollama_ip.txt @@ -0,0 +1 @@ +http://0.0.0.0:11434 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 54330c4..bc3a8ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "bjornulf_custom_nodes" description = "Nodes: Ollama, Text to Speech, Combine Texts, Random Texts, Save image for Bjornulf LobeChat, Text with random Seed, Random line from input, Combine images, Image to grayscale (black & white), 
Remove image Transparency (alpha), Resize Image, ..." -version = "0.50" +version = "0.51" license = {file = "LICENSE"} [project.urls] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..46ac383 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +ollama +pydub \ No newline at end of file diff --git a/screenshots/merge_images_h.png b/screenshots/merge_images_h.png new file mode 100644 index 0000000..28a23ad Binary files /dev/null and b/screenshots/merge_images_h.png differ diff --git a/screenshots/merge_images_v.png b/screenshots/merge_images_v.png new file mode 100644 index 0000000..1e8d783 Binary files /dev/null and b/screenshots/merge_images_v.png differ diff --git a/screenshots/merge_videos.png b/screenshots/merge_videos.png new file mode 100644 index 0000000..7b52b88 Binary files /dev/null and b/screenshots/merge_videos.png differ diff --git a/web/js/BJORNULF_TYPES.js.txt b/web/js/BJORNULF_TYPES.js.txt deleted file mode 100644 index db6345c..0000000 --- a/web/js/BJORNULF_TYPES.js.txt +++ /dev/null @@ -1,29 +0,0 @@ -import { app } from "../../../scripts/app.js"; - -app.registerExtension({ - name: "Bjornulf.CustomBjornulfType", - async beforeRegisterNodeDef(nodeType, nodeData, app) { - if (nodeData.name === "Bjornulf_WriteImageCharacters") { - const onNodeCreated = nodeType.prototype.onNodeCreated; - nodeType.prototype.onNodeCreated = function () { - onNodeCreated?.apply(this, arguments); - const myInput = this.inputs.find(input => input.name === "BJORNULF_CHARACTER"); - if (myInput) { - myInput.type = "BJORNULF_CHARACTER"; - } - }; - } - else if (nodeData.name === "Bjornulf_WriteImageCharacter") { - - } - }, - async setup(app) { - app.registerCustomNodeType("BJORNULF_CHARACTER", (value) => { - return { - type: "BJORNULF_CHARACTER", - data: { value: value || "" }, - name: "BJORNULF_CHARACTER" - }; - }); - } -}); \ No newline at end of file diff --git a/web/js/CUSTOM_STRING.js.txt b/web/js/CUSTOM_STRING.js.txt deleted file mode 
100644 index 5202e8b..0000000 --- a/web/js/CUSTOM_STRING.js.txt +++ /dev/null @@ -1,52 +0,0 @@ -import { app } from "../../../scripts/app.js"; - -app.registerExtension({ - name: "Bjornulf.CustomStringType", - async beforeRegisterNodeDef(nodeType, nodeData, app) { - if (nodeData.name === "Bjornulf_WriteImageAllInOne") { - const onNodeCreated = nodeType.prototype.onNodeCreated; - nodeType.prototype.onNodeCreated = function () { - onNodeCreated?.apply(this, arguments); - const locationInput = this.inputs.find(input => input.name === "location"); - if (locationInput) { - locationInput.type = "CUSTOM_STRING"; - } - }; - } - }, - async setup(app) { - app.registerCustomNodeType("CUSTOM_STRING", (value) => { - return { - type: "CustomStringType", - data: { value: value || "" }, - name: "CustomStringType" - }; - }); - } -}); - - - // Override the default onConnectionCreated method - const originalOnConnectionCreated = LGraphCanvas.prototype.onConnectionCreated; - LGraphCanvas.prototype.onConnectionCreated = function(connection, e, node_for_click) { - if (node_for_click && node_for_click.type === "WriteImageAllInOne" && connection.targetInput.name === "location") { - // Check if the connected node is not already a CustomString - if (connection.origin_node.type !== "CustomString") { - // Create a new CustomString node - const customStringNode = LiteGraph.createNode("CustomString"); - // Position the new node - customStringNode.pos = [connection.origin_node.pos[0] + 200, connection.origin_node.pos[1]]; - this.graph.add(customStringNode); - - // Connect the new CustomString node - connection.origin_node.connect(connection.origin_slot, customStringNode, 0); - customStringNode.connect(0, node_for_click, connection.target_slot); - - // Remove the original connection - connection.origin_node.disconnectOutput(connection.origin_slot, node_for_click); - - return true; // Prevent the original connection - } - } - return originalOnConnectionCreated.apply(this, arguments); - }; \ No 
newline at end of file