diff --git a/README.md b/README.md index 90f3461..863cf2c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# πŸ”— Comfyui : Bjornulf_custom_nodes v0.66 πŸ”— +# πŸ”— Comfyui : Bjornulf_custom_nodes v0.67 πŸ”— -A list of 119 custom nodes for Comfyui : Display, manipulate, create and edit text, images, videos, loras, generate characters and more. +A list of 120 custom nodes for Comfyui : Display, manipulate, create and edit text, images, videos, loras, generate characters and more. You can manage looping operations, generate randomized content, trigger logical conditions, pause and manually control your workflows and even work with external AI tools, like Ollama or Text To Speech. # Coffee : β˜•β˜•β˜•β˜•β˜• 5/5 @@ -192,6 +192,7 @@ Support me and my work : ❀️❀️❀️ ❀️ `59.` [πŸ“ΉπŸ”Š Combine Video + Audio](#59----combine-video--audio) `66.` [πŸ”ŠβžœπŸ“ STT - Speech to Text](#66----stt---speech-to-text) `118.` [πŸ”Š TTS Configuration βš™](#118----tts-configuration-) +`120.` [πŸ“βžœπŸ”Š Kokoro - Text to Speech](#120----kokoro---text-to-speech) ## πŸ’» System πŸ’» `34.` [🧹 Free VRAM hack](#34----free-vram-hack) @@ -360,6 +361,7 @@ cd /where/you/installed/ComfyUI && python main.py - **0.64**: remove "import wget", added some keywords to text generators. - **0.65**: ❗Breaking changes : Combine Text inputs are now all optional (PLease remake your nodes, sorry.) Add 6 new nodes : any2int, any2float, load text from folder, load text from path, load lora from path. Also upgraded the Save text node. - **0.66**: Add lora hunyuan CIVIT ai + download, add TTS configuration node, edit requirements.txt +- **0.67**: Add kokoro TTS node. # πŸ“ Nodes descriptions @@ -1679,4 +1681,12 @@ Take a CivitAI Lora to use with Hunyuan. (NSFW list not on github of course.) 
The workflow below is included : `workflows/HUNYUAN_basic_lora.json`) : -![hunyuan lora](screenshots/hunyuan_lora.png) \ No newline at end of file +![hunyuan lora](screenshots/hunyuan_lora.png) + +#### 120 - πŸ“βžœπŸ”Š Kokoro - Text to Speech + +**Description:** +Another Text to Speech node, based on Kokoro: https://github.com/thewh1teagle/kokoro-onnx +Lightweight, much simpler, no configuration and fully integrated into Comfyui. (No external backend to run.) + +![tts kokoro](screenshots/kokoro_tts.png) \ No newline at end of file diff --git a/__init__.py b/__init__.py index 22568b0..6e49b95 100644 --- a/__init__.py +++ b/__init__.py @@ -94,10 +94,12 @@ from .loader_lora_with_path import LoaderLoraWithPath from .load_text import LoadTextFromFolder, LoadTextFromPath from .string_splitter import TextSplitin5 from .line_selector import LineSelector +from .text_to_speech_kokoro import KokoroTTS # from .text_generator_t2v import TextGeneratorText2Video NODE_CLASS_MAPPINGS = { "Bjornulf_LineSelector": LineSelector, "Bjornulf_XTTSConfig": XTTSConfig, + "Bjornulf_KokoroTTS": KokoroTTS, # "Bjornulf_TextGeneratorText2Video": TextGeneratorText2Video, "Bjornulf_LatentResolutionSelector": LatentResolutionSelector, "Bjornulf_LoaderLoraWithPath": LoaderLoraWithPath, @@ -223,10 +225,13 @@ NODE_CLASS_MAPPINGS = { } NODE_DISPLAY_NAME_MAPPINGS = { + "Bjornulf_XTTSConfig": "πŸ”Š TTS Configuration βš™", + "Bjornulf_TextToSpeech": "πŸ“βžœπŸ”Š TTS - Text to Speech", # "Bjornulf_HiResFix": "HiResFix", # "Bjornulf_ImageBlend": "🎨 Image Blend", # "Bjornulf_APIHiResCivitAI": "🎨➜🎨 API Image hires fix (CivitAI)", # "Bjornulf_CivitAILoraSelector": "lora Civit", + "Bjornulf_KokoroTTS": "πŸ“βžœπŸ”Š Kokoro - Text to Speech", "Bjornulf_LineSelector": "πŸ“πŸ‘ˆ Line selector (🎲 Or random)", "Bjornulf_LoaderLoraWithPath": "πŸ“₯πŸ‘‘ Load Lora with Path", # "Bjornulf_TextGeneratorText2Video": "πŸ”₯πŸ“πŸ“Ή Text Generator for text to video πŸ“ΉπŸ“πŸ”₯", diff --git a/pyproject.toml b/pyproject.toml 
index 3b8447a..5251bb8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "bjornulf_custom_nodes"
-description = "116 ComfyUI nodes : Display, manipulate, and edit text, images, videos, loras, generate characters and more. Manage looping operations, generate randomized content, use logical conditions and work with external AI tools, like Ollama or Text To Speech."
-version = "0.66"
+description = "120 ComfyUI nodes : Display, manipulate, and edit text, images, videos, loras, generate characters and more. Manage looping operations, generate randomized content, use logical conditions and work with external AI tools, like Ollama or Text To Speech Kokoro, etc..."
+version = "0.67"
 license = {file = "LICENSE"}
 
 [project.urls]
diff --git a/requirements.txt b/requirements.txt
index 08e983d..d3594b4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,5 @@ faster_whisper
 ffmpeg-python
 civitai-py
 fal_client
-importlib
\ No newline at end of file
+sounddevice
+kokoro_onnx
\ No newline at end of file
diff --git a/screenshots/kokoro_tts.png b/screenshots/kokoro_tts.png
new file mode 100644
index 0000000..df488a0
Binary files /dev/null and b/screenshots/kokoro_tts.png differ
diff --git a/text_to_speech_kokoro.py b/text_to_speech_kokoro.py
new file mode 100644
index 0000000..c8e1612
--- /dev/null
+++ b/text_to_speech_kokoro.py
@@ -0,0 +1,207 @@
+import hashlib
+import os
+import random
+
+import numpy as np
+import requests
+import torch
+
+VOICE_OPTIONS = {
+    "af_bella": "Bella (American Female) - af_bella",
+    "af_nicole": "Nicole (American Female) - af_nicole",
+    "af_sarah": "Sarah (American Female) - af_sarah",
+    "af_sky": "Sky (American Female) - af_sky",
+    "af": "Default (American Female) - af",
+    "am_adam": "Adam (American Male) - am_adam",
+    "am_michael": "Michael (American Male) - am_michael",
+    "bf_emma": "Emma (British Female) - bf_emma",
+    "bf_isabella": "Isabella (British Female) - bf_isabella",
+    "bm_george": "George (British Male) - bm_george",
+    "bm_lewis": "Lewis (British Male) - bm_lewis"
+}
+
+# Create a reversed mapping for display to value
+VOICE_DISPLAY_TO_VALUE = {v: k for k, v in VOICE_OPTIONS.items()}
+
+LANGUAGE_OPTIONS = {
+    "en-us": "English (US)",
+    "en-gb": "English (UK)",
+    "fr-fr": "French",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "cmn": "Chinese (Mandarin)"
+}
+
+# Connect/read timeout (seconds) applied to the model file downloads.
+DOWNLOAD_TIMEOUT = 60
+
+
+def download_if_not_exists(url, dest_path):
+    """Download ``url`` to ``dest_path`` unless that file already exists.
+
+    The payload is streamed to ``<dest_path>.part`` and only renamed to
+    ``dest_path`` after the transfer completes, so an interrupted download
+    cannot leave a truncated file that later runs would treat as valid.
+
+    Raises:
+        requests.RequestException: on connection failure, timeout or an
+            HTTP error status.
+        OSError: if the destination cannot be written.
+    """
+    if os.path.exists(dest_path):
+        return
+
+    file_name = os.path.basename(dest_path)
+    print(f"Downloading {file_name}...")
+    parent_dir = os.path.dirname(dest_path)
+    if parent_dir:  # os.makedirs("") raises for a bare file name
+        os.makedirs(parent_dir, exist_ok=True)
+
+    partial_path = f"{dest_path}.part"
+    try:
+        with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
+            response.raise_for_status()
+            with open(partial_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        os.replace(partial_path, dest_path)
+    finally:
+        # Still present only when the transfer failed before the rename.
+        if os.path.exists(partial_path):
+            os.remove(partial_path)
+    print(f"Downloaded {file_name}")
+
+
+class KokoroTTS:
+    """ComfyUI node that synthesizes speech from text with kokoro-onnx."""
+
+    BASE_DIR = "Bjornulf/Kokoro"
+    MODEL_FILE = os.path.join(BASE_DIR, "kokoro-v0_19.onnx")
+    VOICES_FILE = os.path.join(BASE_DIR, "voices.bin")
+    MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx"
+    VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin"
+    # Generated .wav files are stored here, one sub-folder per voice.
+    OUTPUT_DIR = "Bjornulf_TTS_Kokoro"
+
+    VOICE_LANGUAGES = {
+        'af': 'en-us', 'am': 'en-us', 'bf': 'en-gb', 'bm': 'en-gb'
+    }
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "text": ("STRING", {"multiline": True}),
+                "voice": (list(VOICE_OPTIONS.values()), {"default": "Default (American Female) - af"}),
+                "language": (list(LANGUAGE_OPTIONS.keys()), {"default": "en-us"}),
+                "speed": ("FLOAT", {"default": 1.0, "min": 0.5, "max": 2.0, "step": 0.1}),
+                "autoplay": ("BOOLEAN", {"default": True}),
+                "save_audio": ("BOOLEAN", {"default": True}),
+                "overwrite": ("BOOLEAN", {"default": False}),
+                "seed": ("INT", {"default": 0}),
+            }
+        }
+
+    RETURN_TYPES = ("AUDIO",)
+    FUNCTION = "generate_audio"
+    CATEGORY = "Bjornulf/Kokoro"
+
+    @staticmethod
+    def _silence():
+        """Return the placeholder AUDIO value used when synthesis fails."""
+        return {"waveform": torch.zeros(1, 1, 1), "sample_rate": 22050}
+
+    @classmethod
+    def _cache_path(cls, text, voice_id, language, speed):
+        """Return the absolute .wav path for one synthesis request."""
+        sanitized_text = ''.join(c if c.isalnum() else '_' for c in text[:50])
+        # The readable prefix covers only the first 50 characters, so add a
+        # digest of every input that shapes the audio. Without it a longer
+        # text, or the same text at another speed/language, would silently
+        # reuse an unrelated cached file.
+        request_key = f"{language}|{speed}|{text}".encode("utf-8")
+        digest = hashlib.sha1(request_key).hexdigest()[:8]
+        file_name = f"{sanitized_text}_{digest}.wav"
+        return os.path.abspath(os.path.join(cls.OUTPUT_DIR, voice_id, file_name))
+
+    @staticmethod
+    def _play(samples, sample_rate):
+        """Play mono float samples locally; failures are printed, not raised."""
+        try:
+            from pydub import AudioSegment
+            from pydub.playback import play
+
+            # pydub interprets raw bytes as integer PCM, so the float samples
+            # are clipped to [-1, 1] and scaled to 16-bit before playback.
+            pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
+            play(AudioSegment(
+                pcm.tobytes(),
+                frame_rate=int(sample_rate),
+                sample_width=2,
+                channels=1
+            ))
+        except Exception as e:
+            print(f"Autoplay error: {e}")
+
+    def generate_audio(self, text: str, voice: str, language: str, speed: float,
+                       autoplay: bool, save_audio: bool,
+                       overwrite: bool, seed: int):
+        """Synthesize ``text`` and return it as a ComfyUI AUDIO value.
+
+        A file previously saved for the same text, voice, language and speed
+        is reused unless ``overwrite`` is set. ``save_audio`` controls whether
+        newly generated audio is written to disk, ``autoplay`` whether the
+        result is also played locally.
+
+        Returns:
+            A 1-tuple holding ``{"waveform": tensor, "sample_rate": int}``.
+            If synthesis fails the error is printed and a one-sample silent
+            waveform is returned instead of raising.
+        """
+        # NOTE(review): this reseeds the process-wide RNG although nothing
+        # below draws from it; presumably "seed" only exists so the UI can
+        # force a re-run - confirm before removing.
+        random.seed(seed)
+
+        download_if_not_exists(self.MODEL_URL, self.MODEL_FILE)
+        download_if_not_exists(self.VOICES_URL, self.VOICES_FILE)
+
+        try:
+            # Imported lazily so a missing optional package disables only this
+            # node instead of breaking the import of the whole node pack.
+            import soundfile as sf
+            from kokoro_onnx import Kokoro
+
+            voice_id = VOICE_DISPLAY_TO_VALUE[voice]
+            full_path = self._cache_path(text, voice_id, language, speed)
+
+            if os.path.exists(full_path) and not overwrite:
+                print(f"File exists: {full_path}. Loading existing audio.")
+                samples, sample_rate = sf.read(full_path, dtype="float32")
+            else:
+                # Load the model only when audio actually has to be generated.
+                kokoro = Kokoro(self.MODEL_FILE, self.VOICES_FILE)
+                samples, sample_rate = kokoro.create(
+                    text,
+                    voice=voice_id,
+                    speed=speed,
+                    lang=language
+                )
+                if save_audio:
+                    os.makedirs(os.path.dirname(full_path), exist_ok=True)
+                    sf.write(full_path, samples, sample_rate)
+
+            # Normalize to float32 so cached and freshly generated audio
+            # yield the same tensor dtype.
+            samples = np.asarray(samples, dtype=np.float32)
+
+            if autoplay:
+                self._play(samples, sample_rate)
+
+            audio_tensor = torch.from_numpy(samples).unsqueeze(0)
+            audio_output = {"waveform": audio_tensor.unsqueeze(0), "sample_rate": sample_rate}
+            return (audio_output,)
+
+        except Exception as e:
+            print(f"Error in Kokoro TTS: {e}")
+            return (self._silence(),)
\ No newline at end of file
diff --git a/web/js/text_to_speech_kokoro.js b/web/js/text_to_speech_kokoro.js
new file mode 100644
index 0000000..64ce6ac
--- /dev/null
+++ b/web/js/text_to_speech_kokoro.js
@@ -0,0 +1,14 @@
+import { app } from "../../../scripts/app.js";
+
+app.registerExtension({
+    name: "Bjornulf.KokoroTTS",
+    async nodeCreated(node) {
+        if (node.comfyClass === "Bjornulf_KokoroTTS") {
+            // Flag the "seed" widget of this node as a hidden input (NOTE(review): assumes the frontend does not draw widgets whose type is "HIDDEN" - confirm)
+            const seedWidget = node.widgets.find((w) => w.name === "seed");
+            if (seedWidget) {
+                seedWidget.type = "HIDDEN";
+            }
+        }
+    }
+});
\ No newline at end of file