0.46

2026-03-21 20:52:11 -03:00 · 2024-09-28 17:45:23 +02:00
parent 3d6014206f
commit c1cebdf1de
20 changed files with 561 additions and 79 deletions
--- a/text_to_speech.py
+++ b/text_to_speech.py
@@ -9,53 +9,34 @@ import os
 import sys
 import random
 import re
+from typing import Dict, Any, List, Tuple

 class Everything(str):
    def __ne__(self, __value: object) -> bool:
        return False

 language_map = {
-    "ar": "Arabic",
-    "cs": "Czech",
-    "de": "German",
-    "en": "English",
-    "es": "Spanish",
-    "fr": "French",
-    "hi": "Hindi",
-    "hu": "Hungarian",
-    "it": "Italian",
-    "ja": "Japanese",
-    "ko": "Korean",
-    "nl": "Dutch",
-    "pl": "Polish",
-    "pt": "Portuguese",
-    "ru": "Russian",
-    "tr": "Turkish",
+    "ar": "Arabic", "cs": "Czech", "de": "German", "en": "English",
+    "es": "Spanish", "fr": "French", "hi": "Hindi", "hu": "Hungarian",
+    "it": "Italian", "ja": "Japanese", "ko": "Korean", "nl": "Dutch",
+    "pl": "Polish", "pt": "Portuguese", "ru": "Russian", "tr": "Turkish",
    "zh-cn": "Chinese"
 }

 class TextToSpeech:
-        
    @classmethod
-    def INPUT_TYPES(cls):
+    def INPUT_TYPES(cls) -> Dict[str, Any]:
        speakers_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "speakers")
-        speaker_options = []
-
-        for root, dirs, files in os.walk(speakers_dir):
-            for file in files:
-                if file.endswith(".wav"):
-                    rel_path = os.path.relpath(os.path.join(root, file), speakers_dir)
-                    speaker_options.append(rel_path)
-
-        if not speaker_options:
-            speaker_options.append("No WAV files found")
-            
-        language_options = list(language_map.values())
-
+        speaker_options = [os.path.relpath(os.path.join(root, file), speakers_dir)
+                           for root, _, files in os.walk(speakers_dir)
+                           for file in files if file.endswith(".wav")]
+        
+        speaker_options = speaker_options or ["No WAV files found"]
+        
        return {
            "required": {
                "text": ("STRING", {"multiline": True}),
-                "language": (language_options, {
+                "language": (list(language_map.values()), {
                    "default": language_map["en"],
                    "display": "dropdown"
                }),
@@ -69,44 +50,45 @@ class TextToSpeech:
                "seed": ("INT", {"default": 0}),
            },
            "optional": {
-                "input": (Everything("*"), {"forceInput": True}),
+                "connect_to_workflow": (Everything("*"), {"forceInput": True}),
            }
        }

-    RETURN_TYPES = ("AUDIO",)
+    RETURN_TYPES = ("AUDIO", "STRING", "STRING", "FLOAT")
+    RETURN_NAMES = ("AUDIO", "audio_path", "full_path", "duration")
    FUNCTION = "generate_audio"
    CATEGORY = "Bjornulf"
    
    @staticmethod
-    def get_language_code(language_name):
-        for code, name in language_map.items():
-            if name == language_name:
-                return code
-        return "en"
+    def get_language_code(language_name: str) -> str:  # reverse lookup: display name -> language_map key
+        return next((code for code, name in language_map.items() if name == language_name), "en")  # first matching code; "en" when the name is unknown
    
    @staticmethod
-    def sanitize_text(text):
-        sanitized = re.sub(r'[^\w\s-]', '', text).replace(' ', '_')
-        return sanitized[:50]
+    def sanitize_text(text: str) -> str:  # reduce free text to a file-name stem of at most 50 characters
+        return re.sub(r'\s', '_', re.sub(r'[^\w\s-]', '', text))[:50]  # map every whitespace char (newline/tab too, text is multiline) to "_", not just " "
    
-    def generate_audio(self, text, language, autoplay, seed, save_audio, overwrite, speaker_wav, input=None):
+    def generate_audio(self, text: str, language: str, autoplay: bool, seed: int,
+                       save_audio: bool, overwrite: bool, speaker_wav: str,
+                       connect_to_workflow: Any = None) -> Tuple[Dict[str, Any], str, str, float]:  # returns (audio dict, save_path, full_path, duration)
        language_code = self.get_language_code(language)
        sanitized_text = self.sanitize_text(text)

        save_path = os.path.join("Bjornulf_TTS", language, speaker_wav, f"{sanitized_text}.wav")
-        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        full_path = os.path.abspath(save_path)  # absolute form of save_path
+        os.makedirs(os.path.dirname(full_path), exist_ok=True)  # NOTE: runs before save_audio is checked, so the directory is created either way

-        if os.path.exists(save_path) and not overwrite:
-            print(f"Using existing audio file: {save_path}")
-            audio_data = self.load_audio_file(save_path)
+        if os.path.exists(full_path) and not overwrite:  # reuse a file already on disk unless overwrite is set
+            print(f"Using existing audio file: {full_path}")
+            audio_data = self.load_audio_file(full_path)
        else:
            audio_data = self.create_new_audio(text, language_code, speaker_wav, seed)
            if save_audio:
-                self.save_audio_file(audio_data, save_path)
+                self.save_audio_file(audio_data, full_path)

-        return self.process_audio_data(autoplay, audio_data)
+        audio_output, _, duration = self.process_audio_data(autoplay, audio_data, full_path if save_audio else None)  # the path element of the result is discarded
+        return (audio_output, save_path, full_path, duration)  # both paths are returned whether or not a file was written

-    def create_new_audio(self, text, language_code, speaker_wav, seed):
+    def create_new_audio(self, text: str, language_code: str, speaker_wav: str, seed: int) -> io.BytesIO:
        random.seed(seed)
        if speaker_wav == "No WAV files found":
            print("Error: No WAV files available for text-to-speech.")
@@ -133,17 +115,17 @@ class TextToSpeech:
            print(f"Unexpected error: {e}")
            return io.BytesIO()

-    def play_audio(self, audio):
+    def play_audio(self, audio: AudioSegment) -> None:  # play the segment: winsound on Windows, play() elsewhere
        if sys.platform.startswith('win'):
            try:
                import winsound
-                winsound.PlaySound(audio, winsound.SND_MEMORY)
+                winsound.PlaySound(audio.export(io.BytesIO(), format="wav").getvalue(), winsound.SND_MEMORY)  # SND_MEMORY needs a complete WAV file image; raw_data is headerless PCM
            except Exception as e:
                print(f"An error occurred: {e}")
        else:
            play(audio)
            
-    def process_audio_data(self, autoplay, audio_data):
+    def process_audio_data(self, autoplay: bool, audio_data: io.BytesIO, save_path: str) -> Tuple[Dict[str, Any], str, float]:
        try:
            audio = AudioSegment.from_mp3(audio_data)
            sample_rate = audio.frame_rate
@@ -151,23 +133,22 @@ class TextToSpeech:
            audio_np = np.array(audio.get_array_of_samples()).astype(np.float32)
            audio_np /= np.iinfo(np.int16).max
            
-            if num_channels == 1:
-                audio_np = audio_np.reshape(1, -1)
-            else:
-                audio_np = audio_np.reshape(-1, num_channels).T
+            audio_np = audio_np.reshape(-1, num_channels).T if num_channels > 1 else audio_np.reshape(1, -1)
            
            audio_tensor = torch.from_numpy(audio_np)
            
            if autoplay:
                self.play_audio(audio)
            
-            return ({"waveform": audio_tensor.unsqueeze(0), "sample_rate": sample_rate},)
+            duration = len(audio) / 1000.0  # Convert milliseconds to seconds
+            
+            return ({"waveform": audio_tensor.unsqueeze(0), "sample_rate": sample_rate}, save_path or "", duration)
    
        except Exception as e:
            print(f"Error processing audio data: {e}")
-            return ({"waveform": torch.zeros(1, 1, 1, dtype=torch.float32), "sample_rate": 22050},)
+            return ({"waveform": torch.zeros(1, 1, 1, dtype=torch.float32), "sample_rate": 22050}, "", 0.0)

-    def save_audio_file(self, audio_data, save_path):
+    def save_audio_file(self, audio_data: io.BytesIO, save_path: str) -> None:
        try:
            with open(save_path, 'wb') as f:
                f.write(audio_data.getvalue())
@@ -175,11 +156,11 @@ class TextToSpeech:
        except Exception as e:
            print(f"Error saving audio file: {e}")

-    def load_audio_file(self, file_path):
+    def load_audio_file(self, file_path: str) -> io.BytesIO:  # read the whole file into an in-memory buffer
        try:
            with open(file_path, 'rb') as f:
                audio_data = io.BytesIO(f.read())
            return audio_data
        except Exception as e:
            print(f"Error loading audio file: {e}")
-            return io.BytesIO()
+            return io.BytesIO()  # on any failure the error is printed and an empty buffer is returned