0.15

2026-05-06 08:16:43 -03:00 · 2024-09-09 12:33:34 +02:00
parent aedb53c326
commit 1d3366ca7b
18 changed files with 605 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,5 @@ SaveText/
 API_example/
 clear_vram.py
 web/js/clear_vram.js
-CUSTOM_STRING.py.txt
-web/js/BJORNULF_TYPES.js.txt
-web/js/CUSTOM_STRING.js.txt
-save_api_image.py.txt
+*.txt
+speakers
--- a/README.md
+++ b/README.md
@@ -1,8 +1,9 @@
-# 🔗 Comfyui : Bjornulf_custom_nodes v0.14 🔗
+# 🔗 Comfyui : Bjornulf_custom_nodes v0.15 🔗

 # Dependencies

 - `pip install ollama` (you can also install ollama if you want :  https://ollama.com/download) - You don't need to really install it if you don't want to use my ollama node. (BUT you need to run `pip install ollama`)
+- `pip install pydub` (for TTS node)

 # 📝 Changelog

@@ -23,6 +24,7 @@
 - **v0.12**: Combine images : Add option to move vertically and horizontally. (from -50% to 150%)
 - **v0.13**: Add a new node: Load image with transparency (alpha) - Load an image with transparency.
 - **v0.14**: Add a new node: Cut image from a mask
+- **v0.15**: Add two new nodes: TTS - Text to Speech and Character Description Generator

 # 📝 Nodes descriptions

@@ -241,3 +243,21 @@ The default `Load Image` node will not load the transparency.

 **Description:**  
 Cut an image from a mask.  
+
+## 31 - 🔊 TTS - Text to Speech
+![TTS - Text to Speech](screenshots/tts.png)
+
+**Description:**  
+Use my TTS server to generate speech from text.  
+❗ Of course you need to use my TTS server : <https://github.com/justUmen/Bjornulf_XTTS>  
+After having that installed, you NEED to create a link in my Comfyui custom node folder called `speakers` : `ComfyUI/custom_nodes/Bjornulf_custom_nodes/speakers`  
+That link must point to the folder where you store the voice samples you use for my TTS, like `default.wav`.  
+If my TTS server is running on port 8020 (You can test in browser with the link <http://localhost:8020/tts_stream?language=en&speaker_wav=default&text=Hello>) and voice samples are good, you can use this node to generate speech from text.  
+
+## 32 - 🧑📝 Character Description Generator
+![Character Description Generator](screenshots/characters.png)
+
+**Description:**  
+Generate a character description based on a json file in the folder `characters` : `ComfyUI/custom_nodes/Bjornulf_custom_nodes/characters`  
+Make your own json file with your own characters, and use this node to generate a description.  
+❗ For now it's a very basic node, a lot of things are going to be added.  
--- a/init.py
+++ b/init.py
@@ -35,6 +35,8 @@ from .loop_lines import LoopAllLines
 from .random_seed_with_text import TextToStringAndSeed
 from .load_image_alpha import LoadImageWithTransparency
 from .image_mask_cutter import ImageMaskCutter
+from .character_description import CharacterDescriptionGenerator
+from .text_to_speech import TextToSpeech
 # from .check_black_image import CheckBlackImage
 # from .clear_vram import ClearVRAM

@@ -43,6 +45,8 @@ from .image_mask_cutter import ImageMaskCutter
 NODE_CLASS_MAPPINGS = {
    # "Bjornulf_CustomStringType": CustomStringType,
    "Bjornulf_ollamaLoader": ollamaLoader,
+    "Bjornulf_TextToSpeech": TextToSpeech,
+    "Bjornulf_CharacterDescriptionGenerator": CharacterDescriptionGenerator,
    "Bjornulf_ImageMaskCutter": ImageMaskCutter,
    "Bjornulf_LoadImageWithTransparency": LoadImageWithTransparency,
    "Bjornulf_LoopAllLines": LoopAllLines,
@@ -86,6 +90,8 @@ NODE_CLASS_MAPPINGS = {
 NODE_DISPLAY_NAME_MAPPINGS = {
    # "Bjornulf_CustomStringType": "!!! CUSTOM STRING TYPE !!!",
    "Bjornulf_ollamaLoader": "🦙 Ollama (Description)",
+    "Bjornulf_TextToSpeech": "🔊 TTS - Text to Speech",
+    "Bjornulf_CharacterDescriptionGenerator": "🧑📝 Character Description Generator",
    "Bjornulf_ImageMaskCutter": "🖼✂ Cut Image with Mask",
    "Bjornulf_LoadImageWithTransparency": "🖼 Load Image with Transparency ▢",
    "Bjornulf_GreenScreenToTransparency": "🟩➜▢ Green Screen to Transparency",
--- a/character_description.py
+++ b/character_description.py
@@ -0,0 +1,122 @@
+import os
+import json
+
+class CharacterDescriptionGenerator:
+    """ComfyUI node that turns a character JSON file into descriptive text.
+
+    Character files are read from the ``characters`` folder located next to
+    this module. Each file is a JSON object with scalar fields (``name``,
+    ``age``, ``gender``, ``height``, ``weight``) plus the nested objects
+    ``face`` (which contains ``eyes``), ``hair`` and ``body_type``.
+
+    The node outputs three strings: a prose description, a comma-separated
+    keyword list, and the character name derived from the file name.
+    """
+
+    # Dropdown placeholder used when no character file can be offered.
+    _NO_FILES_LABEL = "No character files found"
+
+    # Lower-cased gender -> (subject pronoun, possessive pronoun, verb "to have").
+    _PRONOUNS = {
+        'female': ('She', 'her', 'has'),
+        'f': ('She', 'her', 'has'),
+        'male': ('He', 'his', 'has'),
+        'm': ('He', 'his', 'has'),
+    }
+    # Any other gender value gets the plural form, which needs "have".
+    _DEFAULT_PRONOUNS = ('They', 'their', 'have')
+
+    RETURN_TYPES = ("STRING", "STRING", "STRING")
+    RETURN_NAMES = ("sentences", "words", "name")
+    FUNCTION = "generate_descriptions"
+    CATEGORY = "Bjornulf"
+
+    @staticmethod
+    def _characters_folder():
+        """Return the absolute path of the ``characters`` folder beside this module."""
+        return os.path.join(os.path.dirname(os.path.realpath(__file__)), "characters")
+
+    @classmethod
+    def INPUT_TYPES(s):
+        """Describe the node inputs: one dropdown listing the available JSON files.
+
+        When the folder is missing or holds no ``.json`` file, a warning is
+        printed and the dropdown contains a single placeholder entry.
+        """
+        characters_folder = s._characters_folder()
+        placeholder = {"required": {"character_file": ([s._NO_FILES_LABEL],)}}
+
+        if not os.path.isdir(characters_folder):
+            print(f"Warning: 'characters' folder not found at {characters_folder}")
+            return placeholder
+
+        # Sorted so the dropdown order does not depend on the filesystem.
+        json_files = sorted(f for f in os.listdir(characters_folder) if f.endswith('.json'))
+        if not json_files:
+            print(f"Warning: No JSON files found in {characters_folder}")
+            return placeholder
+
+        return {"required": {"character_file": (json_files,)}}
+
+    def generate_descriptions(self, character_file):
+        """Return ``(sentences, words, name)`` for the selected character file.
+
+        Args:
+            character_file: File name of a JSON file inside the ``characters``
+                folder, as offered by ``INPUT_TYPES``.
+
+        Returns:
+            A 3-tuple of strings matching ``RETURN_TYPES``. On failure the
+            first element carries the error message and the other two are
+            empty, so every declared output is always present.
+        """
+        file_path = os.path.join(
+            CharacterDescriptionGenerator._characters_folder(), character_file
+        )
+
+        if not os.path.exists(file_path):
+            return (f"Error: File {character_file} not found.", "", "")
+
+        try:
+            with open(file_path, 'r', encoding='utf-8') as file:
+                data = json.load(file)
+            if not isinstance(data, dict):
+                raise ValueError("top-level JSON value must be an object")
+            sentences, words = CharacterDescriptionGenerator._describe(data)
+        except Exception as e:
+            # Best effort: report the problem through the node output
+            # instead of aborting the whole workflow.
+            return (f"Error processing {character_file}: {str(e)}", "", "")
+
+        return (sentences, words, character_file.replace('.json', ''))
+
+    @staticmethod
+    def _section(container, key):
+        """Return ``container[key]`` when it is a dict, otherwise an empty dict."""
+        value = container.get(key)
+        return value if isinstance(value, dict) else {}
+
+    @staticmethod
+    def _text(section, key):
+        """Return ``section[key]`` as lower-case text, or ``'unknown'`` if absent."""
+        return str(section.get(key, 'unknown')).lower()
+
+    @staticmethod
+    def _with_article(phrase):
+        """Prefix ``phrase`` with "a" or "an", chosen from how it starts.
+
+        Vowel-initial words and the numbers read as "eight…", "eleven" and
+        "eighteen" take "an"; everything else takes "a".
+        """
+        text = str(phrase)
+        first = text[:1].lower()
+        needs_an = first in ('a', 'e', 'i', 'o', 'u', '8') or text in ('11', '18')
+        article = 'an' if needs_an else 'a'
+        return f"{article} {text}"
+
+    @classmethod
+    def _describe(cls, data):
+        """Build the prose and keyword descriptions from a parsed character dict."""
+        name = data.get('name', 'Unknown')
+        age = data.get('age', 'Unknown')
+        gender = str(data.get('gender', 'Unknown')).lower()
+        height = data.get('height', 'Unknown')
+        weight = data.get('weight', 'Unknown')
+
+        face = cls._section(data, 'face')
+        eyes = cls._section(face, 'eyes')
+        # The bundled character files keep "hair" and "body_type" at the top
+        # level; a copy nested under "face" is still accepted as a fallback.
+        hair = cls._section(data, 'hair') or cls._section(face, 'hair')
+        body_type = cls._section(data, 'body_type') or cls._section(face, 'body_type')
+
+        subject, possessive, has = cls._PRONOUNS.get(gender, cls._DEFAULT_PRONOUNS)
+        possessive_cap = possessive.capitalize()
+        text = cls._text
+        article = cls._with_article
+
+        # Generate sentences description
+        parts = [
+            f"{name} is {article(age)}-year-old {gender} "
+            f"standing {height} tall and weighing {weight}. "
+        ]
+
+        if face:
+            shape = article(text(face, 'shape'))
+            complexion = article(text(face, 'complexion'))
+            parts.append(f"{subject} {has} {shape} face with {complexion} complexion. ")
+
+            if eyes:
+                parts.append(
+                    f"{possessive_cap} {text(eyes, 'color')} eyes are "
+                    f"{text(eyes, 'shape')} with {text(eyes, 'feature')}. "
+                )
+
+            parts.append(
+                f"{possessive_cap} nose is {text(face, 'nose')}, "
+                f"and {possessive} lips are {text(face, 'lips')}. "
+            )
+            jawline = article(text(face, 'jawline'))
+            parts.append(
+                f"{subject} {has} {text(face, 'cheekbones')} cheekbones "
+                f"and {jawline} jawline. "
+            )
+
+        if hair:
+            parts.append(
+                f"{name}'s {text(hair, 'color')} hair is {text(hair, 'length')} "
+                f"and {text(hair, 'texture')}, {text(hair, 'style')}. "
+            )
+
+        if body_type:
+            build = article(text(body_type, 'build'))
+            figure = article(text(body_type, 'figure'))
+            waist = article(text(body_type, 'waist'))
+            parts.append(
+                f"{subject} {has} {build} body type with {figure} figure, "
+                f"{text(body_type, 'shoulders')} shoulders, {waist} waist, "
+                f"and {text(body_type, 'hips')} hips."
+            )
+
+        sentences = "".join(parts)
+
+        # Generate words description
+        words = ", ".join([
+            f"{age} years old",
+            gender,
+            f"{height} tall",
+            f"{weight} weight",
+            text(face, 'shape') + " face",
+            text(face, 'complexion') + " complexion",
+            text(eyes, 'color') + " eyes",
+            text(eyes, 'shape') + " eyes",
+            # The feature is free text ("defined eyebrows", "long eyelashes"),
+            # so it is emitted as-is rather than with a fixed suffix.
+            text(eyes, 'feature'),
+            text(face, 'nose') + " nose",
+            text(face, 'lips') + " lips",
+            text(face, 'cheekbones') + " cheekbones",
+            text(face, 'jawline') + " jawline",
+            text(hair, 'color') + " hair",
+            text(hair, 'length') + " hair",
+            text(hair, 'texture') + " hair",
+            text(hair, 'style') + " hairstyle",
+            text(body_type, 'build') + " build",
+            text(body_type, 'figure') + " figure",
+            text(body_type, 'shoulders') + " shoulders",
+            text(body_type, 'waist') + " waist",
+            text(body_type, 'hips') + " hips",
+        ])
+
+        return sentences, words
--- a/characters/Albertine.json
+++ b/characters/Albertine.json
@@ -0,0 +1,34 @@
+{
+  "name": "Albertine",
+  "nationality": "French",
+  "age": 78,
+  "gender": "Female",
+  "height": "158 cm",
+  "weight": "62 kg",
+  "face": {
+    "shape": "Oval",
+    "complexion": "Pale with age spots",
+    "eyes": {
+      "color": "Grey",
+      "shape": "Hooded",
+      "feature": "Soft wrinkles around the eyes"
+    },
+    "nose": "Long and slender",
+    "lips": "Thin with a gentle smile",
+    "cheekbones": "Prominent",
+    "jawline": "Soft and slightly sagging"
+  },
+  "hair": {
+    "color": "Silver grey",
+    "length": "Short",
+    "texture": "Straight",
+    "style": "Neatly styled in a classic bob"
+  },
+  "body_type": {
+    "build": "Slender but frail",
+    "figure": "Rectangle-shaped",
+    "shoulders": "Narrow and slightly stooped",
+    "waist": "Less defined due to age",
+    "hips": "Slim"
+  }
+}
--- a/characters/Amina.json
+++ b/characters/Amina.json
@@ -0,0 +1,34 @@
+{
+  "name": "Amina",
+  "nationality": "Nigerian",
+  "age": 22,
+  "gender": "Female",
+  "height": "160 cm",
+  "weight": "52 kg",
+  "face": {
+    "shape": "Round",
+    "complexion": "Dark with warm undertones",
+    "eyes": {
+      "color": "Brown",
+      "shape": "Almond-shaped",
+      "feature": "Bright and expressive"
+    },
+    "nose": "Wide and flat",
+    "lips": "Full and glossy",
+    "cheekbones": "High and prominent",
+    "jawline": "Softly rounded"
+  },
+  "hair": {
+    "color": "Dark Brown",
+    "length": "Short",
+    "texture": "Coily",
+    "style": "Natural afro or styled in twists"
+  },
+  "body_type": {
+    "build": "Slender and fit",
+    "figure": "Straight",
+    "shoulders": "Narrow",
+    "waist": "Slightly defined",
+    "hips": "Moderate"
+  }
+}
--- a/characters/Chinedu.json
+++ b/characters/Chinedu.json
@@ -0,0 +1,35 @@
+{
+    "name": "Chinedu",
+    "nationality": "Nigerian",
+    "age": 22,
+    "gender": "Male",
+    "height": "175 cm",
+    "weight": "68 kg",
+    "face": {
+      "shape": "Oval",
+      "complexion": "Dark with warm undertones",
+      "eyes": {
+        "color": "Brown",
+        "shape": "Almond-shaped",
+        "feature": "Bright and sharp, with a confident gaze"
+      },
+      "nose": "Wide and slightly rounded",
+      "lips": "Full with a natural matte finish",
+      "cheekbones": "High and subtly defined",
+      "jawline": "Defined with a slight roundness"
+    },
+    "hair": {
+      "color": "Dark Brown",
+      "length": "Short",
+      "texture": "Coily",
+      "style": "Close-cut or short fade, neatly trimmed"
+    },
+    "body_type": {
+      "build": "Athletic and fit",
+      "figure": "Lean",
+      "shoulders": "Broad",
+      "waist": "Moderately defined",
+      "hips": "Narrow"
+    }
+  }
+  
--- a/characters/Jessica.json
+++ b/characters/Jessica.json
@@ -0,0 +1,34 @@
+{
+    "name": "Jessica",
+    "nationality": "Irish",
+    "age": 28,
+    "gender": "Female",
+    "height": "170 cm",
+    "weight": "59 kg",
+    "face": {
+      "shape": "Oval",
+      "complexion": "Fair with a light tan",
+      "eyes": {
+        "color": "Hazel",
+        "shape": "Almond-shaped",
+        "feature": "Long eyelashes"
+      },
+      "nose": "Small and slightly upturned",
+      "lips": "Full and naturally pink",
+      "cheekbones": "High and defined",
+      "jawline": "Softly defined"
+    },
+    "hair": {
+      "color": "Auburn",
+      "length": "Shoulder-length",
+      "texture": "Wavy",
+      "style": "Usually worn in loose curls or a messy bun"
+    },
+    "body_type": {
+      "build": "Athletic and toned",
+      "figure": "Hourglass",
+      "shoulders": "Broad but feminine",
+      "waist": "Narrow",
+      "hips": "Curvy"
+    }
+  }
--- a/characters/Julien.json
+++ b/characters/Julien.json
@@ -0,0 +1,35 @@
+{
+    "name": "Julien",
+    "nationality": "French",
+    "age": 25,
+    "gender": "Male",
+    "height": "175 cm",
+    "weight": "68 kg",
+    "face": {
+      "shape": "Heart",
+      "complexion": "Medium with warm undertones",
+      "eyes": {
+        "color": "Green",
+        "shape": "Round",
+        "feature": "Defined eyebrows"
+      },
+      "nose": "Straight and narrow",
+      "lips": "Slightly full with a well-defined cupid's bow",
+      "cheekbones": "Subtle and rounded",
+      "jawline": "Sharp and angular"
+    },
+    "hair": {
+      "color": "Black",
+      "length": "Short",
+      "texture": "Straight",
+      "style": "Neatly styled, often with a side part or slightly tousled"
+    },
+    "body_type": {
+      "build": "Slim and athletic",
+      "figure": "V-shaped",
+      "shoulders": "Broad",
+      "waist": "Defined",
+      "hips": "Moderately rounded"
+    }
+  }
+  
--- a/characters/Lily.json
+++ b/characters/Lily.json
@@ -0,0 +1,34 @@
+{
+  "name": "Lily",
+  "nationality": "Swedish",
+  "age": 8,
+  "gender": "Female",
+  "height": "120 cm",
+  "weight": "25 kg",
+  "face": {
+    "shape": "Round",
+    "complexion": "Light with freckles",
+    "eyes": {
+      "color": "Blue",
+      "shape": "Big and round",
+      "feature": "Sparkly and curious"
+    },
+    "nose": "Small and button-like",
+    "lips": "Thin and rosy",
+    "cheekbones": "Soft and subtle",
+    "jawline": "Rounded"
+  },
+  "hair": {
+    "color": "Blonde",
+    "length": "Medium-length",
+    "texture": "Straight",
+    "style": "Usually in pigtails or a simple headband"
+  },
+  "body_type": {
+    "build": "Petite",
+    "figure": "Childlike",
+    "shoulders": "Small and narrow",
+    "waist": "Slightly chubby",
+    "hips": "Not yet developed"
+  }
+}
--- a/characters/Lukas.json
+++ b/characters/Lukas.json
@@ -0,0 +1,35 @@
+{
+    "name": "Lukas",
+    "nationality": "Swedish",
+    "age": 8,
+    "gender": "Male",
+    "height": "125 cm",
+    "weight": "27 kg",
+    "face": {
+      "shape": "Round",
+      "complexion": "Light with a few freckles",
+      "eyes": {
+        "color": "Blue",
+        "shape": "Big and round",
+        "feature": "Bright and inquisitive"
+      },
+      "nose": "Small and rounded",
+      "lips": "Thin and slightly pink",
+      "cheekbones": "Soft and subtle",
+      "jawline": "Rounded"
+    },
+    "hair": {
+      "color": "Blonde",
+      "length": "Short",
+      "texture": "Straight",
+      "style": "Neatly trimmed or with a bit of tousle"
+    },
+    "body_type": {
+      "build": "Petite",
+      "figure": "Childlike",
+      "shoulders": "Small and narrow",
+      "waist": "Slightly chubby",
+      "hips": "Not yet developed"
+    }
+  }
+  
--- a/characters/Marcel.json
+++ b/characters/Marcel.json
@@ -0,0 +1,35 @@
+{
+    "name": "Marcel",
+    "nationality": "French",
+    "age": 78,
+    "gender": "Male",
+    "height": "170 cm",
+    "weight": "70 kg",
+    "face": {
+      "shape": "Oval",
+      "complexion": "Pale with age spots",
+      "eyes": {
+        "color": "Grey",
+        "shape": "Hooded",
+        "feature": "Soft wrinkles around the eyes"
+      },
+      "nose": "Long and slightly hooked",
+      "lips": "Thin with a gentle expression",
+      "cheekbones": "Moderately prominent",
+      "jawline": "Soft with slight sagging, more pronounced than the female counterpart"
+    },
+    "hair": {
+      "color": "Silver grey",
+      "length": "Short",
+      "texture": "Straight",
+      "style": "Neatly styled, side-parted with a bit of thinning at the top"
+    },
+    "body_type": {
+      "build": "Lean but slightly frail due to age",
+      "figure": "Rectangle-shaped",
+      "shoulders": "Narrow and slightly stooped",
+      "waist": "Undefined due to age",
+      "hips": "Slim with slightly bowed legs"
+    }
+  }
+  
--- a/characters/Sean.json
+++ b/characters/Sean.json
@@ -0,0 +1,35 @@
+{
+    "name": "Sean",
+    "nationality": "Irish",
+    "age": 28,
+    "gender": "Male",
+    "height": "180 cm",
+    "weight": "78 kg",
+    "face": {
+      "shape": "Oval",
+      "complexion": "Fair with a light tan",
+      "eyes": {
+        "color": "Hazel",
+        "shape": "Almond-shaped",
+        "feature": "Defined eyebrows and a friendly gaze"
+      },
+      "nose": "Straight and slightly prominent",
+      "lips": "Full with a natural pink hue",
+      "cheekbones": "High and chiseled",
+      "jawline": "Strong and defined"
+    },
+    "hair": {
+      "color": "Auburn",
+      "length": "Short",
+      "texture": "Wavy",
+      "style": "Neatly styled with a bit of wave, often kept short and tidy"
+    },
+    "body_type": {
+      "build": "Athletic and toned",
+      "figure": "V-shaped",
+      "shoulders": "Broad and strong",
+      "waist": "Defined",
+      "hips": "Moderate"
+    }
+  }
+  
--- a/characters/Vanessa.json
+++ b/characters/Vanessa.json
@@ -0,0 +1,34 @@
+{
+  "name": "Vanessa",
+  "nationality": "French",
+  "age": 25,
+  "gender": "Female",
+  "height": "165 cm",
+  "weight": "54 kg",
+  "face": {
+    "shape": "Heart",
+    "complexion": "Medium with warm undertones",
+    "eyes": {
+      "color": "Green",
+      "shape": "Round",
+      "feature": "Thick eyebrows"
+    },
+    "nose": "Straight and narrow",
+    "lips": "Slightly thin with a defined cupid's bow",
+    "cheekbones": "Subtle and rounded",
+    "jawline": "Sharp and angular"
+  },
+  "hair": {
+    "color": "Black",
+    "length": "Long",
+    "texture": "Straight",
+    "style": "Often worn in a sleek ponytail or braided"
+  },
+  "body_type": {
+    "build": "Slim and petite",
+    "figure": "Pear-shaped",
+    "shoulders": "Narrow",
+    "waist": "Defined",
+    "hips": "Broader and rounded"
+  }
+}
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "bjornulf_custom_nodes"
-description = "Nodes: Ollama, Green Screen to Transparency, Save image for Bjornulf LobeChat, Text with random Seed, Random line from input, Combine images (Background+Overlay alpha), Image to grayscale (black & white), Remove image Transparency (alpha), Resize Image, ..."
-version = "0.14"
+description = "Nodes: Ollama, Text to Speech, Save image for Bjornulf LobeChat, Text with random Seed, Random line from input, Combine images (Background+Overlay alpha), Image to grayscale (black & white), Remove image Transparency (alpha), Resize Image, ..."
+version = "0.15"
 license = {file = "LICENSE"}

 [project.urls]
--- a/screenshots/characters.png
+++ b/screenshots/characters.png
--- a/screenshots/tts.png
+++ b/screenshots/tts.png
--- a/text_to_speech.py
+++ b/text_to_speech.py
@@ -0,0 +1,106 @@
+import requests
+import numpy as np
+import io
+import torch
+from pydub import AudioSegment
+import urllib.parse
+import os
+
+class TextToSpeech:
+    """ComfyUI node that asks a local TTS server to speak a text.
+
+    Voice samples are discovered as ``.wav`` files under the ``speakers``
+    folder located next to this module. The text is sent to the
+    ``/tts_stream`` endpoint on ``localhost:8020`` and the returned audio is
+    decoded into a float32 tensor.
+    """
+
+    _TTS_URL = "http://localhost:8020/tts_stream"
+
+    # Dropdown placeholder used when no voice sample can be offered.
+    _NO_SPEAKERS_LABEL = "No WAV files found"
+
+    # (connect, read) timeouts in seconds. The read timeout is generous
+    # because it bounds the wait for the next streamed chunk.
+    _TIMEOUT = (5, 300)
+
+    RETURN_TYPES = ("AUDIO",)
+    FUNCTION = "generate_audio"
+    CATEGORY = "audio"
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        """Describe the node inputs: text, language and voice sample dropdowns."""
+        speakers_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "speakers")
+
+        # Paths are kept relative to the speakers folder; the extension check
+        # is case-insensitive so "VOICE.WAV" is listed too.
+        speaker_options = sorted(
+            os.path.relpath(os.path.join(root, file), speakers_dir)
+            for root, dirs, files in os.walk(speakers_dir)
+            for file in files
+            if file.lower().endswith(".wav")
+        )
+
+        # If no .wav files are found, add a default option
+        if not speaker_options:
+            speaker_options.append(cls._NO_SPEAKERS_LABEL)
+
+        return {
+            "required": {
+                "text": ("STRING", {"multiline": True}),
+                "language": (["ar", "cs", "de", "en", "es", "fr", "hi", "hu", "it", "ja", "ko", "nl", "pl", "pt", "ru", "tr", "zh-cn"], {
+                    "default": "en",
+                    "display": "dropdown",
+                    "labels": ["Arabic", "Czech", "German", "English", "Spanish", "French", "Hindi", "Hungarian", "Italian", "Japanese", "Korean", "Dutch", "Polish", "Portuguese", "Russian", "Turkish", "Chinese"]
+                }),
+                "speaker_wav": (speaker_options, {
+                    "default": speaker_options[0],
+                    "display": "dropdown"
+                }),
+            }
+        }
+
+    @staticmethod
+    def _silence():
+        """Return the one-sample silent AUDIO output used whenever generation fails."""
+        return ({"waveform": torch.zeros(1, 1, 1, dtype=torch.float32), "sample_rate": 22050},)
+
+    def generate_audio(self, text, language, speaker_wav):
+        """Request speech for ``text`` from the TTS server.
+
+        Args:
+            text: Text to speak.
+            language: Language code sent to the server (e.g. ``"en"``).
+            speaker_wav: Voice sample chosen in the dropdown.
+
+        Returns:
+            A 1-tuple holding ``{"waveform": tensor, "sample_rate": int}``.
+            Any failure is printed and replaced by a silent output so the
+            workflow keeps running.
+        """
+        # Check if a valid speaker_wav was selected
+        if speaker_wav == TextToSpeech._NO_SPEAKERS_LABEL:
+            print("Error: No WAV files available for text-to-speech.")
+            return TextToSpeech._silence()
+
+        # Escape every parameter, not only the text: a voice file name may
+        # contain spaces, '&' or path separators.
+        query = urllib.parse.urlencode(
+            {"language": language, "speaker_wav": speaker_wav, "text": text},
+            quote_via=urllib.parse.quote,
+        )
+        url = f"{TextToSpeech._TTS_URL}?{query}"
+        try:
+            audio_data = io.BytesIO()
+            # The context manager releases the streamed connection even when
+            # the download fails half-way.
+            with requests.get(url, stream=True, timeout=TextToSpeech._TIMEOUT) as response:
+                response.raise_for_status()
+                for chunk in response.iter_content(chunk_size=8192):
+                    audio_data.write(chunk)
+
+            audio_data.seek(0)
+            return self.process_audio_data(audio_data)
+
+        except requests.RequestException as e:
+            print(f"Error generating audio: {e}")
+            return TextToSpeech._silence()
+        except Exception as e:
+            print(f"Unexpected error: {e}")
+            return TextToSpeech._silence()
+
+    def process_audio_data(self, audio_data):
+        """Decode the downloaded audio into the node's AUDIO output.
+
+        Args:
+            audio_data: Seekable binary stream holding the server response.
+
+        Returns:
+            A 1-tuple holding ``{"waveform": tensor, "sample_rate": int}``
+            where the tensor has shape ``(1, num_channels, num_samples)``.
+
+        Raises:
+            Exception: Any decoding error is printed and re-raised.
+        """
+        try:
+            # Load MP3 data
+            audio = AudioSegment.from_mp3(audio_data)
+
+            # Get audio properties
+            sample_rate = audio.frame_rate
+            num_channels = audio.channels
+
+            # Convert to numpy array
+            audio_np = np.array(audio.get_array_of_samples()).astype(np.float32)
+
+            # Normalize to [-1, 1] using the full-scale value of the decoded
+            # sample width instead of assuming 16-bit samples.
+            audio_np /= audio.max_possible_amplitude
+
+            print(f"Raw audio data shape: {audio_np.shape}")
+
+            # Samples are interleaved per frame; reshape to
+            # (num_channels, num_samples). This also covers mono.
+            audio_np = np.ascontiguousarray(audio_np.reshape(-1, num_channels).T)
+
+            # Convert to torch tensor
+            audio_tensor = torch.from_numpy(audio_np)
+
+            print(f"Final audio tensor shape: {audio_tensor.shape}")
+            print(f"Audio data type: {audio_tensor.dtype}")
+            if audio_tensor.numel():
+                # min()/max() raise on an empty tensor.
+                print(f"Audio data min: {audio_tensor.min()}, max: {audio_tensor.max()}")
+
+            # Add a leading batch dimension and return the dict in a 1-tuple,
+            # one entry per declared RETURN_TYPES item.
+            return ({"waveform": audio_tensor.unsqueeze(0), "sample_rate": sample_rate},)
+
+        except Exception as e:
+            print(f"Error processing audio data: {e}")
+            raise
+