Files
Bjornulf_custom_nodes/text_analyzer.py
justumen 39dfb0220a 0.77
2025-03-19 17:36:25 +01:00

112 lines
3.8 KiB
Python

import re
import subprocess
import sys
# Load the spaCy model once globally to keep it lightweight
def ensure_model(model_name="en_core_web_sm"):
try:
# Try to load the model
import spacy
spacy.load(model_name)
except OSError:
# If the model isn't found, download it
print(f"Model '{model_name}' not found. Downloading now...")
subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
print(f"Model '{model_name}' downloaded successfully.")
class TextAnalyzer:
@classmethod
def INPUT_TYPES(cls):
return {
"required": {
"text": ("STRING", {"multiline": True})
}
}
# Define the output types and their names
RETURN_TYPES = ("INT", "INT", "INT", "STRING", "STRING", "FLOAT", "STRING", "STRING", "STRING", "STRING", "STRING", "STRING")
RETURN_NAMES = ("number_lines", "number_words", "number_characters", "language", "sentiment", "sentiment_polarity", "type", "character", "sentence", "subject", "action", "target")
FUNCTION = "analyze"
CATEGORY = "Bjornulf"
def analyze(self, text):
from langdetect import detect
from textblob import TextBlob
import spacy
ensure_model()
nlp = spacy.load("en_core_web_sm")
# **Statistics**
# Count lines by splitting on newline characters
lines = len(text.split('\n'))
# Count words by splitting on whitespace
words = len(text.split())
# Count total characters including spaces and punctuation
characters = len(text)
# **Dialog or Description Detection**
# Check if the text starts with a name followed by a colon (e.g., "Jessica:")
dialog_match = re.match(r'^([A-Za-z]+):', text)
if dialog_match:
type_ = 'dialog'
character = dialog_match.group(1) # Extract the character name
spoken_text = text[dialog_match.end():].strip() # Text after the colon
else:
type_ = 'description'
character = None
spoken_text = text
# **Language Detection**
try:
language = detect(spoken_text)
except:
language = 'unknown'
# **Sentiment Analysis**
blob = TextBlob(spoken_text)
polarity = blob.sentiment.polarity
if polarity > 0:
sentiment = 'positive'
elif polarity < 0:
sentiment = 'negative'
else:
sentiment = 'neutral'
# **Subject, Action, Target Extraction**
# Only perform NLP if the language is English (for spaCy compatibility)
if language == 'en':
doc = nlp(spoken_text)
action = None
subject = None
target = None
# Look for the main verb (action) and its subject and object
for token in doc:
if token.dep_ == 'ROOT' and token.pos_ == 'VERB':
action = token.text
subject_tokens = [w for w in token.children if w.dep_ == 'nsubj']
target_tokens = [w for w in token.children if w.dep_ == 'dobj']
if subject_tokens:
subject = subject_tokens[0].text
if target_tokens:
target = target_tokens[0].text
break
else:
subject, action, target = None, None, None
# **Return Results**
# Convert None to empty strings for ComfyUI compatibility
return (
lines,
words,
characters,
language,
sentiment,
polarity,
type_,
character or "",
spoken_text or "",
subject or "",
action or "",
target or ""
)