From d4f300645d68f4a4ecf91676f5885008376c1265 Mon Sep 17 00:00:00 2001
From: Will Miao <13051207myq@gmail.com>
Date: Fri, 21 Mar 2025 11:32:51 +0800
Subject: [PATCH] Enhance ExifUtils to extract prompts from node references in
 workflows

- Updated the logic in ExifUtils to first identify KSampler nodes and store references to positive and negative prompt nodes.
- Added a helper function to follow these references and extract text content from CLIP Text Encode nodes.
- Implemented a fallback that scans CLIPTextEncode nodes by title (the previous method) whenever the positive or negative prompt is still empty after following references.
- Improved code readability with additional comments and structured handling of node data.
---
 py/utils/exif_utils.py | 84 +++++++++++++++++++++++++++++++++++-------
 refs/prompt.json       |  1 +
 2 files changed, 72 insertions(+), 13 deletions(-)
 create mode 100644 refs/prompt.json

diff --git a/py/utils/exif_utils.py b/py/utils/exif_utils.py
index 6c11333e..f31ec575 100644
--- a/py/utils/exif_utils.py
+++ b/py/utils/exif_utils.py
@@ -317,7 +317,11 @@ class ExifUtils:
                 "clip_skip": ""
             }
             
-            # Process each node in the workflow to extract parameters
+            # First pass: find the KSampler node to get basic parameters and node references
+            # Store node references to follow for prompts
+            positive_ref = None
+            negative_ref = None
+            
             for node_id, node_data in workflow_data.items():
                 if not isinstance(node_data, dict):
                     continue
@@ -327,7 +331,7 @@ class ExifUtils:
                 if not inputs:
                     continue
                 
-                # KSampler nodes contain most generation parameters
+                # KSampler nodes contain most generation parameters and references to prompt nodes
                 if "KSampler" in node_data.get("class_type", ""):
                     # Extract basic sampling parameters
                     gen_params["steps"] = inputs.get("steps", "")
@@ -336,18 +340,11 @@ class ExifUtils:
                     gen_params["seed"] = inputs.get("seed", "")
                     if isinstance(gen_params["seed"], list) and len(gen_params["seed"]) > 1:
                         gen_params["seed"] = gen_params["seed"][1]  # Use the actual value if it's a list
-                
-                # CLIP Text Encode nodes contain prompts
-                elif "CLIPTextEncode" in node_data.get("class_type", ""):
-                    # Check for negative prompt nodes
-                    title = node_data.get("_meta", {}).get("title", "").lower()
-                    prompt_text = inputs.get("text", "")
                     
-                    if "negative" in title:
-                        gen_params["negative_prompt"] = prompt_text
-                    elif prompt_text and not "negative" in title and gen_params["prompt"] == "":
-                        gen_params["prompt"] = prompt_text
-                
+                    # Get references to positive and negative prompt nodes
+                    positive_ref = inputs.get("positive", "")
+                    negative_ref = inputs.get("negative", "")
+                    
                 # CLIPSetLastLayer contains clip_skip information
                 elif "CLIPSetLastLayer" in node_data.get("class_type", ""):
                     gen_params["clip_skip"] = inputs.get("stop_at_clip_layer", "")
@@ -367,6 +364,67 @@ class ExifUtils:
                     if isinstance(resolution, str) and "x" in resolution:
                         gen_params["size"] = resolution.split(" ")[0]  # Extract just the dimensions
             
+            # Helper: resolve a [node_id, slot] reference to prompt text by following "text" inputs; returns "" if unresolved
+            def get_text_from_node_ref(node_ref, workflow_data):
+                if not node_ref or not isinstance(node_ref, list) or len(node_ref) < 2:
+                    return ""
+                
+                node_id, slot_idx = node_ref
+                
+                # slot_idx is not used below; a node id missing from the workflow resolves to ""
+                if node_id not in workflow_data:
+                    return ""
+                
+                node = workflow_data[node_id]
+                inputs = node.get("inputs", {})
+                
+                # CLIPTextEncode: "text" is either the literal prompt string or a reference list to another node
+                if "CLIPTextEncode" in node.get("class_type", ""):
+                    text = inputs.get("text", "")
+                    if isinstance(text, str):
+                        return text
+                    elif isinstance(text, list) and len(text) >= 2:
+                        # "text" is a reference: recurse into the node it points at
+                        return get_text_from_node_ref(text, workflow_data)
+                
+                # Any other node type (or a CLIPTextEncode whose "text" was unusable): only an input named "text" is examined
+                for field_name, field_value in inputs.items():
+                    if field_name == "text" and isinstance(field_value, str):
+                        return field_value
+                    elif isinstance(field_value, list) and len(field_value) >= 2 and field_name in ["text"]:
+                        # Follow the reference; NOTE(review): no cycle guard, a self-referencing graph recurses until RecursionError
+                        return get_text_from_node_ref(field_value, workflow_data)
+                
+                return ""
+            
+            # Extract prompts by following references from KSampler node
+            if positive_ref:
+                gen_params["prompt"] = get_text_from_node_ref(positive_ref, workflow_data)
+            
+            if negative_ref:
+                gen_params["negative_prompt"] = get_text_from_node_ref(negative_ref, workflow_data)
+            
+            # Fallback: if we couldn't extract prompts via references, use the traditional method
+            if not gen_params["prompt"] or not gen_params["negative_prompt"]:
+                for node_id, node_data in workflow_data.items():
+                    if not isinstance(node_data, dict):
+                        continue
+                        
+                    inputs = node_data.get("inputs", {})
+                    if not inputs:
+                        continue
+                    
+                    if "CLIPTextEncode" in node_data.get("class_type", ""):
+                        # Check for negative prompt nodes
+                        title = node_data.get("_meta", {}).get("title", "").lower()
+                        prompt_text = inputs.get("text", "")
+                        
+                        if isinstance(prompt_text, str):
+                            if "negative" in title and not gen_params["negative_prompt"]:
+                                gen_params["negative_prompt"] = prompt_text
+                            elif prompt_text and not "negative" in title and not gen_params["prompt"]:
+                                gen_params["prompt"] = prompt_text
+            
             return gen_params
             
         except Exception as e:
diff --git a/refs/prompt.json b/refs/prompt.json
new file mode 100644
index 00000000..3cddda22
--- /dev/null
+++ b/refs/prompt.json
@@ -0,0 +1 @@
+{"5": {"inputs": {"filename": "%time_%basemodelname_%seed", "path": "IL/%date", "extension": "jpeg", "steps": 20, "cfg": 8.0, "modelname": "", "sampler_name": "euler_ancestral", "scheduler": "karras", "positive": ["37", 0], "negative": "bad quality, worst quality, worst detail, sketch ,signature, watermark, patreon logo, nsfw", "seed_value": ["41", 3], "width": ["39", 1], "height": ["39", 2], "lossless_webp": true, "quality_jpeg_or_webp": 100, "optimize_png": false, "counter": 0, "denoise": 1.0, "clip_skip": 2, "time_format": "%Y-%m-%d-%H%M%S", "save_workflow_as_json": false, "embed_workflow_in_png": false, "images": ["38", 0]}, "class_type": "Image Saver", "_meta": {"title": "Image Saver"}}, "28": {"inputs": {"ckpt_name": "il\\waiNSFWIllustrious_v110.safetensors"}, "class_type": "CheckpointLoaderSimple", "_meta": {"title": "Load Checkpoint"}}, "29": {"inputs": {"stop_at_clip_layer": -2, "clip": ["28", 1]}, "class_type": "CLIPSetLastLayer", "_meta": {"title": "CLIP Set Last Layer"}}, "30": {"inputs": {"text": "worst quality, normal quality, anatomical nonsense, bad anatomy,interlocked fingers, extra fingers,watermark,simple background, loli,", "clip": ["52", 1]}, "class_type": "CLIPTextEncode", "_meta": {"title": "CLIP Text Encode (Prompt)"}}, "32": {"inputs": {"seed": ["41", 3], "steps": 20, "cfg": 8.0, "sampler_name": "euler_ancestral", "scheduler": "karras", "denoise": 1.0, "model": ["52", 0], "positive": ["43", 0], "negative": ["30", 0], "latent_image": ["39", 0]}, "class_type": "KSampler", "_meta": {"title": "KSampler"}}, "33": {"inputs": {"text": "in the style of ck-rw, masterpiece, best quality, good quality, very aesthetic, absurdres, newest, 8K, depth of field, focused subject,dynamic angle, from above, almost side view, wabi sabi, 1girl, holographic, holofoil glitter, faint, glowing, ethereal, neon hair, glowing hair, long hair, looking at viewer, relaxing, sci-fi", "anything": ["37", 0]}, "class_type": "easy showAnything", "_meta": {"title": "trigger 
applied positive"}}, "34": {"inputs": {"string": "masterpiece, best quality, good quality, very aesthetic, absurdres, newest, 8K, depth of field, focused subject,dynamic angle, from above, almost side view, wabi sabi, 1girl, holographic, holofoil glitter, faint, glowing, ethereal, neon hair, glowing hair, long hair, looking at viewer, relaxing, sci-fi", "strip_newlines": true}, "class_type": "StringConstantMultiline", "_meta": {"title": "positive"}}, "37": {"inputs": {"string1": ["51", 0], "string2": ["44", 0], "delimiter": ", "}, "class_type": "JoinStrings", "_meta": {"title": "Join Strings"}}, "38": {"inputs": {"samples": ["32", 0], "vae": ["28", 2]}, "class_type": "VAEDecode", "_meta": {"title": "VAE Decode"}}, "39": {"inputs": {"resolution": "832x1216 (0.68)", "batch_size": 1, "width_override": 0, "height_override": 0}, "class_type": "SDXLEmptyLatentSizePicker+", "_meta": {"title": "\ud83d\udd27 SDXL Empty Latent Size Picker"}}, "41": {"inputs": {"seed": 739661789497760}, "class_type": "Seed", "_meta": {"title": "Seed"}}, "42": {"inputs": {"images": ["38", 0]}, "class_type": "PreviewImage", "_meta": {"title": "Preview Image"}}, "43": {"inputs": {"text": ["33", 0], "clip": ["52", 1]}, "class_type": "CLIPTextEncode", "_meta": {"title": "CLIP Text Encode (Prompt)"}}, "44": {"inputs": {"file_path": "", "dictionary_name": "[filename]", "label": "TextBatch", "mode": "automatic", "index": 0, "multiline_text": ["34", 0]}, "class_type": "Text Load Line From File", "_meta": {"title": "Text Load Line From File"}, "is_changed": [NaN]}, "51": {"inputs": {"group_mode": false, "toggle_trigger_words": [{"text": "in the style of ck-rw", "active": true}, {"text": "__dummy_item__", "active": false, "_isDummy": true}, {"text": "__dummy_item__", "active": false, "_isDummy": true}], "orinalMessage": "in the style of ck-rw", "trigger_words": ["52", 2]}, "class_type": "TriggerWord Toggle (LoraManager)", "_meta": {"title": "TriggerWord Toggle (LoraManager)"}}, "52": {"inputs": {"text": 
"<lora:ck-shadow-circuit-IL-000012:0.78> <lora:MoriiMee_Gothic_Niji_Style_Illustrious_r1:0.45> <lora:ck-nc-cyberpunk-IL-000011:0.4> <lora:ck-neon-retrowave-IL-000012:0.8>", "loras": [{"name": "ck-shadow-circuit-IL-000012", "strength": 0.78, "active": false}, {"name": "MoriiMee_Gothic_Niji_Style_Illustrious_r1", "strength": 0.45, "active": false}, {"name": "ck-nc-cyberpunk-IL-000011", "strength": 0.4, "active": false}, {"name": "ck-neon-retrowave-IL-000012", "strength": 0.8, "active": true}, {"name": "__dummy_item1__", "strength": 0, "active": false, "_isDummy": true}, {"name": "__dummy_item2__", "strength": 0, "active": false, "_isDummy": true}], "model": ["28", 0], "clip": ["29", 0]}, "class_type": "Lora Loader (LoraManager)", "_meta": {"title": "Lora Loader (LoraManager)"}}, "53": {"inputs": {"image": "2025-03-12-094411__1085931612515899.jpeg", "upload": "image"}, "class_type": "LoadImage", "_meta": {"title": "Load Image"}, "is_changed": ["1b62c290a107b22e8356d50e27ace831ed6f11cc55c686ac837193c786c03a6f"]}}
\ No newline at end of file