Compare commits

...

8 Commits

Author SHA1 Message Date
Will Miao
1f4edbeb9d chore(release): bump version to v1.1.1 2026-06-14 23:49:44 +08:00
Will Miao
a256558a0e fix(downloads): delete history entries on retry and add dedup for bug #980
- retry_from_history() and retry_all_failed() now DELETE the original
  history entry after re-queuing it. Previously the old entry stayed
  in history causing exponential growth on repeated retry→cancel→retry
  cycles.
- Add deduplicate() called once on singleton creation to clean up
  existing duplicate queue/history entries left by the bug:
  1. In-status dedup (keep highest id per model+version+status)
  2. Cross-status dedup (prefer completed > failed > canceled)
  3. Queue dedup (keep highest rowid per model+version)
  4. Orphan queue cleanup (source='retry' entries obsoleted by
     terminal history entries)
2026-06-14 22:52:44 +08:00
Will Miao
818b9113f0 fix(preview): add Cache-Control header to FileResponse for browser caching (#975)
Chrome does not cache 206 Partial Content responses for <video> elements
without an explicit Cache-Control header. When VirtualScroller recycles
cards and creates new <video> elements with the same URL, Chrome
re-downloads the full video (several MB each) instead of using the cache.

Verified via Chrome DevTools: same .mp4 URL appears 2-3 times in network
trace as separate requests with no cache hit, each returning 206. With
Cache-Control: max-age=86400, the browser will reuse the cached response
for 24 hours across scroll cycles.

Video preview files are ~3.5MB while image previews are ~50-100KB (due
to WebP optimization), making caching especially impactful for videos.
2026-06-14 17:36:59 +08:00
Will Miao
6a4fd020dc fix(api): return JSON error responses for all /api/* routes — prevent JSON.parse crashes on 404/500 2026-06-14 13:13:01 +08:00
Will Miao
7a23040452 fix(save-image): sanitize invalid filename chars from %pprompt%, %nprompt%, %model% patterns (#978) 2026-06-14 09:33:12 +08:00
Will Miao
138024aefe fix(preview): revert to FileResponse as default for all platforms (#975)
The previous commit (a19ddc14) restored Linux sendfile but kept the
manual streaming path for Windows via sys.platform guard. A Windows
user reports performance is still worse than v1.0.5.

Switch back to web.FileResponse for all files on all platforms as the
default. The IOCP crash is an edge case (fast scrolling through many
video previews) that affects few users, while the Python chunked I/O
performance penalty affects everyone.

_stream_file() is kept as an unused fallback for a future compat
setting toggle.
2026-06-13 21:43:44 +08:00
Will Miao
a19ddc14f6 perf(preview): restore Linux sendfile, add cache headers, increase chunk size (#975)
- Restrict manual video streaming to Windows only (sys.platform == 'win32');
  Linux/macOS now uses kernel sendfile (zero-copy DMA) via aiohttp FileResponse
- Add Cache-Control: public, max-age=86400 to streaming responses so browsers
  cache video previews across scroll cycles
- Increase chunk size from 256KB to 1MB to reduce async iteration overhead on
  Windows where streaming is still required
2026-06-13 20:06:58 +08:00
Will Miao
7001ced694 fix(rate-limit): respect server retry_after instead of capping at 30s 2026-06-13 18:01:13 +08:00
8 changed files with 245 additions and 16 deletions

View File

@@ -33,6 +33,7 @@ from .utils.example_images_migration import ExampleImagesMigration
from .services.websocket_manager import ws_manager
from .services.example_images_cleanup_service import ExampleImagesCleanupService
from .middleware.csp_middleware import relax_csp_for_remote_media
from .middleware.error_middleware import api_json_error
logger = logging.getLogger(__name__)
@@ -76,6 +77,11 @@ class LoraManager:
"""Initialize and register all routes using the new refactored architecture"""
app = PromptServer.instance.app
# Register JSON error middleware for /api/* routes as the outermost
# middleware so it catches errors from all other middlewares.
if api_json_error not in app.middlewares:
app.middlewares.insert(0, api_json_error)
if relax_csp_for_remote_media not in app.middlewares:
# Ensure CSP relaxer executes after ComfyUI's block_external_middleware so it can
# see and extend the restrictive header instead of being overwritten by it.

View File

@@ -0,0 +1,71 @@
"""JSON error middleware for API routes.
Ensures all responses to /api/* requests return valid JSON that the
browser-extension frontend can JSON.parse() without crashing, even when
the route does not exist (404) or the handler raises an exception (500).
Extension consumers call response.json() unconditionally — an HTML error
page causes ``SyntaxError: unexpected end of data`` that leaks into the
popup UI as a toast notification.
"""
from __future__ import annotations
import logging
from typing import Awaitable, Callable
from aiohttp import web
logger = logging.getLogger(__name__)
@web.middleware
async def api_json_error(
request: web.Request,
handler: Callable[[web.Request], Awaitable[web.Response]],
) -> web.Response:
"""Return JSON ``{"success": false, "error": "..."}`` for API errors.
Only intercepts paths starting with ``/api/`` — all other routes
(frontend pages, static files, WebSocket upgrades) pass through
unchanged.
"""
if not request.path.startswith("/api/"):
return await handler(request)
try:
response = await handler(request)
return response
except web.HTTPException as exc:
# Let redirects (301, 302, 307, 308) propagate — they are not errors.
if exc.status < 400:
raise
logger.warning(
"API %s %s returned HTTP %d: %s",
request.method,
request.path,
exc.status,
exc.reason,
)
return web.json_response(
{"success": False, "error": f"{exc.status}: {exc.reason}"},
status=exc.status,
)
except Exception as exc:
logger.error(
"API %s %s raised unhandled exception: %s",
request.method,
request.path,
exc,
exc_info=True,
)
return web.json_response(
{
"success": False,
"error": f"500: Internal Server Error ({type(exc).__name__})",
},
status=500,
)

View File

@@ -11,7 +11,7 @@ from ..metadata_collector.metadata_processor import MetadataProcessor
from ..metadata_collector import get_metadata
from ..utils.constants import CARD_PREVIEW_WIDTH
from ..utils.exif_utils import ExifUtils
from ..utils.utils import calculate_recipe_fingerprint
from ..utils.utils import calculate_recipe_fingerprint, sanitize_folder_name
from PIL import Image, PngImagePlugin
import piexif
import logging
@@ -309,12 +309,14 @@ class SaveImageLM:
filename = filename.replace(segment, str(h))
elif key == "pprompt" and "prompt" in metadata_dict:
prompt = metadata_dict.get("prompt", "").replace("\n", " ")
prompt = sanitize_folder_name(prompt)
if len(parts) >= 2:
length = int(parts[1])
prompt = prompt[:length]
filename = filename.replace(segment, prompt.strip())
elif key == "nprompt" and "negative_prompt" in metadata_dict:
prompt = metadata_dict.get("negative_prompt", "").replace("\n", " ")
prompt = sanitize_folder_name(prompt)
if len(parts) >= 2:
length = int(parts[1])
prompt = prompt[:length]
@@ -328,6 +330,7 @@ class SaveImageLM:
model = "model_unavailable"
else:
model = os.path.splitext(os.path.basename(model_value))[0]
model = sanitize_folder_name(model)
if len(parts) >= 2:
length = int(parts[1])
model = model[:length]

View File

@@ -13,7 +13,7 @@ from ...config import config as global_config
logger = logging.getLogger(__name__)
_CHUNK_SIZE = 256 * 1024 # 256 KB
_CHUNK_SIZE = 1024 * 1024 # 1 MB — balance between streaming iteration overhead and per-chunk memory
# Video file extensions that bypass native sendfile on Windows
# to avoid IOCP/ProactorEventLoop crashes during client disconnect.
@@ -55,16 +55,19 @@ class PreviewHandler:
logger.debug("Preview file not found at %s", str(resolved))
raise web.HTTPNotFound(text="Preview file not found")
# Video files: stream manually to avoid Windows native sendfile crash.
# aiohttp's FileResponse uses _sendfile_native on Windows (IOCP-based),
# which breaks when the client disconnects mid-transfer — this happens
# constantly when users scroll through a gallery of animated previews.
suffix = resolved.suffix.lower()
if suffix in _VIDEO_EXTENSIONS:
return await self._stream_file(request, resolved)
# aiohttp's FileResponse handles range requests and content headers for us.
return web.FileResponse(path=resolved, chunk_size=_CHUNK_SIZE)
# aiohttp's FileResponse handles range requests, content headers, and
# uses kernel sendfile (zero-copy DMA) on Linux/macOS. On Windows it
# uses IOCP-based _sendfile_native which can crash when the client
# disconnects mid-transfer during fast scrolling. The _stream_file()
# fallback is kept for a future compat toggle.
#
# Set explicit Cache-Control so the browser can cache video (and image)
# previews across VirtualScroller recycling cycles. Without this,
# Chrome does not cache 206 Partial Content responses for <video>
# elements, causing the same video to be re-downloaded on every scroll.
resp = web.FileResponse(path=resolved, chunk_size=_CHUNK_SIZE)
resp.headers["Cache-Control"] = "public, max-age=86400"
return resp
async def _stream_file(
self, request: web.Request, path: Path
@@ -83,6 +86,10 @@ class PreviewHandler:
resp.content_type = content_type
resp.content_length = file_size
# Allow browser caching: video previews rarely change during a session.
# The frontend already appends ?t={version} to bust cache on update.
resp.headers["Cache-Control"] = "public, max-age=86400"
await resp.prepare(request)
try:

View File

@@ -82,6 +82,7 @@ class DownloadQueueService:
async with cls._class_lock:
if cls._instance is None:
cls._instance = cls()
await cls._instance.deduplicate()
return cls._instance
def __init__(self, db_path: Optional[str] = None) -> None:
@@ -608,7 +609,9 @@ class DownloadQueueService:
Looks up the history record by its primary key. If the status is
``failed`` or ``canceled`` a new queue entry is created with the
same model metadata and a fresh download id.
same model metadata and a fresh download id, and the original
history entry is **deleted** to prevent exponential growth when
the retried item is later canceled or fails again and re-retried.
"""
async with self._lock:
conn = self._get_conn()
@@ -645,6 +648,10 @@ class DownloadQueueService:
now,
),
)
conn.execute(
"DELETE FROM download_history WHERE id = ?",
(item_id,),
)
conn.commit()
queued = conn.execute(
"SELECT * FROM download_queue WHERE download_id = ?",
@@ -656,6 +663,9 @@ class DownloadQueueService:
async def retry_all_failed(self) -> int:
"""Re-queue all failed and canceled downloads from history.
Each history entry is **deleted** after being re-queued so that
repeated retry-all calls do not cause exponential growth.
Returns the number of items that were re-queued.
"""
async with self._lock:
@@ -691,6 +701,10 @@ class DownloadQueueService:
now,
),
)
conn.execute(
"DELETE FROM download_history WHERE id = ?",
(row["id"],),
)
count += 1
conn.commit()
@@ -732,3 +746,126 @@ class DownloadQueueService:
"failed": history_stats.get("failed", 0),
"canceled": history_stats.get("canceled", 0),
}
# ------------------------------------------------------------------
# Deduplication (one-time cleanup for bug #980)
# ------------------------------------------------------------------
async def deduplicate(self) -> dict[str, int]:
"""Remove duplicate entries caused by the retry-amplification bug.
The bug (issue #980) caused the same download to appear N times in
both the queue and history tables when ``retry_all_failed`` was
called repeatedly without deleting the original history rows.
This method is called **once** when the singleton is first created.
It is idempotent — after the first run there will be no duplicates
to remove, so subsequent calls are a no-op.
Returns a dict with the count of removed rows per table.
"""
result: dict[str, int] = {
"removed_history": 0,
"removed_queue": 0,
"removed_orphan_queue": 0,
}
async with self._lock:
conn = self._get_conn()
# 1. History: for each (model_id, model_version_id, status) triplet
# keep only the row with the highest id (most recently inserted).
conn.execute("""
DELETE FROM download_history
WHERE id NOT IN (
SELECT MAX(id)
FROM download_history
GROUP BY model_id, model_version_id, status
)
""")
result["removed_history"] = conn.execute(
"SELECT changes()"
).fetchone()[0]
# 2. Cross-status dedup: for each (model_id, model_version_id),
# keep only the entry with the highest-priority terminal status.
# Priority: completed (3) > failed (2) > canceled (1).
# This prevents the same model version from having both a
# 'failed' and a 'canceled' entry (or a 'completed' alongside
# either) after the bug-created duplicates are removed.
conn.execute("""
DELETE FROM download_history
WHERE id NOT IN (
SELECT dh.id
FROM download_history dh
INNER JOIN (
SELECT model_id, model_version_id,
MAX(CASE status
WHEN 'completed' THEN 3
WHEN 'failed' THEN 2
WHEN 'canceled' THEN 1
ELSE 0
END) AS best_prio
FROM download_history
GROUP BY model_id, model_version_id
) best
ON dh.model_id = best.model_id
AND dh.model_version_id = best.model_version_id
AND CASE dh.status
WHEN 'completed' THEN 3
WHEN 'failed' THEN 2
WHEN 'canceled' THEN 1
ELSE 0
END = best.best_prio
GROUP BY dh.model_id, dh.model_version_id
HAVING dh.id = MAX(dh.id)
)
""")
result["removed_history"] += conn.execute(
"SELECT changes()"
).fetchone()[0]
# 3. Queue: for each (model_id, model_version_id) keep only the
# row with the latest added_at (most recently enqueued).
conn.execute("""
DELETE FROM download_queue
WHERE rowid NOT IN (
SELECT MAX(rowid)
FROM download_queue
WHERE status IN ('queued', 'downloading', 'paused', 'waiting')
GROUP BY model_id, model_version_id
)
AND status IN ('queued', 'downloading', 'paused', 'waiting')
""")
result["removed_queue"] = conn.execute(
"SELECT changes()"
).fetchone()[0]
# 4. Remove orphaned queue entries — items that were re-queued
# (source='retry') but whose model version already has a
# terminal history entry. These are artifacts of the buggy
# retry cycle that were never cleaned up.
conn.execute("""
DELETE FROM download_queue
WHERE source = 'retry'
AND (model_id, model_version_id) IN (
SELECT model_id, model_version_id
FROM download_history
WHERE status IN ('failed', 'canceled')
)
AND status IN ('queued', 'waiting')
""")
result["removed_orphan_queue"] = conn.execute(
"SELECT changes()"
).fetchone()[0]
conn.commit()
logger.info(
"Deduplicate: removed %s history rows, %s queue rows, "
"%s orphaned queue rows",
result["removed_history"],
result["removed_queue"],
result["removed_orphan_queue"],
)
return result

View File

@@ -81,7 +81,11 @@ class _RateLimitRetryHelper:
def _calculate_delay(self, retry_after: Optional[float], attempt: int) -> float:
if retry_after is not None:
return min(self._max_delay, max(0.0, retry_after))
# Cap at 1800s (30 min) as a safety ceiling. The old 30s cap was
# too low — CivArchive can return retry_after ~1500s, causing all
# retries to fail. A generous ceiling protects against pathological
# server values while still respecting the server's guidance.
return min(1800.0, max(0.0, retry_after))
base_delay = self._base_delay * (2 ** max(0, attempt - 1))
jitter_span = base_delay * self._jitter_ratio

View File

@@ -1,7 +1,7 @@
[project]
name = "comfyui-lora-manager"
description = "Revolutionize your workflow with the ultimate LoRA companion for ComfyUI!"
version = "1.1.0"
version = "1.1.1"
license = {file = "LICENSE"}
dependencies = [
"aiohttp",

View File

@@ -2,6 +2,7 @@ import os
import sys
import json
from py.middleware.cache_middleware import cache_control
from py.middleware.error_middleware import api_json_error
from py.utils.settings_paths import ensure_settings_file
# Set environment variable to indicate standalone mode
@@ -157,7 +158,7 @@ class StandaloneServer:
def __init__(self):
self.app = web.Application(
logger=logger,
middlewares=[cache_control],
middlewares=[api_json_error, cache_control],
client_max_size=256 * 1024 * 1024,
handler_args={
"max_field_size": HEADER_SIZE_LIMIT,