perf(check-model-exists): eliminate SQLite connection-per-query overhead and skip redundant history checks

Root cause: 231 concurrent /check-model-exists requests against a 175K-LoRA
library took ~9.4s of wall-clock time. The bottleneck was two-fold:

1. DownloadedVersionHistoryService opened a new sqlite3.connect() for every
   query under asyncio.Lock. With a large WAL from 175K entries, each
   connect() took ~8ms. Serialized by the lock across 231 requests, the
   230th request waited ~1848ms just for lock acquisition.

2. check_model_exists always queried download history even when the model
   was found locally. The history result (hasBeenDownloaded /
   downloadedVersionIds) is only used by the UI when the model is NOT
   found locally; when found, the 'in library' indicator takes priority.

Changes:
- downloaded_version_history_service.py: add a persistent _get_conn() that
  creates the SQLite connection once and reuses it across all queries
- misc_handlers.py: return early from check_model_exists when the model
  exists locally, bypassing the history service entirely (its lock is never acquired)

Expected: per-request wait time drops from ~1912ms to <3ms, wall clock
from ~9.4s to <0.3s for the 175K-lora user's 231-card page.
This commit is contained in:
Will Miao
2026-05-02 13:31:20 +08:00
parent 502b7eab31
commit d324b57274
2 changed files with 159 additions and 142 deletions

View File

@@ -1791,15 +1791,19 @@ class ModelLibraryHandler:
exists = True
model_type = "embedding"
if exists:
return web.json_response(
{
"success": True,
"exists": True,
"modelType": model_type,
"hasBeenDownloaded": False,
}
)
history_service = await self._get_download_history_service()
has_been_downloaded = False
history_type = model_type
if history_type:
has_been_downloaded = await history_service.has_been_downloaded(
history_type,
model_version_id,
)
else:
history_type = None
for candidate_type in ("lora", "checkpoint", "embedding"):
if await history_service.has_been_downloaded(
candidate_type,
@@ -1812,8 +1816,8 @@ class ModelLibraryHandler:
return web.json_response(
{
"success": True,
"exists": exists,
"modelType": model_type if exists else history_type,
"exists": False,
"modelType": history_type,
"hasBeenDownloaded": has_been_downloaded,
}
)
@@ -1833,29 +1837,35 @@ class ModelLibraryHandler:
model_type = None
versions = []
downloaded_version_ids = []
history_service = await self._get_download_history_service()
if lora_versions:
model_type = "lora"
versions = self._with_downloaded_flag(lora_versions)
downloaded_version_ids = await history_service.get_downloaded_version_ids(
model_type,
model_id,
return web.json_response(
{
"success": True,
"modelType": "lora",
"versions": self._with_downloaded_flag(lora_versions),
"downloadedVersionIds": [],
}
)
elif checkpoint_versions:
model_type = "checkpoint"
versions = self._with_downloaded_flag(checkpoint_versions)
downloaded_version_ids = await history_service.get_downloaded_version_ids(
model_type,
model_id,
if checkpoint_versions:
return web.json_response(
{
"success": True,
"modelType": "checkpoint",
"versions": self._with_downloaded_flag(checkpoint_versions),
"downloadedVersionIds": [],
}
)
elif embedding_versions:
model_type = "embedding"
versions = self._with_downloaded_flag(embedding_versions)
downloaded_version_ids = await history_service.get_downloaded_version_ids(
model_type,
model_id,
if embedding_versions:
return web.json_response(
{
"success": True,
"modelType": "embedding",
"versions": self._with_downloaded_flag(embedding_versions),
"downloadedVersionIds": [],
}
)
else:
history_service = await self._get_download_history_service()
for candidate_type in ("lora", "checkpoint", "embedding"):
candidate_downloaded_version_ids = (
await history_service.get_downloaded_version_ids(

View File

@@ -64,6 +64,7 @@ class DownloadedVersionHistoryService:
self._db_path = db_path or _resolve_database_path()
self._settings = settings_manager or get_settings_manager()
self._lock = asyncio.Lock()
self._conn: sqlite3.Connection | None = None
self._schema_initialized = False
self._ensure_directory()
self._initialize_schema()
@@ -78,6 +79,12 @@ class DownloadedVersionHistoryService:
conn.row_factory = sqlite3.Row
return conn
def _get_conn(self) -> sqlite3.Connection:
    """Return the service's shared SQLite connection, opening it on first use.

    The connection is cached on ``self._conn`` so later calls reuse it
    instead of reconnecting. It is opened against ``self._db_path`` with
    ``check_same_thread=False`` and produces ``sqlite3.Row`` rows.
    """
    cached = self._conn
    if cached is not None:
        return cached
    # First call: open the connection and cache it before configuring it.
    self._conn = fresh = sqlite3.connect(self._db_path, check_same_thread=False)
    fresh.row_factory = sqlite3.Row
    return fresh
def _initialize_schema(self) -> None:
if self._schema_initialized:
return
@@ -116,7 +123,7 @@ class DownloadedVersionHistoryService:
timestamp = time.time()
async with self._lock:
with self._connect() as conn:
conn = self._get_conn()
conn.execute(
"""
INSERT INTO downloaded_model_versions (
@@ -180,7 +187,7 @@ class DownloadedVersionHistoryService:
return
async with self._lock:
with self._connect() as conn:
conn = self._get_conn()
conn.executemany(
"""
INSERT INTO downloaded_model_versions (
@@ -208,7 +215,7 @@ class DownloadedVersionHistoryService:
timestamp = time.time()
async with self._lock:
with self._connect() as conn:
conn = self._get_conn()
conn.execute(
"""
INSERT INTO downloaded_model_versions (
@@ -238,7 +245,7 @@ class DownloadedVersionHistoryService:
return False
async with self._lock:
with self._connect() as conn:
conn = self._get_conn()
row = conn.execute(
"""
SELECT is_deleted_override
@@ -258,7 +265,7 @@ class DownloadedVersionHistoryService:
return []
async with self._lock:
with self._connect() as conn:
conn = self._get_conn()
rows = conn.execute(
"""
SELECT version_id
@@ -291,7 +298,7 @@ class DownloadedVersionHistoryService:
params: list[object] = [normalized_type, *normalized_model_ids]
async with self._lock:
with self._connect() as conn:
conn = self._get_conn()
rows = conn.execute(
f"""
SELECT model_id, version_id