import os
import logging
from typing import Dict, Optional

from .base_model_service import BaseModelService
from .auto_tag_service import extract_auto_tags
from ..utils.models import EmbeddingMetadata
from ..config import config

logger = logging.getLogger(__name__)


class EmbeddingService(BaseModelService):
    """Model service specialised for embeddings."""

    def __init__(self, scanner, update_service=None):
        """Set up the service for the "embedding" model type.

        Args:
            scanner: Embedding scanner instance.
            update_service: Optional service for remote update tracking.
        """
        super().__init__(
            "embedding",
            scanner,
            EmbeddingMetadata,
            update_service=update_service,
        )

    async def format_response(self, embedding_data: Dict) -> Optional[Dict]:
        """Build the API response dict for a single embedding cache entry.

        Args:
            embedding_data: Cache entry describing one embedding.

        Returns:
            The formatted response dict, or None when ``file_path`` is
            missing, empty, or not a string (corrupted cache row), so the
            handler layer can filter the entry out. See issue #730.
        """
        entry = embedding_data

        # A usable entry must carry a non-empty string path; anything else
        # is treated as a corrupted cache row and dropped.
        raw_path = entry.get("file_path")
        if not (raw_path and isinstance(raw_path, str)):
            logger.warning(
                "Skipping corrupted embedding entry (missing file_path): %s",
                entry.get("file_name", ""),
            )
            return None

        # sub_type is the new canonical field on the cache entry.
        kind = entry.get("sub_type", "embedding")

        # Falsy values (None, "") collapse to an empty string; the display
        # name falls back to the file name.
        base_name = entry.get("file_name") or ""
        display_name = entry.get("model_name") or base_name
        folder_name = entry.get("folder") or ""

        # Computed values are resolved in the same order as the keys appear
        # in the response below.
        preview = config.get_preview_static_url(entry.get("preview_url", ""))
        normalized_path = raw_path.replace(os.sep, "/")
        civitai_info = self.filter_civitai_data(
            entry.get("civitai", {}), minimal=True
        )
        # Only derive tags when the entry has no stored (truthy) auto_tags.
        derived_tags = entry.get("auto_tags") or extract_auto_tags(entry)

        return {
            "model_name": display_name,
            "file_name": base_name,
            "preview_url": preview,
            "preview_nsfw_level": entry.get("preview_nsfw_level", 0),
            "base_model": entry.get("base_model", ""),
            "folder": folder_name,
            "sha256": entry.get("sha256", ""),
            "file_path": normalized_path,
            "file_size": entry.get("size", 0),
            "modified": entry.get("modified", ""),
            "tags": entry.get("tags", []),
            "from_civitai": entry.get("from_civitai", True),
            # "usage_count": embedding_data.get("usage_count", 0),  # TODO: Enable when embedding usage tracking is implemented
            "notes": entry.get("notes", ""),
            "sub_type": kind,
            "favorite": entry.get("favorite", False),
            "exclude": bool(entry.get("exclude", False)),
            "update_available": bool(entry.get("update_available", False)),
            "skip_metadata_refresh": bool(
                entry.get("skip_metadata_refresh", False)
            ),
            "civitai": civitai_info,
            "auto_tags": derived_tags,
            "version_count": entry.get("version_count"),
        }

    def find_duplicate_hashes(self) -> Dict:
        """Return the scanner hash index's report of duplicate SHA256 hashes."""
        hash_index = self.scanner._hash_index
        return hash_index.get_duplicate_hashes()

    def find_duplicate_filenames(self) -> Dict:
        """Return the scanner hash index's report of conflicting filenames."""
        hash_index = self.scanner._hash_index
        return hash_index.get_duplicate_filenames()