refactor(downloader): enhance download_to_memory to return response headers and improve error handling

This commit is contained in:
Will Miao
2025-09-15 18:53:04 +08:00
parent 4540e47055
commit 6f9245df01
2 changed files with 136 additions and 73 deletions

View File

@@ -366,8 +366,9 @@ class Downloader:
self,
url: str,
use_auth: bool = False,
custom_headers: Optional[Dict[str, str]] = None
) -> Tuple[bool, Union[bytes, str]]:
custom_headers: Optional[Dict[str, str]] = None,
return_headers: bool = False
) -> Tuple[bool, Union[bytes, str], Optional[Dict]]:
"""
Download a file to memory (for small files like preview images)
@@ -375,9 +376,10 @@ class Downloader:
url: Download URL
use_auth: Whether to include authentication headers
custom_headers: Additional headers to include in request
return_headers: Whether to return response headers along with content
Returns:
Tuple[bool, Union[bytes, str]]: (success, content or error message)
Tuple[bool, Union[bytes, str], Optional[Dict]]: (success, content or error message, response headers if requested)
"""
try:
session = await self.session
@@ -395,19 +397,26 @@ class Downloader:
async with session.get(url, headers=headers, proxy=self.proxy_url) as response:
if response.status == 200:
content = await response.read()
return True, content
if return_headers:
return True, content, dict(response.headers)
else:
return True, content, None
elif response.status == 401:
return False, "Unauthorized access - invalid or missing API key"
error_msg = "Unauthorized access - invalid or missing API key"
return False, error_msg, None
elif response.status == 403:
return False, "Access forbidden"
error_msg = "Access forbidden"
return False, error_msg, None
elif response.status == 404:
return False, "File not found"
error_msg = "File not found"
return False, error_msg, None
else:
return False, f"Download failed with status {response.status}"
error_msg = f"Download failed with status {response.status}"
return False, error_msg, None
except Exception as e:
logger.error(f"Error downloading to memory from {url}: {e}")
return False, str(e)
return False, str(e), None
async def get_response_headers(
self,

View File

@@ -23,17 +23,60 @@ class ExampleImagesProcessor:
return ''.join(random.choice(chars) for _ in range(length))
@staticmethod
def get_civitai_optimized_url(image_url):
"""Convert Civitai image URL to its optimized WebP version"""
def get_civitai_optimized_url(media_url):
"""Convert Civitai media URL (image or video) to its optimized version"""
base_pattern = r'(https://image\.civitai\.com/[^/]+/[^/]+)'
match = re.match(base_pattern, image_url)
match = re.match(base_pattern, media_url)
if match:
base_url = match.group(1)
return f"{base_url}/optimized=true/image.webp"
return f"{base_url}/optimized=true"
return image_url
return media_url
@staticmethod
def _get_file_extension_from_content_or_headers(content, headers, fallback_url=None):
    """Determine a media file extension from magic bytes, headers, or URL.

    Detection is attempted in this order, returning on the first hit:
    1. Magic bytes at the start of ``content``.
    2. The ``Content-Type`` response header (key matched case-insensitively,
       parameters such as ``; charset=...`` ignored).
    3. The extension of ``fallback_url`` if it is a supported media extension.
    4. ``'.jpg'`` as the final default.

    Args:
        content: Downloaded payload (bytes-like) or a falsy value.
        headers: Mapping of response headers, or None.
        fallback_url: Optional URL whose path extension is used as a last resort.

    Returns:
        str: File extension including the leading dot (e.g. ``'.webp'``).
    """
    # 1) Magic bytes for common formats.
    if content:
        if content.startswith(b'\xFF\xD8\xFF'):
            return '.jpg'
        if content.startswith(b'\x89PNG\r\n\x1A\n'):
            return '.png'
        if content.startswith((b'GIF87a', b'GIF89a')):
            return '.gif'
        # RIFF container: the form type lives at bytes 8-12.
        if content.startswith(b'RIFF') and content[8:12] == b'WEBP':
            return '.webp'
        # ISO base media files: 4-byte box size, then b'ftyp', then the major
        # brand. The box size varies between encoders, so it is not matched.
        if content[4:8] == b'ftyp':
            brand = content[8:12]
            if brand == b'qt  ':
                return '.mov'
            # Brands commonly seen on MP4 video; other ftyp brands (e.g. image
            # formats) fall through to the header / URL checks below.
            if brand.startswith((b'mp4', b'iso', b'avc1', b'M4V', b'dash')):
                return '.mp4'
        # EBML header (used by WebM).
        if content.startswith(b'\x1A\x45\xDF\xA3'):
            return '.webm'

    # 2) Content-Type header.
    if headers:
        type_map = {
            'image/jpeg': '.jpg',
            'image/png': '.png',
            'image/gif': '.gif',
            'image/webp': '.webp',
            'video/mp4': '.mp4',
            'video/webm': '.webm',
            'video/quicktime': '.mov'
        }
        raw_type = ''
        # A plain dict copy of response headers is case-sensitive, so match
        # the key case-insensitively instead of relying on .get('content-type').
        for key, value in headers.items():
            if str(key).lower() == 'content-type':
                raw_type = str(value)
                break
        # Drop parameters such as "; charset=binary" before the lookup.
        media_type = raw_type.split(';', 1)[0].strip().lower()
        if media_type in type_map:
            return type_map[media_type]

    # 3) Fallback to the URL's extension if it is a supported media type.
    if fallback_url:
        filename = os.path.basename(fallback_url.split('?')[0])
        ext = os.path.splitext(filename)[1].lower()
        if ext in SUPPORTED_MEDIA_EXTENSIONS['images'] or ext in SUPPORTED_MEDIA_EXTENSIONS['videos']:
            return ext

    # 4) Default fallback.
    return '.jpg'
@staticmethod
async def download_model_images(model_hash, model_name, model_images, model_dir, optimize, downloader):
"""Download images for a single model
@@ -48,45 +91,49 @@ class ExampleImagesProcessor:
if not image_url:
continue
# Get image filename from URL
image_filename = os.path.basename(image_url.split('?')[0])
image_ext = os.path.splitext(image_filename)[1].lower()
# Handle images and videos
is_image = image_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
is_video = image_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
if not (is_image or is_video):
logger.debug(f"Skipping unsupported file type: {image_filename}")
continue
# Use 0-based indexing instead of 1-based indexing
save_filename = f"image_{i}{image_ext}"
# If optimizing images and this is a Civitai image, use their pre-optimized WebP version
if is_image and optimize and 'civitai.com' in image_url:
# Apply optimization for Civitai URLs if enabled
original_url = image_url
if optimize and 'civitai.com' in image_url:
image_url = ExampleImagesProcessor.get_civitai_optimized_url(image_url)
save_filename = f"image_{i}.webp"
# Check if already downloaded
save_path = os.path.join(model_dir, save_filename)
if os.path.exists(save_path):
logger.debug(f"File already exists: {save_path}")
continue
# Download the file
# Download the file first to determine the actual file type
try:
logger.debug(f"Downloading {save_filename} for {model_name}")
logger.debug(f"Downloading media file {i} for {model_name}")
# Download using the unified downloader
success, content = await downloader.download_to_memory(
# Download using the unified downloader with headers
success, content, headers = await downloader.download_to_memory(
image_url,
use_auth=False # Example images don't need auth
use_auth=False, # Example images don't need auth
return_headers=True
)
if success:
# Determine file extension from content or headers
media_ext = ExampleImagesProcessor._get_file_extension_from_content_or_headers(
content, headers, original_url
)
# Check if the detected file type is supported
is_image = media_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
is_video = media_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
if not (is_image or is_video):
logger.debug(f"Skipping unsupported file type: {media_ext}")
continue
# Use 0-based indexing with the detected extension
save_filename = f"image_{i}{media_ext}"
save_path = os.path.join(model_dir, save_filename)
# Check if already downloaded
if os.path.exists(save_path):
logger.debug(f"File already exists: {save_path}")
continue
# Save the file
with open(save_path, 'wb') as f:
f.write(content)
elif "404" in str(content):
error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
logger.warning(error_msg)
@@ -119,45 +166,49 @@ class ExampleImagesProcessor:
if not image_url:
continue
# Get image filename from URL
image_filename = os.path.basename(image_url.split('?')[0])
image_ext = os.path.splitext(image_filename)[1].lower()
# Handle images and videos
is_image = image_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
is_video = image_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
if not (is_image or is_video):
logger.debug(f"Skipping unsupported file type: {image_filename}")
continue
# Use 0-based indexing instead of 1-based indexing
save_filename = f"image_{i}{image_ext}"
# If optimizing images and this is a Civitai image, use their pre-optimized WebP version
if is_image and optimize and 'civitai.com' in image_url:
# Apply optimization for Civitai URLs if enabled
original_url = image_url
if optimize and 'civitai.com' in image_url:
image_url = ExampleImagesProcessor.get_civitai_optimized_url(image_url)
save_filename = f"image_{i}.webp"
# Check if already downloaded
save_path = os.path.join(model_dir, save_filename)
if os.path.exists(save_path):
logger.debug(f"File already exists: {save_path}")
continue
# Download the file
# Download the file first to determine the actual file type
try:
logger.debug(f"Downloading {save_filename} for {model_name}")
logger.debug(f"Downloading media file {i} for {model_name}")
# Download using the unified downloader
success, content = await downloader.download_to_memory(
# Download using the unified downloader with headers
success, content, headers = await downloader.download_to_memory(
image_url,
use_auth=False # Example images don't need auth
use_auth=False, # Example images don't need auth
return_headers=True
)
if success:
# Determine file extension from content or headers
media_ext = ExampleImagesProcessor._get_file_extension_from_content_or_headers(
content, headers, original_url
)
# Check if the detected file type is supported
is_image = media_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
is_video = media_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
if not (is_image or is_video):
logger.debug(f"Skipping unsupported file type: {media_ext}")
continue
# Use 0-based indexing with the detected extension
save_filename = f"image_{i}{media_ext}"
save_path = os.path.join(model_dir, save_filename)
# Check if already downloaded
if os.path.exists(save_path):
logger.debug(f"File already exists: {save_path}")
continue
# Save the file
with open(save_path, 'wb') as f:
f.write(content)
elif "404" in str(content):
error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
logger.warning(error_msg)
@@ -569,4 +620,7 @@ class ExampleImagesProcessor:
return web.json_response({
'success': False,
'error': str(e)
}, status=500)
}, status=500)