From 6f9245df0130ebc116dcdef20a230e6f1a64f18d Mon Sep 17 00:00:00 2001
From: Will Miao <13051207myq@gmail.com>
Date: Mon, 15 Sep 2025 18:53:04 +0800
Subject: [PATCH] refactor(downloader): enhance download_to_memory to return
 response headers and improve error handling

---
 py/services/downloader.py            |  27 ++--
 py/utils/example_images_processor.py | 182 +++++++++++++++++----------
 2 files changed, 136 insertions(+), 73 deletions(-)

diff --git a/py/services/downloader.py b/py/services/downloader.py
index dd2c8c32..4f6b5f97 100644
--- a/py/services/downloader.py
+++ b/py/services/downloader.py
@@ -366,8 +366,9 @@ class Downloader:
         self,
         url: str,
         use_auth: bool = False,
-        custom_headers: Optional[Dict[str, str]] = None
-    ) -> Tuple[bool, Union[bytes, str]]:
+        custom_headers: Optional[Dict[str, str]] = None,
+        return_headers: bool = False
+    ) -> Tuple[bool, Union[bytes, str], Optional[Dict]]:
         """
         Download a file to memory (for small files like preview images)
         
@@ -375,9 +376,10 @@ class Downloader:
             url: Download URL
             use_auth: Whether to include authentication headers
             custom_headers: Additional headers to include in request
+            return_headers: Whether to return response headers along with content
             
         Returns:
-            Tuple[bool, Union[bytes, str]]: (success, content or error message)
+            Tuple[bool, Union[bytes, str], Optional[Dict]]: (success, content or error message, response headers if requested)
         """
         try:
             session = await self.session
@@ -395,19 +397,26 @@ class Downloader:
             async with session.get(url, headers=headers, proxy=self.proxy_url) as response:
                 if response.status == 200:
                     content = await response.read()
-                    return True, content
+                    if return_headers:
+                        return True, content, dict(response.headers)
+                    else:
+                        return True, content, None
                 elif response.status == 401:
-                    return False, "Unauthorized access - invalid or missing API key"
+                    error_msg = "Unauthorized access - invalid or missing API key"
+                    return False, error_msg, None
                 elif response.status == 403:
-                    return False, "Access forbidden"
+                    error_msg = "Access forbidden"
+                    return False, error_msg, None
                 elif response.status == 404:
-                    return False, "File not found"
+                    error_msg = "File not found"
+                    return False, error_msg, None
                 else:
-                    return False, f"Download failed with status {response.status}"
+                    error_msg = f"Download failed with status {response.status}"
+                    return False, error_msg, None
                     
         except Exception as e:
             logger.error(f"Error downloading to memory from {url}: {e}")
-            return False, str(e)
+            return False, str(e), None
     
     async def get_response_headers(
         self,
diff --git a/py/utils/example_images_processor.py b/py/utils/example_images_processor.py
index 9dba4e2c..f1cfd2bf 100644
--- a/py/utils/example_images_processor.py
+++ b/py/utils/example_images_processor.py
@@ -23,17 +23,60 @@ class ExampleImagesProcessor:
         return ''.join(random.choice(chars) for _ in range(length))
     
     @staticmethod
-    def get_civitai_optimized_url(image_url):
-        """Convert Civitai image URL to its optimized WebP version"""
+    def get_civitai_optimized_url(media_url):
+        """Rewrite a Civitai media URL (image or video) to its 'optimized=true' form; other URLs are returned unchanged"""
         base_pattern = r'(https://image\.civitai\.com/[^/]+/[^/]+)'
-        match = re.match(base_pattern, image_url)
+        match = re.match(base_pattern, media_url)
         
         if match:
             base_url = match.group(1)
-            return f"{base_url}/optimized=true/image.webp"
+            return f"{base_url}/optimized=true"
         
-        return image_url
+        return media_url
     
+    @staticmethod
+    def _get_file_extension_from_content_or_headers(content, headers, fallback_url=None):
+        """Guess a media file extension (leading dot included).
+
+        Tries magic bytes of ``content``, then the Content-Type entry of ``headers``
+        (name matched case-insensitively, parameters ignored), then the extension of
+        ``fallback_url`` if it is a supported media type, and finally ``'.jpg'``.
+
+        Args:
+            content: Downloaded bytes; may be empty or None.
+            headers: Mapping of response headers, or None.
+            fallback_url: URL whose path extension is used as a last resort.
+
+        Returns:
+            str: Extension such as '.png' or '.mp4'.
+        """
+        if content:
+            signatures = ((b'\xFF\xD8\xFF', '.jpg'), (b'\x89PNG\r\n\x1A\n', '.png'), (b'GIF87a', '.gif'),
+                          (b'GIF89a', '.gif'), (b'\x1A\x45\xDF\xA3', '.webm'))
+            for magic, ext in signatures:
+                if content.startswith(magic):
+                    return ext
+            if content.startswith(b'RIFF') and content[8:12] == b'WEBP':
+                return '.webp'
+            # ISO BMFF: 'ftyp' follows a 4-byte box size that varies between encoders
+            if content[4:8] == b'ftyp' and (content[8:11] == b'mp4' or content[8:12] in (b'isom', b'iso2', b'avc1')):
+                return '.mp4'
+        
+        if headers:
+            # A plain dict copy of the headers keeps the server's casing for names
+            raw = next((v for k, v in headers.items() if k.lower() == 'content-type'), '')
+            mime = raw.split(';')[0].strip().lower()
+            type_map = {'image/jpeg': '.jpg', 'image/png': '.png', 'image/gif': '.gif', 'image/webp': '.webp',
+                        'video/mp4': '.mp4', 'video/webm': '.webm', 'video/quicktime': '.mov'}
+            if mime in type_map:
+                return type_map[mime]
+        
+        if fallback_url:
+            ext = os.path.splitext(os.path.basename(fallback_url.split('?')[0]))[1].lower()
+            if ext in SUPPORTED_MEDIA_EXTENSIONS['images'] or ext in SUPPORTED_MEDIA_EXTENSIONS['videos']:
+                return ext
+        return '.jpg'
+
     @staticmethod
     async def download_model_images(model_hash, model_name, model_images, model_dir, optimize, downloader):
         """Download images for a single model
@@ -48,45 +91,49 @@ class ExampleImagesProcessor:
             if not image_url:
                 continue
             
-            # Get image filename from URL
-            image_filename = os.path.basename(image_url.split('?')[0])
-            image_ext = os.path.splitext(image_filename)[1].lower()
-            
-            # Handle images and videos
-            is_image = image_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
-            is_video = image_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
-            
-            if not (is_image or is_video):
-                logger.debug(f"Skipping unsupported file type: {image_filename}")
-                continue
-            
-            # Use 0-based indexing instead of 1-based indexing
-            save_filename = f"image_{i}{image_ext}"
-            
-            # If optimizing images and this is a Civitai image, use their pre-optimized WebP version
-            if is_image and optimize and 'civitai.com' in image_url:
+            # Apply optimization for Civitai URLs if enabled
+            original_url = image_url
+            if optimize and 'civitai.com' in image_url:
                 image_url = ExampleImagesProcessor.get_civitai_optimized_url(image_url)
-                save_filename = f"image_{i}.webp"
             
-            # Check if already downloaded
-            save_path = os.path.join(model_dir, save_filename)
-            if os.path.exists(save_path):
-                logger.debug(f"File already exists: {save_path}")
-                continue
-            
-            # Download the file
+            # Download the file first to determine the actual file type
             try:
-                logger.debug(f"Downloading {save_filename} for {model_name}")
+                logger.debug(f"Downloading media file {i} for {model_name}")
                 
-                # Download using the unified downloader
-                success, content = await downloader.download_to_memory(
+                # Download using the unified downloader with headers
+                success, content, headers = await downloader.download_to_memory(
                     image_url,
-                    use_auth=False  # Example images don't need auth
+                    use_auth=False,  # Example images don't need auth
+                    return_headers=True
                 )
                 
                 if success:
+                    # Determine file extension from content or headers
+                    media_ext = ExampleImagesProcessor._get_file_extension_from_content_or_headers(
+                        content, headers, original_url
+                    )
+                    
+                    # Check if the detected file type is supported
+                    is_image = media_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
+                    is_video = media_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
+                    
+                    if not (is_image or is_video):
+                        logger.debug(f"Skipping unsupported file type: {media_ext}")
+                        continue
+                    
+                    # Use 0-based indexing with the detected extension
+                    save_filename = f"image_{i}{media_ext}"
+                    save_path = os.path.join(model_dir, save_filename)
+                    
+                    # Check if already downloaded
+                    if os.path.exists(save_path):
+                        logger.debug(f"File already exists: {save_path}")
+                        continue
+                    
+                    # Save the file
                     with open(save_path, 'wb') as f:
                         f.write(content)
+                    
                 elif "404" in str(content):
                     error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
                     logger.warning(error_msg)
@@ -119,45 +166,49 @@ class ExampleImagesProcessor:
             if not image_url:
                 continue
             
-            # Get image filename from URL
-            image_filename = os.path.basename(image_url.split('?')[0])
-            image_ext = os.path.splitext(image_filename)[1].lower()
-            
-            # Handle images and videos
-            is_image = image_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
-            is_video = image_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
-            
-            if not (is_image or is_video):
-                logger.debug(f"Skipping unsupported file type: {image_filename}")
-                continue
-            
-            # Use 0-based indexing instead of 1-based indexing
-            save_filename = f"image_{i}{image_ext}"
-            
-            # If optimizing images and this is a Civitai image, use their pre-optimized WebP version
-            if is_image and optimize and 'civitai.com' in image_url:
+            # Apply optimization for Civitai URLs if enabled
+            original_url = image_url
+            if optimize and 'civitai.com' in image_url:
                 image_url = ExampleImagesProcessor.get_civitai_optimized_url(image_url)
-                save_filename = f"image_{i}.webp"
             
-            # Check if already downloaded
-            save_path = os.path.join(model_dir, save_filename)
-            if os.path.exists(save_path):
-                logger.debug(f"File already exists: {save_path}")
-                continue
-            
-            # Download the file
+            # Download the file first to determine the actual file type
             try:
-                logger.debug(f"Downloading {save_filename} for {model_name}")
+                logger.debug(f"Downloading media file {i} for {model_name}")
                 
-                # Download using the unified downloader
-                success, content = await downloader.download_to_memory(
+                # Download using the unified downloader with headers
+                success, content, headers = await downloader.download_to_memory(
                     image_url,
-                    use_auth=False  # Example images don't need auth
+                    use_auth=False,  # Example images don't need auth
+                    return_headers=True
                 )
                 
                 if success:
+                    # Determine file extension from content or headers
+                    media_ext = ExampleImagesProcessor._get_file_extension_from_content_or_headers(
+                        content, headers, original_url
+                    )
+                    
+                    # Check if the detected file type is supported
+                    is_image = media_ext in SUPPORTED_MEDIA_EXTENSIONS['images']
+                    is_video = media_ext in SUPPORTED_MEDIA_EXTENSIONS['videos']
+                    
+                    if not (is_image or is_video):
+                        logger.debug(f"Skipping unsupported file type: {media_ext}")
+                        continue
+                    
+                    # Use 0-based indexing with the detected extension
+                    save_filename = f"image_{i}{media_ext}"
+                    save_path = os.path.join(model_dir, save_filename)
+                    
+                    # Check if already downloaded
+                    if os.path.exists(save_path):
+                        logger.debug(f"File already exists: {save_path}")
+                        continue
+                    
+                    # Save the file
                     with open(save_path, 'wb') as f:
                         f.write(content)
+                    
                 elif "404" in str(content):
                     error_msg = f"Failed to download file: {image_url}, status code: 404 - Model metadata might be stale"
                     logger.warning(error_msg)
@@ -569,4 +620,7 @@ class ExampleImagesProcessor:
             return web.json_response({
                 'success': False,
                 'error': str(e)
-            }, status=500)
\ No newline at end of file
+            }, status=500)
+
+
+    
\ No newline at end of file