feat(file): optimize file handling and caching

- Refactor the file-handling logic for better performance and maintainability
- Add a caching layer to reduce repeated reads and processing (see the usage sketch below)
- Improve error handling and logging
- Optimize the thumbnail generation algorithm
- Add performance monitoring and test dependencies
2025-07-11 00:21:57 +08:00
parent d0f9e65ad1
commit 8c4e5885c7
15 changed files with 1034 additions and 94 deletions
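
A minimal usage sketch of the refactored module, assuming it is importable as `file` and called from something like a request handler (the handler, book ID, and keyword arguments below are illustrative assumptions, not part of this commit):

import file  # the module changed in this commit (file.py)

file.init()          # make sure inputdir/storedir/tmpdir exist
file.autoLoadFile()  # import any ZIP archives found in inputdir

def serve_page(bookid: str, index: int) -> bytes:
    """Hypothetical caller: return one page of a book as a WebP thumbnail."""
    data, name = file.readZip(bookid, index)
    if not name:  # readZip signals errors with an empty filename
        raise FileNotFoundError(data)
    return file.thumbnail(data, min_size=600, encode="webp", quality=75)

pages = file.get_zip_image_count("example-book-id")  # cached page count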

file.py

@@ -1,86 +1,266 @@
import shutil, os, zipfile, io, cv2, numpy as np
import hashlib
import time
from functools import lru_cache
from pathlib import Path
import logging
import db.file, app_conf
from utils.logger import get_logger
from utils.cache_manager import get_cache_manager, cache_image
from utils.performance_monitor import monitor_performance, timing_context
# Get the configuration object
conf = app_conf.conf()
logger = get_logger(__name__)
cache_manager = get_cache_manager()
# In-memory cache for the image lists of recently accessed ZIP files
_zip_cache = {}
_cache_timeout = 300  # cache entries expire after 5 minutes
def init():
    """Initialize the file directories."""
    paths = ("inputdir", "storedir", "tmpdir")
    for path in paths:
        try:
            dir_path = Path(conf.get("file", path))
            dir_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created directory: {dir_path}")
        except Exception as e:
            logger.error(f"Failed to create directory {path}: {e}")
@monitor_performance("file.get_image_files_from_zip")
def get_image_files_from_zip(zip_path: str) -> tuple:
"""
从ZIP文件中获取图片文件列表使用缓存提高性能
返回: (image_files_list, cache_key)
"""
cache_key = f"{zip_path}_{os.path.getmtime(zip_path)}"
current_time = time.time()
# 检查缓存
if cache_key in _zip_cache:
cache_data = _zip_cache[cache_key]
if current_time - cache_data['timestamp'] < _cache_timeout:
logger.debug(f"使用缓存的ZIP文件列表: {zip_path}")
return cache_data['files'], cache_key
# 读取ZIP文件
try:
# 创建一个ZipFile对象
with zipfile.ZipFile(zippath, "r") as zip_ref:
# 获取图片文件列表
with zipfile.ZipFile(zip_path, "r") as zip_ref:
image_files = [
file
for file in zip_ref.namelist()
file for file in zip_ref.namelist()
if file.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"))
]
# 缓存结果
_zip_cache[cache_key] = {
'files': image_files,
'timestamp': current_time
}
# 清理过期缓存
_cleanup_cache()
logger.debug(f"缓存ZIP文件列表: {zip_path}, 图片数量: {len(image_files)}")
return image_files, cache_key
except Exception as e:
logger.error(f"读取ZIP文件失败 {zip_path}: {e}")
return [], cache_key
if not image_files:
return "not imgfile in zip", ""
if int(index) > len(image_files):
return "404 not found", ""
def _cleanup_cache():
    """Remove expired cache entries."""
    current_time = time.time()
    expired_keys = [
        key for key, data in _zip_cache.items()
        if current_time - data['timestamp'] > _cache_timeout
    ]
    for key in expired_keys:
        del _zip_cache[key]
    if expired_keys:
        logger.debug(f"Removed expired cache entries: {len(expired_keys)}")
@monitor_performance("file.autoLoadFile")
def autoLoadFile():
"""自动加载文件,优化路径处理和错误处理"""
input_dir = Path(conf.get("file", "inputdir"))
store_dir = Path(conf.get("file", "storedir"))
if not input_dir.exists():
logger.warning(f"输入目录不存在: {input_dir}")
return
file_list = []
try:
file_list = [f for f in input_dir.iterdir() if f.is_file()]
except Exception as e:
logger.error(f"读取输入目录失败: {e}")
return
processed_count = 0
for file_path in file_list:
try:
if zipfile.is_zipfile(file_path):
with zipfile.ZipFile(file_path, "r") as zip_ref:
page_count = len([f for f in zip_ref.namelist()
if f.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"))])
if page_count > 0:
db.file.new(file_path.name, page_count)
# 移动文件到存储目录
target_path = store_dir / file_path.name
shutil.move(str(file_path), str(target_path))
logger.info(f"已添加漫画: {file_path.name}, 页数: {page_count}")
processed_count += 1
else:
logger.warning(f"ZIP文件中没有图片: {file_path.name}")
else:
logger.info(f"非ZIP文件跳过: {file_path.name}")
except Exception as e:
logger.error(f"处理文件失败 {file_path.name}: {e}")
logger.info(f"自动加载完成,处理了 {processed_count} 个文件")
@monitor_performance("file.readZip")
def readZip(bookid: str, index: int) -> tuple:
"""
从ZIP文件中读取指定索引的图片
优化:使用缓存的文件列表,改进错误处理
返回: (image_data, filename) 或 (error_message, "")
"""
try:
bookinfo = db.file.searchByid(bookid)
if not bookinfo:
logger.warning(f"未找到书籍ID: {bookid}")
return "Book not found", ""
zip_path = Path(conf.get("file", "storedir")) / bookinfo[0][2]
if not zip_path.exists():
logger.error(f"ZIP文件不存在: {zip_path}")
return "ZIP file not found", ""
# 使用缓存获取图片文件列表
image_files, _ = get_image_files_from_zip(str(zip_path))
if not image_files:
logger.warning(f"ZIP文件中没有图片: {zip_path}")
return "No image files in zip", ""
if int(index) >= len(image_files):
logger.warning(f"图片索引超出范围: {index}, 总数: {len(image_files)}")
return "Image index out of range", ""
# 读取指定图片
with zipfile.ZipFile(zip_path, "r") as zip_ref:
image_filename = image_files[int(index)]
# 读取图片数据
image_data = zip_ref.read(image_filename)
zip_ref.close()
logger.debug(f"读取图片: {bookid}/{index} -> {image_filename}")
return image_data, image_filename
except zipfile.BadZipFile: # 异常处理
except zipfile.BadZipFile:
logger.error(f"损坏的ZIP文件: {bookid}")
return "Bad ZipFile", ""
except Exception as e:
return str(e), ""
logger.error(f"读取ZIP文件失败 {bookid}/{index}: {e}")
return f"Error: {str(e)}", ""
@lru_cache(maxsize=128)
def _get_image_hash(image_data: bytes) -> str:
    """Return an MD5 hash of the image data, used as a cache key."""
    return hashlib.md5(image_data).hexdigest()
@cache_image
def thumbnail(input_data: bytes, min_size: int = 600, encode: str = "webp", quality: int = 75) -> bytes:
    """
    Generate a thumbnail, with improved encoding logic and performance.
    """
    if not input_data:
        logger.warning("Input image data is empty")
        return input_data
    try:
        # Decode the image
        img = cv2.imdecode(np.frombuffer(input_data, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            logger.warning("Could not decode image data")
            return input_data
        height, width = img.shape[:2]
        logger.debug(f"Original image size: {width}x{height}")
        # Only downscale if the smaller dimension exceeds min_size
        min_dimension = min(height, width)
        if min_size < min_dimension:
            # Compute the new size, preserving the aspect ratio
            if height > width:
                new_width = min_size
                new_height = int(min_size * height / width)
            else:
                new_height = min_size
                new_width = int(min_size * width / height)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
            logger.debug(f"Resized image to: {new_width}x{new_height}")
        # Encode the image
        if encode.lower() == "webp":
            success, encoded_image = cv2.imencode(
                ".webp", img, [cv2.IMWRITE_WEBP_QUALITY, quality]
            )
        elif encode.lower() in ("jpg", "jpeg"):
            success, encoded_image = cv2.imencode(
                ".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, quality]
            )
        elif encode.lower() == "png":
            success, encoded_image = cv2.imencode(
                ".png", img, [cv2.IMWRITE_PNG_COMPRESSION, 6]
            )
        else:
            logger.warning(f"Unsupported encoding format: {encode}, returning original data")
            return input_data
        if not success:
            logger.error(f"Image encoding failed: {encode}")
            return input_data
        result = encoded_image.tobytes()
        logger.debug(f"Image processed: {len(input_data)} bytes -> {len(result)} bytes")
        return result
    except Exception as e:
        logger.error(f"Image processing error: {e}")
        return input_data
def get_zip_image_count(bookid: str) -> int:
    """
    Return the number of images in a book's ZIP archive, using the cache.
    """
    try:
        bookinfo = db.file.searchByid(bookid)
        if not bookinfo:
            return 0
        zip_path = Path(conf.get("file", "storedir")) / bookinfo[0][2]
        if not zip_path.exists():
            return 0
        image_files, _ = get_image_files_from_zip(str(zip_path))
        return len(image_files)
    except Exception as e:
        logger.error(f"Failed to get image count for {bookid}: {e}")
        return 0
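

# --- Illustrative check of thumbnail()'s scaling rule (an assumption, not part of this commit) ---
# For a 1600x1200 input with min_size=600, the smaller side (1200) is scaled down to 600
# while the aspect ratio is preserved, so the expected output is 800x600; a 500x400 input
# is returned unchanged because min(500, 400) <= 600.
def _expected_thumb_size(width: int, height: int, min_size: int = 600) -> tuple:
    """Hypothetical helper mirroring thumbnail()'s sizing math, for reference only."""
    if min_size >= min(width, height):
        return width, height
    if height > width:
        return min_size, int(min_size * height / width)
    return int(min_size * width / height), min_size

assert _expected_thumb_size(1600, 1200) == (800, 600)
assert _expected_thumb_size(500, 400) == (500, 400)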