feat(file): optimize file handling and caching

- Refactor the file-handling logic for better performance and maintainability
- Add a caching layer to reduce repeated reads and processing (see the usage sketch below)
- Improve error handling and logging
- Optimize the thumbnail generation algorithm
- Add performance monitoring and test dependencies
2025-07-11 00:21:57 +08:00
parent d0f9e65ad1
commit 8c4e5885c7
15 changed files with 1034 additions and 94 deletions
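
A minimal usage sketch of the refactored module, assuming it is importable as `file` and called from something like a request handler (the handler, book ID, and keyword arguments below are illustrative assumptions, not part of this commit):

import file  # the module changed in this commit (file.py)

file.init()          # make sure inputdir/storedir/tmpdir exist
file.autoLoadFile()  # import any ZIP archives found in inputdir

def serve_page(bookid: str, index: int) -> bytes:
    """Hypothetical caller: return one page of a book as a WebP thumbnail."""
    data, name = file.readZip(bookid, index)
    if not name:  # readZip signals errors with an empty filename
        raise FileNotFoundError(data)
    return file.thumbnail(data, min_size=600, encode="webp", quality=75)

pages = file.get_zip_image_count("example-book-id")  # cached page count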

file.py

@@ -1,86 +1,266 @@
import shutil, os, zipfile, io, cv2, numpy as np
import hashlib
import time
from functools import lru_cache
from pathlib import Path
import logging
import db.file, app_conf
from utils.logger import get_logger
from utils.cache_manager import get_cache_manager, cache_image
from utils.performance_monitor import monitor_performance, timing_context
# Get the configuration object
conf = app_conf.conf()
logger = get_logger(__name__)
cache_manager = get_cache_manager()
# In-memory cache for the image lists of recently accessed ZIP files
_zip_cache = {}
_cache_timeout = 300  # cache entries expire after 5 minutes
def init():
    """Initialize the file directories."""
    paths = ("inputdir", "storedir", "tmpdir")
    for path in paths:
        try:
            dir_path = Path(conf.get("file", path))
            dir_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Created directory: {dir_path}")
        except Exception as e:
            logger.error(f"Failed to create directory {path}: {e}")
@monitor_performance("file.get_image_files_from_zip")
def get_image_files_from_zip(zip_path: str) -> tuple:
"""
从ZIP文件中获取图片文件列表使用缓存提高性能
返回: (image_files_list, cache_key)
"""
cache_key = f"{zip_path}_{os.path.getmtime(zip_path)}"
current_time = time.time()
# 检查缓存
if cache_key in _zip_cache:
cache_data = _zip_cache[cache_key]
if current_time - cache_data['timestamp'] < _cache_timeout:
logger.debug(f"使用缓存的ZIP文件列表: {zip_path}")
return cache_data['files'], cache_key
# 读取ZIP文件
try:
# 创建一个ZipFile对象
with zipfile.ZipFile(zippath, "r") as zip_ref:
# 获取图片文件列表
with zipfile.ZipFile(zip_path, "r") as zip_ref:
image_files = [
file
for file in zip_ref.namelist()
file for file in zip_ref.namelist()
if file.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"))
]
# 缓存结果
_zip_cache[cache_key] = {
'files': image_files,
'timestamp': current_time
}
# 清理过期缓存
_cleanup_cache()
logger.debug(f"缓存ZIP文件列表: {zip_path}, 图片数量: {len(image_files)}")
return image_files, cache_key
except Exception as e:
logger.error(f"读取ZIP文件失败 {zip_path}: {e}")
return [], cache_key
if not image_files:
return "not imgfile in zip", ""
if int(index) > len(image_files):
return "404 not found", ""
def _cleanup_cache():
    """Remove expired cache entries."""
    current_time = time.time()
    expired_keys = [
        key for key, data in _zip_cache.items()
        if current_time - data['timestamp'] > _cache_timeout
    ]
    for key in expired_keys:
        del _zip_cache[key]
    if expired_keys:
        logger.debug(f"Removed expired cache entries: {len(expired_keys)}")
@monitor_performance("file.autoLoadFile")
def autoLoadFile():
"""自动加载文件,优化路径处理和错误处理"""
input_dir = Path(conf.get("file", "inputdir"))
store_dir = Path(conf.get("file", "storedir"))
if not input_dir.exists():
logger.warning(f"输入目录不存在: {input_dir}")
return
file_list = []
try:
file_list = [f for f in input_dir.iterdir() if f.is_file()]
except Exception as e:
logger.error(f"读取输入目录失败: {e}")
return
processed_count = 0
for file_path in file_list:
try:
if zipfile.is_zipfile(file_path):
with zipfile.ZipFile(file_path, "r") as zip_ref:
page_count = len([f for f in zip_ref.namelist()
if f.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp"))])
if page_count > 0:
db.file.new(file_path.name, page_count)
# 移动文件到存储目录
target_path = store_dir / file_path.name
shutil.move(str(file_path), str(target_path))
logger.info(f"已添加漫画: {file_path.name}, 页数: {page_count}")
processed_count += 1
else:
logger.warning(f"ZIP文件中没有图片: {file_path.name}")
else:
logger.info(f"非ZIP文件跳过: {file_path.name}")
except Exception as e:
logger.error(f"处理文件失败 {file_path.name}: {e}")
logger.info(f"自动加载完成,处理了 {processed_count} 个文件")
@monitor_performance("file.readZip")
def readZip(bookid: str, index: int) -> tuple:
"""
从ZIP文件中读取指定索引的图片
优化:使用缓存的文件列表,改进错误处理
返回: (image_data, filename) 或 (error_message, "")
"""
try:
bookinfo = db.file.searchByid(bookid)
if not bookinfo:
logger.warning(f"未找到书籍ID: {bookid}")
return "Book not found", ""
zip_path = Path(conf.get("file", "storedir")) / bookinfo[0][2]
if not zip_path.exists():
logger.error(f"ZIP文件不存在: {zip_path}")
return "ZIP file not found", ""
# 使用缓存获取图片文件列表
image_files, _ = get_image_files_from_zip(str(zip_path))
if not image_files:
logger.warning(f"ZIP文件中没有图片: {zip_path}")
return "No image files in zip", ""
if int(index) >= len(image_files):
logger.warning(f"图片索引超出范围: {index}, 总数: {len(image_files)}")
return "Image index out of range", ""
# 读取指定图片
with zipfile.ZipFile(zip_path, "r") as zip_ref:
image_filename = image_files[int(index)]
# 读取图片数据
image_data = zip_ref.read(image_filename)
zip_ref.close()
logger.debug(f"读取图片: {bookid}/{index} -> {image_filename}")
return image_data, image_filename
except zipfile.BadZipFile: # 异常处理
except zipfile.BadZipFile:
logger.error(f"损坏的ZIP文件: {bookid}")
return "Bad ZipFile", ""
except Exception as e:
return str(e), ""
logger.error(f"读取ZIP文件失败 {bookid}/{index}: {e}")
return f"Error: {str(e)}", ""
@lru_cache(maxsize=128)
def _get_image_hash(image_data: bytes) -> str:
    """Return an MD5 hash of the image data, used as a cache key."""
    return hashlib.md5(image_data).hexdigest()
@cache_image
def thumbnail(input_data: bytes, min_size: int = 600, encode: str = "webp", quality: int = 75) -> bytes:
    """
    Generate a thumbnail, with improved encoding logic and performance.
    """
    if not input_data:
        logger.warning("Input image data is empty")
        return input_data
    try:
        # Decode the image
        img = cv2.imdecode(np.frombuffer(input_data, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            logger.warning("Could not decode image data")
            return input_data
        height, width = img.shape[:2]
        logger.debug(f"Original image size: {width}x{height}")
        # Only downscale if the smaller dimension exceeds min_size
        min_dimension = min(height, width)
        if min_size < min_dimension:
            # Compute the new size, preserving the aspect ratio
            if height > width:
                new_width = min_size
                new_height = int(min_size * height / width)
            else:
                new_height = min_size
                new_width = int(min_size * width / height)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
            logger.debug(f"Resized image to: {new_width}x{new_height}")
        # Encode the image
        if encode.lower() == "webp":
            success, encoded_image = cv2.imencode(
                ".webp", img, [cv2.IMWRITE_WEBP_QUALITY, quality]
            )
        elif encode.lower() in ("jpg", "jpeg"):
            success, encoded_image = cv2.imencode(
                ".jpg", img, [cv2.IMWRITE_JPEG_QUALITY, quality]
            )
        elif encode.lower() == "png":
            success, encoded_image = cv2.imencode(
                ".png", img, [cv2.IMWRITE_PNG_COMPRESSION, 6]
            )
        else:
            logger.warning(f"Unsupported encoding format: {encode}, returning original data")
            return input_data
        if not success:
            logger.error(f"Image encoding failed: {encode}")
            return input_data
        result = encoded_image.tobytes()
        logger.debug(f"Image processed: {len(input_data)} bytes -> {len(result)} bytes")
        return result
    except Exception as e:
        logger.error(f"Image processing error: {e}")
        return input_data
def get_zip_image_count(bookid: str) -> int:
    """
    Return the number of images in a book's ZIP archive, using the cache.
    """
    try:
        bookinfo = db.file.searchByid(bookid)
        if not bookinfo:
            return 0
        zip_path = Path(conf.get("file", "storedir")) / bookinfo[0][2]
        if not zip_path.exists():
            return 0
        image_files, _ = get_image_files_from_zip(str(zip_path))
        return len(image_files)
    except Exception as e:
        logger.error(f"Failed to get image count for {bookid}: {e}")
        return 0
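

# --- Illustrative check of thumbnail()'s scaling rule (an assumption, not part of this commit) ---
# For a 1600x1200 input with min_size=600, the smaller side (1200) is scaled down to 600
# while the aspect ratio is preserved, so the expected output is 800x600; a 500x400 input
# is returned unchanged because min(500, 400) <= 600.
def _expected_thumb_size(width: int, height: int, min_size: int = 600) -> tuple:
    """Hypothetical helper mirroring thumbnail()'s sizing math, for reference only."""
    if min_size >= min(width, height):
        return width, height
    if height > width:
        return min_size, int(min_size * height / width)
    return int(min_size * width / height), min_size

assert _expected_thumb_size(1600, 1200) == (800, 600)
assert _expected_thumb_size(500, 400) == (500, 400)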