# ComiPy/compress_comic.py

import cv2
import zipfile
import argparse
import os
import re
import numpy as np
from io import BytesIO
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import threading

# Thread locks intended for safe ZIP writing and console output.
# NOTE(review): output_zip_lock is not referenced in the visible code (the
# output ZIP is written from the main thread) — confirm before removing.
output_zip_lock = threading.Lock()
print_lock = threading.Lock()

def natural_sort_key(s):
    """Return a sort key that orders embedded numbers by value.

    The string is split into alternating non-digit text and digit runs.
    Text pieces are lower-cased and digit runs are converted to ``int``, so
    ``"p2"`` sorts before ``"p10"`` regardless of letter case.

    Args:
        s: The string (typically a file name) to build a key for.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting with a
        (possibly empty) ``str``.
    """
    # With a single capturing group, re.split puts the separators (digit
    # runs) at odd indices and the surrounding text at even indices. Using
    # the index parity instead of str.isdigit() avoids calling int() on
    # characters such as superscript digits, which isdigit() accepts but
    # neither r'\d' nor int() does.
    return [
        int(token) if index % 2 else token.lower()
        for index, token in enumerate(re.split(r'(\d+)', s))
    ]

def resize_long_image(img, max_short_edge=640):
    """Shrink an image so its short edge does not exceed ``max_short_edge``.

    Tuned for long strip images: the short edge (usually the width) is
    capped and the aspect ratio is kept. For example, a 1000 x 5000 image
    becomes 640 x 3200 with the default limit. Images whose short edge is
    already within the limit are returned unchanged (same object).

    Args:
        img: Array whose first two ``shape`` entries are (height, width).
        max_short_edge: Maximum allowed length of the short edge, in pixels.

    Returns:
        The original ``img`` when no scaling is needed, otherwise the
        resized image produced by ``cv2.resize``.

    Raises:
        ValueError: If ``max_short_edge`` is smaller than 1.
    """
    target = int(max_short_edge)
    if target < 1:
        # A non-positive target would otherwise only fail inside cv2.resize.
        raise ValueError("max_short_edge must be at least 1")

    h, w = img.shape[:2]
    short_edge = min(w, h)
    if short_edge <= max_short_edge:
        return img  # already small enough

    # Pin the short edge to the target exactly and derive the long edge from
    # the ratio. Truncating edge * (target / edge) can land one pixel short
    # (or at zero for tiny targets) because of floating-point error.
    if w <= h:
        new_w = target
        new_h = round(h * target / w)
    else:
        new_h = target
        new_w = round(w * target / h)

    # Lanczos interpolation, as chosen by the original implementation.
    return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)

def convert_to_webp_data(img, quality=95):
    """Encode an image as WebP and return the encoded bytes.

    Args:
        img: Image array accepted by ``cv2.imencode``.
        quality: Value passed as ``cv2.IMWRITE_WEBP_QUALITY``.

    Returns:
        The encoded WebP data as ``bytes``.

    Raises:
        RuntimeError: If ``cv2.imencode`` reports failure.
    """
    ok, encoded = cv2.imencode(
        '.webp',
        img,
        [int(cv2.IMWRITE_WEBP_QUALITY), quality],
    )
    if ok:
        return encoded.tobytes()
    raise RuntimeError("WebP 编码失败")

def process_image(args):
    """Convert one image to WebP; designed to run in a worker thread.

    Args:
        args: A ``(filename, img_data)`` pair, where ``img_data`` holds the
            raw encoded bytes of the source image.

    Returns:
        ``(webp_name, webp_bytes)`` on success, where ``webp_name`` is
        ``filename`` with its extension replaced by ``.webp``; ``None`` when
        the image cannot be decoded or any processing step raises. Failures
        are reported through ``tqdm.write`` under ``print_lock``.
    """
    filename, img_data = args
    try:
        raw = np.frombuffer(img_data, np.uint8)
        decoded = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if decoded is not None:
            # Cap the short edge, then re-encode as WebP.
            shrunk = resize_long_image(decoded, max_short_edge=640)
            payload = convert_to_webp_data(shrunk, quality=95)
            stem, _ = os.path.splitext(filename)
            return (stem + '.webp', payload)

        # imdecode signals an undecodable image by returning None.
        with print_lock:
            tqdm.write(f"⚠️ 无法解码图像: {filename}")
        return None
    except Exception as err:
        # Worker boundary: report the failure and let the batch continue.
        with print_lock:
            tqdm.write(f"❌ 处理 {filename} 失败: {err}")
        return None

def main():
    """Command-line entry point: recompress the images of a comic ZIP as WebP.

    Reads every image entry (.png/.jpg/.jpeg/.bmp/.tiff) from the input
    archive in natural-sort order, converts them in a thread pool via
    ``process_image``, and writes the results to ``<input stem>-lite.zip``
    next to the input file. Entries that fail to convert are skipped.

    Problems are reported on stdout and the function returns early; an
    invalid ``--workers`` value is rejected through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="压缩漫画ZIP：长图优化 + 多线程加速")
    parser.add_argument('-i', '--input', required=True, help='输入的ZIP文件路径')
    parser.add_argument('--workers', type=int, default=4, help='并行线程数（默认4）')
    args = parser.parse_args()

    # ThreadPoolExecutor raises ValueError for max_workers <= 0; reject it
    # here with a normal usage error instead of a traceback.
    if args.workers < 1:
        parser.error("--workers 必须是大于等于 1 的整数")

    input_zip_path = args.input
    if not os.path.isfile(input_zip_path):
        print(f"❌ 错误：文件 {input_zip_path} 不存在")
        return

    base_name = os.path.splitext(input_zip_path)[0]
    output_zip_path = f"{base_name}-lite.zip"

    image_exts = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    try:
        with zipfile.ZipFile(input_zip_path, 'r') as input_zip:
            image_files = sorted(
                (f for f in input_zip.namelist() if f.lower().endswith(image_exts)),
                key=natural_sort_key,
            )

            if not image_files:
                print("⚠️ 警告：ZIP中未找到图片文件")
                return

            print(f"📦 找到 {len(image_files)} 张图片（包括长图），使用 {args.workers} 个线程处理...")

            # Read every image up front so the workers never touch the ZIP.
            img_data_list = [(filename, input_zip.read(filename)) for filename in image_files]
    except zipfile.BadZipFile:
        # Covers both "not a ZIP at all" and corrupt entries hit while reading.
        print(f"❌ 错误：{input_zip_path} 不是有效的ZIP文件")
        return

    # Convert in parallel. Iterating the futures in submission order keeps
    # the results in the same natural-sort order as the input.
    results = []
    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        futures = [executor.submit(process_image, item) for item in img_data_list]
        for future in tqdm(futures, desc="🚀 压缩中", unit="img"):
            result = future.result()
            if result is not None:
                results.append(result)

    if not results:
        # Do not leave an empty archive behind and call it a success.
        print("❌ 错误：没有任何图片转换成功，未生成输出文件")
        return

    # Write the output ZIP from the main thread only (no concurrent writes).
    used_names = set()
    with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as output_zip:
        for webp_name, webp_bytes in results:
            # Sources such as "a.png" and "a.jpg" both map to "a.webp";
            # add a numeric suffix so neither entry shadows the other.
            unique_name = webp_name
            stem = os.path.splitext(webp_name)[0]
            suffix = 1
            while unique_name in used_names:
                unique_name = f"{stem}-{suffix}.webp"
                suffix += 1
            used_names.add(unique_name)
            output_zip.writestr(unique_name, webp_bytes)

    print(f"✅ 压缩完成！输出文件：{output_zip_path}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()