Browse Source

一个基于 Python 的 AI 项目, 依赖 CUDA 和 fastapi

reghao 4 weeks ago
commit
28c7df8fc4
23 changed files with 1643 additions and 0 deletions
  1. 33 0
      .dockerignore
  2. 10 0
      .gitignore
  3. 23 0
      Dockerfile
  4. 17 0
      README.md
  5. 40 0
      ai_server.py
  6. 22 0
      docker-compose.yml
  7. 56 0
      main.py
  8. 120 0
      requirements.txt
  9. 0 0
      route/__init__.py
  10. 99 0
      route/audio.py
  11. 56 0
      route/file.py
  12. 25 0
      route/gpu.py
  13. 55 0
      route/image.py
  14. 89 0
      route/text.py
  15. 0 0
      service/__init__.py
  16. 84 0
      service/ai_asr.py
  17. 80 0
      service/ai_image_ollama.py
  18. 79 0
      service/ai_task.py
  19. 67 0
      service/ai_text.py
  20. 44 0
      service/ai_text_ollama.py
  21. 533 0
      service/pyav.py
  22. 100 0
      service/pygpu.py
  23. 11 0
      setting.py

+ 33 - 0
.dockerignore

@@ -0,0 +1,33 @@
+# 忽略 Python 虚拟环境
+venv/
+.venv/
+env/
+
+# 忽略缓存和编译文件
+**/__pycache__/
+*.py[cod]
+*$py.class
+.pytest_cache/
+.coverage
+htmlcov/
+
+# 忽略 Docker 和本地配置
+.git/
+.gitignore
+.dockerignore
+Dockerfile
+docker-compose.yml
+.env
+
+# 忽略视频处理产生的临时文件(重要!)
+scenes_cache/
+*.mp4
+*.jpg
+
+# 忽略 IDE 配置
+.vscode/
+.idea/
+
+ai_output/
+ai_upload/
+README.md

+ 10 - 0
.gitignore

@@ -0,0 +1,10 @@
+.idea/
+*logs*/
+venv/
+*.iml
+*.log
+*.db
+__pycache__
+*/__pycache__
+ai_output/
+ai_upload/

+ 23 - 0
Dockerfile

@@ -0,0 +1,23 @@
# CUDA base image: supplies the GPU runtime needed by torch/funasr inference.
FROM nvidia/cuda:12.1.0-base-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

# ffmpeg is required by the audio/video pipeline (service/pyav.py);
# fonts-wqy-microhei provides CJK glyphs for subtitle rendering.
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    ffmpeg \
    curl \
    fonts-wqy-microhei \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements first so the dependency layer is cached across code edits.
COPY ./requirements.txt /app/requirements.txt

RUN pip3 install --no-cache-dir --upgrade -r /app/requirements.txt

COPY . /app

EXPOSE 8010

# Exec form without `sh -c`: uvicorn runs as PID 1 and receives SIGTERM
# directly, so `docker stop` shuts it down cleanly instead of killing sh.
CMD ["uvicorn", "ai_server:app", "--host", "0.0.0.0", "--port", "8010"]

+ 17 - 0
README.md

@@ -0,0 +1,17 @@
+一个基于 Python 的 AI 项目, 依赖 CUDA, 通过在代码中加载 AI 模型和调用 ollama 提供的 AI 模型, 提供了以下功能:
+- 图像理解
+- 语音识别
+- 文本分析
+- 文本翻译
+
+## 依赖
+导出依赖
+```
+pip freeze > requirements.txt
+```
+> pip freeze 会导出当前环境下所有安装的包
+
+安装依赖
+```
+pip install -r requirements.txt
+```

+ 40 - 0
ai_server.py

@@ -0,0 +1,40 @@
import asyncio
import logging

# Configure logging before importing project modules so their module-level
# loggers inherit this format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(name)s] %(levelname)s: %(message)s'
)

from contextlib import asynccontextmanager
from fastapi import FastAPI
from route import gpu, file, audio, text, image
import service.ai_task as ai_task
import service.ai_asr as pyasr

logger = logging.getLogger(__name__)
# Alternative: reuse uvicorn's logger
# logger = logging.getLogger("uvicorn.error")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Startup: launch ASR model loading in the background and start the GPU worker."""
    logger.info("🚀 服务已启动...")
    # Keep a reference on app.state: a bare create_task() result may be
    # garbage-collected before the coroutine finishes (asyncio docs warn
    # to retain task references for fire-and-forget tasks).
    app.state.asr_init_task = asyncio.create_task(pyasr.init_funasr())
    await ai_task.start_worker()
    yield
    logger.info("🛑 服务已停止")


app = FastAPI(title="GPU Worker Server", lifespan=lifespan)

# Mount the sub-routers.
app.include_router(gpu.router)
app.include_router(file.router)
app.include_router(audio.router)
app.include_router(text.router)
app.include_router(image.router)


@app.get("/")
async def root():
    return {"message": "Welcome to pyai"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8010)

+ 22 - 0
docker-compose.yml

@@ -0,0 +1,22 @@
+services:
+  pyai:
+    image: pyai:12345678
+    container_name: pyai
+    restart: always
+    # Host networking: the app reaches Ollama at 127.0.0.1:11434 (see OLLAMA_HOST below)
+    network_mode: host
+    volumes:
+      - /etc/localtime:/etc/localtime:ro
+      # NOTE(review): .dockerignore/.gitignore use singular "ai_upload"/"ai_output"
+      # while these mounts use plural "ai_uploads"/"ai_outputs" — verify against
+      # setting.py's UPLOAD_DIR/OUTPUT_DIR, otherwise uploads/outputs are not
+      # persisted on the host.
+      - /opt/docker/pyai/ai_uploads:/app/ai_uploads
+      - /opt/docker/pyai/ai_outputs:/app/ai_outputs
+      # Model cache persisted so models are not re-downloaded on recreate
+      - /opt/docker/pyai/model_cache/modelscope:/root/.cache/modelscope
+    environment:
+      - MODELSCOPE_CACHE=/root/.cache/modelscope
+      - OLLAMA_HOST=http://127.0.0.1:11434
+      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]

+ 56 - 0
main.py

@@ -0,0 +1,56 @@
+import json
+import os
+
+from setting import OUTPUT_DIR
+
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+import logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s [%(name)s] %(levelname)s: %(message)s'
+)
+
+import torch
+import service.pyav as pyav
+import service.pygpu as pygpu
+import service.ai_asr as ai_asr
+import service.ai_image_ollama as ai_image
+
+logger = logging.getLogger(__name__)
+
def process(audio_path, srt_path, video_path):
    """Transcribe one audio file to SRT and render the subtitled video.

    OOM is treated as a per-file failure (log, clean up, skip); any other
    error is logged with its traceback. GPU memory is cleared after every
    file regardless of outcome so the next file starts with free VRAM.
    """
    try:
        ai_asr.generate_srt(audio_path, srt_path)
        pyav.generate_video(audio_path, srt_path, video_path)
    except torch.OutOfMemoryError:
        logger.error(f"❌ 显存溢出!跳过文件: {audio_path}")
        pygpu.clear_gpu_memory()  # reclaim immediately after the OOM
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"💥 处理 {audio_path} 时发生未知错误: {e}")
    finally:
        # Always clear so the next file starts with maximal free VRAM.
        pygpu.clear_gpu_memory()
+
+
def _run_demo():
    """Demo: ASR → subtitled video, then scene detection + frame analysis."""
    # --- Speech pipeline: transcribe and burn subtitles into a video ----
    audio = 'abc.mp3'
    srt = 'abc.mp3.srt'
    video = 'abc.mp3.mp4'
    ai_asr.generate_srt(audio, srt)
    pyav.generate_video(audio, srt, video)

    # --- Video scene pipeline -------------------------------------------
    video = 'abc.mp4'
    video_dict = pyav.get_video_info(video)
    start_scene_times = pyav.get_scene_times(video, 0.3)
    # Optionally split the video into per-scene clips:
    # pyav.split_video_by_scenes(video, start_scene_times, OUTPUT_DIR)
    # Pick one representative timestamp inside every scene...
    target_times = pyav.calculate_mid_points(video, start_scene_times)
    # ...extract a frame at each of them with ffmpeg...
    scenes = pyav.extract_frames(video, target_times, OUTPUT_DIR)
    # ...and describe each frame with the vision model.
    scenes_result = ai_image.describe_frame(scenes)
    video_dict['scenes'] = scenes_result
    print(json.dumps(video_dict, indent=8, ensure_ascii=False))


if __name__ == "__main__":
    _run_demo()

+ 120 - 0
requirements.txt

@@ -0,0 +1,120 @@
+accelerate==1.13.0
+aliyun-python-sdk-core==2.16.0
+aliyun-python-sdk-kms==2.16.5
+annotated-doc==0.0.4
+annotated-types==0.7.0
+antlr4-python3-runtime==4.9.3
+anyio==4.13.0
+audioread==3.1.0
+bitsandbytes==0.49.2
+certifi==2026.2.25
+cffi==2.0.0
+charset-normalizer==3.4.7
+click==8.3.2
+contourpy==1.3.2
+crcmod==1.7
+cryptography==46.0.6
+cuda-bindings==13.2.0
+cuda-pathfinder==1.5.1
+cuda-toolkit==13.0.2
+cycler==0.12.1
+decorator==5.2.1
+editdistance==0.8.1
+exceptiongroup==1.3.1
+fastapi==0.135.3
+filelock==3.25.2
+fonttools==4.62.1
+fsspec==2026.3.0
+funasr==1.3.1
+h11==0.16.0
+hf-xet==1.4.3
+httpcore==1.0.9
+httpx==0.28.1
+huggingface_hub==1.9.0
+hydra-core==1.3.2
+idna==3.11
+jaconv==0.5.0
+jamo==0.4.1
+jieba==0.42.1
+Jinja2==3.1.6
+jmespath==0.10.0
+joblib==1.5.3
+kaldiio==2.18.1
+kiwisolver==1.5.0
+lazy-loader==0.5
+librosa==0.11.0
+llvmlite==0.47.0
+markdown-it-py==4.0.0
+MarkupSafe==3.0.3
+mdurl==0.1.2
+modelscope==1.35.3
+mpmath==1.3.0
+msgpack==1.1.2
+networkx==3.4.2
+numba==0.65.0
+numpy==2.2.6
+nvidia-cublas==13.1.0.3
+nvidia-cuda-cupti==13.0.85
+nvidia-cuda-nvrtc==13.0.88
+nvidia-cuda-runtime==13.0.96
+nvidia-cudnn-cu13==9.19.0.56
+nvidia-cufft==12.0.0.61
+nvidia-cufile==1.15.1.6
+nvidia-curand==10.4.0.35
+nvidia-cusolver==12.0.4.66
+nvidia-cusparse==12.6.3.3
+nvidia-cusparselt-cu13==0.8.0
+nvidia-ml-py==13.595.45
+nvidia-nccl-cu13==2.28.9
+nvidia-nvjitlink==13.0.88
+nvidia-nvshmem-cu13==3.4.5
+nvidia-nvtx==13.0.85
+ollama==0.6.1
+omegaconf==2.3.0
+opencv-python==4.13.0.92
+oss2==2.19.1
+packaging==26.0
+pillow==12.2.0
+platformdirs==4.9.4
+pooch==1.9.0
+protobuf==7.34.1
+psutil==7.2.2
+pycparser==3.0
+pycryptodome==3.23.0
+pydantic==2.12.5
+pydantic_core==2.41.5
+Pygments==2.20.0
+pynndescent==0.6.0
+pyparsing==3.3.2
+python-dateutil==2.9.0.post0
+python-multipart==0.0.22
+pytorch-wpe==0.0.1
+PyYAML==6.0.3
+regex==2026.4.4
+requests==2.33.1
+rich==14.3.3
+safetensors==0.7.0
+scikit-learn==1.7.2
+scipy==1.15.3
+sentencepiece==0.2.1
+shellingham==1.5.4
+six==1.17.0
+soundfile==0.13.1
+soxr==1.0.0
+starlette==1.0.0
+sympy==1.14.0
+tensorboardX==2.6.5
+threadpoolctl==3.6.0
+tokenizers==0.22.2
+torch==2.11.0
+torch-complex==0.4.4
+torchaudio==2.11.0
+tqdm==4.67.3
+transformers==5.5.0
+triton==3.6.0
+typer==0.24.1
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+umap-learn==0.5.11
+urllib3==2.6.3
+uvicorn==0.43.0

+ 0 - 0
route/__init__.py


+ 99 - 0
route/audio.py

@@ -0,0 +1,99 @@
+import logging
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException
+import os
+import uuid
+import shutil
+from fastapi import UploadFile, File
+from starlette.concurrency import run_in_threadpool
+import service.ai_task as ai_task
+import service.pyav as pyav
+from setting import UPLOAD_DIR, OUTPUT_DIR
+
+logger = logging.getLogger(__name__)
+
+# 创建路由对象,可以统一设置前缀 (prefix) 和 标签 (tags)
+router = APIRouter(
+    prefix="/api1/audio",
+    tags=["audio"]
+)
+
@router.post("/asr")
async def upload_audio(file: UploadFile = File(...)):
    """Accept an audio upload and queue it for ASR + subtitle-video rendering."""
    # One task_id is used for every derived artifact so they stay correlated.
    task_id = str(uuid.uuid4())[:8]
    # splitext keeps the old behavior for normal names ("a.mp3" -> "mp3") but
    # no longer treats a dot-less filename as its own extension.
    ext = os.path.splitext(file.filename or "")[1].lstrip(".") or "bin"
    save_path = os.path.join(UPLOAD_DIR, f"{task_id}.{ext}")

    # Copy the upload stream in a worker thread so the synchronous disk IO
    # does not block the main event loop.
    def save_file():
        with open(save_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    await run_in_threadpool(save_file)

    # Output paths derived from the same task_id, then enqueue for the worker.
    srt_path = os.path.join(OUTPUT_DIR, f"{task_id}.srt")
    video_path = os.path.join(OUTPUT_DIR, f"{task_id}.mp4")

    await ai_task.put_task(task_id, save_path, srt_path, video_path)
    return {
        "status": "queued",
        "task_id": task_id,
        "message": "文件已上传并加入 GPU 处理队列",
        "srt_preview_path": f"{OUTPUT_DIR}/{task_id}.srt"
    }
+
@router.get("/tasks")
async def get_queue_status():
    """Report how many jobs are currently waiting in the GPU queue."""
    queue_size = ai_task.get_tasks()
    return {"queue_size": queue_size}
+
@router.get("/result/{task_id}")
async def get_asr_result(task_id: str):
    """Collect every artifact produced for *task_id* and return them as JSON."""
    file_name = check_file_prefix(UPLOAD_DIR, task_id)
    if not file_name:
        raise HTTPException(status_code=404, detail="音频文件不存在")
    audio_path = f"{UPLOAD_DIR}/{file_name}"

    txt_path = f"{OUTPUT_DIR}/{task_id}.txt"
    srt_path = f"{OUTPUT_DIR}/{task_id}.srt"
    video_path = f"{OUTPUT_DIR}/{task_id}.mp4"

    # Fail fast with a specific message for whichever artifact is missing.
    for path, missing_msg in (
        (txt_path, "音频文本文件不存在"),
        (srt_path, "字幕文件不存在"),
        (video_path, "视频文件不存在"),
    ):
        if not os.path.exists(path):
            raise HTTPException(status_code=404, detail=missing_msg)

    with open(txt_path, "r", encoding="utf-8") as f:
        text = f.read()

    info = pyav.get_media_info(audio_path)
    srt = pyav.parse_srt_to_list(srt_path)
    return {
        "task_id": task_id,
        "duration": info['duration'],
        "text": text,
        "srt": srt,
        "audio_url": f"/api1/file/audio/{file_name}",
        "video_url": f"/api1/file/video/{task_id}.mp4"
    }
+
+
def check_file_prefix(directory, prefix):
    """Return the name of the unique file in *directory* starting with *prefix*.

    Returns None when zero or more than one file matches, so callers can
    treat any ambiguity the same as "not found".
    """
    matches = list(Path(directory).glob(f"{prefix}*"))
    if len(matches) != 1:
        return None
    return matches[0].name

+ 56 - 0
route/file.py

@@ -0,0 +1,56 @@
+from fastapi import APIRouter, HTTPException
+from fastapi.responses import FileResponse
+import os
+from setting import UPLOAD_DIR, OUTPUT_DIR
+
+router = APIRouter(prefix="/api1/file", tags=["file"])
+
@router.get("/image/{filename}")
async def get_image(filename: str):
    """Serve an uploaded image by filename from UPLOAD_DIR.

    Path safety and existence checks are delegated to get_file().
    """
    file_path = os.path.join(UPLOAD_DIR, filename)
    return get_file(file_path)
+
+
@router.get("/audio/{filename}")
async def get_audio(filename: str):
    """Serve an uploaded audio file by filename from UPLOAD_DIR."""
    file_path = os.path.join(UPLOAD_DIR, filename)
    return get_file(file_path)
+
+
@router.get("/video/{filename}")
async def get_video(filename: str):
    """Serve a generated video by filename from OUTPUT_DIR."""
    file_path = os.path.join(OUTPUT_DIR, filename)
    return get_file(file_path)
+
+
def get_file(file_path):
    """Validate *file_path* and stream it back as a FileResponse.

    Rejects paths that resolve outside UPLOAD_DIR/OUTPUT_DIR (directory
    traversal) with 403, and missing files with 404.
    """
    real_path = os.path.realpath(file_path)
    allowed_roots = (os.path.realpath(UPLOAD_DIR), os.path.realpath(OUTPUT_DIR))
    # Compare whole path components, not raw string prefixes, so a sibling
    # directory such as "ai_upload_evil" cannot slip past the check.
    permitted = any(
        os.path.commonpath([real_path, root]) == root for root in allowed_roots
    )
    if not permitted:
        raise HTTPException(status_code=403, detail="拒绝访问该路径")

    if not os.path.exists(real_path):
        # Generic wording: this helper serves images and audio too, not only video.
        raise HTTPException(status_code=404, detail="文件不存在")

    # media_type is inferred from the file suffix by FileResponse.
    return FileResponse(real_path)

+ 25 - 0
route/gpu.py

@@ -0,0 +1,25 @@
+import logging
+from fastapi import APIRouter, HTTPException
+import service.pygpu as pygpu
+
+logger = logging.getLogger(__name__)
+
+# 创建路由对象,可以统一设置前缀 (prefix) 和 标签 (tags)
+router = APIRouter(
+    prefix="/api1/gpu",
+    tags=["gpu"]
+)
+
@router.get("/info")
async def gpu_info():
    """Aggregate GPU, torch and Ollama resource usage into one payload."""
    try:
        return {
            "gpu": pygpu.get_gpu_memory_info(0),
            "torch": pygpu.get_torch_memory_usage(),
            "ollama": pygpu.get_ollama_resource(),
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

+ 55 - 0
route/image.py

@@ -0,0 +1,55 @@
+import logging
+import os
+import uuid
+from typing import List
+
+from fastapi import APIRouter, UploadFile, File, Form, HTTPException
+import shutil
+from starlette.concurrency import run_in_threadpool
+from setting import UPLOAD_DIR
+import service.ai_image_ollama as ai_image
+
+logger = logging.getLogger(__name__)
+
# Router for the image endpoints; prefix and tag apply to every route below.
router = APIRouter(
    prefix="/api1/image",
    tags=["image"]  # was ["video"]: copy-paste slip — this router serves images
)
+
@router.post("/analyze")
async def upload_image(
        file: UploadFile = File(...),
        prompts: List[str] = Form(...)
):
    """Save the uploaded image and run every prompt against the vision model."""
    if not file:
        raise HTTPException(status_code=400, detail="文件不能为空")

    if not prompts:
        raise HTTPException(status_code=400, detail="Prompts 不能为空")

    task_id = str(uuid.uuid4())[:8]
    # splitext keeps the old behavior for normal names ("a.jpg" -> "jpg") but
    # no longer treats a dot-less filename as its own extension.
    ext = os.path.splitext(file.filename or "")[1].lstrip(".") or "bin"
    save_filename = f"{task_id}.{ext}"
    save_path = os.path.join(UPLOAD_DIR, save_filename)

    # Synchronous copy, pushed to the threadpool so the event loop stays free.
    def save_file():
        with open(save_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    await run_in_threadpool(save_file)

    try:
        with open(save_path, 'rb') as f:
            image_bytes = f.read()
        # The Ollama call is synchronous; run it in the threadpool so one slow
        # inference does not block every other request on the event loop.
        result = await run_in_threadpool(ai_image.describe_image, prompts, image_bytes)
        return {
            "task_id": task_id,
            "model_name": result['model_name'],
            "image_url": f"/api1/file/image/{save_filename}",
            "results": result['results']
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"AI 推理失败: {str(e)}")

+ 89 - 0
route/text.py

@@ -0,0 +1,89 @@
+import logging
+from typing import Optional
+
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+import service.ai_text_ollama as ai_text
+
+logger = logging.getLogger(__name__)
+
+# 创建路由对象,可以统一设置前缀 (prefix) 和 标签 (tags)
+router = APIRouter(
+    prefix="/api1/text",
+    tags=["text"]
+)
+
# Request body for /translate.
class TranslationRequest(BaseModel):
    text: str


@router.post("/translate")
async def translate(request: TranslationRequest):
    """Translate the submitted text into Chinese via the Ollama text model."""
    if not request.text:
        raise HTTPException(status_code=400, detail="输入文本不能为空")

    try:
        translated = ai_text.translate2zh(request.text)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    return {"original": request.text, "translation": translated}
+
+
# Request body shared by /summarize and /tag.
class SummarizeRequest(BaseModel):
    text: str
    prompt: Optional[str] = ""  # optional extra prompt; empty string when absent


@router.post("/summarize")
async def get_summarize(request: SummarizeRequest):
    """Produce a short (≤256 chars) Chinese summary of the submitted text."""
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="输入文本不能为空")

    max_length = 256
    combined_prompt = f"""
            你是一个专业的视频内容分析师。请对以下识别结果进行精炼的中文总结。

            要求字数不超过{max_length}字,语气客观专业:
            {request.text}
            """
    try:
        result = ai_text.summarize(combined_prompt)
        return {
            "model_name": result['model_name'],
            "prompt": combined_prompt,
            "result": result['result']
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"总结失败: {str(e)}")
+
+
@router.post("/tag")
async def get_tag(request: SummarizeRequest):
    """Extract 1-10 Chinese keyword tags from the submitted text."""
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="输入文本不能为空")

    combined_prompt = f"""
            你是一个专业的视频内容分析师。请从下方文本中提取出 1-10 个关键词标签。

            限制条件:
            1. 标签必须是中文。
            2. 标签要包含:人物特征、动作行为、场景环境、氛围。
            3. 严禁输出任何解释性文字,只输出标签。
            4. 格式要求:标签之间用英文逗号分隔。

            文本内容:
            {request.text}

            标签结果:
            """
    try:
        result = ai_text.summarize(combined_prompt)
        return {
            "model_name": result['model_name'],
            "prompt": combined_prompt,
            "result": result['result']
        }
    except Exception as e:
        # Was "总结失败" — copied from /summarize; name the actual operation.
        raise HTTPException(status_code=500, detail=f"标签提取失败: {str(e)}")

+ 0 - 0
service/__init__.py


+ 84 - 0
service/ai_asr.py

@@ -0,0 +1,84 @@
+import logging
+import asyncio
+import re
+import time
+from typing import Optional
+
+import service.pyav as pyav
+import torch
+from funasr import AutoModel
+
+logger = logging.getLogger(__name__)
+asr_model: Optional[AutoModel] = None
+is_model_ready = False
+
+
def get_asr_model():
    """Return the module-level ASR model instance (None until loaded)."""
    # No `global` needed: this only reads the module variable.
    return asr_model


def check_ready():
    """True once init_funasr() has finished loading the model."""
    return is_model_ready
+
+
async def init_funasr():
    """Load the funasr model in a worker thread without blocking the event loop.

    No-op when already loaded; on failure the ready flag stays False so
    callers (the queue worker) keep waiting.
    """
    global asr_model, is_model_ready

    if is_model_ready:
        return

    logger.info("⏳ [ASR] 开始异步加载 funasr 模型...")
    start_time = time.time()

    try:
        # get_running_loop() is the supported call inside a coroutine;
        # get_event_loop() has been deprecated here since Python 3.10.
        loop = asyncio.get_running_loop()

        def load():
            # Heavy, synchronous model construction — runs in the default executor.
            return AutoModel(
                model="paraformer-zh",
                vad_model="fsmn-vad",
                vad_kwargs={"max_single_segment_time": 30000},
                punc_model="ct-punc",
                device="cuda:0" if torch.cuda.is_available() else "cpu",
                disable_update=True
            )

        asr_model = await loop.run_in_executor(None, load)
        is_model_ready = True
        logger.info(f"✅ [ASR] 模型加载成功!耗时 {(time.time() - start_time):.2f}s")
    except Exception as e:
        logger.error(f"❌ [ASR] 模型加载失败: {e}")
        is_model_ready = False
+
+
def get_text(audio_path):
    """Run ASR over one audio file; return its text and token timestamps."""
    started = time.time()
    logger.info("⏳ 开始进行音频识别...")
    result = asr_model.generate(input=[audio_path], cache={}, batch_size_s=300)
    logger.info(f"✅ 音频识别完成, 耗时 {(time.time() - started):.2f}秒")

    first = result[0]
    return {
        # Strip the spaces funasr inserts between tokens.
        'text': first['text'].replace(" ", ""),
        'timestamps': first['timestamp'],
    }
+
+
def generate_srt(audio_path, srt_path):
    """Transcribe *audio_path* and write a subtitle file to *srt_path*."""
    recognized = get_text(audio_path)
    # Split on CJK punctuation while keeping the punctuation tokens, so each
    # subtitle line ends at a natural sentence boundary.
    pieces = re.split(r"([。!?;,])", recognized['text'])
    srt_entries = pyav.get_precise_srt(pieces, recognized['timestamps'])
    pyav.save_srt_file(srt_entries, srt_path)

+ 80 - 0
service/ai_image_ollama.py

@@ -0,0 +1,80 @@
+import logging
+from setting import ollama_client
+
+logger = logging.getLogger(__name__)
+model_name = 'moondream'
+
def describe_frame(scene_list):
    """Run the analysis prompts against every sampled scene frame.

    Returns one dict per scene with the scene boundaries, the frame path,
    and the model's answer for each prompt. A failed prompt is logged and
    skipped; the scene entry is still returned.
    """
    # Build the prompt list once instead of once per scene (the original
    # rebuilt it inside the loop).
    prompts = [
                """
                Analyze the physical interaction between the individuals. Is there any intimate or sexual contact visible? Describe the positioning of their bodies and limbs objectively
                """,
                """
                Describe the clothing status of all individuals. Is there any visible nudity, undergarments, or partially exposed sensitive areas? Identify the specific body parts shown.
                """,
                """
                Observe the posture and movement. Does the scene depict a sexual act or a highly suggestive sexual position?provide a neutral description of the pose.
                """
    ]

    results = []
    logger.info(f"🚀 开始分析视频,共有 {len(scene_list)} 个场景待处理...")
    for scene_idx, scene in enumerate(scene_list):
        logger.info(f"\n🎬 正在处理场景 {scene_idx + 1}/{len(scene_list)} (时间点: {scene['frame_pos']}s)")
        frame_path = scene['frame_path']
        with open(frame_path, 'rb') as f:
            image_bytes = f.read()

        image_data = {
            "scene_start": scene['scene_start'],
            "scene_end": scene['scene_end'],
            "frame_pos": scene['frame_pos'],
            "frame_path": frame_path,
            "prompts": []
        }

        # Distinct index name: the original reused `i` here, shadowing the
        # scene-loop variable.
        for prompt_idx, p in enumerate(prompts):
            try:
                response = ollama_client.chat(
                    model=model_name,
                    messages=[{
                        'role': 'user',
                        'content': p,
                        'images': [image_bytes]
                    }]
                )
                image_data["prompts"].append({
                    "prompt": p,
                    "result": response['message']['content'].strip()
                })
            except Exception as e:
                logger.error(f"❌ Prompt {prompt_idx + 1} 推理失败: {e}")

        results.append(image_data)

    return results
+
+
def describe_image(prompts, image_bytes):
    """Ask the vision model each prompt about one image and collect the answers."""
    answers = []
    for p in prompts:
        response = ollama_client.chat(
            model=model_name,
            messages=[{
                'role': 'user',
                'content': p,
                'images': [image_bytes]
            }]
        )
        answers.append({
            "prompt": p,
            "result": response['message']['content'].strip()
        })

    return {
        "model_name": model_name,
        "results": answers
    }

+ 79 - 0
service/ai_task.py

@@ -0,0 +1,79 @@
+import logging
+import time
+import re
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+
+import service.pyav as pyav
+import service.pygpu as pygpu
+import service.ai_text_ollama as ai_text
+import service.ai_asr as pyasr
+from setting import OUTPUT_DIR
+
+logger = logging.getLogger(__name__)
+executor = ThreadPoolExecutor(max_workers=1)
+task_queue = asyncio.Queue()
+
# Keep a reference to the worker task: a bare create_task() result may be
# garbage-collected before the coroutine finishes (asyncio docs).
_worker_task = None


async def start_worker():
    """Spawn the queue consumer and retain a reference to it."""
    global _worker_task
    _worker_task = asyncio.create_task(gpu_worker())
+
+
# --- Background worker (consumer) ---
async def gpu_worker():
    """Consume the task queue forever: ASR → srt/txt → subtitled video.

    Waits until the ASR model is loaded, then processes one task at a time.
    All heavy synchronous work runs in the single-thread executor so the
    event loop keeps serving HTTP requests.
    """
    logger.info("🏃 Worker 开始监听任务队列")
    # get_running_loop() is the supported call inside a coroutine;
    # get_event_loop() is deprecated here since Python 3.10. Hoisted out
    # of the loop since it never changes.
    loop = asyncio.get_running_loop()
    while True:
        if not pyasr.check_ready():
            logger.info("休眠 10s 等待模型加载完成...")
            await asyncio.sleep(10)
            continue

        task_id, audio_path, srt_path, video_path = await task_queue.get()
        logger.info(f"⚡ 开始处理任务 [{task_id}]: {audio_path}")
        try:
            start_t = time.time()
            # Run the synchronous recognizer in the dedicated executor; while
            # the GPU works, FastAPI can still accept and enqueue new requests.
            result = await loop.run_in_executor(executor, pyasr.get_text, audio_path)

            text = result['text']
            # Split on CJK punctuation (keeping it) for natural subtitle lines.
            text_list = re.split(r"([。!?;,])", text)
            srt_list = pyav.get_precise_srt(text_list, result['timestamps'])
            pyav.save_srt_file(srt_list, srt_path)

            with open(f"{OUTPUT_DIR}/{task_id}.txt", "w", encoding="utf-8") as f:
                f.write(text)

            await loop.run_in_executor(
                executor,
                pyav.generate_video,
                audio_path,
                srt_path,
                video_path
            )
            logger.info(f"🎉 任务 [{task_id}] 完成,耗时: {time.time() - start_t:.2f}s")
        except Exception as e:
            logger.error(f"❌ 任务 [{task_id}] 失败: {str(e)}")
        finally:
            task_queue.task_done()
            # Free GPU memory between tasks so the next job starts clean.
            pygpu.clear_gpu_memory()
+
+
async def put_task(task_id, save_path, srt_path, video_path):
    """Queue one ASR job as a (task_id, audio, srt, video) tuple."""
    await task_queue.put((task_id, save_path, srt_path, video_path))


def get_tasks():
    """Number of jobs currently waiting in the queue."""
    return task_queue.qsize()


def translate_to_zh(text):
    """Thin wrapper over the Ollama translator."""
    return ai_text.translate2zh(text)

+ 67 - 0
service/ai_text.py

@@ -0,0 +1,67 @@
+import logging
+import os
+os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+
+import time
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import torch
+
+logger = logging.getLogger(__name__)
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
start_time = time.time()
logger.info(f"⏳ 开始加载 {model_name} 模型...")
try:
    # 4-bit NF4 quantization keeps the 1.5B model inside a small VRAM budget.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,  # fp16 compute suits GTX 1650-class cards
        bnb_4bit_quant_type="nf4",  # high-accuracy quantization type
        bnb_4bit_use_double_quant=True  # squeezes a bit more VRAM
    )

    # Load the model with the quantization config, auto-placed on the GPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="auto"
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    logger.info(f"✅ {model_name} 模型加载成功!耗时 {(time.time() - start_time):.2f}秒")
except Exception as e:
    # A load failure is fatal for this module: log once at ERROR (the original
    # logged twice at INFO, the second line even carrying a success emoji).
    logger.error(f"❌ {model_name} 模型加载失败: {e}")
    raise
+
+
def translate2zh(text):
    """Translate an English image description into Chinese with the local LLM."""
    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文,直接输出结果,不要解释:\n{text}"

    chat = [{"role": "user", "content": prompt}]
    templated = tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([templated], return_tensors="pt").to(model.device)

    # Greedy decoding: deterministic output is preferable for translation.
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=128,
            do_sample=False
        )

    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Everything after the final "assistant\n" marker is the model's reply.
    return decoded.split("assistant\n")[-1].strip()
+
+
+def summarize():
+    # TODO: not implemented; route/text.py imports service.ai_text_ollama as
+    # ai_text and uses its summarize() instead of this one.
+    pass

+ 44 - 0
service/ai_text_ollama.py

@@ -0,0 +1,44 @@
+import logging
+import time
+from setting import ollama_client
+
+logger = logging.getLogger(__name__)
+model_name = "qwen2.5:1.5b"
+
def translate2zh(text):
    """Translate *text* into Chinese through the Ollama chat endpoint."""
    started = time.time()
    logger.info(f"⏳ 正在通过 Ollama 调用 {model_name}...")

    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文,直接输出结果,不要解释:\n{text}"
    try:
        # Ollama handles quantized loading and VRAM placement itself.
        response = ollama_client.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': prompt}],
        )
    except Exception as e:
        logger.error(f"❌ Ollama 调用失败: {e}")
        raise

    duration = time.time() - started
    logger.info(f"✅ 推理成功!耗时 {duration:.2f}秒")
    return response['message']['content']
+
+
def summarize(content):
    """One-shot summarization call; returns the model name and its reply."""
    # Low temperature keeps the summary stable across calls.
    response = ollama_client.chat(
        model=model_name,
        messages=[{'role': 'user', 'content': content}],
        options={"temperature": 0.3}
    )
    return {
        "model_name": model_name,
        "result": response['message']['content'].strip()
    }

+ 533 - 0
service/pyav.py

@@ -0,0 +1,533 @@
+import json
+import os
+import re
+import hashlib
+import logging
+import shutil
+import subprocess
+from pathlib import Path
+import cv2
+import librosa
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
def get_video_info(video_path):
    """Read basic metadata of a video file with OpenCV.

    Args:
        video_path: path to the video file.

    Returns:
        dict with duration (s), file size (bytes), resolution, an
        orientation flag and an empty ``scenes`` list, or ``None`` when
        the file cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Release even on failure so the capture handle is not leaked.
        cap.release()
        return None

    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against streams that report fps == 0.
        duration = int(frame_count / fps) if fps > 0 else 0
        size_byte = int(os.path.getsize(video_path))

        # Orientation flag: 1 = landscape, 0 = portrait.
        horizontal = 1 if width >= height else 0
    finally:
        # Always release the capture, even if a property read raises.
        cap.release()

    return {
        "video_path": video_path,
        "duration": duration,
        "size_byte": size_byte,
        "width": width,
        "height": height,
        "horizontal": horizontal,
        "scenes": []
    }
+
+
def get_media_info(media_path):
    """Probe a media file with ffprobe and return basic metadata (incl. audio).

    Args:
        media_path: path to the media file.

    Returns:
        dict with duration, size, resolution, orientation, audio presence
        and audio stream details, or ``None`` when ffprobe fails.
    """
    # 1. Ask ffprobe for full stream/format information as JSON.
    cmd = [
        'ffprobe', '-v', 'quiet', '-print_format', 'json',
        '-show_streams', '-show_format', media_path
    ]

    try:
        result = subprocess.check_output(cmd, encoding='utf-8')
        data = json.loads(result)
    except Exception as e:
        # Use the module logger (not print) like the rest of this module.
        logger.error(f"ffprobe 解析失败: {e}")
        return None

    # 'streams'/'format' can be absent for unreadable files — default safely.
    streams = data.get('streams', [])
    video_stream = next((s for s in streams if s.get('codec_type') == 'video'), None)
    audio_stream = next((s for s in streams if s.get('codec_type') == 'audio'), None)

    # 2. Basic video information.
    width = int(video_stream.get('width', 0)) if video_stream else 0
    height = int(video_stream.get('height', 0)) if video_stream else 0
    duration = float(data.get('format', {}).get('duration', 0))
    size_byte = int(os.path.getsize(media_path))
    # Orientation flag: 1 = landscape, 0 = portrait.
    horizontal = 1 if width >= height else 0

    # 3. Assemble the result structure.
    info = {
        "media_path": media_path,
        "duration": round(duration, 2),
        "size_byte": size_byte,
        "width": width,
        "height": height,
        "horizontal": horizontal,
        "has_audio": audio_stream is not None,
        "audio_info": {
            "codec": audio_stream.get('codec_name'),
            "sample_rate": audio_stream.get('sample_rate'),
            "channels": audio_stream.get('channels'),
            "bit_rate": audio_stream.get('bit_rate')
        } if audio_stream else None,
        "scenes": []
    }

    return info
+
+
def get_scene_times(video_path, threshold=0.3):
    """Detect scene-change start times (in seconds) with FFmpeg.

    Runs the ``select`` scene-detection filter plus ``showinfo`` and parses
    the selected frames' ``pts_time`` values from FFmpeg's stderr log.

    Args:
        video_path: input video path.
        threshold: scene score threshold (0-1); higher means fewer cuts.

    Returns:
        Ascending list of scene start times; always starts at (or near) 0.0.

    Raises:
        RuntimeError: when FFmpeg is missing, fails, or its log cannot
            be read.
    """
    cmd = [
        'ffmpeg',
        '-hide_banner',
        '-i', video_path,
        '-threads', '0',
        # Select frame 0 plus every frame whose scene score exceeds the
        # threshold; showinfo logs each selected frame's timestamp.
        '-vf', f"select='eq(n,0)+gt(scene,{threshold})',showinfo",
        '-vsync', 'vfr',
        '-f', 'null', '-'  # detection only — nothing is written to disk
    ]

    scene_start_times = []
    try:
        # showinfo writes to stderr; stdout carries nothing useful here.
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            encoding='utf-8'
        )

        try:
            for line in process.stderr:
                if "pts_time:" in line:
                    # showinfo prints timestamps with %.6g, so integral values
                    # appear WITHOUT a fractional part (e.g. "pts_time:0");
                    # the decimals must therefore be optional in the pattern.
                    match = re.search(r"pts_time:(\d+(?:\.\d+)?)", line)
                    if match:
                        time_val = float(match.group(1))
                        scene_start_times.append(time_val)
                        logger.info(f"检测到新场景起始点: {time_val}s")
        except Exception as e:
            process.kill()  # reader crashed — do not leave FFmpeg running
            raise RuntimeError(f"读取 FFmpeg 输出时发生错误: {e}")

        # Wait for completion and verify the exit status.
        process.wait()

        if process.returncode != 0:
            raise subprocess.CalledProcessError(process.returncode, cmd)

    except FileNotFoundError:
        # Raised when the 'ffmpeg' binary is not on PATH.
        raise RuntimeError("系统未安装 FFmpeg 或未将其添加到环境变量 PATH 中")

    except subprocess.CalledProcessError as e:
        # FFmpeg itself failed (decode error, bad arguments, ...).
        raise RuntimeError(f"FFmpeg 处理视频失败,退出码: {e.returncode}")

    except Exception as e:
        raise RuntimeError(f"发生未知错误: {e}")

    if not scene_start_times:
        return [0.0]

    if scene_start_times[0] > 0.5:
        # Ensure the list covers the very beginning of the video.
        scene_start_times.insert(0, 0.0)

    return scene_start_times
+
+
def split_video_by_scenes(video_path, scene_start_times, output_dir="segment"):
    """Split a video into per-scene segments using stream copy (no re-encode).

    Args:
        video_path: source video path.
        scene_start_times: ascending scene start times in seconds.
        output_dir: directory for ``segment_NNN.mp4`` files; created if
            missing (FFmpeg does not create directories itself).
    """
    if not scene_start_times:
        logger.info("没有检测到场景,跳过分割。")
        return

    # Fix: ensure the output directory exists before FFmpeg writes into it.
    os.makedirs(output_dir, exist_ok=True)

    # Append a sentinel end marker so the loop can compute durations; the
    # last segment runs to the end of the file, so no -t is needed there
    # and the exact total duration is not required.
    times = scene_start_times + [None]
    for i in range(len(times) - 1):
        start_time = times[i]
        next_time = times[i + 1]

        output_file = f"{output_dir}/segment_{i:03d}.mp4"

        # -ss before -i gives fast keyframe-based seeking.
        cmd = [
            'ffmpeg', '-hide_banner', '-y',
            '-ss', str(start_time),
            '-i', video_path
        ]

        # All but the last segment get an explicit duration via -t.
        if next_time is not None:
            duration = next_time - start_time
            cmd.extend(['-t', str(duration)])

        # Stream copy: no re-encoding, so splitting is nearly instant.
        cmd.extend(['-c', 'copy', output_file])

        try:
            subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
            logger.info(f"完成: {output_file} (起始点: {start_time}s)")
        except subprocess.CalledProcessError as e:
            # Log at error level (was info); e.stderr is bytes and may be None.
            stderr_text = e.stderr.decode() if e.stderr else ''
            logger.error(f"分割片段 {i} 失败: {stderr_text}")
+
+
def calculate_mid_points(video_path, scene_start_times):
    """Build per-scene records with a representative frame position.

    Each scene's frame is sampled at its midpoint, except for very short
    scenes (< 0.5s) where the start time is used.  The last scene ends at
    the total duration reported by ffprobe.

    Args:
        video_path: video file to probe for total duration.
        scene_start_times: ascending scene start times in seconds.

    Returns:
        list of {"scene_start", "scene_end", "frame_pos"} dicts (seconds,
        rounded to 3 decimals).

    Raises:
        RuntimeError: when ffprobe cannot report the video duration.
    """
    # Total duration via ffprobe (needed as the last scene's boundary).
    probe_cmd = [
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', video_path
    ]
    try:
        total_duration = float(subprocess.check_output(probe_cmd).decode().strip())
    except Exception as e:
        raise RuntimeError(f"无法获取视频时长: {e}")

    # Each scene ends where the next one starts; the final boundary is EOF.
    boundaries = list(scene_start_times) + [total_duration]

    scene_infos = []
    for idx, begin in enumerate(scene_start_times):
        finish = boundaries[idx + 1]
        span = finish - begin

        # Midpoint sampling, except for sub-0.5s scenes (use the start).
        frame_at = begin if span < 0.5 else (begin + finish) / 2

        scene_infos.append({
            "scene_start": round(begin, 3),
            "scene_end": round(finish, 3),
            "frame_pos": round(frame_at, 3)
        })

    return scene_infos
+
+
def extract_frames(video_path, scenes, output_dir="thumbnails"):
    """Extract one preview frame per scene with FFmpeg.

    Args:
        video_path: source video path.
        scenes: list of dicts carrying a ``frame_pos`` timestamp (seconds);
            mutated in place.
        output_dir: directory for the JPEG thumbnails (created if missing).

    Returns:
        The same ``scenes`` list with ``frame_path`` set to the absolute
        JPEG path, or ``None`` for frames that failed to extract.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Absolute paths so callers get usable locations regardless of CWD.
    abs_output_dir = os.path.abspath(output_dir)

    logger.info(f"开始执行抽帧任务,目标数量: {len(scenes)}")

    for i, scene in enumerate(scenes):
        ts = scene["frame_pos"]
        # File naming convention: index + timestamp.
        file_name = f"scene_{i + 1:03d}_{ts}s.jpg"
        output_file_path = os.path.join(abs_output_dir, file_name)

        # -ss before -i gives fast keyframe seeking; -y placed before the
        # output (conventional option order) to overwrite without prompting.
        cmd = [
            'ffmpeg', '-hide_banner', '-loglevel', 'error', '-y',
            '-ss', str(ts),
            '-i', video_path,
            '-frames:v', '1',
            '-q:v', '2',
            '-vf', 'scale=640:-1',  # downscaled preview: faster to encode
            output_file_path
        ]

        try:
            subprocess.run(cmd, check=True)
            # Record the absolute path on success.
            scene["frame_path"] = output_file_path

            if (i + 1) % 5 == 0 or (i + 1) == len(scenes):
                logger.info(f"进度: {i + 1}/{len(scenes)}")
        except subprocess.CalledProcessError:
            # Fix: failures were logged at info level — use error instead.
            logger.error(f"错误: 无法提取 {ts}s 处的帧")
            scene["frame_path"] = None

    logger.info(f"任务完成,存储路径: {abs_output_dir}")
    return scenes
+
+
+# subtitles 滤镜位于 filter_complex 的字符串内部,FFmpeg 会对其进行二次解析。如果路径包含 \、: 或空格,解析就会崩溃
+# 将 srt 文件临时改名为一个完全合法的名字并复制文件, 处理完成后再删除复制的文件
def get_safe_temp_srt(srt_path):
    """Derive a filter-safe temporary .srt path (SHA-256 of the resolved path).

    FFmpeg re-parses the ``subtitles`` filter argument, so paths containing
    backslashes, colons or spaces break it; a pure-hex filename placed in
    the same directory as the original is always safe to embed.

    Args:
        srt_path: original subtitle file path.

    Returns:
        pathlib.Path of the hash-named sibling ``.srt`` file.
    """
    resolved = Path(srt_path).resolve()
    # Hash the path string (not the file contents) — cheap and deterministic.
    digest = hashlib.sha256(str(resolved).encode('utf-8')).hexdigest()
    return resolved.parent / f"{digest}.srt"
+
+
def generate_video(audio_path, srt_path, video_output):
    """Render an audio file into a waveform video with burned-in subtitles.

    The SRT file is first copied to a hash-named sibling (see
    get_safe_temp_srt) because the ``subtitles`` filter argument is
    re-parsed by FFmpeg and breaks on paths with ``\\``, ``:`` or spaces;
    the copy is always removed in the ``finally`` block.

    Args:
        audio_path: input audio file.
        srt_path: UTF-8 SRT subtitle file.
        video_output: output video path (overwritten if it exists).

    Raises:
        RuntimeError: if FFmpeg exits non-zero; the message includes the
            last 20 lines of FFmpeg's log for diagnosis.
    """
    # Pre-process the srt path:
    # 1. resolve it to an absolute path
    # 2. normalize to forward slashes
    # 3. sidestep the subtitles-filter escaping (':' would need '\:') by
    #    using a hash-named copy instead of escaping in place
    temp_srt_path = get_safe_temp_srt(srt_path)
    shutil.copy(srt_path, temp_srt_path)

    font_name = 'WenQuanYi Micro Hei'
    font_size = 20
    font_color = '&H0000FFFF&'  # libass &HAABBGGRR& style color

    """调用 FFmpeg 合成视频"""
    # Encoder notes:
    # -rc vbr: variable bitrate mode
    # -cq 28: quality target — larger value, smaller file (24-32 typical)
    # -b:v 0: under cq control, let the encoder choose the bitrate freely
    command = [
        'ffmpeg', '-y',
        '-hide_banner',
        '-i', audio_path,
        '-filter_complex',
        f"[0:a]showwaves=s=854x480:mode=line:colors=0x00FFFF[v];"
        f"[v]subtitles={temp_srt_path}:charenc=UTF-8:force_style='FontName={font_name},FontSize={font_size},PrimaryColour={font_color},Alignment=2'[v_out]",
        '-map', '[v_out]',
        '-map', '0:a',
        '-c:v', 'libx264',  # CPU encoding gives a better compression ratio
        '-preset', 'veryfast',  # speed preset; 'medium' is smaller but slower
        '-crf', '28',  # quality: 23 is default; 28 is plenty for a 480p waveform
        '-pix_fmt', 'yuv420p',  # broad player compatibility
        '-c:a', 'aac',
        '-b:a', '128k',  # cap audio bitrate at 128k
        '-shortest',
        video_output
    ]

    try:
        # Launch FFmpeg with stderr merged into stdout — progress lines are
        # emitted on stderr, so one reader covers everything.
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge stderr into stdout, single reader
            text=True,
            encoding='utf-8',
            errors='replace'
        )

        # Rolling buffer of recent log lines for error reporting.
        error_log_buffer = []
        logger.info(f"🎬 开始合成视频: {video_output}")
        # Stream FFmpeg's output line by line until the process exits.
        while True:
            line = process.stdout.readline()
            if not line and process.poll() is not None:
                break

            if line:
                clean_line = line.strip()
                error_log_buffer.append(clean_line)
                # Keep only the 20 most recent lines.
                if len(error_log_buffer) > 20:
                    error_log_buffer.pop(0)

                # Progress lines look like "frame= 123 fps= 30 size= 512kB
                # time=00:00:05.12 ..."; only these are logged to avoid
                # flooding the log with every FFmpeg message.
                if "frame=" in clean_line or "size=" in clean_line:
                    logger.info(f"\rFFmpeg 进度: {clean_line}")

        # Check the final exit status.
        process.wait()

        if process.returncode != 0:
            # Attach the buffered tail of the log to the error.
            last_errors = "\n".join(error_log_buffer)
            raise RuntimeError(
                f"FFmpeg 执行失败 (退出码 {process.returncode})\n"
                f"--- 最后 20 行日志 ---\n{last_errors}"
            )

        logger.info(f"\n🚀 视频合成成功: {video_output}")
    finally:
        # Always delete the temporary srt copy, even on failure.
        if temp_srt_path.exists():
            temp_srt_path.unlink()
+
+
def get_precise_srt(text_list, timestamp_list, max_chars=20):
    """Align ASR text fragments with word-level timestamps into SRT entries.

    Args:
        text_list: alternating fragments [text, punct, text, punct, ...];
            each punctuation item is merged onto the preceding text.
            NOTE(review): assumed shape — confirm against the ASR
            post-processing that produces this list.
        timestamp_list: per-word [start_ms, end_ms] pairs (Paraformer-style;
            punctuation carries no timestamps — presumably, verify).
        max_chars: hard wrap width for a single subtitle line.

    Returns:
        list of {"line": int, "time": "start --> end", "text": str} dicts
        suitable for save_srt_file().
    """
    total_ts = len(timestamp_list)
    raw_parts = text_list
    sentences = []
    # Merge each punctuation fragment onto the preceding short sentence.
    for i in range(0, len(raw_parts) - 1, 2):
        sentences.append(raw_parts[i] + raw_parts[i + 1])
    if len(raw_parts) % 2 == 1:
        sentences.append(raw_parts[-1])

    ts_idx = 0
    line_count = 1

    srt_list = []
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence or ts_idx >= total_ts:
            continue

        # 2. Hard-split overlong sentences into max_chars-sized chunks.
        sub_sentences = [sentence[i:i + max_chars] for i in range(0, len(sentence), max_chars)]

        for s in sub_sentences:
            # Count how many characters in this chunk map to timestamps.
            # Paraformer timestamps usually exclude punctuation, so strip it
            # before counting.
            pure_words = re.sub(r'[^\w\u4e00-\u9fa5]', '', s)  # keep word/CJK chars only
            num_words = len(pure_words)

            if num_words == 0:
                continue

            # --- Guard: stop once the timestamp index runs out ---
            if ts_idx >= total_ts:
                break

            # Start time of this chunk (ms).
            start_t = timestamp_list[ts_idx][0]

            # Clamp the end index to the last available timestamp.
            end_pos = ts_idx + num_words - 1
            if end_pos >= total_ts:
                end_pos = total_ts - 1

            end_t = timestamp_list[end_pos][1]

            # Emit the SRT entry.
            srt_list.append({
                "line": line_count,
                "time": f"{format_time_srt(start_t)} --> {format_time_srt(end_t)}",
                "text": s
            })

            # Advance past the timestamps consumed by this chunk.
            ts_idx += num_words
            line_count += 1
    return srt_list
+
+
def format_time_srt(ms):
    """Convert milliseconds to the SRT timestamp format ``HH:MM:SS,mmm``.

    Args:
        ms: time in milliseconds; ints and floats are accepted (fractional
            milliseconds are truncated).

    Returns:
        Zero-padded ``HH:MM:SS,mmm`` string.
    """
    # Normalize first so float input does not format as "1.0:..." etc.
    ms = int(ms)
    s, ms = divmod(ms, 1000)
    m, s = divmod(s, 60)
    h, m = divmod(m, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"
+
+
def save_srt_file(srt_list, output_path):
    """Write SRT entries to a file.

    Args:
        srt_list: list of dicts with ``line``, ``time`` and ``text`` keys.
        output_path: destination path (e.g. 'output.srt'), UTF-8 encoded.
    """
    with open(output_path, 'w', encoding='utf-8') as fh:
        # Each cue is: index line, timing line, text line, blank separator.
        fh.writelines(
            f"{entry['line']}\n{entry['time']}\n{entry['text']}\n\n"
            for entry in srt_list
        )
+
+
def parse_srt_to_list(file_path):
    """Parse an SRT file back into ``[{line, time, text}, ...]`` records.

    Args:
        file_path: path to a UTF-8 SRT file.

    Returns:
        list of dicts with int ``line``, the timing string in ``time`` and
        the (possibly multi-line) cue text in ``text``.
    """
    with open(file_path, 'r', encoding='utf-8') as fh:
        content = fh.read().strip()

    # Groups: cue index, "HH:MM:SS,mmm --> HH:MM:SS,mmm" timing line, and
    # the multi-line cue text; the lookahead stops at the next cue index
    # or at end of input.
    cue_re = re.compile(
        r'(\d+)\n'
        r'(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3})\n'
        r'([\s\S]*?)(?=\n\d+\n|\Z)'
    )

    return [
        {"line": int(idx), "time": timing, "text": body.strip()}
        for idx, timing, body in cue_re.findall(content)
    ]
+
+
def analyze_audio_energy(audio_path, segment_ms=100):
    """Log per-window RMS energy of an audio file to help pick a silence threshold.

    Args:
        audio_path: audio file path (decoded by librosa at 16 kHz mono).
        segment_ms: analysis window size in milliseconds.

    Returns:
        list of RMS values, one per window (empty for zero-length audio).
    """
    # 1. Load the audio at a fixed sample rate.
    sr = 16000
    y, _ = librosa.load(audio_path, sr=sr)

    # 2. Compute RMS energy per window.
    hop_length = int(sr * segment_ms / 1000)
    energy_list = []

    logger.info(f"{'时间 (秒)':<10} | {'能量值 (RMS)':<15} | {'状态估计'}")
    logger.info("-" * 45)

    for i in range(0, len(y), hop_length):
        segment = y[i: i + hop_length]
        if len(segment) == 0:
            break

        rms = np.sqrt(np.mean(segment ** 2))
        energy_list.append(rms)

        # Log progress and the per-window value.
        time_sec = i / sr
        status = "🤫 静音" if rms < 0.005 else "🗣️ 有声"
        logger.info(f"{time_sec:>8.2f}s | {rms:>15.6f} | {status}")

    # 3. Summary statistics and a suggested threshold.
    logger.info("-" * 45)
    if not energy_list:
        # Fix: max()/min() on an empty list would raise for empty audio.
        logger.info("音频为空,无能量数据")
        return energy_list
    logger.info(f"最大能量: {max(energy_list):.6f}")
    logger.info(f"最小能量: {min(energy_list):.6f}")
    logger.info(f"建议阈值: {np.percentile(energy_list, 20):.6f} (取前20%分位数作为参考)")
    # Return the raw values so callers can compute their own threshold.
    return energy_list

+ 100 - 0
service/pygpu.py

@@ -0,0 +1,100 @@
+import logging
+from datetime import datetime
+import gc
+import pynvml
+import torch
+from setting import ollama_client
+
+logger = logging.getLogger(__name__)
+
def get_gpu_memory_info(device_index=0):
    """Query total/used/free VRAM for one GPU via NVML.

    Args:
        device_index: NVML device index (default 0).

    Returns:
        dict with the device name and formatted memory figures in MB.
    """
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)

        result = {
            "model": pynvml.nvmlDeviceGetName(handle),
            "mem_total": f"{info.total / 1024 ** 2:.2f} MB",
            "mem_used": f"{info.used / 1024 ** 2:.2f} MB",
            "mem_free": f"{info.free / 1024 ** 2:.2f} MB",
        }
    finally:
        # Fix: always shut NVML down even when a query raises (e.g. a bad
        # device index), so repeated calls do not leak the NVML session.
        pynvml.nvmlShutdown()
    return result
+
+
def get_torch_memory_usage():
    """Report PyTorch CUDA memory usage for the current device.

    Returns:
        dict with ``reserved`` (total GPU memory held by PyTorch's caching
        allocator) and ``allocated`` (the part currently bound to tensors),
        or a placeholder message when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        return {
                "message": "CUDA N/A",
            }

    dev = torch.cuda.current_device()
    # reserved: memory PyTorch actually holds from the driver;
    # allocated: the subset of reserved currently backing tensors.
    reserved_mb = torch.cuda.memory_reserved(dev) / 1024 ** 2
    allocated_mb = torch.cuda.memory_allocated(dev) / 1024 ** 2

    return {
        "reserved": f"{reserved_mb:.2f} MB",
        "allocated": f"{allocated_mb:.2f} MB"
    }
+
+
# --- Deep GPU memory cleanup helper ---
def clear_gpu_memory():
    """Deep-clean GPU memory: flush PyTorch caches, then force Python GC."""
    if torch.cuda.is_available():
        # Return cached allocator blocks to the driver and reclaim
        # inter-process shared segments.
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    # Always run a Python-level collection, GPU or not.
    gc.collect()
    logger.info(f"[{datetime.now().strftime('%H:%M:%S')}] 🧹 GPU 显存深度清理完成")
+
+
def get_ollama_resource():
    """List models currently loaded by Ollama and their VRAM residency.

    Returns:
        list of dicts with model name, total size, VRAM-resident size, GPU
        share percentage and a human-readable status; empty when no model
        is running or the Ollama service is unreachable.
    """
    result = []
    try:
        # Ask Ollama which models are currently loaded.
        response = ollama_client.ps()
        if not response['models']:
            return result

        for model in response['models']:
            name = model['name']
            size_vram = model.get('size_vram', 0)
            size = model.get('size', 0)

            # Share of the model weights resident in VRAM.
            if size > 0:
                gpu_percentage = (size_vram / size) * 100
            else:
                gpu_percentage = 0

            if gpu_percentage >= 100:
                stat = "🚀 完全运行在 GPU 上"
            elif gpu_percentage > 0:
                stat = f"🌓 混合模式 (GPU 占比 {gpu_percentage:.2f}%)"
            else:
                stat = "🐌 完全运行在 CPU 上"

            result.append({
                'model_name': name,
                'size': f"{size / 1024 ** 3:.2f} GB",
                'size_vram': f"{size_vram / 1024 ** 3:.2f} GB",
                'gpu_percentage': f"{gpu_percentage:.2f}",
                'stat': stat
            })
    except Exception as e:
        # Fix: was print(); use the module logger like the rest of the file.
        logger.error(f"无法连接到 Ollama 服务: {e}")
    return result

+ 11 - 0
setting.py

@@ -0,0 +1,11 @@
import os
from ollama import Client

# Read the Ollama endpoint from the environment; the default targets a
# local daemon, while docker-compose overrides OLLAMA_HOST with the
# service name.
OLLAMA_URL = os.getenv("OLLAMA_HOST", "http://127.0.0.1:11434")
ollama_client = Client(host=OLLAMA_URL)

# Upload/output directories, created eagerly so route handlers can assume
# they exist.
UPLOAD_DIR = "ai_uploads"
OUTPUT_DIR = "ai_outputs"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)