2 месяцев назад · 28c7df8fc4
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,33 @@
 
															+# 忽略 Python 虚拟环境
														
 
															+venv/
														
 
															+.venv/
														
 
															+env/
														
 
															+
														
 
															+# 忽略缓存和编译文件
														
 
															+**/__pycache__/
														
 
															+*.py[cod]
														
 
															+*$py.class
														
 
															+.pytest_cache/
														
 
															+.coverage
														
 
															+htmlcov/
														
 
															+
														
 
															+# 忽略 Docker 和本地配置
														
 
															+.git/
														
 
															+.gitignore
														
 
															+.dockerignore
														
 
															+Dockerfile
														
 
															+docker-compose.yml
														
 
															+.env
														
 
															+
														
 
															+# 忽略视频处理产生的临时文件（重要！）
														
 
															+scenes_cache/
														
 
															+*.mp4
														
 
															+*.jpg
														
 
															+
														
 
															+# 忽略 IDE 配置
														
 
															+.vscode/
														
 
															+.idea/
														
 
															+
														
 
															+ai_output/
														
 
															+ai_upload/
														
 
															+README.md
														
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,10 @@
 
															+.idea/
														
 
															+*logs*/
														
 
															+venv/
														
 
															+*.iml
														
 
															+*.log
														
 
															+*.db
														
 
															+__pycache__
														
 
															+*/__pycache__
														
 
															+ai_output/
														
 
															+ai_upload/
														
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,23 @@
 
															# CUDA base image; Python, ffmpeg and fonts are installed on top of it.
															FROM nvidia/cuda:12.1.0-base-ubuntu22.04

															# Keep apt from asking interactive questions during the build.
															ENV DEBIAN_FRONTEND=noninteractive

															# System packages; the apt lists are removed in the same layer to keep it small.
															RUN apt-get update && apt-get install -y \
															    python3.10 \
															    python3-pip \
															    ffmpeg \
															    curl \
															    fonts-wqy-microhei \
															    && rm -rf /var/lib/apt/lists/*

															WORKDIR /app

															# Copy the requirements file on its own first so the dependency layer is
															# reused when only application code changes.
															COPY ./requirements.txt /app/requirements.txt

															RUN pip3 install --no-cache-dir --upgrade -r /app/requirements.txt

															COPY . /app

															EXPOSE 8010

															# Exec form without an intermediate `sh -c`: uvicorn is started directly as
															# the container's main process, so stop signals from Docker reach it
															# without depending on the shell to pass them on.
															CMD ["uvicorn", "ai_server:app", "--host", "0.0.0.0", "--port", "8010"]
														
--- a/README.md
+++ b/README.md
@@ -0,0 +1,17 @@
 
															+一个基于 Python 的 AI 项目, 依赖 CUDA, 通过在代码中加载 AI 模型和调用 ollama 提供的 AI 模型, 提供了以下功能:
														
 
															+- 图像理解
														
 
															+- 语音识别
														
 
															+- 文本分析
														
 
															+- 文本翻译
														
 
															+
														
 
															+## 依赖
														
 
															+导出依赖
														
 
															+```
														
 
															+pip freeze > requirements.txt
														
 
															+```
														
 
															+> pip freeze 会导出当前环境下所有安装的包
														
 
															+
														
 
															+安装依赖
														
 
															+```
														
 
															+pip install -r requirements.txt
														
 
															+```
														
--- a/ai_server.py
+++ b/ai_server.py
@@ -0,0 +1,40 @@
 
															+import asyncio
														
 
															+import logging
														
 
															+logging.basicConfig(
														
 
															+    level=logging.INFO,
														
 
															+    format='%(asctime)s [%(name)s] %(levelname)s: %(message)s'
														
 
															+)
														
 
															+
														
 
															+from contextlib import asynccontextmanager
														
 
															+from fastapi import FastAPI
														
 
															+from route import gpu, file, audio, text, image
														
 
															+import service.ai_task as ai_task
														
 
															+import service.ai_asr as pyasr
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+# 获取 uvicorn 的 logger
														
 
															+# logger = logging.getLogger("uvicorn.error")
														
 
															+
														
 
															@asynccontextmanager
															async def lifespan(app: FastAPI):
															    """Run startup and shutdown steps around the application's lifetime.

															    On startup the FunASR initialisation coroutine is scheduled as a
															    background task and the AI task worker is started. Control is then
															    handed to the application until it shuts down.

															    Args:
															        app: The FastAPI application this lifespan is attached to.
															    """
															    logger.info("🚀 服务已启动...")

															    # The event loop keeps only weak references to tasks. Holding the task
															    # in a local that stays alive across the ``yield`` prevents it from
															    # being garbage-collected before the initialisation has finished.
															    init_task = asyncio.create_task(pyasr.init_funasr())

															    def _report_init_result(task):
															        # Surface a failed initialisation in the log instead of leaving
															        # the exception unretrieved on the task object.
															        if task.cancelled():
															            return
															        error = task.exception()
															        if error is not None:
															            logger.error("FunASR initialisation failed", exc_info=error)

															    init_task.add_done_callback(_report_init_result)

															    await ai_task.start_worker()
															    try:
															        yield
															    finally:
															        # Do not leave a still-running initialisation behind on shutdown.
															        if not init_task.done():
															            init_task.cancel()
															        logger.info("🛑 服务已停止")
														
 
															# Application object; startup/shutdown handling is delegated to ``lifespan``.
															app = FastAPI(lifespan=lifespan, title="GPU Worker Server")

															# Mount the sub-routers in the same order as before.
															for _route_module in (gpu, file, audio, text, image):
															    app.include_router(_route_module.router)
															del _route_module
														
 
															+
														
 
															@app.get("/")
															async def root():
															    """Return the static welcome payload for the service root."""
															    greeting = "Welcome to pyai"
															    return dict(message=greeting)
														
 
															+
														
 
															if __name__ == "__main__":
															    # Serve the app with uvicorn when this module is executed as a script.
															    from uvicorn import run as _serve

															    _serve(app, port=8010, host="0.0.0.0")
														
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,22 @@
 
															+services:
														
 
															+  pyai:
														
 
															+    image: pyai:12345678
														
 
															+    container_name: pyai
														
 
															+    restart: always
														
 
															+    network_mode: host
														
 
															+    volumes:
														
 
															+      - /etc/localtime:/etc/localtime:ro
														
 
															+      - /opt/docker/pyai/ai_uploads:/app/ai_uploads
														
 
															+      - /opt/docker/pyai/ai_outputs:/app/ai_outputs
														
 
															+      - /opt/docker/pyai/model_cache/modelscope:/root/.cache/modelscope
														
 
															+    environment:
														
 
															+      - MODELSCOPE_CACHE=/root/.cache/modelscope
														
 
															+      - OLLAMA_HOST=http://127.0.0.1:11434
														
 
															+      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
														
 
															+    deploy:
														
 
															+      resources:
														
 
															+        reservations:
														
 
															+          devices:
														
 
															+            - driver: nvidia
														
 
															+              count: 1
														
 
															+              capabilities: [gpu]
														
--- a/main.py
+++ b/main.py
@@ -0,0 +1,56 @@
 
															+import json
														
 
															+import os
														
 
															+
														
 
															+from setting import OUTPUT_DIR
														
 
															+
														
 
															+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
														
 
															+
														
 
															+import logging
														
 
															+logging.basicConfig(
														
 
															+    level=logging.INFO,
														
 
															+    format='%(asctime)s [%(name)s] %(levelname)s: %(message)s'
														
 
															+)
														
 
															+
														
 
															+import torch
														
 
															+import service.pyav as pyav
														
 
															+import service.pygpu as pygpu
														
 
															+import service.ai_asr as ai_asr
														
 
															+import service.ai_image_ollama as ai_image
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															def process(audio_path, srt_path, video_path):
															    """Generate subtitles and a video for one audio file.

															    Calls ``ai_asr.generate_srt`` and then ``pyav.generate_video``. Errors
															    are logged and swallowed, so the function always returns ``None``.
															    GPU memory is cleared once after every call, successful or not.

															    Args:
															        audio_path: Input path handed to both processing steps.
															        srt_path: Subtitle path handed to both processing steps.
															        video_path: Output path handed to ``pyav.generate_video``.
															    """
															    try:
															        ai_asr.generate_srt(audio_path, srt_path)
															        pyav.generate_video(audio_path, srt_path, video_path)
															    except torch.OutOfMemoryError:
															        # No cleanup call here: the ``finally`` clause below always runs,
															        # so clearing in this branch as well only repeated the same work.
															        logger.error(f"❌ 显存溢出！跳过文件: {audio_path}")
															    except Exception as e:
															        # Keep the original message and attach the traceback for diagnosis.
															        logger.error(f"💥 处理 {audio_path} 时发生未知错误: {e}", exc_info=True)
															    finally:
															        # Clear after every file so the next one starts from a clean state.
															        pygpu.clear_gpu_memory()
														
 
															+
														
 
															+
														
 
															if __name__ == "__main__":
															    # Part 1: subtitles for a sample audio file, then a video built from them.
															    audio, srt, video = 'abc.mp3', 'abc.mp3.srt', 'abc.mp3.mp4'
															    ai_asr.generate_srt(audio, srt)
															    pyav.generate_video(audio, srt, video)

															    # Part 2: scene analysis of a sample video file.
															    video = 'abc.mp4'
															    video_dict = pyav.get_video_info(video)
															    start_scene_times = pyav.get_scene_times(video, 0.3)
															    # Splitting the video by scene is left disabled, as in the original:
															    # pyav.split_video_by_scenes(video, start_scene_times, OUTPUT_DIR)

															    # Time points at which one frame per scene is sampled.
															    target_times = pyav.calculate_mid_points(video, start_scene_times)
															    # Extract the frames into OUTPUT_DIR, then describe them with the image model.
															    scenes = pyav.extract_frames(video, target_times, OUTPUT_DIR)
															    video_dict['scenes'] = ai_image.describe_frame(scenes)

															    print(json.dumps(video_dict, indent=8, ensure_ascii=False))
														
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,120 @@
 
															+accelerate==1.13.0
														
 
															+aliyun-python-sdk-core==2.16.0
														
 
															+aliyun-python-sdk-kms==2.16.5
														
 
															+annotated-doc==0.0.4
														
 
															+annotated-types==0.7.0
														
 
															+antlr4-python3-runtime==4.9.3
														
 
															+anyio==4.13.0
														
 
															+audioread==3.1.0
														
 
															+bitsandbytes==0.49.2
														
 
															+certifi==2026.2.25
														
 
															+cffi==2.0.0
														
 
															+charset-normalizer==3.4.7
														
 
															+click==8.3.2
														
 
															+contourpy==1.3.2
														
 
															+crcmod==1.7
														
 
															+cryptography==46.0.6
														
 
															+cuda-bindings==13.2.0
														
 
															+cuda-pathfinder==1.5.1
														
 
															+cuda-toolkit==13.0.2
														
 
															+cycler==0.12.1
														
 
															+decorator==5.2.1
														
 
															+editdistance==0.8.1
														
 
															+exceptiongroup==1.3.1
														
 
															+fastapi==0.135.3
														
 
															+filelock==3.25.2
														
 
															+fonttools==4.62.1
														
 
															+fsspec==2026.3.0
														
 
															+funasr==1.3.1
														
 
															+h11==0.16.0
														
 
															+hf-xet==1.4.3
														
 
															+httpcore==1.0.9
														
 
															+httpx==0.28.1
														
 
															+huggingface_hub==1.9.0
														
 
															+hydra-core==1.3.2
														
 
															+idna==3.11
														
 
															+jaconv==0.5.0
														
 
															+jamo==0.4.1
														
 
															+jieba==0.42.1
														
 
															+Jinja2==3.1.6
														
 
															+jmespath==0.10.0
														
 
															+joblib==1.5.3
														
 
															+kaldiio==2.18.1
														
 
															+kiwisolver==1.5.0
														
 
															+lazy-loader==0.5
														
 
															+librosa==0.11.0
														
 
															+llvmlite==0.47.0
														
 
															+markdown-it-py==4.0.0
														
 
															+MarkupSafe==3.0.3
														
 
															+mdurl==0.1.2
														
 
															+modelscope==1.35.3
														
 
															+mpmath==1.3.0
														
 
															+msgpack==1.1.2
														
 
															+networkx==3.4.2
														
 
															+numba==0.65.0
														
 
															+numpy==2.2.6
														
 
															+nvidia-cublas==13.1.0.3
														
 
															+nvidia-cuda-cupti==13.0.85
														
 
															+nvidia-cuda-nvrtc==13.0.88
														
 
															+nvidia-cuda-runtime==13.0.96
														
 
															+nvidia-cudnn-cu13==9.19.0.56
														
 
															+nvidia-cufft==12.0.0.61
														
 
															+nvidia-cufile==1.15.1.6
														
 
															+nvidia-curand==10.4.0.35
														
 
															+nvidia-cusolver==12.0.4.66
														
 
															+nvidia-cusparse==12.6.3.3
														
 
															+nvidia-cusparselt-cu13==0.8.0
														
 
															+nvidia-ml-py==13.595.45
														
 
															+nvidia-nccl-cu13==2.28.9
														
 
															+nvidia-nvjitlink==13.0.88
														
 
															+nvidia-nvshmem-cu13==3.4.5
														
 
															+nvidia-nvtx==13.0.85
														
 
															+ollama==0.6.1
														
 
															+omegaconf==2.3.0
														
 
															+opencv-python==4.13.0.92
														
 
															+oss2==2.19.1
														
 
															+packaging==26.0
														
 
															+pillow==12.2.0
														
 
															+platformdirs==4.9.4
														
 
															+pooch==1.9.0
														
 
															+protobuf==7.34.1
														
 
															+psutil==7.2.2
														
 
															+pycparser==3.0
														
 
															+pycryptodome==3.23.0
														
 
															+pydantic==2.12.5
														
 
															+pydantic_core==2.41.5
														
 
															+Pygments==2.20.0
														
 
															+pynndescent==0.6.0
														
 
															+pyparsing==3.3.2
														
 
															+python-dateutil==2.9.0.post0
														
 
															+python-multipart==0.0.22
														
 
															+pytorch-wpe==0.0.1
														
 
															+PyYAML==6.0.3
														
 
															+regex==2026.4.4
														
 
															+requests==2.33.1
														
 
															+rich==14.3.3
														
 
															+safetensors==0.7.0
														
 
															+scikit-learn==1.7.2
														
 
															+scipy==1.15.3
														
 
															+sentencepiece==0.2.1
														
 
															+shellingham==1.5.4
														
 
															+six==1.17.0
														
 
															+soundfile==0.13.1
														
 
															+soxr==1.0.0
														
 
															+starlette==1.0.0
														
 
															+sympy==1.14.0
														
 
															+tensorboardX==2.6.5
														
 
															+threadpoolctl==3.6.0
														
 
															+tokenizers==0.22.2
														
 
															+torch==2.11.0
														
 
															+torch-complex==0.4.4
														
 
															+torchaudio==2.11.0
														
 
															+tqdm==4.67.3
														
 
															+transformers==5.5.0
														
 
															+triton==3.6.0
														
 
															+typer==0.24.1
														
 
															+typing-inspection==0.4.2
														
 
															+typing_extensions==4.15.0
														
 
															+umap-learn==0.5.11
														
 
															+urllib3==2.6.3
														
 
															+uvicorn==0.43.0
														
--- a/route/__init__.py
+++ b/route/__init__.py
--- a/route/audio.py
+++ b/route/audio.py
@@ -0,0 +1,99 @@
 
															+import logging
														
 
															+from pathlib import Path
														
 
															+
														
 
															+from fastapi import APIRouter, HTTPException
														
 
															+import os
														
 
															+import uuid
														
 
															+import shutil
														
 
															+from fastapi import UploadFile, File
														
 
															+from starlette.concurrency import run_in_threadpool
														
 
															+import service.ai_task as ai_task
														
 
															+import service.pyav as pyav
														
 
															+from setting import UPLOAD_DIR, OUTPUT_DIR
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+# 创建路由对象，可以统一设置前缀 (prefix) 和 标签 (tags)
														
 
															+router = APIRouter(
														
 
															+    prefix="/api1/audio",
														
 
															+    tags=["audio"]
														
 
															+)
														
 
															+
														
 
															@router.post("/asr")
															async def upload_audio(file: UploadFile = File(...)):
															    """Store an uploaded audio file and put it on the processing queue.

															    The file is saved in ``UPLOAD_DIR`` under a freshly generated
															    8-character task id, and the task is handed to ``ai_task.put_task``
															    together with the ``.srt`` and ``.mp4`` paths in ``OUTPUT_DIR``.

															    Args:
															        file: The uploaded file.

															    Returns:
															        A dict with the queue status, the task id, a message and the path
															        where the subtitle file is expected.
															    """
															    # One task id is generated up front and reused for every derived path.
															    task_id = str(uuid.uuid4())[:8]

															    # Take the extension from the base name only and keep alphanumerics.
															    # ``filename.split('.')[-1]`` raised when no filename was sent and, for
															    # names without a dot, reused the whole client-supplied string (path
															    # separators included) as the "extension".
															    raw_ext = os.path.splitext(os.path.basename(file.filename or ""))[1]
															    ext = "".join(ch for ch in raw_ext if ch.isalnum())
															    save_name = f"{task_id}.{ext}" if ext else task_id
															    save_path = os.path.join(UPLOAD_DIR, save_name)

															    # The blocking copy runs in the thread pool so the event loop stays free.
															    def save_file():
															        with open(save_path, "wb") as buffer:
															            shutil.copyfileobj(file.file, buffer)
															    await run_in_threadpool(save_file)

															    srt_path = os.path.join(OUTPUT_DIR, f"{task_id}.srt")
															    video_path = os.path.join(OUTPUT_DIR, f"{task_id}.mp4")

															    await ai_task.put_task(task_id, save_path, srt_path, video_path)
															    return {
															        "status": "queued",
															        "task_id": task_id,
															        "message": "文件已上传并加入 GPU 处理队列",
															        "srt_preview_path": f"{OUTPUT_DIR}/{task_id}.srt"
															    }
														
 
															+
														
 
															@router.get("/tasks")
															async def get_queue_status():
															    """Return the value of ``ai_task.get_tasks()`` under ``queue_size``."""
															    pending = ai_task.get_tasks()
															    return dict(queue_size=pending)
														
 
															+
														
 
															@router.get("/result/{task_id}")
															async def get_asr_result(task_id: str):
															    """Return the stored results for *task_id*.

															    Args:
															        task_id: Task id used as the file-name stem of the upload and outputs.

															    Returns:
															        A dict with the task id, media duration, recognised text, parsed
															        subtitle entries and the URLs of the audio and video files.

															    Raises:
															        HTTPException: 404 when the uploaded audio, or the ``.txt``,
															            ``.srt`` or ``.mp4`` output, cannot be found.
															    """
															    file_name = check_file_prefix(UPLOAD_DIR, task_id)
															    if not file_name:
															        raise HTTPException(status_code=404, detail="音频文件不存在")
															    audio_path = f"{UPLOAD_DIR}/{file_name}"

															    txt_path, srt_path, video_path = (
															        f"{OUTPUT_DIR}/{task_id}.{suffix}" for suffix in ("txt", "srt", "mp4")
															    )

															    # Checked in the original order, so the first missing output decides
															    # which message the client receives.
															    required_outputs = (
															        (txt_path, "音频文本文件不存在"),
															        (srt_path, "字幕文件不存在"),
															        (video_path, "视频文件不存在"),
															    )
															    for output_path, missing_detail in required_outputs:
															        if not os.path.exists(output_path):
															            raise HTTPException(status_code=404, detail=missing_detail)

															    text = Path(txt_path).read_text(encoding="utf-8")
															    info = pyav.get_media_info(audio_path)
															    srt = pyav.parse_srt_to_list(srt_path)

															    payload = {"task_id": task_id}
															    payload["duration"] = info['duration']
															    payload["text"] = text
															    payload["srt"] = srt
															    payload["audio_url"] = f"/api1/file/audio/{file_name}"
															    payload["video_url"] = f"/api1/file/video/{task_id}.mp4"
															    return payload
														
 
															+
														
 
															+
														
 
															def check_file_prefix(directory, prefix):
															    """Return the name of the only file in *directory* that starts with *prefix*.

															    Args:
															        directory: Directory to search; sub-directories are not descended.
															        prefix: Literal file-name prefix. Glob metacharacters in it have no
															            special meaning.

															    Returns:
															        The matching file's name, or ``None`` when no file or more than one
															        file matches, or when *prefix* contains a path separator.
															    """
															    import glob  # local import: only needed here for glob.escape

															    # A prefix is a file-name fragment, never a path.
															    if "/" in prefix or "\\" in prefix:
															        return None

															    # Escape the prefix so characters such as ``*``, ``?`` and ``[`` match
															    # themselves instead of acting as wildcards.
															    pattern = glob.escape(prefix) + "*"
															    matches = [
															        entry for entry in Path(directory).glob(pattern) if entry.is_file()
															    ]
															    return matches[0].name if len(matches) == 1 else None
														
--- a/route/file.py
+++ b/route/file.py
@@ -0,0 +1,56 @@
 
															+from fastapi import APIRouter, HTTPException
														
 
															+from fastapi.responses import FileResponse
														
 
															+import os
														
 
															+from setting import UPLOAD_DIR, OUTPUT_DIR
														
 
															+
														
 
															+router = APIRouter(prefix="/api1/file", tags=["file"])
														
 
															+
														
 
															@router.get("/image/{filename}")
															async def get_image(filename: str):
															    """Serve *filename* from ``UPLOAD_DIR``.

															    The path check, the existence check and the response itself are
															    delegated to ``get_file``. The commented-out copy of that logic that
															    used to follow the ``return`` has been removed.
															    """
															    file_path = os.path.join(UPLOAD_DIR, filename)
															    return get_file(file_path)
														
 
															+
														
 
															+
														
 
															@router.get("/audio/{filename}")
															async def get_audio(filename: str):
															    """Serve *filename* from ``UPLOAD_DIR`` through ``get_file``."""
															    return get_file(os.path.join(UPLOAD_DIR, filename))
														
 
															+
														
 
															+
														
 
															@router.get("/video/{filename}")
															async def get_video(filename: str):
															    """Serve *filename* from ``OUTPUT_DIR`` through ``get_file``."""
															    return get_file(os.path.join(OUTPUT_DIR, filename))
														
 
															+
														
 
															+
														
 
															def get_file(file_path):
															    """Return a ``FileResponse`` for *file_path* if it is inside an allowed directory.

															    Args:
															        file_path: Candidate path, built by the route handlers from a
															            client-supplied file name.

															    Returns:
															        A ``FileResponse`` for the resolved path.

															    Raises:
															        HTTPException: 403 when the resolved path is outside both
															            ``UPLOAD_DIR`` and ``OUTPUT_DIR``; 404 when it is not an
															            existing regular file.
															    """
															    real_path = os.path.realpath(file_path)

															    def _is_inside(root):
															        # Compare against the root with a trailing separator. A bare
															        # ``startswith(root)`` also accepts sibling paths that merely
															        # share the prefix, such as "<root>_backup/secret".
															        root_prefix = os.path.join(os.path.realpath(root), "")
															        return real_path.startswith(root_prefix)

															    if not (_is_inside(UPLOAD_DIR) or _is_inside(OUTPUT_DIR)):
															        raise HTTPException(status_code=403, detail="拒绝访问该路径")

															    # Require a regular file: a directory would pass an existence check
															    # but cannot be sent as a file response.
															    if not os.path.isfile(real_path):
															        raise HTTPException(status_code=404, detail="视频不存在")

															    # The media type is derived from the file name unless given explicitly.
															    return FileResponse(real_path)
														
--- a/route/gpu.py
+++ b/route/gpu.py
@@ -0,0 +1,25 @@
 
															+import logging
														
 
															+from fastapi import APIRouter, HTTPException
														
 
															+import service.pygpu as pygpu
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+# 创建路由对象，可以统一设置前缀 (prefix) 和 标签 (tags)
														
 
															+router = APIRouter(
														
 
															+    prefix="/api1/gpu",
														
 
															+    tags=["gpu"]
														
 
															+)
														
 
															+
														
 
															@router.get("/info")
															async def gpu_info():
															    """Collect GPU, torch and ollama resource information.

															    Returns:
															        A dict with the results of ``pygpu.get_gpu_memory_info(0)``,
															        ``pygpu.get_torch_memory_usage()`` and
															        ``pygpu.get_ollama_resource()`` under the keys ``gpu``, ``torch``
															        and ``ollama``.

															    Raises:
															        HTTPException: 500 carrying the error text when any call fails.
															    """
															    try:
															        # Dict values are evaluated top to bottom, so the call order is
															        # the same as before.
															        return {
															            "gpu": pygpu.get_gpu_memory_info(0),
															            "torch": pygpu.get_torch_memory_usage(),
															            "ollama": pygpu.get_ollama_resource(),
															        }
															    except Exception as e:
															        # Record the traceback on the server; the client gets the message only.
															        logger.exception("Failed to collect GPU information")
															        raise HTTPException(status_code=500, detail=str(e)) from e
														
--- a/route/image.py
+++ b/route/image.py
@@ -0,0 +1,55 @@
 
															+import logging
														
 
															+import os
														
 
															+import uuid
														
 
															+from typing import List
														
 
															+
														
 
															+from fastapi import APIRouter, UploadFile, File, Form, HTTPException
														
 
															+import shutil
														
 
															+from starlette.concurrency import run_in_threadpool
														
 
															+from setting import UPLOAD_DIR
														
 
															+import service.ai_image_ollama as ai_image
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+# 创建路由对象，可以统一设置前缀 (prefix) 和 标签 (tags)
														
 
															+router = APIRouter(
														
 
															+    prefix="/api1/image",
														
 
															+    tags=["video"]
														
 
															+)
														
 
															+
														
 
															@router.post("/analyze")
															async def upload_image(
															        file: UploadFile = File(...),
															        prompts: List[str] = Form(...)
															):
															    """Store an uploaded image and run the given prompts against it.

															    Args:
															        file: The uploaded image.
															        prompts: Prompts passed to ``ai_image.describe_image``.

															    Returns:
															        A dict with the task id, the model name, the URL of the stored
															        image and the results.

															    Raises:
															        HTTPException: 400 when the file or the prompts are missing; 500
															            when reading the image or the model call fails.
															    """
															    if not file:
															        raise HTTPException(status_code=400, detail="文件不能为空")

															    if not prompts:
															        raise HTTPException(status_code=400, detail="Prompts 不能为空")

															    task_id = str(uuid.uuid4())[:8]

															    # Take the extension from the base name only and keep alphanumerics.
															    # ``filename.split('.')[-1]`` raised when no filename was sent and, for
															    # names without a dot, reused the whole client-supplied string as the
															    # "extension".
															    raw_ext = os.path.splitext(os.path.basename(file.filename or ""))[1]
															    ext = "".join(ch for ch in raw_ext if ch.isalnum())
															    save_filename = f"{task_id}.{ext}" if ext else task_id
															    save_path = os.path.join(UPLOAD_DIR, save_filename)

															    # The blocking copy runs in the thread pool, off the event loop.
															    def save_file():
															        with open(save_path, "wb") as buffer:
															            shutil.copyfileobj(file.file, buffer)
															    await run_in_threadpool(save_file)

															    # ``describe_image`` is called synchronously, so it is run in the
															    # thread pool as well; on the event loop it would stall every other
															    # request for the duration of the model call.
															    def analyze():
															        with open(save_path, 'rb') as f:
															            image_bytes = f.read()
															        return ai_image.describe_image(prompts, image_bytes)

															    try:
															        result = await run_in_threadpool(analyze)
															        return {
															            "task_id": task_id,
															            "model_name": result['model_name'],
															            "image_url": f"/api1/file/image/{save_filename}",
															            "results": result['results']
															        }
															    except Exception as e:
															        # Keep the traceback in the server log before turning it into a 500.
															        logger.exception("Image analysis failed for task %s", task_id)
															        raise HTTPException(status_code=500, detail=f"AI 推理失败: {str(e)}") from e
														
--- a/route/text.py
+++ b/route/text.py
@@ -0,0 +1,89 @@
 
															+import logging
														
 
															+from typing import Optional
														
 
															+
														
 
															+from fastapi import APIRouter, HTTPException
														
 
															+from pydantic import BaseModel
														
 
															+import service.ai_text_ollama as ai_text
														
 
															+
														
 
logger = logging.getLogger(__name__)

# Router for the text endpoints; the prefix and tags are set once here
# and apply to every route registered on it.
router = APIRouter(
    prefix="/api1/text",
    tags=["text"]
)
														
 
															+
														
 
# JSON request body accepted by the /translate endpoint.
class TranslationRequest(BaseModel):
    # Text to be translated.
    text: str
														
 
															+
														
 
															+@router.post("/translate")
														
 
															+async def translate(request: TranslationRequest):
														
 
															+    if not request.text:
														
 
															+        raise HTTPException(status_code=400, detail="输入文本不能为空")
														
 
															+
														
 
															+    try:
														
 
															+        final_result = ai_text.translate2zh(request.text)
														
 
															+        return {"original": request.text, "translation": final_result}
														
 
															+    except Exception as e:
														
 
															+        raise HTTPException(status_code=500, detail=str(e))
														
 
															+
														
 
															+
														
 
# JSON request body used by the /summarize and /tag endpoints.
class SummarizeRequest(BaseModel):
    # Text to be processed.
    text: str
    prompt: Optional[str] = ""  # may be an empty string or omitted entirely
														
 
															+
														
 
															+
														
 
															+@router.post("/summarize")
														
 
															+async def get_summarize(request: SummarizeRequest):
														
 
															+    if not request.text.strip():
														
 
															+        raise HTTPException(status_code=400, detail="输入文本不能为空")
														
 
															+
														
 
															+    try:
														
 
															+        max_length = 256
														
 
															+        combined_prompt = f"""
														
 
															+            你是一个专业的视频内容分析师。请对以下识别结果进行精炼的中文总结。
														
 
															+
														
 
															+            要求字数不超过{max_length}字，语气客观专业：
														
 
															+            {request.text}
														
 
															+            """
														
 
															+
														
 
															+        result = ai_text.summarize(combined_prompt)
														
 
															+        return {
														
 
															+            "model_name": result['model_name'],
														
 
															+            "prompt": combined_prompt,
														
 
															+            "result": result['result']
														
 
															+        }
														
 
															+    except Exception as e:
														
 
															+        raise HTTPException(status_code=500, detail=f"总结失败: {str(e)}")
														
 
															+
														
 
															+
														
 
															+@router.post("/tag")
														
 
															+async def get_tag(request: SummarizeRequest):
														
 
															+    if not request.text.strip():
														
 
															+        raise HTTPException(status_code=400, detail="输入文本不能为空")
														
 
															+
														
 
															+    try:
														
 
															+        combined_prompt = f"""
														
 
															+            你是一个专业的视频内容分析师。请从下方文本中提取出 1-10 个关键词标签。
														
 
															+
														
 
															+            限制条件：
														
 
															+            1. 标签必须是中文。
														
 
															+            2. 标签要包含：人物特征、动作行为、场景环境、氛围。
														
 
															+            3. 严禁输出任何解释性文字，只输出标签。
														
 
															+            4. 格式要求：标签之间用英文逗号分隔。
														
 
															+
														
 
															+            文本内容：
														
 
															+            {request.text}
														
 
															+
														
 
															+            标签结果：
														
 
															+            """
														
 
															+
														
 
															+        result = ai_text.summarize(combined_prompt)
														
 
															+        return {
														
 
															+            "model_name": result['model_name'],
														
 
															+            "prompt": combined_prompt,
														
 
															+            "result": result['result']
														
 
															+        }
														
 
															+    except Exception as e:
														
 
															+        raise HTTPException(status_code=500, detail=f"总结失败: {str(e)}")
														
--- a/service/__init__.py
+++ b/service/__init__.py
--- a/service/ai_asr.py
+++ b/service/ai_asr.py
@@ -0,0 +1,84 @@
 
															+import logging
														
 
															+import asyncio
														
 
															+import re
														
 
															+import time
														
 
															+from typing import Optional
														
 
															+
														
 
															+import service.pyav as pyav
														
 
															+import torch
														
 
															+from funasr import AutoModel
														
 
															+
														
 
logger = logging.getLogger(__name__)
# Shared ASR model instance; remains None until init_funasr() assigns it.
asr_model: Optional[AutoModel] = None
# Set to True by init_funasr() once the model has been loaded successfully.
is_model_ready = False
														
 
															+
														
 
															+
														
 
															+def get_asr_model():
														
 
															+    """获取模型实例的接口"""
														
 
															+    global asr_model
														
 
															+    return asr_model
														
 
															+
														
 
															+
														
 
															+def check_ready():
														
 
															+    """检查模型是否加载完成"""
														
 
															+    global is_model_ready
														
 
															+    return is_model_ready
														
 
															+
														
 
															+
														
 
															+async def init_funasr():
														
 
															+    """异步初始化函数"""
														
 
															+    global asr_model, is_model_ready
														
 
															+
														
 
															+    if is_model_ready:
														
 
															+        return
														
 
															+
														
 
															+    logger.info("⏳ [ASR] 开始异步加载 funasr 模型...")
														
 
															+    start_time = time.time()
														
 
															+
														
 
															+    try:
														
 
															+        # 使用 run_in_executor 避免阻塞主事件循环
														
 
															+        loop = asyncio.get_event_loop()
														
 
															+
														
 
															+        # 定义具体的加载逻辑
														
 
															+        def load():
														
 
															+            return AutoModel(
														
 
															+                model="paraformer-zh",
														
 
															+                vad_model="fsmn-vad",
														
 
															+                vad_kwargs={"max_single_segment_time": 30000},
														
 
															+                punc_model="ct-punc",
														
 
															+                device="cuda:0" if torch.cuda.is_available() else "cpu",
														
 
															+                disable_update=True
														
 
															+            )
														
 
															+
														
 
															+        asr_model = await loop.run_in_executor(None, load)
														
 
															+        is_model_ready = True
														
 
															+        logger.info(f"✅ [ASR] 模型加载成功！耗时 {(time.time() - start_time):.2f}s")
														
 
															+    except Exception as e:
														
 
															+        logger.error(f"❌ [ASR] 模型加载失败: {e}")
														
 
															+        is_model_ready = False
														
 
															+
														
 
															+
														
 
															+def get_text(audio_path):
														
 
															+    start_time = time.time()
														
 
															+    logger.info("⏳ 开始进行音频识别...")
														
 
															+    result = asr_model.generate(input=[audio_path], cache={}, batch_size_s=300)
														
 
															+    logger.info(f"✅ 音频识别完成, 耗时 {(time.time() - start_time):.2f}秒")
														
 
															+
														
 
															+    # 清理文本中的空字符
														
 
															+    text = result[0]['text'].replace(" ", "")
														
 
															+    timestamps = result[0]['timestamp']
														
 
															+    return {
														
 
															+        'text': text,
														
 
															+        'timestamps': timestamps
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def generate_srt(audio_path, srt_path):
														
 
															+    result = get_text(audio_path)
														
 
															+    text = result['text']
														
 
															+    timestamp_list = result['timestamps']
														
 
															+
														
 
															+    # 使用正则表达式将文本按标点切分为一个 list，保留标点
														
 
															+    text_list = re.split(r"([。！？；，])", text)
														
 
															+    srt_list = pyav.get_precise_srt(text_list, timestamp_list)
														
 
															+    pyav.save_srt_file(srt_list, srt_path)
														
--- a/service/ai_image_ollama.py
+++ b/service/ai_image_ollama.py
@@ -0,0 +1,80 @@
 
															+import logging
														
 
															+from setting import ollama_client
														
 
															+
														
 
logger = logging.getLogger(__name__)
# Ollama model tag passed as ``model`` to every chat call in this module.
model_name = 'moondream'
														
 
															+
														
 
															+def describe_frame(scene_list):
														
 
															+    results = []
														
 
															+    logger.info(f"🚀 开始分析视频，共有 {len(scene_list)} 个场景待处理...")
														
 
															+    for i, scene in enumerate(scene_list):
														
 
															+        logger.info(f"\n🎬 正在处理场景 {i + 1}/{len(scene_list)} (时间点: {scene['frame_pos']}s)")
														
 
															+        frame_path = scene['frame_path']
														
 
															+        with open(frame_path, 'rb') as f:
														
 
															+            image_bytes = f.read()
														
 
															+            image_data = {
														
 
															+                "scene_start": scene['scene_start'],
														
 
															+                "scene_end": scene['scene_end'],
														
 
															+                "frame_pos": scene['frame_pos'],
														
 
															+                "frame_path": frame_path,
														
 
															+                "prompts": []
														
 
															+            }
														
 
															+
														
 
															+            prompts = [
														
 
															+                """
														
 
															+                Analyze the physical interaction between the individuals. Is there any intimate or sexual contact visible? Describe the positioning of their bodies and limbs objectively
														
 
															+                """,
														
 
															+                """
														
 
															+                Describe the clothing status of all individuals. Is there any visible nudity, undergarments, or partially exposed sensitive areas? Identify the specific body parts shown.
														
 
															+                """,
														
 
															+                """
														
 
															+                Observe the posture and movement. Does the scene depict a sexual act or a highly suggestive sexual position?provide a neutral description of the pose.
														
 
															+                """
														
 
															+            ]
														
 
															+            for i, p in enumerate(prompts):
														
 
															+                try:
														
 
															+                    response = ollama_client.chat(
														
 
															+                        model=model_name,
														
 
															+                        messages=[{
														
 
															+                            'role': 'user',
														
 
															+                            'content': p,
														
 
															+                            'images': [image_bytes]
														
 
															+                        }]
														
 
															+                    )
														
 
															+                    eng_text = response['message']['content']
														
 
															+                    eng_text1 = eng_text.strip()
														
 
															+                    prompt_item = {
														
 
															+                        "prompt": p,
														
 
															+                        "result": eng_text1
														
 
															+                    }
														
 
															+                    image_data["prompts"].append(prompt_item)
														
 
															+                except Exception as e:
														
 
															+                    logger.error(f"❌ Prompt {i + 1} 推理失败: {e}")
														
 
															+
														
 
															+            results.append(image_data)
														
 
															+
														
 
															+    return results
														
 
															+
														
 
															+
														
 
															+def describe_image(prompts, image_bytes):
														
 
															+    results = []
														
 
															+    for i, p in enumerate(prompts):
														
 
															+        response = ollama_client.chat(
														
 
															+            model=model_name,
														
 
															+            messages=[{
														
 
															+                'role': 'user',
														
 
															+                'content': p,
														
 
															+                'images': [image_bytes]
														
 
															+            }]
														
 
															+        )
														
 
															+        eng_text = response['message']['content']
														
 
															+        prompt_item = {
														
 
															+            "prompt": p,
														
 
															+            "result": eng_text.strip()
														
 
															+        }
														
 
															+        results.append(prompt_item)
														
 
															+
														
 
															+    return {
														
 
															+                "model_name": model_name,
														
 
															+                "results": results
														
 
															+            }
														
--- a/service/ai_task.py
+++ b/service/ai_task.py
@@ -0,0 +1,79 @@
 
															+import logging
														
 
															+import time
														
 
															+import re
														
 
															+import asyncio
														
 
															+from concurrent.futures import ThreadPoolExecutor
														
 
															+
														
 
															+import service.pyav as pyav
														
 
															+import service.pygpu as pygpu
														
 
															+import service.ai_text_ollama as ai_text
														
 
															+import service.ai_asr as pyasr
														
 
															+from setting import OUTPUT_DIR
														
 
															+
														
 
logger = logging.getLogger(__name__)
# Single worker thread, so jobs handed to this executor run one at a time.
executor = ThreadPoolExecutor(max_workers=1)
# Pending jobs: put_task() enqueues, gpu_worker() consumes.
# NOTE(review): the queue is created at import time, outside a running
# event loop — on Python < 3.10 asyncio.Queue binds to a loop when it is
# constructed; confirm the deployment's Python version makes this safe.
task_queue = asyncio.Queue()
														
 
															+
														
 
															+async def start_worker():
														
 
															+    asyncio.create_task(gpu_worker())  # 开启 Worker
														
 
															+
														
 
															+
														
 
															+# --- 后台工作进程 (Consumer) ---
														
 
															+async def gpu_worker():
														
 
															+    logger.info("🏃 Worker 开始监听任务队列")
														
 
															+    while True:
														
 
															+        if not pyasr.check_ready():
														
 
															+            logger.info("休眠 10s 等待模型加载完成...")
														
 
															+            await asyncio.sleep(10)
														
 
															+            continue
														
 
															+
														
 
															+        # 获取任务
														
 
															+        task = await task_queue.get()
														
 
															+        task_id, audio_path, srt_path, video_path = task
														
 
															+        logger.info(f"⚡ 开始处理任务 [{task_id}]: {audio_path}")
														
 
															+        try:
														
 
															+            start_t = time.time()
														
 
															+            # 2. 推理 (在线程池中运行同步识别，防止阻塞事件循环)
														
 
															+            # 使用 run_in_executor 将同步函数包装成异步，不再阻塞 Event Loop
														
 
															+            # 这样在处理视频的同时，FastAPI 依然可以接收新请求并 put 到队列中
														
 
															+            loop = asyncio.get_event_loop()
														
 
															+            result = await loop.run_in_executor(
														
 
															+                executor,
														
 
															+                pyasr.get_text,
														
 
															+                audio_path
														
 
															+            )
														
 
															+
														
 
															+            text = result['text']
														
 
															+            timestamp_list = result['timestamps']
														
 
															+            text_list = re.split(r"([。！？；，])", text)
														
 
															+            srt_list = pyav.get_precise_srt(text_list, timestamp_list)
														
 
															+            pyav.save_srt_file(srt_list, srt_path)
														
 
															+
														
 
															+            with open(f"{OUTPUT_DIR}/{task_id}.txt", "w", encoding="utf-8") as f:
														
 
															+                f.write(text)
														
 
															+
														
 
															+            await loop.run_in_executor(
														
 
															+                executor,
														
 
															+                pyav.generate_video,
														
 
															+                audio_path,
														
 
															+                srt_path,
														
 
															+                video_path
														
 
															+            )
														
 
															+            logger.info(f"🎉 任务 [{task_id}] 完成，耗时: {time.time() - start_t:.2f}s")
														
 
															+        except Exception as e:
														
 
															+            logger.error(f"❌ 任务 [{task_id}] 失败: {str(e)}")
														
 
															+        finally:
														
 
															+            # 4. 定时/按需清理 GPU 显存
														
 
															+            task_queue.task_done()
														
 
															+            pygpu.clear_gpu_memory()
														
 
															+
														
 
															+
														
 
															+async def put_task(task_id, save_path, srt_path, video_path):
														
 
															+    await task_queue.put((task_id, save_path, srt_path, video_path))
														
 
															+
														
 
															+def get_tasks():
														
 
															+    return task_queue.qsize()
														
 
															+
														
 
															+def translate_to_zh(text):
														
 
															+    result = ai_text.translate2zh(text)
														
 
															+    return result
														
--- a/service/ai_text.py
+++ b/service/ai_text.py
@@ -0,0 +1,67 @@
 
															+import logging
														
 
															+import os
														
 
															+os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
														
 
															+
														
 
															+import time
														
 
															+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
														
 
															+import torch
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+model_name = "Qwen/Qwen2.5-1.5B-Instruct"
														
 
															+start_time = time.time()
														
 
															+logger.info(f"⏳ 开始加载 {model_name} 模型...")
														
 
															+try:
														
 
															+    # 1. 定义量化配置
														
 
															+    quantization_config = BitsAndBytesConfig(
														
 
															+        load_in_4bit=True,
														
 
															+        bnb_4bit_compute_dtype=torch.float16,  # 1650 显卡建议设为 fp16
														
 
															+        bnb_4bit_quant_type="nf4",  # 高精度量化类型
														
 
															+        bnb_4bit_use_double_quant=True  # 进一步压缩显存
														
 
															+    )
														
 
															+
														
 
															+    # 2. 加载模型
														
 
															+    model = AutoModelForCausalLM.from_pretrained(
														
 
															+        model_name,
														
 
															+        quantization_config=quantization_config,  # 使用配置对象
														
 
															+        device_map="auto"  # 自动分配到 GPU
														
 
															+    )
														
 
															+
														
 
															+    tokenizer = AutoTokenizer.from_pretrained(model_name)
														
 
															+    logger.info(f"✅ {model_name} 模型加载成功！耗时 {(time.time() - start_time):.2f}秒")
														
 
															+except Exception as e:
														
 
															+    logger.info(f"模型加载失败: {e}")
														
 
															+    logger.info(f"✅ {model_name} 模型加载失败: {e}")
														
 
															+    raise e
														
 
															+
														
 
															+
														
 
															+def translate2zh(text):
														
 
															+    # 构建适合 Moondream 场景的 Prompt
														
 
															+    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文，直接输出结果，不要解释：\n{text}"
														
 
															+
														
 
															+    messages = [{"role": "user", "content": prompt}]
														
 
															+    input_text = tokenizer.apply_chat_template(
														
 
															+        messages,
														
 
															+        tokenize=False,
														
 
															+        add_generation_prompt=True
														
 
															+    )
														
 
															+
														
 
															+    model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
														
 
															+
														
 
															+    # 推理
														
 
															+    max_tokens = 128
														
 
															+    with torch.no_grad():
														
 
															+        generated_ids = model.generate(
														
 
															+            **model_inputs,
														
 
															+            max_new_tokens=max_tokens,
														
 
															+            do_sample=False  # 翻译建议关闭随机性，保证结果稳定
														
 
															+        )
														
 
															+
														
 
															+    # 解码
														
 
															+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
														
 
															+    # 提取助手回答的部分
														
 
															+    final_result = response.split("assistant\n")[-1].strip()
														
 
															+    return final_result
														
 
															+
														
 
															+
														
 
															+def summarize():
														
 
															+    pass
														
--- a/service/ai_text_ollama.py
+++ b/service/ai_text_ollama.py
@@ -0,0 +1,44 @@
 
															+import logging
														
 
															+import time
														
 
															+from setting import ollama_client
														
 
															+
														
 
logger = logging.getLogger(__name__)
# Ollama model tag passed as ``model`` to every chat call in this module.
model_name = "qwen2.5:1.5b"
														
 
															+
														
 
															+def translate2zh(text):
														
 
															+    start_time = time.time()
														
 
															+    logger.info(f"⏳ 正在通过 Ollama 调用 {model_name}...")
														
 
															+
														
 
															+    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文，直接输出结果，不要解释：\n{text}"
														
 
															+    try:
														
 
															+        # Ollama 会自动处理 4-bit 加载和显存分配
														
 
															+        response = ollama_client.chat(
														
 
															+            model=model_name,
														
 
															+            messages=[{'role': 'user', 'content': prompt}],
														
 
															+            # options={
														
 
															+            #     "num_gpu": 1,  # 强制使用 GPU
														
 
															+            #     "temperature": 0.7,
														
 
															+            #     "top_p": 0.9
														
 
															+            # }
														
 
															+        )
														
 
															+
														
 
															+        duration = time.time() - start_time
														
 
															+        logger.info(f"✅ 推理成功！耗时 {duration:.2f}秒")
														
 
															+        return response['message']['content']
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        logger.error(f"❌ Ollama 调用失败: {e}")
														
 
															+        raise e
														
 
															+
														
 
															+
														
 
															+def summarize(content):
														
 
															+    response = ollama_client.chat(
														
 
															+        model=model_name,
														
 
															+        messages=[{'role': 'user', 'content': content}],
														
 
															+        options={"temperature": 0.3}  # 较低的温度使总结更稳定
														
 
															+    )
														
 
															+    result = response['message']['content'].strip()
														
 
															+    return {
														
 
															+        "model_name": model_name,
														
 
															+        "result": result
														
 
															+    }
														
--- a/service/pyav.py
+++ b/service/pyav.py
@@ -0,0 +1,533 @@
 
															+import json
														
 
															+import os
														
 
															+import re
														
 
															+import hashlib
														
 
															+import logging
														
 
															+import shutil
														
 
															+import subprocess
														
 
															+from pathlib import Path
														
 
															+import cv2
														
 
															+import librosa
														
 
															+import numpy as np
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+def get_video_info(video_path):
														
 
															+    """获取视频基础元数据"""
														
 
															+    cap = cv2.VideoCapture(video_path)
														
 
															+    if not cap.isOpened():
														
 
															+        return None
														
 
															+
														
 
															+    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
														
 
															+    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
														
 
															+    fps = cap.get(cv2.CAP_PROP_FPS)
														
 
															+    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
														
 
															+    duration = int(frame_count / fps) if fps > 0 else 0
														
 
															+    size_byte = int(os.path.getsize(video_path))
														
 
															+
														
 
															+    # 判定横竖屏：1 为横屏, 0 为竖屏
														
 
															+    horizontal = 1 if width >= height else 0
														
 
															+
														
 
															+    cap.release()
														
 
															+    return {
														
 
															+        "video_path": video_path,
														
 
															+        "duration": duration,
														
 
															+        "size_byte": size_byte,
														
 
															+        "width": width,
														
 
															+        "height": height,
														
 
															+        "horizontal": horizontal,
														
 
															+        "scenes": []
														
 
															+    }
														
 
															+
														
 
															+
														
 
															+def get_media_info(media_path):
														
 
															+    """获取视频基础元数据（含音频）"""
														
 
															+    # 1. 使用 ffprobe 获取详细流信息
														
 
															+    cmd = [
														
 
															+        'ffprobe', '-v', 'quiet', '-print_format', 'json',
														
 
															+        '-show_streams', '-show_format', media_path
														
 
															+    ]
														
 
															+
														
 
															+    try:
														
 
															+        result = subprocess.check_output(cmd, encoding='utf-8')
														
 
															+        data = json.loads(result)
														
 
															+    except Exception as e:
														
 
															+        print(f"ffprobe 解析失败: {e}")
														
 
															+        return None
														
 
															+
														
 
															+    # 提取视频流和音频流
														
 
															+    video_stream = next((s for s in data['streams'] if s['codec_type'] == 'video'), None)
														
 
															+    audio_stream = next((s for s in data['streams'] if s['codec_type'] == 'audio'), None)
														
 
															+
														
 
															+    # 2. 基础视频信息
														
 
															+    width = int(video_stream.get('width', 0)) if video_stream else 0
														
 
															+    height = int(video_stream.get('height', 0)) if video_stream else 0
														
 
															+    duration = float(data['format'].get('duration', 0))
														
 
															+    size_byte = int(os.path.getsize(media_path))
														
 
															+    horizontal = 1 if width >= height else 0
														
 
															+
														
 
															+    # 3. 构造返回结构
														
 
															+    info = {
														
 
															+        "media_path": media_path,
														
 
															+        "duration": round(duration, 2),
														
 
															+        "size_byte": size_byte,
														
 
															+        "width": width,
														
 
															+        "height": height,
														
 
															+        "horizontal": horizontal,
														
 
															+        # 新增音频字段
														
 
															+        "has_audio": audio_stream is not None,
														
 
															+        "audio_info": {
														
 
															+            "codec": audio_stream.get('codec_name'),
														
 
															+            "sample_rate": audio_stream.get('sample_rate'),
														
 
															+            "channels": audio_stream.get('channels'),
														
 
															+            "bit_rate": audio_stream.get('bit_rate')
														
 
															+        } if audio_stream else None,
														
 
															+        "scenes": []
														
 
															+    }
														
 
															+
														
 
															+    return info
														
 
															+
														
 
															+
														
 
															+def get_scene_times(video_path, threshold=0.3):
														
 
															+    cmd = [
														
 
															+        'ffmpeg',
														
 
															+        '-hide_banner',
														
 
															+        '-i', video_path,
														
 
															+        '-threads', '0',
														
 
															+        '-vf', f"select='eq(n,0)+gt(scene,{threshold})',showinfo",
														
 
															+        '-vsync', 'vfr',
														
 
															+        '-f', 'null', '-' # 仅测试检测，不实际写文件；如需写文件请换回你的参数
														
 
															+    ]
														
 
															+
														
 
															+    scene_start_times = []
														
 
															+    try:
														
 
															+        # 2. 启动子进程
														
 
															+        # stderr=subprocess.PIPE 捕获日志，stdout=subprocess.DEVNULL 忽略正常输出
														
 
															+        process = subprocess.Popen(
														
 
															+            cmd,
														
 
															+            stdout=subprocess.DEVNULL,
														
 
															+            stderr=subprocess.PIPE,
														
 
															+            universal_newlines=True,
														
 
															+            encoding='utf-8'
														
 
															+        )
														
 
															+
														
 
															+        # 3. 实时解析日志
														
 
															+        # 使用 stdout/stderr 迭代时，建议处理编码或可能的读取中断
														
 
															+        try:
														
 
															+            # showinfo 的输出在 stderr
														
 
															+            for line in process.stderr:
														
 
															+                if "pts_time:" in line:
														
 
															+                    match = re.search(r"pts_time:(\d+\.\d+)", line)
														
 
															+                    if match:
														
 
															+                        time_val = float(match.group(1))
														
 
															+                        scene_start_times.append(time_val)
														
 
															+                        logger.info(f"检测到新场景起始点: {time_val}s")
														
 
															+        except Exception as e:
														
 
															+            process.kill()  # 如果读取过程崩溃，强制结束进程
														
 
															+            raise RuntimeError(f"读取 FFmpeg 输出时发生错误: {e}")
														
 
															+
														
 
															+        # 4. 等待进程结束并检查退出码
														
 
															+        process.wait()
														
 
															+
														
 
															+        if process.returncode != 0:
														
 
															+            # 获取最后几行错误信息（如果有）
														
 
															+            raise subprocess.CalledProcessError(process.returncode, cmd)
														
 
															+
														
 
															+    except FileNotFoundError:
														
 
															+        # 当系统环境变量里找不到 'ffmpeg' 命令时触发
														
 
															+        raise RuntimeError("系统未安装 FFmpeg 或未将其添加到环境变量 PATH 中")
														
 
															+
														
 
															+    except subprocess.CalledProcessError as e:
														
 
															+        # FFmpeg 执行过程中报错（如视频解码失败、参数错误）
														
 
															+        raise RuntimeError(f"FFmpeg 处理视频失败，退出码: {e.returncode}")
														
 
															+
														
 
															+    except Exception as e:
														
 
															+        # 其他未知异常
														
 
															+        raise RuntimeError(f"发生未知错误: {e}")
														
 
															+
														
 
															+    if not scene_start_times:
														
 
															+        return [0.0]
														
 
															+
														
 
															+    if scene_start_times[0] > 0.5:
														
 
															+        # 手动把第一个点修正为 0.0
														
 
															+        scene_start_times.insert(0, 0.0)
														
 
															+
														
 
															+    return scene_start_times
														
 
															+
														
 
															+
														
 
															+def split_video_by_scenes(video_path, scene_start_times, output_dir="segment"):
														
 
															+    """
														
 
															+    根据给定的起始时间列表分割视频
														
 
															+    """
														
 
															+    if not scene_start_times:
														
 
															+        logger.info("没有检测到场景，跳过分割。")
														
 
															+        return
														
 
															+
														
 
															+    # 添加一个结束标识，方便循环计算时长
														
 
															+    # 这里不需要准确的视频总长，FFmpeg 处理最后一个片段时会自动截取到末尾
														
 
															+    times = scene_start_times + [None]
														
 
															+    for i in range(len(times) - 1):
														
 
															+        start_time = times[i]
														
 
															+        next_time = times[i + 1]
														
 
															+
														
 
															+        output_file = f"{output_dir}/segment_{i:03d}.mp4"
														
 
															+
														
 
															+        # 构建命令
														
 
															+        # -ss 放在 -i 前面可以实现快速定位（基于关键帧）
														
 
															+        cmd = [
														
 
															+            'ffmpeg', '-hide_banner', '-y',
														
 
															+            '-ss', str(start_time),
														
 
															+            '-i', video_path
														
 
															+        ]
														
 
															+
														
 
															+        # 如果不是最后一个片段，指定持续时间 -t
														
 
															+        if next_time is not None:
														
 
															+            duration = next_time - start_time
														
 
															+            cmd.extend(['-t', str(duration)])
														
 
															+
														
 
															+        # 使用 copy 模式不重编码，速度极快
														
 
															+        cmd.extend(['-c', 'copy', output_file])
														
 
															+
														
 
															+        try:
														
 
															+            subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
														
 
															+            logger.info(f"完成: {output_file} (起始点: {start_time}s)")
														
 
															+        except subprocess.CalledProcessError as e:
														
 
															+            logger.info(f"分割片段 {i} 失败: {e.stderr.decode()}")
														
 
															+
														
 
															+
														
 
															+def calculate_mid_points(video_path, scene_start_times):
														
 
															+    """
														
 
															+    计算每个场景的详细信息：
														
 
															+    1. 获取视频总时长以确定最后一个场景的边界。
														
 
															+    2. 如果场景时长 < 0.5s，抽帧点取起始点；否则取中点。
														
 
															+    3. 返回格式化的字典列表。
														
 
															+    """
														
 
															+    # 1. 获取视频总时长
														
 
															+    duration_cmd = [
														
 
															+        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
														
 
															+        '-of', 'default=noprint_wrappers=1:nokey=1', video_path
														
 
															+    ]
														
 
															+    try:
														
 
															+        total_duration = float(subprocess.check_output(duration_cmd).decode().strip())
														
 
															+    except Exception as e:
														
 
															+        # 如果获取时长失败，可以根据需求抛出异常或设置一个保守值
														
 
															+        raise RuntimeError(f"无法获取视频时长: {e}")
														
 
															+
														
 
															+    # 2. 构建结束时间点（下一个场景的开始即当前场景的结束）
														
 
															+    end_times = scene_start_times[1:] + [total_duration]
														
 
															+
														
 
															+    scenes = []
														
 
															+    for start, end in zip(scene_start_times, end_times):
														
 
															+        duration = end - start
														
 
															+
														
 
															+        # 3. 计算抽帧位置逻辑
														
 
															+        if duration < 0.5:
														
 
															+            mid_point = start
														
 
															+        else:
														
 
															+            mid_point = (start + end) / 2
														
 
															+
														
 
															+        # 4. 组装成指定的字典格式
														
 
															+        scenes.append({
														
 
															+            "scene_start": round(start, 3),
														
 
															+            "scene_end": round(end, 3),
														
 
															+            "frame_pos": round(mid_point, 3)
														
 
															+        })
														
 
															+
														
 
															+    return scenes
														
 
															+
														
 
															+
														
 
															+def extract_frames(video_path, scenes, output_dir="thumbnails"):
														
 
															+    """
														
 
															+    执行实际的 FFmpeg 抽帧操作。
														
 
															+    输入: scenes 字典列表。
														
 
															+    输出: 带有 'frame_path' 绝对路径的 scenes 字典列表。
														
 
															+    """
														
 
															+    if not os.path.exists(output_dir):
														
 
															+        os.makedirs(output_dir)
														
 
															+
														
 
															+    # 获取输出目录的绝对路径，确保返回的路径是完整的
														
 
															+    abs_output_dir = os.path.abspath(output_dir)
														
 
															+
														
 
															+    logger.info(f"开始执行抽帧任务，目标数量: {len(scenes)}")
														
 
															+
														
 
															+    for i, scene in enumerate(scenes):
														
 
															+        ts = scene["frame_pos"]
														
 
															+        # 文件命名保持之前的规范：序号_时间戳.jpg
														
 
															+        file_name = f"scene_{i + 1:03d}_{ts}s.jpg"
														
 
															+        output_file_path = os.path.join(abs_output_dir, file_name)
														
 
															+
														
 
															+        # 使用快速定位 (-ss 在 -i 前)
														
 
															+        cmd = [
														
 
															+            'ffmpeg', '-hide_banner', '-loglevel', 'error',
														
 
															+            '-ss', str(ts),
														
 
															+            '-i', video_path,
														
 
															+            '-frames:v', '1',
														
 
															+            '-q:v', '2',
														
 
															+            '-vf', 'scale=640:-1',  # 预览图建议缩放，速度更快
														
 
															+            output_file_path, '-y'
														
 
															+        ]
														
 
															+
														
 
															+        try:
														
 
															+            subprocess.run(cmd, check=True)
														
 
															+            # 抽帧成功后，将绝对路径存入字典
														
 
															+            scene["frame_path"] = output_file_path
														
 
															+
														
 
															+            if (i + 1) % 5 == 0 or (i + 1) == len(scenes):
														
 
															+                logger.info(f"进度: {i + 1}/{len(scenes)}")
														
 
															+        except subprocess.CalledProcessError:
														
 
															+            logger.info(f"错误: 无法提取 {ts}s 处的帧")
														
 
															+            scene["frame_path"] = None  # 如果提取失败，可以标记为 None
														
 
															+
														
 
															+    logger.info(f"任务完成，存储路径: {abs_output_dir}")
														
 
															+    return scenes
														
 
															+
														
 
															+
														
 
															+# subtitles 滤镜位于 filter_complex 的字符串内部，FFmpeg 会对其进行二次解析。如果路径包含 \、: 或空格，解析就会崩溃
														
 
															+# 将 srt 文件临时改名为一个完全合法的名字并复制文件, 处理完成后再删除复制的文件
														
 
															+def get_safe_temp_srt(srt_path):
														
 
															+    """
														
 
															+    根据原始路径生成一个位于同目录下的 SHA256 临时文件名
														
 
															+    """
														
 
															+    srt_obj = Path(srt_path).resolve()
														
 
															+    # 计算路径或内容的 hash (建议计算路径的 hash 即可，速度快)
														
 
															+    path_hash = hashlib.sha256(str(srt_obj).encode('utf-8')).hexdigest()
														
 
															+
														
 
															+    # 构造临时文件路径：与原文件同目录，名字为 hash.srt
														
 
															+    temp_srt_path = srt_obj.parent / f"{path_hash}.srt"
														
 
															+    return temp_srt_path
														
 
															+
														
 
															+
														
 
															+def generate_video(audio_path, srt_path, video_output):
														
 
															+    # 预处理 srt 路径
														
 
															+    # 1. 转为绝对路径
														
 
															+    # 2. 统一使用正斜杠 /
														
 
															+    # 3. 处理 subtitles 滤镜特有的转义：将 ':' 替换为 '\:'
														
 
															+    temp_srt_path = get_safe_temp_srt(srt_path)
														
 
															+    shutil.copy(srt_path, temp_srt_path)
														
 
															+
														
 
															+    font_name = 'WenQuanYi Micro Hei'
														
 
															+    font_size = 20
														
 
															+    font_color = '&H0000FFFF&'
														
 
															+
														
 
															+    """调用 FFmpeg 合成视频"""
														
 
															+    # 建议设置：
														
 
															+    # -rc vbr: 使用可变码率模式
														
 
															+    # -cq 28: 控制质量。数值越大，体积越小。推荐范围 24-32
														
 
															+    # -b:v 0: 在 cq 模式下，将目标码率设为 0，让编码器完全根据质量控制
														
 
															+    command = [
														
 
															+        'ffmpeg', '-y',
														
 
															+        '-hide_banner',
														
 
															+        '-i', audio_path,
														
 
															+        '-filter_complex',
														
 
															+        f"[0:a]showwaves=s=854x480:mode=line:colors=0x00FFFF[v];"
														
 
															+        f"[v]subtitles={temp_srt_path}:charenc=UTF-8:force_style='FontName={font_name},FontSize={font_size},PrimaryColour={font_color},Alignment=2'[v_out]",
														
 
															+        '-map', '[v_out]',
														
 
															+        '-map', '0:a',
														
 
															+        '-c:v', 'libx264',  # 使用 CPU 编码压缩率更高
														
 
															+        '-preset', 'veryfast',  # 编码速度预设。想要体积更小可以改为 'medium'，但速度会慢一点
														
 
															+        '-crf', '28',  # 质量控制：23 是默认，28 体积更小，对于 480p 波形图完全够用
														
 
															+        '-pix_fmt', 'yuv420p',  # 增强兼容性，确保所有播放器都能看
														
 
															+        '-c:a', 'aac',
														
 
															+        '-b:a', '128k',  # 音频码率限制在 128k
														
 
															+        '-shortest',
														
 
															+        video_output
														
 
															+    ]
														
 
															+
														
 
															+    try:
														
 
															+        # 使用 Popen 启动进程，将 stderr 重定向到 PIPE
														
 
															+        # 注意：FFmpeg 的进度信息是在 stderr 输出的
														
 
															+        process = subprocess.Popen(
														
 
															+            command,
														
 
															+            stdout=subprocess.PIPE,
														
 
															+            stderr=subprocess.STDOUT,  # 将 stderr 合并到 stdout 统一处理
														
 
															+            text=True,
														
 
															+            encoding='utf-8',
														
 
															+            errors='replace'
														
 
															+        )
														
 
															+
														
 
															+        # 用于记录最后几行日志，方便报错时排查
														
 
															+        error_log_buffer = []
														
 
															+        logger.info(f"🎬 开始合成视频: {video_output}")
														
 
															+        # 实时读取并打印输出内容
														
 
															+        while True:
														
 
															+            line = process.stdout.readline()
														
 
															+            if not line and process.poll() is not None:
														
 
															+                break
														
 
															+
														
 
															+            if line:
														
 
															+                clean_line = line.strip()
														
 
															+                error_log_buffer.append(clean_line)
														
 
															+                # 保持缓冲区大小，只留最后 20 行
														
 
															+                if len(error_log_buffer) > 20:
														
 
															+                    error_log_buffer.pop(0)
														
 
															+
														
 
															+                # 如果是进度行，则在同一行刷新；如果是警告/错误，则换行打印
														
 
															+                if "frame=" in clean_line or "size=" in clean_line:
														
 
															+                    # 这里的 line 会包含诸如 "frame=  123 fps= 30 size=  512kB time=00:00:05.12..." 的进度信息
														
 
															+                    # 使用 end='' 是因为 readline 自带换行符
														
 
															+                    logger.info(f"\rFFmpeg 进度: {clean_line}")
														
 
															+
														
 
															+        # 检查最终退出状态
														
 
															+        process.wait()
														
 
															+
														
 
															+        if process.returncode != 0:
														
 
															+            # 拼接最后的错误片段
														
 
															+            last_errors = "\n".join(error_log_buffer)
														
 
															+            raise RuntimeError(
														
 
															+                f"FFmpeg 执行失败 (退出码 {process.returncode})\n"
														
 
															+                f"--- 最后 20 行日志 ---\n{last_errors}"
														
 
															+            )
														
 
															+
														
 
															+        logger.info(f"\n🚀 视频合成成功: {video_output}")
														
 
															+    finally:
														
 
															+        if temp_srt_path.exists():
														
 
															+            temp_srt_path.unlink()
														
 
															+
														
 
															+
														
 
															+def get_precise_srt(text_list, timestamp_list, max_chars=20):
														
 
															+    total_ts = len(timestamp_list)
														
 
															+    raw_parts = text_list
														
 
															+    sentences = []
														
 
															+    # 合并标点到前面的短句
														
 
															+    for i in range(0, len(raw_parts) - 1, 2):
														
 
															+        sentences.append(raw_parts[i] + raw_parts[i + 1])
														
 
															+    if len(raw_parts) % 2 == 1:
														
 
															+        sentences.append(raw_parts[-1])
														
 
															+
														
 
															+    ts_idx = 0
														
 
															+    line_count = 1
														
 
															+
														
 
															+    srt_list = []
														
 
															+    for sentence in sentences:
														
 
															+        sentence = sentence.strip()
														
 
															+        if not sentence or ts_idx >= total_ts:
														
 
															+            continue
														
 
															+
														
 
															+        # 2. 如果单句太长，进行硬切分（按 max_chars）
														
 
															+        sub_sentences = [sentence[i:i + max_chars] for i in range(0, len(sentence), max_chars)]
														
 
															+
														
 
															+        for s in sub_sentences:
														
 
															+            # 统计这行里有多少个字符是对应时间戳的
														
 
															+            # 注意：Paraformer 的时间戳通常不包含标点，需要过滤掉标点再计数
														
 
															+            pure_words = re.sub(r'[^\w\u4e00-\u9fa5]', '', s)  # 仅保留中文字符和字母数字
														
 
															+            num_words = len(pure_words)
														
 
															+
														
 
															+            if num_words == 0:
														
 
															+                continue
														
 
															+
														
 
															+            # --- 关键防护：检查 ts_idx 是否越界 ---
														
 
															+            if ts_idx >= total_ts:
														
 
															+                break
														
 
															+
														
 
															+            # 获取开始时间
														
 
															+            start_t = timestamp_list[ts_idx][0]
														
 
															+
														
 
															+            # 计算结束索引，确保不越界
														
 
															+            end_pos = ts_idx + num_words - 1
														
 
															+            if end_pos >= total_ts:
														
 
															+                end_pos = total_ts - 1
														
 
															+
														
 
															+            end_t = timestamp_list[end_pos][1]
														
 
															+
														
 
															+            # 写入 SRT 格式
														
 
															+            # f.write(f"{line_count}\n")
														
 
															+            # f.write(f"{format_time_srt(start_t)} --> {format_time_srt(end_t)}\n")
														
 
															+            # f.write(f"{s}\n\n")
														
 
															+            srt_list.append({
														
 
															+                "line": line_count,
														
 
															+                "time": f"{format_time_srt(start_t)} --> {format_time_srt(end_t)}",
														
 
															+                "text": s
														
 
															+            })
														
 
															+
														
 
															+            # 更新索引
														
 
															+            ts_idx += num_words
														
 
															+            line_count += 1
														
 
															+    return srt_list
														
 
															+
														
 
															+
														
 
															+def format_time_srt(ms):
														
 
															+    """毫秒转 SRT 格式: HH:MM:SS,mmm"""
														
 
															+    s, ms = divmod(ms, 1000)
														
 
															+    m, s = divmod(s, 60)
														
 
															+    h, m = divmod(m, 60)
														
 
															+    return f"{h:02}:{m:02}:{s:02},{int(ms):03}"
														
 
															+
														
 
															+
														
 
															+def save_srt_file(srt_list, output_path):
														
 
															+    """
														
 
															+    将 srt 列表写入文件
														
 
															+    :param srt_list: 包含 line, time, text 字典的列表
														
 
															+    :param output_path: 输出路径 (如 'output.srt')
														
 
															+    """
														
 
															+    with open(output_path, 'w', encoding='utf-8') as f:
														
 
															+        for entry in srt_list:
														
 
															+            # 1. 写入序号 (line)
														
 
															+            f.write(f"{entry['line']}\n")
														
 
															+            # 2. 写入时间轴 (time)
														
 
															+            f.write(f"{entry['time']}\n")
														
 
															+            # 3. 写入文本 (text)
														
 
															+            f.write(f"{entry['text']}\n")
														
 
															+            # 4. 写入一个空行作为分隔符
														
 
															+            f.write("\n")
														
 
															+
														
 
															+
														
 
															+def parse_srt_to_list(file_path):
														
 
															+    """
														
 
															+    将 SRT 文件还原为 [{line, time, text}, ...] 结构
														
 
															+    """
														
 
															+    with open(file_path, 'r', encoding='utf-8') as f:
														
 
															+        content = f.read().strip()
														
 
															+
														
 
															+    # 正则表达式解释：
														
 
															+    # (\d+)                -> 匹配序号 (line)
														
 
															+    # (\d{2}:\d{2}:.*)     -> 匹配时间轴 (time)
														
 
															+    # ([\s\S]*?)           -> 匹配文本内容 (text)，支持多行
														
 
															+    # (?=\n\d+\n|\Z)       -> 断言后面紧跟下一个序号或文件末尾
														
 
															+    pattern = re.compile(r'(\d+)\n(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3})\n([\s\S]*?)(?=\n\d+\n|\Z)')
														
 
															+
														
 
															+    matches = pattern.findall(content)
														
 
															+
														
 
															+    srt_list = []
														
 
															+    for m in matches:
														
 
															+        srt_list.append({
														
 
															+            "line": int(m[0]),
														
 
															+            "time": m[1],
														
 
															+            "text": m[2].strip()  # 去掉文本末尾可能的换行
														
 
															+        })
														
 
															+    return srt_list
														
 
															+
														
 
															+
														
 
															+def analyze_audio_energy(audio_path, segment_ms=100):
														
 
															+    """
														
 
															+    按时间片段分析音频能量，帮助确定静音阈值
														
 
															+    :param audio_path: 音频文件路径
														
 
															+    :param segment_ms: 检查的时间块大小（毫秒）
														
 
															+    """
														
 
															+    # 1. 加载音频
														
 
															+    sr = 16000
														
 
															+    y, _ = librosa.load(audio_path, sr=sr)
														
 
															+
														
 
															+    # 2. 计算每个片段的能量 (RMS)
														
 
															+    hop_length = int(sr * segment_ms / 1000)
														
 
															+    energy_list = []
														
 
															+
														
 
															+    logger.info(f"{'时间 (秒)':<10} | {'能量值 (RMS)':<15} | {'状态估计'}")
														
 
															+    logger.info("-" * 45)
														
 
															+
														
 
															+    for i in range(0, len(y), hop_length):
														
 
															+        segment = y[i: i + hop_length]
														
 
															+        if len(segment) == 0: break
														
 
															+
														
 
															+        rms = np.sqrt(np.mean(segment ** 2))
														
 
															+        energy_list.append(rms)
														
 
															+
														
 
															+        # 打印进度和数值
														
 
															+        time_sec = i / sr
														
 
															+        status = "🤫 静音" if rms < 0.005 else "🗣️ 有声"
														
 
															+        logger.info(f"{time_sec:>8.2f}s | {rms:>15.6f} | {status}")
														
 
															+
														
 
															+    # 3. 输出统计建议
														
 
															+    logger.info("-" * 45)
														
 
															+    logger.info(f"最大能量: {max(energy_list):.6f}")
														
 
															+    logger.info(f"最小能量: {min(energy_list):.6f}")
														
 
															+    logger.info(f"建议阈值: {np.percentile(energy_list, 20):.6f} (取前20%分位数作为参考)")
														
--- a/service/pygpu.py
+++ b/service/pygpu.py
@@ -0,0 +1,100 @@
 
															+import logging
														
 
															+from datetime import datetime
														
 
															+import gc
														
 
															+import pynvml
														
 
															+import torch
														
 
															+from setting import ollama_client
														
 
															+
														
 
															+logger = logging.getLogger(__name__)
														
 
															+
														
 
															+def get_gpu_memory_info(device_index=0):
														
 
															+    pynvml.nvmlInit()
														
 
															+    handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
														
 
															+    info = pynvml.nvmlDeviceGetMemoryInfo(handle)
														
 
															+
														
 
															+    # print(f"显卡型号: {pynvml.nvmlDeviceGetName(handle)}")
														
 
															+    # print(f"总显存: {info.total / 1024 ** 2:.2f} MB")
														
 
															+    # print(f"已用显存: {info.used / 1024 ** 2:.2f} MB")
														
 
															+    # print(f"空闲显存: {info.free / 1024 ** 2:.2f} MB")
														
 
															+    result = {
														
 
															+        "model": pynvml.nvmlDeviceGetName(handle),
														
 
															+        "mem_total": f"{info.total / 1024 ** 2:.2f} MB",
														
 
															+        "mem_used": f"{info.used / 1024 ** 2:.2f} MB",
														
 
															+        "mem_free": f"{info.free / 1024 ** 2:.2f} MB",
														
 
															+    }
														
 
															+    pynvml.nvmlShutdown()
														
 
															+    return  result
														
 
															+
														
 
															+
														
 
															+def get_torch_memory_usage():
														
 
															+    if torch.cuda.is_available():
														
 
															+        # 获取当前设备索引
														
 
															+        device = torch.cuda.current_device()
														
 
															+        # 显存缓存（PyTorch 预占用的）
														
 
															+        reserved = torch.cuda.memory_reserved(device) / 1024 ** 2
														
 
															+        # 已经分配给 Tensor 的显存
														
 
															+        allocated = torch.cuda.memory_allocated(device) / 1024 ** 2
														
 
															+
														
 
															+        # reserved 是实际占有的 gpu 内存
														
 
															+        # allocated 是 reserved 中实际使用的部分
														
 
															+        return {
														
 
															+            "reserved": f"{reserved:.2f} MB",
														
 
															+            "allocated": f"{allocated:.2f} MB"
														
 
															+        }
														
 
															+    else:
														
 
															+        return {
														
 
															+                "message": "CUDA N/A",
														
 
															+            }
														
 
															+
														
 
															+
														
 
															+# --- GPU 显存深度清理函数 ---
														
 
															+def clear_gpu_memory():
														
 
															+    if torch.cuda.is_available():
														
 
															+        # 释放 PyTorch 占用的缓存
														
 
															+        torch.cuda.empty_cache()
														
 
															+        # 释放进程间共享内存
														
 
															+        torch.cuda.ipc_collect()
														
 
															+    # 强制进行 Python 层的垃圾回收
														
 
															+    gc.collect()
														
 
															+    logger.info(f"[{datetime.now().strftime('%H:%M:%S')}] 🧹 GPU 显存深度清理完成")
														
 
															+
														
 
															+
														
 
															+def get_ollama_resource():
														
 
															+    result = []
														
 
															+    try:
														
 
															+        # 获取当前运行中的模型列表
														
 
															+        response = ollama_client.ps()
														
 
															+        if not response['models']:
														
 
															+            return result
														
 
															+
														
 
															+        for model in response['models']:
														
 
															+            name = model['name']
														
 
															+            size_vram = model.get('size_vram', 0)
														
 
															+            size = model.get('size', 0)
														
 
															+
														
 
															+            # 计算显存占比
														
 
															+            if size > 0:
														
 
															+                gpu_percentage = (size_vram / size) * 100
														
 
															+            else:
														
 
															+                gpu_percentage = 0
														
 
															+
														
 
															+            # print(f"模型名称: {name}")
														
 
															+            # print(f"总大小: {size / 1024 ** 3:.2f} GB")
														
 
															+            # print(f"显存(VRAM)大小: {size_vram / 1024 ** 3:.2f} GB")
														
 
															+            if gpu_percentage >= 100:
														
 
															+                stat = "🚀 完全运行在 GPU 上"
														
 
															+            elif gpu_percentage > 0:
														
 
															+                stat = f"🌓 混合模式 (GPU 占比 {gpu_percentage:.2f}%)"
														
 
															+            else:
														
 
															+                stat = "🐌 完全运行在 CPU 上"
														
 
															+
														
 
															+            result.append({
														
 
															+                'model_name': name,
														
 
															+                'size': f"{size / 1024 ** 3:.2f} GB",
														
 
															+                'size_vram': f"{size_vram / 1024 ** 3:.2f} GB",
														
 
															+                'gpu_percentage': f"{gpu_percentage:.2f}",
														
 
															+                'stat': stat
														
 
															+            })
														
 
															+    except Exception as e:
														
 
															+        print(f"无法连接到 Ollama 服务: {e}")
														
 
															+    return result
														
--- a/setting.py
+++ b/setting.py
@@ -0,0 +1,11 @@
 
															+import os
														
 
															+from ollama import Client
														
 
															+
														
 
															+# 从环境变量获取地址，默认指向 compose 中的服务名
														
 
															+OLLAMA_URL = os.getenv("OLLAMA_HOST", "http://127.0.0.1:11434")
														
 
															+ollama_client = Client(host=OLLAMA_URL)
														
 
															+
														
 
															+UPLOAD_DIR = "ai_uploads"
														
 
															+OUTPUT_DIR = "ai_outputs"
														
 
															+os.makedirs(UPLOAD_DIR, exist_ok=True)
														
 
															+os.makedirs(OUTPUT_DIR, exist_ok=True)