| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
import json
import os
from setting import OUTPUT_DIR

# Put the CUDA allocator option into the environment before `import torch`
# below, so it is already present when torch loads.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import logging

# Log line layout: timestamp, logger name in brackets, level, then the message.
_LOG_FORMAT = "%(asctime)s [%(name)s] %(levelname)s: %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

import torch
import service.pyav as pyav
import service.pygpu as pygpu
import service.ai_asr as ai_asr
import service.ai_image_ollama as ai_image

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def process(audio_path, srt_path, video_path) -> None:
    """Generate subtitles for an audio file, then render a video from them.

    Calls ``ai_asr.generate_srt(audio_path, srt_path)`` followed by
    ``pyav.generate_video(audio_path, srt_path, video_path)``. Errors are
    logged and swallowed instead of being propagated, so a caller working
    through several files can carry on with the next one.
    ``pygpu.clear_gpu_memory()`` is called once on every exit path.

    Args:
        audio_path: Audio file handed to both steps.
        srt_path: Subtitle path handed to both steps.
        video_path: Video path handed to ``pyav.generate_video``.
    """
    try:
        # Run speech recognition, then build the video from its result.
        ai_asr.generate_srt(audio_path, srt_path)
        pyav.generate_video(audio_path, srt_path, video_path)
    except torch.OutOfMemoryError:
        # Listed before the generic handler so OOM gets its own message.
        # No explicit cleanup here: the `finally` clause runs on this path too.
        logger.error(f"❌ 显存溢出!跳过文件: {audio_path}")
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which a plain logger.error(str(e)) would lose.
        logger.exception(f"💥 处理 {audio_path} 时发生未知错误: {e}")
    finally:
        # Clean up after every file, success or failure, so the next file
        # starts with enough free GPU memory.
        pygpu.clear_gpu_memory()
def main() -> None:
    """Run the demo pipeline on the hard-coded sample files.

    Stage 1 turns ``abc.mp3`` into subtitles and a rendered video through
    ``process()``, which logs failures instead of raising and clears GPU
    memory afterwards. Stage 2 therefore still runs if stage 1 fails.

    Stage 2 analyses ``abc.mp4``: it reads the video info, finds scene start
    times, extracts frames into ``OUTPUT_DIR``, has ``ai_image`` describe
    them, and prints the combined result as JSON.
    """
    # Stage 1: speech recognition and subtitle video.
    audio = 'abc.mp3'
    srt = 'abc.mp3.srt'
    rendered_video = 'abc.mp3.mp4'
    process(audio, srt, rendered_video)

    # Stage 2: scene analysis of a separate source video.
    source_video = 'abc.mp4'
    video_info = pyav.get_video_info(source_video)
    # NOTE(review): 0.3 is presumably the scene-change threshold; confirm
    # against pyav.get_scene_times.
    scene_starts = pyav.get_scene_times(source_video, 0.3)

    # Optional step, currently disabled: split the video by scene.
    # pyav.split_video_by_scenes(source_video, scene_starts, OUTPUT_DIR)

    # Compute the frame-extraction time point for each scene.
    frame_times = pyav.calculate_mid_points(source_video, scene_starts)

    # Extract the frames (done with ffmpeg, per the original note).
    frames = pyav.extract_frames(source_video, frame_times, OUTPUT_DIR)

    # Have the AI model describe the extracted frames.
    video_info['scenes'] = ai_image.describe_frame(frames)

    # ensure_ascii=False keeps non-ASCII text readable in the output.
    print(json.dumps(video_info, indent=8, ensure_ascii=False))


if __name__ == "__main__":
    main()
|