main.py 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. import json
  2. import os
  3. from setting import OUTPUT_DIR
  4. os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
  5. import logging
  6. logging.basicConfig(
  7. level=logging.INFO,
  8. format='%(asctime)s [%(name)s] %(levelname)s: %(message)s'
  9. )
  10. import torch
  11. import service.pyav as pyav
  12. import service.pygpu as pygpu
  13. import service.ai_asr as ai_asr
  14. import service.ai_image_ollama as ai_image
  15. logger = logging.getLogger(__name__)
  16. def process(audio_path, srt_path, video_path):
  17. try:
  18. # 执行识别
  19. ai_asr.generate_srt(audio_path, srt_path)
  20. pyav.generate_video(audio_path, srt_path, video_path)
  21. except torch.OutOfMemoryError:
  22. logger.error(f"❌ 显存溢出!跳过文件: {audio_path}")
  23. pygpu.clear_gpu_memory() # 发生 OOM 后强制清理一次
  24. except Exception as e:
  25. logger.error(f"💥 处理 {audio_path} 时发生未知错误: {e}")
  26. finally:
  27. # 每一个文件处理完都主动清理一次,确保下一个文件有足够的初始空间
  28. pygpu.clear_gpu_memory()
  29. if __name__ == "__main__":
  30. audio = 'abc.mp3'
  31. srt = 'abc.mp3.srt'
  32. video = 'abc.mp3.mp4'
  33. ai_asr.generate_srt(audio, srt)
  34. pyav.generate_video(audio, srt, video)
  35. video = 'abc.mp4'
  36. video_dict = pyav.get_video_info(video)
  37. start_scene_times = pyav.get_scene_times(video, 0.3)
  38. # 按场景分割视频
  39. # pyav.split_video_by_scenes(video, start_scene_times, OUTPUT_DIR)
  40. # 计算的每个场景的抽帧时间点
  41. target_times = pyav.calculate_mid_points(video, start_scene_times)
  42. # 使用 ffmpeg 抽帧
  43. scenes = pyav.extract_frames(video, target_times, OUTPUT_DIR)
  44. # 使用 ai 对抽帧的画面进行识别
  45. scenes_result = ai_image.describe_frame(scenes)
  46. video_dict['scenes'] = scenes_result
  47. print(json.dumps(video_dict, indent=8, ensure_ascii=False))