"""FunASR-based speech recognition helpers.

Holds a module-level ASR model that is loaded once via ``init_funasr`` and
used by ``get_text`` / ``generate_srt`` to transcribe audio and write SRT
subtitle files.
"""
import logging
import asyncio
import re
import time
from typing import Optional

import service.pyav as pyav
import torch
from funasr import AutoModel

logger = logging.getLogger(__name__)

# Module-level singleton state, populated by init_funasr().
asr_model: Optional[AutoModel] = None
is_model_ready = False

# Sentence/clause delimiters used to cut recognized text into subtitle pieces.
# The capture group keeps each delimiter as its own list element in re.split.
# Both full-width and ASCII forms are accepted so the split works regardless
# of which variant the punctuation model emits.
_PUNCTUATION_SPLIT_RE = re.compile(r"([。!?;,!?;,])")


def get_asr_model() -> Optional[AutoModel]:
    """Return the shared ASR model instance, or ``None`` if it is not loaded."""
    return asr_model


def check_ready() -> bool:
    """Return whether the ASR model has finished loading successfully."""
    return is_model_ready


async def init_funasr() -> None:
    """Load the FunASR model without blocking the event loop.

    The model is constructed in the loop's default executor. On success the
    module-level ``asr_model`` is set and ``is_model_ready`` becomes ``True``.
    On failure the error is logged (with traceback) and ``is_model_ready``
    stays ``False``; the exception is not re-raised. Calling this again after
    a successful load is a no-op.
    """
    global asr_model, is_model_ready

    if is_model_ready:
        return

    logger.info("⏳ [ASR] 开始异步加载 funasr 模型...")
    start_time = time.time()

    def load() -> AutoModel:
        # Runs in a worker thread; model construction is slow and blocking.
        return AutoModel(
            model="paraformer-zh",
            vad_model="fsmn-vad",
            vad_kwargs={"max_single_segment_time": 30000},
            punc_model="ct-punc",
            device="cuda:0" if torch.cuda.is_available() else "cpu",
            disable_update=True,
        )

    try:
        # We are inside a coroutine, so a running loop is guaranteed;
        # get_running_loop() is the supported way to obtain it here.
        loop = asyncio.get_running_loop()
        asr_model = await loop.run_in_executor(None, load)
        is_model_ready = True
        logger.info(f"✅ [ASR] 模型加载成功!耗时 {(time.time() - start_time):.2f}s")
    except Exception as e:
        # Best-effort init: report the failure with traceback, leave the
        # module in the "not ready" state, and let the caller poll check_ready().
        logger.exception(f"❌ [ASR] 模型加载失败: {e}")
        is_model_ready = False


def get_text(audio_path) -> dict:
    """Transcribe one audio file with the loaded ASR model.

    Args:
        audio_path: Path of the audio file, passed to the model as its input.

    Returns:
        A dict with ``'text'`` (recognized text with ASCII spaces removed) and
        ``'timestamps'`` (the ``'timestamp'`` entry of the first result).

    Raises:
        RuntimeError: If the model has not been loaded yet.
    """
    model = asr_model
    if model is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise RuntimeError(
            "ASR model is not loaded; await init_funasr() before calling get_text()"
        )

    start_time = time.time()
    logger.info("⏳ 开始进行音频识别...")
    result = model.generate(input=[audio_path], cache={}, batch_size_s=300)
    logger.info(f"✅ 音频识别完成, 耗时 {(time.time() - start_time):.2f}秒")

    first = result[0]
    # Strip spaces from the recognized text.
    text = first['text'].replace(" ", "")
    timestamps = first['timestamp']

    return {
        'text': text,
        'timestamps': timestamps
    }


def generate_srt(audio_path, srt_path) -> None:
    """Transcribe ``audio_path`` and write the subtitles to ``srt_path``.

    The recognized text is split on punctuation (delimiters are kept as
    separate elements), then handed together with the timestamps to
    ``pyav.get_precise_srt``; the result is saved with ``pyav.save_srt_file``.

    Args:
        audio_path: Path of the audio file to transcribe.
        srt_path: Destination path passed to ``pyav.save_srt_file``.

    Raises:
        RuntimeError: If the ASR model has not been loaded yet.
    """
    result = get_text(audio_path)
    text = result['text']
    timestamp_list = result['timestamps']

    # Split the text on punctuation into a list, keeping the punctuation marks.
    text_list = _PUNCTUATION_SPLIT_RE.split(text)

    srt_list = pyav.get_precise_srt(text_list, timestamp_list)
    pyav.save_srt_file(srt_list, srt_path)