| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- import logging
- import asyncio
- import re
- import time
- from typing import Optional
- import service.pyav as pyav
- import torch
- from funasr import AutoModel
# Logger scoped to this module's import name.
logger = logging.getLogger(__name__)

# Readiness flag: init_funasr() sets it to True only after the model has been
# built successfully, and back to False if loading fails.
is_model_ready = False

# Shared model handle: None until init_funasr() assigns the loaded AutoModel.
asr_model: Optional[AutoModel] = None
def get_asr_model():
    """Return the module-level ASR model handle.

    The value is whatever ``asr_model`` currently holds, which is ``None``
    until a model has been assigned to it.
    """
    # Reading a module global needs no ``global`` declaration.
    return asr_model
def check_ready():
    """Report whether the model has finished loading.

    Returns the current value of the module-level ``is_model_ready`` flag.
    """
    # Reading a module global needs no ``global`` declaration.
    return is_model_ready
async def init_funasr():
    """Load the FunASR model once without blocking the event loop.

    Returns immediately when ``is_model_ready`` is already true. Otherwise the
    ``AutoModel`` is constructed in the loop's default executor, stored in the
    module-level ``asr_model``, and ``is_model_ready`` is set to ``True``.

    Any exception raised while loading is logged together with its traceback
    and then suppressed; ``is_model_ready`` is left ``False`` in that case, so
    callers must consult ``check_ready()`` rather than rely on an exception.
    """
    global asr_model, is_model_ready

    if is_model_ready:
        return

    logger.info("⏳ [ASR] 开始异步加载 funasr 模型...")
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments.
    started = time.perf_counter()

    def load():
        # Synchronous model construction; runs in a worker thread so the
        # event loop stays responsive.
        return AutoModel(
            model="paraformer-zh",
            vad_model="fsmn-vad",
            # NOTE(review): presumably milliseconds (30 s) - confirm against
            # the FunASR VAD documentation.
            vad_kwargs={"max_single_segment_time": 30000},
            punc_model="ct-punc",
            # Use the first CUDA device when available, otherwise the CPU.
            device="cuda:0" if torch.cuda.is_available() else "cpu",
            disable_update=True,
        )

    try:
        # Inside a coroutine get_running_loop() is preferred over
        # get_event_loop(): it always returns the loop that is executing us.
        loop = asyncio.get_running_loop()
        asr_model = await loop.run_in_executor(None, load)
        is_model_ready = True
        logger.info(f"✅ [ASR] 模型加载成功!耗时 {(time.perf_counter() - started):.2f}s")
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception(f"❌ [ASR] 模型加载失败: {e}")
        is_model_ready = False
def get_text(audio_path):
    """Run speech recognition on one audio input.

    Args:
        audio_path: Audio input handed to the model as a single-element list
            (presumably a file path - confirm against callers).

    Returns:
        A dict with two keys:
        ``'text'`` - the first result's ``'text'`` with every ASCII space
        removed;
        ``'timestamps'`` - the first result's ``'timestamp'`` value, passed
        through unchanged.

    Raises:
        RuntimeError: If the module-level model has not been loaded yet.
    """
    model = asr_model
    if model is None:
        # Fail with an actionable message instead of the opaque
        # "'NoneType' object has no attribute 'generate'".
        raise RuntimeError(
            "ASR model is not loaded; await init_funasr() and check "
            "check_ready() before calling get_text()"
        )

    # perf_counter() is monotonic, so the logged duration is reliable.
    started = time.perf_counter()
    logger.info("⏳ 开始进行音频识别...")
    result = model.generate(input=[audio_path], cache={}, batch_size_s=300)
    logger.info(f"✅ 音频识别完成, 耗时 {(time.perf_counter() - started):.2f}秒")

    # Only the first entry is used because exactly one input was submitted.
    first = result[0]
    return {
        # Strip the spaces from the recognised text.
        'text': first['text'].replace(" ", ""),
        'timestamps': first['timestamp'],
    }
def generate_srt(audio_path, srt_path):
    """Recognise ``audio_path`` and emit subtitle data for ``srt_path``.

    The recognised text is cut at clause/sentence punctuation, the pieces and
    the recogniser's timestamps are passed to ``pyav.get_precise_srt``, and
    its result is handed to ``pyav.save_srt_file`` together with ``srt_path``.

    Args:
        audio_path: Audio input forwarded to ``get_text``.
        srt_path: Destination forwarded to ``pyav.save_srt_file``.
    """
    recognized = get_text(audio_path)

    # The capturing group makes re.split keep each punctuation mark as its own
    # list element. Both full-width and ASCII forms of the same five marks are
    # accepted, so anything that split before still splits.
    # NOTE(review): the punctuation model is expected to emit full-width
    # marks, which an ASCII-only class would never match - confirm.
    text_list = re.split(r"([。！？；，!?;,])", recognized['text'])

    srt_list = pyav.get_precise_srt(text_list, recognized['timestamps'])
    pyav.save_srt_file(srt_list, srt_path)
|