import logging
import time

from setting import ollama_client

logger = logging.getLogger(__name__)

# Model served by the local Ollama instance; shared by both entry points below.
model_name = "qwen2.5:1.5b"


def translate2zh(text):
    """Translate an English image caption into natural Chinese via Ollama.

    Args:
        text: English image description to translate.

    Returns:
        The model's translated Chinese text (``message.content`` of the
        Ollama chat response).

    Raises:
        Exception: whatever ``ollama_client.chat`` raises; logged, then
            re-raised with its original traceback.
    """
    start_time = time.time()
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("⏳ 正在通过 Ollama 调用 %s...", model_name)
    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文,直接输出结果,不要解释:\n{text}"
    try:
        # Ollama handles 4-bit loading and VRAM allocation automatically.
        response = ollama_client.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': prompt}],
        )
    except Exception as e:
        logger.error("❌ Ollama 调用失败: %s", e)
        # Bare ``raise`` preserves the original traceback (``raise e`` would
        # add a redundant re-raise frame).
        raise
    duration = time.time() - start_time
    logger.info("✅ 推理成功!耗时 %.2f秒", duration)
    return response['message']['content']


def summarize(content):
    """Summarize *content* using the configured Ollama model.

    Args:
        content: Prompt text sent verbatim as the user message.

    Returns:
        dict: ``{"model_name": <model used>, "result": <stripped summary>}``.
    """
    response = ollama_client.chat(
        model=model_name,
        messages=[{'role': 'user', 'content': content}],
        # Lower temperature makes the summary more stable/deterministic.
        options={"temperature": 0.3},
    )
    result = response['message']['content'].strip()
    return {
        "model_name": model_name,
        "result": result,
    }