```python
import logging
import time

from setting import ollama_client

logger = logging.getLogger(__name__)

model_name = "qwen2.5:1.5b"


def translate2zh(text):
    start_time = time.time()
    logger.info(f"⏳ Calling {model_name} via Ollama...")
    # Prompt (in Chinese): "You are a professional image-caption translator.
    # Translate the following English description into natural, idiomatic
    # Chinese. Output the result directly, without explanation."
    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文,直接输出结果,不要解释:\n{text}"
    try:
        # Ollama handles 4-bit loading and VRAM allocation automatically
        response = ollama_client.chat(
            model=model_name,
            messages=[{'role': 'user', 'content': prompt}],
            # options={
            #     "num_gpu": 1,       # force GPU usage
            #     "temperature": 0.7,
            #     "top_p": 0.9
            # }
        )
        duration = time.time() - start_time
        logger.info(f"✅ Inference succeeded in {duration:.2f}s")
        return response['message']['content']
    except Exception as e:
        logger.error(f"❌ Ollama call failed: {e}")
        raise


def summarize(content):
    response = ollama_client.chat(
        model=model_name,
        messages=[{'role': 'user', 'content': content}],
        options={"temperature": 0.3}  # a lower temperature makes summaries more stable
    )
    result = response['message']['content'].strip()
    return {
        "model_name": model_name,
        "result": result
    }
```
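The module imports `ollama_client` from a local `setting` module that is not shown here. A minimal sketch of what that module might contain, assuming the official `ollama` Python package and an Ollama server running locally on the default port:

```python
# setting.py — hypothetical sketch; the actual module is not shown above
from ollama import Client

# Connect to a locally running Ollama server (default port 11434)
ollama_client = Client(host="http://localhost:11434")
```

With that client in place, `translate2zh("A cat sitting on a windowsill")` returns the Chinese translation as a string, while `summarize(...)` returns a dict containing the model name and the stripped summary text.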