```python
import logging
import os

# Use the Hugging Face mirror endpoint (set before any download happens)
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

import time

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

logger = logging.getLogger(__name__)

model_name = "Qwen/Qwen2.5-1.5B-Instruct"
start_time = time.time()
logger.info(f"⏳ Loading model {model_name} ...")

try:
    # 1. Define the 4-bit quantization config
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,  # fp16 is recommended on a GTX 1650
        bnb_4bit_quant_type="nf4",             # higher-accuracy quantization type
        bnb_4bit_use_double_quant=True         # double quantization saves extra VRAM
    )
    # 2. Load the model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,  # use the config object
        device_map="auto"                         # place layers on the GPU automatically
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    logger.info(f"✅ Model {model_name} loaded in {(time.time() - start_time):.2f}s")
except Exception as e:
    logger.error(f"❌ Failed to load model {model_name}: {e}")
    raise


def translate2zh(text):
    # Build a prompt tailored to translating Moondream-style image captions.
    # The instruction stays in Chinese because the model is asked to translate
    # into Chinese; it says: "You are a professional image-caption translator.
    # Translate the English description below into natural, idiomatic Chinese.
    # Output only the result, with no explanation."
    prompt = f"你是一个专业的图像描述翻译官。请将下面这段英文描述翻译成自然、地道的中文,直接输出结果,不要解释:\n{text}"
    messages = [{"role": "user", "content": prompt}]
    input_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)

    # Inference
    max_tokens = 128
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            do_sample=False  # greedy decoding keeps the translation deterministic
        )

    # Decode
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Keep only the assistant's reply (the text after the "assistant" marker
    # inserted by the chat template)
    final_result = response.split("assistant\n")[-1].strip()
    return final_result


def summarize():
    # Placeholder, not implemented yet
    pass
```
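For reference, a minimal usage sketch; the sample caption is a hypothetical English description of the kind Moondream produces:

```python
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # Hypothetical sample caption; any English image description works here.
    caption = "A small brown dog runs across a grassy field under a clear sky."
    print(translate2zh(caption))
```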