"""GPU and Ollama resource-inspection utilities.

Helpers for querying NVIDIA GPU memory (via NVML), PyTorch CUDA allocator
statistics, and the VRAM residency of models currently loaded by Ollama.
"""

import gc
import logging
from datetime import datetime

import pynvml
import torch

from setting import ollama_client

logger = logging.getLogger(__name__)


def get_gpu_memory_info(device_index=0):
    """Return the device name and total/used/free memory of one GPU.

    Args:
        device_index: NVML index of the GPU to query (default 0).

    Returns:
        dict with keys ``model``, ``mem_total``, ``mem_used``, ``mem_free``;
        memory values are human-readable strings in MB.
    """
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        return {
            "model": pynvml.nvmlDeviceGetName(handle),
            "mem_total": f"{info.total / 1024 ** 2:.2f} MB",
            "mem_used": f"{info.used / 1024 ** 2:.2f} MB",
            "mem_free": f"{info.free / 1024 ** 2:.2f} MB",
        }
    finally:
        # Always release the NVML session, even when a query above raises;
        # the previous version leaked it on error.
        pynvml.nvmlShutdown()


def get_torch_memory_usage():
    """Report the PyTorch CUDA caching allocator's footprint on the current device.

    Returns:
        dict with ``reserved`` (memory the allocator holds from the driver)
        and ``allocated`` (the part actually backing live tensors), both as
        strings in MB; or a dict with a ``message`` key when CUDA is absent.
    """
    if not torch.cuda.is_available():
        return {
            "message": "CUDA N/A",
        }

    device = torch.cuda.current_device()
    # `reserved` is what the process actually occupies on the GPU;
    # `allocated` is the portion of `reserved` in use by tensors.
    reserved = torch.cuda.memory_reserved(device) / 1024 ** 2
    allocated = torch.cuda.memory_allocated(device) / 1024 ** 2
    return {
        "reserved": f"{reserved:.2f} MB",
        "allocated": f"{allocated:.2f} MB",
    }


# --- Deep GPU-memory cleanup ---
def clear_gpu_memory():
    """Aggressively return cached CUDA memory to the driver.

    No-op when CUDA is unavailable. Garbage-collects Python objects first so
    that CUDA buffers they held become reclaimable by ``empty_cache`` in the
    same pass (the previous order collected last, deferring their release).
    """
    if torch.cuda.is_available():
        # Force Python-level garbage collection first so dead tensors'
        # buffers can actually be freed by the allocator below.
        gc.collect()
        # Return the allocator's cached blocks to the driver.
        torch.cuda.empty_cache()
        # Release CUDA IPC shared-memory segments.
        torch.cuda.ipc_collect()
        logger.info(f"[{datetime.now().strftime('%H:%M:%S')}] 🧹 GPU 显存深度清理完成")


def get_ollama_resource():
    """List models currently loaded by Ollama and their GPU residency.

    Returns:
        list of dicts, one per running model, with keys ``model_name``,
        ``size``, ``size_vram``, ``gpu_percentage`` and a human-readable
        ``stat`` (fully on GPU / mixed / fully on CPU). Empty list when no
        models are running or the Ollama service is unreachable.
    """
    result = []
    try:
        # Query the list of currently running models.
        response = ollama_client.ps()
        # `.get(...) or []` guards against a missing/empty 'models' key
        # (the previous `response['models']` could raise KeyError).
        for model in response.get('models') or []:
            name = model['name']
            size_vram = model.get('size_vram', 0)
            size = model.get('size', 0)
            # Fraction of the model weights resident in VRAM.
            gpu_percentage = (size_vram / size) * 100 if size > 0 else 0
            if gpu_percentage >= 100:
                stat = "🚀 完全运行在 GPU 上"
            elif gpu_percentage > 0:
                stat = f"🌓 混合模式 (GPU 占比 {gpu_percentage:.2f}%)"
            else:
                stat = "🐌 完全运行在 CPU 上"
            result.append({
                'model_name': name,
                'size': f"{size / 1024 ** 3:.2f} GB",
                'size_vram': f"{size_vram / 1024 ** 3:.2f} GB",
                'gpu_percentage': f"{gpu_percentage:.2f}",
                'stat': stat,
            })
    except Exception as e:
        # Best-effort boundary: log through the module logger (was `print`)
        # and return whatever was collected before the failure.
        logger.error("无法连接到 Ollama 服务: %s", e)
    return result