pygpu.py

import logging
from datetime import datetime
import gc

import pynvml
import torch

from setting import ollama_client

logger = logging.getLogger(__name__)

def get_gpu_memory_info(device_index=0):
    """Query total/used/free VRAM for one GPU via NVML."""
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
    info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    # print(f"GPU model: {pynvml.nvmlDeviceGetName(handle)}")
    # print(f"Total VRAM: {info.total / 1024 ** 2:.2f} MB")
    # print(f"Used VRAM: {info.used / 1024 ** 2:.2f} MB")
    # print(f"Free VRAM: {info.free / 1024 ** 2:.2f} MB")
    result = {
        "model": pynvml.nvmlDeviceGetName(handle),
        "mem_total": f"{info.total / 1024 ** 2:.2f} MB",
        "mem_used": f"{info.used / 1024 ** 2:.2f} MB",
        "mem_free": f"{info.free / 1024 ** 2:.2f} MB",
    }
    pynvml.nvmlShutdown()
    return result
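

# Example usage (a minimal sketch, not part of the original module): query
# every visible GPU. nvmlDeviceGetCount() is a standard NVML call; the
# function name _demo_gpu_memory_info is ours.
def _demo_gpu_memory_info():
    pynvml.nvmlInit()
    count = pynvml.nvmlDeviceGetCount()
    pynvml.nvmlShutdown()
    for idx in range(count):
        logger.info("GPU %d: %s", idx, get_gpu_memory_info(idx))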

def get_torch_memory_usage():
    """Report PyTorch's reserved vs. allocated CUDA memory on the current device."""
    if torch.cuda.is_available():
        # Index of the current CUDA device
        device = torch.cuda.current_device()
        # Memory reserved by PyTorch's caching allocator (the GPU memory
        # the process actually holds)
        reserved = torch.cuda.memory_reserved(device) / 1024 ** 2
        # Memory currently allocated to tensors (the in-use part of `reserved`)
        allocated = torch.cuda.memory_allocated(device) / 1024 ** 2
        return {
            "reserved": f"{reserved:.2f} MB",
            "allocated": f"{allocated:.2f} MB",
        }
    else:
        return {
            "message": "CUDA N/A",
        }
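

# Minimal sketch (ours, assuming a CUDA device is present) illustrating the
# reserved/allocated split: deleting a tensor lowers `allocated`, but the
# freed block stays in `reserved` until empty_cache() runs.
def _demo_torch_memory_usage():
    if not torch.cuda.is_available():
        return
    logger.info("before: %s", get_torch_memory_usage())
    x = torch.empty(1024, 1024, device="cuda")  # ~4 MB of float32
    logger.info("after alloc: %s", get_torch_memory_usage())
    del x
    logger.info("after del (cache still reserved): %s", get_torch_memory_usage())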

# --- Deep GPU memory cleanup ---
def clear_gpu_memory():
    if torch.cuda.is_available():
        # Release cached blocks held by PyTorch's allocator
        torch.cuda.empty_cache()
        # Release CUDA IPC (inter-process shared) memory
        torch.cuda.ipc_collect()
        # Force a Python-level garbage-collection pass
        gc.collect()
        logger.info(f"[{datetime.now().strftime('%H:%M:%S')}] 🧹 Deep GPU memory cleanup complete")
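

# Sketch of a typical call site (ours): run the allocation demo above, then
# deep-clean and confirm that `reserved` drops back down.
def _demo_clear_gpu_memory():
    _demo_torch_memory_usage()
    clear_gpu_memory()
    logger.info("after cleanup: %s", get_torch_memory_usage())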

def get_ollama_resource():
    """List models currently loaded by Ollama and how much of each sits in VRAM."""
    result = []
    try:
        # Fetch the list of currently running models
        response = ollama_client.ps()
        if not response['models']:
            return result
        for model in response['models']:
            name = model['name']
            size_vram = model.get('size_vram', 0)
            size = model.get('size', 0)
            # Share of the model resident in VRAM
            if size > 0:
                gpu_percentage = (size_vram / size) * 100
            else:
                gpu_percentage = 0
            # print(f"Model name: {name}")
            # print(f"Total size: {size / 1024 ** 3:.2f} GB")
            # print(f"VRAM size: {size_vram / 1024 ** 3:.2f} GB")
            if gpu_percentage >= 100:
                stat = "🚀 fully on GPU"
            elif gpu_percentage > 0:
                stat = f"🌓 mixed mode ({gpu_percentage:.2f}% on GPU)"
            else:
                stat = "🐌 fully on CPU"
            result.append({
                'model_name': name,
                'size': f"{size / 1024 ** 3:.2f} GB",
                'size_vram': f"{size_vram / 1024 ** 3:.2f} GB",
                'gpu_percentage': f"{gpu_percentage:.2f}",
                'stat': stat,
            })
    except Exception as e:
        logger.error(f"Unable to reach the Ollama service: {e}")
    return result
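

# Sketch of running the module directly (ours, not in the original file);
# assumes an Ollama server is reachable through the `ollama_client`
# configured in setting.py.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    print(get_gpu_memory_info())
    _demo_clear_gpu_memory()
    for entry in get_ollama_resource():
        print(entry)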