Explorar el Código

处理机器和应用的监控

reghao hace 4 años
padre
commit
4ae2035ee5
Se han modificado 22 ficheros con 514 adiciones y 184 borrados
  1. 4 3
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/crud/MachineHostCrudService.java
  2. 21 3
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/crud/MachineInfoCrudService.java
  3. 36 0
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/query/MachineHostQuery.java
  4. 13 0
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/query/MachineInfoQuery.java
  5. 12 3
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/entity/po/MachineHost.java
  6. 0 35
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/entity/po/MachineStat.java
  7. 3 1
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/repository/MachineHostRepository.java
  8. 39 22
      dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/service/DagentOpsService.java
  9. 4 4
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/controller/MonitorController.java
  10. 7 8
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/db/MonitorJobCrudService.java
  11. 15 0
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/entity/JobId.java
  12. 9 0
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/entity/JobType.java
  13. 10 1
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/entity/MonitorJob.java
  14. 106 0
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorJobData.java
  15. 5 4
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorListService.java
  16. 14 10
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorScheduler.java
  17. 4 3
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorService.java
  18. 45 29
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorServiceImpl.java
  19. 8 2
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/job/AppHealthCheckJob.java
  20. 0 56
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/job/MachineHeartbeatCheckJob.java
  21. 132 0
      dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/job/MachineStatCheckJob.java
  22. 27 0
      dmaster/src/test/java/cn/reghao/autodop/dmaster/machine/db/query/MachineHostQueryTest.java

+ 4 - 3
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/crud/MachineHostCrudService.java

@@ -1,6 +1,7 @@
 package cn.reghao.autodop.dmaster.machine.db.crud;
 
 import cn.reghao.autodop.dmaster.machine.entity.po.MachineHost;
+import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
 import cn.reghao.autodop.dmaster.machine.repository.MachineHostRepository;
 import org.springframework.cache.annotation.CacheConfig;
 import org.springframework.stereotype.Service;
@@ -33,7 +34,7 @@ public class MachineHostCrudService {
         return optional.orElse(null);
     }
 
-    /*public MachineHost selectByUk(String machineId) {
-        return hostRepository.findByMachineId(machineId);
-    }*/
+    public MachineHost selectByUk(MachineInfo machineInfo) {
+        return hostRepository.findByMachineInfo(machineInfo);
+    }
 }

+ 21 - 3
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/crud/MachineInfoCrudService.java

@@ -2,9 +2,13 @@ package cn.reghao.autodop.dmaster.machine.db.crud;
 
 import cn.reghao.autodop.dmaster.machine.entity.po.MachineHost;
 import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
+import cn.reghao.autodop.dmaster.machine.repository.MachineHostRepository;
 import cn.reghao.autodop.dmaster.machine.repository.MachineInfoRepository;
+import cn.reghao.autodop.dmaster.monitor.db.MonitorJobCrudService;
+import cn.reghao.autodop.dmaster.monitor.entity.JobType;
+import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
 import cn.reghao.autodop.dmaster.monitor.service.MonitorService;
-import cn.reghao.autodop.dmaster.monitor.service.job.MachineHeartbeatCheckJob;
+import cn.reghao.autodop.dmaster.monitor.service.job.MachineStatCheckJob;
 import org.springframework.cache.annotation.CacheConfig;
 import org.springframework.data.domain.Page;
 import org.springframework.data.domain.Pageable;
@@ -20,21 +24,35 @@ import java.util.Optional;
 @Service
 public class MachineInfoCrudService {
     private MachineInfoRepository infoRepository;
+    private MachineHostRepository hostRepository;
     private MachineStatCrudService statCrudService;
     private MonitorService monitorService;
+    private MonitorJobCrudService jobCrudService;
 
     public MachineInfoCrudService(MachineInfoRepository infoRepository,
+                                  MachineHostRepository hostRepository,
                                   MachineStatCrudService statCrudService,
                                   MonitorService monitorService) {
         this.infoRepository = infoRepository;
+        this.hostRepository = hostRepository;
         this.statCrudService = statCrudService;
         this.monitorService = monitorService;
     }
 
     public void insertOrUpdate(MachineInfo machineInfo) {
         MachineInfo infoEntity = infoRepository.save(machineInfo);
-        MachineHost machineHost = new MachineHost(infoEntity);
+        MachineHost machineHost = hostRepository.findByMachineInfo(infoEntity);
+        if (machineHost == null) {
+            machineHost = new MachineHost(infoEntity);
+            hostRepository.save(machineHost);
+        }
+    }
 
+    private void addMonitorJob(String machineId) {
+        String jobClassName = MachineStatCheckJob.class.getSimpleName();
+        String jobId = String.format("%s-%s", machineId, jobClassName);
+        MonitorJob monitorJob = new MonitorJob(jobId, jobClassName, JobType.machine.name());
+        jobCrudService.insertOrUpdate(monitorJob);
     }
 
     public void delete(MachineInfo machineInfo) {
@@ -42,7 +60,7 @@ public class MachineInfoCrudService {
         String machineId = machineInfo.getMachineId();
         statCrudService.deleteByUk(machineId);
 
-        String jobClassName = MachineHeartbeatCheckJob.class.getSimpleName();
+        String jobClassName = MachineStatCheckJob.class.getSimpleName();
         String jobId = String.format("%s-%s", machineId, jobClassName);
         monitorService.deleteJob(jobId);
         infoRepository.delete(machineInfo);

+ 36 - 0
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/query/MachineHostQuery.java

@@ -0,0 +1,36 @@
+package cn.reghao.autodop.dmaster.machine.db.query;
+
+import cn.reghao.autodop.dmaster.machine.entity.po.MachineHost;
+import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
+import cn.reghao.autodop.dmaster.machine.repository.MachineHostRepository;
+import org.springframework.data.jpa.domain.Specification;
+import org.springframework.stereotype.Service;
+
+import javax.persistence.criteria.Join;
+import javax.persistence.criteria.JoinType;
+import javax.persistence.criteria.Predicate;
+import java.util.List;
+
+/**
+ * @author reghao
+ * @date 2021-06-17 22:52:32
+ */
+@Service
+public class MachineHostQuery {
+    private MachineHostRepository hostRepository;
+
+    public MachineHostQuery(MachineHostRepository hostRepository) {
+        this.hostRepository = hostRepository;
+    }
+
+    public MachineHost query(String machineId) {
+        Specification<MachineHost> specification = (root, query, cb) -> {
+            Join<MachineHost, MachineInfo> innerJoin = root.join("machineInfo", JoinType.INNER);
+            Predicate predicate = cb.equal(innerJoin.get("machineId"), machineId);
+            //
+            return cb.and(predicate);
+        };
+        List<MachineHost> list = hostRepository.findAll(specification);
+        return list.isEmpty() ? null : list.get(0);
+    }
+}

+ 13 - 0
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/db/query/MachineInfoQuery.java

@@ -2,8 +2,11 @@ package cn.reghao.autodop.dmaster.machine.db.query;
 
 import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
 import cn.reghao.autodop.dmaster.machine.repository.MachineInfoRepository;
+import org.springframework.data.jpa.domain.Specification;
 import org.springframework.stereotype.Service;
 
+import javax.persistence.criteria.Predicate;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 
@@ -22,4 +25,14 @@ public class MachineInfoQuery {
     public List<MachineInfo> queryAll(Map<String, String> kv) {
         return infoRepository.findAll();
     }
+
+    public MachineInfo query(String machineId) {
+        Specification<MachineInfo> specification = (root, query, cb) -> {
+            Predicate predicate = cb.equal(root.get("machineId"), machineId);
+            // select * from app_building where app_id like '%test%' and app_name like '%测试%'
+            return cb.and(predicate);
+        };
+
+        return null;
+    }
 }

+ 12 - 3
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/entity/po/MachineHost.java

@@ -1,13 +1,17 @@
 package cn.reghao.autodop.dmaster.machine.entity.po;
 
+import cn.reghao.autodop.dmaster.app.constant.EnvType;
 import cn.reghao.autodop.dmaster.common.orm.BaseEntity;
 import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
+import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
 import lombok.Data;
 import lombok.EqualsAndHashCode;
 import lombok.NoArgsConstructor;
+import org.hibernate.annotations.LazyCollection;
+import org.hibernate.annotations.LazyCollectionOption;
 
-import javax.persistence.Entity;
-import javax.persistence.OneToOne;
+import javax.persistence.*;
+import java.util.List;
 
 /**
  * @author reghao
@@ -21,10 +25,15 @@ public class MachineHost extends BaseEntity<Integer> {
     @OneToOne(optional = false)
     private MachineInfo machineInfo;
     private String env;
-    private String status;
+    @ManyToMany(cascade = CascadeType.REFRESH)
+    @JoinColumn(name = "notify_group_id", foreignKey = @ForeignKey(value = ConstraintMode.NO_CONSTRAINT))
+    @LazyCollection(LazyCollectionOption.FALSE)
+    private List<NotifyGroup> notifyGroups;
+
     // TODO 根据机器所属的地区,机房等维度来分组
 
     public MachineHost(MachineInfo machineInfo) {
         this.machineInfo = machineInfo;
+        this.env = EnvType.test.name();
     }
 }

+ 0 - 35
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/entity/po/MachineStat.java

@@ -9,7 +9,6 @@ import cn.reghao.autodop.common.dagent.machine.network.TcpState;
 import cn.reghao.autodop.common.dagent.machine.os.OsStat;
 import cn.reghao.autodop.common.utils.ByteConverter;
 import cn.reghao.autodop.common.utils.ByteType;
-import cn.reghao.autodop.common.utils.PercentCalculator;
 import cn.reghao.autodop.dmaster.common.orm.BaseDocument;
 import cn.reghao.autodop.dmaster.machine.entity.vo.DiskUsage;
 import cn.reghao.autodop.dmaster.machine.entity.vo.MemoryUsage;
@@ -68,44 +67,10 @@ public class MachineStat extends BaseDocument {
         return memoryUsage;
     }
 
-    public void memoryUsageAlert() {
-        long total = memoryInfo.getTotal();
-        long avail = memoryInfo.getAvailable();
-
-        ByteConverter converter = new ByteConverter();
-        String availSize = converter.convertStr(ByteType.KiB, ByteType.MiB, avail);
-
-        double value = PercentCalculator.percentValue(avail, total);
-        String percent = PercentCalculator.percent(value);
-        double minimalPercent = 0.20;
-        if (value < minimalPercent) {
-            // TODO 发出告警通知
-            log.info("{} 上可用的内存仅占内存的 {},共计 {}", machineId, percent, availSize);
-        }
-    }
-
     public List<DiskUsage> getDiskUsages() {
         return diskInfos.stream().map(DiskUsage::new).collect(Collectors.toList());
     }
 
-    public void diskUsageAlert() {
-        long total = 0, avail = 0;
-        for (DiskInfo diskInfo : diskInfos) {
-            total += diskInfo.getSize();
-            avail += diskInfo.getAvail();
-        }
-
-        ByteConverter converter = new ByteConverter();
-        String availSize = converter.convert(ByteType.Bytes, ByteType.MiB, avail) + ByteType.MiB.name();
-        double value = PercentCalculator.percentValue(avail, total);
-        String percent = PercentCalculator.percent(value);
-        double minimalPercent = 0.20;
-        if (value < minimalPercent) {
-            // TODO 发出告警通知
-            log.info("{} 上可用的磁盘空间仅占总磁盘空间的 {},共计 {}", machineId, percent, availSize);
-        }
-    }
-
     public Integer getTcpConnNum() {
         List<TcpConnStat> list = networkStat.getTcpConnStats().stream()
                 .filter(tcpConnStat -> tcpConnStat.getTcpState().equals(TcpState.TCP_ESTABLISHED.name()))

+ 3 - 1
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/repository/MachineHostRepository.java

@@ -5,12 +5,14 @@ import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
 import org.springframework.data.domain.Page;
 import org.springframework.data.domain.Pageable;
 import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
 
 /**
  * @author reghao
  * @date 2020-01-21 14:53:03
  */
-public interface MachineHostRepository extends JpaRepository<MachineHost, Integer> {
+public interface MachineHostRepository
+        extends JpaRepository<MachineHost, Integer>, JpaSpecificationExecutor<MachineHost> {
     MachineHost findByMachineInfo(MachineInfo machineInfo);
     Page<MachineHost> findByIsDeleteFalse(Pageable pageable);
 }

+ 39 - 22
dmaster/src/main/java/cn/reghao/autodop/dmaster/machine/service/DagentOpsService.java

@@ -3,18 +3,23 @@ package cn.reghao.autodop.dmaster.machine.service;
 import cn.reghao.autodop.dmaster.machine.db.crud.MachineHostCrudService;
 import cn.reghao.autodop.dmaster.machine.db.crud.MachineInfoCrudService;
 import cn.reghao.autodop.dmaster.machine.db.crud.MachineStatCrudService;
+import cn.reghao.autodop.dmaster.machine.entity.po.MachineHost;
 import cn.reghao.autodop.dmaster.machine.entity.po.MachineStat;
 import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
+import cn.reghao.autodop.dmaster.monitor.db.MonitorJobCrudService;
+import cn.reghao.autodop.dmaster.monitor.entity.JobId;
+import cn.reghao.autodop.dmaster.monitor.entity.JobType;
 import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
+import cn.reghao.autodop.dmaster.monitor.service.MonitorJobData;
 import cn.reghao.autodop.dmaster.monitor.service.MonitorService;
-import cn.reghao.autodop.dmaster.monitor.service.job.MachineHeartbeatCheckJob;
+import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
 import cn.reghao.autodop.dmaster.sys.db.AppRuntimeLogCrudService;
 import cn.reghao.autodop.dmaster.sys.entity.AppRuntimeLog;
 import lombok.extern.slf4j.Slf4j;
-import org.quartz.JobDataMap;
 import org.springframework.stereotype.Service;
 
 import java.time.LocalDateTime;
+import java.util.List;
 
 /**
  * DagentOps 实现
@@ -25,22 +30,28 @@ import java.time.LocalDateTime;
 @Slf4j
 @Service
 public class DagentOpsService {
-    private MachineHostCrudService hostCrudService;
     private MachineInfoCrudService infoCrudService;
+    private MachineHostCrudService hostCrudService;
     private MachineStatCrudService statCrudService;
     private AppRuntimeLogCrudService logCrudService;
     private MonitorService monitorService;
+    private MonitorJobData monitorJobData;
+    private MonitorJobCrudService jobCrudService;
 
-    public DagentOpsService(MachineHostCrudService hostCrudService,
-                            MachineInfoCrudService infoCrudService,
+    public DagentOpsService(MachineInfoCrudService infoCrudService,
+                            MachineHostCrudService hostCrudService,
                             MachineStatCrudService statCrudService,
                             AppRuntimeLogCrudService logCrudService,
-                            MonitorService monitorService) {
-        this.hostCrudService = hostCrudService;
+                            MonitorService monitorService,
+                            MonitorJobData monitorJobData,
+                            MonitorJobCrudService jobCrudService) {
         this.infoCrudService = infoCrudService;
+        this.hostCrudService = hostCrudService;
         this.statCrudService = statCrudService;
         this.logCrudService = logCrudService;
         this.monitorService = monitorService;
+        this.monitorJobData = monitorJobData;
+        this.jobCrudService = jobCrudService;
     }
 
     public void start(MachineInfo machineInfo) {
@@ -51,34 +62,40 @@ public class DagentOpsService {
             machineInfo.setCreateTime(infoEntity.getCreateTime());
             machineInfo.setUpdateTime(LocalDateTime.now());
             infoCrudService.insertOrUpdate(machineInfo);
+
+            MachineHost machineHost = hostCrudService.selectByUk(infoEntity);
+            List<NotifyGroup> notifyGroups = machineHost.getNotifyGroups();
+            if (!notifyGroups.isEmpty()) {
+                String jobId = monitorJobData.machineMonitorJobId(machineId).getJobId();
+                MonitorJob monitorJob = jobCrudService.selectByUk(jobId);
+                try {
+                    monitorService.startJob(monitorJob);
+                } catch (Exception e) {
+                    e.printStackTrace();
+                    log.error("任务启动失败 {}", e.getMessage());
+                }
+            }
         } else {
             // 先存储数据,然后再添加监控任务,若任务添加失败则回滚
             infoCrudService.insertOrUpdate(machineInfo);
+            addMonitorJob(machineId);
         }
-        addOrStartMonitorJob(machineId);
     }
 
-    private void addOrStartMonitorJob(String machineId) {
-        String jobClassName = MachineHeartbeatCheckJob.class.getSimpleName();
-        String jobId = String.format("%s-%s", machineId, jobClassName);
-
-        JobDataMap jobDataMap = new JobDataMap();
-
-        MonitorJob monitorJob = new MonitorJob(jobId, jobClassName);
-        try {
-            monitorService.addOrModifyJob(monitorJob, jobDataMap);
-        } catch (Exception e) {
-            log.error("添加任务失败 {}", e.getMessage());
-        }
+    private void addMonitorJob(String machineId) {
+        JobId jobId = monitorJobData.machineMonitorJobId(machineId);
+        MonitorJob monitorJob = new MonitorJob(jobId.getJobId(), jobId.getJobClassName(), JobType.machine.name());
+        jobCrudService.insertOrUpdate(monitorJob);
     }
 
     public void heartbeat(MachineStat machineStat) {
-        // TODO 检测内存和磁盘使用量
         statCrudService.insertOrUpdate(machineStat);
     }
 
     public void shutdown(MachineStat machineStat) {
-        // TODO 停止心跳检测任务
+        String machineId = machineStat.getMachineId();
+        JobId jobId = monitorJobData.machineMonitorJobId(machineId);
+        monitorService.pauseJob(jobId.getJobId());
         statCrudService.insertOrUpdate(machineStat);
     }
 

+ 4 - 4
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/controller/MonitorController.java

@@ -41,7 +41,7 @@ public class MonitorController {
     @ApiOperation(value = "编辑监控任务")
     @PostMapping(value = "/job", produces = MediaType.APPLICATION_JSON_VALUE)
     public ResponseEntity<String> editMonitorJob(@Valid MonitorJob monitorJob) throws Exception {
-        monitorService.addOrModifyJob(monitorJob, null);
+        monitorService.rescheduleJob(monitorJob);
         return ResponseEntity.ok().body(WebBody.success());
     }
 
@@ -49,10 +49,10 @@ public class MonitorController {
     @PostMapping(value = "/job/{jobId}/{enable}", produces = MediaType.APPLICATION_JSON_VALUE)
     public ResponseEntity<String> monitorJobStatus(@PathVariable("jobId") String jobId,
                                                    @PathVariable("enable") Boolean enable) {
-        if (enable) {
-            monitorService.startJob(jobId);
-        } else {
+        if (!enable) {
             monitorService.pauseJob(jobId);
+        } else {
+            monitorService.resumeJob(jobId);
         }
         return ResponseEntity.ok().body(WebBody.success());
     }

+ 7 - 8
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/db/MonitorJobCrudService.java

@@ -10,22 +10,21 @@ import org.springframework.stereotype.Service;
  */
 @Service
 public class MonitorJobCrudService {
-    private MonitorJobRepository monitorJobRepository;
+    private MonitorJobRepository jobRepository;
 
-    public MonitorJobCrudService(MonitorJobRepository monitorJobRepository) {
-        this.monitorJobRepository = monitorJobRepository;
+    public MonitorJobCrudService(MonitorJobRepository jobRepository) {
+        this.jobRepository = jobRepository;
     }
 
-    public void insert(MonitorJob monitorJob) {
-    }
-
-    public void update(MonitorJob monitorJob) {
+    public void insertOrUpdate(MonitorJob monitorJob) {
+        jobRepository.save(monitorJob);
     }
 
     public void delete(MonitorJob monitorJob) {
+        jobRepository.delete(monitorJob);
     }
 
     public MonitorJob selectByUk(String jobId) {
-        return monitorJobRepository.findByJobId(jobId);
+        return jobRepository.findByJobId(jobId);
     }
 }

+ 15 - 0
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/entity/JobId.java

@@ -0,0 +1,15 @@
+package cn.reghao.autodop.dmaster.monitor.entity;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+/**
+ * @author reghao
+ * @date 2021-07-06 17:29:26
+ */
+@AllArgsConstructor
+@Data
+public class JobId {
+    private String jobId;
+    private String jobClassName;
+}

+ 9 - 0
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/entity/JobType.java

@@ -0,0 +1,9 @@
+package cn.reghao.autodop.dmaster.monitor.entity;
+
+/**
+ * @author reghao
+ * @date 2021-07-06 09:49:45
+ */
+public enum JobType {
+    machine,app
+}

+ 10 - 1
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/entity/MonitorJob.java

@@ -24,14 +24,23 @@ public class MonitorJob extends BaseEntity<Integer> {
     private String jobId;
     @NotBlank(message = "任务类名不能为空白字符串")
     private String jobClassName;
+    @NotBlank(message = "任务类型不能为空白字符串")
+    private String jobType;
     @NotBlank(message = "CRON 表达式不能为空白字符串")
     private String cronExp;
     private Boolean enable;
+    // 已通知次数
+    private Integer notifyCount;
+    // 最大通知次数
+    private Integer maxNotifyCount;
 
-    public MonitorJob(String jobId, String jobClassName) {
+    public MonitorJob(String jobId, String jobClassName, String jobType) {
         this.jobId = jobId;
         this.jobClassName = jobClassName;
+        this.jobType = jobType;
         this.cronExp = "0/10 * * * * ?";
         this.enable = false;
+        this.notifyCount = 0;
+        this.maxNotifyCount = 3;
     }
 }

+ 106 - 0
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorJobData.java

@@ -0,0 +1,106 @@
+package cn.reghao.autodop.dmaster.monitor.service;
+
+import cn.reghao.autodop.common.http.DefaultWebRequest;
+import cn.reghao.autodop.dmaster.app.db.query.config.AppQuery;
+import cn.reghao.autodop.dmaster.app.repository.AppRunningRepository;
+import cn.reghao.autodop.dmaster.machine.db.crud.MachineStatCrudService;
+import cn.reghao.autodop.dmaster.machine.db.query.MachineHostQuery;
+import cn.reghao.autodop.dmaster.monitor.db.MonitorJobCrudService;
+import cn.reghao.autodop.dmaster.monitor.entity.JobId;
+import cn.reghao.autodop.dmaster.monitor.entity.JobType;
+import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
+import cn.reghao.autodop.dmaster.monitor.service.job.AppHealthCheckJob;
+import cn.reghao.autodop.dmaster.monitor.service.job.MachineStatCheckJob;
+import cn.reghao.autodop.dmaster.notification.service.NotifyService;
+import lombok.Data;
+import lombok.extern.slf4j.Slf4j;
+import org.quartz.JobDataMap;
+import org.springframework.stereotype.Service;
+
+/**
+ * 监控任务需要的数据
+ *
+ * @author reghao
+ * @date 2021-07-06 09:25:50
+ */
+@Slf4j
+@Service
+public class MonitorJobData {
+    private MonitorJobCrudService jobCrudService;
+    private NotifyService notifyService;
+    private MachineStatCrudService statCrudService;
+    private MachineHostQuery hostQuery;
+    private AppRunningRepository runningRepository;
+    private AppQuery appQuery;
+
+    public MonitorJobData(MonitorJobCrudService jobCrudService,
+                          NotifyService notifyService,
+                          MachineStatCrudService statCrudService,
+                          MachineHostQuery hostQuery,
+                          AppRunningRepository runningRepository,
+                          AppQuery appQuery) {
+        this.jobCrudService = jobCrudService;
+        this.notifyService = notifyService;
+        this.statCrudService = statCrudService;
+        this.hostQuery = hostQuery;
+        this.runningRepository = runningRepository;
+        this.appQuery = appQuery;
+    }
+
+    public JobId machineMonitorJobId(String machineId) {
+        String jobClassName = MachineStatCheckJob.class.getSimpleName();
+        String jobId = String.format("%s-%s", machineId, jobClassName);
+        return new JobId(jobId, jobClassName);
+    }
+
+    public JobId appMonitorJobId(String appId, String machineId) {
+        String jobClassName = AppHealthCheckJob.class.getSimpleName();
+        String jobId = String.format("%s-%s-%s", appId, machineId, jobClassName);
+        return new JobId(jobId, jobClassName);
+    }
+
+    public JobDataMap jobDataMap(MonitorJob monitorJob) {
+        String jobId = monitorJob.getJobId();
+        String jobType = monitorJob.getJobType();
+        JobDataMap jobDataMap;
+        switch (JobType.valueOf(jobType)) {
+            case machine:
+                jobDataMap = machineDataMap(jobId);
+                break;
+            case app:
+                jobDataMap = appDataMap(jobId);
+                break;
+            default:
+                jobDataMap = null;
+        }
+        return jobDataMap;
+    }
+
+    private JobDataMap machineDataMap(String jobId) {
+        String machineId = jobId.split("-")[0];
+
+        JobDataMap jobDataMap = new JobDataMap();
+        jobDataMap.put("notifyService", notifyService);
+        jobDataMap.put("jobCrudService", jobCrudService);
+        jobDataMap.put("statCrudService", statCrudService);
+        jobDataMap.put("hostQuery", hostQuery);
+        jobDataMap.put("machineId", machineId);
+        return jobDataMap;
+    }
+
+    private JobDataMap appDataMap(String jobId) {
+        String[] arr = jobId.split("-");
+        String appId = arr[0];
+        String machineId = arr[1];
+
+        JobDataMap jobDataMap = new JobDataMap();
+        jobDataMap.put("notifyService", notifyService);
+        jobDataMap.put("jobCrudService", jobCrudService);
+        jobDataMap.put("runningRepository", runningRepository);
+        jobDataMap.put("appQuery", appQuery);
+        jobDataMap.put("webRequest", new DefaultWebRequest());
+        jobDataMap.put("appId", appId);
+        jobDataMap.put("machineId", machineId);
+        return jobDataMap;
+    }
+}

+ 5 - 4
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorListService.java

@@ -4,10 +4,11 @@ import cn.reghao.autodop.dmaster.app.entity.AppRunning;
 import cn.reghao.autodop.dmaster.app.repository.AppRunningRepository;
 import cn.reghao.autodop.dmaster.machine.entity.po.info.MachineInfo;
 import cn.reghao.autodop.dmaster.machine.repository.MachineInfoRepository;
+import cn.reghao.autodop.dmaster.monitor.entity.JobType;
 import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
 import cn.reghao.autodop.dmaster.monitor.repository.MonitorJobRepository;
 import cn.reghao.autodop.dmaster.monitor.service.job.AppHealthCheckJob;
-import cn.reghao.autodop.dmaster.monitor.service.job.MachineHeartbeatCheckJob;
+import cn.reghao.autodop.dmaster.monitor.service.job.MachineStatCheckJob;
 import org.springframework.stereotype.Service;
 
 import java.util.List;
@@ -41,12 +42,12 @@ public class MonitorListService {
         List<MonitorJob> monitorJobs = machineInfos.stream()
                 .map(machineInfo -> {
                     String machineId = machineInfo.getMachineId();
-                    String jobClassName = MachineHeartbeatCheckJob.class.getSimpleName();
+                    String jobClassName = MachineStatCheckJob.class.getSimpleName();
                     String jobId = String.format("%s-%s", machineId, jobClassName);
 
                     MonitorJob monitorJob = monitorJobRepository.findByJobId(jobId);
                     if (monitorJob == null) {
-                        monitorJob = new MonitorJob(jobId, jobClassName);
+                        monitorJob = new MonitorJob(jobId, jobClassName, JobType.machine.name());
                     }
                     return monitorJob;
                 })
@@ -65,7 +66,7 @@ public class MonitorListService {
 
                     MonitorJob monitorJob = monitorJobRepository.findByJobId(jobId);
                     if (monitorJob == null) {
-                        monitorJob = new MonitorJob(jobId, jobClassName);
+                        monitorJob = new MonitorJob(jobId, jobClassName, JobType.app.name());
                     }
                     return monitorJob;
                 })

+ 14 - 10
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorScheduler.java

@@ -2,7 +2,6 @@ package cn.reghao.autodop.dmaster.monitor.service;
 
 import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
 import cn.reghao.autodop.dmaster.monitor.repository.MonitorJobRepository;
-import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
 import cn.reghao.autodop.dmaster.utils.clazz.PackageScanner;
 import lombok.extern.slf4j.Slf4j;
 import org.quartz.*;
@@ -25,11 +24,14 @@ public class MonitorScheduler {
     private Scheduler scheduler;
     private MonitorJobRepository monitorJobRepository;
     private PackageScanner packageScanner;
+    private MonitorJobData monitorJobData;
 
-    public MonitorScheduler(MonitorJobRepository monitorJobRepository) throws SchedulerException {
+    public MonitorScheduler(MonitorJobRepository monitorJobRepository, MonitorJobData monitorJobData)
+            throws SchedulerException {
         this.scheduler = StdSchedulerFactory.getDefaultScheduler();
         this.monitorJobRepository = monitorJobRepository;
         this.packageScanner = new PackageScanner();
+        this.monitorJobData = monitorJobData;
     }
 
     /**
@@ -41,18 +43,20 @@ public class MonitorScheduler {
      */
     @PostConstruct
     public void startScheduler() throws SchedulerException, IOException {
+        startAllJobs();
+        scheduler.start();
+    }
+
+    private void startAllJobs() throws SchedulerException, IOException {
         List<MonitorJob> jobs = monitorJobRepository.findAll();
         for (MonitorJob job : jobs) {
             if (job.getEnable()) {
-
+                JobDataMap jobDataMap = monitorJobData.jobDataMap(job);
+                if (jobDataMap != null) {
+                    addJob(job, jobDataMap);
+                }
             }
         }
-        scheduler.start();
-    }
-
-    public void updateNotifyGroup(String jobId, List<NotifyGroup> notifyGroups) throws SchedulerException {
-        JobDetail jobDetail = scheduler.getJobDetail(JobKey.jobKey(jobId));
-        JobDataMap jobDataMap = jobDetail.getJobDataMap();
     }
 
     public void addJob(MonitorJob monitorJob, JobDataMap jobDataMap) throws IOException, SchedulerException {
@@ -79,7 +83,7 @@ public class MonitorScheduler {
         }
     }
 
-    public void modifyJob(String jobId, String cronExp) throws SchedulerException {
+    public void rescheduleJob(String jobId, String cronExp) throws SchedulerException {
         TriggerKey triggerKey = TriggerKey.triggerKey(jobId);
         CronTrigger cronTrigger = TriggerBuilder.newTrigger()
                 .withIdentity(jobId)

+ 4 - 3
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorService.java

@@ -1,7 +1,7 @@
 package cn.reghao.autodop.dmaster.monitor.service;
 
 import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
-import org.quartz.JobDataMap;
+import org.quartz.SchedulerException;
 
 /**
  * 监控任务管理接口
@@ -10,8 +10,9 @@ import org.quartz.JobDataMap;
  * @date 2020-10-22 17:51:56
  */
 public interface MonitorService {
-    void addOrModifyJob(MonitorJob monitorJob, JobDataMap jobDataMap) throws Exception;
+    void startJob(MonitorJob monitorJob) throws Exception;
+    void rescheduleJob(MonitorJob monitorJob) throws SchedulerException;
     void deleteJob(String jobId);
-    void startJob(String jobId);
     void pauseJob(String jobId);
+    void resumeJob(String jobId);
 }

+ 45 - 29
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/MonitorServiceImpl.java

@@ -2,14 +2,12 @@ package cn.reghao.autodop.dmaster.monitor.service;
 
 import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
 import cn.reghao.autodop.dmaster.monitor.repository.MonitorJobRepository;
-import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
 import lombok.extern.slf4j.Slf4j;
 import org.quartz.JobDataMap;
 import org.quartz.SchedulerException;
 import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;
 
-import java.io.IOException;
+import java.time.LocalDateTime;
 
 /**
  * @author reghao
@@ -19,26 +17,45 @@ import java.io.IOException;
 @Service
 public class MonitorServiceImpl implements MonitorService {
     private MonitorScheduler monitorScheduler;
+    private MonitorJobData monitorJobData;
     private MonitorJobRepository monitorJobRepository;
 
-    public MonitorServiceImpl(MonitorScheduler monitorScheduler, MonitorJobRepository monitorJobRepository) {
+    public MonitorServiceImpl(MonitorScheduler monitorScheduler,
+                              MonitorJobData monitorJobData,
+                              MonitorJobRepository monitorJobRepository) {
         this.monitorScheduler = monitorScheduler;
+        this.monitorJobData = monitorJobData;
         this.monitorJobRepository = monitorJobRepository;
     }
 
     @Override
-    @Transactional(rollbackFor = Exception.class)
-    public synchronized void addOrModifyJob(MonitorJob monitorJob, JobDataMap jobDataMap) throws Exception {
+    public synchronized void startJob(MonitorJob monitorJob) throws Exception {
+        JobDataMap jobDataMap = monitorJobData.jobDataMap(monitorJob);
+        // 添加并启动任务
+        monitorScheduler.addJob(monitorJob, jobDataMap);
+        boolean enable = monitorJob.getEnable();
+        if (!enable) {
+            monitorJob.setEnable(true);
+            monitorJobRepository.save(monitorJob);
+        }
+    }
+
+    @Override
+    public void rescheduleJob(MonitorJob monitorJob) throws SchedulerException {
         String jobId = monitorJob.getJobId();
         MonitorJob jobEntity = monitorJobRepository.findByJobId(jobId);
-        if (jobEntity == null) {
-            // 添加任务
-            monitorScheduler.addJob(monitorJob, jobDataMap);
-        } else {
-            // 修改任务
-            monitorScheduler.modifyJob(jobId, monitorJob.getCronExp());
+        if (!jobEntity.getCronExp().equals(monitorJob.getCronExp())) {
+            // 修改任务触发时间
+            monitorScheduler.rescheduleJob(jobId, monitorJob.getCronExp());
+            boolean enable = monitorJob.getEnable();
+            if (!enable) {
+                monitorJob.setEnable(true);
+            }
         }
-        // TODO 数据库和 Scheduler 中的数据应该总是保持一致
+
+        monitorJob.setId(jobEntity.getId());
+        monitorJob.setCreateTime(jobEntity.getCreateTime());
+        monitorJob.setUpdateTime(LocalDateTime.now());
         monitorJobRepository.save(monitorJob);
     }
 
@@ -53,36 +70,35 @@ public class MonitorServiceImpl implements MonitorService {
     }
 
     @Override
-    public synchronized void startJob(String jobId) {
+    public synchronized void pauseJob(String jobId) {
         MonitorJob monitorJob = monitorJobRepository.findByJobId(jobId);
-        /*if (monitorJob.getNotifyGroups().isEmpty()) {
-            log.error("{} 没有通知组,添加通知组后再启用", jobId);
-            return;
-        }*/
-
         boolean isEnable = monitorJob.getEnable();
-        if (!isEnable) {
-            monitorJob.setEnable(true);
+        if (isEnable) {
+            monitorJob.setEnable(false);
             try {
-                monitorScheduler.resumeJob(monitorJob.getJobId());
+                monitorScheduler.pauseJob(jobId);
                 monitorJobRepository.save(monitorJob);
             } catch (SchedulerException e) {
-                e.printStackTrace();
+                log.error("{}", e.getMessage());
             }
         }
     }
 
     @Override
-    public synchronized void pauseJob(String jobId) {
+    public synchronized void resumeJob(String jobId) {
         MonitorJob monitorJob = monitorJobRepository.findByJobId(jobId);
         boolean isEnable = monitorJob.getEnable();
-        if (isEnable) {
-            monitorJob.setEnable(false);
+        if (!isEnable) {
+            monitorJob.setEnable(true);
             try {
-                monitorScheduler.pauseJob(monitorJob.getJobId());
+                if (monitorScheduler.isJobExist(jobId)) {
+                    monitorScheduler.resumeJob(jobId);
+                } else {
+                    startJob(monitorJob);
+                }
                 monitorJobRepository.save(monitorJob);
-            } catch (SchedulerException e) {
-                log.error("{}", e.getMessage());
+            } catch (Exception e) {
+                e.printStackTrace();
             }
         }
     }

+ 8 - 2
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/job/AppHealthCheckJob.java

@@ -1,8 +1,10 @@
 package cn.reghao.autodop.dmaster.monitor.service.job;
 
 import cn.reghao.autodop.common.http.WebRequest;
+import cn.reghao.autodop.dmaster.app.db.query.config.AppQuery;
 import cn.reghao.autodop.dmaster.app.entity.AppRunning;
 import cn.reghao.autodop.dmaster.app.repository.AppRunningRepository;
+import cn.reghao.autodop.dmaster.monitor.db.MonitorJobCrudService;
 import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
 import cn.reghao.autodop.dmaster.notification.service.NotifyService;
 import cn.reghao.autodop.dmaster.notification.service.notifier.ding.DingMsg;
@@ -35,13 +37,17 @@ public class AppHealthCheckJob implements Job {
     public void execute(JobExecutionContext context) {
         JobDetail jobDetail = context.getJobDetail();
         JobDataMap jobDataMap = jobDetail.getJobDataMap();
+        String jobId = jobDetail.getKey().getName();
 
         NotifyService notifyService = (NotifyService) jobDataMap.get("notifyService");
-        List<NotifyGroup> notifyGroups = (List<NotifyGroup>) jobDataMap.get("notifyGroups");
+        MonitorJobCrudService jobCrudService = (MonitorJobCrudService) jobDataMap.get("jobCrudService");
+        AppRunningRepository runningRepository = (AppRunningRepository) jobDataMap.get("runningRepository");
+        AppQuery appQuery = (AppQuery) jobDataMap.get("appQuery");
         WebRequest webRequest = (WebRequest) jobDataMap.get("webRequest");
         String appId = jobDataMap.getString("appId");
         String machineId = jobDataMap.getString("machineId");
-        AppRunningRepository runningRepository = (AppRunningRepository) jobDataMap.get("runningRepository");
+
+        List<NotifyGroup> notifyGroups = appQuery.findByAppId(appId).getNotifyGroups();
 
         AppRunning appRunning = runningRepository.findByAppIdAndMachineId(appId, machineId);
         String machineIpv4 = appRunning.getMachineIpv4();

+ 0 - 56
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/job/MachineHeartbeatCheckJob.java

@@ -1,56 +0,0 @@
-package cn.reghao.autodop.dmaster.monitor.service.job;
-
-import cn.reghao.autodop.common.utils.DateTimeConverter;
-import cn.reghao.autodop.dmaster.machine.db.crud.MachineStatCrudService;
-import cn.reghao.autodop.dmaster.machine.entity.po.MachineStat;
-import cn.reghao.autodop.dmaster.machine.entity.po.StatusType;
-import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
-import cn.reghao.autodop.dmaster.notification.service.NotifyService;
-import cn.reghao.autodop.dmaster.notification.service.notifier.ding.DingMsg;
-import lombok.extern.slf4j.Slf4j;
-import org.quartz.Job;
-import org.quartz.JobDataMap;
-import org.quartz.JobDetail;
-import org.quartz.JobExecutionContext;
-
-import java.util.List;
-
-/**
- * 机器心跳监控任务
- * 接收到 dagentStart 事件时添加(若 machine 是第一次注册)并启动(若不是第一次注册且存在通知组)任务
- * 接收到 dagnetShutdown 事件时停止任务
- * 删除 MachineInfo 时删除任务
- * 任务需要设置通知组后才能启用
- *
- * @author reghao
- * @date 2021-06-22 19:04:10
- */
-@Slf4j
-public class MachineHeartbeatCheckJob implements Job {
-    @Override
-    public void execute(JobExecutionContext context) {
-        JobDetail jobDetail = context.getJobDetail();
-        JobDataMap jobDataMap = jobDetail.getJobDataMap();
-
-        NotifyService notifyService = (NotifyService) jobDataMap.get("notifyService");
-        List<NotifyGroup> notifyGroups = (List<NotifyGroup>) jobDataMap.get("notifyGroups");
-        MachineStatCrudService statCrudService = (MachineStatCrudService) jobDataMap.get("statCrudService");
-        String machineId = jobDataMap.getString("machineId");
-
-        MachineStat machineStat = statCrudService.selectByUk(machineId);
-        long lastCheck = DateTimeConverter.msTimestamp(machineStat.getLastCheck());
-        long now = System.currentTimeMillis();
-
-        long result = now - lastCheck;
-        if (result > 10_000) {
-            // TODO 通知三次后,停止心跳检测
-            machineStat.setStatus(StatusType.DOWN.name());
-            statCrudService.insertOrUpdate(machineStat);
-            // TODO 检测 IP 是否能 ping 通
-            // TODO 检测 MQTT 服务器是否正常连接
-            String msg = String.format("距离上次接收到 %s 的心跳消息已过去 %ss", machineStat.getMachineId(), result/1000);
-            DingMsg dingMsg = new DingMsg("监控报警",msg);
-            //notifyGroups.forEach(notifyGroup -> notifyService.notify(notifyGroup, dingMsg));
-        }
-    }
-}

+ 132 - 0
dmaster/src/main/java/cn/reghao/autodop/dmaster/monitor/service/job/MachineStatCheckJob.java

@@ -0,0 +1,132 @@
+package cn.reghao.autodop.dmaster.monitor.service.job;
+
+import cn.reghao.autodop.common.dagent.machine.disk.DiskInfo;
+import cn.reghao.autodop.common.dagent.machine.memory.MemoryInfo;
+import cn.reghao.autodop.common.utils.ByteConverter;
+import cn.reghao.autodop.common.utils.ByteType;
+import cn.reghao.autodop.common.utils.DateTimeConverter;
+import cn.reghao.autodop.common.utils.PercentCalculator;
+import cn.reghao.autodop.dmaster.machine.db.crud.MachineHostCrudService;
+import cn.reghao.autodop.dmaster.machine.db.crud.MachineStatCrudService;
+import cn.reghao.autodop.dmaster.machine.db.query.MachineHostQuery;
+import cn.reghao.autodop.dmaster.machine.entity.po.MachineHost;
+import cn.reghao.autodop.dmaster.machine.entity.po.MachineStat;
+import cn.reghao.autodop.dmaster.machine.entity.po.StatusType;
+import cn.reghao.autodop.dmaster.monitor.db.MonitorJobCrudService;
+import cn.reghao.autodop.dmaster.monitor.entity.MonitorJob;
+import cn.reghao.autodop.dmaster.notification.entity.NotifyGroup;
+import cn.reghao.autodop.dmaster.notification.service.NotifyService;
+import cn.reghao.autodop.dmaster.notification.service.notifier.ding.DingMsg;
+import lombok.extern.slf4j.Slf4j;
+import org.quartz.Job;
+import org.quartz.JobDataMap;
+import org.quartz.JobDetail;
+import org.quartz.JobExecutionContext;
+
+import java.util.List;
+
+/**
+ * 机器状态监控任务
+ * 接收到 dagentStart 事件时添加(若 machine 是第一次注册)并启动(若不是第一次注册且存在通知组)任务
+ * 接收到 dagnetShutdown 事件时停止任务
+ * 删除 MachineInfo 时删除任务
+ * 任务需要设置通知组后才能启用
+ *
+ * TODO 同一个任务是否会被多个线程同时执行?
+ *
+ * @author reghao
+ * @date 2021-06-22 19:04:10
+ */
+@Slf4j
+public class MachineStatCheckJob implements Job {
+    @Override
+    public void execute(JobExecutionContext context) {
+        JobDetail jobDetail = context.getJobDetail();
+        JobDataMap jobDataMap = jobDetail.getJobDataMap();
+        String jobId = jobDetail.getKey().getName();
+
+        NotifyService notifyService = (NotifyService) jobDataMap.get("notifyService");
+        MonitorJobCrudService jobCrudService = (MonitorJobCrudService) jobDataMap.get("jobCrudService");
+        MachineStatCrudService statCrudService = (MachineStatCrudService) jobDataMap.get("statCrudService");
+        MachineHostQuery hostQuery = (MachineHostQuery) jobDataMap.get("hostQuery");
+        String machineId = jobDataMap.getString("machineId");
+
+        MachineHost machineHost = hostQuery.query(machineId);
+        List<NotifyGroup> notifyGroups = machineHost.getNotifyGroups();
+        if (notifyGroups.isEmpty()) {
+            log.error("没有通知组,不执行机器状态检查任务");
+            return;
+        }
+
+        MachineStat machineStat = statCrudService.selectByUk(machineId);
+        long lastCheck = DateTimeConverter.msTimestamp(machineStat.getLastCheck());
+        long now = System.currentTimeMillis();
+
+        long result = now - lastCheck;
+        if (result > 10_000) {
+            MonitorJob monitorJob = jobCrudService.selectByUk(jobId);
+            int maxCount = monitorJob.getMaxNotifyCount();
+            int currentCount = monitorJob.getNotifyCount();
+            if (currentCount >= maxCount) {
+                // TODO 通知三次后,停止心跳检测
+                return;
+            }
+
+            machineStat.setStatus(StatusType.DOWN.name());
+            statCrudService.insertOrUpdate(machineStat);
+            // TODO 检测 IP 是否能 ping 通
+            // TODO 检测 MQTT 服务器是否正常连接
+            String msg = String.format("距离上次接收到 %s 的心跳消息已过去 %ss", machineStat.getMachineId(), result/1000);
+            DingMsg dingMsg = new DingMsg("监控报警",msg);
+            notifyGroups.forEach(notifyGroup -> notifyService.notify(notifyGroup, dingMsg));
+
+            // 更新通知计数
+            monitorJob.setNotifyCount(currentCount+1);
+            jobCrudService.insertOrUpdate(monitorJob);
+        }
+    }
+
+    private String heartbeatAlert() {
+        return null;
+    }
+
+    private String memoryUsageAlert(MachineStat machineStat) {
+        String machineId = machineStat.getMachineId();
+        MemoryInfo memoryInfo = machineStat.getMemoryInfo();
+        long total = memoryInfo.getTotal();
+        long avail = memoryInfo.getAvailable();
+
+        ByteConverter converter = new ByteConverter();
+        String availSize = converter.convertStr(ByteType.KiB, ByteType.MiB, avail);
+
+        double value = PercentCalculator.percentValue(avail, total);
+        String percent = PercentCalculator.percent(value);
+        double minimalPercent = 0.20;
+        if (value < minimalPercent) {
+            return String.format("%s 上可用的内存仅占内存的 %s, 共计 %s", machineId, percent, availSize);
+        } else {
+            return null;
+        }
+    }
+
+    private String diskUsageAlert(MachineStat machineStat) {
+        String machineId = machineStat.getMachineId();
+        List<DiskInfo> diskInfos = machineStat.getDiskInfos();
+        long total = 0, avail = 0;
+        for (DiskInfo diskInfo : diskInfos) {
+            total += diskInfo.getSize();
+            avail += diskInfo.getAvail();
+        }
+
+        ByteConverter converter = new ByteConverter();
+        String availSize = converter.convert(ByteType.Bytes, ByteType.MiB, avail) + ByteType.MiB.name();
+        double value = PercentCalculator.percentValue(avail, total);
+        String percent = PercentCalculator.percent(value);
+        double minimalPercent = 0.20;
+        if (value < minimalPercent) {
+            return String.format("%s 上可用的磁盘空间仅占总磁盘空间的 %s, 共计 %s", machineId, percent, availSize);
+        } else {
+            return null;
+        }
+    }
+}

+ 27 - 0
dmaster/src/test/java/cn/reghao/autodop/dmaster/machine/db/query/MachineHostQueryTest.java

@@ -0,0 +1,27 @@
+package cn.reghao.autodop.dmaster.machine.db.query;
+
+import cn.reghao.autodop.dmaster.DmasterApplication;
+import cn.reghao.autodop.dmaster.machine.entity.po.MachineHost;
+import org.junit.jupiter.api.Test;
+import org.junit.runner.RunWith;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.test.context.ActiveProfiles;
+import org.springframework.test.context.junit4.SpringRunner;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+@ActiveProfiles("dev")
+@SpringBootTest(classes = DmasterApplication.class)
+@RunWith(SpringRunner.class)
+class MachineHostQueryTest {
+    @Autowired
+    private MachineHostQuery hostQuery;
+
+    @Test
+    void query() {
+        String machineId = "5d1a727991f34d3a9c1220a1899e6ebd";
+        MachineHost machineHost = hostQuery.query(machineId);
+        System.out.println();
+    }
+}