Browse Source

更新 mon 模块

reghao 1 month ago
parent
commit
5de43deb3b

+ 10 - 11
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PrometheusAsyncClient.java

@@ -51,9 +51,7 @@ public class PrometheusAsyncClient {
     }
     }
 
 
     public CompletableFuture<Map.Entry<String, String>> fetchSingleMetric(String alias, String query) {
     public CompletableFuture<Map.Entry<String, String>> fetchSingleMetric(String alias, String query) {
-        String encodedUrl = prometheusBaseUrl + "/api/v1/query?query=" +
-                URLEncoder.encode(query, java.nio.charset.StandardCharsets.UTF_8);
-
+        String encodedUrl = prometheusBaseUrl + "/api/v1/query?query=" + URLEncoder.encode(query, StandardCharsets.UTF_8);
         HttpRequest request = HttpRequest.newBuilder()
         HttpRequest request = HttpRequest.newBuilder()
                 .uri(URI.create(encodedUrl))
                 .uri(URI.create(encodedUrl))
                 .GET()
                 .GET()
@@ -156,21 +154,22 @@ public class PrometheusAsyncClient {
         String baseUrl = "http://prometheus.iquizoo.cn";
         String baseUrl = "http://prometheus.iquizoo.cn";
         PrometheusAsyncClient client = new PrometheusAsyncClient(baseUrl);
         PrometheusAsyncClient client = new PrometheusAsyncClient(baseUrl);
         String promql = "topk(5, increase(container_cpu_cfs_throttled_periods_total{name!=''}[24h]))";
         String promql = "topk(5, increase(container_cpu_cfs_throttled_periods_total{name!=''}[24h]))";
-        //String respBody = client.query(promql).join();
-
-        String cpuQuery = "1 - avg(irate(node_cpu_seconds_total{mode='idle'}[5m])) by (instance)";
-        cpuQuery = """
-                sum(
-                  irate(container_cpu_usage_seconds_total{name!=""}[5m])
-                ) by (name, instance) * 100
+        promql = """
+                avg_over_time(
+                  (1 - avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])))[24h:1m]
+                ) * 100
                 """;
                 """;
+
+        String respBody = client.query(promql).join();
+
+
         // 1. 计算时间范围:昨天 00:00:00 到 23:59:59
         // 1. 计算时间范围:昨天 00:00:00 到 23:59:59
         // 也可以根据需求改为:当前时间向前推 24 小时
         // 也可以根据需求改为:当前时间向前推 24 小时
         long end = Instant.now().getEpochSecond();
         long end = Instant.now().getEpochSecond();
         long start = end - (24 * 3600);
         long start = end - (24 * 3600);
         // 30分钟一个采样点,适合 24h 趋势图
         // 30分钟一个采样点,适合 24h 趋势图
         String step = "30m";
         String step = "30m";
-        String respBody1 = client.queryRange(cpuQuery, start, end, step).join();
+        //String respBody1 = client.queryRange(cpuQuery, start, end, step).join();
         System.out.println();
         System.out.println();
     }
     }
 }
 }

+ 585 - 149
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PrometheusService.java

@@ -2,6 +2,10 @@ package cn.reghao.devops.mgr.ops.srv.mon;
 
 
 import cn.reghao.devops.mgr.config.AppProperties;
 import cn.reghao.devops.mgr.config.AppProperties;
 import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerHealthReport;
 import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerHealthReport;
+import cn.reghao.devops.mgr.ops.srv.mon.model.AnalysisResult;
+import cn.reghao.devops.mgr.ops.srv.mon.model.CpuThresholdConfig;
+import cn.reghao.devops.mgr.ops.srv.mon.model.HostGroup;
+import cn.reghao.devops.mgr.ops.srv.mon.model.MetricRecord;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -21,10 +25,12 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.Paths;
+import java.text.SimpleDateFormat;
 import java.time.*;
 import java.time.*;
 import java.time.format.DateTimeFormatter;
 import java.time.format.DateTimeFormatter;
 import java.util.*;
 import java.util.*;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
 
 
 /**
 /**
  * @author reghao
  * @author reghao
@@ -34,25 +40,75 @@ import java.util.concurrent.CompletableFuture;
 @Service
 @Service
 public class PrometheusService {
 public class PrometheusService {
     private ObjectMapper objectMapper = new ObjectMapper();
     private ObjectMapper objectMapper = new ObjectMapper();
-    private final PrometheusAsyncClient promClient;
-    private final Cache<String, Object> cache;
+    private PrometheusAsyncClient promClient;
+    private Cache<String, Object> cache;
 
 
     public PrometheusService(AppProperties appProperties, Cache<String, Object> cache) {
     public PrometheusService(AppProperties appProperties, Cache<String, Object> cache) {
         this.promClient = new PrometheusAsyncClient(appProperties.getPrometheusBaseUrl());
         this.promClient = new PrometheusAsyncClient(appProperties.getPrometheusBaseUrl());
         this.cache = cache;
         this.cache = cache;
     }
     }
 
 
+    /**
+     * Creates a service bound to a fixed Prometheus endpoint and without a cache.
+     * <p>
+     * NOTE(review): the class now declares two constructors and neither carries
+     * {@code @Autowired}. In that situation Spring instantiates the bean through the no-arg
+     * constructor, i.e. this one (hard-coded URL, {@code null} cache), rather than the
+     * {@code AppProperties} based constructor - confirm that this is intended.
+     */
+    public PrometheusService() {
+        // No cache is available on this path; stated explicitly instead of relying on the field default.
+        this.cache = null;
+        this.promClient = new PrometheusAsyncClient("http://prometheus.iquizoo.cn");
+    }
+
+    /**
+     * Builds a FreeMarker {@link Configuration} that loads templates from
+     * {@code classpath:/templates/} with UTF-8 as the default encoding.
+     *
+     * @return the configuration exposed by a freshly initialised {@link FreeMarkerConfigurer}
+     * @throws TemplateException if the configurer cannot be initialised
+     * @throws IOException       if the configurer cannot be initialised
+     */
+    public static Configuration getTemplateConfiguration() throws TemplateException, IOException {
+        // Native FreeMarker settings, handed to the configurer below.
+        Properties freemarkerSettings = new Properties();
+        // Delay between checks for template updates.
+        freemarkerSettings.setProperty("template_update_delay", "0");
+        freemarkerSettings.setProperty("default_encoding", "UTF-8");
+        // Plain number format, so that 1000 is not rendered as 1,000.
+        freemarkerSettings.setProperty("number_format", "0.##");
+        freemarkerSettings.setProperty("datetime_format", "yyyy-MM-dd HH:mm:ss");
+
+        FreeMarkerConfigurer factory = new FreeMarkerConfigurer();
+        // Templates live under resources/templates.
+        factory.setTemplateLoaderPath("classpath:/templates/");
+        factory.setDefaultEncoding("UTF-8");
+        factory.setFreemarkerSettings(freemarkerSettings);
+        // Important: this call initialises the internal Configuration object and must precede getConfiguration().
+        factory.afterPropertiesSet();
+        return factory.getConfiguration();
+    }
+
+    /**
+     * Renders a FreeMarker template into an HTML string.
+     *
+     * @param templatePath template path relative to src/main/resources/templates/
+     * @param root         data model made available to the template
+     * @return the rendered template output
+     * @throws Exception if the configuration cannot be built or the template cannot be loaded or processed
+     * @date 2026-03-29 00:03:145
+     */
+    public String renderHtml(String templatePath, Map<String, Object> root) throws Exception {
+        // getTemplateConfiguration() builds a new Configuration on every call.
+        Configuration configuration = getTemplateConfiguration();
+        // Load the template file, e.g. src/main/resources/templates/pillar_report.ftl
+        Template compiled = configuration.getTemplate(templatePath);
+        // Process the template against the model and hand back the result as a String.
+        return FreeMarkerTemplateUtils.processTemplateIntoString(compiled, root);
+    }
+
     public void generateDailyReport() {
     public void generateDailyReport() {
         // 定义查询任务
         // 定义查询任务
         Map<String, String> tasks = Map.of(
         Map<String, String> tasks = Map.of(
-                "container_count", "count by (instance) (container_last_seen{image!=''})",
-                "top_cpu_containers", "topk(5, sum by (name) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))"
+                "container_count", """
+                        avg_over_time((1 - avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))[24h:1m]) * 100
+                        """,
+                "top_cpu_containers", """
+                        max_over_time((1 - avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))[24h:1m]) * 100
+                        """,
+                "top_cpu_containers1", """
+                        max_over_time((1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)[24h:1m]) * 100
+                        """,
+                "top_cpu_containers2", """
+                        max_over_time(rate(container_cpu_usage_seconds_total{name!=""}[5m])[24h:1m])
+                        """,
+                "top_cpu_containers3", """
+                        max_over_time(container_memory_usage_bytes{name!=""}[24h:1m]) / 1024 / 1024
+                        """,
+                "top_cpu_containers4", """
+                        increase(container_oom_events_total[24h]) > 0
+                        """
         );
         );
 
 
         // 异步执行
         // 异步执行
         promClient.fetchAllMetrics(tasks).thenAccept(results -> {
         promClient.fetchAllMetrics(tasks).thenAccept(results -> {
-            // 在这里解析 JSON 并填充到 DTO
-            processResults(results);
+            //processResults(results);
             System.out.println("所有数据采集完成,开始渲染报表...");
             System.out.println("所有数据采集完成,开始渲染报表...");
         }).join(); // 如果是在定时任务主线程,可以用 join 等待完成
         }).join(); // 如果是在定时任务主线程,可以用 join 等待完成
     }
     }
@@ -60,7 +116,6 @@ public class PrometheusService {
     private OperationReportDTO processResults(Map<String, String> results) {
     private OperationReportDTO processResults(Map<String, String> results) {
         OperationReportDTO report = new OperationReportDTO();
         OperationReportDTO report = new OperationReportDTO();
         Map<String, HostInfo> hostMap = new HashMap<>();
         Map<String, HostInfo> hostMap = new HashMap<>();
-
         try {
         try {
             // 1. 解析 CPU 指标
             // 1. 解析 CPU 指标
             if (results.containsKey("node_cpu")) {
             if (results.containsKey("node_cpu")) {
@@ -97,12 +152,527 @@ public class PrometheusService {
                 parseTrendData(results.get("cpu_trend"), report);
                 parseTrendData(results.get("cpu_trend"), report);
             }
             }
         } catch (Exception e) {
         } catch (Exception e) {
-            // log.error("JSON 解析失败", e); // 确保 log 对象已定义,或使用 System.err
-            e.printStackTrace();
         }
         }
+
+        Map<String, Object> root = new HashMap<>();
+        root.put("hostGroupList", report);
+
+        /*try {
+            // 6. 渲染最终的复合模板
+            String templatePath = "daily_report.ftl";
+            String htmlContent = renderHtml(templatePath, root);
+            Path outputPath = Paths.get("/home/reghao/Downloads", "daily_report_" + LocalDate.now() + ".html");
+            if (Files.notExists(outputPath.getParent())) {
+                Files.createDirectories(outputPath.getParent());
+            }
+            Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
+            System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
+        } catch (Exception e) {
+            log.error("{}", e.getMessage());
+        }*/
+
         return report;
         return report;
     }
     }
 
 
+    /**
+     * Queries Prometheus for the 24-hour host and container CPU / memory statistics and passes
+     * the raw responses to {@code processResults0}.
+     * <p>
+     * The call blocks until all queries have completed and the results have been processed.
+     */
+    public void generateDailyReport1() {
+        // Query tasks: key = task name (processResults0 classifies values by this name),
+        // value = PromQL expression.
+        Map<String, String> tasks = Map.of(
+                "node_cpu_core", """
+                        count by(instance) (node_cpu_seconds_total{mode="idle"})
+                        """,
+                "node_cpu_avg", """
+                        avg_over_time(
+                          (1 - avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])))[24h:1m]
+                        ) * 100
+                        """,
+                "node_cpu_max", """
+                        max_over_time(
+                          (1 - avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])))[24h:1m]
+                        ) * 100
+                        """,
+                "container_cpu_limit", """
+                        (container_spec_cpu_quota > 0) / container_spec_cpu_period
+                        """,
+                "container_cpu_avg", """
+                        avg_over_time(
+                          (
+                            sum by (instance, name) (rate(container_cpu_usage_seconds_total{name!=""}[1m]))
+                            /\s
+                            sum by (instance, name) (container_spec_cpu_quota{name!=""} / container_spec_cpu_period{name!=""})
+                          )[24h:1m]
+                        ) * 100
+                        """,
+                "container_cpu_max", """
+                        max_over_time(
+                          (
+                            sum by (instance, name) (rate(container_cpu_usage_seconds_total{name!=""}[1m]))
+                            /\s
+                            sum by (instance, name) (container_spec_cpu_quota{name!=""} / container_spec_cpu_period{name!=""})
+                          )[24h:1m]
+                        ) * 100
+                        """,
+                "container_mem_avg", """
+                        avg_over_time(container_memory_working_set_bytes{name!=""}[24h]) / 1024 / 1024
+                        """,
+                "container_mem_max", """
+                        max_over_time(container_memory_working_set_bytes{name!=""}[24h]) / 1024 /1024
+                        """
+        );
+
+        // Run the queries asynchronously; join() is acceptable when this runs on a scheduler thread.
+        promClient.fetchAllMetrics(tasks).thenAccept(results -> {
+            processResults0(results);
+            System.out.println("所有数据采集完成,开始渲染报表...");
+        }).join();
+    }
+
+    /**
+     * Groups the merged records by host IP and expresses every container's CPU usage from the
+     * host's point of view.
+     * <p>
+     * Groups that lack a host record or have no containers are returned without overhead,
+     * relative values or diagnosis. When the host record carries no positive core count,
+     * 8 cores are assumed.
+     *
+     * @param mergedMap merged metric records (host-level records have a {@code null} container)
+     * @return a new list of host groups, in first-seen order of their IP
+     */
+    public List<HostGroup> generateUnifiedReport(Map<String, MetricRecord> mergedMap) {
+        // Step 2: build the host -> containers hierarchy.
+        Map<String, HostGroup> groupsByIp = new LinkedHashMap<>();
+        for (MetricRecord record : mergedMap.values()) {
+            // Keep only the IP part (172.16.45.66:9100 -> 172.16.45.66).
+            String ip = record.getInstance().split(":")[0];
+            HostGroup owner = groupsByIp.computeIfAbsent(ip, key -> {
+                HostGroup created = new HostGroup();
+                created.setHostIp(ip);
+                return created;
+            });
+
+            if (record.getContainer() != null) {
+                owner.getContainerRecords().add(record);
+            } else {
+                owner.setHostRecord(record);
+            }
+        }
+
+        // Step 3: put host and container figures on the same scale (node perspective).
+        for (HostGroup group : groupsByIp.values()) {
+            MetricRecord host = group.getHostRecord();
+            List<MetricRecord> containers = group.getContainerRecords();
+            if (host == null || containers.isEmpty()) {
+                continue;
+            }
+
+            // Total host cores, taken from the record (filled from node_cpu_core); 8 when missing.
+            double totalCores = host.getCpuCore() > 0 ? host.getCpuCore() : 8.0;
+
+            // Container utilisation is a percentage of its own limit:
+            //   share of the node = utilisation * (limit cores / node cores)
+            double containerShareOfNode = containers.stream()
+                    .mapToDouble(c -> c.getAvgValue() * (c.getCpuLimit() / totalCores))
+                    .sum();
+
+            // Hidden system overhead = host utilisation - summed container share, floored at 0.
+            group.setSystemOverhead(Math.max(0, host.getAvgValue() - containerShareOfNode));
+
+            // Store the per-container values relative to the host for use by the template.
+            for (MetricRecord container : containers) {
+                double limitToNodeRatio = container.getCpuLimit() / totalCores;
+                container.setRelativeToHostAvg(container.getAvgValue() * limitToNodeRatio);
+                container.setRelativeToHostMax(container.getMaxValue() * limitToNodeRatio);
+            }
+
+            // Diagnosis text.
+            StringBuilder findings = new StringBuilder();
+            if (group.getSystemOverhead() > 15.0) {
+                findings.append(String.format("⚠️ 宿主机非容器损耗(内核/IO)高达 %.1f%%。 ", group.getSystemOverhead()));
+            }
+            if (host.getMaxValue() > 85.0) {
+                findings.append("🚨 宿主机峰值接近瓶颈。 ");
+            }
+            if (findings.length() == 0) {
+                group.setRelationshipDiagnosis("✅ 资源分配健康");
+            } else {
+                group.setRelationshipDiagnosis(findings.toString());
+            }
+        }
+
+        // Step 4: hand the groups to the template.
+        return new ArrayList<>(groupsByIp.values());
+    }
+
+    /**
+     * Merges the raw Prometheus responses into one {@code MetricRecord} per host / container,
+     * builds the host groups and writes the rendered HTML report to disk.
+     * <p>
+     * Parsing and rendering failures are logged and do not propagate to the caller.
+     *
+     * @param rawResults task name -> raw JSON response body of that task
+     */
+    private void processResults0(Map<String, String> rawResults) {
+        // Merged records, keyed by "instance:container" ("instance:HOST" for host-level series).
+        Map<String, MetricRecord> mergedMap = new HashMap<>();
+        rawResults.forEach((taskName, jsonContent) -> {
+            try {
+                JsonNode resultNodes = objectMapper.readTree(jsonContent).path("data").path("result");
+                if (!resultNodes.isArray()) {
+                    return;
+                }
+                for (JsonNode node : resultNodes) {
+                    // 1. Labels: series without a "name" label are treated as host-level.
+                    JsonNode metric = node.path("metric");
+                    String instance = metric.path("instance").asText("unknown");
+                    String container = metric.has("name") ? metric.path("name").asText() : null;
+
+                    // 2. Look up (or create) the record shared by all tasks for this series.
+                    String key = instance + ":" + (container == null ? "HOST" : container);
+                    MetricRecord record = mergedMap.computeIfAbsent(key, k -> {
+                        MetricRecord newRecord = new MetricRecord();
+                        newRecord.setInstance(instance);
+                        newRecord.setContainer(container);
+                        newRecord.setName(container == null ? "Node CPU" : "Container CPU");
+                        return newRecord;
+                    });
+
+                    // 3. The sample is read from index 1 of the "value" array.
+                    JsonNode valueNode = node.path("value");
+                    if (valueNode.isArray() && valueNode.size() >= 2) {
+                        applyMetricValue(record, taskName, valueNode.get(1).asDouble());
+                    } else {
+                        log.debug("任务 [{}] 的序列 {} 缺少 value, 已跳过", taskName, key);
+                    }
+                }
+            } catch (Exception e) {
+                // Pass the exception itself so the stack trace is kept.
+                log.error("解析任务 [{}] 失败", taskName, e);
+            }
+        });
+        System.out.println("数据对齐完成,共计 " + mergedMap.size() + " 条指标记录。");
+
+        // 4. Build the host groups. The per-record analysis (getAnalysisResult) is not part of this report.
+        List<HostGroup> hostGroupList = generateUnifiedReport(mergedMap);
+
+        // Read the date once so the "date" model entry and the file name always agree.
+        LocalDate today = LocalDate.now();
+        Map<String, Object> root = new HashMap<>();
+        // LocalDate#toString yields the ISO form yyyy-MM-dd.
+        root.put("date", today.toString());
+        root.put("hostGroupList", hostGroupList);
+
+        try {
+            // 5. Render the composite (side-by-side layout) template and save it.
+            String htmlContent = renderHtml("host_group_report.ftl", root);
+            // NOTE(review): the output directory is hard-coded to a developer home directory.
+            Path outputPath = Paths.get("/home/reghao/Downloads", "report_" + today + ".html");
+            if (Files.notExists(outputPath.getParent())) {
+                Files.createDirectories(outputPath.getParent());
+            }
+            Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
+            System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
+        } catch (Exception e) {
+            log.error("渲染或保存报表失败", e);
+        }
+    }
+
+    /**
+     * Files one sample into the record field selected by the task name.
+     * <p>
+     * Only CPU tasks are accepted: the memory tasks share the record key with the CPU tasks of
+     * the same container, so matching on a bare "_avg" / "_max" would let memory figures (MB)
+     * overwrite the CPU percentages used by the host / container analysis.
+     */
+    private void applyMetricValue(MetricRecord record, String taskName, double value) {
+        if (taskName.contains("_cpu_avg")) {
+            record.setAvgValue(value);
+        } else if (taskName.contains("_cpu_max")) {
+            record.setMaxValue(value);
+        } else if (taskName.contains("_cpu_core")) {
+            record.setCpuCore(value);
+        } else if (taskName.contains("_cpu_limit")) {
+            record.setCpuLimit(value);
+        } else {
+            log.debug("任务 [{}] 不属于 CPU 指标, 已忽略", taskName);
+        }
+    }
+
+    /**
+     * Groups records by their full instance label. Host-level records create the groups;
+     * container records are attached to the group of the same instance and are left out when no
+     * host record exists for it. Every group is then passed to {@code analyzeRelationship}.
+     *
+     * @param mergedMap merged metric records (host-level records have a {@code null} container)
+     * @return a new list holding the groups
+     */
+    private List<HostGroup> buildHostGroups(Map<String, MetricRecord> mergedMap) {
+        Map<String, HostGroup> groupsByInstance = new HashMap<>();
+
+        // Pass 1: one group per host-level record.
+        for (MetricRecord record : mergedMap.values()) {
+            if (record.getContainer() != null) {
+                continue;
+            }
+            HostGroup created = new HostGroup();
+            created.setHostIp(record.getInstance());
+            created.setHostRecord(record);
+            groupsByInstance.put(record.getInstance(), created);
+        }
+
+        // Pass 2: attach containers to their host group.
+        // The contribution of a container to its host would be
+        // (container utilisation * container quota cores) / host cores, but the quota cores are
+        // not available here, so containers are attached for display only; their utilisation is
+        // assumed to be relative to their limit.
+        for (MetricRecord record : mergedMap.values()) {
+            if (record.getContainer() == null) {
+                continue;
+            }
+            HostGroup owner = groupsByInstance.get(record.getInstance());
+            if (owner != null) {
+                owner.getContainerRecords().add(record);
+            }
+        }
+
+        // Pass 3: relationship analysis per group.
+        List<HostGroup> groups = new ArrayList<>(groupsByInstance.values());
+        for (HostGroup group : groups) {
+            analyzeRelationship(group);
+        }
+        return groups;
+    }
+
+    /**
+     * Sets the group's diagnosis from the host's average value alone; groups without a host
+     * record are left untouched.
+     *
+     * @param group the host group to annotate
+     */
+    private void analyzeRelationship(HostGroup group) {
+        MetricRecord host = group.getHostRecord();
+        if (host == null) {
+            return;
+        }
+
+        // Container averages are percentages of each container's own limit, so without the limit
+        // core counts their sum has no physical meaning; only the host-wide level is evaluated.
+        String diagnosis = host.getAvgValue() > 80
+                ? "🚨 宿主机整体负载极高,请检查资源分配。"
+                : "✅ 节点运行状态平稳。";
+        group.setRelationshipDiagnosis(diagnosis);
+    }
+
+    /**
+     * Groups records by their full instance label and, for groups that have both a host record
+     * and containers, derives system overhead, peak cohesion and a diagnosis text from the raw
+     * (unscaled) container values.
+     *
+     * @param mergedMap merged metric records (host-level records have a {@code null} container)
+     * @return a new list holding the groups
+     */
+    public List<HostGroup> buildHierarchyAndAnalyze0(Map<String, MetricRecord> mergedMap) {
+        // Step 1: bucket every record by its instance label.
+        Map<String, HostGroup> groupsByInstance = new HashMap<>();
+        for (MetricRecord record : mergedMap.values()) {
+            HostGroup bucket = groupsByInstance.computeIfAbsent(record.getInstance(), key -> new HostGroup());
+            if (record.getContainer() != null) {
+                bucket.getContainerRecords().add(record);
+            } else {
+                bucket.setHostRecord(record);
+            }
+        }
+
+        // Step 2: analyse the host / container relationship of each bucket.
+        for (HostGroup group : groupsByInstance.values()) {
+            MetricRecord host = group.getHostRecord();
+            List<MetricRecord> containers = group.getContainerRecords();
+            if (host == null || containers.isEmpty()) {
+                continue;
+            }
+
+            // System overhead = host average - sum of container averages, floored at 0.
+            double containerAvgTotal = containers.stream().mapToDouble(c -> c.getAvgValue()).sum();
+            group.setSystemOverhead(Math.max(0, host.getAvgValue() - containerAvgTotal));
+
+            // Peak cohesion = host peak / sum of container peaks (0 when that sum is not positive).
+            double containerMaxTotal = containers.stream().mapToDouble(c -> c.getMaxValue()).sum();
+            group.setPeakCohesion(containerMaxTotal > 0 ? host.getMaxValue() / containerMaxTotal : 0);
+
+            // Step 3: assemble the diagnosis text.
+            StringBuilder findings = new StringBuilder();
+            if (group.getSystemOverhead() > 20.0) {
+                findings.append(String.format("⚠️ 系统隐性损耗过高(%.1f%%),请检查宿主机原生进程。 ", group.getSystemOverhead()));
+            }
+            if (host.getMaxValue() > 80.0 && group.getPeakCohesion() > 0.8) {
+                findings.append("🚨 探测到明显的容器并发冲撞,建议交错执行高负载任务。 ");
+            }
+            if (findings.length() == 0) {
+                findings.append("✅ 宿主与容器负载分配比例健康。");
+            }
+            group.setRelationshipDiagnosis(findings.toString());
+        }
+
+        return new ArrayList<>(groupsByInstance.values());
+    }
+    /**
+     * Groups records by host IP (instance label without the port) and, for every group that has
+     * a host record, derives the system overhead and a diagnosis text from the raw (unscaled)
+     * container averages.
+     *
+     * @param mergedMap merged metric records (host-level records have a {@code null} container)
+     * @return a new list holding the groups
+     */
+    public List<HostGroup> buildHierarchyAndAnalyze(Map<String, MetricRecord> mergedMap) {
+        // Step 1: group by bare IP, without the port.
+        Map<String, HostGroup> groupsByIp = new HashMap<>();
+        for (MetricRecord record : mergedMap.values()) {
+            // e.g. "192.168.1.10:9100" -> "192.168.1.10"
+            String instanceLabel = record.getInstance();
+            String ipAddress = instanceLabel.contains(":") ? instanceLabel.split(":")[0] : instanceLabel;
+
+            HostGroup owner = groupsByIp.computeIfAbsent(ipAddress, key -> {
+                HostGroup created = new HostGroup();
+                created.setHostIp(ipAddress);
+                return created;
+            });
+
+            // A null container marks host data (node_exporter); anything else is container data (cadvisor).
+            if (record.getContainer() != null) {
+                owner.getContainerRecords().add(record);
+            } else {
+                owner.setHostRecord(record);
+            }
+        }
+
+        // Step 2: analyse each group; overhead is only computed when host data exists.
+        for (HostGroup group : groupsByIp.values()) {
+            MetricRecord host = group.getHostRecord();
+            List<MetricRecord> containers = group.getContainerRecords();
+            if (host == null) {
+                continue;
+            }
+
+            // Hidden overhead = host total - container total. With many containers the sum can
+            // slightly exceed the host value (sampling skew), hence the floor at 0.
+            double containerAvgTotal = containers.stream().mapToDouble(c -> c.getAvgValue()).sum();
+            group.setSystemOverhead(Math.max(0, host.getAvgValue() - containerAvgTotal));
+
+            StringBuilder findings = new StringBuilder();
+            if (group.getSystemOverhead() > 20.0) {
+                findings.append(String.format("⚠️ 宿主机非容器损耗较高(%.1f%%)。 ", group.getSystemOverhead()));
+            }
+
+            if (containers.isEmpty()) {
+                findings.append("ℹ️ 该节点当前未发现运行中的业务容器。");
+            } else if (findings.length() == 0) {
+                findings.append("✅ 宿主与容器负载分配正常。");
+            }
+            group.setRelationshipDiagnosis(findings.toString());
+        }
+
+        return new ArrayList<>(groupsByIp.values());
+    }
+
+    /*public String analyze(MetricRecord record) {
+        StringBuilder report = new StringBuilder();
+        boolean isContainer = record.getContainer() != null;
+
+        // 1. 统一单位描述
+        String unit = isContainer ? "核" : "%";
+        String displayName = isContainer ? "容器:" + record.getContainer() : "宿主机:" + record.getInstance();
+
+        report.append(String.format("【%s 报告】均值: %.2f%s, 峰值: %.2f%s\n",
+                displayName, record.getAvgValue(), unit, record.getMaxValue(), unit));
+
+        // 2. 针对性设定“防零处理”的底数 (Silence Threshold)
+        // 宿主机分母至少 1%;容器分母至少 0.1 核
+        double silenceThreshold = isContainer ? 0.1 : 1.0;
+        double targetThreshold = isContainer ? CpuThresholdConfig.CONTAINER_THRESHOLD : CpuThresholdConfig.NODE_THRESHOLD;
+
+        // 3. 判定:容量不足 (注意容器的 threshold 应该是它的 CPU Limit 核心数)
+        if (record.getAvgValue() >= targetThreshold) {
+            report.append(" -> [严重异常] 均值已触及水位线,资源严重不足!\n");
+        }
+
+        // 4. 判定:毛刺率 (Spike Rate)
+        else {
+            double ratio = record.getMaxValue() / Math.max(record.getAvgValue(), silenceThreshold);
+
+            // 判定阈值:如果峰值本身很小(比如宿主机 < 5% 或 容器 < 0.2核),则忽略毛刺
+            double significantPeak = isContainer ? 0.2 : 5.0;
+
+            if (record.getMaxValue() > significantPeak && ratio > 5.0) {
+                report.append(String.format(" -> [预警] 瞬时毛刺严重(%.1fx)。", ratio));
+                if (isContainer) {
+                    report.append("建议检查容器内部是否有突发短查询或频繁GC。\n");
+                } else {
+                    report.append("建议检查宿主机是否有系统级任务或IO等待引起的CPU飙升。\n");
+                }
+            } else {
+                report.append(" -> [正常] 运行平稳。\n");
+            }
+        }
+
+        return report.toString();
+    }*/
+
+    /**
+     * Classifies one metric record as CRITICAL, WARNING or NORMAL.
+     * <p>
+     * CRITICAL when the average reaches {@code UNIFIED_THRESHOLD}; otherwise WARNING when the
+     * peak exceeds {@code UNIFIED_SIGNIFICANT} and the peak-to-average ratio exceeds
+     * {@code UNIFIED_RATIO}; otherwise NORMAL.
+     *
+     * @param record the record to evaluate
+     * @return a new result carrying the record's identity, values, ratio, status and message
+     */
+    public AnalysisResult getAnalysisResult(MetricRecord record) {
+        double average = record.getAvgValue();
+        double peak = record.getMaxValue();
+        // The denominator is floored at UNIFIED_SILENCE so a near-zero average cannot inflate the ratio.
+        double spikeRatio = peak / Math.max(average, CpuThresholdConfig.UNIFIED_SILENCE);
+
+        String status;
+        String message;
+        if (average >= CpuThresholdConfig.UNIFIED_THRESHOLD) {
+            status = "CRITICAL";
+            message = String.format("均值(%.2f%%)触及水位线,资源严重不足", average);
+        } else if (peak > CpuThresholdConfig.UNIFIED_SIGNIFICANT && spikeRatio > CpuThresholdConfig.UNIFIED_RATIO) {
+            status = "WARNING";
+            message = String.format("瞬时毛刺严重(%.1fx),峰值达到 %.2f%%", spikeRatio, peak);
+        } else {
+            status = "NORMAL";
+            message = "运行平稳";
+        }
+
+        AnalysisResult result = new AnalysisResult();
+        result.setInstance(record.getInstance());
+        result.setContainer(record.getContainer());
+        // Values share one scale, so the unit is always percent.
+        result.setUnit("%");
+        result.setAvg(average);
+        result.setMax(peak);
+        result.setRatio(spikeRatio);
+        result.setStatus(status);
+        result.setMessage(message);
+        return result;
+    }
+
+    /**
+     * Parses the raw Prometheus responses into one list of records per task.
+     * <p>
+     * Each element of {@code data.result} becomes its own record (no merging across tasks).
+     * A task whose response cannot be parsed is mapped to an empty list.
+     *
+     * @param results task name -> raw JSON response body
+     * @return task name -> records parsed from that task's response
+     */
+    private Map<String, List<MetricRecord>> parsePrometheusJson(Map<String, String> results) {
+        Map<String, List<MetricRecord>> recordsByTask = new HashMap<>();
+        for (Map.Entry<String, String> task : results.entrySet()) {
+            String taskName = task.getKey();
+            List<MetricRecord> parsed = new ArrayList<>();
+            try {
+                // Standard response path: data -> result
+                JsonNode series = objectMapper.readTree(task.getValue()).path("data").path("result");
+                if (series.isArray()) {
+                    for (JsonNode entry : series) {
+                        MetricRecord record = new MetricRecord();
+
+                        // 1. Labels: the container comes from the "name" label, which host-level series lack.
+                        JsonNode labels = entry.path("metric");
+                        record.setInstance(labels.path("instance").asText("unknown"));
+                        if (labels.has("name")) {
+                            record.setContainer(labels.path("name").asText());
+                        }
+
+                        // 2. Sample: "value" is [timestamp, "value_string"]; the number arrives as a string.
+                        JsonNode sample = entry.path("value");
+                        if (sample.isArray() && sample.size() >= 2) {
+                            double number = sample.get(1).asDouble();
+                            // The task name decides which field receives the number.
+                            if (taskName.contains("avg")) {
+                                record.setAvgValue(number);
+                            } else if (taskName.contains("max")) {
+                                record.setMaxValue(number);
+                            }
+                            // Tag the record with the task name for identification.
+                            record.setName(taskName);
+                        }
+
+                        parsed.add(record);
+                    }
+                }
+                recordsByTask.put(taskName, parsed);
+            } catch (Exception e) {
+                System.err.println("解析任务 [" + taskName + "] 失败: " + e.getMessage());
+                recordsByTask.put(taskName, Collections.emptyList());
+            }
+        }
+
+        return recordsByTask;
+    }
+
     private void parseToHostMap(String json, Map<String, HostInfo> hostMap, String type) throws Exception {
     private void parseToHostMap(String json, Map<String, HostInfo> hostMap, String type) throws Exception {
         JsonNode root = objectMapper.readTree(json);
         JsonNode root = objectMapper.readTree(json);
         JsonNode resultList = root.path("data").path("result");
         JsonNode resultList = root.path("data").path("result");
@@ -181,37 +751,6 @@ public class PrometheusService {
         }
         }
     }
     }
 
 
-    public static Configuration getTemplateConfiguration() throws TemplateException, IOException {
-        FreeMarkerConfigurer configurer = new FreeMarkerConfigurer();
-        // 1. 设置模板存放路径 (通常在 resources/templates 下)
-        configurer.setTemplateLoaderPath("classpath:/templates/");
-        // 2. 设置默认编码
-        configurer.setDefaultEncoding("UTF-8");
-        // 3. 配置 FreeMarker 的原生属性
-        Properties settings = new Properties();
-        settings.setProperty("template_update_delay", "0"); // 检查模板更新延迟
-        settings.setProperty("default_encoding", "UTF-8");
-        settings.setProperty("number_format", "0.##");      // 数字格式化,防止 1000 变 1,000
-        settings.setProperty("datetime_format", "yyyy-MM-dd HH:mm:ss");
-        configurer.setFreemarkerSettings(settings);
-        // 重要:必须调用此方法来初始化内部的 Configuration 对象
-        configurer.afterPropertiesSet();
-        return configurer.getConfiguration();
-    }
-
-    /**
-     * @param templatePath 相对于 src/main/resources/templates/ 的路径
-     * @return
-     * @date 2026-03-29 00:03:145
-     */
-    public String renderHtml(String templatePath, Map<String, Object> root) throws Exception {
-        // 2. 加载模板文件
-        // 默认路径:src/main/resources/templates/pillar_report.ftl
-        Template template = getTemplateConfiguration().getTemplate(templatePath);
-        // 渲染并返回 HTML 字符串, FreeMarkerTemplateUtils 会自动处理异常并转换为 String
-        return FreeMarkerTemplateUtils.processTemplateIntoString(template, root);
-    }
-
     /**
     /**
      * 辅助方法:构建 query_range 的完整 URL
      * 辅助方法:构建 query_range 的完整 URL
      */
      */
@@ -487,11 +1026,10 @@ public class PrometheusService {
         model.put("timeLabels", String.join(",", timeLabels));
         model.put("timeLabels", String.join(",", timeLabels));
 
 
         // 6. 渲染与输出
         // 6. 渲染与输出
-        /*String htmlContent = renderHtml("container_report_v2.ftl", model);
+        String htmlContent = renderHtml("container_report_v2.ftl", model);
         Path outputPath = Paths.get("/home/reghao/Downloads", "container_report_v2_" + LocalDate.now() + ".html");
         Path outputPath = Paths.get("/home/reghao/Downloads", "container_report_v2_" + LocalDate.now() + ".html");
         Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
         Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
-        System.out.println("✅ 报表生成成功: " + outputPath.toAbsolutePath());*/
-        System.out.println();
+        System.out.println("✅ 报表生成成功: " + outputPath.toAbsolutePath());
     }
     }
 
 
     public ContainerReportVO getReportData() {
     public ContainerReportVO getReportData() {
@@ -573,113 +1111,11 @@ public class PrometheusService {
         return report;
         return report;
     }
     }
 
 
-    public void detect() throws Exception {
-        // 1. 计算时间范围:昨天 00:00:00 到 23:59:59
-        // 也可以根据需求改为:当前时间向前推 24 小时
-        long end = Instant.now().getEpochSecond();
-        long start = end - (24 * 3600);
-        String step = "5m"; // 30分钟一个采样点,适合 24h 趋势图
-
-        // 2. 定义 PromQL 查询语句
-        String cpuQuery = "sum(irate(container_cpu_usage_seconds_total{name!=\"\"}[5m])) by (name, instance)";
-        String memQuery = "sum(container_memory_working_set_bytes{name!=\"\"}) by (name, instance) / 1024 / 1024";
-        String diskQuery = "max(rate(node_disk_io_time_seconds_total[5m])) by (instance)";
-        String netQuery = "sum(irate(node_network_receive_bytes_total[5m])) by (instance) / 1024 / 1024";
-
-        // 3. 构造异步任务 Map
-        // 注意:这里调用的是 query_range 接口
-        Map<String, String> tasks = Map.of(
-                "cpu", buildRangeUrl(cpuQuery, start, end, step),
-                "mem", buildRangeUrl(memQuery, start, end, step)
-        );
-        log.info("开始并行抓取 Prometheus 四大支柱数据...");
-
-        // 4. 并行执行并阻塞等待结果(join)
-        Map<String, String> rawResults = promClient.fetchAllMetrics0(tasks).join();
-        jitter2(rawResults);
-    }
-
-    private void jitter1(Map<String, String> rawResults) throws Exception {
-        JitterAnalysisService jitterAnalysisService = new JitterAnalysisService();
-        List<ContainerHealthReport> list = jitterAnalysisService.analyzeMetrics(rawResults.get("cpu"), rawResults.get("mem"));
-        Map<String, Object> model = new HashMap<>();
-        // 关键:这里的 key "healthReports" 必须与模板中的 <#list healthReports> 匹配
-        model.put("healthReports", list);
-
-        String templatePath = "risk_dashboard.ftl";
-        String htmlContent = renderHtml(templatePath, model);
-        // 后续可以调用 playwright 截图
-        // screenshotService.capture(htmlContent, "container_report.png");
-
-        Path outputPath = Paths.get("/home/reghao/Downloads", "risk_dashboard_" + LocalDate.now() + ".html");
-        if (Files.notExists(outputPath.getParent())) {
-            Files.createDirectories(outputPath.getParent());
-        }
-        Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
-        System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
-    }
-
-    @Data
-    public class InstanceData {
-        private Map<String, List<Double>> cpuSeries = new TreeMap<>();
-        private Map<String, List<Double>> memSeries = new TreeMap<>();
-    }
-
-    private void parseToMap(String json, Map<String, InstanceData> groupedMap,
-                            List<String> timeLabels, boolean isCpu) throws Exception {
-        JsonNode results = objectMapper.readTree(json).path("data").path("result");
-        boolean labelsExtracted = (timeLabels == null);
-
-        for (JsonNode res : results) {
-            String name = res.path("metric").path("name").asText();
-            String instance = res.path("metric").path("instance").asText().split(":")[0];
-
-            InstanceData data = groupedMap.computeIfAbsent(instance, k -> new InstanceData());
-            Map<String, List<Double>> targetSeries = isCpu ? data.getCpuSeries() : data.getMemSeries();
-
-            List<Double> values = new ArrayList<>();
-            for (JsonNode v : res.path("values")) {
-                if (!labelsExtracted) {
-                    String time = Instant.ofEpochSecond(v.get(0).asLong())
-                            .atZone(ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("HH:mm"));
-                    timeLabels.add("'" + time + "'");
-                }
-                values.add(Math.round(v.get(1).asDouble() * 100.0) / 100.0);
-            }
-            labelsExtracted = true;
-            targetSeries.put(name, values);
-        }
-    }
-
-    private void jitter2(Map<String, String> rawResults) throws Exception {
-        Map<String, InstanceData> groupedMap = new TreeMap<>();
-        List<String> timeLabels = new ArrayList<>();
-
-        // 1. 解析 CPU 数据
-        parseToMap(rawResults.get("cpu"), groupedMap, timeLabels, true);
-        // 2. 解析内存数据 (不再重复提取 timeLabels)
-        parseToMap(rawResults.get("mem"), groupedMap, null, false);
-
-        Map<String, Object> model = new HashMap<>();
-        model.put("groupedMap", groupedMap);
-        model.put("timeLabels", String.join(",", timeLabels));
-
-        String templatePath = "jitter.ftl";
-        String htmlContent = renderHtml(templatePath, model);
-        Path outputPath = Paths.get("/home/reghao/Downloads", "jitter_" + LocalDate.now() + ".html");
-        if (Files.notExists(outputPath.getParent())) {
-            Files.createDirectories(outputPath.getParent());
-        }
-        Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
-        System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
-    }
-
     public static void main(String[] args) throws Exception {
     public static void main(String[] args) throws Exception {
-        //PrometheusService prometheusService = new PrometheusService();
-        //prometheusService.generateContainerReport1();
-        //prometheusService.getContainerReportData();
-        //prometheusService.generatePillarReport();
+        PrometheusService prometheusService = new PrometheusService();
+        prometheusService.generateContainerReport1();
+        prometheusService.generateContainerReport();
+        prometheusService.generatePillarReport();
         //prometheusService.generateDailyReport();
         //prometheusService.generateDailyReport();
-        //prometheusService.detect();
     }
     }
 }
 }

+ 19 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/model/AnalysisResult.java

@@ -0,0 +1,19 @@
+package cn.reghao.devops.mgr.ops.srv.mon.model;
+
+import lombok.Data;
+
+/**
+ * One analysed row of the inspection report: the metric figures plus the verdict for a host or a container.
+ *
+ * @author reghao
+ * @date 2026-04-01 11:23:37
+ */
+@Data
+public class AnalysisResult {
+    private String instance; // instance label; report.ftl shows it as the row title when no container is set
+    private String container; // container name; report.ftl treats a missing value as a host-level row
+    private String unit; // unit suffix appended to avg and max when rendered
+    private double avg; // average value of the metric
+    private double max; // peak value of the metric
+    private double ratio; // rendered with an "x" suffix; presumably max / avg - TODO confirm against the producer
+    private String status; // "NORMAL", "WARNING", "CRITICAL"
+    private String message; // text report.ftl shows instead of the plain label when status is not "NORMAL"
+}

+ 21 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/model/CpuThresholdConfig.java

@@ -0,0 +1,21 @@
+package cn.reghao.devops.mgr.ops.srv.mon.model;
+
+/**
+ * Threshold constants for CPU analysis; the code that reads them is not part of this file.
+ *
+ * @author reghao
+ * @date 2026-04-01 11:41:27
+ */
+public class CpuThresholdConfig {
+    // Unified scale: every value below is expressed as a percentage (%)
+
+    // Zero-guard floor: an average below 1% is treated as 1%, so a tiny denominator cannot blow up the ratio
+    public static final double UNIFIED_SILENCE = 1.0;
+
+    // Significance threshold: a peak must exceed 10% before the spike is considered worth analysing
+    public static final double UNIFIED_SIGNIFICANT = 10.0;
+
+    // Spike-ratio threshold: a peak-to-average ratio above 5x is classified as a warning
+    public static final double UNIFIED_RATIO = 5.0;
+
+    // Watermark threshold: an average above 80% is classified as critical
+    public static final double UNIFIED_THRESHOLD = 80.0;
+}

+ 22 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/model/HostGroup.java

@@ -0,0 +1,22 @@
+package cn.reghao.devops.mgr.ops.srv.mon.model;
+
+import lombok.Data;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Groups the metric record of one host with the records of its containers and the relationship-analysis outputs.
+ *
+ * @author reghao
+ * @date 2026-04-01 14:25:40
+ */
+@Data
+public class HostGroup {
+    private String hostIp; // shown as the node title in host_group_report.ftl
+    private MetricRecord hostRecord;           // host-level metric
+    private List<MetricRecord> containerRecords = new ArrayList<>(); // metrics of the containers on this host
+
+    // Relationship analysis results
+    private double systemOverhead;             // hidden system overhead (HostAvg - SumContainerAvg)
+    private double peakCohesion;               // peak correlation / degree of resonance
+    private String relationshipDiagnosis;      // conclusion of the relationship diagnosis
+}

+ 20 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/model/MetricRecord.java

@@ -0,0 +1,20 @@
+package cn.reghao.devops.mgr.ops.srv.mon.model;
+
+import lombok.Data;
+
+/**
+ * A single metric sample set (24h average and peak) for a host instance or one of its containers.
+ *
+ * @author reghao
+ * @date 2026-04-01 10:12:43
+ */
+@Data
+public class MetricRecord {
+    private String instance; // instance name
+    private String container; // container name
+    private String name;        // metric name (e.g. CPU, Memory)
+    private double avgValue;    // 24h average
+    private double maxValue;    // 24h peak
+    private double cpuCore; // rendered by host_group_report.ftl as the host's physical CPU core count
+    private double cpuLimit; // presumably the CPU limit applied to the container - TODO confirm
+    private double relativeToHostAvg; // rendered as a percentage relative to the host (average view)
+    private double relativeToHostMax; // rendered as a percentage relative to the host (peak contribution)
+}

+ 111 - 0
mgr/src/main/resources/templates/host_group_report.ftl

@@ -0,0 +1,111 @@
+<style>
+.report-container { font-family: sans-serif; max-width: 1100px; margin: auto; }
+.card { border: 1px solid #d1d5da; border-radius: 6px; margin-bottom: 20px; box-shadow: 0 1px 3px rgba(0,0,0,0.1); }
+.card-header { background: #24292e; color: white; padding: 12px; display: flex; justify-content: space-between; align-items: center; }
+.header-left { display: flex; flex-direction: column; }
+.header-right { display: flex; gap: 20px; text-align: right; }
+.node-title { font-size: 16px; font-weight: bold; }
+.node-sub { font-size: 11px; opacity: 0.7; margin-top: 2px; }
+.metric-box { display: flex; flex-direction: column; }
+.metric-label { font-size: 10px; text-transform: uppercase; opacity: 0.8; margin-bottom: 2px; }
+.metric-value { font-size: 16px; font-weight: bold; font-family: monospace; }
+
+.usage-wrapper { padding: 15px; background: #f6f8fa; border-bottom: 1px solid #e1e4e8; }
+.bar-container { height: 20px; display: flex; background: #e1e4e8; border-radius: 10px; overflow: hidden; margin: 10px 0; }
+.bar-used { background: #0366d6; }
+.bar-sys { background: #6a737d; }
+
+.table { width: 100%; border-collapse: collapse; table-layout: fixed; }
+.table th { background: #fafbfc; border-bottom: 1px solid #e1e4e8; padding: 10px; text-align: left; font-size: 11px; color: #586069; }
+.table td { padding: 10px; border-bottom: 1px dotted #e1e4e8; font-size: 13px; word-break: break-all; }
+
+.status-tag { padding: 2px 6px; border-radius: 3px; font-size: 11px; font-weight: bold; }
+.tag-critical { background: #ffdce0; color: #cb2431; }
+.tag-normal { background: #dcffe4; color: #28a745; }
+
+/* 强调列样式 */
+.col-host-relative { background-color: #f1f8ff; font-weight: 500; }
+.small-label { font-size: 10px; color: #889; display: block; }
+</style>
+
+<div class="report-container">
+    <h2>🚀 资源关联审计日报 (${date})</h2>
+
+    <#list hostGroupList as group>
+        <div class="card">
+            <div class="card-header">
+                <div class="header-left">
+                    <span class="node-title">🌐 节点: ${group.hostIp}</span>
+                    <span class="node-sub">规格: ${group.hostRecord.cpuCore}核物理 CPU</span>
+                </div>
+                <div class="header-right">
+                    <div class="metric-box">
+                        <span class="metric-label">宿主机均值</span>
+                        <span class="metric-value">${group.hostRecord.avgValue?string("0.0")}%</span>
+                    </div>
+                    <div class="metric-box">
+                        <span class="metric-label">宿主机峰值</span>
+                        <span class="metric-value" style="${(group.hostRecord.maxValue > 80)?string('color:#f97583;','')}">
+${group.hostRecord.maxValue?string("0.0")}%
+                        </span>
+                    </div>
+                </div>
+            </div>
+
+            <div class="usage-wrapper">
+                <div style="font-size: 12px; color: #586069;">
+                    资源成分(均值视角):容器贡献 <b style="color:#0366d6;">■</b>
+                    系统损耗 <b style="color:#6a737d;">■</b>
+                </div>
+                <div class="bar-container">
+                    <#assign containerPart = group.hostRecord.avgValue - group.systemOverhead>
+                    <#if (containerPart < 0)><#assign containerPart = 0></#if>
+                    <div class="bar-used" style="width: ${containerPart}%"></div>
+                    <div class="bar-sys" style="width: ${group.systemOverhead}%"></div>
+                </div>
+                <div style="font-size: 13px;">💡 诊断: ${group.relationshipDiagnosis}</div>
+            </div>
+
+            <table class="table">
+                <thead>
+                    <tr>
+                        <th style="width: 20%;">下属容器</th>
+                        <th style="width: 18%;">自身利用率<br><span style="font-weight:normal;">(相对Limit)</span></th>
+                        <th style="width: 22%;" class="col-host-relative">节点贡献占比<br><span style="font-weight:normal;">(相对宿主机)</span></th>
+                        <th style="width: 10%;">毛刺率</th>
+                        <th style="width: 15%;">状态</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <#-- One table row per container of the current host group. -->
+                    <#list group.containerRecords as c>
+                        <#-- Spike ratio = peak / average. The average is floored at 1 so that a near-zero
+                             denominator cannot inflate the ratio. ?then keeps the value numeric; the previous
+                             ?string(...)?number round trip went through locale-dependent text formatting. -->
+                        <#assign ratio = c.maxValue / ((c.avgValue < 1)?then(1, c.avgValue))>
+                        <tr>
+                            <td><strong>${c.container}</strong></td>
+                            <#-- Own utilisation column -->
+                            <td>
+                                <div>均: ${c.avgValue?string("0.0")}%</div>
+                                <div style="color: #666; font-size: 11px;">峰: ${c.maxValue?string("0.0")}%</div>
+                            </td>
+                            <#-- Relative-to-host column (newly added) -->
+                            <td class="col-host-relative">
+                                <div style="color: #0366d6;">均: ${c.relativeToHostAvg?string("0.00")}%</div>
+                                <div style="color: #586069; font-size: 11px;">峰值贡献: ${c.relativeToHostMax?string("0.00")}%</div>
+                            </td>
+                            <#-- Spike ratio. The pattern must be "0.0": in "0.1" the digit 1 is a literal suffix,
+                                 which made every value render as "<integer>.1". -->
+                            <td style="${(ratio > 5)?string('color:red;font-weight:bold;','')}">${ratio?string("0.0")}x</td>
+                            <td>
+                                <#if (c.maxValue > 90)>
+                                    <span class="status-tag tag-critical">压测/满载</span>
+                                <#elseif (c.relativeToHostAvg > 20)>
+                                    <span class="status-tag tag-critical">资源大户</span>
+                                <#else>
+                                    <span class="status-tag tag-normal">正常</span>
+                                </#if>
+                            </td>
+                        </tr>
+                    </#list>
+                </tbody>
+            </table>
+        </div>
+    </#list>
+</div>

+ 0 - 73
mgr/src/main/resources/templates/jitter.ftl

@@ -1,73 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-<meta charset="UTF-8">
-    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
-    <style>
-.node-card { background: #fff; border: 1px solid #ddd; margin-bottom: 20px; padding: 15px; border-radius: 8px; }
-.node-title { font-size: 18px; font-weight: bold; color: #333; margin-bottom: 15px; border-left: 5px solid #1890ff; padding-left: 10px; }
-.chart-row { display: flex; gap: 20px; }
-.chart-item { flex: 1; height: 350px; background: #fafafa; padding: 10px; }
-</style>
-</head>
-<body style="background:#f5f5f5; padding: 20px;">
-
-<#list groupedMap?keys as instance>
-    <div class="node-card">
-        <div class="node-title">节点实例: ${instance}</div>
-        <div class="chart-row">
-            <#-- CPU 图表容器 -->
-            <div id="cpu_${instance?replace('.','_')}" class="chart-item"></div>
-            <#-- 内存图表容器 -->
-            <div id="mem_${instance?replace('.','_')}" class="chart-item"></div>
-        </div>
-    </div>
-
-    <script>
-    (function() {
-const timeData = [${timeLabels}];
-        const commonOption = {
-animation: false,
-tooltip: { trigger: 'axis' },
-            legend: { type: 'scroll', bottom: 0 },
-            grid: { top: 40, bottom: 60, left: 50, right: 20 },
-            xAxis: { type: 'category', boundaryGap: false, data: timeData }
-        };
-
-        // 渲染 CPU
-        const cpuChart = echarts.init(document.getElementById('cpu_${instance?replace(".","_")}'));
-        cpuChart.setOption({
-...commonOption,
-title: { text: 'CPU 使用率 (%)', left: 'center' },
-            yAxis: { type: 'value', axisLabel: { formatter: '{value}%' } },
-            series: [
-                <#list groupedMap[instance].cpuSeries?keys as cName>
-                {
-name: '${cName}', type: 'line', smooth: true, symbol: 'none',
-                    data: [${groupedMap[instance].cpuSeries[cName]?join(",")}]
-                }<#if cName_has_next>,</#if>
-                </#list>
-            ]
-        });
-
-        // 渲染内存
-        const memChart = echarts.init(document.getElementById('mem_${instance?replace(".","_")}'));
-        memChart.setOption({
-...commonOption,
-title: { text: '内存占用 (MB)', left: 'center' },
-            yAxis: { type: 'value', axisLabel: { formatter: '{value}M' } },
-            series: [
-                <#list groupedMap[instance].memSeries?keys as cName>
-                {
-name: '${cName}', type: 'line', smooth: true, symbol: 'none',
-                    data: [${groupedMap[instance].memSeries[cName]?join(",")}]
-                }<#if cName_has_next>,</#if>
-                </#list>
-            ]
-        });
-    })();
-    </script>
-</#list>
-
-</body>
-</html>

+ 54 - 0
mgr/src/main/resources/templates/report.ftl

@@ -0,0 +1,54 @@
+<style>
+/* Styles for host/container grouping */
+.host-group-header { background-color: #f0f7ff !important; font-weight: bold; }
+.container-row { padding-left: 20px; color: #555; }
+.indent { margin-left: 15px; color: #999; }
+</style>
+
+<h2>🚀 运维巡检日报</h2>
+
+<table class="report-table">
+    <thead>
+        <tr>
+            <th>目标对象 (宿主机 > 容器)</th>
+            <th>指标均值</th>
+            <th>指标峰值</th>
+            <th>毛刺率</th>
+            <th>诊断结果</th>
+        </tr>
+    </thead>
+    <tbody>
+        <#-- hostMap is a Map<String, List<AnalysisResult>> -->
+        <#list hostMap?keys as instance>
+            <#list hostMap[instance] as res>
+                <tr class="${(res.container??)?string('container-row', 'host-group-header')}">
+                    <td>
+                        <#if res.container??>
+                            <span class="indent">└─</span>
+                            <span class="badge">容器</span>
+                            ${res.container}
+                        <#else>
+                            <span class="badge" style="background:#3498db; color:white;">宿主机</span>
+                            <strong>${res.instance}</strong>
+                        </#if>
+                    </td>
+                    <td>${res.avg?string("0.00")}${res.unit}</td>
+                    <td>${res.max?string("0.00")}${res.unit}</td>
+                    <td>${res.ratio?string("0.0")}x</td>
+                    <td>
+                        <#-- Only abnormal rows show their status message; normal rows just show a plain label -->
+                        <#if res.status == "NORMAL">
+                            <span class="status-NORMAL">正常</span>
+                        <#else>
+                            <span class="status-${res.status}">${res.message}</span>
+                        </#if>
+                    </td>
+                </tr>
+            </#list>
+        </#list>
+    </tbody>
+</table>
+
+<#if (hostMap?size == 0)>
+    <p style="color: green; text-align: center; margin-top: 50px;">☕ 今日大吉:未发现任何资源异常节点。</p>
+</#if>

+ 0 - 111
mgr/src/main/resources/templates/risk_dashboard.ftl

@@ -1,111 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-<meta charset="UTF-8">
-    <style>
-.risk-dashboard {
-font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
-background: #fffbe6;
-border: 1px solid #ffe58f;
-padding: 20px;
-border-radius: 8px;
-margin-bottom: 30px;
-}
-.risk-title {
-color: #856404;
-margin-top: 0;
-display: flex;
-align-items: center;
-font-size: 18px;
-border-bottom: 1px solid #ffe58f;
-padding-bottom: 10px;
-margin-bottom: 15px;
-}
-.risk-table {
-width: 100%;
-border-collapse: collapse;
-background: rgba(255, 255, 255, 0.5);
-border-radius: 4px;
-}
-.risk-table th {
-padding: 12px 8px;
-text-align: left;
-border-bottom: 2px solid #ffe58f;
-color: #856404;
-font-size: 14px;
-}
-.risk-table td {
-padding: 12px 8px;
-border-bottom: 1px solid #ffe58f;
-font-size: 13px;
-color: #555;
-vertical-align: top;
-}
-.status-tag {
-display: inline-block;
-padding: 2px 8px;
-border-radius: 4px;
-font-weight: bold;
-}
-.btn-action {
-color: #ffffff;
-background-color: #ff4d4f;
-padding: 4px 8px;
-border-radius: 4px;
-text-decoration: none;
-font-size: 12px;
-}
-</style>
-</head>
-<body>
-
-<div class="risk-dashboard">
-    <div class="risk-title">🚨 24h 容器异常风险看板</div>
-    <table class="risk-table">
-        <thead>
-            <tr>
-                <th>容器信息 (实例IP)</th>
-                <th>CPU 运行状态</th>
-                <th>内存运行状态</th>
-                <th>诊断建议</th>
-            </tr>
-        </thead>
-        <tbody>
-            <#assign hasError = false>
-            <#list healthReports as r>
-                <#-- 只显示非 ✅ 状态的记录 -->
-                <#if !r.cpuStatus?contains("✅") || !r.memStatus?contains("✅")>
-                <#assign hasError = true>
-                <tr>
-                    <td>
-                        <div style="font-weight: bold; color: #333;">${r.containerName}</div>
-                        <div style="font-size: 11px; color: #999;">${r.instanceIp}</div>
-                    </td>
-                    <td>
-                        <div>${r.cpuStatus}</div>
-                        <div style="font-size: 11px; color: #666;">抖动得分: ${r.cpuJitterScore?string("0.00")}</div>
-                    </td>
-                    <td>
-                        <div>${r.memStatus}</div>
-                        <div style="font-size: 11px; color: #666;">24h增长: ${(r.memGrowthRate * 100)?string("0.##")}%</div>
-                    </td>
-                    <td>
-                        <span class="btn-action">查阅日志 / 扩容</span>
-                    </td>
-                </tr>
-                </#if>
-            </#list>
-            
-            <#if !hasError>
-                <tr>
-                    <td colspan="4" style="text-align: center; padding: 30px; color: #52c41a;">
-                        🎉 过去 24 小时内所有容器运行平稳,未检测到显著波动。
-                    </td>
-                </tr>
-            </#if>
-        </tbody>
-    </table>
-</div>
-
-</body>
-</html>