Browse Source

更新 mon 模块

reghao 1 month ago
parent
commit
dc42880f00

+ 1 - 0
mgr/src/main/java/cn/reghao/devops/mgr/config/AppProperties.java

@@ -16,6 +16,7 @@ import org.springframework.context.annotation.Configuration;
 public class AppProperties {
     private String opsRoot;
     private String hostRoot;
+    private String prometheusBaseUrl;
 
     public boolean match() {
         return opsRoot.equals(hostRoot);

+ 15 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/ContainerReportVO.java

@@ -0,0 +1,15 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * View object for the container report: the time-axis labels plus one
+ * {@link HostData} entry per instance.
+ *
+ * @author reghao
+ * @date 2026-03-31 16:48:07
+ */
+@Data
+public class ContainerReportVO {
+    private List<String> timeLabels; // labels for the time axis
+    private List<HostData> instances; // one entry per instance
+}

+ 14 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/HostData.java

@@ -0,0 +1,14 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+/**
+ * CPU and memory metric groups of a single instance.
+ *
+ * @author reghao
+ * @date 2026-03-31 16:49:36
+ */
+@Data
+public class HostData {
+    private String name; // corresponds to the "instance" label
+    private MetricGroup cpu; // CPU series of this instance
+    private MetricGroup mem; // memory series of this instance
+}

+ 114 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/JitterAnalysisService.java

@@ -0,0 +1,114 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerHealthReport;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Turns two Prometheus range-query (matrix) responses, one for CPU and one for
+ * memory, into a per-container {@link ContainerHealthReport}.
+ *
+ * @author reghao
+ * @date 2026-03-30 15:59:35
+ */
+@Slf4j
+@Service
+public class JitterAnalysisService {
+    /** Joins container name and instance host into the key used to pair CPU and memory series. */
+    private static final String KEY_SEPARATOR = "@";
+
+    ObjectMapper objectMapper = new ObjectMapper();
+
+    /**
+     * Builds one health report per series found in the CPU response.
+     * Memory figures are attached when the memory response contains a series
+     * with the same container name and instance host.
+     *
+     * @param cpuJson raw JSON body of the CPU range query
+     * @param memJson raw JSON body of the memory range query
+     * @return one report per CPU series; empty when the CPU response has no series
+     * @throws Exception if either body is not valid JSON
+     */
+    public List<ContainerHealthReport> analyzeMetrics(String cpuJson, String memJson) throws Exception {
+        // 1. Parse both responses into "name@host" -> samples
+        Map<String, List<Double>> cpuDataMap = parseRawMatrix(cpuJson);
+        Map<String, List<Double>> memDataMap = parseRawMatrix(memJson);
+
+        List<ContainerHealthReport> reports = new ArrayList<>(cpuDataMap.size());
+
+        // 2. Diagnose every container that has a CPU series
+        for (Map.Entry<String, List<Double>> entry : cpuDataMap.entrySet()) {
+            String key = entry.getKey();
+            List<Double> cpuValues = entry.getValue();
+            List<Double> memValues = memDataMap.getOrDefault(key, List.of());
+
+            // Cut at the LAST separator instead of split("@"): split drops a trailing empty
+            // host (making parts[1] throw) and breaks container names that contain '@'.
+            int sep = key.lastIndexOf(KEY_SEPARATOR);
+            ContainerHealthReport report = ContainerHealthReport.builder()
+                    .containerName(key.substring(0, sep))
+                    .instanceIp(key.substring(sep + 1))
+                    .build();
+
+            analyzeCpuHealth(report, cpuValues);
+            analyzeMemHealth(report, memValues);
+
+            reports.add(report);
+        }
+        return reports;
+    }
+
+    /**
+     * Fills the CPU fields of the report (average, max, jitter score, status).
+     * Leaves them untouched when there are no samples.
+     */
+    private void analyzeCpuHealth(ContainerHealthReport report, List<Double> values) {
+        if (values.isEmpty()) {
+            return;
+        }
+
+        double avg = values.stream().mapToDouble(Double::doubleValue).average().orElse(0);
+        double max = values.stream().mapToDouble(Double::doubleValue).max().orElse(0);
+
+        // Population standard deviation of the samples
+        double variance = values.stream().mapToDouble(v -> Math.pow(v - avg, 2)).average().orElse(0);
+        double stdDev = Math.sqrt(variance);
+        // Coefficient of variation; forced to 0 when the average is at most 0.05
+        double cv = avg > 0.05 ? stdDev / avg : 0;
+
+        report.setCpuAvg(round(avg));
+        report.setCpuMax(round(max));
+        report.setCpuJitterScore(round(cv));
+
+        // Verdict: jitter takes precedence over sustained load
+        if (cv > 0.6) {
+            report.setCpuStatus("⚡ 剧烈抖动");
+        } else if (avg > 0.8) {
+            report.setCpuStatus("🔥 持续高负载");
+        } else {
+            report.setCpuStatus("✅ 运行平稳");
+        }
+    }
+
+    /**
+     * Fills the memory fields of the report (start, end, max, growth rate, status).
+     * Leaves them untouched when fewer than 10 samples are available.
+     */
+    private void analyzeMemHealth(ContainerHealthReport report, List<Double> values) {
+        if (values.size() < 10) {
+            return;
+        }
+
+        double start = values.get(0);
+        double end = values.get(values.size() - 1);
+        double max = values.stream().mapToDouble(Double::doubleValue).max().orElse(0);
+        double growth = (end - start) / (start > 0 ? start : 1);
+        // Same zero guard as for growth, so the ratio is never 0/0 (NaN)
+        double dropFromPeak = (max - end) / (end > 0 ? end : 1);
+
+        report.setMemStart(round(start));
+        report.setMemEnd(round(end));
+        report.setMemMax(round(max));
+        report.setMemGrowthRate(round(growth));
+
+        // Verdict:
+        // 1. Suspected leak: the last sample is more than 20% above the first one
+        // 2. Sawtooth: the peak is more than 30% above the last sample (simplified heuristic)
+        if (growth > 0.2) {
+            report.setMemStatus("📈 疑似内存泄漏");
+        } else if (dropFromPeak > 0.3) {
+            report.setMemStatus("🔄 频繁GC (锯齿波动)");
+        } else {
+            report.setMemStatus("✅ 运行平稳");
+        }
+    }
+
+    /**
+     * Flattens the {@code data.result} array of a matrix response into
+     * {@code "<name>@<instance host>"} -> sample values.
+     * Samples that are missing, unparsable or not finite (e.g. "NaN") are skipped
+     * so that they cannot poison the average/max computed later.
+     */
+    private Map<String, List<Double>> parseRawMatrix(String json) throws Exception {
+        Map<String, List<Double>> map = new HashMap<>();
+        JsonNode results = objectMapper.readTree(json).path("data").path("result");
+        for (JsonNode res : results) {
+            String name = res.path("metric").path("name").asText();
+            // Keep only the part of the instance label before the first ':'
+            String ip = res.path("metric").path("instance").asText().split(":")[0];
+            String key = name + KEY_SEPARATOR + ip;
+
+            List<Double> vals = new ArrayList<>();
+            for (JsonNode v : res.path("values")) {
+                // Each sample is [timestamp, value]; path() avoids an NPE on a short array
+                double sample = v.path(1).asDouble(Double.NaN);
+                if (Double.isFinite(sample)) {
+                    vals.add(sample);
+                }
+            }
+            map.put(key, vals);
+        }
+        return map;
+    }
+
+    /** Rounds to two decimal places. */
+    private double round(double val) {
+        return Math.round(val * 100.0) / 100.0;
+    }
+}

+ 18 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/MetricGroup.java

@@ -0,0 +1,18 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * One metric of one instance, split into a "today" and a "yesterday" set of series.
+ *
+ * @author reghao
+ * @date 2026-03-31 16:49:42
+ */
+@Data
+public class MetricGroup {
+    // key is the container name, value is the list of time-series samples
+    private Map<String, List<Double>> today = new HashMap<>();
+    private Map<String, List<Double>> yesterday = new HashMap<>();
+}

+ 7 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PrometheusAsyncClient.java

@@ -1,5 +1,7 @@
 package cn.reghao.devops.mgr.ops.srv.mon;
 
+import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerUsageDTO;
+import com.fasterxml.jackson.databind.JsonNode;
 import lombok.extern.slf4j.Slf4j;
 
 import java.net.URI;
@@ -157,6 +159,11 @@ public class PrometheusAsyncClient {
         //String respBody = client.query(promql).join();
 
         String cpuQuery = "1 - avg(irate(node_cpu_seconds_total{mode='idle'}[5m])) by (instance)";
+        cpuQuery = """
+                sum(
+                  irate(container_cpu_usage_seconds_total{name!=""}[5m])
+                ) by (name, instance) * 100
+                """;
         // 1. 计算时间范围:昨天 00:00:00 到 23:59:59
         // 也可以根据需求改为:当前时间向前推 24 小时
         long end = Instant.now().getEpochSecond();

+ 402 - 113
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PrometheusService.java

@@ -1,10 +1,15 @@
 package cn.reghao.devops.mgr.ops.srv.mon;
 
+import cn.reghao.devops.mgr.config.AppProperties;
+import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerHealthReport;
+import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.github.benmanes.caffeine.cache.Cache;
 import freemarker.template.Configuration;
 import freemarker.template.Template;
 import freemarker.template.TemplateException;
+import lombok.Data;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
 import org.springframework.ui.freemarker.FreeMarkerTemplateUtils;
@@ -16,12 +21,10 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.time.Instant;
-import java.time.LocalDate;
-import java.time.LocalDateTime;
-import java.time.ZoneId;
+import java.time.*;
 import java.time.format.DateTimeFormatter;
 import java.util.*;
+import java.util.concurrent.CompletableFuture;
 
 /**
  * @author reghao
@@ -30,41 +33,26 @@ import java.util.*;
 @Slf4j
 @Service
 public class PrometheusService {
-    private String baseUrl = "http://prometheus.iquizoo.cn";
     private ObjectMapper objectMapper = new ObjectMapper();
-    private final PrometheusAsyncClient promClient = new PrometheusAsyncClient(baseUrl);
+    private final PrometheusAsyncClient promClient;
+    private final Cache<String, Object> cache;
 
-    public OperationReportDTO getAggregatedData() {
-        // 定义 24 小时范围
-        long now = Instant.now().getEpochSecond();
-        Map<String, String> tasks = Map.of(
-                "node_cpu", "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode='idle'}[5m])) * 100)",
-                "node_mem", "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
-                "container_count", "count by (instance) (container_last_seen{image!=''})",
-                "top_cpu_containers", "topk(5, sum by (name, instance) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))",
-                "cpu_trend", "avg(100 - (irate(node_cpu_seconds_total{mode='idle'}[5m]) * 100))[24h:30m]"
-        );
-
-        // 异步抓取并解析
-        return promClient.fetchAllMetrics(tasks)
-                .thenApply(this::processResults) // 这里的 processResults 就是你之前写的 Jackson 解析逻辑
-                .join();
+    /**
+     * Creates the service with a Prometheus client pointed at the configured base URL.
+     *
+     * @param appProperties supplies the Prometheus base URL
+     * @param cache         cache used by {@code getReportData()}
+     */
+    public PrometheusService(AppProperties appProperties, Cache<String, Object> cache) {
+        this.promClient = new PrometheusAsyncClient(appProperties.getPrometheusBaseUrl());
+        this.cache = cache;
     }
 
     public void generateDailyReport() {
         // 定义查询任务
         Map<String, String> tasks = Map.of(
-                "node_cpu", "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode='idle'}[5m])) * 100)",
-                "node_mem", "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
                 "container_count", "count by (instance) (container_last_seen{image!=''})",
-                "top_cpu_containers", "topk(5, sum by (name) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))",
-                "cpu_trend", "avg(100 - (irate(node_cpu_seconds_total{mode='idle'}[5m]) * 100))[24h:30m]"
+                "top_cpu_containers", "topk(5, sum by (name) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))"
         );
 
         // 异步执行
         promClient.fetchAllMetrics(tasks).thenAccept(results -> {
             // 在这里解析 JSON 并填充到 DTO
-            OperationReportDTO operationReportDTO = processResults(results);
+            processResults(results);
             System.out.println("所有数据采集完成,开始渲染报表...");
         }).join(); // 如果是在定时任务主线程,可以用 join 等待完成
     }
@@ -224,35 +212,35 @@ public class PrometheusService {
         return FreeMarkerTemplateUtils.processTemplateIntoString(template, root);
     }
 
-    public String generateHtmlReport() throws Exception {
-        // 1. 获取聚合后的数据 DTO
-        OperationReportDTO reportData = getAggregatedData();
-        // 2. 准备 FreeMarker 数据模型 (Root Map)
-        Map<String, Object> root = new HashMap<>();
-        root.put("report", reportData);
-        // 这样在模板中可以使用 ${report.startTime}
-        // 或者为了匹配你之前的模板写法,直接放入 list 和 trend
-        root.put("hostList", reportData.getHostList());
-        root.put("topContainers", reportData.getTopContainers());
-        root.put("timeLabels", reportData.getTimeLabels());
-        root.put("avgCpuTrend", reportData.getAvgCpuTrend());
-        root.put("startTime", reportData.getStartTime());
-        root.put("endTime", reportData.getEndTime());
-        root.put("containerCount", reportData.getContainerCount());
-        String templatePath = "daily_report.ftl";
-
-        String htmlContent = renderHtml(templatePath, root);
-        return htmlContent;
-    }
-
     /**
-     * 获取四大支柱的原始 JSON 数据
+     * 辅助方法:构建 query_range 的完整 URL
      */
-    public Map<String, String> fetchFromPrometheus() {
+    private String buildRangeUrl(String query, long start, long end, String step) {
+        return String.format("/api/v1/query_range?query=%s&start=%d&end=%d&step=%s",
+                URLEncoder.encode(query, StandardCharsets.UTF_8),
+                start,
+                end,
+                step);
+    }
+
+    public void generatePillarReport() throws Exception {
         // 1. 计算时间范围:昨天 00:00:00 到 23:59:59
         // 也可以根据需求改为:当前时间向前推 24 小时
         long end = Instant.now().getEpochSecond();
         long start = end - (24 * 3600);
+        // 1. 获取今天的凌晨 03:00:00 (基于系统默认时区)
+        ZonedDateTime today3AM = LocalDate.now()
+                .atTime(3, 0, 0)
+                .atZone(ZoneId.systemDefault());
+        // 2. 如果当前时间还没到 3 点,LocalDate.now() 拿到的 3 点其实是“未来”,
+        //    为了保证逻辑稳健(拿已经过去的完整 24h),可以加个判断:
+        if (ZonedDateTime.now().isBefore(today3AM)) {
+            today3AM = today3AM.minusDays(1);
+        }
+        // 3. 计算时间戳
+        end = today3AM.toEpochSecond();      // 今天凌晨 03:00:00
+        start = end - (24 * 3600);           // 昨天凌晨 03:00:00
+
         String step = "30m"; // 30分钟一个采样点,适合 24h 趋势图
 
         // 2. 定义 PromQL 查询语句
@@ -269,58 +257,14 @@ public class PrometheusService {
                 "disk", buildRangeUrl(diskQuery, start, end, step),
                 "net", buildRangeUrl(netQuery, start, end, step)
         );
-
         log.info("开始并行抓取 Prometheus 四大支柱数据...");
 
         // 4. 并行执行并阻塞等待结果(join)
-        return promClient.fetchAllMetrics0(tasks).join();
-    }
-
-    /**
-     * 辅助方法:构建 query_range 的完整 URL
-     */
-    private String buildRangeUrl(String query, long start, long end, String step) {
-        return String.format("/api/v1/query_range?query=%s&start=%d&end=%d&step=%s",
-                URLEncoder.encode(query, StandardCharsets.UTF_8),
-                start,
-                end,
-                step);
-    }
-
-    private String buildRangeUrl1(String query, long start, long end, String step) {
-        String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8);
-        return String.format("/api/v1/query_range?query=%s&start=%d&end=%d&step=%s",
-                encodedQuery, start, end, step);
-    }
-
-    /**
-     * 构建 query_range 完整的 URL
-     * @param query  PromQL 语句
-     * @param hours  查询过去多少小时的数据(如 24)
-     * @param step   采样步长(如 "30m", "15m")
-     */
-    public String buildRangeUrl2(String query, int hours, String step) {
-        // 1. 获取当前时间戳(秒)作为结束时间
-        long end = Instant.now().getEpochSecond();
-        // 2. 计算开始时间
-        long start = end - (hours * 3600L);
-
-        // 3. 对 PromQL 进行 URL 编码,防止特殊字符(如 { } [ ] +)导致请求失败
-        String encodedQuery = URLEncoder.encode(query, StandardCharsets.UTF_8);
-
-        // 4. 拼装 Prometheus 标准 API 格式
-        return String.format("/api/v1/query_range?query=%s&start=%d&end=%d&step=%s",
-                encodedQuery, start, end, step);
-    }
+        Map<String, String> rawResults = promClient.fetchAllMetrics0(tasks).join();
 
-    public PillarReportDTO generatePillarReport() throws Exception {
-        // 假设 rawResults 是通过 PrometheusAsyncClient 拿到的 Map<String, String>
-        Map<String, String> rawResults = fetchFromPrometheus();
         PillarReportDTO dto = new PillarReportDTO();
-
         // 设置基础信息
         dto.setReportDate(LocalDate.now().minusDays(1).toString());
-
         // 解析四大指标
         dto.setCpuSeries(parseMatrix(rawResults.get("cpu"), true));
         dto.setMemSeries(parseMatrix(rawResults.get("mem"), true));
@@ -330,7 +274,20 @@ public class PrometheusService {
         // 提取 X 轴标签(取任意一个结果的 values 即可)
         dto.setTimeLabels(extractTimeLabels(rawResults.get("cpu")));
 
-        return dto;
+        // 1. 准备数据模型 (Root Map)
+        // 在模板中可以通过 ${report.reportDate} 或直接 ${reportDate} 访问
+        Map<String, Object> model = new HashMap<>();
+        model.put("report", dto);
+        String templatePath = "pillar_report.ftl";
+        // 3. 渲染 HTML (FreeMarker)
+        String htmlContent = renderHtml(templatePath,  model);
+
+        Path outputPath = Paths.get("/home/reghao/Downloads", "pillar_report_" + LocalDate.now() + ".html");
+        if (Files.notExists(outputPath.getParent())) {
+            Files.createDirectories(outputPath.getParent());
+        }
+        Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
+        System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
     }
 
     private String extractTimeLabels(String json) throws Exception {
@@ -362,35 +319,367 @@ public class PrometheusService {
         return map;
     }
 
-    public String executeFullProcess() throws Exception {
-        PillarReportDTO dto = generatePillarReport();
-        // 3. 渲染 HTML (FreeMarker)
-        String html = generateHtml(dto);
-        return html;
+    /**
+     * Extracts the time-axis labels from the first series of a range-query response.
+     * Each label is the sample time formatted as {@code HH:mm} in the system default
+     * zone and wrapped in single quotes.
+     *
+     * @param usageJson raw JSON body of a range query
+     * @return the labels of the first series; empty when the response has no series
+     * @throws JsonProcessingException if the body is not valid JSON
+     */
+    private List<String> getCpuTimeLabels(String usageJson) throws JsonProcessingException {
+        List<String> timeLabels = new ArrayList<>();
+        JsonNode results = objectMapper.readTree(usageJson).path("data").path("result");
+
+        // Only the first series contributes labels, so there is no need to walk the others
+        Iterator<JsonNode> seriesIterator = results.iterator();
+        if (!seriesIterator.hasNext()) {
+            return timeLabels;
+        }
+
+        // Built once instead of once per sample
+        DateTimeFormatter labelFormat = DateTimeFormatter.ofPattern("HH:mm");
+        ZoneId zone = ZoneId.systemDefault();
+        for (JsonNode v : seriesIterator.next().path("values")) {
+            String timeLabel = Instant.ofEpochSecond(v.get(0).asLong())
+                    .atZone(zone)
+                    .format(labelFormat);
+            timeLabels.add("'" + timeLabel + "'");
+        }
+
+        return timeLabels;
     }
 
-    public String generateHtml(PillarReportDTO dto) throws Exception {
-        // 1. 准备数据模型 (Root Map)
-        // 在模板中可以通过 ${report.reportDate} 或直接 ${reportDate} 访问
+    /**
+     * Groups the series of a range-query response as
+     * instance host -> (container name -> sample values rounded to two decimals).
+     * Both levels are sorted by key.
+     *
+     * @param usageJson raw JSON body of a range query
+     * @return the grouped series; empty when the response has no series
+     * @throws JsonProcessingException if the body is not valid JSON
+     */
+    private Map<String, Map<String, List<Double>>> parseCpuJson(String usageJson) throws JsonProcessingException {
+        // Structure: Instance -> (ContainerName -> List<Double>)
+        Map<String, Map<String, List<Double>>> groupedMap = new TreeMap<>();
+        JsonNode results = objectMapper.readTree(usageJson).path("data").path("result");
+        for (JsonNode res : results) {
+            String containerName = res.path("metric").path("name").asText();
+            // Keep only the part of the instance label before the first ':'
+            String instance = res.path("metric").path("instance").asText().split(":")[0];
+
+            // Get or create the container map of this instance
+            Map<String, List<Double>> containerMap = groupedMap.computeIfAbsent(instance, k -> new TreeMap<>());
+
+            List<Double> values = new ArrayList<>();
+            for (JsonNode v : res.path("values")) {
+                // Each sample is [timestamp, value]; time labels are produced by getCpuTimeLabels
+                values.add(Math.round(v.get(1).asDouble() * 100.0) / 100.0);
+            }
+            containerMap.put(containerName, values);
+        }
+
+        return groupedMap;
+    }
+
+    /**
+     * Groups the series of a range-query response as
+     * instance host -> (container name -> sample values rounded to two decimals).
+     */
+    private Map<String, Map<String, List<Double>>> parseMemJson(String usageJson) throws JsonProcessingException {
+        Map<String, Map<String, List<Double>>> byInstance = new TreeMap<>();
+        JsonNode seriesArray = objectMapper.readTree(usageJson).path("data").path("result");
+        for (JsonNode series : seriesArray) {
+            JsonNode labels = series.path("metric");
+            String container = labels.path("name").asText();
+            String host = labels.path("instance").asText().split(":")[0];
+
+            Map<String, List<Double>> byContainer = byInstance.computeIfAbsent(host, h -> new TreeMap<>());
+
+            // Memory value, in MB
+            List<Double> samples = new ArrayList<>();
+            series.path("values").forEach(
+                    point -> samples.add(Math.round(point.get(1).asDouble() * 100.0) / 100.0));
+            byContainer.put(container, samples);
+        }
+
+        return byInstance;
+    }
+
+    /**
+     * Renders {@code container_report.ftl} with per-container CPU and memory series
+     * for the 24 hours ending at the most recent 03:00 (system default zone) and
+     * writes the HTML to a dated file.
+     *
+     * @throws Exception if a response cannot be parsed, the template cannot be
+     *                   rendered or the file cannot be written
+     */
+    public void generateContainerReport() throws Exception {
+        // 1. Queries
+        String cpuQuery = """
+                sum(
+                  irate(container_cpu_usage_seconds_total{name!=""}[5m])
+                ) by (name, instance) * 100
+                """;
+        String memQuery = """
+                sum(container_memory_working_set_bytes{name!=''}) by (name, instance) / 1024 / 1024
+                """;
+
+        // 2. Time range: take today's 03:00; before 03:00 that instant is still in the
+        //    future, so fall back one day to get a window that has fully elapsed
+        ZonedDateTime today3AM = LocalDate.now()
+                .atTime(3, 0, 0)
+                .atZone(ZoneId.systemDefault());
+        if (ZonedDateTime.now().isBefore(today3AM)) {
+            today3AM = today3AM.minusDays(1);
+        }
+        long end = today3AM.toEpochSecond();
+        long start = end - (24 * 3600);
+
+        String step = "30m";
+
+        // 3. Start both queries before waiting on either, so they run in parallel
+        CompletableFuture<String> cpuFuture = promClient.queryRange(cpuQuery, start, end, step);
+        CompletableFuture<String> memFuture = promClient.queryRange(memQuery, start, end, step);
+        String cpuJson = cpuFuture.join();
+        String memJson = memFuture.join();
+
+        // 4. Parse; the time labels come from the CPU response only
+        List<String> timeLabels = getCpuTimeLabels(cpuJson);
+        Map<String, Map<String, List<Double>>> cpuGroupedMap = parseCpuJson(cpuJson);
+        Map<String, Map<String, List<Double>>> memGroupedMap = parseMemJson(memJson);
+
+        // 5. Build the template model
+        Map<String, Object> model = new HashMap<>();
+        model.put("cpuGroupedMap", cpuGroupedMap);
+        model.put("memGroupedMap", memGroupedMap);
+        model.put("timeLabels", String.join(",", timeLabels));
+
+        // 6. Render and write the report
+        String templatePath = "container_report.ftl";
+        String htmlContent = renderHtml(templatePath, model);
+        Path outputPath = Paths.get("/home/reghao/Downloads", "container_report_" + LocalDate.now() + ".html");
+        if (Files.notExists(outputPath.getParent())) {
+            Files.createDirectories(outputPath.getParent());
+        }
+        Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
+        System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
+    }
 
-        String htmlContent = renderHtml(templatePath,  model);
-        return htmlContent;
+    public void generateContainerReport1() throws Exception {
+        // 1. Define the queries (corrected PromQL); the "Old" variants add "offset 1d"
+        String cpuQuery = "sum(irate(container_cpu_usage_seconds_total{name!=''}[5m])) by (name, instance) * 100";
+        String cpuQueryOld = "sum(irate(container_cpu_usage_seconds_total{name!=''}[5m] offset 1d)) by (name, instance) * 100";
+        String memQuery = "sum(container_memory_working_set_bytes{name!=''}) by (name, instance) / 1024 / 1024";
+        String memQueryOld = "sum(container_memory_working_set_bytes{name!='' } offset 1d) by (name, instance) / 1024 / 1024";
+
+        // 2. Time range: the 24h ending at the most recent 03:00 (system default zone)
+        ZonedDateTime today3AM = LocalDate.now().atTime(3, 0, 0).atZone(ZoneId.systemDefault());
+        if (ZonedDateTime.now().isBefore(today3AM)) today3AM = today3AM.minusDays(1);
+
+        long end = today3AM.toEpochSecond();
+        long start = end - (24 * 3600);
+        String step = "30m";
+
+        // 3. Fetch in parallel
+        CompletableFuture<String> cpuFuture = promClient.queryRange(cpuQuery, start, end, step);
+        CompletableFuture<String> cpuOldFuture = promClient.queryRange(cpuQueryOld, start, end, step);
+        CompletableFuture<String> memFuture = promClient.queryRange(memQuery, start, end, step);
+        CompletableFuture<String> memOldFuture = promClient.queryRange(memQueryOld, start, end, step);
+
+        CompletableFuture.allOf(cpuFuture, cpuOldFuture, memFuture, memOldFuture).join();
+
+        // 4. Parse the responses
+        List<String> timeLabels = getCpuTimeLabels(cpuFuture.get());
+        Map<String, Map<String, List<Double>>> cpuToday = parseCpuJson(cpuFuture.get());
+        Map<String, Map<String, List<Double>>> cpuYesterday = parseCpuJson(cpuOldFuture.get());
+        Map<String, Map<String, List<Double>>> memToday = parseMemJson(memFuture.get());
+        Map<String, Map<String, List<Double>>> memYesterday = parseMemJson(memOldFuture.get());
+
+        // 5. Build the model (keys must match the variable names used in the FTL template exactly)
+        Map<String, Object> model = new HashMap<>();
+        model.put("cpuToday", cpuToday);
+        model.put("cpuYesterday", cpuYesterday);
+        model.put("memToday", memToday);
+        model.put("memYesterday", memYesterday);
+        model.put("timeLabels", String.join(",", timeLabels));
+
+        // 6. Render and write output. NOTE(review): rendering is commented out below, so the model built above is currently unused
+        /*String htmlContent = renderHtml("container_report_v2.ftl", model);
+        Path outputPath = Paths.get("/home/reghao/Downloads", "container_report_v2_" + LocalDate.now() + ".html");
+        Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
+        System.out.println("✅ 报表生成成功: " + outputPath.toAbsolutePath());*/
+        System.out.println();
     }
 
-    public static void main(String[] args) throws Exception {
-        PrometheusService prometheusService = new PrometheusService();
-        //prometheusService.generateDailyReport();
-        String htmlContent = prometheusService.executeFullProcess();
+    /**
+     * Returns the container report, computing it through the cache on a miss.
+     *
+     * @return the report, or {@code null} when it could not be generated
+     *         (the failure is logged)
+     */
+    public ContainerReportVO getReportData() {
+        // The report window ends at the most recent 03:00 (see getContainerReportData), so
+        // the key must follow that boundary rather than midnight. Keying by the calendar
+        // date let a report cached between 00:00 and 03:00 (which still covers the previous
+        // window) be served for the whole day. Shifting "now" back by three hours yields
+        // the date on which the current window ends.
+        LocalDate windowEndDate = LocalDateTime.now().minusHours(3).toLocalDate();
+        String cacheKey = "CONT_REPORT:" + windowEndDate;
+        // The report is computed through the cache's mapping function on a miss
+        Object result = cache.get(cacheKey, key -> {
+            try {
+                return getContainerReportData();
+            } catch (Exception e) {
+                log.error("Failed to generate report", e);
+                return null;
+            }
+        });
+        return (ContainerReportVO) result;
+    }
 
-        //Path outputPath = Paths.get("/home/reghao/Downloads", "daily_report_" + LocalDate.now() + ".html");
-        Path outputPath = Paths.get("/home/reghao/Downloads", "pillar_report_" + LocalDate.now() + ".html");
+    /**
+     * Builds the container report for containers whose name ends in {@code -prod}:
+     * CPU and memory series for the 24 hours ending at the most recent 03:00
+     * (system default zone), each paired with the same query shifted back one day,
+     * grouped by instance.
+     *
+     * @return the report; instances are listed in sorted name order
+     * @throws Exception if a query fails or a response cannot be parsed
+     */
+    public ContainerReportVO getContainerReportData() throws Exception {
+        // 1. Queries; the "Old" variants add "offset 1d"
+        String cpuQuery = "sum(irate(container_cpu_usage_seconds_total{name=~'.*-prod'}[5m])) by (name, instance) * 100";
+        String cpuQueryOld = "sum(irate(container_cpu_usage_seconds_total{name=~'.*-prod'}[5m] offset 1d)) by (name, instance) * 100";
+        String memQuery = "sum(container_memory_working_set_bytes{name=~'.*-prod'}) by (name, instance) / 1024 / 1024";
+        String memQueryOld = "sum(container_memory_working_set_bytes{name=~'.*-prod' } offset 1d) by (name, instance) / 1024 / 1024";
+
+        // 2. Time range: before 03:00 today's 03:00 is still in the future, so fall back one day
+        ZonedDateTime today3AM = LocalDate.now().atTime(3, 0, 0).atZone(ZoneId.systemDefault());
+        if (ZonedDateTime.now().isBefore(today3AM)) {
+            today3AM = today3AM.minusDays(1);
+        }
+
+        long end = today3AM.toEpochSecond();
+        long start = end - (24 * 3600);
+        String step = "30m";
+
+        // 3. Fetch all four in parallel
+        CompletableFuture<String> cpuFuture = promClient.queryRange(cpuQuery, start, end, step);
+        CompletableFuture<String> cpuOldFuture = promClient.queryRange(cpuQueryOld, start, end, step);
+        CompletableFuture<String> memFuture = promClient.queryRange(memQuery, start, end, step);
+        CompletableFuture<String> memOldFuture = promClient.queryRange(memQueryOld, start, end, step);
+        CompletableFuture.allOf(cpuFuture, cpuOldFuture, memFuture, memOldFuture).join();
+
+        // All four futures are complete here, so join() returns immediately
+        String cpuJson = cpuFuture.join();
+
+        // 4. Parse into Instance -> (Container -> samples)
+        Map<String, Map<String, List<Double>>> cpuT = parseCpuJson(cpuJson);
+        Map<String, Map<String, List<Double>>> cpuY = parseCpuJson(cpuOldFuture.join());
+        Map<String, Map<String, List<Double>>> memT = parseMemJson(memFuture.join());
+        Map<String, Map<String, List<Double>>> memY = parseMemJson(memOldFuture.join());
+
+        // 5. Union of every instance seen in any response. A TreeSet keeps the sorted
+        //    order the parsers already produce, so the report order is stable across runs.
+        Set<String> allInstanceNames = new TreeSet<>();
+        allInstanceNames.addAll(cpuT.keySet());
+        allInstanceNames.addAll(cpuY.keySet());
+        allInstanceNames.addAll(memT.keySet());
+        allInstanceNames.addAll(memY.keySet());
+
+        List<HostData> instanceList = new ArrayList<>(allInstanceNames.size());
+
+        for (String instName : allInstanceNames) {
+            HostData instData = new HostData();
+            instData.setName(instName);
+
+            // CPU group; an instance missing from one response gets an empty map
+            MetricGroup cpuGroup = new MetricGroup();
+            cpuGroup.setToday(cpuT.getOrDefault(instName, new HashMap<>()));
+            cpuGroup.setYesterday(cpuY.getOrDefault(instName, new HashMap<>()));
+            instData.setCpu(cpuGroup);
+
+            // Memory group
+            MetricGroup memGroup = new MetricGroup();
+            memGroup.setToday(memT.getOrDefault(instName, new HashMap<>()));
+            memGroup.setYesterday(memY.getOrDefault(instName, new HashMap<>()));
+            instData.setMem(memGroup);
+
+            instanceList.add(instData);
+        }
+
+        // 6. Assemble the result; the time labels come from the current CPU response
+        ContainerReportVO report = new ContainerReportVO();
+        report.setTimeLabels(getCpuTimeLabels(cpuJson));
+        report.setInstances(instanceList);
+
+        return report;
+    }
+
+    /**
+     * Fetches per-container CPU and memory series for the last 24 hours at a
+     * 5-minute step and hands the raw responses to {@code jitter2}.
+     *
+     * @throws Exception if the responses cannot be processed
+     */
+    public void detect() throws Exception {
+        // 1. Time range: the 24 hours ending now
+        long end = Instant.now().getEpochSecond();
+        long start = end - (24 * 3600);
+        String step = "5m"; // one sample every 5 minutes
+
+        // 2. PromQL queries
+        String cpuQuery = "sum(irate(container_cpu_usage_seconds_total{name!=\"\"}[5m])) by (name, instance)";
+        String memQuery = "sum(container_memory_working_set_bytes{name!=\"\"}) by (name, instance) / 1024 / 1024";
+
+        // 3. Task map; the values are query_range URLs
+        Map<String, String> tasks = Map.of(
+                "cpu", buildRangeUrl(cpuQuery, start, end, step),
+                "mem", buildRangeUrl(memQuery, start, end, step)
+        );
+        log.info("开始并行抓取 Prometheus 四大支柱数据...");
+
+        // 4. Run in parallel and block until everything is back
+        Map<String, String> rawResults = promClient.fetchAllMetrics0(tasks).join();
+        jitter2(rawResults);
+    }
+
+    /**
+     * Runs the jitter analysis on the raw "cpu"/"mem" responses, renders
+     * {@code risk_dashboard.ftl} and writes the HTML to a dated file.
+     */
+    private void jitter1(Map<String, String> rawResults) throws Exception {
+        String cpuBody = rawResults.get("cpu");
+        String memBody = rawResults.get("mem");
+        List<ContainerHealthReport> healthReports = new JitterAnalysisService().analyzeMetrics(cpuBody, memBody);
+
+        // The key "healthReports" must match <#list healthReports> in the template
+        Map<String, Object> templateModel = new HashMap<>();
+        templateModel.put("healthReports", healthReports);
+
+        String htmlContent = renderHtml("risk_dashboard.ftl", templateModel);
+
+        String fileName = "risk_dashboard_" + LocalDate.now() + ".html";
+        Path outputPath = Paths.get("/home/reghao/Downloads", fileName);
         if (Files.notExists(outputPath.getParent())) {
             Files.createDirectories(outputPath.getParent());
         }
         Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
         System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
     }
+
+    /**
+     * CPU and memory series of one instance, keyed by container name and kept
+     * sorted by key. Declared static because it uses nothing from the enclosing
+     * service; a non-static inner class would carry a hidden reference to it.
+     */
+    @Data
+    public static class InstanceData {
+        private Map<String, List<Double>> cpuSeries = new TreeMap<>();
+        private Map<String, List<Double>> memSeries = new TreeMap<>();
+    }
+
+    /**
+     * Adds every series of a range-query response to {@code groupedMap}, under its
+     * instance host, as a CPU or memory series depending on {@code isCpu}.
+     * When {@code timeLabels} is non-null, the sample times of the first series
+     * are appended to it as quoted {@code HH:mm} labels.
+     */
+    private void parseToMap(String json, Map<String, InstanceData> groupedMap,
+                            List<String> timeLabels, boolean isCpu) throws Exception {
+        JsonNode seriesArray = objectMapper.readTree(json).path("data").path("result");
+        // Labels are collected from the first series only, and only when a list was supplied
+        boolean collectLabels = timeLabels != null;
+
+        for (JsonNode series : seriesArray) {
+            JsonNode labels = series.path("metric");
+            String container = labels.path("name").asText();
+            String host = labels.path("instance").asText().split(":")[0];
+
+            InstanceData perInstance = groupedMap.computeIfAbsent(host, h -> new InstanceData());
+            Map<String, List<Double>> target;
+            if (isCpu) {
+                target = perInstance.getCpuSeries();
+            } else {
+                target = perInstance.getMemSeries();
+            }
+
+            List<Double> samples = new ArrayList<>();
+            for (JsonNode point : series.path("values")) {
+                if (collectLabels) {
+                    Instant sampleTime = Instant.ofEpochSecond(point.get(0).asLong());
+                    String formatted = sampleTime.atZone(ZoneId.systemDefault())
+                            .format(DateTimeFormatter.ofPattern("HH:mm"));
+                    timeLabels.add("'" + formatted + "'");
+                }
+                double rounded = Math.round(point.get(1).asDouble() * 100.0) / 100.0;
+                samples.add(rounded);
+            }
+            collectLabels = false;
+            target.put(container, samples);
+        }
+    }
+
+    /**
+     * Groups the raw "cpu" and "mem" responses by instance, renders
+     * {@code jitter.ftl} and writes the HTML to a dated file.
+     */
+    private void jitter2(Map<String, String> rawResults) throws Exception {
+        List<String> axisLabels = new ArrayList<>();
+        Map<String, InstanceData> byInstance = new TreeMap<>();
+
+        // CPU first: it also supplies the time labels
+        parseToMap(rawResults.get("cpu"), byInstance, axisLabels, true);
+        // Memory second: null means "do not collect labels again"
+        parseToMap(rawResults.get("mem"), byInstance, null, false);
+
+        Map<String, Object> templateModel = new HashMap<>();
+        templateModel.put("groupedMap", byInstance);
+        templateModel.put("timeLabels", String.join(",", axisLabels));
+
+        String htmlContent = renderHtml("jitter.ftl", templateModel);
+
+        Path outputPath = Paths.get("/home/reghao/Downloads", "jitter_" + LocalDate.now() + ".html");
+        Path parentDir = outputPath.getParent();
+        if (Files.notExists(parentDir)) {
+            Files.createDirectories(parentDir);
+        }
+        Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
+        System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
+    }
+
+    /**
+     * Manual entry point; every call is currently commented out, so running it does nothing.
+     */
+    public static void main(String[] args) throws Exception {
+        //PrometheusService prometheusService = new PrometheusService();
+        //prometheusService.generateContainerReport1();
+        //prometheusService.getContainerReportData();
+        //prometheusService.generatePillarReport();
+        //prometheusService.generateDailyReport();
+        //prometheusService.detect();
+    }
 }

+ 28 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/dto/ContainerHealthReport.java

@@ -0,0 +1,28 @@
+package cn.reghao.devops.mgr.ops.srv.mon.dto;
+
+import lombok.Builder;
+import lombok.Data;
+
+/** Per-container CPU/memory health summary; the element type returned by JitterAnalysisService#analyzeMetrics.
+ * @author reghao
+ * @date 2026-03-30 15:59:53
+ */
+@Data
+@Builder
+public class ContainerHealthReport {
+    private String containerName;
+    private String instanceIp;
+
+    // CPU dimension
+    private Double cpuAvg;
+    private Double cpuMax;
+    private Double cpuJitterScore; // coefficient of variation
+    private String cpuStatus;      // normal / frequent jitter / sustained high load
+
+    // Memory dimension
+    private Double memStart;       // value 24h ago
+    private Double memEnd;         // current value
+    private Double memMax;
+    private Double memGrowthRate;  // (End - Start) / Start
+    private String memStatus;      // normal / suspected leak / sawtooth jitter (GC)
+}

+ 43 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/dto/ContainerUsageDTO.java

@@ -0,0 +1,43 @@
+package cn.reghao.devops.mgr.ops.srv.mon.dto;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * @author reghao
+ * @date 2026-03-30 10:41:39
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class ContainerUsageDTO {
+    private String containerName;  // 容器名称 (name 标签)
+    private String nodeIp;         // 宿主机 IP (instance 标签)
+
+    // CPU 使用率相关
+    private Double cpuUsage;       // 当前 CPU 使用率 (百分比,如 15.42)
+
+    // CPU 限流相关
+    private Double throttleRatio;  // 限流时间占比 (如 1.62)
+    private String throttleStatus; // 状态描述:正常、警告、严重
+
+    /**
+     * 获取格式化的限流百分比字符串
+     */
+    public String getThrottlePercent() {
+        if (throttleRatio == null) return "0%";
+        return Math.round(throttleRatio * 100) + "%";
+    }
+
+    /**
+     * 根据限流比例自动判定状态颜色
+     */
+    public String getStatusColor() {
+        if (throttleRatio == null || throttleRatio < 0.1) return "#52c41a"; // 绿色
+        if (throttleRatio < 0.5) return "#fa8c16"; // 橙色
+        return "#f5222d"; // 红色
+    }
+}

+ 2 - 1
mgr/src/main/resources/application-dev.yml

@@ -4,4 +4,5 @@ spring:
     username: test
     password: Test@123456
 app:
-  ops-root: /opt/data/devops_data
+  ops-root: /opt/data/devops_data
+  prometheus-base-url: http://prometheus.iquizoo.cn  # Prometheus base URL; presumably bound to AppProperties.prometheusBaseUrl — confirm

+ 2 - 1
mgr/src/main/resources/application-test.yml

@@ -4,4 +4,5 @@ spring:
     username: azytest
     password: Azy@123456
 app:
-  ops-root: /opt/data/devops_data
+  ops-root: /opt/data/devops_data
+  prometheus-base-url: http://prometheus.iquizoo.cn  # Prometheus base URL; presumably bound to AppProperties.prometheusBaseUrl — confirm

+ 64 - 0
mgr/src/main/resources/templates/container.ftl

@@ -0,0 +1,64 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+    <title>Container CPU Report</title>
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+        body { font-family: sans-serif; padding: 20px; background-color: #f4f7f6; }
+.card { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
+.chart-container { width: 100%; height: 450px; }
+h2 { color: #2c3e50; border-left: 5px solid #3498db; padding-left: 15px; }
+.card {
+background: #fff;
+border: 1px solid #e8e8e8;
+border-radius: 4px;
+padding: 15px;
+page-break-inside: avoid;
+}
+</style>
+</head>
+<body>
+
+<#-- Print numbers in locale-independent "computer" form: with the default number_format, ?join may emit
+     grouping separators (e.g. 1,234.5), which would split one value into two JS array elements. -->
+<#setting number_format="computer">
+<#-- One card and one chart per node instance -->
+<#list groupedMap?keys as instance>
+<div class="card" style="margin-bottom: 30px;">
+    <h2 style="color: #1890ff; border-bottom: 1px solid #eee; padding-bottom: 10px;">
+        🖥️ 节点实例: ${instance}
+    </h2>
+    <div id="chart_${instance?replace('.', '_')}" style="width: 100%; height: 350px;"></div>
+</div>
+
+<script type="text/javascript">
+(function() {
+    var chartDom = document.getElementById('chart_${instance?replace('.', '_')}');
+    var myChart = echarts.init(chartDom);
+    var option = {
+        animation: false,
+        title: { text: '容器 CPU 消耗趋势 (%)', left: 'center', textStyle: {fontSize: 12} },
+        tooltip: { trigger: 'axis', confine: true },
+        legend: { type: 'scroll', bottom: 0 },
+        grid: { top: 40, left: '3%', right: '4%', bottom: '15%', containLabel: true },
+        xAxis: { type: 'category', boundaryGap: false, data: [${timeLabels}] },
+        yAxis: { type: 'value', min: 0, axisLabel: { formatter: '{value}%' } },
+        series: [
+            <#-- One line series per container on this node -->
+            <#assign containerMap = groupedMap[instance]>
+            <#list containerMap?keys as cName>
+            {
+                <#-- js_string keeps the literal valid if a name contains quotes or backslashes -->
+                name: '${cName?js_string}',
+                type: 'line',
+                smooth: true,
+                symbol: 'none',
+                data: [${containerMap[cName]?join(",")}]
+            }<#if cName_has_next>,</#if>
+            </#list>
+        ]
+    };
+    myChart.setOption(option);
+})();
+</script>
+</#list>
+
+</body>
+</html>

+ 62 - 0
mgr/src/main/resources/templates/container_mem.ftl

@@ -0,0 +1,62 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+    <title>Container Memory Report</title>
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+        body { font-family: sans-serif; padding: 20px; background-color: #f4f7f6; }
+.card { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }
+.chart-container { width: 100%; height: 450px; }
+h2 { color: #2c3e50; border-left: 5px solid #3498db; padding-left: 15px; }
+.card {
+background: #fff;
+border: 1px solid #e8e8e8;
+border-radius: 4px;
+padding: 15px;
+page-break-inside: avoid;
+}
+</style>
+</head>
+
+<body>
+<#-- Print numbers in locale-independent "computer" form: with the default number_format, ?join may emit
+     grouping separators (e.g. 1,234.5), which would split one value into two JS array elements. -->
+<#setting number_format="computer">
+<#-- One card and one memory chart per node instance -->
+<#list memGroupedMap?keys as instance>
+<div class="card" style="margin-top: 20px;">
+    <h2 style="color: #52c41a; border-bottom: 1px solid #eee; padding-bottom: 10px;">
+        内存监控 - 节点: ${instance}
+    </h2>
+    <div id="mem_chart_${instance?replace('.', '_')}" style="width: 100%; height: 350px;"></div>
+</div>
+
+<script type="text/javascript">
+(function() {
+    var chartDom = document.getElementById('mem_chart_${instance?replace('.', '_')}');
+    var myChart = echarts.init(chartDom);
+    var option = {
+        animation: false,
+        title: { text: '容器内存占用 (MB)', left: 'center', textStyle: {fontSize: 12} },
+        tooltip: { trigger: 'axis', confine: true },
+        legend: { type: 'scroll', bottom: 0 },
+        grid: { top: 40, left: '3%', right: '4%', bottom: '15%', containLabel: true },
+        xAxis: { type: 'category', data: [${timeLabels}] },
+        yAxis: { type: 'value', axisLabel: { formatter: '{value} MB' } },
+        series: [
+            <#-- One line series per container on this node -->
+            <#assign containerMap = memGroupedMap[instance]>
+            <#list containerMap?keys as cName>
+            {
+                <#-- js_string keeps the literal valid if a name contains quotes or backslashes -->
+                name: '${cName?js_string}',
+                type: 'line',
+                smooth: true,
+                symbol: 'none',
+                data: [${containerMap[cName]?join(",")}]
+            }<#if cName_has_next>,</#if>
+            </#list>
+        ]
+    };
+    myChart.setOption(option);
+})();
+</script>
+</#list>
+
+</body>
+</html>

+ 139 - 0
mgr/src/main/resources/templates/container_report.ftl

@@ -0,0 +1,139 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <title>容器性能监控报告</title>
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+        body { font-family: "Helvetica Neue", Arial, sans-serif; background-color: #f4f7f9; margin: 0; padding: 20px; color: #333; }
+.header { text-align: center; margin-bottom: 25px; border-bottom: 2px solid #1890ff; padding-bottom: 10px; }
+.header h1 { margin: 0; color: #001529; font-size: 22px; }
+
+.node-card {
+background: #fff; border-radius: 8px; box-shadow: 0 4px 12px rgba(0,0,0,0.05);
+margin-bottom: 25px; padding: 20px; border: 1px solid #e8e8e8;
+page-break-inside: avoid;
+}
+.node-title {
+font-size: 16px; font-weight: bold; color: #1890ff;
+margin-bottom: 15px; display: flex; align-items: center;
+}
+.node-title::before { content: ""; display: inline-block; width: 4px; height: 16px; background: #1890ff; margin-right: 8px; border-radius: 2px; }
+
+.charts-row { display: flex; flex-direction: row; gap: 20px; }
+.chart-box { flex: 1; min-width: 0; }
+.chart-container { width: 100%; height: 350px; }
+
+.footer { text-align: center; color: #8c8c8c; font-size: 12px; margin-top: 30px; border-top: 1px solid #e8e8e8; padding-top: 15px; }
+.no-data { display: flex; align-items: center; justify-content: center; height: 100%; color: #999; font-style: italic; background: #fafafa; border-radius: 4px; }
+</style>
+</head>
+<body>
+
+<div class="header">
+    <h1>📊 容器资源消耗巡检报告 (24h)</h1>
+</div>
+
+<#--
+  Model supplied by the backend:
+  1. cpuGroupedMap: Map<String, Map<String, List<Double>>>
+  2. memGroupedMap: Map<String, Map<String, List<Double>>>
+  3. timeLabels: String (e.g. "'00:00','00:30'...")
+-->
+
+<#--
+  Print numbers in locale-independent "computer" form: with the default number_format, ?join may emit
+  grouping separators (e.g. 1,234.5), which would split one value into two JS array elements.
+-->
+<#setting number_format="computer">
+
+<#--
+  Loop over the union of the cpuGroupedMap and memGroupedMap keys so hosts that only have memory data are not dropped.
+  Both key lookups default to an empty sequence when the map is missing.
+-->
+<#assign allInstances = (cpuGroupedMap?keys)![] >
+<#list (memGroupedMap?keys)![] as mInst>
+<#if !allInstances?seq_contains(mInst)>
+<#assign allInstances = allInstances + [mInst]>
+</#if>
+</#list>
+
+<#list allInstances as instance>
+<div class="node-card">
+    <div class="node-title">节点实例: ${instance}</div>
+
+    <div class="charts-row">
+        <div class="chart-box">
+            <#if (cpuGroupedMap[instance])??>
+                <div id="cpu_${instance?replace('.', '_')}" class="chart-container"></div>
+            <#else>
+                <div class="chart-container"><div class="no-data">暂无 CPU 监控数据</div></div>
+            </#if>
+        </div>
+
+        <div class="chart-box">
+            <#if (memGroupedMap[instance])??>
+                <div id="mem_${instance?replace('.', '_')}" class="chart-container"></div>
+            <#else>
+                <div class="chart-container"><div class="no-data">暂无内存监控数据</div></div>
+            </#if>
+        </div>
+    </div>
+</div>
+
+<script type="text/javascript">
+(function() {
+    const timeData = [${timeLabels}];
+    const commonOption = {
+        animation: false,
+        tooltip: { trigger: 'axis', confine: true, backgroundColor: 'rgba(255, 255, 255, 0.9)' },
+        legend: { type: 'scroll', bottom: 0, itemWidth: 10, textStyle: { fontSize: 10 } },
+        grid: { top: 45, left: '3%', right: '4%', bottom: '15%', containLabel: true },
+        xAxis: { type: 'category', boundaryGap: false, data: timeData, axisLabel: { fontSize: 10, color: '#999' } }
+    };
+
+    // --- CPU chart ---
+    <#if (cpuGroupedMap[instance])??>
+    const cpuChart = echarts.init(document.getElementById('cpu_${instance?replace(".", "_")}'));
+    cpuChart.setOption({
+        ...commonOption,
+        title: { text: 'CPU 使用率', left: 'center', textStyle: { fontSize: 13, color: '#555' } },
+        yAxis: { type: 'value', axisLabel: { formatter: '{value}%' } },
+        series: [
+            <#list cpuGroupedMap[instance]?keys as cName>
+            {
+                <#-- js_string keeps the literal valid if a name contains quotes or backslashes -->
+                name: '${cName?js_string}',
+                type: 'line', smooth: true, symbol: 'none',
+                data: [${cpuGroupedMap[instance][cName]?join(",")}]
+            }<#if cName_has_next>,</#if>
+            </#list>
+        ]
+    });
+    </#if>
+
+    // --- Memory chart ---
+    <#if (memGroupedMap[instance])??>
+    const memChart = echarts.init(document.getElementById('mem_${instance?replace(".", "_")}'));
+    memChart.setOption({
+        ...commonOption,
+        title: { text: '内存占用 (MB)', left: 'center', textStyle: { fontSize: 13, color: '#555' } },
+        yAxis: { type: 'value', axisLabel: { formatter: '{value} MB' } },
+        series: [
+            <#list memGroupedMap[instance]?keys as cName>
+            {
+                name: '${cName?js_string}',
+                type: 'line', smooth: true, symbol: 'none',
+                lineStyle: { width: 1.5, type: 'dashed' }, // dashed lines mark the memory series
+                data: [${memGroupedMap[instance][cName]?join(",")}]
+            }<#if cName_has_next>,</#if>
+            </#list>
+        ]
+    });
+    </#if>
+})();
+</script>
+</#list>
+
+<div class="footer">
+    数据源: Prometheus | 采样间隔: 30m | 生成时间: ${.now?string("yyyy-MM-dd HH:mm:ss")}
+</div>
+
+<script>
+    // 标记全部渲染完成,供 Playwright 识别
+    window.isAllRendered = true;
+</script>
+</body>
+</html>

+ 150 - 0
mgr/src/main/resources/templates/container_report_v2.ftl

@@ -0,0 +1,150 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <title>容器性能同比报告</title>
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+        body { font-family: "Helvetica Neue", Arial, sans-serif; background-color: #f4f7f9; margin: 0; padding: 20px; color: #333; }
+.header { text-align: center; margin-bottom: 25px; border-bottom: 2px solid #1890ff; padding-bottom: 10px; }
+.node-card { background: #fff; border-radius: 8px; box-shadow: 0 4px 12px rgba(0,0,0,0.05); margin-bottom: 25px; padding: 20px; border: 1px solid #e8e8e8; page-break-inside: avoid; }
+.node-title { font-size: 16px; font-weight: bold; color: #1890ff; margin-bottom: 15px; display: flex; align-items: center; }
+.node-title::before { content: ""; display: inline-block; width: 4px; height: 16px; background: #1890ff; margin-right: 8px; border-radius: 2px; }
+.charts-row { display: flex; flex-direction: row; gap: 20px; }
+.chart-box { flex: 1; min-width: 0; }
+.chart-container { width: 100%; height: 400px; }
+.footer { text-align: center; color: #8c8c8c; font-size: 12px; margin-top: 30px; border-top: 1px solid #e8e8e8; padding-top: 15px; }
+</style>
+</head>
+<body>
+
+<div class="header">
+    <h1>📊 容器资源同比巡检报告 (今日 vs 昨日)</h1>
+    <p style="font-size: 12px; color: #666;">实线: 今日 | 虚线: 昨日 | 同一指标颜色保持一致</p>
+</div>
+
+<#-- Print numbers in locale-independent "computer" form: with the default number_format, ?join may emit
+     grouping separators (e.g. 1,234.5), which would split one value into two JS array elements. -->
+<#setting number_format="computer">
+
+<#-- 1. Union of the instances present in cpuToday and cpuYesterday (the memory maps do not contribute instances) -->
+<#assign todayInsts = (cpuToday?keys)![] >
+<#assign yesterdayInsts = (cpuYesterday?keys)![] >
+<#assign allInstances = todayInsts >
+<#list yesterdayInsts as inst>
+    <#if !allInstances?seq_contains(inst)>
+        <#assign allInstances = allInstances + [inst]>
+    </#if>
+</#list>
+
+<#-- 2. Render one card per instance -->
+<#list allInstances as instance>
+<div class="node-card">
+    <div class="node-title">节点实例: ${instance}</div>
+    <div class="charts-row">
+        <div class="chart-box">
+            <div id="cpu_${instance?replace('.', '_')}" class="chart-container"></div>
+        </div>
+        <div class="chart-box">
+            <div id="mem_${instance?replace('.', '_')}" class="chart-container"></div>
+        </div>
+    </div>
+</div>
+
+<script type="text/javascript">
+(function() {
+    const timeData = [${timeLabels}];
+    const colorPalette = ['#5470c6', '#91cc75', '#fac858', '#ee6666', '#73c0de', '#3ba272', '#fc8452', '#9a60b4', '#ea7ccc'];
+
+    // Same container name -> same palette slot, so the today/yesterday lines share a colour.
+    function getColor(name, allNames) {
+        let index = allNames.indexOf(name);
+        return colorPalette[index % colorPalette.length];
+    }
+
+    const commonOption = {
+        animation: false,
+        tooltip: {
+            trigger: 'axis',
+            backgroundColor: 'rgba(255, 255, 255, 0.9)',
+            formatter: function(params) {
+                let res = '<b>' + params[0].name + '</b><br/>';
+                params.forEach(item => {
+                    res += item.marker + item.seriesName + ': ' + item.value + '<br/>';
+                });
+                return res;
+            }
+        },
+        legend: { type: 'scroll', bottom: 0, textStyle: { fontSize: 10 } },
+        grid: { top: 45, left: '3%', right: '4%', bottom: '18%', containLabel: true },
+        xAxis: { type: 'category', boundaryGap: false, data: timeData }
+    };
+
+    // --- CPU chart ---
+    <#assign cpuTMap = (cpuToday[instance])!{} >
+    <#assign cpuYMap = (cpuYesterday[instance])!{} >
+    <#assign cpuContainers = cpuTMap?keys >
+    <#list cpuYMap?keys as cName>
+        <#if !cpuContainers?seq_contains(cName)><#assign cpuContainers = cpuContainers + [cName]></#if>
+    </#list>
+
+    const cpuChart = echarts.init(document.getElementById('cpu_${instance?replace(".", "_")}'));
+    <#-- js_string keeps the JS literals valid if a container name contains quotes or backslashes -->
+    const cpuAllNames = [<#list cpuContainers as c>"${c?js_string}"<#if c_has_next>,</#if></#list>];
+    let cpuSeries = [];
+
+    <#list cpuContainers as cName>
+        <#if cpuTMap[cName]??>
+        cpuSeries.push({
+            name: '${cName?js_string} (今日)', type: 'line', smooth: true, symbol: 'none',
+            itemStyle: { color: getColor('${cName?js_string}', cpuAllNames) },
+            lineStyle: { width: 2 },
+            data: [${cpuTMap[cName]?join(",")}]
+        });
+        </#if>
+        <#if cpuYMap[cName]??>
+        cpuSeries.push({
+            name: '${cName?js_string} (昨日)', type: 'line', smooth: true, symbol: 'none',
+            itemStyle: { color: getColor('${cName?js_string}', cpuAllNames) },
+            lineStyle: { type: 'dashed', width: 1.5, opacity: 0.4 },
+            data: [${cpuYMap[cName]?join(",")}]
+        });
+        </#if>
+    </#list>
+    cpuChart.setOption({ ...commonOption, title: { text: 'CPU 同比 (%)', left: 'center' }, yAxis: { type: 'value' }, series: cpuSeries });
+
+    // --- Memory chart ---
+    <#assign memTMap = (memToday[instance])!{} >
+    <#assign memYMap = (memYesterday[instance])!{} >
+    <#assign memContainers = memTMap?keys >
+    <#list memYMap?keys as cName>
+        <#if !memContainers?seq_contains(cName)><#assign memContainers = memContainers + [cName]></#if>
+    </#list>
+
+    const memChart = echarts.init(document.getElementById('mem_${instance?replace(".", "_")}'));
+    const memAllNames = [<#list memContainers as c>"${c?js_string}"<#if c_has_next>,</#if></#list>];
+    let memSeries = [];
+
+    <#list memContainers as cName>
+        <#if memTMap[cName]??>
+        memSeries.push({
+            name: '${cName?js_string} (今日)', type: 'line', smooth: true, symbol: 'none',
+            itemStyle: { color: getColor('${cName?js_string}', memAllNames) },
+            lineStyle: { width: 2 },
+            data: [${memTMap[cName]?join(",")}]
+        });
+        </#if>
+        <#if memYMap[cName]??>
+        memSeries.push({
+            name: '${cName?js_string} (昨日)', type: 'line', smooth: true, symbol: 'none',
+            itemStyle: { color: getColor('${cName?js_string}', memAllNames) },
+            lineStyle: { type: 'dashed', width: 1.5, opacity: 0.4 },
+            data: [${memYMap[cName]?join(",")}]
+        });
+        </#if>
+    </#list>
+    memChart.setOption({ ...commonOption, title: { text: '内存同比 (MB)', left: 'center' }, yAxis: { type: 'value' }, series: memSeries });
+})();
+</script>
+</#list>
+
+<div class="footer">
+    数据源: Prometheus | 采样步长: 30m | 生成时间: ${.now?string("yyyy-MM-dd HH:mm:ss")}
+</div>
+<script>window.isAllRendered = true;</script>
+</body>
+</html>

+ 73 - 0
mgr/src/main/resources/templates/jitter.ftl

@@ -0,0 +1,73 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+.node-card { background: #fff; border: 1px solid #ddd; margin-bottom: 20px; padding: 15px; border-radius: 8px; }
+.node-title { font-size: 18px; font-weight: bold; color: #333; margin-bottom: 15px; border-left: 5px solid #1890ff; padding-left: 10px; }
+.chart-row { display: flex; gap: 20px; }
+.chart-item { flex: 1; height: 350px; background: #fafafa; padding: 10px; }
+</style>
+</head>
+<body style="background:#f5f5f5; padding: 20px;">
+
+<#-- Print numbers in locale-independent "computer" form: with the default number_format, ?join may emit
+     grouping separators (e.g. 1,234.5), which would split one value into two JS array elements. -->
+<#setting number_format="computer">
+<#-- One card per instance, with a CPU chart and a memory chart side by side -->
+<#list groupedMap?keys as instance>
+    <div class="node-card">
+        <div class="node-title">节点实例: ${instance}</div>
+        <div class="chart-row">
+            <#-- CPU chart container -->
+            <div id="cpu_${instance?replace('.','_')}" class="chart-item"></div>
+            <#-- Memory chart container -->
+            <div id="mem_${instance?replace('.','_')}" class="chart-item"></div>
+        </div>
+    </div>
+
+    <script>
+    (function() {
+        const timeData = [${timeLabels}];
+        const commonOption = {
+            animation: false,
+            tooltip: { trigger: 'axis' },
+            legend: { type: 'scroll', bottom: 0 },
+            grid: { top: 40, bottom: 60, left: 50, right: 20 },
+            xAxis: { type: 'category', boundaryGap: false, data: timeData }
+        };
+
+        // Render the CPU chart
+        const cpuChart = echarts.init(document.getElementById('cpu_${instance?replace(".","_")}'));
+        cpuChart.setOption({
+            ...commonOption,
+            title: { text: 'CPU 使用率 (%)', left: 'center' },
+            yAxis: { type: 'value', axisLabel: { formatter: '{value}%' } },
+            series: [
+                <#list groupedMap[instance].cpuSeries?keys as cName>
+                {
+                    <#-- js_string keeps the literal valid if a name contains quotes or backslashes -->
+                    name: '${cName?js_string}', type: 'line', smooth: true, symbol: 'none',
+                    data: [${groupedMap[instance].cpuSeries[cName]?join(",")}]
+                }<#if cName_has_next>,</#if>
+                </#list>
+            ]
+        });
+
+        // Render the memory chart
+        const memChart = echarts.init(document.getElementById('mem_${instance?replace(".","_")}'));
+        memChart.setOption({
+            ...commonOption,
+            title: { text: '内存占用 (MB)', left: 'center' },
+            yAxis: { type: 'value', axisLabel: { formatter: '{value}M' } },
+            series: [
+                <#list groupedMap[instance].memSeries?keys as cName>
+                {
+                    name: '${cName?js_string}', type: 'line', smooth: true, symbol: 'none',
+                    data: [${groupedMap[instance].memSeries[cName]?join(",")}]
+                }<#if cName_has_next>,</#if>
+                </#list>
+            ]
+        });
+    })();
+    </script>
+</#list>
+
+</body>
+</html>

+ 111 - 0
mgr/src/main/resources/templates/risk_dashboard.ftl

@@ -0,0 +1,111 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+    <style>
+.risk-dashboard {
+font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
+background: #fffbe6;
+border: 1px solid #ffe58f;
+padding: 20px;
+border-radius: 8px;
+margin-bottom: 30px;
+}
+.risk-title {
+color: #856404;
+margin-top: 0;
+display: flex;
+align-items: center;
+font-size: 18px;
+border-bottom: 1px solid #ffe58f;
+padding-bottom: 10px;
+margin-bottom: 15px;
+}
+.risk-table {
+width: 100%;
+border-collapse: collapse;
+background: rgba(255, 255, 255, 0.5);
+border-radius: 4px;
+}
+.risk-table th {
+padding: 12px 8px;
+text-align: left;
+border-bottom: 2px solid #ffe58f;
+color: #856404;
+font-size: 14px;
+}
+.risk-table td {
+padding: 12px 8px;
+border-bottom: 1px solid #ffe58f;
+font-size: 13px;
+color: #555;
+vertical-align: top;
+}
+.status-tag {
+display: inline-block;
+padding: 2px 8px;
+border-radius: 4px;
+font-weight: bold;
+}
+.btn-action {
+color: #ffffff;
+background-color: #ff4d4f;
+padding: 4px 8px;
+border-radius: 4px;
+text-decoration: none;
+font-size: 12px;
+}
+</style>
+</head>
+<body>
+
+<div class="risk-dashboard">
+    <div class="risk-title">🚨 24h 容器异常风险看板</div>
+    <table class="risk-table">
+        <thead>
+            <tr>
+                <th>容器信息 (实例IP)</th>
+                <th>CPU 运行状态</th>
+                <th>内存运行状态</th>
+                <th>诊断建议</th>
+            </tr>
+        </thead>
+        <tbody>
+            <#assign hasError = false>
+            <#list healthReports as r>
+                <#-- Show only records whose CPU or memory status lacks ✅. NOTE(review): r.* values are interpolated without null defaults; confirm the backend always sets them. -->
+                <#if !r.cpuStatus?contains("✅") || !r.memStatus?contains("✅")>
+                <#assign hasError = true>
+                <tr>
+                    <td>
+                        <div style="font-weight: bold; color: #333;">${r.containerName}</div>
+                        <div style="font-size: 11px; color: #999;">${r.instanceIp}</div>
+                    </td>
+                    <td>
+                        <div>${r.cpuStatus}</div>
+                        <div style="font-size: 11px; color: #666;">抖动得分: ${r.cpuJitterScore?string("0.00")}</div>
+                    </td>
+                    <td>
+                        <div>${r.memStatus}</div>
+                        <div style="font-size: 11px; color: #666;">24h增长: ${(r.memGrowthRate * 100)?string("0.##")}%</div>
+                    </td>
+                    <td>
+                        <span class="btn-action">查阅日志 / 扩容</span>
+                    </td>
+                </tr>
+                </#if>
+            </#list>
+            
+            <#if !hasError>
+                <tr>
+                    <td colspan="4" style="text-align: center; padding: 30px; color: #52c41a;">
+                        🎉 过去 24 小时内所有容器运行平稳,未检测到显著波动。
+                    </td>
+                </tr>
+            </#if>
+        </tbody>
+    </table>
+</div>
+
+</body>
+</html>