Переглянути джерело

从 Prometheus 中获取数据生成运维报告

reghao 2 днів тому
батько
коміт
fd5b908278

+ 15 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/ContainerInfo.java

@@ -0,0 +1,15 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+/**
+ * One row of the "top containers" ranking shown in the operations report.
+ *
+ * @author reghao
+ * @date 2026-03-28 01:21:40
+ */
+@Data
+public class ContainerInfo {
+    // Container name (read from the "name" label of the Prometheus series)
+    private String name;
+    // Host part of the series' "instance" label, i.e. the text before the first ':'
+    private String hostIp;
+    // CPU usage value of the container, rounded to one decimal place by the report service
+    private double cpu;
+    // NOTE(review): never assigned by the code in this change; unit presumably percent -- confirm
+    private double mem;
+}

+ 23 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/HostInfo.java

@@ -0,0 +1,23 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+/**
+ * @author reghao
+ * @date 2026-03-28 01:22:04
+ */
+@Data
+public class HostInfo {
+    private String name;
+    private String ip;
+    private double cpuUsage;
+    private double memUsage;
+    private int containerCount;
+
+    // 根据状态获取颜色标识,供 FreeMarker 使用
+    public String getStatusColor() {
+        if (cpuUsage > 85 || memUsage > 90) return "crit";
+        if (cpuUsage > 70 || memUsage > 75) return "warn";
+        return "ok";
+    }
+}

+ 27 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/OperationReportDTO.java

@@ -0,0 +1,27 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Data model handed to the daily operations report template.
+ *
+ * @author reghao
+ * @date 2026-03-28 01:21:21
+ */
+@Data
+public class OperationReportDTO {
+    // Basic information
+    private String startTime;
+    private String endTime;
+    private int containerCount;
+
+    // Host list (10 entries)
+    private List<HostInfo> hostList;
+
+    // Container ranking (Top 5)
+    private List<ContainerInfo> topContainers;
+
+    // Chart data, pre-joined into comma-separated strings
+    private String timeLabels;    // "00:00, 04:00..."
+    private String avgCpuTrend;   // "10, 15, 40..."
+}

+ 27 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PillarReportDTO.java

@@ -0,0 +1,27 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.Data;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Data model for the pillar report: per-instance CPU, memory, disk and network series.
+ *
+ * @author reghao
+ * @date 2026-03-28 02:12:58
+ */
+@Data
+public class PillarReportDTO {
+    private String reportDate;      // report date
+    private String startTime;       // start of the statistics window
+    private String endTime;         // end of the statistics window
+    private String timeLabels;      // ECharts X axis: "'00:00','00:30'..."
+
+    // Key: instance IP, value: data series [0.1, 0.5, ...]
+    private Map<String, List<Double>> cpuSeries = new HashMap<>();
+    private Map<String, List<Double>> memSeries = new HashMap<>();
+    private Map<String, List<Double>> diskSeries = new HashMap<>();
+    private Map<String, List<Double>> netSeries = new HashMap<>();
+
+    // Default summary text; roughly "system running smoothly overall, no threshold violations found"
+    private String statusSummary = "系统整体运行平稳,未发现超标异常。";
+}

+ 113 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PrometheusAsyncClient.java

@@ -0,0 +1,113 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.net.URI;
+import java.net.URLEncoder;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
+
+/**
+ * @author reghao
+ * @date 2026-03-28 01:16:07
+ */
+@Slf4j
+public class PrometheusAsyncClient {
+    private final HttpClient httpClient;
+    private final String prometheusBaseUrl;
+
+    public PrometheusAsyncClient(String baseUrl) {
+        this.prometheusBaseUrl = baseUrl;
+        // JDK 17 HttpClient 配置
+        this.httpClient = HttpClient.newBuilder()
+                .connectTimeout(Duration.ofSeconds(5))
+                .build();
+    }
+
+    /**
+     * 异步批量执行 PromQL
+     * @param queries Key 为指标别名,Value 为 PromQL 语句
+     */
+    public CompletableFuture<Map<String, String>> fetchAllMetrics(Map<String, String> queries) {
+        List<CompletableFuture<Map.Entry<String, String>>> futures = queries.entrySet().stream()
+                .map(entry -> fetchSingleMetric(entry.getKey(), entry.getValue()))
+                .toList();
+
+        // 等待所有请求完成
+        return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
+                .thenApply(v -> futures.stream()
+                        .map(CompletableFuture::join)
+                        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
+    }
+
+    private CompletableFuture<Map.Entry<String, String>> fetchSingleMetric(String alias, String query) {
+        String encodedUrl0 = prometheusBaseUrl + "/api/v1/query?query=" +
+                URLEncoder.encode(query, java.nio.charset.StandardCharsets.UTF_8);
+        String encodedUrl = prometheusBaseUrl + URLEncoder.encode(query, java.nio.charset.StandardCharsets.UTF_8);
+
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(encodedUrl))
+                .GET()
+                .build();
+
+        // 异步发送并处理响应
+        return httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString())
+                .thenApply(response -> {
+                    if (response.statusCode() == 200) {
+                        return Map.entry(alias, response.body());
+                    } else {
+                        throw new RuntimeException("Prometheus error: " + response.statusCode());
+                    }
+                })
+                .exceptionally(ex -> Map.entry(alias, "{\"status\":\"error\",\"message\":\"" + ex.getMessage() + "\"}"));
+    }
+
+    /**
+     * 并行抓取所有指标
+     * @param tasks Map<任务别名, 相对路径包含Query参数>
+     * @return CompletableFuture<Map<任务别名, JSON响应字符串>>
+     */
+    public CompletableFuture<Map<String, String>> fetchAllMetrics0(Map<String, String> tasks) {
+        log.info("开始异步抓取 Prometheus 指标,任务数: {}", tasks.size());
+
+        // 1. 将每个任务转换为 CompletableFuture
+        List<CompletableFuture<Map.Entry<String, String>>> futures = tasks.entrySet().stream()
+                .map(this::fetchSingleMetric0)
+                .toList();
+
+        // 2. 使用 CompletableFuture.allOf 等待所有请求完成
+        return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
+                .thenApply(v -> futures.stream()
+                        .map(CompletableFuture::join) // 此时 join 不会阻塞,因为 allOf 已完成
+                        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))
+                );
+    }
+
+    private CompletableFuture<Map.Entry<String, String>> fetchSingleMetric0(Map.Entry<String, String> entry) {
+        String url = prometheusBaseUrl + entry.getValue();
+        HttpRequest request = HttpRequest.newBuilder()
+                .uri(URI.create(url))
+                .timeout(Duration.ofSeconds(20))
+                .GET()
+                .build();
+
+        return httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString())
+                .thenApply(response -> {
+                    if (response.statusCode() != 200) {
+                        log.error("Prometheus 请求失败: {}, 状态码: {}", url, response.statusCode());
+                        return Map.entry(entry.getKey(), "{}"); // 返回空 JSON 防止中断
+                    }
+                    return Map.entry(entry.getKey(), response.body());
+                })
+                .exceptionally(ex -> {
+                    log.error("请求抛出异常: {}", url, ex);
+                    return Map.entry(entry.getKey(), "{}");
+                });
+    }
+}

+ 399 - 0
mgr/src/main/java/cn/reghao/devops/mgr/ops/srv/mon/PrometheusService.java

@@ -0,0 +1,399 @@
+package cn.reghao.devops.mgr.ops.srv.mon;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import freemarker.template.Template;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+import org.springframework.ui.freemarker.FreeMarkerTemplateUtils;
+import org.springframework.web.servlet.view.freemarker.FreeMarkerConfigurer;
+
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+
+/**
+ * @author reghao
+ * @date 2026-03-28 01:15:05
+ */
+@Slf4j
+@Service
+public class PrometheusService {
+    private String baseUrl = "http://prometheus.reghao.cn";
+    private ObjectMapper objectMapper = new ObjectMapper();
+    private final PrometheusAsyncClient promClient = new PrometheusAsyncClient(baseUrl);
+
+    /**
+     * Runs the instant queries behind the daily report and blocks until the results are parsed.
+     *
+     * @return the aggregated report built by {@code processResults}
+     */
+    public OperationReportDTO getAggregatedData() {
+        // All entries are instant queries; only cpu_trend looks back 24h, via its [24h:30m] subquery.
+        Map<String, String> tasks = Map.of(
+                "node_cpu", "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode='idle'}[5m])) * 100)",
+                "node_mem", "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
+                "container_count", "count by (instance) (container_last_seen{image!=''})",
+                "top_cpu_containers", "topk(5, sum by (name, instance) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))",
+                "cpu_trend", "avg(100 - (irate(node_cpu_seconds_total{mode='idle'}[5m]) * 100))[24h:30m]"
+        );
+
+        // Fetch concurrently, parse the JSON responses, then block the caller for the result.
+        return promClient.fetchAllMetrics(tasks)
+                .thenApply(this::processResults)
+                .join();
+    }
+
+    /**
+     * Collects the daily report metrics and parses them, blocking until done.
+     * <p>
+     * The parsed report is currently discarded; this method only exercises the pipeline.
+     */
+    public void generateDailyReport() {
+        // Query definitions. top_cpu_containers keeps the instance label because
+        // parseTopContainers reads it to fill in the host IP.
+        Map<String, String> tasks = Map.of(
+                "node_cpu", "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode='idle'}[5m])) * 100)",
+                "node_mem", "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
+                "container_count", "count by (instance) (container_last_seen{image!=''})",
+                "top_cpu_containers", "topk(5, sum by (name, instance) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))",
+                "cpu_trend", "avg(100 - (irate(node_cpu_seconds_total{mode='idle'}[5m]) * 100))[24h:30m]"
+        );
+
+        // Run asynchronously; join() is acceptable when called from a scheduler's own thread.
+        promClient.fetchAllMetrics(tasks).thenAccept(results -> {
+            processResults(results);
+            log.info("所有数据采集完成,开始渲染报表...");
+        }).join();
+    }
+
+    /**
+     * Converts the raw JSON responses into the report model.
+     * <p>
+     * Parsing is best-effort: if any step throws, the error is logged and the report is returned
+     * with whatever fields were filled in up to that point.
+     *
+     * @param results alias to Prometheus JSON response, as returned by the async client
+     * @return the (possibly partially populated) report, never null
+     */
+    private OperationReportDTO processResults(Map<String, String> results) {
+        OperationReportDTO report = new OperationReportDTO();
+        Map<String, HostInfo> hostMap = new HashMap<>();
+
+        try {
+            // 1. CPU usage per host
+            if (results.containsKey("node_cpu")) {
+                parseToHostMap(results.get("node_cpu"), hostMap, "cpu");
+            }
+
+            // 2. Memory usage per host
+            if (results.containsKey("node_mem")) {
+                parseToHostMap(results.get("node_mem"), hostMap, "mem");
+            }
+
+            // 3. Container count per host
+            if (results.containsKey("container_count")) {
+                parseToHostMap(results.get("container_count"), hostMap, "count");
+            }
+
+            // 4. Top 5 containers by CPU
+            String topCpuJson = results.get("top_cpu_containers");
+            if (topCpuJson != null) {
+                report.setTopContainers(parseTopContainers(topCpuJson));
+            }
+
+            report.setHostList(new ArrayList<>(hostMap.values()));
+            // Total containers across all hosts; the top-5 list size would cap this at 5.
+            report.setContainerCount(
+                    hostMap.values().stream().mapToInt(HostInfo::getContainerCount).sum());
+
+            // Report window: the 24 hours ending now.
+            DateTimeFormatter timestampFormat = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+            LocalDateTime end = LocalDateTime.now();
+            report.setStartTime(end.minusDays(1).format(timestampFormat));
+            report.setEndTime(end.format(timestampFormat));
+
+            // Trend data for the chart
+            if (results.containsKey("cpu_trend")) {
+                parseTrendData(results.get("cpu_trend"), report);
+            }
+        } catch (Exception e) {
+            log.error("JSON 解析失败", e);
+        }
+        return report;
+    }
+
+    /**
+     * Merges one instant-vector response into the per-host map.
+     *
+     * @param json    Prometheus response whose series carry an "instance" label
+     * @param hostMap accumulator keyed by host IP; missing hosts are created on demand
+     * @param type    which field to fill: "cpu", "mem" or "count"; other values are ignored
+     */
+    private void parseToHostMap(String json, Map<String, HostInfo> hostMap, String type) throws Exception {
+        JsonNode series = objectMapper.readTree(json).path("data").path("result");
+
+        for (JsonNode sample : series) {
+            // Strip the port, e.g. 192.168.1.10:9100 becomes 192.168.1.10
+            String instance = sample.path("metric").path("instance").asText();
+            String ip = instance;
+            if (instance.contains(":")) {
+                ip = instance.split(":")[0];
+            }
+
+            // An instant sample is [timestamp, "value"]; index 1 is the value
+            double value = sample.path("value").get(1).asDouble();
+
+            HostInfo host = hostMap.get(ip);
+            if (host == null) {
+                host = new HostInfo();
+                host.setName(ip);
+                host.setIp(ip);
+                hostMap.put(ip, host);
+            }
+
+            switch (type) {
+                case "cpu":
+                    host.setCpuUsage(formatDouble(value));
+                    break;
+                case "mem":
+                    host.setMemUsage(formatDouble(value));
+                    break;
+                case "count":
+                    host.setContainerCount((int) value);
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+
+    /**
+     * Turns the top-k instant-vector response into ranking rows.
+     *
+     * @param json Prometheus response whose series carry "name" and "instance" labels
+     * @return one entry per series, in response order; the memory field is left at its default
+     */
+    private List<ContainerInfo> parseTopContainers(String json) throws Exception {
+        JsonNode series = objectMapper.readTree(json).path("data").path("result");
+        List<ContainerInfo> ranking = new ArrayList<>();
+
+        for (JsonNode sample : series) {
+            JsonNode labels = sample.path("metric");
+            String host = labels.path("instance").asText().split(":")[0];
+            double cpuValue = formatDouble(sample.path("value").get(1).asDouble());
+
+            ContainerInfo info = new ContainerInfo();
+            info.setName(labels.path("name").asText());
+            info.setHostIp(host);
+            info.setCpu(cpuValue);
+            ranking.add(info);
+        }
+        return ranking;
+    }
+
+    /** Rounds to one decimal place. */
+    private double formatDouble(double val) {
+        long tenths = Math.round(val * 10.0);
+        return tenths / 10.0;
+    }
+
+    /**
+     * Fills the chart labels and CPU trend of the report from a matrix response.
+     * <p>
+     * Leaves both fields untouched when the response has no series or cannot be parsed.
+     *
+     * @param json   Prometheus response; the trend is read from {@code data.result[0].values}
+     * @param report report to update
+     */
+    private void parseTrendData(String json, OperationReportDTO report) {
+        try {
+            // path(0) yields a missing node instead of null when the result list is empty.
+            JsonNode valuesNode = objectMapper.readTree(json)
+                    .path("data").path("result").path(0).path("values");
+            if (!valuesNode.isArray()) {
+                log.warn("cpu_trend response contains no series, trend chart left empty");
+                return;
+            }
+
+            List<String> labels = new ArrayList<>();
+            List<String> trends = new ArrayList<>();
+            DateTimeFormatter formatter = DateTimeFormatter.ofPattern("HH:mm");
+            ZoneId zone = ZoneId.systemDefault();
+
+            for (JsonNode node : valuesNode) {
+                // Each node is a pair such as [1672531200, "15.5"]
+                long timestamp = node.get(0).asLong();
+                double value = node.get(1).asDouble();
+
+                String timeLabel = LocalDateTime.ofInstant(Instant.ofEpochSecond(timestamp), zone)
+                        .format(formatter);
+
+                labels.add("'" + timeLabel + "'"); // quoted so it can be dropped into a JS array
+                trends.add(String.valueOf(formatDouble(value)));
+            }
+
+            // Comma-separated strings that the template pastes straight into JS array literals
+            report.setTimeLabels(String.join(",", labels));
+            report.setAvgCpuTrend(String.join(",", trends));
+
+        } catch (Exception e) {
+            log.error("趋势数据解析失败", e);
+        }
+    }
+
+    /**
+     * Builds and initialises a standalone FreeMarker configurer that loads templates from
+     * {@code classpath:/templates/}.
+     *
+     * @return an initialised configurer
+     * @throws IllegalStateException if FreeMarker initialisation fails
+     */
+    public static FreeMarkerConfigurer createConfigurer() {
+        FreeMarkerConfigurer configurer = new FreeMarkerConfigurer();
+
+        // 1. Template location (normally resources/templates)
+        configurer.setTemplateLoaderPath("classpath:/templates/");
+
+        // 2. Default encoding
+        configurer.setDefaultEncoding("UTF-8");
+
+        // 3. Native FreeMarker settings
+        Properties settings = new Properties();
+        settings.setProperty("template_update_delay", "0"); // delay before checking for template updates
+        settings.setProperty("default_encoding", "UTF-8");
+        settings.setProperty("number_format", "0.##");      // plain numbers, so 1000 is not rendered as 1,000
+        settings.setProperty("datetime_format", "yyyy-MM-dd HH:mm:ss");
+        configurer.setFreemarkerSettings(settings);
+
+        try {
+            // Required: this call is what initialises the internal Configuration object.
+            configurer.afterPropertiesSet();
+        } catch (Exception e) {
+            // Fail fast: a configurer without a Configuration is unusable to callers.
+            throw new IllegalStateException("Failed to initialise FreeMarker configuration", e);
+        }
+
+        return configurer;
+    }
+
+    /**
+     * Renders the daily report to an HTML string.
+     *
+     * @return the output of the {@code daily_report.ftl} template
+     */
+    public String generateHtmlReport() throws Exception {
+        // Aggregated data first; this blocks on the Prometheus queries.
+        OperationReportDTO data = getAggregatedData();
+
+        // Root model: the whole DTO under "report" (usable as ${report.startTime}) plus each
+        // field as its own top-level variable, which is the form the template reads.
+        Map<String, Object> model = new HashMap<>();
+        model.put("report", data);
+        model.put("startTime", data.getStartTime());
+        model.put("endTime", data.getEndTime());
+        model.put("containerCount", data.getContainerCount());
+        model.put("hostList", data.getHostList());
+        model.put("topContainers", data.getTopContainers());
+        model.put("timeLabels", data.getTimeLabels());
+        model.put("avgCpuTrend", data.getAvgCpuTrend());
+
+        // Template file: src/main/resources/templates/daily_report.ftl
+        Template dailyTemplate = createConfigurer().getConfiguration().getTemplate("daily_report.ftl");
+        return FreeMarkerTemplateUtils.processTemplateIntoString(dailyTemplate, model);
+    }
+
+    /**
+     * Fetches the raw JSON for the four pillar metrics (CPU, memory, disk, network).
+     *
+     * @return map with keys "cpu", "mem", "disk" and "net", each holding a query_range response
+     */
+    public Map<String, String> fetchFromPrometheus() {
+        // Time window: the 24 hours ending now, sampled every 30 minutes.
+        long end = Instant.now().getEpochSecond();
+        long start = end - (24 * 3600);
+        String step = "30m";
+
+        // PromQL expressions, one per pillar
+        String cpuQuery = "1 - avg(irate(node_cpu_seconds_total{mode='idle'}[5m])) by (instance)";
+        String memQuery = "1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)";
+        String diskQuery = "max(rate(node_disk_io_time_seconds_total[5m])) by (instance)";
+        String netQuery = "sum(irate(node_network_receive_bytes_total[5m])) by (instance) / 1024 / 1024";
+
+        // Each task value is a relative query_range URL
+        String cpuUrl = buildRangeUrl(cpuQuery, start, end, step);
+        String memUrl = buildRangeUrl(memQuery, start, end, step);
+        String diskUrl = buildRangeUrl(diskQuery, start, end, step);
+        String netUrl = buildRangeUrl(netQuery, start, end, step);
+        Map<String, String> tasks = Map.of(
+                "cpu", cpuUrl,
+                "mem", memUrl,
+                "disk", diskUrl,
+                "net", netUrl
+        );
+
+        log.info("开始并行抓取 Prometheus 四大支柱数据...");
+
+        // Run in parallel and block until every response is in
+        return promClient.fetchAllMetrics0(tasks).join();
+    }
+
+    /**
+     * Builds the relative URL of a {@code query_range} request.
+     *
+     * @param query raw PromQL expression; it is URL-encoded here
+     * @param start range start, epoch seconds
+     * @param end   range end, epoch seconds
+     * @param step  sampling step, e.g. "30m"
+     * @return path plus query string, to be appended to the Prometheus base URL
+     */
+    private String buildRangeUrl(String query, long start, long end, String step) {
+        // Locale.ROOT keeps %d in ASCII digits; the default locale may substitute its own digits.
+        return String.format(Locale.ROOT, "/api/v1/query_range?query=%s&start=%d&end=%d&step=%s",
+                URLEncoder.encode(query, StandardCharsets.UTF_8),
+                start,
+                end,
+                step);
+    }
+
+    /** Same contract as {@code buildRangeUrl}; kept as an alias and delegates to it. */
+    private String buildRangeUrl1(String query, long start, long end, String step) {
+        return buildRangeUrl(query, start, end, step);
+    }
+
+    /**
+     * Builds the relative {@code query_range} URL for a window that ends now.
+     *
+     * @param query PromQL expression
+     * @param hours how many hours back the window starts (e.g. 24)
+     * @param step  sampling step (e.g. "30m", "15m")
+     * @return path plus query string with the URL-encoded expression
+     */
+    public String buildRangeUrl2(String query, int hours, String step) {
+        // The window ends at the current epoch second and starts the given number of hours earlier.
+        long end = Instant.now().getEpochSecond();
+        long start = end - (hours * 3600L);
+
+        // Encoding and URL assembly are shared with the explicit-range variant.
+        return buildRangeUrl(query, start, end, step);
+    }
+
+    /**
+     * Fetches the four pillar metrics and converts them into the pillar report model.
+     *
+     * @return report with yesterday's date, the four per-instance series and the X-axis labels
+     */
+    public PillarReportDTO generatePillarReport() throws Exception {
+        Map<String, String> raw = fetchFromPrometheus();
+        String cpuJson = raw.get("cpu");
+
+        PillarReportDTO report = new PillarReportDTO();
+        report.setReportDate(LocalDate.now().minusDays(1).toString());
+
+        // CPU, memory and disk are ratios and get scaled by 100; network values are kept as-is.
+        report.setCpuSeries(parseMatrix(cpuJson, true));
+        report.setMemSeries(parseMatrix(raw.get("mem"), true));
+        report.setDiskSeries(parseMatrix(raw.get("disk"), true));
+        report.setNetSeries(parseMatrix(raw.get("net"), false));
+
+        // X-axis labels are taken from the CPU response.
+        report.setTimeLabels(extractTimeLabels(cpuJson));
+        return report;
+    }
+
+    /**
+     * Builds the quoted, comma-separated HH:mm labels from the first series of a matrix response.
+     *
+     * @param json Prometheus matrix response
+     * @return labels such as {@code '00:00','00:30'}, or an empty string when there is no series
+     */
+    private String extractTimeLabels(String json) throws Exception {
+        JsonNode firstSeries = objectMapper.readTree(json).path("data").path("result").get(0);
+        if (firstSeries == null) {
+            return "";
+        }
+
+        DateTimeFormatter hourMinute = DateTimeFormatter.ofPattern("HH:mm");
+        ZoneId zone = ZoneId.systemDefault();
+        StringJoiner joined = new StringJoiner(",");
+        for (JsonNode point : firstSeries.path("values")) {
+            long epochSecond = point.get(0).asLong();
+            joined.add("'" + Instant.ofEpochSecond(epochSecond).atZone(zone).format(hourMinute) + "'");
+        }
+        return joined.toString();
+    }
+
+    /**
+     * Converts a matrix response into one data series per instance.
+     * <p>
+     * Best-effort: on a parse error the failure is logged and the series collected so far
+     * are returned.
+     *
+     * @param json   Prometheus matrix response
+     * @param isRate when true each value is multiplied by 100 before rounding
+     * @return instance IP (port stripped) to values rounded to two decimals; never null
+     */
+    private Map<String, List<Double>> parseMatrix(String json, boolean isRate) {
+        Map<String, List<Double>> map = new HashMap<>();
+        try {
+            JsonNode results = objectMapper.readTree(json).path("data").path("result");
+            for (JsonNode res : results) {
+                String ip = res.path("metric").path("instance").asText().split(":")[0];
+                List<Double> data = new ArrayList<>();
+                for (JsonNode v : res.path("values")) {
+                    // Each point is [timestamp, "value"]
+                    double val = v.get(1).asDouble();
+                    data.add(Math.round((isRate ? val * 100 : val) * 100.0) / 100.0);
+                }
+                map.put(ip, data);
+            }
+        } catch (Exception e) {
+            log.error("Failed to parse Prometheus matrix response", e);
+        }
+        return map;
+    }
+
+    /**
+     * Runs the whole pillar report pipeline: fetch, convert, render.
+     *
+     * @return the rendered HTML
+     */
+    public String executeFullProcess() throws Exception {
+        PillarReportDTO report = generatePillarReport();
+        return generateHtml(report);
+    }
+
+    /**
+     * Renders the pillar report with the {@code pillar_report.ftl} template.
+     *
+     * @param dto report data, exposed to the template as {@code report}
+     * @return the rendered HTML
+     */
+    public String generateHtml(PillarReportDTO dto) throws Exception {
+        // The DTO is the only model entry, so templates read fields as ${report.reportDate}.
+        Map<String, Object> root = new HashMap<>();
+        root.put("report", dto);
+
+        // Template file: src/main/resources/templates/pillar_report.ftl
+        Template pillarTemplate = createConfigurer().getConfiguration().getTemplate("pillar_report.ftl");
+
+        // Merge model and template into a String.
+        return FreeMarkerTemplateUtils.processTemplateIntoString(pillarTemplate, root);
+    }
+
+    /** Manual entry point: renders the pillar report and saves it as an HTML file. */
+    public static void main(String[] args) throws Exception {
+        String html = new PrometheusService().executeFullProcess();
+
+        String fileName = "pillar_report_" + LocalDate.now() + ".html";
+        Path target = Paths.get("/home/reghao/Downloads", fileName);
+        Path directory = target.getParent();
+        if (Files.notExists(directory)) {
+            Files.createDirectories(directory);
+        }
+
+        Files.writeString(target, html, StandardCharsets.UTF_8);
+        System.out.println("✅ 报表已成功保存至: " + target.toAbsolutePath());
+    }
+}

+ 94 - 0
mgr/src/main/resources/templates/daily_report.ftl

@@ -0,0 +1,94 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <title>运维日报</title>
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+        body { font-family: Arial, sans-serif; background: #f4f7f6; padding: 20px; }
+        .card { background: white; border-radius: 8px; padding: 20px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-bottom: 20px; }
+        .host-grid { display: grid; grid-template-columns: repeat(5, 1fr); gap: 10px; }
+        .host-item { padding: 10px; border: 1px solid #eee; border-radius: 4px; text-align: center; }
+        /* Status-dependent background colors, selected per host tile by the template */
+        .bg-ok { background-color: #f6ffed; border-color: #b7eb8f; }
+        .bg-warn { background-color: #fff7e6; border-color: #ffd591; }
+        .bg-crit { background-color: #fff1f0; border-color: #ffa39e; }
+        table { width: 100%; border-collapse: collapse; }
+        th, td { border: 1px solid #eee; padding: 8px; text-align: left; }
+        th { background: #fafafa; }
+    </style>
+</head>
+<body>
+
+<#-- Header card: report period and one tile per host.
+     hostList, topContainers, startTime and endTime can be unset when metric parsing failed,
+     so each gets a default instead of aborting the render. -->
+<div class="card">
+    <h2 style="text-align:center;">基础架构运维日报</h2>
+    <p style="text-align:center; color:#888;">周期: ${startTime!"-"} 至 ${endTime!"-"}</p>
+
+    <h3>🖥️ 宿主机健康度 (${(hostList![])?size} Nodes)</h3>
+    <div class="host-grid">
+        <#list hostList![] as host>
+            <div class="host-item
+                <#if host.cpuUsage gt 80 || host.memUsage gt 85>bg-crit
+                <#elseif host.cpuUsage gt 60>bg-warn
+                <#else>bg-ok</#if>">
+                <strong>${host.name}</strong><br/>
+                <small>CPU: ${host.cpuUsage}%</small><br/>
+                <small>MEM: ${host.memUsage}%</small>
+            </div>
+        </#list>
+    </div>
+</div>
+
+<#-- Chart card: the container is filled by the ECharts script at the end of the page. -->
+<div class="card">
+    <h3>📈 集群 CPU 负载趋势 (24h)</h3>
+    <div id="cpuChart" style="width: 100%; height: 300px;"></div>
+</div>
+
+<#-- Ranking card: one table row per container; values above 80 are shown in red. -->
+<div class="card">
+    <h3>🚀 高负载容器 Top 5</h3>
+    <table>
+        <thead>
+            <tr>
+                <th>容器名称</th>
+                <th>所属主机</th>
+                <th>CPU 使用率</th>
+            </tr>
+        </thead>
+        <tbody>
+            <#list topContainers![] as container>
+            <tr>
+                <td>${container.name}</td>
+                <td>${container.hostIp}</td>
+                <td style="color: <#if container.cpu gt 80>red<#else>black</#if>;">
+                    ${container.cpu}%
+                </td>
+            </tr>
+            </#list>
+        </tbody>
+    </table>
+</div>
+
+<script>
+    // Render the cluster CPU trend as an ECharts line chart
+    var chartDom = document.getElementById('cpuChart');
+    var myChart = echarts.init(chartDom);
+    var option = {
+        animation: false, // animation must be disabled, otherwise a Playwright screenshot may come out blank
+        xAxis: {
+            type: 'category',
+            data: [${timeLabels!""}] // the !"" default avoids an error when the value is null
+        },
+        yAxis: { type: 'value', axisLabel: { formatter: '{value}%' } },
+        series: [{
+            data: [${avgCpuTrend!""}],
+            type: 'line',
+            smooth: true,
+            areaStyle: { color: 'rgba(24, 144, 255, 0.2)' },
+            itemStyle: { color: '#1890ff' }
+        }]
+    };
+    myChart.setOption(option);
+</script>
+
+</body>
+</html>

+ 102 - 0
mgr/src/main/resources/templates/pillar_report.ftl

@@ -0,0 +1,102 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
+    <style>
+        /* Layout: a header banner followed by a two-column grid of chart cards */
+        body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #f0f2f5; margin: 0; padding: 20px; }
+        .header { background: #fff; padding: 20px; border-radius: 8px; margin-bottom: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); text-align: center; }
+        .pillar-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }
+        .card { background: #fff; padding: 15px; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.05); }
+        .chart-container { width: 100%; height: 350px; }
+        h3 { margin-top: 0; color: #333; border-left: 4px solid #1890ff; padding-left: 10px; font-size: 16px; }
+        .summary { color: #666; font-size: 14px; margin-top: 10px; }
+    </style>
+</head>
+<body>
+
+<#-- Header banner: report date and the status summary text (always rendered in green). -->
+<div class="header">
+    <h2 style="margin:0;">🌐 基础设施资源监控日报</h2>
+    <div class="summary">报告日期:${report.reportDate} | 状态:<span style="color:#52c41a;font-weight:bold;">${report.statusSummary}</span></div>
+</div>
+
+<#-- One card per pillar; each chart container is filled by the script at the end of the page. -->
+<div class="pillar-grid">
+    <div class="card">
+        <h3>💻 计算 (CPU Usage %)</h3>
+        <div id="cpuChart" class="chart-container"></div>
+    </div>
+    <div class="card">
+        <h3>🧠 存储 (Memory Usage %)</h3>
+        <div id="memChart" class="chart-container"></div>
+    </div>
+    <div class="card">
+        <h3>💿 磁盘 (Disk I/O Saturation %)</h3>
+        <div id="diskChart" class="chart-container"></div>
+    </div>
+    <div class="card">
+        <h3>📡 网络 (Network Ingress MB/s)</h3>
+        <div id="netChart" class="chart-container"></div>
+    </div>
+</div>
+
+<script>
+    // Shared option factory for all four charts.
+    // Only `unit` is used in the returned option; `title` and `threshold` are accepted but unused.
+    const commonOption = (title, unit, threshold) => ({
+        animation: false, // disable animation so screenshots capture the finished chart
+        tooltip: { trigger: 'axis' },
+        legend: { bottom: 0, type: 'scroll', itemWidth: 10, textStyle: { fontSize: 10 } },
+        grid: { top: 40, left: '3%', right: '4%', bottom: '15%', containLabel: true },
+        xAxis: { type: 'category', boundaryGap: false, data: [${report.timeLabels}] },
+        yAxis: { type: 'value', axisLabel: { formatter: '{value}' + unit } }
+    });
+
+    // 1. CPU Chart: one line per instance; the visualMap switches color above 80
+    const cpuChart = echarts.init(document.getElementById('cpuChart'));
+    cpuChart.setOption({
+        ...commonOption('CPU', '%'),
+        series: [
+            <#list report.cpuSeries?keys as ip>
+            { name: '${ip}', type: 'line', smooth: true, symbol: 'none', data: [${report.cpuSeries[ip]?join(",")}] }<#if ip_has_next>,</#if>
+            </#list>
+        ],
+        visualMap: { show: false, pieces: [{ gt: 0, lte: 80, color: '#1890ff' }, { gt: 80, color: '#ff4d4f' }] }
+    });
+
+    // 2. Memory Chart: one line per instance
+    const memChart = echarts.init(document.getElementById('memChart'));
+    memChart.setOption({
+        ...commonOption('Memory', '%'),
+        series: [
+            <#list report.memSeries?keys as ip>
+            { name: '${ip}', type: 'line', smooth: true, symbol: 'none', data: [${report.memSeries[ip]?join(",")}] }<#if ip_has_next>,</#if>
+            </#list>
+        ]
+    });
+
+    // 3. Disk Chart (each series carries a dashed red warning line at 90%)
+    const diskChart = echarts.init(document.getElementById('diskChart'));
+    diskChart.setOption({
+        ...commonOption('Disk', '%'),
+        series: [
+            <#list report.diskSeries?keys as ip>
+            {
+                name: '${ip}', type: 'line', smooth: true, symbol: 'none', data: [${report.diskSeries[ip]?join(",")}],
+                markLine: { symbol: 'none', data: [{ yAxis: 90, lineStyle: { color: 'red', type: 'dashed' } }] }
+            }<#if ip_has_next>,</#if>
+            </#list>
+        ]
+    });
+
+    // 4. Network Chart: one line per instance, unit MB/s
+    const netChart = echarts.init(document.getElementById('netChart'));
+    netChart.setOption({
+        ...commonOption('Network', 'MB/s'),
+        series: [
+            <#list report.netSeries?keys as ip>
+            { name: '${ip}', type: 'line', smooth: true, symbol: 'none', data: [${report.netSeries[ip]?join(",")}] }<#if ip_has_next>,</#if>
+            </#list>
+        ]
+    });
+</script>
+</body>
+</html>