|
@@ -2,6 +2,10 @@ package cn.reghao.devops.mgr.ops.srv.mon;
|
|
|
|
|
|
|
|
import cn.reghao.devops.mgr.config.AppProperties;
|
|
import cn.reghao.devops.mgr.config.AppProperties;
|
|
|
import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerHealthReport;
|
|
import cn.reghao.devops.mgr.ops.srv.mon.dto.ContainerHealthReport;
|
|
|
|
|
+import cn.reghao.devops.mgr.ops.srv.mon.model.AnalysisResult;
|
|
|
|
|
+import cn.reghao.devops.mgr.ops.srv.mon.model.CpuThresholdConfig;
|
|
|
|
|
+import cn.reghao.devops.mgr.ops.srv.mon.model.HostGroup;
|
|
|
|
|
+import cn.reghao.devops.mgr.ops.srv.mon.model.MetricRecord;
|
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
import com.fasterxml.jackson.databind.JsonNode;
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
@@ -21,10 +25,12 @@ import java.nio.charset.StandardCharsets;
|
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Files;
|
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Path;
|
|
|
import java.nio.file.Paths;
|
|
import java.nio.file.Paths;
|
|
|
|
|
+import java.text.SimpleDateFormat;
|
|
|
import java.time.*;
|
|
import java.time.*;
|
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.time.format.DateTimeFormatter;
|
|
|
import java.util.*;
|
|
import java.util.*;
|
|
|
import java.util.concurrent.CompletableFuture;
|
|
import java.util.concurrent.CompletableFuture;
|
|
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
|
* @author reghao
|
|
* @author reghao
|
|
@@ -34,25 +40,75 @@ import java.util.concurrent.CompletableFuture;
|
|
|
@Service
|
|
@Service
|
|
|
public class PrometheusService {
|
|
public class PrometheusService {
|
|
|
private ObjectMapper objectMapper = new ObjectMapper();
|
|
private ObjectMapper objectMapper = new ObjectMapper();
|
|
|
- private final PrometheusAsyncClient promClient;
|
|
|
|
|
- private final Cache<String, Object> cache;
|
|
|
|
|
|
|
+ private PrometheusAsyncClient promClient;
|
|
|
|
|
+ private Cache<String, Object> cache;
|
|
|
|
|
|
|
|
/**
 * Creates the service with the Prometheus endpoint taken from application configuration.
 *
 * <p>This class also declares a no-argument constructor. When a Spring bean class has several
 * constructors and none of them is annotated, the container falls back to the no-argument one,
 * which would ignore {@code appProperties} and leave {@code cache} unset. The explicit
 * {@code @Autowired} marks this constructor as the injection point. The annotation is written
 * fully qualified so that no additional import is required.
 *
 * @param appProperties application settings; supplies the Prometheus base URL
 * @param cache         cache instance stored on the service
 */
@org.springframework.beans.factory.annotation.Autowired
public PrometheusService(AppProperties appProperties, Cache<String, Object> cache) {
    this.promClient = new PrometheusAsyncClient(appProperties.getPrometheusBaseUrl());
    this.cache = cache;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a service bound to a fixed Prometheus endpoint, without any injected dependencies.
 *
 * <p>Only {@code promClient} is initialised here; {@code cache} is left unset.
 * NOTE(review): presumably meant for ad-hoc runs outside the Spring context — confirm, and
 * consider moving the hard-coded URL into configuration.
 */
public PrometheusService() {
    this.promClient = new PrometheusAsyncClient("http://prometheus.iquizoo.cn");
}
|
|
|
|
|
+
|
|
|
|
|
+ public static Configuration getTemplateConfiguration() throws TemplateException, IOException {
|
|
|
|
|
+ FreeMarkerConfigurer configurer = new FreeMarkerConfigurer();
|
|
|
|
|
+ // 1. 设置模板存放路径 (通常在 resources/templates 下)
|
|
|
|
|
+ configurer.setTemplateLoaderPath("classpath:/templates/");
|
|
|
|
|
+ // 2. 设置默认编码
|
|
|
|
|
+ configurer.setDefaultEncoding("UTF-8");
|
|
|
|
|
+ // 3. 配置 FreeMarker 的原生属性
|
|
|
|
|
+ Properties settings = new Properties();
|
|
|
|
|
+ settings.setProperty("template_update_delay", "0"); // 检查模板更新延迟
|
|
|
|
|
+ settings.setProperty("default_encoding", "UTF-8");
|
|
|
|
|
+ settings.setProperty("number_format", "0.##"); // 数字格式化,防止 1000 变 1,000
|
|
|
|
|
+ settings.setProperty("datetime_format", "yyyy-MM-dd HH:mm:ss");
|
|
|
|
|
+ configurer.setFreemarkerSettings(settings);
|
|
|
|
|
+ // 重要:必须调用此方法来初始化内部的 Configuration 对象
|
|
|
|
|
+ configurer.afterPropertiesSet();
|
|
|
|
|
+ return configurer.getConfiguration();
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * @param templatePath 相对于 src/main/resources/templates/ 的路径
|
|
|
|
|
+ * @return
|
|
|
|
|
+ * @date 2026-03-29 00:03:145
|
|
|
|
|
+ */
|
|
|
|
|
+ public String renderHtml(String templatePath, Map<String, Object> root) throws Exception {
|
|
|
|
|
+ // 2. 加载模板文件
|
|
|
|
|
+ // 默认路径:src/main/resources/templates/pillar_report.ftl
|
|
|
|
|
+ Template template = getTemplateConfiguration().getTemplate(templatePath);
|
|
|
|
|
+ // 渲染并返回 HTML 字符串, FreeMarkerTemplateUtils 会自动处理异常并转换为 String
|
|
|
|
|
+ return FreeMarkerTemplateUtils.processTemplateIntoString(template, root);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
public void generateDailyReport() {
|
|
public void generateDailyReport() {
|
|
|
// 定义查询任务
|
|
// 定义查询任务
|
|
|
Map<String, String> tasks = Map.of(
|
|
Map<String, String> tasks = Map.of(
|
|
|
- "container_count", "count by (instance) (container_last_seen{image!=''})",
|
|
|
|
|
- "top_cpu_containers", "topk(5, sum by (name) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))"
|
|
|
|
|
|
|
+ "container_count", """
|
|
|
|
|
+ avg_over_time((1 - avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))[24h:1m]) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "top_cpu_containers", """
|
|
|
|
|
+ max_over_time((1 - avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by (instance))[24h:1m]) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "top_cpu_containers1", """
|
|
|
|
|
+ max_over_time((1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)[24h:1m]) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "top_cpu_containers2", """
|
|
|
|
|
+ max_over_time(rate(container_cpu_usage_seconds_total{name!=""}[5m])[24h:1m])
|
|
|
|
|
+ """,
|
|
|
|
|
+ "top_cpu_containers3", """
|
|
|
|
|
+ max_over_time(container_memory_usage_bytes{name!=""}[24h:1m]) / 1024 / 1024
|
|
|
|
|
+ """,
|
|
|
|
|
+ "top_cpu_containers4", """
|
|
|
|
|
+ increase(container_oom_events_total[24h]) > 0
|
|
|
|
|
+ """
|
|
|
);
|
|
);
|
|
|
|
|
|
|
|
// 异步执行
|
|
// 异步执行
|
|
|
promClient.fetchAllMetrics(tasks).thenAccept(results -> {
|
|
promClient.fetchAllMetrics(tasks).thenAccept(results -> {
|
|
|
- // 在这里解析 JSON 并填充到 DTO
|
|
|
|
|
- processResults(results);
|
|
|
|
|
|
|
+ //processResults(results);
|
|
|
System.out.println("所有数据采集完成,开始渲染报表...");
|
|
System.out.println("所有数据采集完成,开始渲染报表...");
|
|
|
}).join(); // 如果是在定时任务主线程,可以用 join 等待完成
|
|
}).join(); // 如果是在定时任务主线程,可以用 join 等待完成
|
|
|
}
|
|
}
|
|
@@ -60,7 +116,6 @@ public class PrometheusService {
|
|
|
private OperationReportDTO processResults(Map<String, String> results) {
|
|
private OperationReportDTO processResults(Map<String, String> results) {
|
|
|
OperationReportDTO report = new OperationReportDTO();
|
|
OperationReportDTO report = new OperationReportDTO();
|
|
|
Map<String, HostInfo> hostMap = new HashMap<>();
|
|
Map<String, HostInfo> hostMap = new HashMap<>();
|
|
|
-
|
|
|
|
|
try {
|
|
try {
|
|
|
// 1. 解析 CPU 指标
|
|
// 1. 解析 CPU 指标
|
|
|
if (results.containsKey("node_cpu")) {
|
|
if (results.containsKey("node_cpu")) {
|
|
@@ -97,12 +152,527 @@ public class PrometheusService {
|
|
|
parseTrendData(results.get("cpu_trend"), report);
|
|
parseTrendData(results.get("cpu_trend"), report);
|
|
|
}
|
|
}
|
|
|
} catch (Exception e) {
|
|
} catch (Exception e) {
|
|
|
- // log.error("JSON 解析失败", e); // 确保 log 对象已定义,或使用 System.err
|
|
|
|
|
- e.printStackTrace();
|
|
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ Map<String, Object> root = new HashMap<>();
|
|
|
|
|
+ root.put("hostGroupList", report);
|
|
|
|
|
+
|
|
|
|
|
+ /*try {
|
|
|
|
|
+ // 6. 渲染最终的复合模板
|
|
|
|
|
+ String templatePath = "daily_report.ftl";
|
|
|
|
|
+ String htmlContent = renderHtml(templatePath, root);
|
|
|
|
|
+ Path outputPath = Paths.get("/home/reghao/Downloads", "daily_report_" + LocalDate.now() + ".html");
|
|
|
|
|
+ if (Files.notExists(outputPath.getParent())) {
|
|
|
|
|
+ Files.createDirectories(outputPath.getParent());
|
|
|
|
|
+ }
|
|
|
|
|
+ Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
|
|
|
|
|
+ System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("{}", e.getMessage());
|
|
|
|
|
+ }*/
|
|
|
|
|
+
|
|
|
return report;
|
|
return report;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ public void generateDailyReport1() {
|
|
|
|
|
+ // 定义查询任务
|
|
|
|
|
+ Map<String, String> tasks = Map.of(
|
|
|
|
|
+ "container_count", "count by (instance) (container_last_seen{image!=''})",
|
|
|
|
|
+ "top_cpu_containers", "topk(5, sum by (name) (rate(container_cpu_usage_seconds_total{image!=''}[5m]) * 100))"
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ tasks = Map.of(
|
|
|
|
|
+ "node_cpu_core", """
|
|
|
|
|
+ count by(instance) (node_cpu_seconds_total{mode="idle"})
|
|
|
|
|
+ """,
|
|
|
|
|
+ "node_cpu_avg", """
|
|
|
|
|
+ avg_over_time(
|
|
|
|
|
+ (1 - avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])))[24h:1m]
|
|
|
|
|
+ ) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "node_cpu_max", """
|
|
|
|
|
+ max_over_time(
|
|
|
|
|
+ (1 - avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])))[24h:1m]
|
|
|
|
|
+ ) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "container_cpu_limit", """
|
|
|
|
|
+ (container_spec_cpu_quota > 0) / container_spec_cpu_period
|
|
|
|
|
+ """,
|
|
|
|
|
+ "container_cpu_avg", """
|
|
|
|
|
+ avg_over_time(
|
|
|
|
|
+ (
|
|
|
|
|
+ sum by (instance, name) (rate(container_cpu_usage_seconds_total{name!=""}[1m]))
|
|
|
|
|
+ /\s
|
|
|
|
|
+ sum by (instance, name) (container_spec_cpu_quota{name!=""} / container_spec_cpu_period{name!=""})
|
|
|
|
|
+ )[24h:1m]
|
|
|
|
|
+ ) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "container_cpu_max", """
|
|
|
|
|
+ max_over_time(
|
|
|
|
|
+ (
|
|
|
|
|
+ sum by (instance, name) (rate(container_cpu_usage_seconds_total{name!=""}[1m]))
|
|
|
|
|
+ /\s
|
|
|
|
|
+ sum by (instance, name) (container_spec_cpu_quota{name!=""} / container_spec_cpu_period{name!=""})
|
|
|
|
|
+ )[24h:1m]
|
|
|
|
|
+ ) * 100
|
|
|
|
|
+ """,
|
|
|
|
|
+ "container_mem_avg", """
|
|
|
|
|
+ avg_over_time(container_memory_working_set_bytes{name!=""}[24h]) / 1024 / 1024
|
|
|
|
|
+ """,
|
|
|
|
|
+ "container_mem_max", """
|
|
|
|
|
+ max_over_time(container_memory_working_set_bytes{name!=""}[24h]) / 1024 /1024
|
|
|
|
|
+ """
|
|
|
|
|
+ );
|
|
|
|
|
+
|
|
|
|
|
+ // 异步执行
|
|
|
|
|
+ promClient.fetchAllMetrics(tasks).thenAccept(results -> {
|
|
|
|
|
+ processResults0(results);
|
|
|
|
|
+ System.out.println("所有数据采集完成,开始渲染报表...");
|
|
|
|
|
+ }).join(); // 如果是在定时任务主线程,可以用 join 等待完成
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public List<HostGroup> generateUnifiedReport(Map<String, MetricRecord> mergedMap) {
|
|
|
|
|
+ // 2. 建立 HostGroup 层级关系
|
|
|
|
|
+ Map<String, HostGroup> hostGroupMap = new LinkedHashMap<>();
|
|
|
|
|
+ mergedMap.values().forEach(record -> {
|
|
|
|
|
+ // 提取 IP (172.16.45.66:9100 -> 172.16.45.66)
|
|
|
|
|
+ String ip = record.getInstance().split(":")[0];
|
|
|
|
|
+ HostGroup group = hostGroupMap.computeIfAbsent(ip, k -> {
|
|
|
|
|
+ HostGroup hg = new HostGroup();
|
|
|
|
|
+ hg.setHostIp(ip);
|
|
|
|
|
+ return hg;
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ if (record.getContainer() == null) {
|
|
|
|
|
+ group.setHostRecord(record);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ group.getContainerRecords().add(record);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 核心:建立宿主机与容器的量纲关联 (统一为 Node 视角)
|
|
|
|
|
+ hostGroupMap.values().forEach(group -> {
|
|
|
|
|
+ MetricRecord host = group.getHostRecord();
|
|
|
|
|
+ List<MetricRecord> containers = group.getContainerRecords();
|
|
|
|
|
+
|
|
|
|
|
+ if (host != null && !containers.isEmpty()) {
|
|
|
|
|
+ // 获取宿主机总核数 (从 node_cpu_core 指标中解析,此处假设 record 已包含该值)
|
|
|
|
|
+ double totalCores = host.getCpuCore() > 0 ? host.getCpuCore() : 8.0;
|
|
|
|
|
+
|
|
|
|
|
+ // 计算容器对宿主机的“实际贡献值”
|
|
|
|
|
+ // 容器利用率 (C_util) = (Used_Cores / Limit_Cores) * 100
|
|
|
|
|
+ // 贡献 Node 的百分比 = C_util * (Limit_Cores / Node_Total_Cores)
|
|
|
|
|
+ double totalContainerContributionToNode = containers.stream()
|
|
|
|
|
+ .mapToDouble(c -> c.getAvgValue() * (c.getCpuLimit() / totalCores))
|
|
|
|
|
+ .sum();
|
|
|
|
|
+
|
|
|
|
|
+ // 系统隐性损耗 = 宿主机总利用率 - 容器贡献总和
|
|
|
|
|
+ group.setSystemOverhead(Math.max(0, host.getAvgValue() - totalContainerContributionToNode));
|
|
|
|
|
+
|
|
|
|
|
+ for (MetricRecord container : group.getContainerRecords()) {
|
|
|
|
|
+ // 1. 获取该容器的 Limit 核数 (需从 container_cpu_limit 查询中匹配)
|
|
|
|
|
+ double limitCores = container.getCpuLimit();
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 计算:相对宿主机的百分比 = 相对Limit百分比 * (Limit核数 / 总核数)
|
|
|
|
|
+ double relativeToHost = container.getAvgValue() * (limitCores / totalCores);
|
|
|
|
|
+ double relativeToHostMax = container.getMaxValue() * (limitCores / totalCores);
|
|
|
|
|
+
|
|
|
|
|
+ // 将这两个值存入 record 供模板使用
|
|
|
|
|
+ container.setRelativeToHostAvg(relativeToHost);
|
|
|
|
|
+ container.setRelativeToHostMax(relativeToHostMax);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 诊断结论
|
|
|
|
|
+ StringBuilder diag = new StringBuilder();
|
|
|
|
|
+ if (group.getSystemOverhead() > 15.0) {
|
|
|
|
|
+ diag.append(String.format("⚠️ 宿主机非容器损耗(内核/IO)高达 %.1f%%。 ", group.getSystemOverhead()));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (host.getMaxValue() > 85.0) {
|
|
|
|
|
+ diag.append("🚨 宿主机峰值接近瓶颈。 ");
|
|
|
|
|
+ }
|
|
|
|
|
+ group.setRelationshipDiagnosis(diag.length() == 0 ? "✅ 资源分配健康" : diag.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 4. 传给模板
|
|
|
|
|
+ return new ArrayList<>(hostGroupMap.values());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void processResults0(Map<String, String> rawResults) {
|
|
|
|
|
+ // 使用 Map 存储合并后的对象,Key 是 "instance:container"
|
|
|
|
|
+ Map<String, MetricRecord> mergedMap = new HashMap<>();
|
|
|
|
|
+ // 遍历所有的任务结果 (node_cpu_avg, node_cpu_max, container_cpu_avg 等)
|
|
|
|
|
+ rawResults.forEach((taskName, jsonContent) -> {
|
|
|
|
|
+ try {
|
|
|
|
|
+ JsonNode root = objectMapper.readTree(jsonContent);
|
|
|
|
|
+ JsonNode resultNodes = root.path("data").path("result");
|
|
|
|
|
+ if (resultNodes.isArray()) {
|
|
|
|
|
+ for (JsonNode node : resultNodes) {
|
|
|
|
|
+ // 1. 提取标签
|
|
|
|
|
+ JsonNode metric = node.path("metric");
|
|
|
|
|
+ String instance = metric.path("instance").asText("unknown");
|
|
|
|
|
+ String container = metric.has("name") ? metric.path("name").asText() : null;
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 生成唯一键并获取/创建 MetricRecord
|
|
|
|
|
+ String key = instance + ":" + (container == null ? "HOST" : container);
|
|
|
|
|
+ MetricRecord record = mergedMap.computeIfAbsent(key, k -> {
|
|
|
|
|
+ MetricRecord newRecord = new MetricRecord();
|
|
|
|
|
+ newRecord.setInstance(instance);
|
|
|
|
|
+ newRecord.setContainer(container);
|
|
|
|
|
+ newRecord.setName(container == null ? "Node CPU" : "Container CPU");
|
|
|
|
|
+ return newRecord;
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 提取数值并根据任务名归类
|
|
|
|
|
+ JsonNode valueNode = node.path("value");
|
|
|
|
|
+ if (valueNode.isArray() && valueNode.size() >= 2) {
|
|
|
|
|
+ double val = valueNode.get(1).asDouble();
|
|
|
|
|
+ // 判断该任务是均值还是峰值
|
|
|
|
|
+ if (taskName.contains("_avg")) {
|
|
|
|
|
+ record.setAvgValue(val);
|
|
|
|
|
+ } else if (taskName.contains("_max")) {
|
|
|
|
|
+ record.setMaxValue(val);
|
|
|
|
|
+ } else if (taskName.contains("_cpu_core")) {
|
|
|
|
|
+ record.setCpuCore(val);
|
|
|
|
|
+ } else if (taskName.contains("_cpu_limit")) {
|
|
|
|
|
+ record.setCpuLimit(val);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ System.out.println();
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ System.out.println();
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ System.err.println("解析任务 [" + taskName + "] 失败: " + e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+ // 4. 将合并后的结果转换为 List 并执行分析
|
|
|
|
|
+ List<MetricRecord> finalRecords = new ArrayList<>(mergedMap.values());
|
|
|
|
|
+ System.out.println("数据对齐完成,共计 " + finalRecords.size() + " 条指标记录。");
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ //List<HostGroup> hostGroupList0 = buildHierarchyAndAnalyze(mergedMap);
|
|
|
|
|
+ List<HostGroup> hostGroupList = generateUnifiedReport(mergedMap);
|
|
|
|
|
+ // 5. 调用之前的分析逻辑
|
|
|
|
|
+ /*List<AnalysisResult> results = finalRecords.stream()
|
|
|
|
|
+ .map(this::getAnalysisResult)
|
|
|
|
|
+ .filter(res -> !"NORMAL".equals(res.getStatus())) // 过滤掉正常的
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 按 instance 分组,并保证宿主机排在列表第一位
|
|
|
|
|
+ Map<String, List<AnalysisResult>> groupMap = results.stream()
|
|
|
|
|
+ .collect(Collectors.groupingBy(
|
|
|
|
|
+ AnalysisResult::getInstance,
|
|
|
|
|
+ Collectors.collectingAndThen(
|
|
|
|
|
+ Collectors.toList(),
|
|
|
|
|
+ list -> {
|
|
|
|
|
+ // 排序:宿主机(container == null)排在最前,其余按名称排序
|
|
|
|
|
+ list.sort(Comparator.comparing(
|
|
|
|
|
+ res -> res.getContainer() == null ? "" : res.getContainer()
|
|
|
|
|
+ ));
|
|
|
|
|
+ return list;
|
|
|
|
|
+ }
|
|
|
|
|
+ )
|
|
|
|
|
+ ));*/
|
|
|
|
|
+
|
|
|
|
|
+ Map<String, Object> root = new HashMap<>();
|
|
|
|
|
+ //root.put("hostMap", groupMap);
|
|
|
|
|
+ root.put("date", new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
|
|
|
|
|
+ root.put("hostGroupList", hostGroupList);
|
|
|
|
|
+
|
|
|
|
|
+ try {
|
|
|
|
|
+ // 6. 渲染最终的复合模板(左右布局那个)
|
|
|
|
|
+ String templatePath = "host_group_report.ftl";
|
|
|
|
|
+ String htmlContent = renderHtml(templatePath, root);
|
|
|
|
|
+ Path outputPath = Paths.get("/home/reghao/Downloads", "report_" + LocalDate.now() + ".html");
|
|
|
|
|
+ if (Files.notExists(outputPath.getParent())) {
|
|
|
|
|
+ Files.createDirectories(outputPath.getParent());
|
|
|
|
|
+ }
|
|
|
|
|
+ Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
|
|
|
|
|
+ System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("{}", e.getMessage());
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private List<HostGroup> buildHostGroups(Map<String, MetricRecord> mergedMap) {
|
|
|
|
|
+ Map<String, HostGroup> hostGroups = new HashMap<>();
|
|
|
|
|
+
|
|
|
|
|
+ // 1. 第一次遍历:初始化宿主机
|
|
|
|
|
+ mergedMap.values().stream().filter(r -> r.getContainer() == null).forEach(r -> {
|
|
|
|
|
+ HostGroup g = new HostGroup();
|
|
|
|
|
+ g.setHostIp(r.getInstance());
|
|
|
|
|
+ g.setHostRecord(r);
|
|
|
|
|
+ hostGroups.put(r.getInstance(), g);
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 第二次遍历:挂载容器并折算贡献度
|
|
|
|
|
+ mergedMap.values().stream().filter(r -> r.getContainer() != null).forEach(r -> {
|
|
|
|
|
+ HostGroup g = hostGroups.get(r.getInstance());
|
|
|
|
|
+ if (g != null) {
|
|
|
|
|
+ g.getContainerRecords().add(r);
|
|
|
|
|
+ // 关键:计算该容器对宿主机的实际贡献 = (容器利用率 * 容器配额核心数) / 宿主机总核数
|
|
|
|
|
+ // 但因为我们没有直接查配额核心数,目前最稳妥的办法是仅做展示,
|
|
|
|
|
+ // 损耗诊断建议基于:HostAvg - Sum(Container核心数)/TotalCores
|
|
|
|
|
+ // 此处为了简化,我们假设容器利用率是相对于 Limit 的。
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 执行关系分析
|
|
|
|
|
+ hostGroups.values().forEach(this::analyzeRelationship);
|
|
|
|
|
+ return new ArrayList<>(hostGroups.values());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void analyzeRelationship(HostGroup group) {
|
|
|
|
|
+ MetricRecord host = group.getHostRecord();
|
|
|
|
|
+ if (host == null) return;
|
|
|
|
|
+
|
|
|
|
|
+ double totalCores = host.getCpuCore() > 0 ? host.getCpuCore() : 1.0;
|
|
|
|
|
+
|
|
|
|
|
+ // 这里需要注意:因为容器 avg 是相对于 limit 的百分比
|
|
|
|
|
+ // 如果没有采集到 limit 核心数,sum(avg) 是没有物理意义的。
|
|
|
|
|
+ // 建议:在实际运维中,我们关注的是宿主机整体水位。
|
|
|
|
|
+
|
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
|
+ if (host.getAvgValue() > 80) {
|
|
|
|
|
+ sb.append("🚨 宿主机整体负载极高,请检查资源分配。");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ sb.append("✅ 节点运行状态平稳。");
|
|
|
|
|
+ }
|
|
|
|
|
+ group.setRelationshipDiagnosis(sb.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ public List<HostGroup> buildHierarchyAndAnalyze0(Map<String, MetricRecord> mergedMap) {
|
|
|
|
|
+ // 1. 初步按 instance 分组
|
|
|
|
|
+ Map<String, HostGroup> groups = new HashMap<>();
|
|
|
|
|
+
|
|
|
|
|
+ mergedMap.values().forEach(record -> {
|
|
|
|
|
+ HostGroup group = groups.computeIfAbsent(record.getInstance(), k -> new HostGroup());
|
|
|
|
|
+ if (record.getContainer() == null) {
|
|
|
|
|
+ group.setHostRecord(record);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ group.getContainerRecords().add(record);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 深度分析每一组的关系
|
|
|
|
|
+ groups.values().forEach(group -> {
|
|
|
|
|
+ MetricRecord host = group.getHostRecord();
|
|
|
|
|
+ List<MetricRecord> containers = group.getContainerRecords();
|
|
|
|
|
+
|
|
|
|
|
+ if (host != null && !containers.isEmpty()) {
|
|
|
|
|
+ // 计算容器均值总和
|
|
|
|
|
+ double sumContainerAvg = containers.stream().mapToDouble(MetricRecord::getAvgValue).sum();
|
|
|
|
|
+ // 计算系统损耗 (宿主机利用率 - 容器利用率总和)
|
|
|
|
|
+ group.setSystemOverhead(Math.max(0, host.getAvgValue() - sumContainerAvg));
|
|
|
|
|
+
|
|
|
|
|
+ // 计算峰值共振:宿主机峰值 / 容器峰值总和
|
|
|
|
|
+ double sumContainerMax = containers.stream().mapToDouble(MetricRecord::getMaxValue).sum();
|
|
|
|
|
+ group.setPeakCohesion(sumContainerMax > 0 ? host.getMaxValue() / sumContainerMax : 0);
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 自动生成诊断结论
|
|
|
|
|
+ StringBuilder diagnosis = new StringBuilder();
|
|
|
|
|
+ if (group.getSystemOverhead() > 20.0) {
|
|
|
|
|
+ diagnosis.append(String.format("⚠️ 系统隐性损耗过高(%.1f%%),请检查宿主机原生进程。 ", group.getSystemOverhead()));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (host.getMaxValue() > 80.0 && group.getPeakCohesion() > 0.8) {
|
|
|
|
|
+ diagnosis.append("🚨 探测到明显的容器并发冲撞,建议交错执行高负载任务。 ");
|
|
|
|
|
+ }
|
|
|
|
|
+ if (diagnosis.length() == 0) diagnosis.append("✅ 宿主与容器负载分配比例健康。");
|
|
|
|
|
+
|
|
|
|
|
+ group.setRelationshipDiagnosis(diagnosis.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ return new ArrayList<>(groups.values());
|
|
|
|
|
+ }
|
|
|
|
|
+ public List<HostGroup> buildHierarchyAndAnalyze(Map<String, MetricRecord> mergedMap) {
|
|
|
|
|
+ // 1. 使用纯 IP (不带端口) 作为分组的 Key
|
|
|
|
|
+ Map<String, HostGroup> groups = new HashMap<>();
|
|
|
|
|
+
|
|
|
|
|
+ mergedMap.values().forEach(record -> {
|
|
|
|
|
+ // 提取 IP 部分,例如 "192.168.1.10:9100" -> "192.168.1.10"
|
|
|
|
|
+ String rawInstance = record.getInstance();
|
|
|
|
|
+ String ipAddress = rawInstance.contains(":") ? rawInstance.split(":")[0] : rawInstance;
|
|
|
|
|
+
|
|
|
|
|
+ HostGroup group = groups.computeIfAbsent(ipAddress, k -> {
|
|
|
|
|
+ HostGroup newGroup = new HostGroup();
|
|
|
|
|
+ newGroup.setHostIp(ipAddress); // 建议在 HostGroup 中增加该字段
|
|
|
|
|
+ return newGroup;
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 判定归属:根据 container 字段是否为空
|
|
|
|
|
+ if (record.getContainer() == null) {
|
|
|
|
|
+ // 来自 node_exporter 的宿主机数据
|
|
|
|
|
+ group.setHostRecord(record);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ // 来自 cadvisor 的容器数据
|
|
|
|
|
+ group.getContainerRecords().add(record);
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 深度分析每一组的关系
|
|
|
|
|
+ groups.values().forEach(group -> {
|
|
|
|
|
+ MetricRecord host = group.getHostRecord();
|
|
|
|
|
+ List<MetricRecord> containers = group.getContainerRecords();
|
|
|
|
|
+
|
|
|
|
|
+ // 只有当宿主机数据存在时才计算损耗
|
|
|
|
|
+ if (host != null) {
|
|
|
|
|
+ double sumContainerAvg = containers.stream().mapToDouble(MetricRecord::getAvgValue).sum();
|
|
|
|
|
+
|
|
|
|
|
+ // 计算系统隐性损耗:宿主机总量 - 容器总量
|
|
|
|
|
+ // 注意:如果容器很多,sum 可能略大于 host(采样时间差),需用 Math.max(0, ...)
|
|
|
|
|
+ group.setSystemOverhead(Math.max(0, host.getAvgValue() - sumContainerAvg));
|
|
|
|
|
+
|
|
|
|
|
+ // 诊断逻辑
|
|
|
|
|
+ StringBuilder diagnosis = new StringBuilder();
|
|
|
|
|
+ if (group.getSystemOverhead() > 20.0) {
|
|
|
|
|
+ diagnosis.append(String.format("⚠️ 宿主机非容器损耗较高(%.1f%%)。 ", group.getSystemOverhead()));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (containers.isEmpty()) {
|
|
|
|
|
+ diagnosis.append("ℹ️ 该节点当前未发现运行中的业务容器。");
|
|
|
|
|
+ } else if (diagnosis.length() == 0) {
|
|
|
|
|
+ diagnosis.append("✅ 宿主与容器负载分配正常。");
|
|
|
|
|
+ }
|
|
|
|
|
+ group.setRelationshipDiagnosis(diagnosis.toString());
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ return new ArrayList<>(groups.values());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /*public String analyze(MetricRecord record) {
|
|
|
|
|
+ StringBuilder report = new StringBuilder();
|
|
|
|
|
+ boolean isContainer = record.getContainer() != null;
|
|
|
|
|
+
|
|
|
|
|
+ // 1. 统一单位描述
|
|
|
|
|
+ String unit = isContainer ? "核" : "%";
|
|
|
|
|
+ String displayName = isContainer ? "容器:" + record.getContainer() : "宿主机:" + record.getInstance();
|
|
|
|
|
+
|
|
|
|
|
+ report.append(String.format("【%s 报告】均值: %.2f%s, 峰值: %.2f%s\n",
|
|
|
|
|
+ displayName, record.getAvgValue(), unit, record.getMaxValue(), unit));
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 针对性设定“防零处理”的底数 (Silence Threshold)
|
|
|
|
|
+ // 宿主机分母至少 1%;容器分母至少 0.1 核
|
|
|
|
|
+ double silenceThreshold = isContainer ? 0.1 : 1.0;
|
|
|
|
|
+ double targetThreshold = isContainer ? CpuThresholdConfig.CONTAINER_THRESHOLD : CpuThresholdConfig.NODE_THRESHOLD;
|
|
|
|
|
+
|
|
|
|
|
+ // 3. 判定:容量不足 (注意容器的 threshold 应该是它的 CPU Limit 核心数)
|
|
|
|
|
+ if (record.getAvgValue() >= targetThreshold) {
|
|
|
|
|
+ report.append(" -> [严重异常] 均值已触及水位线,资源严重不足!\n");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 4. 判定:毛刺率 (Spike Rate)
|
|
|
|
|
+ else {
|
|
|
|
|
+ double ratio = record.getMaxValue() / Math.max(record.getAvgValue(), silenceThreshold);
|
|
|
|
|
+
|
|
|
|
|
+ // 判定阈值:如果峰值本身很小(比如宿主机 < 5% 或 容器 < 0.2核),则忽略毛刺
|
|
|
|
|
+ double significantPeak = isContainer ? 0.2 : 5.0;
|
|
|
|
|
+
|
|
|
|
|
+ if (record.getMaxValue() > significantPeak && ratio > 5.0) {
|
|
|
|
|
+ report.append(String.format(" -> [预警] 瞬时毛刺严重(%.1fx)。", ratio));
|
|
|
|
|
+ if (isContainer) {
|
|
|
|
|
+ report.append("建议检查容器内部是否有突发短查询或频繁GC。\n");
|
|
|
|
|
+ } else {
|
|
|
|
|
+ report.append("建议检查宿主机是否有系统级任务或IO等待引起的CPU飙升。\n");
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ report.append(" -> [正常] 运行平稳。\n");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return report.toString();
|
|
|
|
|
+ }*/
|
|
|
|
|
+
|
|
|
|
|
+ public AnalysisResult getAnalysisResult(MetricRecord record) {
|
|
|
|
|
+ AnalysisResult res = new AnalysisResult();
|
|
|
|
|
+ // 基础信息设置
|
|
|
|
|
+ res.setInstance(record.getInstance());
|
|
|
|
|
+ res.setContainer(record.getContainer());
|
|
|
|
|
+ res.setUnit("%"); // 量纲已统一,固定为百分比
|
|
|
|
|
+ res.setAvg(record.getAvgValue());
|
|
|
|
|
+ res.setMax(record.getMaxValue());
|
|
|
|
|
+
|
|
|
|
|
+ // 核心计算逻辑:使用统一的静默底数 (1.0%)
|
|
|
|
|
+ double currentRatio = record.getMaxValue() / Math.max(record.getAvgValue(), CpuThresholdConfig.UNIFIED_SILENCE);
|
|
|
|
|
+ res.setRatio(currentRatio);
|
|
|
|
|
+
|
|
|
|
|
+ // 逻辑判定:使用统一的百分比阈值
|
|
|
|
|
+ if (record.getAvgValue() >= CpuThresholdConfig.UNIFIED_THRESHOLD) {
|
|
|
|
|
+ res.setStatus("CRITICAL");
|
|
|
|
|
+ res.setMessage(String.format("均值(%.2f%%)触及水位线,资源严重不足", record.getAvgValue()));
|
|
|
|
|
+ }
|
|
|
|
|
+ else if (record.getMaxValue() > CpuThresholdConfig.UNIFIED_SIGNIFICANT &&
|
|
|
|
|
+ currentRatio > CpuThresholdConfig.UNIFIED_RATIO) {
|
|
|
|
|
+ res.setStatus("WARNING");
|
|
|
|
|
+ res.setMessage(String.format("瞬时毛刺严重(%.1fx),峰值达到 %.2f%%", currentRatio, record.getMaxValue()));
|
|
|
|
|
+ }
|
|
|
|
|
+ else {
|
|
|
|
|
+ res.setStatus("NORMAL");
|
|
|
|
|
+ res.setMessage("运行平稳");
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return res;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 模拟解析 Prometheus 返回的 JSON 结构
|
|
|
|
|
+ * 生产环境建议使用 Jackson 或 Fastjson 遍历 data.result 数组
|
|
|
|
|
+ */
|
|
|
|
|
+ private Map<String, List<MetricRecord>> parsePrometheusJson(Map<String, String> results) {
|
|
|
|
|
+ Map<String, List<MetricRecord>> parsedData = new HashMap<>();
|
|
|
|
|
+ results.forEach((taskName, jsonContent) -> {
|
|
|
|
|
+ List<MetricRecord> records = new ArrayList<>();
|
|
|
|
|
+ try {
|
|
|
|
|
+ JsonNode root = objectMapper.readTree(jsonContent);
|
|
|
|
|
+ // Prometheus 标准响应路径: data -> result
|
|
|
|
|
+ JsonNode resultNodes = root.path("data").path("result");
|
|
|
|
|
+
|
|
|
|
|
+ if (resultNodes.isArray()) {
|
|
|
|
|
+ for (JsonNode node : resultNodes) {
|
|
|
|
|
+ MetricRecord record = new MetricRecord();
|
|
|
|
|
+
|
|
|
|
|
+ // 1. 解析标签 (Metric Labels)
|
|
|
|
|
+ JsonNode metric = node.path("metric");
|
|
|
|
|
+ record.setInstance(metric.path("instance").asText("unknown"));
|
|
|
|
|
+ // container 在 cAdvisor 中通常对应 'name' 标签,宿主机指标则没有此标签
|
|
|
|
|
+ if (metric.has("name")) {
|
|
|
|
|
+ record.setContainer(metric.path("name").asText());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 2. 解析数值 (Value)
|
|
|
|
|
+ // Prometheus value 格式为 [timestamp, "value_string"]
|
|
|
|
|
+ JsonNode valueNode = node.path("value");
|
|
|
|
|
+ if (valueNode.isArray() && valueNode.size() >= 2) {
|
|
|
|
|
+ // 注意:Prometheus 返回的数值是字符串形式,需要转换
|
|
|
|
|
+ double val = valueNode.get(1).asDouble();
|
|
|
|
|
+
|
|
|
|
|
+ // 根据任务名决定填充到哪个字段(暂时存入,后续 mergeMetrics 会处理)
|
|
|
|
|
+ if (taskName.contains("avg")) {
|
|
|
|
|
+ record.setAvgValue(val);
|
|
|
|
|
+ } else if (taskName.contains("max")) {
|
|
|
|
|
+ record.setMaxValue(val);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 设置指标名称便于识别
|
|
|
|
|
+ record.setName(taskName);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ records.add(record);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ parsedData.put(taskName, records);
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ System.err.println("解析任务 [" + taskName + "] 失败: " + e.getMessage());
|
|
|
|
|
+ parsedData.put(taskName, Collections.emptyList());
|
|
|
|
|
+ }
|
|
|
|
|
+ });
|
|
|
|
|
+
|
|
|
|
|
+ return parsedData;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
private void parseToHostMap(String json, Map<String, HostInfo> hostMap, String type) throws Exception {
|
|
private void parseToHostMap(String json, Map<String, HostInfo> hostMap, String type) throws Exception {
|
|
|
JsonNode root = objectMapper.readTree(json);
|
|
JsonNode root = objectMapper.readTree(json);
|
|
|
JsonNode resultList = root.path("data").path("result");
|
|
JsonNode resultList = root.path("data").path("result");
|
|
@@ -181,37 +751,6 @@ public class PrometheusService {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- public static Configuration getTemplateConfiguration() throws TemplateException, IOException {
|
|
|
|
|
- FreeMarkerConfigurer configurer = new FreeMarkerConfigurer();
|
|
|
|
|
- // 1. 设置模板存放路径 (通常在 resources/templates 下)
|
|
|
|
|
- configurer.setTemplateLoaderPath("classpath:/templates/");
|
|
|
|
|
- // 2. 设置默认编码
|
|
|
|
|
- configurer.setDefaultEncoding("UTF-8");
|
|
|
|
|
- // 3. 配置 FreeMarker 的原生属性
|
|
|
|
|
- Properties settings = new Properties();
|
|
|
|
|
- settings.setProperty("template_update_delay", "0"); // 检查模板更新延迟
|
|
|
|
|
- settings.setProperty("default_encoding", "UTF-8");
|
|
|
|
|
- settings.setProperty("number_format", "0.##"); // 数字格式化,防止 1000 变 1,000
|
|
|
|
|
- settings.setProperty("datetime_format", "yyyy-MM-dd HH:mm:ss");
|
|
|
|
|
- configurer.setFreemarkerSettings(settings);
|
|
|
|
|
- // 重要:必须调用此方法来初始化内部的 Configuration 对象
|
|
|
|
|
- configurer.afterPropertiesSet();
|
|
|
|
|
- return configurer.getConfiguration();
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- /**
|
|
|
|
|
- * @param templatePath 相对于 src/main/resources/templates/ 的路径
|
|
|
|
|
- * @return
|
|
|
|
|
- * @date 2026-03-29 00:03:145
|
|
|
|
|
- */
|
|
|
|
|
- public String renderHtml(String templatePath, Map<String, Object> root) throws Exception {
|
|
|
|
|
- // 2. 加载模板文件
|
|
|
|
|
- // 默认路径:src/main/resources/templates/pillar_report.ftl
|
|
|
|
|
- Template template = getTemplateConfiguration().getTemplate(templatePath);
|
|
|
|
|
- // 渲染并返回 HTML 字符串, FreeMarkerTemplateUtils 会自动处理异常并转换为 String
|
|
|
|
|
- return FreeMarkerTemplateUtils.processTemplateIntoString(template, root);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
/**
|
|
/**
|
|
|
* 辅助方法:构建 query_range 的完整 URL
|
|
* 辅助方法:构建 query_range 的完整 URL
|
|
|
*/
|
|
*/
|
|
@@ -487,11 +1026,10 @@ public class PrometheusService {
|
|
|
model.put("timeLabels", String.join(",", timeLabels));
|
|
model.put("timeLabels", String.join(",", timeLabels));
|
|
|
|
|
|
|
|
// 6. 渲染与输出
|
|
// 6. 渲染与输出
|
|
|
- /*String htmlContent = renderHtml("container_report_v2.ftl", model);
|
|
|
|
|
|
|
+ String htmlContent = renderHtml("container_report_v2.ftl", model);
|
|
|
Path outputPath = Paths.get("/home/reghao/Downloads", "container_report_v2_" + LocalDate.now() + ".html");
|
|
Path outputPath = Paths.get("/home/reghao/Downloads", "container_report_v2_" + LocalDate.now() + ".html");
|
|
|
Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
|
|
Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
|
|
|
- System.out.println("✅ 报表生成成功: " + outputPath.toAbsolutePath());*/
|
|
|
|
|
- System.out.println();
|
|
|
|
|
|
|
+ System.out.println("✅ 报表生成成功: " + outputPath.toAbsolutePath());
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
public ContainerReportVO getReportData() {
|
|
public ContainerReportVO getReportData() {
|
|
@@ -573,113 +1111,11 @@ public class PrometheusService {
|
|
|
return report;
|
|
return report;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- public void detect() throws Exception {
|
|
|
|
|
- // 1. 计算时间范围:昨天 00:00:00 到 23:59:59
|
|
|
|
|
- // 也可以根据需求改为:当前时间向前推 24 小时
|
|
|
|
|
- long end = Instant.now().getEpochSecond();
|
|
|
|
|
- long start = end - (24 * 3600);
|
|
|
|
|
- String step = "5m"; // 30分钟一个采样点,适合 24h 趋势图
|
|
|
|
|
-
|
|
|
|
|
- // 2. 定义 PromQL 查询语句
|
|
|
|
|
- String cpuQuery = "sum(irate(container_cpu_usage_seconds_total{name!=\"\"}[5m])) by (name, instance)";
|
|
|
|
|
- String memQuery = "sum(container_memory_working_set_bytes{name!=\"\"}) by (name, instance) / 1024 / 1024";
|
|
|
|
|
- String diskQuery = "max(rate(node_disk_io_time_seconds_total[5m])) by (instance)";
|
|
|
|
|
- String netQuery = "sum(irate(node_network_receive_bytes_total[5m])) by (instance) / 1024 / 1024";
|
|
|
|
|
-
|
|
|
|
|
- // 3. 构造异步任务 Map
|
|
|
|
|
- // 注意:这里调用的是 query_range 接口
|
|
|
|
|
- Map<String, String> tasks = Map.of(
|
|
|
|
|
- "cpu", buildRangeUrl(cpuQuery, start, end, step),
|
|
|
|
|
- "mem", buildRangeUrl(memQuery, start, end, step)
|
|
|
|
|
- );
|
|
|
|
|
- log.info("开始并行抓取 Prometheus 四大支柱数据...");
|
|
|
|
|
-
|
|
|
|
|
- // 4. 并行执行并阻塞等待结果(join)
|
|
|
|
|
- Map<String, String> rawResults = promClient.fetchAllMetrics0(tasks).join();
|
|
|
|
|
- jitter2(rawResults);
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- private void jitter1(Map<String, String> rawResults) throws Exception {
|
|
|
|
|
- JitterAnalysisService jitterAnalysisService = new JitterAnalysisService();
|
|
|
|
|
- List<ContainerHealthReport> list = jitterAnalysisService.analyzeMetrics(rawResults.get("cpu"), rawResults.get("mem"));
|
|
|
|
|
- Map<String, Object> model = new HashMap<>();
|
|
|
|
|
- // 关键:这里的 key "healthReports" 必须与模板中的 <#list healthReports> 匹配
|
|
|
|
|
- model.put("healthReports", list);
|
|
|
|
|
-
|
|
|
|
|
- String templatePath = "risk_dashboard.ftl";
|
|
|
|
|
- String htmlContent = renderHtml(templatePath, model);
|
|
|
|
|
- // 后续可以调用 playwright 截图
|
|
|
|
|
- // screenshotService.capture(htmlContent, "container_report.png");
|
|
|
|
|
-
|
|
|
|
|
- Path outputPath = Paths.get("/home/reghao/Downloads", "risk_dashboard_" + LocalDate.now() + ".html");
|
|
|
|
|
- if (Files.notExists(outputPath.getParent())) {
|
|
|
|
|
- Files.createDirectories(outputPath.getParent());
|
|
|
|
|
- }
|
|
|
|
|
- Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
|
|
|
|
|
- System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- @Data
|
|
|
|
|
- public class InstanceData {
|
|
|
|
|
- private Map<String, List<Double>> cpuSeries = new TreeMap<>();
|
|
|
|
|
- private Map<String, List<Double>> memSeries = new TreeMap<>();
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- private void parseToMap(String json, Map<String, InstanceData> groupedMap,
|
|
|
|
|
- List<String> timeLabels, boolean isCpu) throws Exception {
|
|
|
|
|
- JsonNode results = objectMapper.readTree(json).path("data").path("result");
|
|
|
|
|
- boolean labelsExtracted = (timeLabels == null);
|
|
|
|
|
-
|
|
|
|
|
- for (JsonNode res : results) {
|
|
|
|
|
- String name = res.path("metric").path("name").asText();
|
|
|
|
|
- String instance = res.path("metric").path("instance").asText().split(":")[0];
|
|
|
|
|
-
|
|
|
|
|
- InstanceData data = groupedMap.computeIfAbsent(instance, k -> new InstanceData());
|
|
|
|
|
- Map<String, List<Double>> targetSeries = isCpu ? data.getCpuSeries() : data.getMemSeries();
|
|
|
|
|
-
|
|
|
|
|
- List<Double> values = new ArrayList<>();
|
|
|
|
|
- for (JsonNode v : res.path("values")) {
|
|
|
|
|
- if (!labelsExtracted) {
|
|
|
|
|
- String time = Instant.ofEpochSecond(v.get(0).asLong())
|
|
|
|
|
- .atZone(ZoneId.systemDefault()).format(DateTimeFormatter.ofPattern("HH:mm"));
|
|
|
|
|
- timeLabels.add("'" + time + "'");
|
|
|
|
|
- }
|
|
|
|
|
- values.add(Math.round(v.get(1).asDouble() * 100.0) / 100.0);
|
|
|
|
|
- }
|
|
|
|
|
- labelsExtracted = true;
|
|
|
|
|
- targetSeries.put(name, values);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- private void jitter2(Map<String, String> rawResults) throws Exception {
|
|
|
|
|
- Map<String, InstanceData> groupedMap = new TreeMap<>();
|
|
|
|
|
- List<String> timeLabels = new ArrayList<>();
|
|
|
|
|
-
|
|
|
|
|
- // 1. 解析 CPU 数据
|
|
|
|
|
- parseToMap(rawResults.get("cpu"), groupedMap, timeLabels, true);
|
|
|
|
|
- // 2. 解析内存数据 (不再重复提取 timeLabels)
|
|
|
|
|
- parseToMap(rawResults.get("mem"), groupedMap, null, false);
|
|
|
|
|
-
|
|
|
|
|
- Map<String, Object> model = new HashMap<>();
|
|
|
|
|
- model.put("groupedMap", groupedMap);
|
|
|
|
|
- model.put("timeLabels", String.join(",", timeLabels));
|
|
|
|
|
-
|
|
|
|
|
- String templatePath = "jitter.ftl";
|
|
|
|
|
- String htmlContent = renderHtml(templatePath, model);
|
|
|
|
|
- Path outputPath = Paths.get("/home/reghao/Downloads", "jitter_" + LocalDate.now() + ".html");
|
|
|
|
|
- if (Files.notExists(outputPath.getParent())) {
|
|
|
|
|
- Files.createDirectories(outputPath.getParent());
|
|
|
|
|
- }
|
|
|
|
|
- Files.writeString(outputPath, htmlContent, StandardCharsets.UTF_8);
|
|
|
|
|
- System.out.println("✅ 报表已成功保存至: " + outputPath.toAbsolutePath());
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
public static void main(String[] args) throws Exception {
|
|
|
- //PrometheusService prometheusService = new PrometheusService();
|
|
|
|
|
- //prometheusService.generateContainerReport1();
|
|
|
|
|
- //prometheusService.getContainerReportData();
|
|
|
|
|
- //prometheusService.generatePillarReport();
|
|
|
|
|
|
|
+ PrometheusService prometheusService = new PrometheusService();
|
|
|
|
|
+ prometheusService.generateContainerReport1();
|
|
|
|
|
+ prometheusService.generateContainerReport();
|
|
|
|
|
+ prometheusService.generatePillarReport();
|
|
|
//prometheusService.generateDailyReport();
|
|
//prometheusService.generateDailyReport();
|
|
|
- //prometheusService.detect();
|
|
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|