This commit is contained in:
CN-JS-HuiBai
2026-04-04 23:01:15 +08:00
parent 79779d6fcf
commit f3f49f2c8e
3 changed files with 57 additions and 17 deletions

View File

@@ -482,6 +482,7 @@ app.get('/api/metrics/overview', async (req, res) => {
// Aggregate across all sources
let totalServers = 0;
let activeServers = 0;
let cpuUsed = 0, cpuTotal = 0;
let memUsed = 0, memTotal = 0;
let diskUsed = 0, diskTotal = 0;
@@ -491,6 +492,7 @@ app.get('/api/metrics/overview', async (req, res) => {
for (const m of validMetrics) {
totalServers += m.totalServers;
activeServers += m.activeServers || m.totalServers; // Default if missing
cpuUsed += m.cpu.used;
cpuTotal += m.cpu.total;
memUsed += m.memory.used;
@@ -520,6 +522,7 @@ app.get('/api/metrics/overview', async (req, res) => {
res.json({
totalServers,
activeServers,
cpu: {
used: cpuUsed,
total: cpuTotal,

View File

@@ -110,6 +110,38 @@ async function query(baseUrl, expr) {
}
}
/**
 * Fetch the list of active scrape targets from a Prometheus server.
 *
 * Calls `GET /api/v1/targets` with an AbortController-based timeout
 * (QUERY_TIMEOUT ms) and returns the `activeTargets` array.
 *
 * @param {string} baseUrl - Prometheus base URL; normalized via normalizeUrl().
 * @returns {Promise<Array<Object>>} active targets ([] when the field is absent).
 * @throws {Error} on non-2xx HTTP status, a non-'success' API status, or timeout.
 */
async function getTargets(baseUrl) {
  const url = normalizeUrl(baseUrl);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT);
  try {
    const res = await fetch(`${url}/api/v1/targets`, {
      signal: controller.signal
    });
    if (!res.ok) {
      throw new Error(`Prometheus returned HTTP ${res.status}`);
    }
    const data = await res.json();
    if (data.status !== 'success') {
      throw new Error(`Prometheus targets fetch failed: ${data.error || 'unknown error'}`);
    }
    return data.data.activeTargets || [];
  } catch (err) {
    // Translate an abort (timeout) into a descriptive error; rethrow the rest.
    if (err.name === 'AbortError') {
      throw new Error('Prometheus targets fetch timed out');
    }
    throw err;
  } finally {
    // Always clear the timer. The previous version cleared it only on the
    // success path, so a failed fetch left a pending abort() callback and
    // kept the Node event loop alive for up to QUERY_TIMEOUT.
    clearTimeout(timer);
  }
}
/**
* Execute a Prometheus range query
*/
@@ -145,9 +177,6 @@ async function queryRange(baseUrl, expr, start, end, step) {
/**
* Get overview metrics from a single Prometheus source
*/
async function getOverviewMetrics(url, sourceName) {
// Run all queries in parallel
const [
@@ -161,7 +190,7 @@ async function getOverviewMetrics(url, sourceName) {
netTxResult,
traffic24hRxResult,
traffic24hTxResult,
upResult
targetsResult
] = await Promise.all([
// CPU usage per instance: 1 - avg idle
query(url, '100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)').catch(() => []),
@@ -183,9 +212,8 @@ async function getOverviewMetrics(url, sourceName) {
query(url, 'sum by (instance, job) (increase(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
// Total traffic transmitted in last 24h
query(url, 'sum by (instance, job) (increase(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
// Up instances (at least one successful scrape in last 5m)
// We broaden the job filter to catch more variations of node-exporter jobs
query(url, 'max_over_time(up{job=~".*node.*|.*exporter.*|.*host.*"}[5m])').catch(() => [])
// Targets status from /api/v1/targets
getTargets(url).catch(() => [])
]);
// Build per-instance data map
@@ -223,10 +251,18 @@ async function getOverviewMetrics(url, sourceName) {
return inst;
};
// Parse UP status
for (const r of upResult) {
const inst = getOrCreate(r.metric);
inst.up = parseFloat(r.value[1]) === 1;
// Initialize instances from targets first (to ensure we have all servers even if they have no metrics)
const nodeJobRegex = /node|exporter|host/i;
for (const target of targetsResult) {
const labels = target.labels || {};
const instance = labels.instance;
const job = labels.job;
// Only include targets that look like node-exporters
if (instance && (nodeJobRegex.test(job) || nodeJobRegex.test(target.scrapePool))) {
const inst = getOrCreate(labels);
inst.up = target.health === 'up';
}
}
// Parse CPU usage
@@ -271,15 +307,14 @@ async function getOverviewMetrics(url, sourceName) {
inst.netTx = parseFloat(r.value[1]) || 0;
}
// Final check: If an instance has non-zero CPU or Memory total data but is marked offline,
// it means we missed its 'up' metric due to job labels, but it's clearly sending data.
for (const inst of instances.values()) {
if (!inst.up && (inst.cpuPercent > 0 || inst.memTotal > 0)) {
inst.up = true;
}
}
const activeInstances = Array.from(instances.values()).filter(inst => inst.up);
const allInstancesList = Array.from(instances.values());
const activeInstances = allInstancesList.filter(inst => inst.up);
// Aggregate
let totalCpuUsed = 0, totalCpuCores = 0;
@@ -308,7 +343,8 @@ async function getOverviewMetrics(url, sourceName) {
}
return {
totalServers: activeInstances.length,
totalServers: allInstancesList.length,
activeServers: activeInstances.length,
cpu: {
used: totalCpuUsed,
total: totalCpuCores,
@@ -334,7 +370,7 @@ async function getOverviewMetrics(url, sourceName) {
tx: totalTraffic24hTx,
total: totalTraffic24hRx + totalTraffic24hTx
},
servers: activeInstances.map(s => {
servers: allInstancesList.map(s => {
const { originalInstance, ...rest } = s;
return rest;
})
@@ -607,6 +643,7 @@ module.exports = {
testConnection,
query,
queryRange,
getTargets,
getOverviewMetrics,
getNetworkHistory,
mergeNetworkHistories,