2
This commit is contained in:
@@ -322,7 +322,7 @@
|
|||||||
function updateDashboard(data) {
|
function updateDashboard(data) {
|
||||||
// Server count
|
// Server count
|
||||||
dom.totalServers.textContent = data.totalServers;
|
dom.totalServers.textContent = data.totalServers;
|
||||||
dom.serverCountText.textContent = `${data.totalServers} 台服务器`;
|
dom.serverCountText.textContent = `${data.activeServers} / ${data.totalServers} 在线`;
|
||||||
|
|
||||||
// CPU
|
// CPU
|
||||||
const cpuPct = data.cpu.percent;
|
const cpuPct = data.cpu.percent;
|
||||||
|
|||||||
@@ -482,6 +482,7 @@ app.get('/api/metrics/overview', async (req, res) => {
|
|||||||
|
|
||||||
// Aggregate across all sources
|
// Aggregate across all sources
|
||||||
let totalServers = 0;
|
let totalServers = 0;
|
||||||
|
let activeServers = 0;
|
||||||
let cpuUsed = 0, cpuTotal = 0;
|
let cpuUsed = 0, cpuTotal = 0;
|
||||||
let memUsed = 0, memTotal = 0;
|
let memUsed = 0, memTotal = 0;
|
||||||
let diskUsed = 0, diskTotal = 0;
|
let diskUsed = 0, diskTotal = 0;
|
||||||
@@ -491,6 +492,7 @@ app.get('/api/metrics/overview', async (req, res) => {
|
|||||||
|
|
||||||
for (const m of validMetrics) {
|
for (const m of validMetrics) {
|
||||||
totalServers += m.totalServers;
|
totalServers += m.totalServers;
|
||||||
|
activeServers += m.activeServers || m.totalServers; // Default if missing
|
||||||
cpuUsed += m.cpu.used;
|
cpuUsed += m.cpu.used;
|
||||||
cpuTotal += m.cpu.total;
|
cpuTotal += m.cpu.total;
|
||||||
memUsed += m.memory.used;
|
memUsed += m.memory.used;
|
||||||
@@ -520,6 +522,7 @@ app.get('/api/metrics/overview', async (req, res) => {
|
|||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
totalServers,
|
totalServers,
|
||||||
|
activeServers,
|
||||||
cpu: {
|
cpu: {
|
||||||
used: cpuUsed,
|
used: cpuUsed,
|
||||||
total: cpuTotal,
|
total: cpuTotal,
|
||||||
|
|||||||
@@ -110,6 +110,38 @@ async function query(baseUrl, expr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Get all active scrape targets from a Prometheus server.
 *
 * Issues a GET to `<baseUrl>/api/v1/targets` and returns the `activeTargets`
 * array from the response. The whole exchange (request AND body read) is
 * bounded by QUERY_TIMEOUT milliseconds via an AbortController.
 *
 * @param {string} baseUrl - Base URL of the Prometheus server; passed through
 *   normalizeUrl() before the API path is appended.
 * @returns {Promise<Array<object>>} The `data.activeTargets` array, or an
 *   empty array when the response carries none.
 * @throws {Error} 'Prometheus returned HTTP <status>' on a non-2xx response.
 * @throws {Error} 'Prometheus targets fetch failed: <reason>' when the payload
 *   status is not 'success'.
 * @throws {Error} 'Prometheus targets fetch timed out' when the request is
 *   aborted by the timeout.
 */
async function getTargets(baseUrl) {
  const url = normalizeUrl(baseUrl);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT);

  try {
    const res = await fetch(`${url}/api/v1/targets`, {
      signal: controller.signal
    });

    if (!res.ok) {
      throw new Error(`Prometheus returned HTTP ${res.status}`);
    }

    // The timer is still armed here on purpose: a response whose body stalls
    // must be aborted too, not only a request that never gets headers back.
    const payload = await res.json();
    if (!payload || payload.status !== 'success') {
      const reason = (payload && payload.error) || 'unknown error';
      throw new Error(`Prometheus targets fetch failed: ${reason}`);
    }

    // Guard against a 'success' payload that lacks the `data` member.
    return (payload.data && payload.data.activeTargets) || [];
  } catch (err) {
    if (err.name === 'AbortError') {
      throw new Error('Prometheus targets fetch timed out');
    }
    throw err;
  } finally {
    // Always release the timer, including when fetch rejects (network/DNS
    // failure); otherwise it stays pending for the full QUERY_TIMEOUT.
    clearTimeout(timer);
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute a Prometheus range query
|
* Execute a Prometheus range query
|
||||||
*/
|
*/
|
||||||
@@ -145,9 +177,6 @@ async function queryRange(baseUrl, expr, start, end, step) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get overview metrics from a single Prometheus source
|
|
||||||
*/
|
|
||||||
async function getOverviewMetrics(url, sourceName) {
|
async function getOverviewMetrics(url, sourceName) {
|
||||||
// Run all queries in parallel
|
// Run all queries in parallel
|
||||||
const [
|
const [
|
||||||
@@ -161,7 +190,7 @@ async function getOverviewMetrics(url, sourceName) {
|
|||||||
netTxResult,
|
netTxResult,
|
||||||
traffic24hRxResult,
|
traffic24hRxResult,
|
||||||
traffic24hTxResult,
|
traffic24hTxResult,
|
||||||
upResult
|
targetsResult
|
||||||
] = await Promise.all([
|
] = await Promise.all([
|
||||||
// CPU usage per instance: 1 - avg idle
|
// CPU usage per instance: 1 - avg idle
|
||||||
query(url, '100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)').catch(() => []),
|
query(url, '100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)').catch(() => []),
|
||||||
@@ -183,9 +212,8 @@ async function getOverviewMetrics(url, sourceName) {
|
|||||||
query(url, 'sum by (instance, job) (increase(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
|
query(url, 'sum by (instance, job) (increase(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
|
||||||
// Total traffic transmitted in last 24h
|
// Total traffic transmitted in last 24h
|
||||||
query(url, 'sum by (instance, job) (increase(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
|
query(url, 'sum by (instance, job) (increase(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
|
||||||
// Up instances (at least one successful scrape in last 5m)
|
// Targets status from /api/v1/targets
|
||||||
// We broaden the job filter to catch more variations of node-exporter jobs
|
getTargets(url).catch(() => [])
|
||||||
query(url, 'max_over_time(up{job=~".*node.*|.*exporter.*|.*host.*"}[5m])').catch(() => [])
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// Build per-instance data map
|
// Build per-instance data map
|
||||||
@@ -223,10 +251,18 @@ async function getOverviewMetrics(url, sourceName) {
|
|||||||
return inst;
|
return inst;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Parse UP status
|
// Initialize instances from targets first (to ensure we have all servers even if they have no metrics)
|
||||||
for (const r of upResult) {
|
const nodeJobRegex = /node|exporter|host/i;
|
||||||
const inst = getOrCreate(r.metric);
|
for (const target of targetsResult) {
|
||||||
inst.up = parseFloat(r.value[1]) === 1;
|
const labels = target.labels || {};
|
||||||
|
const instance = labels.instance;
|
||||||
|
const job = labels.job;
|
||||||
|
|
||||||
|
// Only include targets that look like node-exporters
|
||||||
|
if (instance && (nodeJobRegex.test(job) || nodeJobRegex.test(target.scrapePool))) {
|
||||||
|
const inst = getOrCreate(labels);
|
||||||
|
inst.up = target.health === 'up';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse CPU usage
|
// Parse CPU usage
|
||||||
@@ -271,15 +307,14 @@ async function getOverviewMetrics(url, sourceName) {
|
|||||||
inst.netTx = parseFloat(r.value[1]) || 0;
|
inst.netTx = parseFloat(r.value[1]) || 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final check: If an instance has non-zero CPU or Memory total data but is marked offline,
|
|
||||||
// it means we missed its 'up' metric due to job labels, but it's clearly sending data.
|
|
||||||
for (const inst of instances.values()) {
|
for (const inst of instances.values()) {
|
||||||
if (!inst.up && (inst.cpuPercent > 0 || inst.memTotal > 0)) {
|
if (!inst.up && (inst.cpuPercent > 0 || inst.memTotal > 0)) {
|
||||||
inst.up = true;
|
inst.up = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const activeInstances = Array.from(instances.values()).filter(inst => inst.up);
|
const allInstancesList = Array.from(instances.values());
|
||||||
|
const activeInstances = allInstancesList.filter(inst => inst.up);
|
||||||
|
|
||||||
// Aggregate
|
// Aggregate
|
||||||
let totalCpuUsed = 0, totalCpuCores = 0;
|
let totalCpuUsed = 0, totalCpuCores = 0;
|
||||||
@@ -308,7 +343,8 @@ async function getOverviewMetrics(url, sourceName) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
totalServers: activeInstances.length,
|
totalServers: allInstancesList.length,
|
||||||
|
activeServers: activeInstances.length,
|
||||||
cpu: {
|
cpu: {
|
||||||
used: totalCpuUsed,
|
used: totalCpuUsed,
|
||||||
total: totalCpuCores,
|
total: totalCpuCores,
|
||||||
@@ -334,7 +370,7 @@ async function getOverviewMetrics(url, sourceName) {
|
|||||||
tx: totalTraffic24hTx,
|
tx: totalTraffic24hTx,
|
||||||
total: totalTraffic24hRx + totalTraffic24hTx
|
total: totalTraffic24hRx + totalTraffic24hTx
|
||||||
},
|
},
|
||||||
servers: activeInstances.map(s => {
|
servers: allInstancesList.map(s => {
|
||||||
const { originalInstance, ...rest } = s;
|
const { originalInstance, ...rest } = s;
|
||||||
return rest;
|
return rest;
|
||||||
})
|
})
|
||||||
@@ -607,6 +643,7 @@ module.exports = {
|
|||||||
testConnection,
|
testConnection,
|
||||||
query,
|
query,
|
||||||
queryRange,
|
queryRange,
|
||||||
|
getTargets,
|
||||||
getOverviewMetrics,
|
getOverviewMetrics,
|
||||||
getNetworkHistory,
|
getNetworkHistory,
|
||||||
mergeNetworkHistories,
|
mergeNetworkHistories,
|
||||||
|
|||||||
Reference in New Issue
Block a user