From f3f49f2c8e5c8ae687d3136e59a7751eac5d518e Mon Sep 17 00:00:00 2001 From: CN-JS-HuiBai Date: Sat, 4 Apr 2026 23:01:15 +0800 Subject: [PATCH] 2 --- public/js/app.js | 2 +- server/index.js | 3 ++ server/prometheus-service.js | 69 +++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/public/js/app.js b/public/js/app.js index 9a9aeb8..ac2f256 100644 --- a/public/js/app.js +++ b/public/js/app.js @@ -322,7 +322,7 @@ function updateDashboard(data) { // Server count dom.totalServers.textContent = data.totalServers; - dom.serverCountText.textContent = `${data.totalServers} 台服务器`; + dom.serverCountText.textContent = `${data.activeServers} / ${data.totalServers} 在线`; // CPU const cpuPct = data.cpu.percent; diff --git a/server/index.js b/server/index.js index a76d918..de47ff6 100644 --- a/server/index.js +++ b/server/index.js @@ -482,6 +482,7 @@ app.get('/api/metrics/overview', async (req, res) => { // Aggregate across all sources let totalServers = 0; + let activeServers = 0; let cpuUsed = 0, cpuTotal = 0; let memUsed = 0, memTotal = 0; let diskUsed = 0, diskTotal = 0; @@ -491,6 +492,7 @@ app.get('/api/metrics/overview', async (req, res) => { for (const m of validMetrics) { totalServers += m.totalServers; + activeServers += m.activeServers || m.totalServers; // Default if missing cpuUsed += m.cpu.used; cpuTotal += m.cpu.total; memUsed += m.memory.used; @@ -520,6 +522,7 @@ app.get('/api/metrics/overview', async (req, res) => { res.json({ totalServers, + activeServers, cpu: { used: cpuUsed, total: cpuTotal, diff --git a/server/prometheus-service.js b/server/prometheus-service.js index 616ff78..647a76f 100644 --- a/server/prometheus-service.js +++ b/server/prometheus-service.js @@ -110,6 +110,38 @@ async function query(baseUrl, expr) { } } +/** + * Get all targets from Prometheus + */ +async function getTargets(baseUrl) { + const url = normalizeUrl(baseUrl); + try { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT); + + const res = await fetch(`${url}/api/v1/targets`, { + signal: controller.signal + }); + + clearTimeout(timer); + + if (!res.ok) { + throw new Error(`Prometheus returned HTTP ${res.status}`); + } + + const data = await res.json(); + if (data.status !== 'success') { + throw new Error(`Prometheus targets fetch failed: ${data.error || 'unknown error'}`); + } + return data.data.activeTargets || []; + } catch (err) { + if (err.name === 'AbortError') { + throw new Error('Prometheus targets fetch timed out'); + } + throw err; + } +} + /** * Execute a Prometheus range query */ @@ -145,9 +177,6 @@ async function queryRange(baseUrl, expr, start, end, step) { -/** - * Get overview metrics from a single Prometheus source - */ async function getOverviewMetrics(url, sourceName) { // Run all queries in parallel const [ @@ -161,7 +190,7 @@ async function getOverviewMetrics(url, sourceName) { netTxResult, traffic24hRxResult, traffic24hTxResult, - upResult + targetsResult ] = await Promise.all([ // CPU usage per instance: 1 - avg idle query(url, '100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)').catch(() => []), @@ -183,9 +212,8 @@ async function getOverviewMetrics(url, sourceName) { query(url, 'sum by (instance, job) (increase(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []), // Total traffic transmitted in last 24h query(url, 'sum by (instance, job) (increase(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []), - // Up instances (at least one successful scrape in last 5m) - // We broaden the job filter to catch more variations of node-exporter jobs - query(url, 'max_over_time(up{job=~".*node.*|.*exporter.*|.*host.*"}[5m])').catch(() => []) + // Targets status from /api/v1/targets + getTargets(url).catch(() => []) ]); // Build per-instance data map @@ -223,10 +251,18 @@ async function getOverviewMetrics(url, sourceName) { return inst; }; - // Parse UP status - for (const r of upResult) { - const inst = getOrCreate(r.metric); - inst.up = parseFloat(r.value[1]) === 1; + // Initialize instances from targets first (to ensure we have all servers even if they have no metrics) + const nodeJobRegex = /node|exporter|host/i; + for (const target of targetsResult) { + const labels = target.labels || {}; + const instance = labels.instance; + const job = labels.job; + + // Only include targets that look like node-exporters + if (instance && (nodeJobRegex.test(job) || nodeJobRegex.test(target.scrapePool))) { + const inst = getOrCreate(labels); + inst.up = target.health === 'up'; + } } // Parse CPU usage @@ -271,15 +307,14 @@ async function getOverviewMetrics(url, sourceName) { inst.netTx = parseFloat(r.value[1]) || 0; } - // Final check: If an instance has non-zero CPU or Memory total data but is marked offline, - // it means we missed its 'up' metric due to job labels, but it's clearly sending data. for (const inst of instances.values()) { if (!inst.up && (inst.cpuPercent > 0 || inst.memTotal > 0)) { inst.up = true; } } - const activeInstances = Array.from(instances.values()).filter(inst => inst.up); + const allInstancesList = Array.from(instances.values()); + const activeInstances = allInstancesList.filter(inst => inst.up); // Aggregate let totalCpuUsed = 0, totalCpuCores = 0; @@ -308,7 +343,8 @@ async function getOverviewMetrics(url, sourceName) { } return { - totalServers: activeInstances.length, + totalServers: allInstancesList.length, + activeServers: activeInstances.length, cpu: { used: totalCpuUsed, total: totalCpuCores, @@ -334,7 +370,7 @@ async function getOverviewMetrics(url, sourceName) { tx: totalTraffic24hTx, total: totalTraffic24hRx + totalTraffic24hTx }, - servers: activeInstances.map(s => { + servers: allInstancesList.map(s => { const { originalInstance, ...rest } = s; return rest; }) @@ -607,6 +643,7 @@ module.exports = { testConnection, query, queryRange, + getTargets, getOverviewMetrics, getNetworkHistory, mergeNetworkHistories,