From f3f49f2c8e5c8ae687d3136e59a7751eac5d518e Mon Sep 17 00:00:00 2001
From: CN-JS-HuiBai <huyang@littlediary.cn>
Date: Sat, 4 Apr 2026 23:01:15 +0800
Subject: [PATCH] Show online/total server counts using the Prometheus targets API

---
 public/js/app.js             |  2 +-
 server/index.js              |  3 ++
 server/prometheus-service.js | 69 +++++++++++++++++++++++++++---------
 3 files changed, 57 insertions(+), 17 deletions(-)

diff --git a/public/js/app.js b/public/js/app.js
index 9a9aeb8..ac2f256 100644
--- a/public/js/app.js
+++ b/public/js/app.js
@@ -322,7 +322,7 @@
   function updateDashboard(data) {
     // Server count
     dom.totalServers.textContent = data.totalServers;
-    dom.serverCountText.textContent = `${data.totalServers} 台服务器`;
+    dom.serverCountText.textContent = `${data.activeServers} / ${data.totalServers} 在线`;
 
     // CPU
     const cpuPct = data.cpu.percent;
diff --git a/server/index.js b/server/index.js
index a76d918..de47ff6 100644
--- a/server/index.js
+++ b/server/index.js
@@ -482,6 +482,7 @@ app.get('/api/metrics/overview', async (req, res) => {
 
     // Aggregate across all sources
     let totalServers = 0;
+    let activeServers = 0;
     let cpuUsed = 0, cpuTotal = 0;
     let memUsed = 0, memTotal = 0;
     let diskUsed = 0, diskTotal = 0;
@@ -491,6 +492,7 @@ app.get('/api/metrics/overview', async (req, res) => {
 
     for (const m of validMetrics) {
       totalServers += m.totalServers;
+      activeServers += m.activeServers ?? m.totalServers; // fall back only when the field is absent; 0 online is a valid count
       cpuUsed += m.cpu.used;
       cpuTotal += m.cpu.total;
       memUsed += m.memory.used;
@@ -520,6 +522,7 @@ app.get('/api/metrics/overview', async (req, res) => {
 
     res.json({
       totalServers,
+      activeServers,
       cpu: {
         used: cpuUsed,
         total: cpuTotal,
diff --git a/server/prometheus-service.js b/server/prometheus-service.js
index 616ff78..647a76f 100644
--- a/server/prometheus-service.js
+++ b/server/prometheus-service.js
@@ -110,6 +110,38 @@ async function query(baseUrl, expr) {
   }
 }
 
+/**
+ * Fetch the active scrape targets from Prometheus via GET /api/v1/targets.
+ * @param {string} baseUrl - Prometheus base URL; passed through normalizeUrl().
+ * @returns {Promise<Array>} `data.activeTargets` from the response, or [] when absent.
+ * @throws {Error} On timeout (QUERY_TIMEOUT ms), non-OK HTTP status, or a non-success payload.
+ */
+async function getTargets(baseUrl) {
+  const url = normalizeUrl(baseUrl);
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT);
+  try {
+    const res = await fetch(`${url}/api/v1/targets`, {
+      signal: controller.signal
+    });
+    if (!res.ok) {
+      throw new Error(`Prometheus returned HTTP ${res.status}`);
+    }
+    const data = await res.json();
+    if (data.status !== 'success') {
+      throw new Error(`Prometheus targets fetch failed: ${data.error || 'unknown error'}`);
+    }
+    return data.data?.activeTargets || [];
+  } catch (err) {
+    if (err.name === 'AbortError') {
+      throw new Error('Prometheus targets fetch timed out');
+    }
+    throw err;
+  } finally {
+    clearTimeout(timer); // runs on every path, so a failed fetch or body read never leaves the timer pending
+  }
+}
+
 /**
  * Execute a Prometheus range query
  */
@@ -145,9 +177,6 @@ async function queryRange(baseUrl, expr, start, end, step) {
 
 
 
-/**
- * Get overview metrics from a single Prometheus source
- */
 async function getOverviewMetrics(url, sourceName) {
   // Run all queries in parallel
   const [
@@ -161,7 +190,7 @@ async function getOverviewMetrics(url, sourceName) {
     netTxResult,
     traffic24hRxResult,
     traffic24hTxResult,
-    upResult
+    targetsResult
   ] = await Promise.all([
     // CPU usage per instance: 1 - avg idle
     query(url, '100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)').catch(() => []),
@@ -183,9 +212,8 @@ async function getOverviewMetrics(url, sourceName) {
     query(url, 'sum by (instance, job) (increase(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
     // Total traffic transmitted in last 24h
     query(url, 'sum by (instance, job) (increase(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
-    // Up instances (at least one successful scrape in last 5m)
-    // We broaden the job filter to catch more variations of node-exporter jobs
-    query(url, 'max_over_time(up{job=~".*node.*|.*exporter.*|.*host.*"}[5m])').catch(() => [])
+    // Targets status from /api/v1/targets
+    getTargets(url).catch(() => [])
   ]);
 
   // Build per-instance data map
@@ -223,10 +251,18 @@ async function getOverviewMetrics(url, sourceName) {
     return inst;
   };
 
-  // Parse UP status
-  for (const r of upResult) {
-    const inst = getOrCreate(r.metric);
-    inst.up = parseFloat(r.value[1]) === 1;
+  // Initialize instances from targets first (to ensure we have all servers even if they have no metrics)
+  const nodeJobRegex = /node|exporter|host/i;
+  for (const target of targetsResult) {
+    const labels = target.labels || {};
+    const instance = labels.instance;
+    const job = labels.job;
+    
+    // Only include targets that look like node-exporters
+    if (instance && (nodeJobRegex.test(job) || nodeJobRegex.test(target.scrapePool))) {
+      const inst = getOrCreate(labels);
+      inst.up = target.health === 'up';
+    }
   }
 
   // Parse CPU usage
@@ -271,15 +307,14 @@ async function getOverviewMetrics(url, sourceName) {
     inst.netTx = parseFloat(r.value[1]) || 0;
   }
 
-  // Final check: If an instance has non-zero CPU or Memory total data but is marked offline, 
-  // it means we missed its 'up' metric due to job labels, but it's clearly sending data.
   for (const inst of instances.values()) {
     if (!inst.up && (inst.cpuPercent > 0 || inst.memTotal > 0)) {
       inst.up = true;
     }
   }
 
-  const activeInstances = Array.from(instances.values()).filter(inst => inst.up);
+  const allInstancesList = Array.from(instances.values());
+  const activeInstances = allInstancesList.filter(inst => inst.up);
 
   // Aggregate
   let totalCpuUsed = 0, totalCpuCores = 0;
@@ -308,7 +343,8 @@ async function getOverviewMetrics(url, sourceName) {
   }
 
   return {
-    totalServers: activeInstances.length,
+    totalServers: allInstancesList.length,
+    activeServers: activeInstances.length,
     cpu: {
       used: totalCpuUsed,
       total: totalCpuCores,
@@ -334,7 +370,7 @@ async function getOverviewMetrics(url, sourceName) {
       tx: totalTraffic24hTx,
       total: totalTraffic24hRx + totalTraffic24hTx
     },
-    servers: activeInstances.map(s => {
+    servers: allInstancesList.map(s => {
       const { originalInstance, ...rest } = s;
       return rest;
     })
@@ -607,6 +643,7 @@ module.exports = {
   testConnection,
   query,
   queryRange,
+  getTargets,
   getOverviewMetrics,
   getNetworkHistory,
   mergeNetworkHistories,