// Prometheus data-source helpers: connection testing, instant/range queries,
// fleet overview aggregation, per-server details/history, and token masking.
const axios = require('axios');
|
|
const http = require('http');
|
|
const https = require('https');
|
|
|
|
// Per-request timeout (ms) applied to every Prometheus HTTP call below.
const QUERY_TIMEOUT = 10000;

// Reusable agents to handle potential redirect issues and protocol mismatches
const crypto = require('crypto');
const httpAgent = new http.Agent({ keepAlive: true });
const httpsAgent = new https.Agent({ keepAlive: true });

// Maps an opaque server token (HMAC of instance:job:source) back to the real
// identity, so the frontend only ever sees masked instance identifiers.
const serverIdMap = new Map(); // token -> { instance, job, source, lastSeen }

// HMAC key for token generation. Falls back to a random per-process secret,
// so tokens are NOT stable across restarts unless APP_SECRET is configured.
const SECRET = process.env.APP_SECRET || crypto.randomBytes(32).toString('hex');
|
|
|
// Periodic cleanup of serverIdMap to prevent unbounded growth: entries not
// touched for 24h are dropped once per hour.
setInterval(() => {
  const now = Date.now();
  const TTL = 24 * 60 * 60 * 1000; // 24 hours
  for (const [token, data] of serverIdMap.entries()) {
    // Entries without a lastSeen timestamp are treated as stale (age from epoch)
    if (now - (data.lastSeen || 0) > TTL) {
      serverIdMap.delete(token);
    }
  }
}, 3600000).unref(); // once per hour; unref() so this timer never keeps the process alive
|
|
|
|
/**
 * Derive a stable opaque token for a server identity (instance/job/source)
 * and refresh its lastSeen timestamp when the token is already tracked.
 */
function getServerToken(instance, job, source) {
  const token = crypto
    .createHmac('sha256', SECRET)
    .update(`${instance}:${job}:${source}`)
    .digest('hex')
    .slice(0, 16);

  // Touch the tracked entry (if any) so the hourly cleanup keeps it alive.
  const tracked = serverIdMap.get(token);
  if (tracked) {
    tracked.lastSeen = Date.now();
  }

  return token;
}
|
|
|
|
/**
 * Normalize URL: trim whitespace, strip trailing slashes, and default to
 * http:// when no scheme is present.
 */
function normalizeUrl(baseUrl) {
  const trimmed = baseUrl.trim().replace(/\/+$/, '');
  const hasScheme = /^https?:\/\//i.test(trimmed);
  return hasScheme ? trimmed : `http://${trimmed}`;
}
|
|
|
|
/**
 * Build a pre-configured axios client bound to a Prometheus base URL,
 * sharing the module's keep-alive agents and default timeout.
 */
function createClient(baseUrl) {
  return axios.create({
    baseURL: normalizeUrl(baseUrl),
    timeout: QUERY_TIMEOUT,
    httpAgent,
    httpsAgent,
    maxRedirects: 5
  });
}
|
|
|
|
/**
 * Test Prometheus connection by fetching the buildinfo endpoint.
 *
 * @param {string} url - Prometheus base URL (scheme optional).
 * @param {number|null} customTimeout - optional timeout in ms (falls back to QUERY_TIMEOUT).
 * @returns {Promise<string>} the reported Prometheus version, or 'unknown'.
 * @throws {Error} on timeout, non-2xx response, or network failure.
 */
async function testConnection(url, customTimeout = null) {
  const normalized = normalizeUrl(url);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), customTimeout || QUERY_TIMEOUT);
  try {
    // Native fetch avoids follow-redirects/axios "protocol mismatch" issues
    // seen in some Node environments; it handles http/https automatically.
    const res = await fetch(`${normalized}/api/v1/status/buildinfo`, {
      signal: controller.signal
    });

    if (!res.ok) {
      throw new Error(`Prometheus returned HTTP ${res.status}`);
    }

    const data = await res.json();
    return data?.data?.version || 'unknown';
  } catch (err) {
    if (err.name === 'AbortError') {
      throw new Error('Connection timed out');
    }
    console.error(`[Prometheus] Connection test failed for ${normalized}:`, err.message);
    throw err;
  } finally {
    // Clear the abort timer on every path — the original leaked it on error,
    // leaving a live timeout that kept the event loop busy.
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Execute a Prometheus instant query.
 *
 * @param {string} baseUrl - Prometheus base URL (scheme optional).
 * @param {string} expr - PromQL expression.
 * @returns {Promise<Array>} the raw result vector from the Prometheus API.
 * @throws {Error} on timeout, HTTP error, or a non-success API status.
 */
async function query(baseUrl, expr) {
  const url = normalizeUrl(baseUrl);
  const params = new URLSearchParams({ query: expr });
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT);
  try {
    const res = await fetch(`${url}/api/v1/query?${params.toString()}`, {
      signal: controller.signal
    });

    if (!res.ok) {
      throw new Error(`Prometheus returned HTTP ${res.status}`);
    }

    const data = await res.json();
    if (data.status !== 'success') {
      throw new Error(`Prometheus query failed: ${data.error || 'unknown error'}`);
    }
    return data.data.result;
  } catch (err) {
    if (err.name === 'AbortError') {
      throw new Error('Prometheus query timed out');
    }
    throw err;
  } finally {
    // Clear the abort timer on every path — the original leaked it on error.
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Get all targets from Prometheus.
 *
 * @param {string} baseUrl - Prometheus base URL (scheme optional).
 * @returns {Promise<Array>} the activeTargets list ([] when absent).
 * @throws {Error} on timeout, HTTP error, or a non-success API status.
 */
async function getTargets(baseUrl) {
  const url = normalizeUrl(baseUrl);
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT);
  try {
    const res = await fetch(`${url}/api/v1/targets`, {
      signal: controller.signal
    });

    if (!res.ok) {
      throw new Error(`Prometheus returned HTTP ${res.status}`);
    }

    const data = await res.json();
    if (data.status !== 'success') {
      throw new Error(`Prometheus targets fetch failed: ${data.error || 'unknown error'}`);
    }
    return data.data.activeTargets || [];
  } catch (err) {
    if (err.name === 'AbortError') {
      throw new Error('Prometheus targets fetch timed out');
    }
    throw err;
  } finally {
    // Clear the abort timer on every path — the original leaked it on error.
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Execute a Prometheus range query.
 *
 * @param {string} baseUrl - Prometheus base URL (scheme optional).
 * @param {string} expr - PromQL expression.
 * @param {number|string} start - range start (unix seconds).
 * @param {number|string} end - range end (unix seconds).
 * @param {number|string} step - resolution step (seconds).
 * @returns {Promise<Array>} the raw result matrix from the Prometheus API.
 * @throws {Error} on timeout, HTTP error, or a non-success API status.
 */
async function queryRange(baseUrl, expr, start, end, step) {
  const url = normalizeUrl(baseUrl);
  const params = new URLSearchParams({ query: expr, start, end, step });
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), QUERY_TIMEOUT);
  try {
    const res = await fetch(`${url}/api/v1/query_range?${params.toString()}`, {
      signal: controller.signal
    });

    if (!res.ok) {
      throw new Error(`Prometheus returned HTTP ${res.status}`);
    }

    const data = await res.json();
    if (data.status !== 'success') {
      throw new Error(`Prometheus range query failed: ${data.error || 'unknown error'}`);
    }
    return data.data.result;
  } catch (err) {
    if (err.name === 'AbortError') {
      throw new Error('Prometheus range query timed out');
    }
    throw err;
  } finally {
    // Clear the abort timer on every path — the original leaked it on error.
    clearTimeout(timer);
  }
}
|
|
|
|
|
|
|
|
/**
 * Collect a fleet overview from one Prometheus source: per-server
 * CPU/memory/disk/network stats plus fleet-wide aggregates.
 *
 * @param {string} url - Prometheus base URL.
 * @param {string} sourceName - logical name of this Prometheus source.
 * @returns {Promise<Object>} aggregates plus a `servers` array whose
 *   `instance` field is the opaque token (real addresses stay server-side).
 */
async function getOverviewMetrics(url, sourceName) {
  // Run all queries in parallel; each degrades to [] on failure so one
  // broken query cannot take down the whole overview.
  const [
    cpuResult,
    cpuCountResult,
    memTotalResult,
    memAvailResult,
    diskTotalResult,
    diskFreeResult,
    netRxResult,
    netTxResult,
    netRx24hResult,
    netTx24hResult,
    targetsResult
  ] = await Promise.all([
    // CPU usage per instance: 100 - avg idle
    query(url, '100 - (avg by (instance, job) (rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)').catch(() => []),
    // CPU count per instance
    query(url, 'count by (instance, job) (node_cpu_seconds_total{mode="idle"})').catch(() => []),
    // Memory total per instance
    query(url, 'node_memory_MemTotal_bytes').catch(() => []),
    // Memory available per instance
    query(url, 'node_memory_MemAvailable_bytes').catch(() => []),
    // Disk total per instance (excluding virtual fs and FUSE/rclone mounts)
    query(url, 'sum by (instance, job) (node_filesystem_size_bytes{fstype!~"tmpfs|autofs|proc|sysfs|fuse.*", mountpoint!~"/tmp.*|/var/lib/docker/.*|/run/.*"})').catch(() => []),
    // Disk free per instance
    query(url, 'sum by (instance, job) (node_filesystem_free_bytes{fstype!~"tmpfs|autofs|proc|sysfs|fuse.*", mountpoint!~"/tmp.*|/var/lib/docker/.*|/run/.*"})').catch(() => []),
    // Network receive rate (bytes/sec)
    query(url, 'sum by (instance, job) (rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[1m]))').catch(() => []),
    // Network transmit rate (bytes/sec)
    query(url, 'sum by (instance, job) (rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[1m]))').catch(() => []),
    // 24h Network receive total (bytes)
    query(url, 'sum by (instance, job) (increase(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
    // 24h Network transmit total (bytes)
    query(url, 'sum by (instance, job) (increase(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))').catch(() => []),
    // Targets status from /api/v1/targets
    getTargets(url).catch(() => [])
  ]);

  // Fetch fleet-wide 24h traffic (best-effort, separate helper)
  const traffic24hSum = await get24hTrafficSum(url).catch(() => ({ rx: 0, tx: 0 }));

  // Per-instance data keyed by the opaque server token
  const instances = new Map();

  const getOrCreate = (metric) => {
    const originalInstance = metric.instance || 'Unknown';
    const job = metric.job || 'Unknown';
    const token = getServerToken(originalInstance, job, sourceName);

    // Store mapping so detail queries can resolve the token later
    serverIdMap.set(token, { instance: originalInstance, source: sourceName, job, lastSeen: Date.now() });

    if (!instances.has(token)) {
      instances.set(token, {
        instance: token, // masked identifier SENT TO FRONTEND
        originalInstance, // kept internal for aggregation/parsing
        job,
        source: sourceName,
        cpuPercent: 0,
        cpuCores: 0,
        memTotal: 0,
        memUsed: 0,
        diskTotal: 0,
        diskUsed: 0,
        netRx: 0,
        netTx: 0,
        traffic24hRx: 0,
        traffic24hTx: 0,
        up: false,
        memPercent: 0,
        diskPercent: 0
      });
    }
    const inst = instances.get(token);
    // Upgrade a placeholder job name once a real one is seen
    if (inst.job === 'Unknown' && metric.job) {
      inst.job = metric.job;
    }
    return inst;
  };

  // Seed instances from the targets list first so servers with no metrics
  // still appear in the overview
  for (const target of targetsResult) {
    const labels = target.labels || {};
    if (labels.instance) {
      const inst = getOrCreate(labels);
      inst.up = target.health === 'up';
    }
  }

  // Fold one instant-query result vector into the instance map via `assign`
  // (replaces ten copy-pasted parse loops from the original).
  const applyMetric = (rows, assign) => {
    for (const r of rows) {
      assign(getOrCreate(r.metric), parseFloat(r.value[1]) || 0);
    }
  };

  applyMetric(cpuResult, (inst, v) => { inst.cpuPercent = v; });
  applyMetric(cpuCountResult, (inst, v) => { inst.cpuCores = v; });
  applyMetric(memTotalResult, (inst, v) => { inst.memTotal = v; });
  // memUsed/diskUsed derive from the totals assigned just above, so order matters
  applyMetric(memAvailResult, (inst, v) => { inst.memUsed = inst.memTotal - v; });
  applyMetric(diskTotalResult, (inst, v) => { inst.diskTotal = v; });
  applyMetric(diskFreeResult, (inst, v) => { inst.diskUsed = inst.diskTotal - v; });
  applyMetric(netRxResult, (inst, v) => { inst.netRx = v; });
  applyMetric(netTxResult, (inst, v) => { inst.netTx = v; });
  applyMetric(netRx24hResult, (inst, v) => { inst.traffic24hRx = v; });
  applyMetric(netTx24hResult, (inst, v) => { inst.traffic24hTx = v; });

  for (const inst of instances.values()) {
    // A server reporting metrics is considered alive even if its target is down
    if (!inst.up && (inst.cpuPercent > 0 || inst.memTotal > 0)) {
      inst.up = true;
    }
    // Pre-compute percentages so the frontend does no math
    inst.memPercent = inst.memTotal > 0 ? (inst.memUsed / inst.memTotal * 100) : 0;
    inst.diskPercent = inst.diskTotal > 0 ? (inst.diskUsed / inst.diskTotal * 100) : 0;
  }

  const allInstancesList = Array.from(instances.values());
  const activeInstances = allInstancesList.filter(inst => inst.up);

  // Aggregate fleet totals over active servers only
  let totalCpuUsed = 0, totalCpuCores = 0;
  let totalMemUsed = 0, totalMemTotal = 0;
  let totalDiskUsed = 0, totalDiskTotal = 0;
  let totalNetRx = 0, totalNetTx = 0;

  for (const inst of activeInstances) {
    totalCpuUsed += (inst.cpuPercent / 100) * inst.cpuCores;
    totalCpuCores += inst.cpuCores;
    totalMemUsed += inst.memUsed;
    totalMemTotal += inst.memTotal;
    totalDiskUsed += inst.diskUsed;
    totalDiskTotal += inst.diskTotal;
    totalNetRx += inst.netRx;
    totalNetTx += inst.netTx;
  }

  // 24h totals come from the dedicated helper, not the per-instance sums
  const totalTraffic24hRx = traffic24hSum.rx;
  const totalTraffic24hTx = traffic24hSum.tx;

  return {
    totalServers: allInstancesList.length,
    activeServers: activeInstances.length,
    cpu: {
      used: totalCpuUsed,
      total: totalCpuCores,
      percent: totalCpuCores > 0 ? (totalCpuUsed / totalCpuCores * 100) : 0
    },
    memory: {
      used: totalMemUsed,
      total: totalMemTotal,
      percent: totalMemTotal > 0 ? (totalMemUsed / totalMemTotal * 100) : 0
    },
    disk: {
      used: totalDiskUsed,
      total: totalDiskTotal,
      percent: totalDiskTotal > 0 ? (totalDiskUsed / totalDiskTotal * 100) : 0
    },
    network: {
      rx: totalNetRx,
      tx: totalNetTx,
      total: totalNetRx + totalNetTx
    },
    traffic24h: {
      rx: totalTraffic24hRx,
      tx: totalTraffic24hTx,
      total: totalTraffic24hRx + totalTraffic24hTx
    },
    servers: allInstancesList
  };
}
|
|
|
|
|
|
|
|
/**
 * Calculate total traffic from bandwidth data points by integrating each
 * rate sample over the interval to the next sample (rate * duration).
 */
function calculateTrafficFromHistory(values) {
  if (!values || values.length < 2) return 0;

  let totalBytes = 0;
  for (let i = 1; i < values.length; i++) {
    const [prevTs, prevVal] = values[i - 1];
    const [currTs] = values[i];
    totalBytes += parseFloat(prevVal) * (currTs - prevTs);
  }
  return totalBytes;
}
|
|
|
|
/**
 * Get total traffic for the past 24h using Prometheus increase() for
 * stability and accuracy. Best-effort: returns zeroes on failure.
 */
async function get24hTrafficSum(url) {
  // Build the fleet-wide 24h increase expression for one traffic direction.
  const exprFor = (direction) =>
    `sum(increase(node_network_${direction}_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[24h]))`;

  try {
    const [rxResult, txResult] = await Promise.all([
      query(url, exprFor('receive')).catch(() => []),
      query(url, exprFor('transmit')).catch(() => [])
    ]);

    const firstValue = (rows) => (rows.length > 0 ? parseFloat(rows[0].value[1]) : 0);
    return { rx: firstValue(rxResult), tx: firstValue(txResult) };
  } catch (err) {
    console.error(`[Prometheus] get24hTrafficSum error:`, err.message);
    return { rx: 0, tx: 0 };
  }
}
|
|
|
|
/**
 * Get total traffic for a specific server in the past 24h.
 */
async function get24hServerTrafficSum(url, instance, job) {
  const node = resolveToken(instance);

  // NOTE(review): the device filter mixes glob-like fragments ("virbr*",
  // "fwbr.") into a regex — presumably "virbr.*"/"fwbr.*" were intended.
  // Kept byte-identical to match the other per-server network queries.
  const deviceFilter = `device!~'tap.*|veth.*|br.*|docker.*|virbr*|podman.*|lo.*|vmbr.*|fwbr.|ip.*|gre.*|virbr.*|vnet.*'`;
  const selector = `{instance="${node}",job="${job}",${deviceFilter}}`;

  const [rxResult, txResult] = await Promise.all([
    query(url, `sum(increase(node_network_receive_bytes_total${selector}[24h]))`).catch(() => []),
    query(url, `sum(increase(node_network_transmit_bytes_total${selector}[24h]))`).catch(() => [])
  ]);

  const firstValue = (rows) => (rows.length > 0 ? parseFloat(rows[0].value[1]) : 0);
  return { rx: firstValue(rxResult), tx: firstValue(txResult) };
}
|
|
|
|
/**
 * Get network traffic history (past 24h, 5-min intervals for chart).
 */
async function getNetworkHistory(url) {
  const step = 300; // 5 minutes for better resolution on chart
  const endTs = Math.floor(Date.now() / 1000 / step) * step; // align to step boundary
  const startTs = endTs - 86400; // 24h window

  const fetchSeries = (expr) => queryRange(url, expr, startTs, endTs, step).catch(() => []);

  const [rxResult, txResult] = await Promise.all([
    fetchSeries('sum(rate(node_network_receive_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[1m]))'),
    fetchSeries('sum(rate(node_network_transmit_bytes_total{device!~"lo|veth.*|docker.*|br-.*"}[1m]))')
  ]);

  // Each matrix entry carries values = [[timestamp, value], ...]
  return {
    rxValues: rxResult.length > 0 ? rxResult[0].values : [],
    txValues: txResult.length > 0 ? txResult[0].values : []
  };
}
|
|
|
|
/**
 * Merge network histories from multiple sources by summing the rx/tx
 * samples that share a timestamp, returning timestamp-sorted series.
 */
function mergeNetworkHistories(histories) {
  const byTimestamp = new Map();

  // Accumulate one sample list into the chosen field ('rx' or 'tx')
  const accumulate = (samples, field) => {
    for (const [ts, val] of samples) {
      let bucket = byTimestamp.get(ts);
      if (!bucket) {
        bucket = { rx: 0, tx: 0 };
        byTimestamp.set(ts, bucket);
      }
      bucket[field] += parseFloat(val) || 0;
    }
  };

  for (const history of histories) {
    accumulate(history.rxValues, 'rx');
    accumulate(history.txValues, 'tx');
  }

  const ordered = [...byTimestamp.entries()].sort((a, b) => a[0] - b[0]);

  return {
    timestamps: ordered.map(([ts]) => ts * 1000), // seconds -> ms for JS
    rx: ordered.map(([, v]) => v.rx),
    tx: ordered.map(([, v]) => v.tx)
  };
}
|
|
|
|
/**
 * Get CPU usage history (past 1h, 1-min intervals).
 */
async function getCpuHistory(url) {
  const step = 60; // 1 minute
  const endTs = Math.floor(Date.now() / 1000 / step) * step; // align to step boundary
  const startTs = endTs - 3600; // 1h window

  const result = await queryRange(
    url,
    '100 - (avg(rate(node_cpu_seconds_total{mode="idle"}[1m])) * 100)',
    startTs, endTs, step
  ).catch(() => []);

  // [[timestamp, value], ...] or [] when the query failed / returned nothing
  return result.length > 0 ? result[0].values : [];
}
|
|
|
|
/**
 * Merge CPU histories from multiple sources by averaging the samples that
 * share a timestamp, returning timestamp-sorted series.
 */
function mergeCpuHistories(histories) {
  const buckets = new Map();

  for (const history of histories) {
    for (const [ts, val] of history) {
      const bucket = buckets.get(ts) ?? { sum: 0, count: 0 };
      bucket.sum += parseFloat(val) || 0;
      bucket.count += 1;
      buckets.set(ts, bucket);
    }
  }

  const ordered = [...buckets.entries()].sort((a, b) => a[0] - b[0]);

  return {
    timestamps: ordered.map(([ts]) => ts * 1000), // seconds -> ms
    values: ordered.map(([, v]) => v.sum / v.count) // mean across sources
  };
}
|
|
|
|
|
|
/**
 * Map an opaque server token back to the real instance address. Unknown
 * tokens are returned unchanged so raw instance values keep working.
 */
function resolveToken(token) {
  // Single Map lookup instead of the original has() + get() double lookup.
  const entry = serverIdMap.get(token);
  return entry ? entry.instance : token;
}
|
|
|
|
/**
 * Get detailed metrics for a specific server (node): CPU/memory/swap/disk
 * gauges, network rates, socket stats, per-partition usage, discovered IPs,
 * configurable custom metrics, and the 24h traffic total.
 *
 * @param {string} baseUrl - Prometheus base URL.
 * @param {string} instance - opaque server token (or raw instance address).
 * @param {string} job - Prometheus job label of the target.
 * @param {Object} [settings] - may carry `custom_metrics` (array or JSON string).
 * @returns {Promise<Object>} flat map of scalar values plus arrays
 *   (partitions, ipv4, ipv6, custom_data) and traffic24h { rx, tx }.
 */
async function getServerDetails(baseUrl, instance, job, settings = {}) {
  const url = normalizeUrl(baseUrl);
  // Translate the opaque frontend token back to the real instance address
  const node = resolveToken(instance);

  // Queries based on the requested dashboard structure
  const queries = {
    cpuIowait: `avg(rate(node_cpu_seconds_total{mode="iowait", instance="${node}"}[1m])) * 100`,
    cpuOther: `avg(rate(node_cpu_seconds_total{mode=~"nice|steal|guest|guest_nice", instance="${node}"}[1m])) * 100`,
    cpuBusy: `100 * (1 - avg(rate(node_cpu_seconds_total{mode="idle", instance="${node}"}[1m])))`,
    sysLoad: `node_load1{instance="${node}",job="${job}"} * 100 / count(count(node_cpu_seconds_total{instance="${node}",job="${job}"}) by (cpu))`,
    memUsedPct: `(1 - (node_memory_MemAvailable_bytes{instance="${node}", job="${job}"} / node_memory_MemTotal_bytes{instance="${node}", job="${job}"})) * 100`,
    swapUsedPct: `((node_memory_SwapTotal_bytes{instance="${node}",job="${job}"} - node_memory_SwapFree_bytes{instance="${node}",job="${job}"}) / (node_memory_SwapTotal_bytes{instance="${node}",job="${job}"})) * 100`,
    rootFsUsedPct: `100 - ((node_filesystem_avail_bytes{instance="${node}",job="${job}",mountpoint="/",fstype!~"rootfs|tmpfs"} * 100) / node_filesystem_size_bytes{instance="${node}",job="${job}",mountpoint="/",fstype!~"rootfs|tmpfs"})`,
    cpuCores: `count(count(node_cpu_seconds_total{instance="${node}",job="${job}"}) by (cpu))`,
    memTotal: `node_memory_MemTotal_bytes{instance="${node}",job="${job}"}`,
    swapTotal: `node_memory_SwapTotal_bytes{instance="${node}",job="${job}"}`,
    rootFsTotal: `node_filesystem_size_bytes{instance="${node}",job="${job}",mountpoint="/",fstype!~"rootfs|tmpfs"}`,
    uptime: `node_time_seconds{instance="${node}",job="${job}"} - node_boot_time_seconds{instance="${node}",job="${job}"}`,
    netRx: `sum(rate(node_network_receive_bytes_total{instance="${node}",job="${job}",device!~'tap.*|veth.*|br.*|docker.*|virbr*|podman.*|lo.*|vmbr.*|fwbr.|ip.*|gre.*|virbr.*|vnet.*'}[1m]))`,
    netTx: `sum(rate(node_network_transmit_bytes_total{instance="${node}",job="${job}",device!~'tap.*|veth.*|br.*|docker.*|virbr*|podman.*|lo.*|vmbr.*|fwbr.|ip.*|gre.*|virbr.*|vnet.*'}[1m]))`,
    sockstatTcp: `node_sockstat_TCP_inuse{instance="${node}",job="${job}"}`,
    sockstatTcpMem: `node_sockstat_TCP_mem{instance="${node}",job="${job}"} * 4096`,
    // Get individual partitions (excluding virtual and FUSE mounts)
    partitions_size: `node_filesystem_size_bytes{instance="${node}", job="${job}", fstype!~"tmpfs|autofs|proc|sysfs|fuse.*", mountpoint!~"/tmp.*|/var/lib/docker/.*|/run/.*"}`,
    partitions_free: `node_filesystem_free_bytes{instance="${node}", job="${job}", fstype!~"tmpfs|autofs|proc|sysfs|fuse.*", mountpoint!~"/tmp.*|/var/lib/docker/.*|/run/.*"}`
  };

  const results = {};
  // Run every query in parallel; each failure degrades to 0 / [] for its key
  const queryPromises = Object.entries(queries).map(async ([key, expr]) => {
    try {
      const res = await query(url, expr);
      if (key.startsWith('partitions_')) {
        // Partition queries keep one entry per mountpoint
        results[key] = res.map(r => ({
          mountpoint: r.metric.mountpoint,
          value: parseFloat(r.value[1]) || 0
        }));
      } else {
        // Scalar queries keep only the first sample's value
        results[key] = res.length > 0 ? parseFloat(res[0].value[1]) : 0;
      }
    } catch (e) {
      console.error(`[Prometheus] Error querying ${key} for ${node}:`, e.message);
      results[key] = key.startsWith('partitions_') ? [] : 0;
    }
  });

  await Promise.all(queryPromises);

  // Process custom metrics from settings
  results.custom_data = [];
  try {
    // settings.custom_metrics may arrive as a JSON string or as an array
    const customMetrics = typeof settings.custom_metrics === 'string'
      ? JSON.parse(settings.custom_metrics)
      : (settings.custom_metrics || []);

    if (Array.isArray(customMetrics) && customMetrics.length > 0) {
      const customPromises = customMetrics.map(async (cfg) => {
        if (!cfg.metric) return null;
        try {
          const expr = `${cfg.metric}{instance="${node}",job="${job}"}`;
          const res = await query(url, expr);
          if (res && res.length > 0) {
            // Prefer a label value (default label 'address'); fall back to
            // the sample's numeric value
            const val = res[0].metric[cfg.label || 'address'] || res[0].value[1];

            // If this metric is marked as an IP source, update the main IP fields
            // NOTE(review): these callbacks run in parallel, so two is_ip
            // metrics could race on results.ipv4/ipv6 (last writer wins) —
            // confirm this is acceptable.
            if (cfg.is_ip && !results.ipv4?.length && !results.ipv6?.length) {
              if (val.includes(':')) {
                results.ipv6 = [val];
                results.ipv4 = [];
              } else {
                results.ipv4 = [val];
                results.ipv6 = [];
              }
            }

            return {
              name: cfg.name || cfg.metric,
              value: val
            };
          }
        } catch (e) {
          console.error(`[Prometheus] Custom metric error (${cfg.metric}):`, e.message);
        }
        return null;
      });

      const customResults = await Promise.all(customPromises);
      results.custom_data = customResults.filter(r => r !== null);
    }
  } catch (err) {
    console.error('[Prometheus] Error processing custom metrics:', err.message);
  }

  // Ensure IP discovery fallback if no custom IP metric found:
  // derive the host from the matching target's scrape URL.
  if ((!results.ipv4 || results.ipv4.length === 0) && (!results.ipv6 || results.ipv6.length === 0)) {
    try {
      const targets = await getTargets(baseUrl);
      const matchedTarget = targets.find(t => t.labels && t.labels.instance === node && t.labels.job === job);
      if (matchedTarget) {
        const scrapeUrl = matchedTarget.scrapeUrl || '';
        try {
          // A ':' in the parsed hostname implies an IPv6 literal
          const urlObj = new URL(scrapeUrl);
          const host = urlObj.hostname;
          if (host.includes(':')) {
            results.ipv6 = [host];
            results.ipv4 = [];
          } else {
            results.ipv4 = [host];
            results.ipv6 = [];
          }
        } catch (e) {
          // Malformed URL: crude string split, treated as IPv4
          const host = scrapeUrl.split('//').pop().split('/')[0].split(':')[0];
          if (host) {
            results.ipv4 = [host];
            results.ipv6 = [];
          }
        }
      }
    } catch (e) {
      console.error(`[Prometheus] Target fallback error for ${node}:`, e.message);
    }
  }

  // Final sanitization: the IP fields are always arrays
  results.ipv4 = results.ipv4 || [];
  results.ipv6 = results.ipv6 || [];

  // Group partitions: join size and free by mountpoint
  const partitionsMap = {};
  (results.partitions_size || []).forEach(p => {
    partitionsMap[p.mountpoint] = { mountpoint: p.mountpoint, size: p.value, free: 0 };
  });
  (results.partitions_free || []).forEach(p => {
    if (partitionsMap[p.mountpoint]) {
      partitionsMap[p.mountpoint].free = p.value;
    }
  });

  results.partitions = Object.values(partitionsMap).map(p => ({
    ...p,
    used: p.size - p.free,
    percent: p.size > 0 ? ((p.size - p.free) / p.size * 100) : 0
  })).sort((a, b) => a.mountpoint.localeCompare(b.mountpoint));

  // Calculate total disk size
  results.totalDiskSize = results.partitions.reduce((sum, p) => sum + (p.size || 0), 0);

  // Raw partition vectors are internal; only the grouped view is returned
  delete results.partitions_size;
  delete results.partitions_free;

  // Add 24h traffic sum for this specific server
  try {
    const traffic24h = await get24hServerTrafficSum(baseUrl, instance, job);
    results.traffic24h = traffic24h;
  } catch (e) {
    console.error(`[Prometheus] Error fetching 24h traffic for ${node}:`, e.message);
    results.traffic24h = { rx: 0, tx: 0 };
  }

  return results;
}
|
|
|
|
/**
 * Get historical metrics for a specific server (node).
 *
 * @param {string} baseUrl - Prometheus base URL.
 * @param {string} instance - opaque server token (or raw instance address).
 * @param {string} job - Prometheus job label.
 * @param {string} metric - 'cpuBusy', 'networkTrend', or a metricMap key below.
 * @param {string} [range='1h'] - named range spec (see parseRange).
 * @param {string|null} [start] - explicit range start; with `end`, overrides `range`.
 * @param {string|null} [end] - explicit range end.
 * @param {string} [p95Type='tx'] - series for the P95 stat: 'tx', 'rx', 'both', or 'max'.
 * @returns {Promise<Object>} { timestamps, values } for scalar metrics, or
 *   { timestamps, rx, tx, stats } for 'networkTrend'.
 * @throws {Error} for unknown metric keys or underlying query failures.
 */
async function getServerHistory(baseUrl, instance, job, metric, range = '1h', start = null, end = null, p95Type = 'tx') {
  const url = normalizeUrl(baseUrl);
  // Translate the opaque frontend token back to the real instance address
  const node = resolveToken(instance);

  // CPU Busy history: 100 - idle
  if (metric === 'cpuBusy') {
    const expr = `100 * (1 - avg(rate(node_cpu_seconds_total{mode="idle", instance="${node}"}[1m])))`;
    const rangeObj = parseRange(range, start, end);
    const result = await queryRange(url, expr, rangeObj.queryStart, rangeObj.queryEnd, rangeObj.step);

    if (!result || result.length === 0) return { timestamps: [], values: [] };

    return {
      timestamps: result[0].values.map(v => v[0] * 1000), // seconds -> ms
      values: result[0].values.map(v => parseFloat(v[1]))
    };
  }

  // Map metric keys to Prometheus expressions
  const metricMap = {
    sysLoad: `node_load1{instance="${node}",job="${job}"} * 100 / count(count(node_cpu_seconds_total{instance="${node}",job="${job}"}) by (cpu))`,
    memUsedPct: `(1 - (node_memory_MemAvailable_bytes{instance="${node}", job="${job}"} / node_memory_MemTotal_bytes{instance="${node}", job="${job}"})) * 100`,
    swapUsedPct: `((node_memory_SwapTotal_bytes{instance="${node}",job="${job}"} - node_memory_SwapFree_bytes{instance="${node}",job="${job}"}) / (node_memory_SwapTotal_bytes{instance="${node}",job="${job}"})) * 100`,
    rootFsUsedPct: `100 - ((node_filesystem_avail_bytes{instance="${node}",job="${job}",mountpoint="/",fstype!="rootfs"} * 100) / node_filesystem_size_bytes{instance="${node}",job="${job}",mountpoint="/",fstype!="rootfs"})`,
    netRx: `sum(rate(node_network_receive_bytes_total{instance="${node}",job="${job}",device!~'tap.*|veth.*|br.*|docker.*|virbr*|podman.*|lo.*|vmbr.*|fwbr.|ip.*|gre.*|virbr.*|vnet.*'}[1m]))`,
    netTx: `sum(rate(node_network_transmit_bytes_total{instance="${node}",job="${job}",device!~'tap.*|veth.*|br.*|docker.*|virbr*|podman.*|lo.*|vmbr.*|fwbr.|ip.*|gre.*|virbr.*|vnet.*'}[1m]))`,
    sockstatTcp: `node_sockstat_TCP_inuse{instance="${node}",job="${job}"}`,
    sockstatTcpMem: `node_sockstat_TCP_mem{instance="${node}",job="${job}"} * 4096`
  };

  const rangeObj = parseRange(range, start, end);

  // Combined rx/tx history with integrated traffic totals and a P95 statistic
  if (metric === 'networkTrend') {
    const txExpr = metricMap.netTx;
    const rxExpr = metricMap.netRx;
    const [txResult, rxResult] = await Promise.all([
      queryRange(url, txExpr, rangeObj.queryStart, rangeObj.queryEnd, rangeObj.step),
      queryRange(url, rxExpr, rangeObj.queryStart, rangeObj.queryEnd, rangeObj.step)
    ]);

    if (txResult.length === 0 && rxResult.length === 0) return { timestamps: [], rx: [], tx: [] };

    // Timestamps come from whichever series exists; a missing series is zero-filled
    const timestamps = (txResult.length > 0 ? txResult[0] : rxResult[0]).values.map(v => v[0] * 1000);
    const tx = txResult.length > 0 ? txResult[0].values.map(v => parseFloat(v[1])) : new Array(timestamps.length).fill(0);
    const rx = rxResult.length > 0 ? rxResult[0].values.map(v => parseFloat(v[1])) : new Array(timestamps.length).fill(0);

    // Calculate statistics on backend: integrate rate * interval to get total bytes
    let rxTotal = 0;
    let txTotal = 0;
    for (let i = 0; i < timestamps.length - 1; i++) {
      const duration = (timestamps[i+1] - timestamps[i]) / 1000; // ms -> s
      rxTotal += (rx[i] || 0) * duration;
      txTotal += (tx[i] || 0) * duration;
    }

    // Calculate P95 based on p95Type ('rx', 'both', 'max', default 'tx')
    let combined = [];
    if (p95Type === 'rx') {
      combined = [...rx];
    } else if (p95Type === 'both') {
      combined = tx.map((t, i) => (t || 0) + (rx[i] || 0));
    } else if (p95Type === 'max') {
      combined = tx.map((t, i) => Math.max(t || 0, rx[i] || 0));
    } else {
      // Default to tx
      combined = [...tx];
    }

    // 95th percentile of the chosen series (numeric ascending sort on a copy)
    const sorted = combined.sort((a, b) => a - b);
    const p95Idx = Math.floor(sorted.length * 0.95);
    const p95 = sorted.length > 0 ? sorted[p95Idx] : 0;

    return {
      timestamps,
      tx,
      rx,
      stats: {
        rxTotal,
        txTotal,
        p95,
        total: rxTotal + txTotal
      }
    };
  }

  // Plain single-series history for any metricMap key
  const expr = metricMap[metric];
  if (!expr) throw new Error('Invalid metric for history');

  try {
    const result = await queryRange(url, expr, rangeObj.queryStart, rangeObj.queryEnd, rangeObj.step);
    if (!result || result.length === 0) return { timestamps: [], values: [] };

    return {
      timestamps: result[0].values.map(v => v[0] * 1000),
      values: result[0].values.map(v => parseFloat(v[1]))
    };
  } catch (err) {
    console.error(`[Prometheus] Error fetching history for ${metric} on ${node}:`, err.message);
    throw err;
  }
}
|
|
|
|
/**
 * Resolve a time-range spec into { queryStart, queryEnd, step } (unix
 * seconds). An explicit start/end pair wins over the named range; unknown
 * range strings fall back to the last hour at 60s resolution.
 */
function parseRange(range, start, end) {
  // Explicit window: aim for ~100 data points, with a 15s floor on the step
  if (start && end) {
    const queryStart = Math.floor(new Date(start).getTime() / 1000);
    const queryEnd = Math.floor(new Date(end).getTime() / 1000);
    const step = Math.max(15, Math.floor((queryEnd - queryStart) / 100));
    return { queryStart, queryEnd, step };
  }

  const PRESETS = {
    '15m': { duration: 900, step: 15 },
    '30m': { duration: 1800, step: 30 },
    '1h': { duration: 3600, step: 60 },
    '6h': { duration: 21600, step: 300 },
    '12h': { duration: 43200, step: 600 },
    '24h': { duration: 86400, step: 900 },
    '2d': { duration: 172800, step: 1800 },
    '7d': { duration: 604800, step: 3600 }
  };

  let duration;
  let step;

  const preset = PRESETS[range];
  if (preset) {
    ({ duration, step } = preset);
  } else {
    // Free-form spec like "90m" / "3d"; unknown units default to hours
    const match = range.match(/^(\d+)([smhd])$/);
    if (match) {
      const multipliers = { s: 1, m: 60, h: 3600, d: 86400 };
      duration = parseInt(match[1], 10) * (multipliers[match[2]] || 3600);
      step = Math.max(15, Math.floor(duration / 100));
    } else {
      // Unrecognized spec: last hour at 1-min resolution
      duration = 3600;
      step = 60;
    }
  }

  const queryEnd = Math.floor(Date.now() / 1000);
  return { queryStart: queryEnd - duration, queryEnd, step };
}
|
|
|
|
// Public API of this module.
module.exports = {
  testConnection,
  query,
  queryRange,
  getTargets,
  getOverviewMetrics,
  get24hTrafficSum,
  getNetworkHistory,
  mergeNetworkHistories,
  getCpuHistory,
  mergeCpuHistories,
  getServerDetails,
  getServerHistory,
  resolveToken,
  /**
   * Fetch probe latency for a target from a blackbox-exporter-backed
   * Prometheus. Tries several metric/label combinations via a PromQL "or"
   * chain and returns the first available sample converted to milliseconds,
   * or null when nothing matches or the query fails.
   * NOTE(review): `target` is interpolated unescaped into PromQL; a value
   * containing quotes would break the query — presumably trusted input,
   * confirm against callers.
   */
  getLatency: async (blackboxUrl, target) => {
    if (!blackboxUrl || !target) return null;
    try {
      const normalized = normalizeUrl(blackboxUrl);

      const queryExpr = `(
      probe_icmp_duration_seconds{phase="rtt", instance="${target}"} or
      probe_icmp_duration_seconds{phase="rtt", target="${target}"} or
      probe_http_duration_seconds{phase="rtt", instance="${target}"} or
      probe_http_duration_seconds{phase="rtt", target="${target}"} or
      probe_icmp_duration_seconds{instance="${target}"} or
      probe_icmp_duration_seconds{target="${target}"} or
      probe_duration_seconds{instance="${target}"} or
      probe_duration_seconds{target="${target}"}
    )`;

      const result = await query(normalized, queryExpr);
      if (result && result.length > 0) {
        return parseFloat(result[0].value[1]) * 1000; // seconds -> ms
      }
      return null;
    } catch (err) {
      console.error(`[Prometheus] Error fetching latency for ${target}:`, err.message);
      return null;
    }
  }
};
|