Fix: Historical analysis now shows real consumption numbers and percentages relative to cluster totals
This commit is contained in:
@@ -495,53 +495,108 @@ async def get_workload_historical_metrics(
|
||||
workload: str,
|
||||
time_range: str = "24h"
|
||||
):
|
||||
"""Get historical metrics for a specific workload (deployment/daemonset)"""
|
||||
"""Get historical metrics for a specific workload with cluster percentages"""
|
||||
try:
|
||||
prometheus_client = PrometheusClient()
|
||||
|
||||
# Get CPU and Memory usage metrics for the workload
|
||||
cpu_usage = await prometheus_client.query_range(
|
||||
f'rate(container_cpu_usage_seconds_total{{namespace="{namespace}",pod=~"{workload}-.*"}}[5m])',
|
||||
time_range
|
||||
)
|
||||
# Get current usage (latest values)
|
||||
cpu_usage_query = f'rate(container_cpu_usage_seconds_total{{namespace="{namespace}",pod=~"{workload}-.*"}}[5m])'
|
||||
memory_usage_query = f'container_memory_working_set_bytes{{namespace="{namespace}",pod=~"{workload}-.*"}}'
|
||||
|
||||
memory_usage = await prometheus_client.query_range(
|
||||
f'container_memory_working_set_bytes{{namespace="{namespace}",pod=~"{workload}-.*"}}',
|
||||
time_range
|
||||
)
|
||||
cpu_usage_data = await prometheus_client.query(cpu_usage_query)
|
||||
memory_usage_data = await prometheus_client.query(memory_usage_query)
|
||||
|
||||
# Get resource requests and limits
|
||||
cpu_requests = await prometheus_client.query_range(
|
||||
f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}',
|
||||
time_range
|
||||
)
|
||||
cpu_requests_query = f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}'
|
||||
memory_requests_query = f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}'
|
||||
|
||||
memory_requests = await prometheus_client.query_range(
|
||||
f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}',
|
||||
time_range
|
||||
)
|
||||
cpu_requests_data = await prometheus_client.query(cpu_requests_query)
|
||||
memory_requests_data = await prometheus_client.query(memory_requests_query)
|
||||
|
||||
cpu_limits = await prometheus_client.query_range(
|
||||
f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}',
|
||||
time_range
|
||||
)
|
||||
cpu_limits_query = f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}'
|
||||
memory_limits_query = f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}'
|
||||
|
||||
memory_limits = await prometheus_client.query_range(
|
||||
f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}',
|
||||
time_range
|
||||
)
|
||||
cpu_limits_data = await prometheus_client.query(cpu_limits_query)
|
||||
memory_limits_data = await prometheus_client.query(memory_limits_query)
|
||||
|
||||
# Get cluster total resources
|
||||
cluster_cpu_query = 'sum(kube_node_status_allocatable{resource="cpu"})'
|
||||
cluster_memory_query = 'sum(kube_node_status_allocatable{resource="memory"})'
|
||||
|
||||
cluster_cpu_data = await prometheus_client.query(cluster_cpu_query)
|
||||
cluster_memory_data = await prometheus_client.query(cluster_memory_query)
|
||||
|
||||
# Extract values
|
||||
cpu_usage = 0
|
||||
memory_usage = 0
|
||||
cpu_requests = 0
|
||||
memory_requests = 0
|
||||
cpu_limits = 0
|
||||
memory_limits = 0
|
||||
cluster_cpu_total = 0
|
||||
cluster_memory_total = 0
|
||||
|
||||
if cpu_usage_data.get("status") == "success" and cpu_usage_data.get("data", {}).get("result"):
|
||||
cpu_usage = float(cpu_usage_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if memory_usage_data.get("status") == "success" and memory_usage_data.get("data", {}).get("result"):
|
||||
memory_usage = float(memory_usage_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if cpu_requests_data.get("status") == "success" and cpu_requests_data.get("data", {}).get("result"):
|
||||
cpu_requests = float(cpu_requests_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if memory_requests_data.get("status") == "success" and memory_requests_data.get("data", {}).get("result"):
|
||||
memory_requests = float(memory_requests_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if cpu_limits_data.get("status") == "success" and cpu_limits_data.get("data", {}).get("result"):
|
||||
cpu_limits = float(cpu_limits_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if memory_limits_data.get("status") == "success" and memory_limits_data.get("data", {}).get("result"):
|
||||
memory_limits = float(memory_limits_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if cluster_cpu_data.get("status") == "success" and cluster_cpu_data.get("data", {}).get("result"):
|
||||
cluster_cpu_total = float(cluster_cpu_data["data"]["result"][0]["value"][1])
|
||||
|
||||
if cluster_memory_data.get("status") == "success" and cluster_memory_data.get("data", {}).get("result"):
|
||||
cluster_memory_total = float(cluster_memory_data["data"]["result"][0]["value"][1])
|
||||
|
||||
# Calculate percentages
|
||||
cpu_usage_percent = (cpu_usage / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
|
||||
memory_usage_percent = (memory_usage / cluster_memory_total * 100) if cluster_memory_total > 0 else 0
|
||||
cpu_requests_percent = (cpu_requests / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
|
||||
memory_requests_percent = (memory_requests / cluster_memory_total * 100) if cluster_memory_total > 0 else 0
|
||||
cpu_limits_percent = (cpu_limits / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
|
||||
memory_limits_percent = (memory_limits / cluster_memory_total * 100) if cluster_memory_total > 0 else 0
|
||||
|
||||
return {
|
||||
"workload": workload,
|
||||
"namespace": namespace,
|
||||
"time_range": time_range,
|
||||
"metrics": {
|
||||
"cpu_usage": cpu_usage,
|
||||
"memory_usage": memory_usage,
|
||||
"cpu_requests": cpu_requests,
|
||||
"memory_requests": memory_requests,
|
||||
"cpu_limits": cpu_limits,
|
||||
"memory_limits": memory_limits
|
||||
"cluster_total": {
|
||||
"cpu_cores": cluster_cpu_total,
|
||||
"memory_bytes": cluster_memory_total,
|
||||
"memory_gb": cluster_memory_total / (1024**3)
|
||||
},
|
||||
"workload_metrics": {
|
||||
"cpu": {
|
||||
"usage_cores": cpu_usage,
|
||||
"usage_percent": round(cpu_usage_percent, 2),
|
||||
"requests_cores": cpu_requests,
|
||||
"requests_percent": round(cpu_requests_percent, 2),
|
||||
"limits_cores": cpu_limits,
|
||||
"limits_percent": round(cpu_limits_percent, 2)
|
||||
},
|
||||
"memory": {
|
||||
"usage_bytes": memory_usage,
|
||||
"usage_mb": round(memory_usage / (1024**2), 2),
|
||||
"usage_percent": round(memory_usage_percent, 2),
|
||||
"requests_bytes": memory_requests,
|
||||
"requests_mb": round(memory_requests / (1024**2), 2),
|
||||
"requests_percent": round(memory_requests_percent, 2),
|
||||
"limits_bytes": memory_limits,
|
||||
"limits_mb": round(memory_limits / (1024**2), 2),
|
||||
"limits_percent": round(memory_limits_percent, 2)
|
||||
}
|
||||
}
|
||||
}
|
||||
except Exception as e:
|
||||
|
||||
@@ -412,6 +412,95 @@
|
||||
border-radius: 4px;
|
||||
}
|
||||
|
||||
/* ---- Cluster totals: a horizontal strip of equally sized stat cards ---- */
.cluster-stats {
    display: flex;
    margin-bottom: 30px;
    gap: 20px;
}

/* One card per cluster-wide figure; every card takes an equal share of the row. */
.stat-card {
    flex: 1;
    padding: 20px;
    text-align: center;
    background: #f8f9fa;
    border: 1px solid #dee2e6;
    border-radius: 8px;
}

/* Small upper-case caption above the figure. */
.stat-card h4 {
    margin: 0 0 10px;
    font-size: 14px;
    letter-spacing: 0.5px;
    text-transform: uppercase;
    color: #495057;
}

/* The headline number inside a stat card. */
.stat-value {
    color: #007bff;
    font-size: 24px;
    font-weight: bold;
}

/* ---- Workload metrics: two side-by-side sections ---- */
.metrics-grid {
    display: grid;
    gap: 30px;
    grid-template-columns: 1fr 1fr;
}

.metric-section {
    padding: 20px;
    background: white;
    border: 1px solid #dee2e6;
    border-radius: 8px;
}

/* Section heading with an underline separating it from the rows. */
.metric-section h4 {
    margin: 0 0 20px;
    padding-bottom: 10px;
    border-bottom: 2px solid #e9ecef;
    color: #495057;
}

/* A single label / value / percentage line; rows are divided by a faint rule. */
.metric-row {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 8px 0;
    border-bottom: 1px solid #f8f9fa;
}

/* The last row in a section has no divider underneath it. */
.metric-row:last-child {
    border-bottom: none;
}

/* Fixed minimum width keeps the values in consecutive rows aligned. */
.metric-label {
    min-width: 120px;
    color: #6c757d;
    font-weight: 500;
}

/* Monospace so digits line up vertically between rows. */
.metric-value {
    font-family: 'Courier New', monospace;
    font-weight: bold;
    color: #212529;
}

/* Secondary, de-emphasised annotation shown next to the value. */
.metric-percent {
    font-size: 12px;
    font-style: italic;
    color: #6c757d;
}

/* Narrow viewports: stack the metric sections and the stat cards vertically. */
@media (max-width: 768px) {
    .metrics-grid {
        grid-template-columns: 1fr;
    }

    .cluster-stats {
        flex-direction: column;
    }
}
|
||||
|
||||
/* Problem Summary Table */
|
||||
.problem-summary {
|
||||
margin-bottom: 2rem;
|
||||
@@ -1369,9 +1458,9 @@
|
||||
modal.id = 'historicalModal';
|
||||
modal.className = 'modal';
|
||||
modal.innerHTML = `
|
||||
<div class="modal-content" style="width: 90%; max-width: 1200px;">
|
||||
<div class="modal-content" style="width: 90%; max-width: 1000px;">
|
||||
<div class="modal-header">
|
||||
<h2>📈 Historical Analysis - Real Prometheus Metrics</h2>
|
||||
<h2>📊 Resource Consumption Analysis - Real Numbers</h2>
|
||||
<span class="close">×</span>
|
||||
</div>
|
||||
<div class="modal-body" id="historicalModalBody">
|
||||
@@ -1387,14 +1476,14 @@
|
||||
<option value="7d">Last 7 days</option>
|
||||
</select>
|
||||
</div>
|
||||
<div id="metricsCharts" style="display: none;">
|
||||
<div class="chart-container">
|
||||
<h3>CPU Usage vs Requests/Limits</h3>
|
||||
<canvas id="cpuChart" width="800" height="300"></canvas>
|
||||
<div id="metricsData" style="display: none;">
|
||||
<div class="cluster-info">
|
||||
<h3>🏢 Cluster Total Resources</h3>
|
||||
<div id="clusterTotal"></div>
|
||||
</div>
|
||||
<div class="chart-container">
|
||||
<h3>Memory Usage vs Requests/Limits</h3>
|
||||
<canvas id="memoryChart" width="800" height="300"></canvas>
|
||||
<div class="workload-metrics">
|
||||
<h3>📈 Workload Resource Consumption</h3>
|
||||
<div id="workloadData"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1435,10 +1524,10 @@
|
||||
async function loadWorkloadMetrics() {
|
||||
const workloadSelect = document.getElementById('workloadSelect');
|
||||
const timeRangeSelect = document.getElementById('timeRangeSelect');
|
||||
const chartsDiv = document.getElementById('metricsCharts');
|
||||
const metricsDiv = document.getElementById('metricsData');
|
||||
|
||||
if (!workloadSelect.value) {
|
||||
chartsDiv.style.display = 'none';
|
||||
metricsDiv.style.display = 'none';
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1446,128 +1535,98 @@
|
||||
const timeRange = timeRangeSelect.value;
|
||||
|
||||
try {
|
||||
chartsDiv.style.display = 'block';
|
||||
chartsDiv.innerHTML = '<p>Loading metrics from Prometheus...</p>';
|
||||
metricsDiv.style.display = 'block';
|
||||
metricsDiv.innerHTML = '<p>Loading metrics from Prometheus...</p>';
|
||||
|
||||
const response = await fetch(`/api/v1/workloads/${namespace}/${workload}/metrics?time_range=${timeRange}`);
|
||||
const data = await response.json();
|
||||
|
||||
if (data.metrics) {
|
||||
renderMetricsCharts(data.metrics, timeRange);
|
||||
if (data.workload_metrics) {
|
||||
renderMetricsData(data);
|
||||
} else {
|
||||
chartsDiv.innerHTML = '<p>No metrics data available for this workload.</p>';
|
||||
metricsDiv.innerHTML = '<p>No metrics data available for this workload.</p>';
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error loading metrics:', error);
|
||||
chartsDiv.innerHTML = '<p>Error loading metrics. Please try again.</p>';
|
||||
metricsDiv.innerHTML = '<p>Error loading metrics. Please try again.</p>';
|
||||
}
|
||||
}
|
||||
|
||||
function renderMetricsCharts(metrics, timeRange) {
|
||||
const chartsDiv = document.getElementById('metricsCharts');
|
||||
chartsDiv.innerHTML = `
|
||||
<div class="chart-container">
|
||||
<h3>CPU Usage vs Requests/Limits (${timeRange})</h3>
|
||||
<canvas id="cpuChart" width="800" height="300"></canvas>
|
||||
function renderMetricsData(data) {
|
||||
const clusterTotalDiv = document.getElementById('clusterTotal');
|
||||
const workloadDataDiv = document.getElementById('workloadData');
|
||||
|
||||
// Render cluster total resources
|
||||
clusterTotalDiv.innerHTML = `
|
||||
<div class="cluster-stats">
|
||||
<div class="stat-card">
|
||||
<h4>CPU Total</h4>
|
||||
<div class="stat-value">${data.cluster_total.cpu_cores.toFixed(2)} cores</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<h4>Memory Total</h4>
|
||||
<div class="stat-value">${data.cluster_total.memory_gb.toFixed(2)} GB</div>
|
||||
</div>
|
||||
<div class="chart-container">
|
||||
<h3>Memory Usage vs Requests/Limits (${timeRange})</h3>
|
||||
<canvas id="memoryChart" width="800" height="300"></canvas>
|
||||
</div>
|
||||
`;
|
||||
|
||||
// Simple chart rendering (you can replace with Chart.js or similar)
|
||||
renderSimpleChart('cpuChart', metrics.cpu_usage, metrics.cpu_requests, metrics.cpu_limits, 'CPU (cores)');
|
||||
renderSimpleChart('memoryChart', metrics.memory_usage, metrics.memory_requests, metrics.memory_limits, 'Memory (bytes)');
|
||||
// Render workload metrics
|
||||
const cpu = data.workload_metrics.cpu;
|
||||
const memory = data.workload_metrics.memory;
|
||||
|
||||
workloadDataDiv.innerHTML = `
|
||||
<div class="metrics-grid">
|
||||
<div class="metric-section">
|
||||
<h4>🖥️ CPU Resources</h4>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Current Usage:</span>
|
||||
<span class="metric-value">${cpu.usage_cores.toFixed(3)} cores</span>
|
||||
<span class="metric-percent">(${cpu.usage_percent}% of cluster)</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Requests:</span>
|
||||
<span class="metric-value">${cpu.requests_cores.toFixed(3)} cores</span>
|
||||
<span class="metric-percent">(${cpu.requests_percent}% of cluster)</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Limits:</span>
|
||||
<span class="metric-value">${cpu.limits_cores.toFixed(3)} cores</span>
|
||||
<span class="metric-percent">(${cpu.limits_percent}% of cluster)</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Efficiency:</span>
|
||||
<span class="metric-value ${cpu.usage_cores > 0 ? (cpu.usage_cores / cpu.requests_cores * 100).toFixed(1) + '%' : 'N/A'}">${cpu.usage_cores > 0 ? (cpu.usage_cores / cpu.requests_cores * 100).toFixed(1) + '%' : 'N/A'}</span>
|
||||
<span class="metric-percent">(usage vs requests)</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="metric-section">
|
||||
<h4>💾 Memory Resources</h4>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Current Usage:</span>
|
||||
<span class="metric-value">${memory.usage_mb.toFixed(2)} MB</span>
|
||||
<span class="metric-percent">(${memory.usage_percent}% of cluster)</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Requests:</span>
|
||||
<span class="metric-value">${memory.requests_mb.toFixed(2)} MB</span>
|
||||
<span class="metric-percent">(${memory.requests_percent}% of cluster)</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Limits:</span>
|
||||
<span class="metric-value">${memory.limits_mb.toFixed(2)} MB</span>
|
||||
<span class="metric-percent">(${memory.limits_percent}% of cluster)</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Efficiency:</span>
|
||||
<span class="metric-value ${memory.usage_bytes > 0 ? (memory.usage_bytes / memory.requests_bytes * 100).toFixed(1) + '%' : 'N/A'}">${memory.usage_bytes > 0 ? (memory.usage_bytes / memory.requests_bytes * 100).toFixed(1) + '%' : 'N/A'}</span>
|
||||
<span class="metric-percent">(usage vs requests)</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
|
||||
/**
 * Draw a minimal line chart of usage, requests and limits on a canvas.
 *
 * Every series is read as a list of points whose value sits at index 1
 * (presumably Prometheus `[timestamp, value]` samples - confirm against the
 * API response). Values are coerced with Number(), so numeric strings work.
 *
 * All three series share ONE vertical scale (the largest finite value found
 * in any of them). Scaling each series to its own maximum would make every
 * line peak at the top of the plot and hide the relationship between usage,
 * requests and limits.
 *
 * @param {string} canvasId  id of the <canvas> element to draw on
 * @param {Array}  usage     usage samples (solid blue line); may be empty/null
 * @param {Array}  requests  request samples (dashed green line); may be empty/null
 * @param {Array}  limits    limit samples (dashed red line); may be empty/null
 * @param {string} unit      label drawn next to the vertical axis
 * @returns {void} Nothing; returns early if the canvas element does not exist.
 */
function renderSimpleChart(canvasId, usage, requests, limits, unit) {
    const canvas = document.getElementById(canvasId);
    if (!canvas) {
        // The chart container may have been replaced or not rendered yet.
        return;
    }

    const ctx = canvas.getContext('2d');
    const width = canvas.width;
    const height = canvas.height;
    const margin = 50;
    const plotWidth = width - 2 * margin;
    const plotHeight = height - 2 * margin;
    const baseline = height - margin;

    // Reduce a series to plain numbers once, instead of re-mapping per point.
    const toValues = (series) =>
        series && series.length > 0
            ? Array.from(series, (point) => Number(point[1]))
            : [];

    const usageValues = toValues(usage);
    const requestValues = toValues(requests);
    const limitValues = toValues(limits);

    // Shared peak, found with a plain loop: Math.max(...values) is evaluated
    // with one argument per sample and can exceed the engine's argument limit.
    let peak = 0;
    for (const values of [usageValues, requestValues, limitValues]) {
        for (const value of values) {
            if (Number.isFinite(value) && value > peak) {
                peak = value;
            }
        }
    }
    // With no positive sample everything sits on the baseline (avoids 0/0).
    const scale = peak > 0 ? plotHeight / peak : 0;

    // Stroke one series as a polyline, skipping samples that are not numbers.
    const drawSeries = (values, color, lineWidth, dash) => {
        if (values.length === 0) {
            return;
        }
        // Spread the points over the full plot width; a lone point stays at the origin.
        const step = values.length > 1 ? plotWidth / (values.length - 1) : 0;

        ctx.strokeStyle = color;
        ctx.lineWidth = lineWidth;
        ctx.setLineDash(dash);
        ctx.beginPath();

        let started = false;
        values.forEach((value, index) => {
            if (!Number.isFinite(value)) {
                return;
            }
            const x = margin + index * step;
            const y = baseline - value * scale;
            if (started) {
                ctx.lineTo(x, y);
            } else {
                ctx.moveTo(x, y);
                started = true;
            }
        });
        ctx.stroke();
    };

    // Clear canvas
    ctx.clearRect(0, 0, width, height);

    // Draw axes
    ctx.setLineDash([]);
    ctx.strokeStyle = '#333';
    ctx.lineWidth = 2;
    ctx.beginPath();
    ctx.moveTo(margin, baseline);
    ctx.lineTo(width - margin, baseline);
    ctx.moveTo(margin, margin);
    ctx.lineTo(margin, baseline);
    ctx.stroke();

    drawSeries(usageValues, '#007bff', 2, []);
    drawSeries(requestValues, '#28a745', 1, [5, 5]);
    drawSeries(limitValues, '#dc3545', 1, [5, 5]);

    // Reset line dash so later drawing on this context is solid again.
    ctx.setLineDash([]);

    // Add labels
    ctx.fillStyle = '#333';
    ctx.font = '12px Arial';
    ctx.fillText(unit, 10, height / 2);
    ctx.fillText('Time', width / 2, height - 10);
}
|
||||
|
||||
function exportComplianceReport() {
|
||||
alert('Exporting compliance report...');
|
||||
|
||||
Reference in New Issue
Block a user