Fix: Historical analysis now shows real consumption numbers and percentages relative to cluster totals

This commit is contained in:
2025-09-30 18:03:17 -03:00
parent 5c5643576f
commit 2b2b3c23b2
2 changed files with 264 additions and 150 deletions

View File

@@ -495,53 +495,108 @@ async def get_workload_historical_metrics(
workload: str, workload: str,
time_range: str = "24h" time_range: str = "24h"
): ):
"""Get historical metrics for a specific workload (deployment/daemonset)""" """Get historical metrics for a specific workload with cluster percentages"""
try: try:
prometheus_client = PrometheusClient() prometheus_client = PrometheusClient()
# Get CPU and Memory usage metrics for the workload # Get current usage (latest values)
cpu_usage = await prometheus_client.query_range( cpu_usage_query = f'rate(container_cpu_usage_seconds_total{{namespace="{namespace}",pod=~"{workload}-.*"}}[5m])'
f'rate(container_cpu_usage_seconds_total{{namespace="{namespace}",pod=~"{workload}-.*"}}[5m])', memory_usage_query = f'container_memory_working_set_bytes{{namespace="{namespace}",pod=~"{workload}-.*"}}'
time_range
)
memory_usage = await prometheus_client.query_range( cpu_usage_data = await prometheus_client.query(cpu_usage_query)
f'container_memory_working_set_bytes{{namespace="{namespace}",pod=~"{workload}-.*"}}', memory_usage_data = await prometheus_client.query(memory_usage_query)
time_range
)
# Get resource requests and limits # Get resource requests and limits
cpu_requests = await prometheus_client.query_range( cpu_requests_query = f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}'
f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}', memory_requests_query = f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}'
time_range
)
memory_requests = await prometheus_client.query_range( cpu_requests_data = await prometheus_client.query(cpu_requests_query)
f'kube_pod_container_resource_requests{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}', memory_requests_data = await prometheus_client.query(memory_requests_query)
time_range
)
cpu_limits = await prometheus_client.query_range( cpu_limits_query = f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}'
f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="cpu"}}', memory_limits_query = f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}'
time_range
)
memory_limits = await prometheus_client.query_range( cpu_limits_data = await prometheus_client.query(cpu_limits_query)
f'kube_pod_container_resource_limits{{namespace="{namespace}",pod=~"{workload}-.*",resource="memory"}}', memory_limits_data = await prometheus_client.query(memory_limits_query)
time_range
) # Get cluster total resources
cluster_cpu_query = 'sum(kube_node_status_allocatable{resource="cpu"})'
cluster_memory_query = 'sum(kube_node_status_allocatable{resource="memory"})'
cluster_cpu_data = await prometheus_client.query(cluster_cpu_query)
cluster_memory_data = await prometheus_client.query(cluster_memory_query)
# Extract values
cpu_usage = 0
memory_usage = 0
cpu_requests = 0
memory_requests = 0
cpu_limits = 0
memory_limits = 0
cluster_cpu_total = 0
cluster_memory_total = 0
if cpu_usage_data.get("status") == "success" and cpu_usage_data.get("data", {}).get("result"):
cpu_usage = float(cpu_usage_data["data"]["result"][0]["value"][1])
if memory_usage_data.get("status") == "success" and memory_usage_data.get("data", {}).get("result"):
memory_usage = float(memory_usage_data["data"]["result"][0]["value"][1])
if cpu_requests_data.get("status") == "success" and cpu_requests_data.get("data", {}).get("result"):
cpu_requests = float(cpu_requests_data["data"]["result"][0]["value"][1])
if memory_requests_data.get("status") == "success" and memory_requests_data.get("data", {}).get("result"):
memory_requests = float(memory_requests_data["data"]["result"][0]["value"][1])
if cpu_limits_data.get("status") == "success" and cpu_limits_data.get("data", {}).get("result"):
cpu_limits = float(cpu_limits_data["data"]["result"][0]["value"][1])
if memory_limits_data.get("status") == "success" and memory_limits_data.get("data", {}).get("result"):
memory_limits = float(memory_limits_data["data"]["result"][0]["value"][1])
if cluster_cpu_data.get("status") == "success" and cluster_cpu_data.get("data", {}).get("result"):
cluster_cpu_total = float(cluster_cpu_data["data"]["result"][0]["value"][1])
if cluster_memory_data.get("status") == "success" and cluster_memory_data.get("data", {}).get("result"):
cluster_memory_total = float(cluster_memory_data["data"]["result"][0]["value"][1])
# Calculate percentages
cpu_usage_percent = (cpu_usage / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
memory_usage_percent = (memory_usage / cluster_memory_total * 100) if cluster_memory_total > 0 else 0
cpu_requests_percent = (cpu_requests / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
memory_requests_percent = (memory_requests / cluster_memory_total * 100) if cluster_memory_total > 0 else 0
cpu_limits_percent = (cpu_limits / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
memory_limits_percent = (memory_limits / cluster_memory_total * 100) if cluster_memory_total > 0 else 0
return { return {
"workload": workload, "workload": workload,
"namespace": namespace, "namespace": namespace,
"time_range": time_range, "time_range": time_range,
"metrics": { "cluster_total": {
"cpu_usage": cpu_usage, "cpu_cores": cluster_cpu_total,
"memory_usage": memory_usage, "memory_bytes": cluster_memory_total,
"cpu_requests": cpu_requests, "memory_gb": cluster_memory_total / (1024**3)
"memory_requests": memory_requests, },
"cpu_limits": cpu_limits, "workload_metrics": {
"memory_limits": memory_limits "cpu": {
"usage_cores": cpu_usage,
"usage_percent": round(cpu_usage_percent, 2),
"requests_cores": cpu_requests,
"requests_percent": round(cpu_requests_percent, 2),
"limits_cores": cpu_limits,
"limits_percent": round(cpu_limits_percent, 2)
},
"memory": {
"usage_bytes": memory_usage,
"usage_mb": round(memory_usage / (1024**2), 2),
"usage_percent": round(memory_usage_percent, 2),
"requests_bytes": memory_requests,
"requests_mb": round(memory_requests / (1024**2), 2),
"requests_percent": round(memory_requests_percent, 2),
"limits_bytes": memory_limits,
"limits_mb": round(memory_limits / (1024**2), 2),
"limits_percent": round(memory_limits_percent, 2)
}
} }
} }
except Exception as e: except Exception as e:

View File

@@ -411,6 +411,95 @@
border: 1px solid #eee; border: 1px solid #eee;
border-radius: 4px; border-radius: 4px;
} }
/* Container for the summary cards: lays its children out in a flex row, 20px apart. */
.cluster-stats {
display: flex;
gap: 20px;
margin-bottom: 30px;
}
/* One summary card; flex: 1 makes all cards in the row share the width equally. */
.stat-card {
background: #f8f9fa;
border: 1px solid #dee2e6;
border-radius: 8px;
padding: 20px;
text-align: center;
flex: 1;
}
/* Card heading: small, uppercase, slightly letter-spaced caption. */
.stat-card h4 {
margin: 0 0 10px 0;
color: #495057;
font-size: 14px;
text-transform: uppercase;
letter-spacing: 0.5px;
}
/* The headline figure inside a card. */
.stat-value {
font-size: 24px;
font-weight: bold;
color: #007bff;
}
/* Two equal-width columns for the metric sections. */
.metrics-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
}
/* Bordered white panel holding one group of metric rows. */
.metric-section {
background: white;
border: 1px solid #dee2e6;
border-radius: 8px;
padding: 20px;
}
/* Section title, separated from the rows below by an underline. */
.metric-section h4 {
margin: 0 0 20px 0;
color: #495057;
border-bottom: 2px solid #e9ecef;
padding-bottom: 10px;
}
/* One metric line: children are pushed to opposite ends and vertically centred. */
.metric-row {
display: flex;
justify-content: space-between;
align-items: center;
padding: 8px 0;
border-bottom: 1px solid #f8f9fa;
}
/* The last row in a section has no divider beneath it. */
.metric-row:last-child {
border-bottom: none;
}
/* Row label; min-width keeps the labels aligned across rows. */
.metric-label {
font-weight: 500;
color: #6c757d;
min-width: 120px;
}
/* Row value, set in a monospace face. */
.metric-value {
font-weight: bold;
color: #212529;
font-family: 'Courier New', monospace;
}
/* Secondary, de-emphasised annotation next to a value. */
.metric-percent {
color: #6c757d;
font-size: 12px;
font-style: italic;
}
/* At viewport widths of 768px or less: collapse the grid to one column and stack the cards vertically. */
@media (max-width: 768px) {
.metrics-grid {
grid-template-columns: 1fr;
}
.cluster-stats {
flex-direction: column;
}
}
/* Problem Summary Table */ /* Problem Summary Table */
.problem-summary { .problem-summary {
@@ -1369,9 +1458,9 @@
modal.id = 'historicalModal'; modal.id = 'historicalModal';
modal.className = 'modal'; modal.className = 'modal';
modal.innerHTML = ` modal.innerHTML = `
<div class="modal-content" style="width: 90%; max-width: 1200px;"> <div class="modal-content" style="width: 90%; max-width: 1000px;">
<div class="modal-header"> <div class="modal-header">
<h2>📈 Historical Analysis - Real Prometheus Metrics</h2> <h2>📊 Resource Consumption Analysis - Real Numbers</h2>
<span class="close">&times;</span> <span class="close">&times;</span>
</div> </div>
<div class="modal-body" id="historicalModalBody"> <div class="modal-body" id="historicalModalBody">
@@ -1387,14 +1476,14 @@
<option value="7d">Last 7 days</option> <option value="7d">Last 7 days</option>
</select> </select>
</div> </div>
<div id="metricsCharts" style="display: none;"> <div id="metricsData" style="display: none;">
<div class="chart-container"> <div class="cluster-info">
<h3>CPU Usage vs Requests/Limits</h3> <h3>🏢 Cluster Total Resources</h3>
<canvas id="cpuChart" width="800" height="300"></canvas> <div id="clusterTotal"></div>
</div> </div>
<div class="chart-container"> <div class="workload-metrics">
<h3>Memory Usage vs Requests/Limits</h3> <h3>📈 Workload Resource Consumption</h3>
<canvas id="memoryChart" width="800" height="300"></canvas> <div id="workloadData"></div>
</div> </div>
</div> </div>
</div> </div>
@@ -1435,10 +1524,10 @@
async function loadWorkloadMetrics() { async function loadWorkloadMetrics() {
const workloadSelect = document.getElementById('workloadSelect'); const workloadSelect = document.getElementById('workloadSelect');
const timeRangeSelect = document.getElementById('timeRangeSelect'); const timeRangeSelect = document.getElementById('timeRangeSelect');
const chartsDiv = document.getElementById('metricsCharts'); const metricsDiv = document.getElementById('metricsData');
if (!workloadSelect.value) { if (!workloadSelect.value) {
chartsDiv.style.display = 'none'; metricsDiv.style.display = 'none';
return; return;
} }
@@ -1446,128 +1535,98 @@
const timeRange = timeRangeSelect.value; const timeRange = timeRangeSelect.value;
try { try {
chartsDiv.style.display = 'block'; metricsDiv.style.display = 'block';
chartsDiv.innerHTML = '<p>Loading metrics from Prometheus...</p>'; metricsDiv.innerHTML = '<p>Loading metrics from Prometheus...</p>';
const response = await fetch(`/api/v1/workloads/${namespace}/${workload}/metrics?time_range=${timeRange}`); const response = await fetch(`/api/v1/workloads/${namespace}/${workload}/metrics?time_range=${timeRange}`);
const data = await response.json(); const data = await response.json();
if (data.metrics) { if (data.workload_metrics) {
renderMetricsCharts(data.metrics, timeRange); renderMetricsData(data);
} else { } else {
chartsDiv.innerHTML = '<p>No metrics data available for this workload.</p>'; metricsDiv.innerHTML = '<p>No metrics data available for this workload.</p>';
} }
} catch (error) { } catch (error) {
console.error('Error loading metrics:', error); console.error('Error loading metrics:', error);
chartsDiv.innerHTML = '<p>Error loading metrics. Please try again.</p>'; metricsDiv.innerHTML = '<p>Error loading metrics. Please try again.</p>';
} }
} }
function renderMetricsCharts(metrics, timeRange) { function renderMetricsData(data) {
const chartsDiv = document.getElementById('metricsCharts'); const clusterTotalDiv = document.getElementById('clusterTotal');
chartsDiv.innerHTML = ` const workloadDataDiv = document.getElementById('workloadData');
<div class="chart-container">
<h3>CPU Usage vs Requests/Limits (${timeRange})</h3> // Render cluster total resources
<canvas id="cpuChart" width="800" height="300"></canvas> clusterTotalDiv.innerHTML = `
</div> <div class="cluster-stats">
<div class="chart-container"> <div class="stat-card">
<h3>Memory Usage vs Requests/Limits (${timeRange})</h3> <h4>CPU Total</h4>
<canvas id="memoryChart" width="800" height="300"></canvas> <div class="stat-value">${data.cluster_total.cpu_cores.toFixed(2)} cores</div>
</div>
<div class="stat-card">
<h4>Memory Total</h4>
<div class="stat-value">${data.cluster_total.memory_gb.toFixed(2)} GB</div>
</div>
</div> </div>
`; `;
// Simple chart rendering (you can replace with Chart.js or similar) // Render workload metrics
renderSimpleChart('cpuChart', metrics.cpu_usage, metrics.cpu_requests, metrics.cpu_limits, 'CPU (cores)'); const cpu = data.workload_metrics.cpu;
renderSimpleChart('memoryChart', metrics.memory_usage, metrics.memory_requests, metrics.memory_limits, 'Memory (bytes)'); const memory = data.workload_metrics.memory;
workloadDataDiv.innerHTML = `
<div class="metrics-grid">
<div class="metric-section">
<h4>🖥️ CPU Resources</h4>
<div class="metric-row">
<span class="metric-label">Current Usage:</span>
<span class="metric-value">${cpu.usage_cores.toFixed(3)} cores</span>
<span class="metric-percent">(${cpu.usage_percent}% of cluster)</span>
</div>
<div class="metric-row">
<span class="metric-label">Requests:</span>
<span class="metric-value">${cpu.requests_cores.toFixed(3)} cores</span>
<span class="metric-percent">(${cpu.requests_percent}% of cluster)</span>
</div>
<div class="metric-row">
<span class="metric-label">Limits:</span>
<span class="metric-value">${cpu.limits_cores.toFixed(3)} cores</span>
<span class="metric-percent">(${cpu.limits_percent}% of cluster)</span>
</div>
<div class="metric-row">
<span class="metric-label">Efficiency:</span>
<span class="metric-value ${cpu.usage_cores > 0 ? (cpu.usage_cores / cpu.requests_cores * 100).toFixed(1) + '%' : 'N/A'}">${cpu.usage_cores > 0 ? (cpu.usage_cores / cpu.requests_cores * 100).toFixed(1) + '%' : 'N/A'}</span>
<span class="metric-percent">(usage vs requests)</span>
</div>
</div>
<div class="metric-section">
<h4>💾 Memory Resources</h4>
<div class="metric-row">
<span class="metric-label">Current Usage:</span>
<span class="metric-value">${memory.usage_mb.toFixed(2)} MB</span>
<span class="metric-percent">(${memory.usage_percent}% of cluster)</span>
</div>
<div class="metric-row">
<span class="metric-label">Requests:</span>
<span class="metric-value">${memory.requests_mb.toFixed(2)} MB</span>
<span class="metric-percent">(${memory.requests_percent}% of cluster)</span>
</div>
<div class="metric-row">
<span class="metric-label">Limits:</span>
<span class="metric-value">${memory.limits_mb.toFixed(2)} MB</span>
<span class="metric-percent">(${memory.limits_percent}% of cluster)</span>
</div>
<div class="metric-row">
<span class="metric-label">Efficiency:</span>
<span class="metric-value ${memory.usage_bytes > 0 ? (memory.usage_bytes / memory.requests_bytes * 100).toFixed(1) + '%' : 'N/A'}">${memory.usage_bytes > 0 ? (memory.usage_bytes / memory.requests_bytes * 100).toFixed(1) + '%' : 'N/A'}</span>
<span class="metric-percent">(usage vs requests)</span>
</div>
</div>
</div>
`;
} }
/**
 * Draw a minimal line chart of usage, requests and limits on a canvas.
 *
 * Each series is expected to be an array of points whose second element
 * (point[1]) is the sample value; it is coerced with Number(), so numeric
 * strings are accepted. Anything that is not an array is treated as an
 * empty series and simply not drawn.
 *
 * All three series share one y-scale (the largest finite value across
 * them) so that usage can be compared visually against requests and
 * limits. Non-finite samples break the line instead of corrupting it.
 * A series with a single sample produces no visible segment.
 *
 * @param {string} canvasId  id of the target <canvas> element
 * @param {Array}  usage     usage samples (solid blue line)
 * @param {Array}  requests  request samples (dashed green line)
 * @param {Array}  limits    limit samples (dashed red line)
 * @param {string} unit      label drawn next to the y-axis
 */
function renderSimpleChart(canvasId, usage, requests, limits, unit) {
    const canvas = document.getElementById(canvasId);
    if (!canvas) {
        // Nothing to draw on; the caller may have re-rendered the container.
        return;
    }
    const ctx = canvas.getContext('2d');
    const width = canvas.width;
    const height = canvas.height;
    const margin = 50;
    const plotWidth = width - 2 * margin;
    const plotHeight = height - 2 * margin;

    // Extract the numeric sample values from a series of points.
    const valuesOf = (points) =>
        Array.isArray(points) ? points.map((point) => Number(point[1])) : [];

    const series = [
        { values: valuesOf(usage), color: '#007bff', lineWidth: 2, dash: [] },
        { values: valuesOf(requests), color: '#28a745', lineWidth: 1, dash: [5, 5] },
        { values: valuesOf(limits), color: '#dc3545', lineWidth: 1, dash: [5, 5] }
    ];

    // One shared scale, computed once. Falls back to 1 when every sample is
    // zero or missing so the division below can never yield NaN/Infinity.
    let yMax = 0;
    series.forEach(({ values }) => {
        values.forEach((value) => {
            if (Number.isFinite(value) && value > yMax) {
                yMax = value;
            }
        });
    });
    if (yMax <= 0) {
        yMax = 1;
    }

    // Clear canvas
    ctx.clearRect(0, 0, width, height);

    // Draw axes
    ctx.setLineDash([]);
    ctx.strokeStyle = '#333';
    ctx.lineWidth = 2;
    ctx.beginPath();
    ctx.moveTo(margin, height - margin);
    ctx.lineTo(width - margin, height - margin);
    ctx.moveTo(margin, margin);
    ctx.lineTo(margin, height - margin);
    ctx.stroke();

    // Draw each series
    series.forEach(({ values, color, lineWidth, dash }) => {
        if (values.length === 0) {
            return;
        }
        ctx.strokeStyle = color;
        ctx.lineWidth = lineWidth;
        ctx.setLineDash(dash);
        ctx.beginPath();

        // Spread samples over the full plot width; the last sample lands on
        // the right edge of the plot area.
        const step = values.length > 1 ? plotWidth / (values.length - 1) : 0;
        let penDown = false;
        values.forEach((value, index) => {
            if (!Number.isFinite(value)) {
                // Gap in the data: lift the pen and resume at the next sample.
                penDown = false;
                return;
            }
            const x = margin + index * step;
            const y = height - margin - (value / yMax) * plotHeight;
            if (penDown) {
                ctx.lineTo(x, y);
            } else {
                ctx.moveTo(x, y);
                penDown = true;
            }
        });
        ctx.stroke();
    });

    // Reset line dash
    ctx.setLineDash([]);

    // Add labels
    ctx.fillStyle = '#333';
    ctx.font = '12px Arial';
    ctx.fillText(unit, 10, height / 2);
    ctx.fillText('Time', width / 2, height - 10);
}
function exportComplianceReport() { function exportComplianceReport() {
alert('Exporting compliance report...'); alert('Exporting compliance report...');