Remove simulated data and enable real Prometheus metrics

This commit is contained in:
2025-09-30 21:13:46 -03:00
parent 6f914c9404
commit 6ad1997afd
2 changed files with 88 additions and 92 deletions

View File

@@ -498,127 +498,123 @@ async def get_workload_historical_metrics(
"""Get historical metrics for a specific workload with cluster percentages"""
try:
prometheus_client = PrometheusClient()
await prometheus_client.initialize()
# Get current usage using OpenShift-specific metrics
cpu_usage_query = f'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{{cluster="", namespace="{namespace}"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
memory_usage_query = f'sum(container_memory_working_set_bytes{{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", namespace="{namespace}", container!="", image!=""}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
cpu_usage_data = await prometheus_client.query(cpu_usage_query)
memory_usage_data = await prometheus_client.query(memory_usage_query)
# Get resource requests and limits using OpenShift-specific metrics
cpu_requests_query = f'sum(kube_pod_container_resource_requests{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="cpu"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
memory_requests_query = f'sum(kube_pod_container_resource_requests{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="memory"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
cpu_requests_data = await prometheus_client.query(cpu_requests_query)
memory_requests_data = await prometheus_client.query(memory_requests_query)
cpu_limits_query = f'sum(kube_pod_container_resource_limits{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="cpu"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
memory_limits_query = f'sum(kube_pod_container_resource_limits{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="memory"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
cpu_limits_data = await prometheus_client.query(cpu_limits_query)
memory_limits_data = await prometheus_client.query(memory_limits_query)
# Get cluster total resources
# Get cluster total resources first
cluster_cpu_query = 'sum(kube_node_status_allocatable{resource="cpu"})'
cluster_memory_query = 'sum(kube_node_status_allocatable{resource="memory"})'
cluster_cpu_data = await prometheus_client.query(cluster_cpu_query)
cluster_memory_data = await prometheus_client.query(cluster_memory_query)
# Extract values from OpenShift-specific queries
# Extract cluster totals
cluster_cpu_total = 0
cluster_memory_total = 0
if cluster_cpu_data.get("status") == "success" and cluster_cpu_data.get("data", {}).get("result"):
for result in cluster_cpu_data["data"]["result"]:
cluster_cpu_total += float(result["value"][1])
if cluster_memory_data.get("status") == "success" and cluster_memory_data.get("data", {}).get("result"):
for result in cluster_memory_data["data"]["result"]:
cluster_memory_total += float(result["value"][1])
# Get workload-specific metrics using simpler queries
# CPU usage for specific pod
cpu_usage_query = f'rate(container_cpu_usage_seconds_total{{namespace="{namespace}", pod=~".*{workload}.*"}}[5m])'
memory_usage_query = f'container_memory_working_set_bytes{{namespace="{namespace}", pod=~".*{workload}.*", container!="", image!=""}}'
# Resource requests and limits for specific pod
cpu_requests_query = f'sum(kube_pod_container_resource_requests{{namespace="{namespace}", pod=~".*{workload}.*", resource="cpu"}})'
memory_requests_query = f'sum(kube_pod_container_resource_requests{{namespace="{namespace}", pod=~".*{workload}.*", resource="memory"}})'
cpu_limits_query = f'sum(kube_pod_container_resource_limits{{namespace="{namespace}", pod=~".*{workload}.*", resource="cpu"}})'
memory_limits_query = f'sum(kube_pod_container_resource_limits{{namespace="{namespace}", pod=~".*{workload}.*", resource="memory"}})'
# Execute queries
cpu_usage_data = await prometheus_client.query(cpu_usage_query)
memory_usage_data = await prometheus_client.query(memory_usage_query)
cpu_requests_data = await prometheus_client.query(cpu_requests_query)
memory_requests_data = await prometheus_client.query(memory_requests_query)
cpu_limits_data = await prometheus_client.query(cpu_limits_query)
memory_limits_data = await prometheus_client.query(memory_limits_query)
# Extract values
cpu_usage = 0
memory_usage = 0
cpu_requests = 0
memory_requests = 0
cpu_limits = 0
memory_limits = 0
cluster_cpu_total = 0
cluster_memory_total = 0
# Check if we got any data from Prometheus
prometheus_available = False
# Extract CPU usage from workload-specific query
# Extract CPU usage
if cpu_usage_data.get("status") == "success" and cpu_usage_data.get("data", {}).get("result"):
for result in cpu_usage_data["data"]["result"]:
if result.get("metric", {}).get("workload") == workload:
cpu_usage = float(result["value"][1])
break
cpu_usage += float(result["value"][1])
# Extract Memory usage from workload-specific query
# Extract Memory usage
if memory_usage_data.get("status") == "success" and memory_usage_data.get("data", {}).get("result"):
for result in memory_usage_data["data"]["result"]:
if result.get("metric", {}).get("workload") == workload:
memory_usage = float(result["value"][1])
break
memory_usage += float(result["value"][1])
# Extract CPU requests from workload-specific query
# Extract CPU requests
if cpu_requests_data.get("status") == "success" and cpu_requests_data.get("data", {}).get("result"):
for result in cpu_requests_data["data"]["result"]:
if result.get("metric", {}).get("workload") == workload:
cpu_requests = float(result["value"][1])
break
cpu_requests += float(result["value"][1])
# Extract Memory requests from workload-specific query
# Extract Memory requests
if memory_requests_data.get("status") == "success" and memory_requests_data.get("data", {}).get("result"):
for result in memory_requests_data["data"]["result"]:
if result.get("metric", {}).get("workload") == workload:
memory_requests = float(result["value"][1])
break
memory_requests += float(result["value"][1])
# Extract CPU limits from workload-specific query
# Extract CPU limits
if cpu_limits_data.get("status") == "success" and cpu_limits_data.get("data", {}).get("result"):
for result in cpu_limits_data["data"]["result"]:
if result.get("metric", {}).get("workload") == workload:
cpu_limits = float(result["value"][1])
break
cpu_limits += float(result["value"][1])
# Extract Memory limits from workload-specific query
# Extract Memory limits
if memory_limits_data.get("status") == "success" and memory_limits_data.get("data", {}).get("result"):
for result in memory_limits_data["data"]["result"]:
if result.get("metric", {}).get("workload") == workload:
memory_limits = float(result["value"][1])
break
memory_limits += float(result["value"][1])
if cluster_cpu_data.get("status") == "success" and cluster_cpu_data.get("data", {}).get("result"):
cluster_cpu_total = float(cluster_cpu_data["data"]["result"][0]["value"][1])
# Check if we have real data
prometheus_available = cluster_cpu_total > 0 and cluster_memory_total > 0
if cluster_memory_data.get("status") == "success" and cluster_memory_data.get("data", {}).get("result"):
cluster_memory_total = float(cluster_memory_data["data"]["result"][0]["value"][1])
# Check if Prometheus is available (any non-zero values)
if cluster_cpu_total > 0 or cluster_memory_total > 0:
prometheus_available = True
# If Prometheus is not available, provide simulated data for demonstration
# If no real data, return zeros with appropriate message
if not prometheus_available:
# Simulate cluster resources (typical OpenShift cluster)
cluster_cpu_total = 24.0 # 6 nodes * 4 cores each
cluster_memory_total = 96.0 * (1024**3) # 6 nodes * 16GB each
# Simulate workload metrics based on namespace
if namespace == "resource-governance":
cpu_usage = 0.05
memory_usage = 128 * (1024**2) # 128MB
cpu_requests = 0.1
memory_requests = 128 * (1024**2)
cpu_limits = 0.5
memory_limits = 512 * (1024**2)
elif namespace == "shishika01":
cpu_usage = 0.15
memory_usage = 256 * (1024**2) # 256MB
cpu_requests = 0.2
memory_requests = 256 * (1024**2)
cpu_limits = 1.0
memory_limits = 1024 * (1024**2)
else:
cpu_usage = 0.08
memory_usage = 192 * (1024**2) # 192MB
cpu_requests = 0.15
memory_requests = 192 * (1024**2)
cpu_limits = 0.8
memory_limits = 768 * (1024**2)
return {
"workload": workload,
"namespace": namespace,
"time_range": time_range,
"prometheus_available": False,
"data_source": "no_data",
"message": "No metrics data available for this workload",
"cluster_total": {
"cpu_cores": 0,
"memory_bytes": 0,
"memory_gb": 0
},
"workload_metrics": {
"cpu": {
"usage_cores": 0,
"usage_percent": 0,
"requests_cores": 0,
"requests_percent": 0,
"limits_cores": 0,
"limits_percent": 0
},
"memory": {
"usage_bytes": 0,
"usage_mb": 0,
"usage_percent": 0,
"requests_bytes": 0,
"requests_mb": 0,
"requests_percent": 0,
"limits_bytes": 0,
"limits_mb": 0,
"limits_percent": 0
}
}
}
# Calculate percentages
cpu_usage_percent = (cpu_usage / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
@@ -632,8 +628,8 @@ async def get_workload_historical_metrics(
"workload": workload,
"namespace": namespace,
"time_range": time_range,
"prometheus_available": prometheus_available,
"data_source": "simulated" if not prometheus_available else "prometheus",
"prometheus_available": True,
"data_source": "prometheus",
"cluster_total": {
"cpu_cores": cluster_cpu_total,
"memory_bytes": cluster_memory_total,

View File

@@ -1566,9 +1566,9 @@
const workloadDataDiv = document.getElementById('workloadData');
// Add data source indicator
const dataSourceIndicator = data.data_source === 'simulated' ?
'<div style="background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #856404;"><strong>📊 Demo Mode:</strong> Showing simulated data for demonstration. Prometheus integration requires proper RBAC configuration.</div>' :
'<div style="background: #d4edda; border: 1px solid #c3e6cb; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #155724;"><strong>✅ Live Data:</strong> Real metrics from Prometheus</div>';
const dataSourceIndicator = data.data_source === 'prometheus' ?
'<div style="background: #d4edda; border: 1px solid #c3e6cb; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #155724;"><strong>✅ Live Data:</strong> Real metrics from Prometheus</div>' :
'<div style="background: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #721c24;"><strong>⚠️ No Data:</strong> No metrics available for this workload</div>';
// Render cluster total resources
clusterTotalDiv.innerHTML = dataSourceIndicator + `