Remove simulated data and enable real Prometheus metrics
@@ -498,127 +498,123 @@ async def get_workload_historical_metrics(
     """Get historical metrics for a specific workload with cluster percentages"""
     try:
         prometheus_client = PrometheusClient()
+        await prometheus_client.initialize()

-        # Get current usage using OpenShift-specific metrics
-        cpu_usage_query = f'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{{cluster="", namespace="{namespace}"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
-        memory_usage_query = f'sum(container_memory_working_set_bytes{{job="kubelet", metrics_path="/metrics/cadvisor", cluster="", namespace="{namespace}", container!="", image!=""}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
-
-        cpu_usage_data = await prometheus_client.query(cpu_usage_query)
-        memory_usage_data = await prometheus_client.query(memory_usage_query)
-
-        # Get resource requests and limits using OpenShift-specific metrics
-        cpu_requests_query = f'sum(kube_pod_container_resource_requests{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="cpu"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
-        memory_requests_query = f'sum(kube_pod_container_resource_requests{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="memory"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
-
-        cpu_requests_data = await prometheus_client.query(cpu_requests_query)
-        memory_requests_data = await prometheus_client.query(memory_requests_query)
-
-        cpu_limits_query = f'sum(kube_pod_container_resource_limits{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="cpu"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
-        memory_limits_query = f'sum(kube_pod_container_resource_limits{{job="kube-state-metrics", cluster="", namespace="{namespace}", resource="memory"}} * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{{cluster="", namespace="{namespace}", workload_type=~".+"}}) by (workload, workload_type)'
-
-        cpu_limits_data = await prometheus_client.query(cpu_limits_query)
-        memory_limits_data = await prometheus_client.query(memory_limits_query)
-
-        # Get cluster total resources
+        # Get cluster total resources first
         cluster_cpu_query = 'sum(kube_node_status_allocatable{resource="cpu"})'
         cluster_memory_query = 'sum(kube_node_status_allocatable{resource="memory"})'

         cluster_cpu_data = await prometheus_client.query(cluster_cpu_query)
         cluster_memory_data = await prometheus_client.query(cluster_memory_query)

-        # Extract values from OpenShift-specific queries
+        # Extract cluster totals
+        cluster_cpu_total = 0
+        cluster_memory_total = 0
+
+        if cluster_cpu_data.get("status") == "success" and cluster_cpu_data.get("data", {}).get("result"):
+            for result in cluster_cpu_data["data"]["result"]:
+                cluster_cpu_total += float(result["value"][1])
+
+        if cluster_memory_data.get("status") == "success" and cluster_memory_data.get("data", {}).get("result"):
+            for result in cluster_memory_data["data"]["result"]:
+                cluster_memory_total += float(result["value"][1])
+
+        # Get workload-specific metrics using simpler queries
+        # CPU usage for specific pod
+        cpu_usage_query = f'rate(container_cpu_usage_seconds_total{{namespace="{namespace}", pod=~".*{workload}.*"}}[5m])'
+        memory_usage_query = f'container_memory_working_set_bytes{{namespace="{namespace}", pod=~".*{workload}.*", container!="", image!=""}}'
+
+        # Resource requests and limits for specific pod
+        cpu_requests_query = f'sum(kube_pod_container_resource_requests{{namespace="{namespace}", pod=~".*{workload}.*", resource="cpu"}})'
+        memory_requests_query = f'sum(kube_pod_container_resource_requests{{namespace="{namespace}", pod=~".*{workload}.*", resource="memory"}})'
+        cpu_limits_query = f'sum(kube_pod_container_resource_limits{{namespace="{namespace}", pod=~".*{workload}.*", resource="cpu"}})'
+        memory_limits_query = f'sum(kube_pod_container_resource_limits{{namespace="{namespace}", pod=~".*{workload}.*", resource="memory"}})'
+
+        # Execute queries
+        cpu_usage_data = await prometheus_client.query(cpu_usage_query)
+        memory_usage_data = await prometheus_client.query(memory_usage_query)
+        cpu_requests_data = await prometheus_client.query(cpu_requests_query)
+        memory_requests_data = await prometheus_client.query(memory_requests_query)
+        cpu_limits_data = await prometheus_client.query(cpu_limits_query)
+        memory_limits_data = await prometheus_client.query(memory_limits_query)

+        # Extract values
         cpu_usage = 0
         memory_usage = 0
         cpu_requests = 0
         memory_requests = 0
         cpu_limits = 0
         memory_limits = 0
-        cluster_cpu_total = 0
-        cluster_memory_total = 0

-        # Check if we got any data from Prometheus
-        prometheus_available = False
-
-        # Extract CPU usage from workload-specific query
+        # Extract CPU usage
         if cpu_usage_data.get("status") == "success" and cpu_usage_data.get("data", {}).get("result"):
             for result in cpu_usage_data["data"]["result"]:
-                if result.get("metric", {}).get("workload") == workload:
-                    cpu_usage = float(result["value"][1])
-                    break
+                cpu_usage += float(result["value"][1])

-        # Extract Memory usage from workload-specific query
+        # Extract Memory usage
         if memory_usage_data.get("status") == "success" and memory_usage_data.get("data", {}).get("result"):
             for result in memory_usage_data["data"]["result"]:
-                if result.get("metric", {}).get("workload") == workload:
-                    memory_usage = float(result["value"][1])
-                    break
+                memory_usage += float(result["value"][1])

-        # Extract CPU requests from workload-specific query
+        # Extract CPU requests
         if cpu_requests_data.get("status") == "success" and cpu_requests_data.get("data", {}).get("result"):
             for result in cpu_requests_data["data"]["result"]:
-                if result.get("metric", {}).get("workload") == workload:
-                    cpu_requests = float(result["value"][1])
-                    break
+                cpu_requests += float(result["value"][1])

-        # Extract Memory requests from workload-specific query
+        # Extract Memory requests
         if memory_requests_data.get("status") == "success" and memory_requests_data.get("data", {}).get("result"):
             for result in memory_requests_data["data"]["result"]:
-                if result.get("metric", {}).get("workload") == workload:
-                    memory_requests = float(result["value"][1])
-                    break
+                memory_requests += float(result["value"][1])

-        # Extract CPU limits from workload-specific query
+        # Extract CPU limits
         if cpu_limits_data.get("status") == "success" and cpu_limits_data.get("data", {}).get("result"):
             for result in cpu_limits_data["data"]["result"]:
-                if result.get("metric", {}).get("workload") == workload:
-                    cpu_limits = float(result["value"][1])
-                    break
+                cpu_limits += float(result["value"][1])

-        # Extract Memory limits from workload-specific query
+        # Extract Memory limits
         if memory_limits_data.get("status") == "success" and memory_limits_data.get("data", {}).get("result"):
             for result in memory_limits_data["data"]["result"]:
-                if result.get("metric", {}).get("workload") == workload:
-                    memory_limits = float(result["value"][1])
-                    break
+                memory_limits += float(result["value"][1])

-        if cluster_cpu_data.get("status") == "success" and cluster_cpu_data.get("data", {}).get("result"):
-            cluster_cpu_total = float(cluster_cpu_data["data"]["result"][0]["value"][1])
-
-        if cluster_memory_data.get("status") == "success" and cluster_memory_data.get("data", {}).get("result"):
-            cluster_memory_total = float(cluster_memory_data["data"]["result"][0]["value"][1])
-
-        # Check if Prometheus is available (any non-zero values)
-        if cluster_cpu_total > 0 or cluster_memory_total > 0:
-            prometheus_available = True
+        # Check if we have real data
+        prometheus_available = cluster_cpu_total > 0 and cluster_memory_total > 0

-        # If Prometheus is not available, provide simulated data for demonstration
+        # If no real data, return zeros with appropriate message
         if not prometheus_available:
-            # Simulate cluster resources (typical OpenShift cluster)
-            cluster_cpu_total = 24.0  # 6 nodes * 4 cores each
-            cluster_memory_total = 96.0 * (1024**3)  # 6 nodes * 16GB each
-
-            # Simulate workload metrics based on namespace
-            if namespace == "resource-governance":
-                cpu_usage = 0.05
-                memory_usage = 128 * (1024**2)  # 128MB
-                cpu_requests = 0.1
-                memory_requests = 128 * (1024**2)
-                cpu_limits = 0.5
-                memory_limits = 512 * (1024**2)
-            elif namespace == "shishika01":
-                cpu_usage = 0.15
-                memory_usage = 256 * (1024**2)  # 256MB
-                cpu_requests = 0.2
-                memory_requests = 256 * (1024**2)
-                cpu_limits = 1.0
-                memory_limits = 1024 * (1024**2)
-            else:
-                cpu_usage = 0.08
-                memory_usage = 192 * (1024**2)  # 192MB
-                cpu_requests = 0.15
-                memory_requests = 192 * (1024**2)
-                cpu_limits = 0.8
-                memory_limits = 768 * (1024**2)
+            return {
+                "workload": workload,
+                "namespace": namespace,
+                "time_range": time_range,
+                "prometheus_available": False,
+                "data_source": "no_data",
+                "message": "No metrics data available for this workload",
+                "cluster_total": {
+                    "cpu_cores": 0,
+                    "memory_bytes": 0,
+                    "memory_gb": 0
+                },
+                "workload_metrics": {
+                    "cpu": {
+                        "usage_cores": 0,
+                        "usage_percent": 0,
+                        "requests_cores": 0,
+                        "requests_percent": 0,
+                        "limits_cores": 0,
+                        "limits_percent": 0
+                    },
+                    "memory": {
+                        "usage_bytes": 0,
+                        "usage_mb": 0,
+                        "usage_percent": 0,
+                        "requests_bytes": 0,
+                        "requests_mb": 0,
+                        "requests_percent": 0,
+                        "limits_bytes": 0,
+                        "limits_mb": 0,
+                        "limits_percent": 0
+                    }
+                }
+            }

         # Calculate percentages
         cpu_usage_percent = (cpu_usage / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
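As a reading aid (not part of the commit): a minimal, self-contained sketch of the extraction and percentage math the new code path performs on a Prometheus instant-query response. The payload, pod names, and numbers below are hypothetical.

# Minimal sketch, not part of the commit: sums a Prometheus instant-query
# result and turns it into a cluster percentage, mirroring the new code path.
# The sample payload and all numbers are made up for illustration.

def sum_result(resp: dict) -> float:
    """Sum the values of an instant-query response, as the handler does."""
    total = 0.0
    if resp.get("status") == "success" and resp.get("data", {}).get("result"):
        for result in resp["data"]["result"]:
            total += float(result["value"][1])  # value is [timestamp, "value as string"]
    return total

sample_cpu_usage = {
    "status": "success",
    "data": {"result": [
        {"metric": {"pod": "demo-app-7d9f"}, "value": [1700000000, "0.12"]},
        {"metric": {"pod": "demo-app-8c2a"}, "value": [1700000000, "0.08"]},
    ]},
}

cpu_usage = sum_result(sample_cpu_usage)            # 0.20 cores across matching pods
cluster_cpu_total = 24.0                            # hypothetical sum(kube_node_status_allocatable{resource="cpu"})
cpu_usage_percent = (cpu_usage / cluster_cpu_total * 100) if cluster_cpu_total > 0 else 0
print(f"{cpu_usage_percent:.2f}% of cluster CPU")   # prints: 0.83% of cluster CPU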
@@ -632,8 +628,8 @@ async def get_workload_historical_metrics(
             "workload": workload,
             "namespace": namespace,
             "time_range": time_range,
-            "prometheus_available": prometheus_available,
-            "data_source": "simulated" if not prometheus_available else "prometheus",
+            "prometheus_available": True,
+            "data_source": "prometheus",
             "cluster_total": {
                 "cpu_cores": cluster_cpu_total,
                 "memory_bytes": cluster_memory_total,
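For orientation only: a hypothetical example of the success-path payload after this change. The field names follow the hunk above; the values are illustrative, not captured output.

# Hypothetical success-path payload shape after this change (values invented).
example_response = {
    "workload": "demo-app",
    "namespace": "demo",
    "time_range": "1h",
    "prometheus_available": True,       # always True on this path now
    "data_source": "prometheus",        # "no_data" is only returned by the early-exit branch
    "cluster_total": {
        "cpu_cores": 24.0,
        "memory_bytes": 103079215104,   # 96 GiB
    },
}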
@@ -1566,9 +1566,9 @@
            const workloadDataDiv = document.getElementById('workloadData');

            // Add data source indicator
-           const dataSourceIndicator = data.data_source === 'simulated' ?
-               '<div style="background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #856404;"><strong>📊 Demo Mode:</strong> Showing simulated data for demonstration. Prometheus integration requires proper RBAC configuration.</div>' :
-               '<div style="background: #d4edda; border: 1px solid #c3e6cb; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #155724;"><strong>✅ Live Data:</strong> Real metrics from Prometheus</div>';
+           const dataSourceIndicator = data.data_source === 'prometheus' ?
+               '<div style="background: #d4edda; border: 1px solid #c3e6cb; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #155724;"><strong>✅ Live Data:</strong> Real metrics from Prometheus</div>' :
+               '<div style="background: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px; padding: 10px; margin-bottom: 15px; color: #721c24;"><strong>⚠️ No Data:</strong> No metrics available for this workload</div>';

            // Render cluster total resources
            clusterTotalDiv.innerHTML = dataSourceIndicator + `