Fix Prometheus queries to use the same OpenShift metrics as the console dashboard

- Updated CPU usage query to use the node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate recording rule
- Updated memory usage query to use container_memory_working_set_bytes with the job="kubelet" and metrics_path="/metrics/cadvisor" selectors (plus container!="" and image!="" filters)
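- Updated requests/limits queries to use kube_resourcequota with the cluster="" and type="hard" selectors (e.g. resource="requests.cpu"); these now report the namespace's ResourceQuota hard value rather than the sum of kube_pod_container_resource_requests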
- Applied fixes to both get_workload_historical_analysis and get_namespace_historical_analysis functions
- Queries now match the working queries from OpenShift console dashboard
This commit is contained in:
2025-09-29 13:33:48 -03:00
parent 32ef5d859c
commit 6b2f8de6b6

View File

@@ -449,37 +449,47 @@ class HistoricalAnalysisService:
try: try:
logger.info(f"Getting historical analysis for namespace: {namespace}") logger.info(f"Getting historical analysis for namespace: {namespace}")
# Query for CPU usage by namespace # Query for CPU usage by namespace (using correct OpenShift metrics)
cpu_query = f''' cpu_query = f'''
sum(rate(container_cpu_usage_seconds_total{{ sum(
namespace="{namespace}", node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{{
container!="POD", cluster="",
container!="" namespace="{namespace}"
}}[{time_range}])) }}
) by (namespace)
''' '''
# Query for memory usage by namespace # Query for memory usage by namespace (using correct OpenShift metrics)
memory_query = f''' memory_query = f'''
sum(container_memory_working_set_bytes{{ sum(
namespace="{namespace}", container_memory_working_set_bytes{{
container!="POD", job="kubelet",
container!="" metrics_path="/metrics/cadvisor",
}}) cluster="",
namespace="{namespace}",
container!="",
image!=""
}}
) by (namespace)
''' '''
# Query for CPU requests by namespace # Query for CPU requests by namespace (using correct OpenShift resource quota)
cpu_requests_query = f''' cpu_requests_query = f'''
sum(kube_pod_container_resource_requests{{ scalar(kube_resourcequota{{
cluster="",
namespace="{namespace}", namespace="{namespace}",
resource="cpu" type="hard",
resource="requests.cpu"
}}) }})
''' '''
# Query for memory requests by namespace # Query for memory requests by namespace (using correct OpenShift resource quota)
memory_requests_query = f''' memory_requests_query = f'''
sum(kube_pod_container_resource_requests{{ scalar(kube_resourcequota{{
cluster="",
namespace="{namespace}", namespace="{namespace}",
resource="memory" type="hard",
resource="requests.memory"
}}) }})
''' '''
@@ -575,7 +585,7 @@ class HistoricalAnalysisService:
try: try:
logger.info(f"Getting historical analysis for workload: {workload} in namespace: {namespace}") logger.info(f"Getting historical analysis for workload: {workload} in namespace: {namespace}")
# Query for CPU usage by workload (aggregated by workload) # Query for CPU usage by workload (using correct OpenShift metrics)
cpu_query = f''' cpu_query = f'''
sum( sum(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{{ node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{{
@@ -587,32 +597,33 @@ class HistoricalAnalysisService:
namespace_workload_pod:kube_pod_owner:relabel{{ namespace_workload_pod:kube_pod_owner:relabel{{
cluster="", cluster="",
namespace="{namespace}", namespace="{namespace}",
workload="{workload}",
workload_type=~".+" workload_type=~".+"
}} }}
) by (workload, workload_type) ) by (workload, workload_type)
''' '''
# Query for memory usage by workload (aggregated by workload) # Query for memory usage by workload (using correct OpenShift metrics)
memory_query = f''' memory_query = f'''
sum( sum(
container_memory_working_set_bytes{{ container_memory_working_set_bytes{{
job="kubelet",
metrics_path="/metrics/cadvisor",
cluster="",
namespace="{namespace}", namespace="{namespace}",
container!="POD", container!="",
container!="" image!=""
}} }}
* on(namespace,pod) * on(namespace,pod)
group_left(workload, workload_type) group_left(workload, workload_type)
namespace_workload_pod:kube_pod_owner:relabel{{ namespace_workload_pod:kube_pod_owner:relabel{{
cluster="", cluster="",
namespace="{namespace}", namespace="{namespace}",
workload="{workload}",
workload_type=~".+" workload_type=~".+"
}} }}
) by (workload, workload_type) ) by (workload, workload_type)
''' '''
# Query for CPU requests by namespace (using resource quota) # Query for CPU requests by namespace (using correct OpenShift resource quota)
cpu_requests_query = f''' cpu_requests_query = f'''
scalar(kube_resourcequota{{ scalar(kube_resourcequota{{
cluster="", cluster="",
@@ -622,7 +633,7 @@ class HistoricalAnalysisService:
}}) }})
''' '''
# Query for memory requests by namespace (using resource quota) # Query for memory requests by namespace (using correct OpenShift resource quota)
memory_requests_query = f''' memory_requests_query = f'''
scalar(kube_resourcequota{{ scalar(kube_resourcequota{{
cluster="", cluster="",
@@ -632,7 +643,7 @@ class HistoricalAnalysisService:
}}) }})
''' '''
# Query for CPU limits by namespace (using resource quota) # Query for CPU limits by namespace (using correct OpenShift resource quota)
cpu_limits_query = f''' cpu_limits_query = f'''
scalar(kube_resourcequota{{ scalar(kube_resourcequota{{
cluster="", cluster="",
@@ -642,7 +653,7 @@ class HistoricalAnalysisService:
}}) }})
''' '''
# Query for memory limits by namespace (using resource quota) # Query for memory limits by namespace (using correct OpenShift resource quota)
memory_limits_query = f''' memory_limits_query = f'''
scalar(kube_resourcequota{{ scalar(kube_resourcequota{{
cluster="", cluster="",