Fix historical analysis contradictions and implement workload-based analysis

- Fix insufficient_historical_data vs historical_analysis contradiction
- Add an early return when historical data is insufficient, so the P99 calculation is skipped
- Implement workload-based historical analysis instead of pod-based
- Add _extract_workload_name() to identify workload from pod names
- Add analyze_workload_historical_usage() for workload-level analysis
- Add _analyze_workload_metrics() with Prometheus workload queries
- Add validate_workload_resources_with_historical_analysis() method
- Update /cluster/status endpoint to use workload analysis by namespace
- Improve reliability by analyzing workloads instead of individual pods
- Maintain fallback to pod-level analysis if workload analysis fails
This commit is contained in:
2025-10-01 16:32:12 -03:00
parent 6f5c8b0cac
commit 4721a1ef37
3 changed files with 280 additions and 11 deletions

View File

@@ -64,6 +64,39 @@ class ValidationService:
logger.warning(f"Error in historical analysis for pod {pod.name}: {e}")
return static_validations
async def validate_workload_resources_with_historical_analysis(
    self,
    pods: List[PodResource],
    time_range: str = '24h'
) -> List[ResourceValidation]:
    """Collect static and historical validations for a group of pods.

    Static validations are gathered for every pod first. Historical
    findings are then requested once for the whole group via the
    workload-level analysis. If that step raises, a warning is logged and
    the historical analysis is retried pod by pod; a pod whose individual
    analysis raises is logged and skipped.

    Args:
        pods: Pods to validate.
        time_range: Time window forwarded unchanged to the historical
            analysis calls.

    Returns:
        The static validations followed by whatever historical
        validations could be obtained.
    """
    # Static checks: one flat list covering every pod, in input order.
    results = [
        finding
        for pod in pods
        for finding in self.validate_pod_resources(pod)
    ]

    # Preferred path: a single workload-level historical analysis.
    try:
        results.extend(
            await self.historical_analysis.analyze_workload_historical_usage(
                pods, time_range
            )
        )
        return results
    except Exception as e:
        logger.warning(f"Error in workload historical analysis: {e}")

    # Fallback path: analyse each pod on its own so that one failing pod
    # does not discard the findings for the others.
    for pod in pods:
        try:
            results.extend(
                await self.historical_analysis.analyze_pod_historical_usage(
                    pod, time_range
                )
            )
        except Exception as pod_e:
            logger.warning(f"Error in historical analysis for pod {pod.name}: {pod_e}")

    return results
def _validate_container_resources(
self,