Fix historical analysis contradictions and implement workload-based analysis

- Fix the contradiction between insufficient_historical_data and historical_analysis
- Add a return statement when data is insufficient, to prevent the P99 calculation from running
- Implement workload-based historical analysis instead of pod-based analysis
- Add _extract_workload_name() to identify the workload from pod names
- Add analyze_workload_historical_usage() for workload-level analysis
- Add _analyze_workload_metrics() with Prometheus workload queries
- Add validate_workload_resources_with_historical_analysis() method
- Update the /cluster/status endpoint to use workload analysis by namespace
- Improve reliability by analyzing workloads instead of individual pods
- Maintain a fallback to pod-level analysis if workload analysis fails
2025-10-01 16:32:12 -03:00
parent 6f5c8b0cac
commit 4721a1ef37
3 changed files with 280 additions and 11 deletions
--- a/app/services/validation_service.py
+++ b/app/services/validation_service.py
@@ -64,6 +64,39 @@ class ValidationService:
            logger.warning(f"Error in historical analysis for pod {pod.name}: {e}")
        
        return static_validations
+
+    async def validate_workload_resources_with_historical_analysis(
+        self, 
+        pods: List[PodResource], 
+        time_range: str = '24h'
+    ) -> List[ResourceValidation]:
+        """Validate pods statically, then add workload-level historical analysis (recommended approach); falls back to per-pod historical analysis if the workload pass raises."""
+        all_validations = []
+        
+        # Static checks: run validate_pod_resources on each pod and collect every result
+        for pod in pods:
+            static_validations = self.validate_pod_resources(pod)
+            all_validations.extend(static_validations)
+        
+        # Historical analysis over the whole pod list in a single call (workload-level; the author considers this more reliable than per-pod)
+        try:
+            historical_validations = await self.historical_analysis.analyze_workload_historical_usage(
+                pods, time_range
+            )
+            all_validations.extend(historical_validations)
+        except Exception as e:
+            logger.warning(f"Error in workload historical analysis: {e}")
+            # Workload-level call raised: analyze each pod separately instead; a failing pod is logged and skipped
+            for pod in pods:
+                try:
+                    pod_historical = await self.historical_analysis.analyze_pod_historical_usage(
+                        pod, time_range
+                    )
+                    all_validations.extend(pod_historical)
+                except Exception as pod_e:
+                    logger.warning(f"Error in historical analysis for pod {pod.name}: {pod_e}")
+        
+        return all_validations
    
    def _validate_container_resources(
        self,