feat: implement real cluster analysis with Kubernetes API data

2025-10-06 15:01:56 -03:00
parent a4630b786e
commit e66c29008a
2 changed files with 103 additions and 21 deletions
--- a/app/tasks/cluster_analysis.py
+++ b/app/tasks/cluster_analysis.py
@@ -26,45 +26,97 @@ def analyze_cluster(self, cluster_config=None):
        # Update task state
        self.update_state(
            state='PROGRESS',
-            meta={'current': 0, 'total': 3, 'status': 'Starting cluster analysis...'}
+            meta={'current': 0, 'total': 5, 'status': 'Starting cluster analysis...'}
        )
-        # Step 1: Simple test
+        # Step 1: Initialize clients
        self.update_state(
            state='PROGRESS',
-            meta={'current': 1, 'total': 3, 'status': 'Testing basic functionality...'}
+            meta={'current': 1, 'total': 5, 'status': 'Initializing Kubernetes client...'}
        )
-        # Simple test without complex clients
+        k8s_client = K8sClient()
-        logger.info("Starting simple cluster analysis test")
+        logger.info("Starting real cluster analysis")
-        # Step 2: Return simple results
+        # Step 2: Get cluster info
        self.update_state(
            state='PROGRESS',
-            meta={'current': 2, 'total': 3, 'status': 'Generating results...'}
+            meta={'current': 2, 'total': 5, 'status': 'Analyzing cluster resources...'}
        )
-        # Simple results without complex operations
+        # Get real cluster data
        namespaces = k8s_client.list_namespaces()
        pods = k8s_client.list_pods()
        nodes = k8s_client.list_nodes()
        # Step 3: Analyze workloads
        self.update_state(
            state='PROGRESS',
            meta={'current': 3, 'total': 5, 'status': 'Analyzing workloads...'}
        )
        # Count workloads by type
        workload_counts = {}
        for pod in pods:
            workload_type = pod.metadata.labels.get('app.kubernetes.io/name', 'unknown')
            workload_counts[workload_type] = workload_counts.get(workload_type, 0) + 1
        # Step 4: Get resource utilization
        self.update_state(
            state='PROGRESS',
            meta={'current': 4, 'total': 5, 'status': 'Calculating resource utilization...'}
        )
        # Calculate resource requests and limits
        total_cpu_requests = 0
        total_memory_requests = 0
        total_cpu_limits = 0
        total_memory_limits = 0
        for pod in pods:
            for container in pod.spec.containers:
                if container.resources and container.resources.requests:
                    if 'cpu' in container.resources.requests:
                        total_cpu_requests += _parse_cpu_value(container.resources.requests['cpu'])
                    if 'memory' in container.resources.requests:
                        total_memory_requests += _parse_memory_value(container.resources.requests['memory'])
                if container.resources and container.resources.limits:
                    if 'cpu' in container.resources.limits:
                        total_cpu_limits += _parse_cpu_value(container.resources.limits['cpu'])
                    if 'memory' in container.resources.limits:
                        total_memory_limits += _parse_memory_value(container.resources.limits['memory'])
        # Step 5: Generate results
        self.update_state(
            state='PROGRESS',
            meta={'current': 5, 'total': 5, 'status': 'Generating analysis results...'}
        )
        # Real analysis results
        results = {
            'cluster_info': {
-                'total_namespaces': 5,
+                'total_namespaces': len(namespaces),
-                'total_pods': 20,
+                'total_pods': len(pods),
-                'total_nodes': 3,
+                'total_nodes': len(nodes),
                'workload_types': len(workload_counts)
            },
            'resource_summary': {
                'cpu_requests': total_cpu_requests,
                'memory_requests': total_memory_requests,
                'cpu_limits': total_cpu_limits,
                'memory_limits': total_memory_limits
            },
            'workload_breakdown': workload_counts,
            'summary': {
-                'total_errors': 2,
+                'total_errors': 0,  # Will be calculated by validation service
-                'total_warnings': 5,
+                'total_warnings': 0,  # Will be calculated by validation service
-                'total_info': 10,
+                'total_info': len(pods),
            },
            'status': 'completed'
        }
-        self.update_state(
+        logger.info(f"Real cluster analysis completed successfully. Found {len(namespaces)} namespaces, {len(pods)} pods, {len(nodes)} nodes")
            state='PROGRESS',
            meta={'current': 3, 'total': 3, 'status': 'Analysis completed successfully'}
        )
        logger.info(f"Simple cluster analysis completed successfully. Found {results['summary']['total_errors']} errors, {results['summary']['total_warnings']} warnings")
        return results
@@ -78,6 +130,32 @@ def analyze_cluster(self, cluster_config=None):
            'summary': {'total_errors': 0, 'total_warnings': 0, 'total_info': 0}
        }
 def _parse_cpu_value(cpu_str):
    """Parse CPU value from string to float (cores)"""
    if cpu_str.endswith('m'):
        return float(cpu_str[:-1]) / 1000
    elif cpu_str.endswith('n'):
        return float(cpu_str[:-1]) / 1000000000
    else:
        return float(cpu_str)
 def _parse_memory_value(memory_str):
    """Parse memory value from string to float (bytes)"""
    if memory_str.endswith('Ki'):
        return float(memory_str[:-2]) * 1024
    elif memory_str.endswith('Mi'):
        return float(memory_str[:-2]) * 1024 * 1024
    elif memory_str.endswith('Gi'):
        return float(memory_str[:-2]) * 1024 * 1024 * 1024
    elif memory_str.endswith('K'):
        return float(memory_str[:-1]) * 1000
    elif memory_str.endswith('M'):
        return float(memory_str[:-1]) * 1000 * 1000
    elif memory_str.endswith('G'):
        return float(memory_str[:-1]) * 1000 * 1000 * 1000
    else:
        return float(memory_str)
@celery_app.task(name='app.tasks.cluster_analysis.health_check')
 def health_check():
    """
--- a/scripts/deploy-complete.sh
+++ b/scripts/deploy-complete.sh
@@ -103,7 +103,11 @@ oc apply -f k8s/service.yaml
 # Create Route (let OpenShift generate host automatically)
 echo -e "${YELLOW}Creating Route...${NC}"
-oc expose service resource-governance-service -n $NAMESPACE --name=resource-governance-route --path=/
+if oc get route resource-governance-route -n $NAMESPACE > /dev/null 2>&1; then
    echo -e "${YELLOW}Route already exists, skipping creation${NC}"
 else
    oc expose service resource-governance-service -n $NAMESPACE --name=resource-governance-route --path=/
 fi
 # Configure TLS for the route
 echo -e "${YELLOW}Configuring TLS for Route...${NC}"