feat: implement batch processing for large clusters (100 pods per batch) with memory optimization and progress tracking

This commit is contained in:
2025-10-15 16:22:40 -03:00
parent 4c6ce49526
commit 93a7a0988a
7 changed files with 718 additions and 0 deletions

View File

@@ -1738,6 +1738,199 @@ async def health_check():
"version": "1.0.0"
}
# ============================================================================
# BATCH PROCESSING ENDPOINTS - For Large Clusters (10,000+ pods)
# ============================================================================
@api_router.get("/batch/statistics")
async def get_batch_statistics(
    k8s_client=Depends(get_k8s_client)
):
    """Kick off the Celery task that computes batch-processing statistics.

    Returns immediately with the task id; callers poll
    GET /batch/statistics/{task_id} for the result.
    """
    try:
        from app.tasks.batch_analysis import get_batch_statistics

        # Fire-and-forget: the heavy work runs in the Celery worker.
        async_task = get_batch_statistics.delay()
        return {
            "task_id": async_task.id,
            "status": "started",
            "message": "Batch statistics calculation started",
            "timestamp": datetime.now().isoformat(),
        }
    except Exception as exc:
        logger.error(f"Error starting batch statistics: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@api_router.get("/batch/statistics/{task_id}")
async def get_batch_statistics_result(task_id: str):
    """Poll the state of a previously started batch-statistics task.

    Maps the Celery task state to a JSON payload: pending / progress /
    success, with anything else reported as an error.
    """
    try:
        from app.tasks.batch_analysis import get_batch_statistics

        task = get_batch_statistics.AsyncResult(task_id)
        state = task.state

        # Early returns per state instead of an if/elif chain.
        if state == 'PENDING':
            return {
                "task_id": task_id,
                "status": "pending",
                "message": "Task is still processing...",
            }
        if state == 'PROGRESS':
            return {
                "task_id": task_id,
                "status": "progress",
                "message": "Task is in progress...",
                "meta": task.info,
            }
        if state == 'SUCCESS':
            return {
                "task_id": task_id,
                "status": "success",
                "result": task.result,
            }
        # Any remaining state (e.g. FAILURE, REVOKED) is surfaced as an error.
        return {
            "task_id": task_id,
            "status": "error",
            "error": str(task.info),
        }
    except Exception as exc:
        logger.error(f"Error getting batch statistics result: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@api_router.post("/batch/process")
async def start_batch_processing(
    namespace: Optional[str] = None,
    include_system_namespaces: bool = False,
    batch_size: int = 100
):
    """Start batch processing for large clusters.

    Args:
        namespace: Restrict processing to one namespace (None = all).
        include_system_namespaces: Also process system namespaces.
        batch_size: Pods per batch; must be in [10, 500].

    Returns:
        Task metadata (id, status, echoed parameters, timestamp).

    Raises:
        HTTPException: 400 for an out-of-range batch_size, 500 on task
            submission failure.
    """
    # Validate BEFORE the try block. Previously the 400 was raised inside
    # the try, caught by `except Exception`, and re-raised as a 500 —
    # clients never saw the intended 400 Bad Request.
    if batch_size < 10 or batch_size > 500:
        raise HTTPException(
            status_code=400,
            detail="Batch size must be between 10 and 500"
        )
    try:
        from app.tasks.batch_analysis import process_cluster_batch

        # Submit the work to the Celery worker and return the task handle.
        task = process_cluster_batch.delay({
            'namespace': namespace,
            'include_system_namespaces': include_system_namespaces,
            'batch_size': batch_size
        })
        return {
            "task_id": task.id,
            "status": "started",
            "message": f"Batch processing started with batch size {batch_size}",
            "namespace": namespace,
            "include_system_namespaces": include_system_namespaces,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error starting batch processing: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@api_router.get("/batch/process/{task_id}")
async def get_batch_processing_result(task_id: str):
    """Poll the state of a batch-processing task started via POST /batch/process.

    Translates the Celery task state into a JSON status payload.
    """
    try:
        from app.tasks.batch_analysis import process_cluster_batch

        async_res = process_cluster_batch.AsyncResult(task_id)

        # Build the response incrementally: every payload carries task_id.
        payload = {"task_id": task_id}
        if async_res.state == 'PENDING':
            payload["status"] = "pending"
            payload["message"] = "Task is still processing..."
        elif async_res.state == 'PROGRESS':
            payload["status"] = "progress"
            payload["message"] = "Task is in progress..."
            payload["meta"] = async_res.info
        elif async_res.state == 'SUCCESS':
            payload["status"] = "success"
            payload["result"] = async_res.result
        else:
            # FAILURE / REVOKED / anything unexpected.
            payload["status"] = "error"
            payload["error"] = str(async_res.info)
        return payload
    except Exception as exc:
        logger.error(f"Error getting batch processing result: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@api_router.get("/batch/validations")
async def get_batch_validations(
    namespace: Optional[str] = None,
    severity: Optional[str] = None,
    page: int = 1,
    page_size: int = 50,
    include_system_namespaces: bool = False,
    k8s_client=Depends(get_k8s_client)
):
    """Get validations using batch processing for large clusters.

    Streams batches from the batch-processing service, optionally filters
    by severity, and paginates the combined result.

    Args:
        namespace: Restrict to one namespace (None = all).
        severity: Keep only validations whose 'severity' equals this value.
        page: 1-based page number.
        page_size: Items per page.
        include_system_namespaces: Also process system namespaces.
        k8s_client: Injected Kubernetes client.

    Raises:
        HTTPException: 400 for invalid pagination values, 500 on failure.
    """
    # Validate pagination BEFORE the try block so the 400 is not swallowed
    # by `except Exception` and converted to a 500. Without this check,
    # page=0 (or negative) made `start` negative, so the slice silently
    # returned items from the END of the list; page_size<=0 produced empty
    # pages and a meaningless total_pages.
    if page < 1:
        raise HTTPException(status_code=400, detail="page must be >= 1")
    if page_size < 1:
        raise HTTPException(status_code=400, detail="page_size must be >= 1")
    try:
        from app.services.batch_processing import batch_processing_service

        # Accumulate validations batch by batch to bound peak memory per batch.
        all_validations = []
        async for batch_result in batch_processing_service.process_cluster_in_batches(
            k8s_client,
            namespace=namespace,
            include_system_namespaces=include_system_namespaces
        ):
            all_validations.extend(batch_result.validations)

        if severity:
            all_validations = [
                v for v in all_validations if v.get('severity') == severity
            ]

        total = len(all_validations)
        start = (page - 1) * page_size
        return {
            "validations": all_validations[start:start + page_size],
            "pagination": {
                "page": page,
                "page_size": page_size,
                "total": total,
                # Ceiling division without importing math.
                "total_pages": (total + page_size - 1) // page_size
            },
            "processing_method": "batch",
            "batch_size": batch_processing_service.batch_size,
            "timestamp": datetime.now().isoformat()
        }
    except Exception as e:
        logger.error(f"Error getting batch validations: {e}")
        raise HTTPException(status_code=500, detail=str(e))
# ============================================================================
# OPTIMIZED ENDPOINTS - 10x Performance Improvement
# ============================================================================