fix: correct KubernetesClient import to K8sClient in Celery tasks

2025-10-06 10:40:20 -03:00
parent 5c5afc85ac
commit bf06ae190a
17 changed files with 1233 additions and 0 deletions
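The fix itself is a one-line import correction: per the commit title, the tasks previously imported a class name that app.core.kubernetes_client does not export. In essence:

    # Before (broken): the module exports K8sClient, not KubernetesClient
    # from app.core.kubernetes_client import KubernetesClient

    # After (fixed), as used throughout the tasks below:
    from app.core.kubernetes_client import K8sClient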

app/tasks/__init__.py Normal file

@@ -0,0 +1,3 @@
"""
Celery tasks package for background processing.
"""

app/tasks/cluster_analysis.py Normal file

@@ -0,0 +1,189 @@
"""
Celery tasks for cluster analysis.
"""
from datetime import datetime, timezone
from app.celery_app import celery_app
from app.core.kubernetes_client import K8sClient
from app.core.prometheus_client import PrometheusClient
from app.services.validation_service import ValidationService
import logging
logger = logging.getLogger(__name__)
@celery_app.task(bind=True, name='app.tasks.cluster_analysis.analyze_cluster')
def analyze_cluster(self, cluster_config=None):
"""
Analyze cluster resources and generate recommendations.
Args:
cluster_config: Cluster configuration dict
Returns:
dict: Analysis results
"""
try:
# Update task state
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 5, 'status': 'Starting cluster analysis...'}
)
# Step 1: Initialize clients
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 5, 'status': 'Connecting to Kubernetes API...'}
)
k8s_client = K8sClient()
prometheus_client = PrometheusClient()
validation_service = ValidationService()
# Step 2: Discover cluster resources
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 5, 'status': 'Discovering cluster resources...'}
)
# Get cluster resources
namespaces = k8s_client.get_namespaces()
pods = k8s_client.get_pods()
nodes = k8s_client.get_nodes()
logger.info(f"Discovered {len(namespaces)} namespaces, {len(pods)} pods, {len(nodes)} nodes")
# Step 3: Analyze resource configurations
self.update_state(
state='PROGRESS',
meta={'current': 3, 'total': 5, 'status': 'Analyzing resource configurations...'}
)
# Validate resource configurations
validations = validation_service.validate_cluster_resources(pods)
# Step 4: Query Prometheus metrics
self.update_state(
state='PROGRESS',
meta={'current': 4, 'total': 5, 'status': 'Querying Prometheus metrics...'}
)
# Get cluster overcommit data
overcommit_data = prometheus_client.get_cluster_overcommit()
# Step 5: Generate recommendations
self.update_state(
state='PROGRESS',
meta={'current': 5, 'total': 5, 'status': 'Generating recommendations...'}
)
# Prepare results
results = {
'cluster_info': {
'total_namespaces': len(namespaces),
'total_pods': len(pods),
'total_nodes': len(nodes),
},
'validations': validations,
'overcommit': overcommit_data,
'summary': {
'total_errors': len([v for v in validations if v.get('severity') == 'error']),
'total_warnings': len([v for v in validations if v.get('severity') == 'warning']),
'total_info': len([v for v in validations if v.get('severity') == 'info']),
}
}
logger.info(f"Cluster analysis completed successfully. Found {results['summary']['total_errors']} errors, {results['summary']['total_warnings']} warnings")
return results
except Exception as exc:
logger.error(f"Cluster analysis failed: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': 'Analysis failed'}
)
raise exc
@celery_app.task(name='app.tasks.cluster_analysis.health_check')
def health_check():
"""
Health check task for monitoring.
Returns:
dict: Health status
"""
try:
k8s_client = K8sClient()
# Simple health check - try to get namespaces
namespaces = k8s_client.get_namespaces()
return {
'status': 'healthy',
'namespaces_count': len(namespaces),
            'timestamp': datetime.now(timezone.utc).isoformat()
}
except Exception as exc:
logger.error(f"Health check failed: {str(exc)}")
return {
'status': 'unhealthy',
'error': str(exc),
            'timestamp': datetime.now(timezone.utc).isoformat()
}
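# A sketch of running health_check periodically via Celery beat (hypothetical
# schedule name and interval; beat configuration is not part of this commit):
#
#   celery_app.conf.beat_schedule = {
#       'cluster-health-check': {
#           'task': 'app.tasks.cluster_analysis.health_check',
#           'schedule': 300.0,  # seconds
#       },
#   }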
@celery_app.task(bind=True, name='app.tasks.cluster_analysis.analyze_namespace')
def analyze_namespace(self, namespace):
"""
Analyze specific namespace resources.
Args:
namespace: Namespace name
Returns:
dict: Namespace analysis results
"""
try:
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 3, 'status': f'Analyzing namespace {namespace}...'}
)
k8s_client = K8sClient()
validation_service = ValidationService()
# Get namespace pods
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 3, 'status': f'Getting pods in namespace {namespace}...'}
)
pods = k8s_client.get_pods(namespace=namespace)
# Validate resources
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 3, 'status': f'Validating resources in namespace {namespace}...'}
)
validations = validation_service.validate_cluster_resources(pods)
# Prepare results
results = {
'namespace': namespace,
'pods_count': len(pods),
'validations': validations,
'summary': {
'total_errors': len([v for v in validations if v.get('severity') == 'error']),
'total_warnings': len([v for v in validations if v.get('severity') == 'warning']),
}
}
logger.info(f"Namespace {namespace} analysis completed. Found {results['summary']['total_errors']} errors, {results['summary']['total_warnings']} warnings")
return results
except Exception as exc:
logger.error(f"Namespace {namespace} analysis failed: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': f'Namespace {namespace} analysis failed'}
)
raise exc
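A minimal sketch of how a caller might enqueue analyze_cluster and poll the PROGRESS metadata it publishes (assumes the result backend configured in app.celery_app):

    from app.tasks.cluster_analysis import analyze_cluster
    import time

    result = analyze_cluster.delay()
    while not result.ready():
        if result.state == 'PROGRESS':
            print(result.info.get('status'))  # e.g. 'Discovering cluster resources...'
        time.sleep(1)
    print(result.get(timeout=10))  # the results dict returned by the task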

app/tasks/prometheus_queries.py Normal file

@@ -0,0 +1,218 @@
"""
Celery tasks for Prometheus queries.
"""
from datetime import datetime, timezone
from app.celery_app import celery_app
from app.core.prometheus_client import PrometheusClient
from app.services.historical_analysis import HistoricalAnalysisService
import logging
logger = logging.getLogger(__name__)
@celery_app.task(bind=True, name='app.tasks.prometheus_queries.query_historical_data')
def query_historical_data(self, namespace, workload, time_range='24h'):
"""
Query historical data for a specific workload.
Args:
namespace: Namespace name
workload: Workload name
time_range: Time range for analysis
Returns:
dict: Historical analysis results
"""
try:
# Update task state
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 4, 'status': f'Starting historical analysis for {namespace}/{workload}...'}
)
historical_service = HistoricalAnalysisService()
# Step 1: Query CPU metrics
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 4, 'status': f'Querying CPU metrics for {namespace}/{workload}...'}
)
cpu_data = historical_service.get_workload_cpu_metrics(namespace, workload, time_range)
# Step 2: Query Memory metrics
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 4, 'status': f'Querying Memory metrics for {namespace}/{workload}...'}
)
memory_data = historical_service.get_workload_memory_metrics(namespace, workload, time_range)
# Step 3: Analyze patterns
self.update_state(
state='PROGRESS',
meta={'current': 3, 'total': 4, 'status': f'Analyzing usage patterns for {namespace}/{workload}...'}
)
analysis = historical_service.analyze_workload_patterns(cpu_data, memory_data)
# Step 4: Generate recommendations
self.update_state(
state='PROGRESS',
meta={'current': 4, 'total': 4, 'status': f'Generating recommendations for {namespace}/{workload}...'}
)
recommendations = historical_service.generate_recommendations(analysis)
results = {
'namespace': namespace,
'workload': workload,
'time_range': time_range,
'cpu_data': cpu_data,
'memory_data': memory_data,
'analysis': analysis,
'recommendations': recommendations
}
logger.info(f"Historical analysis completed for {namespace}/{workload}")
return results
except Exception as exc:
logger.error(f"Historical analysis failed for {namespace}/{workload}: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': f'Historical analysis failed for {namespace}/{workload}'}
)
raise exc
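# Usage sketch (hypothetical namespace/workload names):
#   query_historical_data.delay('payments', 'checkout-api', time_range='24h')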
@celery_app.task(bind=True, name='app.tasks.prometheus_queries.query_cluster_metrics')
def query_cluster_metrics(self):
"""
Query cluster-wide metrics from Prometheus.
Returns:
dict: Cluster metrics
"""
try:
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 3, 'status': 'Querying cluster metrics...'}
)
prometheus_client = PrometheusClient()
# Step 1: Query CPU metrics
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 3, 'status': 'Querying CPU cluster metrics...'}
)
cpu_metrics = prometheus_client.query_cluster_cpu_metrics()
# Step 2: Query Memory metrics
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 3, 'status': 'Querying Memory cluster metrics...'}
)
memory_metrics = prometheus_client.query_cluster_memory_metrics()
# Step 3: Query overcommit data
self.update_state(
state='PROGRESS',
meta={'current': 3, 'total': 3, 'status': 'Querying overcommit metrics...'}
)
overcommit_data = prometheus_client.get_cluster_overcommit()
results = {
'cpu_metrics': cpu_metrics,
'memory_metrics': memory_metrics,
'overcommit': overcommit_data,
            'timestamp': datetime.now(timezone.utc).isoformat()
}
logger.info("Cluster metrics query completed successfully")
return results
except Exception as exc:
logger.error(f"Cluster metrics query failed: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': 'Cluster metrics query failed'}
)
raise exc
@celery_app.task(bind=True, name='app.tasks.prometheus_queries.batch_query_workloads')
def batch_query_workloads(self, workloads):
"""
Batch query multiple workloads for efficiency.
Args:
workloads: List of workload dicts with namespace and workload name
Returns:
dict: Batch query results
"""
try:
total_workloads = len(workloads)
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': total_workloads, 'status': f'Starting batch query for {total_workloads} workloads...'}
)
historical_service = HistoricalAnalysisService()
results = []
for i, workload in enumerate(workloads):
namespace = workload['namespace']
workload_name = workload['workload']
self.update_state(
state='PROGRESS',
meta={'current': i + 1, 'total': total_workloads, 'status': f'Querying {namespace}/{workload_name}...'}
)
try:
# Query workload metrics
cpu_data = historical_service.get_workload_cpu_metrics(namespace, workload_name, '24h')
memory_data = historical_service.get_workload_memory_metrics(namespace, workload_name, '24h')
results.append({
'namespace': namespace,
'workload': workload_name,
'cpu_data': cpu_data,
'memory_data': memory_data,
'status': 'success'
})
except Exception as exc:
logger.warning(f"Failed to query {namespace}/{workload_name}: {str(exc)}")
results.append({
'namespace': namespace,
'workload': workload_name,
'error': str(exc),
'status': 'failed'
})
logger.info(f"Batch query completed for {total_workloads} workloads")
return {
'total_workloads': total_workloads,
'successful': len([r for r in results if r['status'] == 'success']),
'failed': len([r for r in results if r['status'] == 'failed']),
'results': results
}
except Exception as exc:
logger.error(f"Batch query failed: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': 'Batch query failed'}
)
raise exc
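A minimal usage sketch for the batch task; each entry needs the 'namespace' and 'workload' keys read inside the loop (the names below are hypothetical):

    from app.tasks.prometheus_queries import batch_query_workloads

    workloads = [
        {'namespace': 'default', 'workload': 'api'},
        {'namespace': 'kube-system', 'workload': 'coredns'},
    ]
    summary = batch_query_workloads.delay(workloads).get()
    print(f"{summary['successful']} succeeded, {summary['failed']} failed")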

app/tasks/recommendations.py Normal file

@@ -0,0 +1,260 @@
"""
Celery tasks for generating recommendations.
"""
import csv
import io
from datetime import datetime, timezone
from app.celery_app import celery_app
from app.services.validation_service import ValidationService
from app.services.historical_analysis import HistoricalAnalysisService
import logging
logger = logging.getLogger(__name__)
@celery_app.task(bind=True, name='app.tasks.recommendations.generate_smart_recommendations')
def generate_smart_recommendations(self, cluster_data):
"""
Generate smart recommendations based on cluster analysis.
Args:
cluster_data: Cluster analysis data
Returns:
dict: Smart recommendations
"""
try:
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 4, 'status': 'Starting smart recommendations generation...'}
)
validation_service = ValidationService()
historical_service = HistoricalAnalysisService()
# Step 1: Analyze resource configurations
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 4, 'status': 'Analyzing resource configurations...'}
)
resource_recommendations = validation_service.generate_resource_recommendations(cluster_data.get('validations', []))
# Step 2: Analyze historical patterns
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 4, 'status': 'Analyzing historical patterns...'}
)
historical_recommendations = historical_service.generate_historical_recommendations(cluster_data)
# Step 3: Generate VPA recommendations
self.update_state(
state='PROGRESS',
meta={'current': 3, 'total': 4, 'status': 'Generating VPA recommendations...'}
)
vpa_recommendations = validation_service.generate_vpa_recommendations(cluster_data)
# Step 4: Prioritize recommendations
self.update_state(
state='PROGRESS',
meta={'current': 4, 'total': 4, 'status': 'Prioritizing recommendations...'}
)
all_recommendations = resource_recommendations + historical_recommendations + vpa_recommendations
# Sort by priority
priority_order = {'critical': 1, 'high': 2, 'medium': 3, 'low': 4}
all_recommendations.sort(key=lambda x: priority_order.get(x.get('priority', 'low'), 4))
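        # Example: priorities ['low', 'critical', 'unknown'] sort to critical (1),
        # then low (4) and unknown (also 4; the stable sort keeps their input order).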
results = {
'total_recommendations': len(all_recommendations),
'by_priority': {
'critical': len([r for r in all_recommendations if r.get('priority') == 'critical']),
'high': len([r for r in all_recommendations if r.get('priority') == 'high']),
'medium': len([r for r in all_recommendations if r.get('priority') == 'medium']),
'low': len([r for r in all_recommendations if r.get('priority') == 'low']),
},
'recommendations': all_recommendations,
'summary': {
'resource_config': len(resource_recommendations),
'historical_analysis': len(historical_recommendations),
'vpa_activation': len(vpa_recommendations),
}
}
logger.info(f"Generated {len(all_recommendations)} smart recommendations")
return results
except Exception as exc:
logger.error(f"Smart recommendations generation failed: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': 'Smart recommendations generation failed'}
)
raise exc
@celery_app.task(bind=True, name='app.tasks.recommendations.generate_namespace_recommendations')
def generate_namespace_recommendations(self, namespace, namespace_data):
"""
Generate recommendations for a specific namespace.
Args:
namespace: Namespace name
namespace_data: Namespace analysis data
Returns:
dict: Namespace recommendations
"""
try:
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 3, 'status': f'Generating recommendations for namespace {namespace}...'}
)
validation_service = ValidationService()
# Step 1: Analyze namespace validations
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 3, 'status': f'Analyzing validations for namespace {namespace}...'}
)
validations = namespace_data.get('validations', [])
resource_recommendations = validation_service.generate_resource_recommendations(validations)
# Step 2: Generate namespace-specific recommendations
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 3, 'status': f'Generating namespace-specific recommendations for {namespace}...'}
)
namespace_recommendations = validation_service.generate_namespace_recommendations(namespace, namespace_data)
# Step 3: Prioritize and format recommendations
self.update_state(
state='PROGRESS',
meta={'current': 3, 'total': 3, 'status': f'Prioritizing recommendations for namespace {namespace}...'}
)
all_recommendations = resource_recommendations + namespace_recommendations
# Add namespace context to recommendations
for rec in all_recommendations:
rec['namespace'] = namespace
rec['context'] = f"Namespace: {namespace}"
results = {
'namespace': namespace,
'total_recommendations': len(all_recommendations),
'recommendations': all_recommendations,
'summary': {
'errors': len([v for v in validations if v.get('severity') == 'error']),
'warnings': len([v for v in validations if v.get('severity') == 'warning']),
'pods_analyzed': namespace_data.get('pods_count', 0),
}
}
logger.info(f"Generated {len(all_recommendations)} recommendations for namespace {namespace}")
return results
except Exception as exc:
logger.error(f"Namespace recommendations generation failed for {namespace}: {str(exc)}")
self.update_state(
state='FAILURE',
meta={'error': str(exc), 'status': f'Namespace recommendations generation failed for {namespace}'}
)
raise exc
@celery_app.task(bind=True, name='app.tasks.recommendations.generate_export_report')
def generate_export_report(self, cluster_data, format='json'):
"""
Generate export report in specified format.
Args:
cluster_data: Cluster analysis data
format: Export format (json, csv, pdf)
Returns:
dict: Export report data
"""
try:
self.update_state(
state='PROGRESS',
meta={'current': 0, 'total': 3, 'status': f'Generating {format.upper()} export report...'}
)
# Step 1: Prepare data
self.update_state(
state='PROGRESS',
meta={'current': 1, 'total': 3, 'status': 'Preparing export data...'}
)
export_data = {
            'timestamp': datetime.now(timezone.utc).isoformat(),
'cluster_info': cluster_data.get('cluster_info', {}),
'validations': cluster_data.get('validations', []),
'overcommit': cluster_data.get('overcommit', {}),
'summary': cluster_data.get('summary', {}),
}
# Step 2: Generate recommendations
self.update_state(
state='PROGRESS',
meta={'current': 2, 'total': 3, 'status': 'Generating recommendations for export...'}
)
        # Run the subtask synchronously in-process: blocking on .delay().get()
        # inside a task can deadlock the worker, and Celery rejects it by default.
        recommendations = generate_smart_recommendations.apply(args=(cluster_data,)).get()
export_data['recommendations'] = recommendations.get('recommendations', [])
# Step 3: Format export
self.update_state(
state='PROGRESS',
meta={'current': 3, 'total': 3, 'status': f'Formatting {format.upper()} export...'}
)
if format == 'csv':
# Convert to CSV format
csv_data = convert_to_csv(export_data)
export_data['csv_data'] = csv_data
elif format == 'pdf':
# Convert to PDF format
pdf_data = convert_to_pdf(export_data)
export_data['pdf_data'] = pdf_data
results = {
'format': format,
'data': export_data,
'size': len(str(export_data)),
            'timestamp': datetime.now(timezone.utc).isoformat()
}
logger.info(f"Generated {format.upper()} export report successfully")
return results
except Exception as exc:
logger.error(f"Export report generation failed: {str(exc)}")
self.update_state(
state='FAILURE',
            meta={'error': str(exc), 'status': 'Export report generation failed'}
)
raise exc
def convert_to_csv(data):
    """Convert validation data to CSV format."""
    # Use the csv module so commas and quotes inside messages are escaped correctly.
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(['namespace', 'workload', 'severity', 'message', 'recommendation'])
    for v in data.get('validations', []):
        writer.writerow([
            v.get('namespace', ''), v.get('workload', ''), v.get('severity', ''),
            v.get('message', ''), v.get('recommendation', ''),
        ])
    return output.getvalue()
def convert_to_pdf(data):
    """Convert data to PDF format."""
    # Placeholder text rendering - a real implementation would use reportlab.
    return (
        "PDF Report for Cluster Analysis\n\n"
        f"Total Namespaces: {data.get('cluster_info', {}).get('total_namespaces', 0)}\n"
        f"Total Pods: {data.get('cluster_info', {}).get('total_pods', 0)}\n"
        f"Total Errors: {data.get('summary', {}).get('total_errors', 0)}\n"
        f"Total Warnings: {data.get('summary', {}).get('total_warnings', 0)}\n"
    )