diff --git a/.github/workflows/openshift-deploy.yml b/.github/workflows/openshift-deploy.yml new file mode 100644 index 0000000..2e219e9 --- /dev/null +++ b/.github/workflows/openshift-deploy.yml @@ -0,0 +1,94 @@ +# DISABLED: This workflow is disabled because it requires access to internal OpenShift clusters +# Use build-only.yml for public clusters and deploy-to-cluster.sh for local deployment +# name: Deploy to OpenShift (DISABLED) + +# This workflow is completely disabled +# on: +# workflow_dispatch: + +# env: +# IMAGE_NAME: resource-governance +# REGISTRY: andersonid +# NAMESPACE: resource-governance + +# jobs: +# build-and-deploy: +# runs-on: ubuntu-latest +# timeout-minutes: 30 +# +# steps: +# - name: Checkout code +# uses: actions/checkout@v4 +# +# - name: Set up Python +# uses: actions/setup-python@v4 +# with: +# python-version: '3.11' +# +# - name: Run basic syntax check +# run: | +# python -m py_compile app/main.py +# echo "✅ Syntax check passed" +# +# - name: Set up Podman +# run: | +# sudo apt-get update -qq +# sudo apt-get install -y -qq podman buildah skopeo +# +# - name: Login to Docker Hub +# run: | +# echo "${{ secrets.DOCKERHUB_TOKEN }}" | podman login docker.io -u ${{ secrets.DOCKERHUB_USERNAME }} --password-stdin +# +# - name: Build and push image with Podman +# run: | +# # Build da imagem com cache +# podman build --layers -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} . 
+# +# # Tag como latest +# podman tag ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest +# +# # Push das imagens +# podman push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} +# podman push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest +# +# - name: Install OpenShift CLI +# run: | +# curl -L https://mirror.openshift.com/pub/openshift-v4/clients/oc/latest/linux/oc.tar.gz | tar -xz -C /usr/local/bin/ +# chmod +x /usr/local/bin/oc +# +# - name: Deploy to OpenShift +# if: github.ref == 'refs/heads/main' +# run: | +# # Login to OpenShift +# oc login ${{ secrets.OPENSHIFT_SERVER }} --token="${{ secrets.OPENSHIFT_TOKEN }}" +# +# # Apply manifests (namespace, rbac, configmap) +# oc apply -f k8s/namespace.yaml +# oc apply -f k8s/rbac.yaml +# oc apply -f k8s/configmap.yaml +# +# # Update deployment with new image +# oc set image deployment/${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }}=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} -n ${{ env.NAMESPACE }} || true +# +# # Apply deployment, service and route +# oc apply -f k8s/deployment.yaml +# oc apply -f k8s/service.yaml +# oc apply -f k8s/route.yaml +# +# # Wait for rollout +# oc rollout status deployment/${{ env.IMAGE_NAME }} -n ${{ env.NAMESPACE }} --timeout=300s +# +# # Verify deployment +# oc get deployment ${{ env.IMAGE_NAME }} -n ${{ env.NAMESPACE }} +# oc get pods -n ${{ env.NAMESPACE }} -l app.kubernetes.io/name=${{ env.IMAGE_NAME }} +# +# # Get route URL +# ROUTE_URL=$(oc get route ${{ env.IMAGE_NAME }}-route -n ${{ env.NAMESPACE }} -o jsonpath='{.spec.host}' 2>/dev/null || echo "") +# if [ -n "$ROUTE_URL" ]; then +# echo "🚀 Application deployed successfully!" 
+# echo "🌐 URL: https://$ROUTE_URL" +# echo "📊 Status: oc get pods -n ${{ env.NAMESPACE }} -l app.kubernetes.io/name=${{ env.IMAGE_NAME }}" +# fi +# env: +# OPENSHIFT_SERVER: ${{ secrets.OPENSHIFT_SERVER }} +# OPENSHIFT_TOKEN: ${{ secrets.OPENSHIFT_TOKEN }} \ No newline at end of file diff --git a/app/api/routes.py b/app/api/routes.py index a6e7099..82f7c38 100644 --- a/app/api/routes.py +++ b/app/api/routes.py @@ -55,7 +55,7 @@ async def get_cluster_status( # Obter recomendações VPA vpa_recommendations = await k8s_client.get_vpa_recommendations() - # Gerar relatório + # Generate report report = report_service.generate_cluster_report( pods=pods, validations=all_validations, @@ -67,7 +67,7 @@ async def get_cluster_status( return report except Exception as e: - logger.error(f"Erro ao obter status do cluster: {e}") + logger.error(f"Error getting cluster status: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/namespace/{namespace}/status") @@ -90,7 +90,7 @@ async def get_namespace_status( # Obter uso de recursos do Prometheus resource_usage = await prometheus_client.get_namespace_resource_usage(namespace) - # Gerar relatório do namespace + # Generate namespace report report = report_service.generate_namespace_report( namespace=namespace, pods=namespace_resources.pods, @@ -101,7 +101,7 @@ async def get_namespace_status( return report except Exception as e: - logger.error(f"Erro ao obter status do namespace {namespace}: {e}") + logger.error(f"Error getting namespace {namespace} status: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/pods") @@ -118,7 +118,7 @@ async def get_pods( return await k8s_client.get_all_pods() except Exception as e: - logger.error(f"Erro ao listar pods: {e}") + logger.error(f"Error listing pods: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/validations") @@ -167,7 +167,7 @@ async def get_validations( } except Exception as e: - logger.error(f"Erro ao obter 
validações: {e}") + logger.error(f"Error getting validations: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/validations/by-namespace") @@ -235,7 +235,7 @@ async def get_validations_by_namespace( } except Exception as e: - logger.error(f"Erro ao obter validações por namespace: {e}") + logger.error(f"Error getting validations by namespace: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/vpa/recommendations") @@ -255,7 +255,7 @@ async def get_vpa_recommendations( return recommendations except Exception as e: - logger.error(f"Erro ao obter recomendações VPA: {e}") + logger.error(f"Error getting VPA recommendations: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.post("/export") @@ -264,9 +264,9 @@ async def export_report( k8s_client=Depends(get_k8s_client), prometheus_client=Depends(get_prometheus_client) ): - """Exportar relatório em diferentes formatos""" + """Export report in different formats""" try: - # Gerar relatório + # Generate report pods = await k8s_client.get_all_pods() nodes_info = await k8s_client.get_nodes_info() @@ -290,7 +290,7 @@ async def export_report( if export_request.include_validations: overcommit_info = await prometheus_client.get_cluster_overcommit() - # Gerar relatório + # Generate report report = report_service.generate_cluster_report( pods=pods, validations=all_validations, @@ -303,29 +303,29 @@ async def export_report( filepath = await report_service.export_report(report, export_request) return { - "message": "Relatório exportado com sucesso", + "message": "Report exported successfully", "filepath": filepath, "format": export_request.format } except Exception as e: - logger.error(f"Erro ao exportar relatório: {e}") + logger.error(f"Error exporting report: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/export/files") async def list_exported_files(): - """Listar arquivos exportados""" + """List exported files""" try: files = 
report_service.get_exported_reports() return files except Exception as e: - logger.error(f"Erro ao listar arquivos exportados: {e}") + logger.error(f"Error listing exported files: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/export/files/{filename}") async def download_exported_file(filename: str): - """Download de arquivo exportado""" + """Download exported file""" try: files = report_service.get_exported_reports() file_info = next((f for f in files if f["filename"] == filename), None) @@ -340,7 +340,7 @@ async def download_exported_file(filename: str): ) except Exception as e: - logger.error(f"Erro ao baixar arquivo {filename}: {e}") + logger.error(f"Error downloading file {filename}: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.post("/apply/recommendation") @@ -362,10 +362,10 @@ async def apply_recommendation( } else: # Implementar aplicação real da recomendação - raise HTTPException(status_code=501, detail="Aplicação de recomendações não implementada ainda") + raise HTTPException(status_code=501, detail="Recommendation application not implemented yet") except Exception as e: - logger.error(f"Erro ao aplicar recomendação: {e}") + logger.error(f"Error applying recommendation: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/validations/historical") @@ -401,7 +401,7 @@ async def get_historical_validations( } except Exception as e: - logger.error(f"Erro ao obter validações históricas: {e}") + logger.error(f"Error getting historical validations: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/cluster/historical-summary") @@ -420,12 +420,12 @@ async def get_cluster_historical_summary( } except Exception as e: - logger.error(f"Erro ao obter resumo histórico: {e}") + logger.error(f"Error getting historical summary: {e}") raise HTTPException(status_code=500, detail=str(e)) @api_router.get("/health") async def health_check(): - """Health check da API""" + """API 
health check""" return { "status": "healthy", "service": "resource-governance-api", diff --git a/app/core/config.py b/app/core/config.py index 1617bfe..a96ab41 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -1,5 +1,5 @@ """ -Configurações da aplicação +Application settings """ import os from typing import List, Optional @@ -7,17 +7,17 @@ from pydantic_settings import BaseSettings from pydantic import Field class Settings(BaseSettings): - """Configurações da aplicação""" + """Application settings""" - # Configurações do OpenShift/Kubernetes + # OpenShift/Kubernetes settings kubeconfig_path: Optional[str] = None cluster_url: Optional[str] = None token: Optional[str] = None - # Configurações do Prometheus + # Prometheus settings prometheus_url: str = "http://prometheus.openshift-monitoring.svc.cluster.local:9090" - # Configurações de validação + # Validation settings cpu_limit_ratio: float = 3.0 # Ratio padrão limit:request para CPU memory_limit_ratio: float = 3.0 # Ratio padrão limit:request para memória min_cpu_request: str = "10m" # Mínimo de CPU request @@ -32,7 +32,7 @@ class Settings(BaseSettings): "openshift-sdn" ] - # Configurações de filtro de namespaces + # Namespace filter settings include_system_namespaces: bool = Field(default=False, alias="INCLUDE_SYSTEM_NAMESPACES") system_namespace_prefixes: List[str] = Field( default=[ @@ -50,10 +50,10 @@ class Settings(BaseSettings): env_file = ".env" case_sensitive = False - # Configurações de relatório + # Report settings report_export_path: str = "/tmp/reports" - # Configurações de segurança + # Security settings enable_rbac: bool = True service_account_name: str = "resource-governance-sa" diff --git a/app/core/kubernetes_client.py b/app/core/kubernetes_client.py index 2ba5bc6..6876ade 100644 --- a/app/core/kubernetes_client.py +++ b/app/core/kubernetes_client.py @@ -38,10 +38,10 @@ class K8sClient: self.apps_v1 = client.AppsV1Api() self.initialized = True - logger.info("Cliente Kubernetes 
inicializado com sucesso") + logger.info("Kubernetes client initialized successfully") except Exception as e: - logger.error(f"Erro ao inicializar cliente Kubernetes: {e}") + logger.error(f"Error initializing Kubernetes client: {e}") raise def _is_system_namespace(self, namespace: str, include_system: bool = None) -> bool: @@ -60,7 +60,7 @@ class K8sClient: async def get_all_pods(self, include_system_namespaces: bool = None) -> List[PodResource]: """Coletar informações de todos os pods do cluster""" if not self.initialized: - raise RuntimeError("Cliente Kubernetes não inicializado") + raise RuntimeError("Kubernetes client not initialized") pods_data = [] @@ -110,13 +110,13 @@ class K8sClient: return pods_data except ApiException as e: - logger.error(f"Erro ao listar pods: {e}") + logger.error(f"Error listing pods: {e}") raise async def get_namespace_resources(self, namespace: str) -> NamespaceResources: """Coletar recursos de um namespace específico""" if not self.initialized: - raise RuntimeError("Cliente Kubernetes não inicializado") + raise RuntimeError("Kubernetes client not initialized") # Verificar se é namespace do sistema if self._is_system_namespace(namespace): @@ -179,13 +179,13 @@ class K8sClient: return namespace_resource except ApiException as e: - logger.error(f"Erro ao coletar recursos do namespace {namespace}: {e}") + logger.error(f"Error collecting resources for namespace {namespace}: {e}") raise async def get_vpa_recommendations(self) -> List[VPARecommendation]: """Coletar recomendações do VPA""" if not self.initialized: - raise RuntimeError("Cliente Kubernetes não inicializado") + raise RuntimeError("Kubernetes client not initialized") recommendations = [] @@ -199,14 +199,14 @@ class K8sClient: return recommendations except ApiException as e: - logger.error(f"Erro ao coletar recomendações VPA: {e}") + logger.error(f"Error collecting VPA recommendations: {e}") # VPA pode não estar instalado, retornar lista vazia return [] async def 
get_nodes_info(self) -> List[Dict[str, Any]]: """Coletar informações dos nós do cluster""" if not self.initialized: - raise RuntimeError("Cliente Kubernetes não inicializado") + raise RuntimeError("Kubernetes client not initialized") try: nodes = self.v1.list_node() @@ -250,5 +250,5 @@ class K8sClient: return nodes_info except ApiException as e: - logger.error(f"Erro ao coletar informações dos nós: {e}") + logger.error(f"Error collecting node information: {e}") raise diff --git a/app/core/prometheus_client.py b/app/core/prometheus_client.py index 73213b8..d42a0be 100644 --- a/app/core/prometheus_client.py +++ b/app/core/prometheus_client.py @@ -28,19 +28,19 @@ class PrometheusClient: async with self.session.get(f"{self.base_url}/api/v1/query?query=up") as response: if response.status == 200: self.initialized = True - logger.info("Cliente Prometheus inicializado com sucesso") + logger.info("Prometheus client initialized successfully") else: logger.warning(f"Prometheus retornou status {response.status}") except Exception as e: - logger.error(f"Erro ao inicializar cliente Prometheus: {e}") + logger.error(f"Error initializing Prometheus client: {e}") # Prometheus pode não estar disponível, continuar sem ele self.initialized = False async def query(self, query: str, time: Optional[datetime] = None) -> Dict[str, Any]: """Executar query no Prometheus""" if not self.initialized or not self.session: - return {"status": "error", "message": "Prometheus não disponível"} + return {"status": "error", "message": "Prometheus not available"} try: params = {"query": query} @@ -55,11 +55,11 @@ class PrometheusClient: data = await response.json() return data else: - logger.error(f"Erro na query Prometheus: {response.status}") + logger.error(f"Error in Prometheus query: {response.status}") return {"status": "error", "message": f"HTTP {response.status}"} except Exception as e: - logger.error(f"Erro ao executar query Prometheus: {e}") + logger.error(f"Error executing Prometheus query: 
{e}") return {"status": "error", "message": str(e)} async def get_pod_cpu_usage(self, namespace: str, pod_name: str) -> Dict[str, Any]: diff --git a/app/main.py b/app/main.py index a8937a6..f03e0ee 100644 --- a/app/main.py +++ b/app/main.py @@ -33,9 +33,9 @@ async def lifespan(app: FastAPI): try: await app.state.k8s_client.initialize() await app.state.prometheus_client.initialize() - logger.info("Clientes inicializados com sucesso") + logger.info("Clients initialized successfully") except Exception as e: - logger.error(f"Erro ao inicializar clientes: {e}") + logger.error(f"Error initializing clients: {e}") raise yield diff --git a/app/models/resource_models.py b/app/models/resource_models.py index ec995cc..74a2b1a 100644 --- a/app/models/resource_models.py +++ b/app/models/resource_models.py @@ -45,7 +45,7 @@ class ResourceValidation(BaseModel): recommendation: Optional[str] = None class ClusterReport(BaseModel): - """Relatório do cluster""" + """Cluster report""" timestamp: str total_pods: int total_namespaces: int @@ -56,7 +56,7 @@ class ClusterReport(BaseModel): summary: Dict[str, Any] class NamespaceReport(BaseModel): - """Relatório de um namespace""" + """Namespace report""" namespace: str timestamp: str total_pods: int @@ -65,7 +65,7 @@ class NamespaceReport(BaseModel): recommendations: List[str] class ExportRequest(BaseModel): - """Request para exportar relatório""" + """Request to export report""" format: str # "json", "csv", "pdf" namespaces: Optional[List[str]] = None include_vpa: bool = True diff --git a/app/services/historical_analysis.py b/app/services/historical_analysis.py index a7decad..c610bdf 100644 --- a/app/services/historical_analysis.py +++ b/app/services/historical_analysis.py @@ -54,15 +54,15 @@ class HistoricalAnalysisService: validations.extend(memory_analysis) except Exception as e: - logger.error(f"Erro na análise histórica do pod {pod.name}: {e}") + logger.error(f"Error in historical analysis for pod {pod.name}: {e}") 
validations.append(ResourceValidation( pod_name=pod.name, namespace=pod.namespace, container_name="all", validation_type="historical_analysis_error", severity="warning", - message=f"Erro na análise histórica: {str(e)}", - recommendation="Verificar conectividade com Prometheus" + message=f"Error in historical analysis: {str(e)}", + recommendation="Check Prometheus connectivity" )) return validations @@ -123,7 +123,7 @@ class HistoricalAnalysisService: validations.extend(analysis) except Exception as e: - logger.warning(f"Erro ao analisar CPU do container {container_name}: {e}") + logger.warning(f"Error analyzing CPU for container {container_name}: {e}") return validations @@ -183,7 +183,7 @@ class HistoricalAnalysisService: validations.extend(analysis) except Exception as e: - logger.warning(f"Erro ao analisar memória do container {container_name}: {e}") + logger.warning(f"Error analyzing memory for container {container_name}: {e}") return validations @@ -218,9 +218,9 @@ class HistoricalAnalysisService: p95_usage = sorted(usage_values)[int(len(usage_values) * 0.95)] p99_usage = sorted(usage_values)[int(len(usage_values) * 0.99)] - # Análise de adequação dos requests + # Request adequacy analysis if current_requests > 0: - # Request muito alto (uso médio < 50% do request) + # Request too high (average usage < 50% of request) if avg_usage < current_requests * 0.5: validations.append(ResourceValidation( pod_name=pod_name, @@ -228,11 +228,11 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="warning", - message=f"CPU request muito alto: uso médio {avg_usage:.3f} cores vs request {current_requests:.3f} cores", - recommendation=f"Considerar reduzir CPU request para ~{avg_usage * 1.2:.3f} cores (baseado em {time_range} de uso)" + message=f"CPU request too high: average usage {avg_usage:.3f} cores vs request {current_requests:.3f} cores", + recommendation=f"Consider reducing CPU request to ~{avg_usage * 
1.2:.3f} cores (based on {time_range} of usage)" )) - # Request muito baixo (uso P95 > 80% do request) + # Request too low (P95 usage > 80% of request) elif p95_usage > current_requests * 0.8: validations.append(ResourceValidation( pod_name=pod_name, @@ -240,13 +240,13 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="warning", - message=f"CPU request pode ser insuficiente: P95 {p95_usage:.3f} cores vs request {current_requests:.3f} cores", - recommendation=f"Considerar aumentar CPU request para ~{p95_usage * 1.2:.3f} cores (baseado em {time_range} de uso)" + message=f"CPU request may be insufficient: P95 {p95_usage:.3f} cores vs request {current_requests:.3f} cores", + recommendation=f"Consider increasing CPU request to ~{p95_usage * 1.2:.3f} cores (based on {time_range} of usage)" )) - # Análise de adequação dos limits + # Limit adequacy analysis if current_limits > 0: - # Limit muito alto (uso P99 < 50% do limit) + # Limit too high (P99 usage < 50% of limit) if p99_usage < current_limits * 0.5: validations.append(ResourceValidation( pod_name=pod_name, @@ -254,11 +254,11 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="info", - message=f"CPU limit muito alto: P99 {p99_usage:.3f} cores vs limit {current_limits:.3f} cores", - recommendation=f"Considerar reduzir CPU limit para ~{p99_usage * 1.5:.3f} cores (baseado em {time_range} de uso)" + message=f"CPU limit too high: P99 {p99_usage:.3f} cores vs limit {current_limits:.3f} cores", + recommendation=f"Consider reducing CPU limit to ~{p99_usage * 1.5:.3f} cores (based on {time_range} of usage)" )) - # Limit muito baixo (uso máximo > 90% do limit) + # Limit too low (maximum usage > 90% of limit) elif max_usage > current_limits * 0.9: validations.append(ResourceValidation( pod_name=pod_name, @@ -266,8 +266,8 @@ class HistoricalAnalysisService: container_name=container_name, 
validation_type="historical_analysis", severity="warning", - message=f"CPU limit pode ser insuficiente: uso máximo {max_usage:.3f} cores vs limit {current_limits:.3f} cores", - recommendation=f"Considerar aumentar CPU limit para ~{max_usage * 1.2:.3f} cores (baseado em {time_range} de uso)" + message=f"CPU limit may be insufficient: maximum usage {max_usage:.3f} cores vs limit {current_limits:.3f} cores", + recommendation=f"Consider increasing CPU limit to ~{max_usage * 1.2:.3f} cores (based on {time_range} of usage)" )) return validations @@ -307,9 +307,9 @@ class HistoricalAnalysisService: def bytes_to_mib(bytes_value): return bytes_value / (1024 * 1024) - # Análise de adequação dos requests + # Request adequacy analysis if current_requests > 0: - # Request muito alto (uso médio < 50% do request) + # Request too high (average usage < 50% of request) if avg_usage < current_requests * 0.5: validations.append(ResourceValidation( pod_name=pod_name, @@ -317,11 +317,11 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="warning", - message=f"Memória request muito alto: uso médio {bytes_to_mib(avg_usage):.1f}Mi vs request {bytes_to_mib(current_requests):.1f}Mi", - recommendation=f"Considerar reduzir memória request para ~{bytes_to_mib(avg_usage * 1.2):.1f}Mi (baseado em {time_range} de uso)" + message=f"Memory request too high: average usage {bytes_to_mib(avg_usage):.1f}Mi vs request {bytes_to_mib(current_requests):.1f}Mi", + recommendation=f"Consider reducing memory request to ~{bytes_to_mib(avg_usage * 1.2):.1f}Mi (based on {time_range} of usage)" )) - # Request muito baixo (uso P95 > 80% do request) + # Request too low (P95 usage > 80% of request) elif p95_usage > current_requests * 0.8: validations.append(ResourceValidation( pod_name=pod_name, @@ -329,13 +329,13 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="warning", - 
message=f"Memória request pode ser insuficiente: P95 {bytes_to_mib(p95_usage):.1f}Mi vs request {bytes_to_mib(current_requests):.1f}Mi", - recommendation=f"Considerar aumentar memória request para ~{bytes_to_mib(p95_usage * 1.2):.1f}Mi (baseado em {time_range} de uso)" + message=f"Memory request may be insufficient: P95 {bytes_to_mib(p95_usage):.1f}Mi vs request {bytes_to_mib(current_requests):.1f}Mi", + recommendation=f"Consider increasing memory request to ~{bytes_to_mib(p95_usage * 1.2):.1f}Mi (based on {time_range} of usage)" )) - # Análise de adequação dos limits + # Limit adequacy analysis if current_limits > 0: - # Limit muito alto (uso P99 < 50% do limit) + # Limit too high (P99 usage < 50% of limit) if p99_usage < current_limits * 0.5: validations.append(ResourceValidation( pod_name=pod_name, @@ -343,11 +343,11 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="info", - message=f"Memória limit muito alto: P99 {bytes_to_mib(p99_usage):.1f}Mi vs limit {bytes_to_mib(current_limits):.1f}Mi", - recommendation=f"Considerar reduzir memória limit para ~{bytes_to_mib(p99_usage * 1.5):.1f}Mi (baseado em {time_range} de uso)" + message=f"Memory limit too high: P99 {bytes_to_mib(p99_usage):.1f}Mi vs limit {bytes_to_mib(current_limits):.1f}Mi", + recommendation=f"Consider reducing memory limit to ~{bytes_to_mib(p99_usage * 1.5):.1f}Mi (based on {time_range} of usage)" )) - # Limit muito baixo (uso máximo > 90% do limit) + # Limit too low (maximum usage > 90% of limit) elif max_usage > current_limits * 0.9: validations.append(ResourceValidation( pod_name=pod_name, @@ -355,8 +355,8 @@ class HistoricalAnalysisService: container_name=container_name, validation_type="historical_analysis", severity="warning", - message=f"Memória limit pode ser insuficiente: uso máximo {bytes_to_mib(max_usage):.1f}Mi vs limit {bytes_to_mib(current_limits):.1f}Mi", - recommendation=f"Considerar aumentar memória limit para 
~{bytes_to_mib(max_usage * 1.2):.1f}Mi (baseado em {time_range} de uso)" + message=f"Memory limit may be insufficient: maximum usage {bytes_to_mib(max_usage):.1f}Mi vs limit {bytes_to_mib(current_limits):.1f}Mi", + recommendation=f"Consider increasing memory limit to ~{bytes_to_mib(max_usage * 1.2):.1f}Mi (based on {time_range} of usage)" )) return validations @@ -385,7 +385,7 @@ class HistoricalAnalysisService: logger.warning(f"Prometheus query failed: {response.status}") return [] except Exception as e: - logger.error(f"Erro ao consultar Prometheus: {e}") + logger.error(f"Error querying Prometheus: {e}") return [] async def get_cluster_historical_summary(self, time_range: str = '24h') -> Dict[str, Any]: @@ -441,5 +441,5 @@ class HistoricalAnalysisService: } except Exception as e: - logger.error(f"Erro ao obter resumo histórico: {e}") + logger.error(f"Error getting historical summary: {e}") return {} diff --git a/app/services/report_service.py b/app/services/report_service.py index 4c9cd57..72beb93 100644 --- a/app/services/report_service.py +++ b/app/services/report_service.py @@ -1,5 +1,5 @@ """ -Serviço de geração de relatórios +Report generation service """ import logging import json @@ -18,7 +18,7 @@ from app.core.config import settings logger = logging.getLogger(__name__) class ReportService: - """Serviço para geração de relatórios""" + """Service for report generation""" def __init__(self): self.export_path = settings.report_export_path @@ -32,12 +32,12 @@ class ReportService: overcommit_info: Dict[str, Any], nodes_info: List[Dict[str, Any]] ) -> ClusterReport: - """Gerar relatório do cluster""" + """Generate cluster report""" - # Contar namespaces únicos + # Count unique namespaces namespaces = set(pod.namespace for pod in pods) - # Gerar resumo + # Generate summary summary = self._generate_summary(validations, vpa_recommendations, overcommit_info) report = ClusterReport( @@ -60,14 +60,14 @@ class ReportService: validations: List[ResourceValidation], 
 resource_usage: Dict[str, Any] ) -> NamespaceReport: - """Gerar relatório de um namespace""" + """Generate namespace report""" - # Filtrar validações do namespace + # Filter validations for the namespace namespace_validations = [ v for v in validations if v.namespace == namespace ] - # Gerar recomendações + # Generate recommendations recommendations = self._generate_namespace_recommendations(namespace_validations) report = NamespaceReport( @@ -87,9 +87,9 @@ class ReportService: vpa_recommendations: List[VPARecommendation], overcommit_info: Dict[str, Any] ) -> Dict[str, Any]: - """Gerar resumo do relatório""" + """Generate report summary""" - # Contar validações por severidade + # Count validations by severity severity_counts = {} for validation in validations: severity = validation.severity @@ -97,7 +97,7 @@ class ReportService: severity_counts[severity] = 0 severity_counts[severity] += 1 - # Contar validações por tipo + # Count validations by type type_counts = {} for validation in validations: validation_type = validation.validation_type @@ -120,10 +120,10 @@ class ReportService: self, validations: List[ResourceValidation] ) -> List[str]: - """Gerar recomendações para um namespace""" + """Generate recommendations for a namespace""" recommendations = [] - # Agrupar por tipo de problema + # Group by problem type problems = {} for validation in validations: problem_type = validation.validation_type @@ -131,18 +131,18 @@ class ReportService: problems[problem_type] = [] problems[problem_type].append(validation) - # Gerar recomendações específicas + # Generate specific recommendations if "missing_requests" in problems: count = len(problems["missing_requests"]) recommendations.append( - f"Criar LimitRange para definir requests padrão " - f"({count} containers sem requests)" + f"Create LimitRange to define default requests " + f"({count} containers without requests)" ) if "missing_limits" in problems: count = len(problems["missing_limits"]) recommendations.append( - 
f"Definir limits para {count} containers para evitar consumo excessivo" + f"Define limits for {count} containers to avoid excessive consumption" ) if "invalid_ratio" in problems: @@ -163,7 +163,7 @@ class ReportService: report: ClusterReport, export_request: ExportRequest ) -> str: - """Exportar relatório em diferentes formatos""" + """Export report in different formats""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -174,10 +174,10 @@ class ReportService: elif export_request.format == "pdf": return await self._export_pdf(report, timestamp) else: - raise ValueError(f"Formato não suportado: {export_request.format}") + raise ValueError(f"Unsupported format: {export_request.format}") async def _export_json(self, report: ClusterReport, timestamp: str) -> str: - """Exportar relatório em JSON""" + """Export report in JSON""" filename = f"cluster_report_{timestamp}.json" filepath = os.path.join(self.export_path, filename) @@ -187,11 +187,11 @@ class ReportService: with open(filepath, 'w', encoding='utf-8') as f: json.dump(report_dict, f, indent=2, ensure_ascii=False) - logger.info(f"Relatório JSON exportado: {filepath}") + logger.info(f"JSON report exported: {filepath}") return filepath async def _export_csv(self, report: ClusterReport, timestamp: str) -> str: - """Exportar relatório em CSV""" + """Export report in CSV""" filename = f"cluster_report_{timestamp}.csv" filepath = os.path.join(self.export_path, filename) @@ -204,7 +204,7 @@ class ReportService: "Validation Type", "Severity", "Message", "Recommendation" ]) - # Dados das validações + # Validation data for validation in report.validations: writer.writerow([ validation.pod_name, @@ -216,11 +216,11 @@ class ReportService: validation.recommendation or "" ]) - logger.info(f"Relatório CSV exportado: {filepath}") + logger.info(f"CSV report exported: {filepath}") return filepath async def _export_pdf(self, report: ClusterReport, timestamp: str) -> str: - """Exportar relatório em PDF""" + """Export report in 
PDF""" try: from reportlab.lib.pagesizes import letter from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle @@ -241,20 +241,20 @@ class ReportService: # Resumo summary_text = f""" - Resumo do Cluster:
- Total de Pods: {report.total_pods}
- Total de Namespaces: {report.total_namespaces}
- Total de Nós: {report.total_nodes}
- Total de Validações: {report.summary['total_validations']}
- Problemas Críticos: {report.summary['critical_issues']}
+ Cluster Summary:
+ Total Pods: {report.total_pods}
+ Total Namespaces: {report.total_namespaces}
+ Total Nodes: {report.total_nodes}
+ Total Validations: {report.summary['total_validations']}
+ Critical Issues: {report.summary['critical_issues']}
""" story.append(Paragraph(summary_text, styles['Normal'])) story.append(Spacer(1, 12)) - # Tabela de validações + # Validations table if report.validations: - data = [["Pod", "Namespace", "Container", "Tipo", "Severidade", "Mensagem"]] - for validation in report.validations[:50]: # Limitar a 50 para PDF + data = [["Pod", "Namespace", "Container", "Type", "Severity", "Message"]] + for validation in report.validations[:50]: # Limit to 50 for PDF data.append([ validation.pod_name, validation.namespace, @@ -280,15 +280,15 @@ class ReportService: story.append(table) doc.build(story) - logger.info(f"Relatório PDF exportado: {filepath}") + logger.info(f"PDF report exported: {filepath}") return filepath except ImportError: - logger.error("reportlab não instalado. Instale com: pip install reportlab") - raise ValueError("PDF export requer reportlab") + logger.error("reportlab not installed. Install with: pip install reportlab") + raise ValueError("PDF export requires reportlab") def get_exported_reports(self) -> List[Dict[str, str]]: - """Listar relatórios exportados""" + """List exported reports""" reports = [] for filename in os.listdir(self.export_path): diff --git a/app/services/validation_service.py b/app/services/validation_service.py index f8452a2..34bf925 100644 --- a/app/services/validation_service.py +++ b/app/services/validation_service.py @@ -50,7 +50,7 @@ class ValidationService: ) static_validations.extend(historical_validations) except Exception as e: - logger.warning(f"Erro na análise histórica do pod {pod.name}: {e}") + logger.warning(f"Error in historical analysis for pod {pod.name}: {e}") return static_validations @@ -74,8 +74,8 @@ class ValidationService: container_name=container["name"], validation_type="missing_requests", severity="error", - message="Container sem requests definidos", - recommendation="Definir requests de CPU e memória para garantir QoS" + message="Container without defined requests", + recommendation="Define CPU and memory requests to 
guarantee QoS" )) # 2. Verificar se limits estão definidos @@ -86,8 +86,8 @@ class ValidationService: container_name=container["name"], validation_type="missing_limits", severity="warning", - message="Container sem limits definidos", - recommendation="Definir limits para evitar consumo excessivo de recursos" + message="Container without defined limits", + recommendation="Define limits to avoid excessive resource consumption" )) # 3. Validar ratio limit:request @@ -139,8 +139,8 @@ class ValidationService: container_name=container_name, validation_type="invalid_ratio", severity="warning", - message=f"Ratio CPU limit:request muito alto ({ratio:.2f}:1)", - recommendation=f"Considerar reduzir limits ou aumentar requests (ratio recomendado: {self.cpu_ratio}:1)" + message=f"CPU limit:request ratio too high ({ratio:.2f}:1)", + recommendation=f"Consider reducing limits or increasing requests (recommended ratio: {self.cpu_ratio}:1)" ) elif ratio < 1.0: return ResourceValidation( @@ -149,12 +149,12 @@ class ValidationService: container_name=container_name, validation_type="invalid_ratio", severity="error", - message=f"CPU limit menor que request ({ratio:.2f}:1)", - recommendation="CPU limit deve ser maior ou igual ao request" + message=f"CPU limit less than request ({ratio:.2f}:1)", + recommendation="CPU limit should be greater than or equal to request" ) except (ValueError, InvalidOperation) as e: - logger.warning(f"Erro ao validar ratio CPU: {e}") + logger.warning(f"Error validating CPU ratio: {e}") return None @@ -184,8 +184,8 @@ class ValidationService: container_name=container_name, validation_type="invalid_ratio", severity="warning", - message=f"Ratio memória limit:request muito alto ({ratio:.2f}:1)", - recommendation=f"Considerar reduzir limits ou aumentar requests (ratio recomendado: {self.memory_ratio}:1)" + message=f"Memory limit:request ratio too high ({ratio:.2f}:1)", + recommendation=f"Consider reducing limits or increasing requests (recommended ratio: 
{self.memory_ratio}:1)" ) elif ratio < 1.0: return ResourceValidation( @@ -194,12 +194,12 @@ class ValidationService: container_name=container_name, validation_type="invalid_ratio", severity="error", - message=f"Memória limit menor que request ({ratio:.2f}:1)", - recommendation="Memória limit deve ser maior ou igual ao request" + message=f"Memory limit less than request ({ratio:.2f}:1)", + recommendation="Memory limit should be greater than or equal to request" ) except (ValueError, InvalidOperation) as e: - logger.warning(f"Erro ao validar ratio memória: {e}") + logger.warning(f"Error validating memory ratio: {e}") return None @@ -226,8 +226,8 @@ class ValidationService: container_name=container_name, validation_type="minimum_value", severity="warning", - message=f"CPU request muito baixo ({requests['cpu']})", - recommendation=f"Considerar aumentar para pelo menos {self.min_cpu_request}" + message=f"CPU request too low ({requests['cpu']})", + recommendation=f"Consider increasing to at least {self.min_cpu_request}" )) except (ValueError, InvalidOperation): pass @@ -245,8 +245,8 @@ class ValidationService: container_name=container_name, validation_type="minimum_value", severity="warning", - message=f"Memória request muito baixa ({requests['memory']})", - recommendation=f"Considerar aumentar para pelo menos {self.min_memory_request}" + message=f"Memory request too low ({requests['memory']})", + recommendation=f"Consider increasing to at least {self.min_memory_request}" )) except (ValueError, InvalidOperation): pass @@ -307,8 +307,8 @@ class ValidationService: container_name="all", validation_type="overcommit", severity="critical", - message=f"Overcommit de CPU no namespace: {cpu_utilization:.1f}%", - recommendation="Reduzir requests de CPU ou adicionar mais nós ao cluster" + message=f"CPU overcommit in namespace: {cpu_utilization:.1f}%", + recommendation="Reduce CPU requests or add more nodes to the cluster" )) # Verificar overcommit de memória @@ -321,8 +321,8 @@ 
class ValidationService: container_name="all", validation_type="overcommit", severity="critical", - message=f"Overcommit de memória no namespace: {memory_utilization:.1f}%", - recommendation="Reduzir requests de memória ou adicionar mais nós ao cluster" + message=f"Memory overcommit in namespace: {memory_utilization:.1f}%", + recommendation="Reduce memory requests or add more nodes to the cluster" )) return validations @@ -342,26 +342,26 @@ class ValidationService: # Gerar recomendações baseadas nos problemas encontrados if validation_counts.get("missing_requests", 0) > 0: recommendations.append( - f"Implementar LimitRange no namespace para definir requests padrão " - f"({validation_counts['missing_requests']} containers sem requests)" + f"Implement LimitRange in namespace to define default requests " + f"({validation_counts['missing_requests']} containers without requests)" ) if validation_counts.get("missing_limits", 0) > 0: recommendations.append( - f"Definir limits para {validation_counts['missing_limits']} containers " - "para evitar consumo excessivo de recursos" + f"Define limits for {validation_counts['missing_limits']} containers " + "to avoid excessive resource consumption" ) if validation_counts.get("invalid_ratio", 0) > 0: recommendations.append( - f"Ajustar ratio limit:request para {validation_counts['invalid_ratio']} containers " - f"(recomendado: {self.cpu_ratio}:1)" + f"Adjust limit:request ratio for {validation_counts['invalid_ratio']} containers " + f"(recommended: {self.cpu_ratio}:1)" ) if validation_counts.get("overcommit", 0) > 0: recommendations.append( - f"Resolver overcommit em {validation_counts['overcommit']} namespaces " - "para evitar problemas de performance" + f"Resolve overcommit in {validation_counts['overcommit']} namespaces " + "to avoid performance issues" ) return recommendations