From 9f8cdbda7a66ac9b03757712ada722860f8e1d3b Mon Sep 17 00:00:00 2001
From: andersonid
Date: Thu, 25 Sep 2025 18:26:05 -0300
Subject: [PATCH] Fix: GitHub Actions for public clusters + deployment guide for colleagues

---
 .github/workflows/build-only.yml       |  71 +++++++
 .github/workflows/openshift-deploy.yml |  13 +-
 README-DEPLOYMENT.md                   | 244 +++++++++++++++++++++++++
 deploy-to-cluster.sh                   |  82 +++++++++
 deploy-zero-downtime.sh                | 145 +++++++++++++++
 k8s/configmap.yaml                     |   2 +-
 k8s/deployment.yaml                    |   9 +-
 7 files changed, 556 insertions(+), 10 deletions(-)
 create mode 100644 .github/workflows/build-only.yml
 create mode 100644 README-DEPLOYMENT.md
 create mode 100755 deploy-to-cluster.sh
 create mode 100755 deploy-zero-downtime.sh

diff --git a/.github/workflows/build-only.yml b/.github/workflows/build-only.yml
new file mode 100644
index 0000000..918b0a3
--- /dev/null
+++ b/.github/workflows/build-only.yml
@@ -0,0 +1,71 @@
+name: Build and Push Image
+
+on:
+  push:
+    branches: [ main, develop ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+env:
+  IMAGE_NAME: resource-governance
+  REGISTRY: andersonid
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: Run basic syntax check
+        run: |
+          python -m py_compile app/main.py
+          echo "✅ Syntax check passed"
+
+      - name: Set up Podman
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y -qq podman buildah skopeo
+
+      - name: Build image
+        run: |
+          # Build the image with layer caching, then tag it as latest
+          podman build --layers -t ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} .
+          podman tag ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
+
+      # Pull requests only validate the build: repository secrets are not
+      # available to PRs from forks, and unmerged code must not overwrite
+      # the published tags.
+      - name: Login to Docker Hub
+        if: github.event_name != 'pull_request'
+        env:
+          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+        run: |
+          echo "$DOCKERHUB_TOKEN" | podman login docker.io -u "$DOCKERHUB_USERNAME" --password-stdin
+
+      - name: Push image
+        if: github.event_name != 'pull_request'
+        run: |
+          podman push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}
+          podman push ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
+
+      - name: Output deployment info
+        if: github.event_name != 'pull_request'
+        run: |
+          echo "🚀 Image built and pushed successfully!"
+          echo "📦 Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
+          echo "📦 Latest: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest"
+          echo ""
+          echo "🔧 To deploy to your OpenShift cluster:"
+          echo "1. Clone this repository"
+          echo "2. Run: ./deploy-to-cluster.sh ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
+          echo "3. Or use: ./deploy-zero-downtime.sh ${{ github.sha }}"
diff --git a/.github/workflows/openshift-deploy.yml b/.github/workflows/openshift-deploy.yml
index 07ed474..42897d9 100644
--- a/.github/workflows/openshift-deploy.yml
+++ b/.github/workflows/openshift-deploy.yml
@@ -1,11 +1,14 @@
-name: Deploy to OpenShift
+# DISABLED: automatic triggers are off because this workflow requires access to internal OpenShift clusters
+# Use build-only.yml for public clusters and deploy-to-cluster.sh for local deployment
+name: Deploy to OpenShift (DISABLED)
 
 on:
-  push:
-    branches: [ main ]
-  pull_request:
-    branches: [ main ]
+  # Manual dispatch stays enabled: an `on:` with no events makes the workflow file invalid
+  # push:
+  #   branches: [ main ]
+  # pull_request:
+  #   branches: [ main ]
   workflow_dispatch:
 
 env:
   IMAGE_NAME: resource-governance
diff --git a/README-DEPLOYMENT.md b/README-DEPLOYMENT.md
new file mode 100644
index 0000000..80e0514
--- /dev/null
+++ b/README-DEPLOYMENT.md
@@ -0,0 +1,244 @@
+# 🚀 OpenShift Resource Governance - Guia de Deploy
+
+## 📋 Visão Geral
+
+Esta aplicação monitora e analisa recursos (CPU/Memory) de pods em clusters OpenShift, fornecendo validações e recomendações baseadas em melhores práticas.
+ +## 🔧 Pré-requisitos + +- **OpenShift CLI (oc)** instalado e configurado +- **Acesso a um cluster OpenShift** (público ou privado) +- **Permissões de cluster-admin** ou admin do namespace + +## 🚀 Deploy Rápido + +### 1. Clone o repositório +```bash +git clone https://github.com/andersonid/openshift-resource-governance.git +cd openshift-resource-governance +``` + +### 2. Faça login no OpenShift +```bash +oc login https://your-cluster.com +# Ou para clusters internos: +oc login https://api.internal-cluster.com --token=your-token +``` + +### 3. Deploy da aplicação +```bash +# Deploy simples +./deploy-to-cluster.sh + +# Deploy com imagem específica +./deploy-to-cluster.sh andersonid/resource-governance:v1.0.0 + +# Deploy zero-downtime (recomendado para produção) +./deploy-zero-downtime.sh +``` + +## 🌐 Acesso à Aplicação + +### Via OpenShift Route (recomendado) +```bash +# Obter URL da rota +oc get route resource-governance-route -n resource-governance + +# Acessar no navegador +# https://resource-governance-route-your-cluster.com +``` + +### Via Port-Forward (desenvolvimento) +```bash +# Iniciar port-forward +oc port-forward service/resource-governance-service 8080:8080 -n resource-governance + +# Acessar no navegador +# http://localhost:8080 +``` + +## 🔄 Atualizações + +### Atualização Automática (GitHub Actions) +- Push para branch `main` → Build automático da imagem +- Imagem disponível em: `andersonid/resource-governance:latest` + +### Atualização Manual +```bash +# 1. Fazer pull da nova imagem +oc set image deployment/resource-governance resource-governance=andersonid/resource-governance:latest -n resource-governance + +# 2. Aguardar rollout +oc rollout status deployment/resource-governance -n resource-governance + +# 3. 
Verificar status +oc get pods -n resource-governance +``` + +## 🛠️ Configuração Avançada + +### ConfigMap +```bash +# Editar configurações +oc edit configmap resource-governance-config -n resource-governance + +# Aplicar mudanças +oc rollout restart deployment/resource-governance -n resource-governance +``` + +### Recursos e Limites +```bash +# Verificar recursos atuais +oc describe deployment resource-governance -n resource-governance + +# Ajustar recursos (se necessário) +oc patch deployment resource-governance -n resource-governance -p '{"spec":{"template":{"spec":{"containers":[{"name":"resource-governance","resources":{"requests":{"cpu":"100m","memory":"256Mi"},"limits":{"cpu":"500m","memory":"1Gi"}}}]}}}}' +``` + +## 🔍 Troubleshooting + +### Verificar Status +```bash +# Status geral +oc get all -n resource-governance + +# Logs da aplicação +oc logs -f deployment/resource-governance -n resource-governance + +# Eventos do namespace +oc get events -n resource-governance --sort-by='.lastTimestamp' +``` + +### Problemas Comuns + +#### 1. Pod não inicia +```bash +# Verificar logs +oc logs deployment/resource-governance -n resource-governance + +# Verificar eventos +oc describe pod -l app.kubernetes.io/name=resource-governance -n resource-governance +``` + +#### 2. Erro de permissão +```bash +# Verificar RBAC +oc get clusterrole resource-governance-role +oc get clusterrolebinding resource-governance-binding + +# Recriar RBAC se necessário +oc apply -f k8s/rbac.yaml +``` + +#### 3. 
Imagem não encontrada +```bash +# Verificar se a imagem existe +oc describe deployment resource-governance -n resource-governance + +# Forçar pull da imagem +oc set image deployment/resource-governance resource-governance=andersonid/resource-governance:latest -n resource-governance +``` + +## 📊 Monitoramento + +### Health Checks +```bash +# Health check da aplicação +curl http://localhost:8080/api/v1/health + +# Status do cluster +curl http://localhost:8080/api/v1/status +``` + +### Métricas +- **Total de Pods**: Número total de pods analisados +- **Namespaces**: Número de namespaces monitorados +- **Problemas Críticos**: Validações com severidade crítica +- **Análise Histórica**: Dados do Prometheus (se disponível) + +## 🔐 Segurança + +### RBAC +A aplicação usa um ServiceAccount com permissões mínimas necessárias: +- `get`, `list` pods em todos os namespaces +- `get`, `list` nodes +- `get`, `list` VPA resources + +### Network Policies +Para clusters com Network Policies ativas, adicione: +```yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: resource-governance-netpol + namespace: resource-governance +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: resource-governance + policyTypes: + - Ingress + - Egress + ingress: + - from: [] + egress: + - to: [] +``` + +## 📝 Logs e Debugging + +### Logs da Aplicação +```bash +# Logs em tempo real +oc logs -f deployment/resource-governance -n resource-governance + +# Logs com timestamp +oc logs deployment/resource-governance -n resource-governance --timestamps=true +``` + +### Debug de Conectividade +```bash +# Testar conectividade com API do Kubernetes +oc exec deployment/resource-governance -n resource-governance -- curl -k https://kubernetes.default.svc.cluster.local/api/v1/pods + +# Testar conectividade com Prometheus (se configurado) +oc exec deployment/resource-governance -n resource-governance -- curl http://prometheus.openshift-monitoring.svc.cluster.local:9090/api/v1/query +``` 
+
+## 🆘 Suporte
+
+### Informações do Cluster
+```bash
+# Versão do OpenShift
+oc version
+
+# Informações do cluster
+oc cluster-info
+
+# Recursos disponíveis
+oc get nodes
+oc top nodes
+```
+
+### Coletar Informações para Debug
+```bash
+# Script de diagnóstico
+oc get all -n resource-governance -o yaml > resource-governance-debug.yaml
+oc describe deployment resource-governance -n resource-governance >> resource-governance-debug.yaml
+oc logs deployment/resource-governance -n resource-governance >> resource-governance-debug.yaml
+```
+
+---
+
+## 🎯 Próximos Passos
+
+1. **Configure alertas** para problemas críticos
+2. **Integre com Prometheus** para análise histórica
+3. **Configure VPA** para namespaces críticos
+4. **Personalize validações** conforme suas políticas
+
+---
+
+**Desenvolvido por:** Anderson Nobre
+**Repositório:** https://github.com/andersonid/openshift-resource-governance
+**Suporte:** Abra uma issue no GitHub
diff --git a/deploy-to-cluster.sh b/deploy-to-cluster.sh
new file mode 100755
index 0000000..04e6bbb
--- /dev/null
+++ b/deploy-to-cluster.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+
+# Deploys the OpenShift Resource Governance application.
+# Works with any OpenShift cluster (public or private).
+#
+# Usage: ./deploy-to-cluster.sh [IMAGE]
+#   IMAGE  Image to deploy. A full reference (anything containing "/" or ":")
+#          is used as given; a bare tag such as "v1.0.0" is expanded to
+#          andersonid/resource-governance:<tag>. Defaults to "latest".
+
+# Abort on the first failing command so a broken step is never reported as a
+# successful deploy.
+set -euo pipefail
+
+# Variables
+IMAGE_NAME="resource-governance"
+REGISTRY="andersonid"
+NAMESPACE="resource-governance"
+IMAGE_TAG="${1:-latest}"
+
+# "oc set image" needs a complete image reference, so expand a bare tag.
+case "${IMAGE_TAG}" in
+    */*|*:*) ;;
+    *) IMAGE_TAG="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}" ;;
+esac
+
+echo "🚀 Deploy para OpenShift Cluster"
+echo "================================"
+echo "Imagem: ${IMAGE_TAG}"
+echo "Namespace: ${NAMESPACE}"
+echo ""
+
+# 1. Check the OpenShift login
+if ! oc whoami > /dev/null 2>&1; then
+    echo "❌ Não logado no OpenShift. Por favor, faça login com 'oc login'."
+    echo "💡 Exemplo: oc login https://your-cluster.com"
+    exit 1
+fi
+echo "✅ Logado no OpenShift como: $(oc whoami)"
+echo ""
+
+# 2. Create the namespace if it does not exist yet
+if ! oc get namespace "${NAMESPACE}" > /dev/null 2>&1; then
+    echo "📋 Criando namespace ${NAMESPACE}..."
+    oc create namespace "${NAMESPACE}"
+else
+    echo "✅ Namespace ${NAMESPACE} já existe"
+fi
+echo ""
+
+# 3. Apply the base manifests (rbac, configmap)
+echo "📋 Aplicando manifests..."
+oc apply -f k8s/rbac.yaml
+oc apply -f k8s/configmap.yaml
+echo ""
+
+# 4. Apply deployment, service and route
+echo "📦 Aplicando deployment, service e route..."
+oc apply -f k8s/deployment.yaml
+oc apply -f k8s/service.yaml
+# The route is best-effort: step 8 falls back to port-forward instructions
+# when no route can be found.
+oc apply -f k8s/route.yaml || true
+echo ""
+
+# 5. Point the deployment at the requested image. This has to run after the
+#    apply above: on a first deploy the deployment does not exist before it,
+#    and applying k8s/deployment.yaml would reset an image set earlier.
+echo "🔄 Atualizando imagem do deployment..."
+oc set image "deployment/${IMAGE_NAME}" "${IMAGE_NAME}=${IMAGE_TAG}" -n "${NAMESPACE}"
+echo ""
+
+# 6. Wait for the rollout and fail loudly if it does not complete
+echo "⏳ Aguardando rollout..."
+if ! oc rollout status "deployment/${IMAGE_NAME}" -n "${NAMESPACE}" --timeout=300s; then
+    echo "❌ Rollout falhou ou timeout"
+    oc get pods -n "${NAMESPACE}" -l "app.kubernetes.io/name=${IMAGE_NAME}"
+    exit 1
+fi
+echo "✅ Rollout concluído com sucesso!"
+echo ""
+
+# 7. Show the deployment
+echo "✅ Verificando deployment..."
+oc get deployment "${IMAGE_NAME}" -n "${NAMESPACE}"
+oc get pods -n "${NAMESPACE}" -l "app.kubernetes.io/name=${IMAGE_NAME}"
+echo ""
+
+# 8. Look up the route URL
+ROUTE_URL=$(oc get route "${IMAGE_NAME}-route" -n "${NAMESPACE}" -o jsonpath='{.spec.host}' 2>/dev/null || echo "")
+if [ -n "$ROUTE_URL" ]; then
+    echo "🚀 Application deployed successfully!"
+    echo "🌐 URL: https://$ROUTE_URL"
+    echo "📊 Status: oc get pods -n ${NAMESPACE} -l app.kubernetes.io/name=${IMAGE_NAME}"
+else
+    echo "⚠️ Rota não encontrada. Verifique se o cluster suporta Routes."
+    echo "💡 Para acessar localmente: oc port-forward service/${IMAGE_NAME}-service 8080:8080 -n ${NAMESPACE}"
+fi
+echo ""
+
+echo "✅ Deploy concluído!"
+echo ""
+echo "🔧 Comandos úteis:"
+echo " Ver logs: oc logs -f deployment/${IMAGE_NAME} -n ${NAMESPACE}"
+echo " Port-forward: oc port-forward service/${IMAGE_NAME}-service 8080:8080 -n ${NAMESPACE}"
+echo " Status: oc get pods -n ${NAMESPACE} -l app.kubernetes.io/name=${IMAGE_NAME}"
diff --git a/deploy-zero-downtime.sh b/deploy-zero-downtime.sh
new file mode 100755
index 0000000..82baa8a
--- /dev/null
+++ b/deploy-zero-downtime.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+
+# Zero-downtime deploy script (rolling update).
+# Keeps the application serving while an update is rolled out.
+#
+# Usage: ./deploy-zero-downtime.sh [TAG|IMAGE]
+#   A bare tag (default: latest) is expanded to andersonid/resource-governance:<tag>;
+#   an argument containing "/" is used as a complete image reference.
+
+set -e
+
+# Settings
+IMAGE_NAME="resource-governance"
+REGISTRY="andersonid"
+NAMESPACE="resource-governance"
+TAG=${1:-"latest"}
+# A tag cannot contain "/", so such an argument is already a full image
+# reference and must not get the registry prefix a second time.
+case "$TAG" in
+    */*) FULL_IMAGE="$TAG" ;;
+    *) FULL_IMAGE="$REGISTRY/$IMAGE_NAME:$TAG" ;;
+esac
+
+echo "🚀 Deploy ZERO DOWNTIME para OpenShift"
+echo "======================================"
+echo "Imagem: $FULL_IMAGE"
+echo "Namespace: $NAMESPACE"
+echo "Estratégia: Rolling Update (Zero Downtime)"
+echo ""
+
+# Check the OpenShift login
+if ! oc whoami > /dev/null 2>&1; then
+    echo "❌ Não está logado no OpenShift. Execute: oc login"
+    exit 1
+fi
+
+echo "✅ Logado no OpenShift como: $(oc whoami)"
+echo ""
+
+# Waits until the rollout of a deployment completes.
+#   $1 deployment name, $2 namespace, $3 timeout in seconds (default 300)
+check_pods_ready() {
+    local deployment=$1
+    local namespace=$2
+    local timeout=${3:-300}
+
+    echo "⏳ Aguardando pods do deployment $deployment ficarem prontos..."
+    oc rollout status "deployment/$deployment" -n "$namespace" --timeout="${timeout}s"
+}
+
+# Checks that the application answers its health endpoint with HTTP 200.
+#   $1 service name, $2 namespace, $3 port (default 8080)
+check_app_health() {
+    local service=$1
+    local namespace=$2
+    local port=${3:-8080}
+
+    echo "🔍 Verificando saúde da aplicação..."
+
+    # Temporary port-forward used only for this probe
+    local temp_pid
+    oc port-forward "service/$service" "$port:$port" -n "$namespace" > /dev/null 2>&1 &
+    temp_pid=$!
+
+    # Give the port-forward time to start
+    sleep 3
+
+    # Probe the health endpoint. curl already prints 000 when it gets no HTTP
+    # response, so the fallback replaces the value instead of appending to it.
+    local health_status
+    health_status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "http://localhost:$port/api/v1/health" 2>/dev/null) || health_status="000"
+
+    # Stop the temporary port-forward and reap it
+    kill "$temp_pid" 2>/dev/null || true
+    wait "$temp_pid" 2>/dev/null || true
+
+    if [ "$health_status" = "200" ]; then
+        echo "✅ Aplicação saudável (HTTP $health_status)"
+        return 0
+    else
+        echo "❌ Aplicação não saudável (HTTP $health_status)"
+        return 1
+    fi
+}
+
+# Apply the base manifests
+echo "📋 Aplicando manifests básicos..."
+oc apply -f k8s/namespace.yaml
+oc apply -f k8s/rbac.yaml
+oc apply -f k8s/configmap.yaml
+
+# Check whether the deployment already exists
+if oc get deployment "$IMAGE_NAME" -n "$NAMESPACE" > /dev/null 2>&1; then
+    echo "🔄 Deployment existente encontrado. Iniciando atualização zero-downtime..."
+
+    # Current replica count (informational only)
+    CURRENT_REPLICAS=$(oc get deployment "$IMAGE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.replicas}')
+    echo "📊 Réplicas atuais: $CURRENT_REPLICAS"
+
+    # Update the deployment image
+    echo "🔄 Atualizando imagem para: $FULL_IMAGE"
+    oc set image "deployment/$IMAGE_NAME" "$IMAGE_NAME=$FULL_IMAGE" -n "$NAMESPACE"
+
+    # Wait for the rollout with a longer timeout
+    echo "⏳ Aguardando rollout (pode levar alguns minutos)..."
+    if check_pods_ready "$IMAGE_NAME" "$NAMESPACE" 600; then
+        echo "✅ Rollout concluído com sucesso!"
+
+        # Check the application health
+        if check_app_health "${IMAGE_NAME}-service" "$NAMESPACE"; then
+            echo "🎉 Deploy zero-downtime concluído com sucesso!"
+        else
+            echo "⚠️ Deploy concluído, mas aplicação pode não estar saudável"
+            echo "🔍 Verifique os logs: oc logs -f deployment/$IMAGE_NAME -n $NAMESPACE"
+        fi
+    else
+        echo "❌ Rollout falhou ou timeout"
+        echo "🔍 Verificando status dos pods:"
+        oc get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=$IMAGE_NAME"
+        exit 1
+    fi
+else
+    echo "🆕 Deployment não existe. Criando novo deployment..."
+    oc apply -f k8s/deployment.yaml
+    oc apply -f k8s/service.yaml
+    oc apply -f k8s/route.yaml
+    # The manifest carries its own image; switch to the requested one
+    oc set image "deployment/$IMAGE_NAME" "$IMAGE_NAME=$FULL_IMAGE" -n "$NAMESPACE"
+
+    # Wait for the pods to become ready
+    if check_pods_ready "$IMAGE_NAME" "$NAMESPACE" 300; then
+        echo "✅ Novo deployment criado com sucesso!"
+    else
+        echo "❌ Falha ao criar deployment"
+        exit 1
+    fi
+fi
+
+# Final status
+echo ""
+echo "📊 STATUS FINAL:"
+echo "================"
+oc get deployment "$IMAGE_NAME" -n "$NAMESPACE"
+echo ""
+oc get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=$IMAGE_NAME"
+echo ""
+
+# Look up the route URL
+ROUTE_URL=$(oc get route "${IMAGE_NAME}-route" -n "$NAMESPACE" -o jsonpath='{.spec.host}' 2>/dev/null || echo "")
+if [ -n "$ROUTE_URL" ]; then
+    echo "🌐 URLs de acesso:"
+    echo " OpenShift: https://$ROUTE_URL"
+    echo " Port-forward: http://localhost:8080 (se ativo)"
+    echo ""
+    echo "💡 Para iniciar port-forward: oc port-forward service/${IMAGE_NAME}-service 8080:8080 -n $NAMESPACE"
+fi
+
+echo ""
+echo "✅ Deploy zero-downtime concluído!"
+echo "🔄 Estratégia: Rolling Update com maxUnavailable=0 (zero downtime)"
diff --git a/k8s/configmap.yaml b/k8s/configmap.yaml
index 5afb3ff..a94cb1c 100644
--- a/k8s/configmap.yaml
+++ b/k8s/configmap.yaml
@@ -18,7 +18,7 @@ data:
 
   # Configurações de filtro de namespaces
   INCLUDE_SYSTEM_NAMESPACES: "false"
-  SYSTEM_NAMESPACE_PREFIXES: '["kube-", "openshift-", "default", "kube-system", "kube-public", "kube-node-lease"]'
+  SYSTEM_NAMESPACE_PREFIXES: '["kube-", "openshift-", "knative-", "default", "kube-system", "kube-public", "kube-node-lease"]'
 
   # URL do Prometheus
   PROMETHEUS_URL: "http://prometheus.openshift-monitoring.svc.cluster.local:9090"
diff --git a/k8s/deployment.yaml b/k8s/deployment.yaml
index 6b6a514..f7dcd58 100644
--- a/k8s/deployment.yaml
+++ b/k8s/deployment.yaml
@@ -11,8 +11,8 @@ spec:
   strategy:
     type: RollingUpdate
     rollingUpdate:
-      maxUnavailable: 1
-      maxSurge: 1
+      maxUnavailable: 0  # Never take a pod down before its replacement is ready
+      maxSurge: 1        # Allow 1 extra pod during the rollout
   selector:
     matchLabels:
       app.kubernetes.io/name: resource-governance
@@ -50,10 +50,11 @@ spec:
           httpGet:
             path: /api/v1/health
             port: 8080
-          initialDelaySeconds: 5
+          initialDelaySeconds: 15  # Give the application more time to start
           periodSeconds: 5
           timeoutSeconds: 3
-          failureThreshold: 3
+          failureThreshold: 5      # More attempts before the probe is marked failed
+          successThreshold: 2      # Needs 2 consecutive successes (only readiness probes accept a value other than 1)
         securityContext:
           allowPrivilegeEscalation: false
           capabilities: