Fix: GitHub Actions for public clusters + deployment guide for colleagues

This commit is contained in:
2025-09-25 18:26:05 -03:00
parent 66a5bb116f
commit 9f8cdbda7a
7 changed files with 549 additions and 11 deletions

63
.github/workflows/build-only.yml vendored Normal file
View File

@@ -0,0 +1,63 @@
# Builds the container image with Podman on every push, pull request and
# manual run. The image is published to Docker Hub only for pushes and manual
# runs: pull requests (in particular from forks, which receive no secrets)
# only verify that the image builds and never overwrite the `latest` tag.
name: Build and Push Image

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]
  workflow_dispatch:

env:
  IMAGE_NAME: resource-governance
  REGISTRY: andersonid

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Run basic syntax check
        run: |
          python -m py_compile app/main.py
          echo "✅ Syntax check passed"

      - name: Set up Podman
        run: |
          sudo apt-get update -qq
          sudo apt-get install -y -qq podman buildah skopeo

      - name: Build image
        env:
          # Fully qualified so Podman does not depend on short-name resolution.
          IMAGE: docker.io/${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        run: |
          # Build the image using layer caching
          podman build --layers -t "${IMAGE}:${GITHUB_SHA}" .

      - name: Login to Docker Hub
        if: github.event_name != 'pull_request'
        env:
          # Secrets are passed through the environment instead of being
          # interpolated into the script text.
          DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
        run: |
          printf '%s' "$DOCKERHUB_TOKEN" | podman login docker.io -u "$DOCKERHUB_USERNAME" --password-stdin

      - name: Push image
        if: github.event_name != 'pull_request'
        env:
          IMAGE: docker.io/${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        run: |
          # Tag the commit image as latest
          podman tag "${IMAGE}:${GITHUB_SHA}" "${IMAGE}:latest"
          # Push both tags
          podman push "${IMAGE}:${GITHUB_SHA}"
          podman push "${IMAGE}:latest"

      - name: Output deployment info
        if: github.event_name != 'pull_request'
        run: |
          echo "🚀 Image built and pushed successfully!"
          echo "📦 Image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
          echo "📦 Latest: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest"
          echo ""
          echo "🔧 To deploy to your OpenShift cluster:"
          echo "1. Clone this repository"
          echo "2. Run: ./deploy-to-cluster.sh ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }}"
          # deploy-zero-downtime.sh takes a tag and prepends registry/name itself
          echo "3. Or use: ./deploy-zero-downtime.sh ${{ github.sha }}"

View File

@@ -1,11 +1,14 @@
name: Deploy to OpenShift # DISABLED: This workflow is disabled because it requires access to internal OpenShift clusters
# Use build-only.yml for public clusters and deploy-to-cluster.sh for local deployment
name: Deploy to OpenShift (DISABLED)
on: on:
push: # Disabled - use build-only.yml instead
branches: [ main ] # push:
pull_request: # branches: [ main ]
branches: [ main ] # pull_request:
workflow_dispatch: # branches: [ main ]
# workflow_dispatch:
env: env:
IMAGE_NAME: resource-governance IMAGE_NAME: resource-governance

244
README-DEPLOYMENT.md Normal file
View File

@@ -0,0 +1,244 @@
# 🚀 OpenShift Resource Governance - Guia de Deploy
## 📋 Visão Geral
Esta aplicação monitora e analisa recursos (CPU/Memory) de pods em clusters OpenShift, fornecendo validações e recomendações baseadas em melhores práticas.
## 🔧 Pré-requisitos
- **OpenShift CLI (oc)** instalado e configurado
- **Acesso a um cluster OpenShift** (público ou privado)
- **Permissões de cluster-admin** ou admin do namespace
## 🚀 Deploy Rápido
### 1. Clone o repositório
```bash
git clone https://github.com/andersonid/openshift-resource-governance.git
cd openshift-resource-governance
```
### 2. Faça login no OpenShift
```bash
oc login https://your-cluster.com
# Ou para clusters internos:
oc login https://api.internal-cluster.com --token=your-token
```
### 3. Deploy da aplicação
```bash
# Deploy simples
./deploy-to-cluster.sh
# Deploy com imagem específica
./deploy-to-cluster.sh andersonid/resource-governance:v1.0.0
# Deploy zero-downtime (recomendado para produção)
./deploy-zero-downtime.sh
```
## 🌐 Acesso à Aplicação
### Via OpenShift Route (recomendado)
```bash
# Obter URL da rota
oc get route resource-governance-route -n resource-governance
# Acessar no navegador
# https://resource-governance-route-your-cluster.com
```
### Via Port-Forward (desenvolvimento)
```bash
# Iniciar port-forward
oc port-forward service/resource-governance-service 8080:8080 -n resource-governance
# Acessar no navegador
# http://localhost:8080
```
## 🔄 Atualizações
### Atualização Automática (GitHub Actions)
- Push para as branches `main` ou `develop` (ou execução manual do workflow) → Build automático da imagem
- Imagem disponível em: `andersonid/resource-governance:latest`
### Atualização Manual
```bash
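# 1. Apontar o deployment para a nova imagem
oc set image deployment/resource-governance resource-governance=andersonid/resource-governance:latest -n resource-governance
# Se o deployment já referencia exatamente essa tag (ex.: latest), o comando acima não altera nada;
# nesse caso force um novo rollout:
oc rollout restart deployment/resource-governance -n resource-governance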
# 2. Aguardar rollout
oc rollout status deployment/resource-governance -n resource-governance
# 3. Verificar status
oc get pods -n resource-governance
```
## 🛠️ Configuração Avançada
### ConfigMap
```bash
# Editar configurações
oc edit configmap resource-governance-config -n resource-governance
# Aplicar mudanças
oc rollout restart deployment/resource-governance -n resource-governance
```
### Recursos e Limites
```bash
# Verificar recursos atuais
oc describe deployment resource-governance -n resource-governance
# Ajustar recursos (se necessário)
oc patch deployment resource-governance -n resource-governance -p '{"spec":{"template":{"spec":{"containers":[{"name":"resource-governance","resources":{"requests":{"cpu":"100m","memory":"256Mi"},"limits":{"cpu":"500m","memory":"1Gi"}}}]}}}}'
```
## 🔍 Troubleshooting
### Verificar Status
```bash
# Status geral
oc get all -n resource-governance
# Logs da aplicação
oc logs -f deployment/resource-governance -n resource-governance
# Eventos do namespace
oc get events -n resource-governance --sort-by='.lastTimestamp'
```
### Problemas Comuns
#### 1. Pod não inicia
```bash
# Verificar logs
oc logs deployment/resource-governance -n resource-governance
# Verificar eventos
oc describe pod -l app.kubernetes.io/name=resource-governance -n resource-governance
```
#### 2. Erro de permissão
```bash
# Verificar RBAC
oc get clusterrole resource-governance-role
oc get clusterrolebinding resource-governance-binding
# Recriar RBAC se necessário
oc apply -f k8s/rbac.yaml
```
#### 3. Imagem não encontrada
```bash
# Verificar se a imagem existe
oc describe deployment resource-governance -n resource-governance
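# Corrigir a referência da imagem no deployment
oc set image deployment/resource-governance resource-governance=andersonid/resource-governance:latest -n resource-governance
# Se a referência já estava correta, force um novo rollout para tentar o pull novamente
oc rollout restart deployment/resource-governance -n resource-governance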
```
## 📊 Monitoramento
### Health Checks
```bash
# Health check da aplicação (requer o port-forward ativo em localhost:8080, veja "Via Port-Forward")
curl http://localhost:8080/api/v1/health
# Status do cluster
curl http://localhost:8080/api/v1/status
```
### Métricas
- **Total de Pods**: Número total de pods analisados
- **Namespaces**: Número de namespaces monitorados
- **Problemas Críticos**: Validações com severidade crítica
- **Análise Histórica**: Dados do Prometheus (se disponível)
## 🔐 Segurança
### RBAC
A aplicação usa um ServiceAccount com permissões mínimas necessárias:
- `get`, `list` pods em todos os namespaces
- `get`, `list` nodes
- `get`, `list` VPA resources
### Network Policies
Para clusters com Network Policies ativas, adicione:
```yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
name: resource-governance-netpol
namespace: resource-governance
spec:
podSelector:
matchLabels:
app.kubernetes.io/name: resource-governance
policyTypes:
- Ingress
- Egress
ingress:
- from: []
egress:
- to: []
```
## 📝 Logs e Debugging
### Logs da Aplicação
```bash
# Logs em tempo real
oc logs -f deployment/resource-governance -n resource-governance
# Logs com timestamp
oc logs deployment/resource-governance -n resource-governance --timestamps=true
```
### Debug de Conectividade
```bash
# Testar conectividade com API do Kubernetes
oc exec deployment/resource-governance -n resource-governance -- curl -k https://kubernetes.default.svc.cluster.local/api/v1/pods
# Testar conectividade com Prometheus (se configurado)
oc exec deployment/resource-governance -n resource-governance -- curl http://prometheus.openshift-monitoring.svc.cluster.local:9090/api/v1/query
```
## 🆘 Suporte
### Informações do Cluster
```bash
# Versão do OpenShift
oc version
# Informações do cluster
oc cluster-info
# Recursos disponíveis
oc get nodes
oc top nodes
```
### Coletar Informações para Debug
```bash
# Script de diagnóstico
oc get all -n resource-governance -o yaml > resource-governance-debug.yaml
oc describe deployment resource-governance -n resource-governance >> resource-governance-debug.yaml
oc logs deployment/resource-governance -n resource-governance >> resource-governance-debug.yaml
```
---
## 🎯 Próximos Passos
1. **Configure alertas** para problemas críticos
2. **Integre com Prometheus** para análise histórica
3. **Configure VPA** para namespaces críticos
4. **Personalize validações** conforme suas políticas
---
**Desenvolvido por:** Anderson Nobre
**Repositório:** https://github.com/andersonid/openshift-resource-governance
**Suporte:** Abra uma issue no GitHub

82
deploy-to-cluster.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/bin/bash
# Deploys the OpenShift Resource Governance application to whichever cluster
# the `oc` CLI is currently logged in to (public or private).
#
# Usage: ./deploy-to-cluster.sh [IMAGE]
#   IMAGE  Full image reference (e.g. andersonid/resource-governance:v1.0.0)
#          or a bare tag (e.g. v1.0.0). Defaults to the "latest" tag.

# Stop at the first failing command so a broken apply or rollout is never
# reported as a successful deploy.
set -euo pipefail

# Settings
IMAGE_NAME="resource-governance"
REGISTRY="andersonid"
NAMESPACE="resource-governance"

# A bare tag is expanded to REGISTRY/IMAGE_NAME:TAG; anything containing "/"
# or ":" is taken as a complete image reference. IMAGE_TAG keeps its original
# name but always holds a full image reference from here on.
IMAGE_ARG="${1:-latest}"
case "$IMAGE_ARG" in
    */* | *:*) IMAGE_TAG="$IMAGE_ARG" ;;
    *) IMAGE_TAG="${REGISTRY}/${IMAGE_NAME}:${IMAGE_ARG}" ;;
esac

echo "🚀 Deploy para OpenShift Cluster"
echo "================================"
echo "Imagem: ${IMAGE_TAG}"
echo "Namespace: ${NAMESPACE}"
echo ""

# 1. Make sure we are logged in to OpenShift
if ! oc whoami > /dev/null 2>&1; then
    echo "❌ Não logado no OpenShift. Por favor, faça login com 'oc login'."
    echo "💡 Exemplo: oc login https://your-cluster.com"
    exit 1
fi
echo "✅ Logado no OpenShift como: $(oc whoami)"
echo ""

# 2. Create the namespace if it does not exist yet
if ! oc get namespace "${NAMESPACE}" > /dev/null 2>&1; then
    echo "📋 Criando namespace ${NAMESPACE}..."
    oc create namespace "${NAMESPACE}"
else
    echo "✅ Namespace ${NAMESPACE} já existe"
fi
echo ""

# 3. Apply the base manifests (rbac, configmap)
echo "📋 Aplicando manifests..."
oc apply -f k8s/rbac.yaml
oc apply -f k8s/configmap.yaml
echo ""

# 4. Apply deployment, service and route
echo "📦 Aplicando deployment, service e route..."
oc apply -f k8s/deployment.yaml
oc apply -f k8s/service.yaml
oc apply -f k8s/route.yaml
echo ""

# 5. Point the deployment at the requested image. This must run AFTER the
#    apply above: applying the manifest resets the image to the one written in
#    k8s/deployment.yaml, and on a first deploy the deployment does not exist
#    before the apply.
echo "🔄 Atualizando imagem do deployment..."
oc set image "deployment/${IMAGE_NAME}" "${IMAGE_NAME}=${IMAGE_TAG}" -n "${NAMESPACE}"
echo ""

# 6. Wait for the rollout and fail loudly if it does not complete
echo "⏳ Aguardando rollout..."
if ! oc rollout status "deployment/${IMAGE_NAME}" -n "${NAMESPACE}" --timeout=300s; then
    echo "❌ Rollout falhou ou timeout"
    oc get pods -n "${NAMESPACE}" -l "app.kubernetes.io/name=${IMAGE_NAME}" || true
    exit 1
fi
echo "✅ Rollout concluído com sucesso!"
echo ""

# 7. Show the resulting deployment and pods
echo "✅ Verificando deployment..."
oc get deployment "${IMAGE_NAME}" -n "${NAMESPACE}"
oc get pods -n "${NAMESPACE}" -l "app.kubernetes.io/name=${IMAGE_NAME}"
echo ""

# 8. Print the route URL when a route is available
ROUTE_URL=$(oc get route "${IMAGE_NAME}-route" -n "${NAMESPACE}" -o jsonpath='{.spec.host}' 2>/dev/null || echo "")
if [ -n "$ROUTE_URL" ]; then
    echo "🚀 Application deployed successfully!"
    echo "🌐 URL: https://$ROUTE_URL"
    echo "📊 Status: oc get pods -n ${NAMESPACE} -l app.kubernetes.io/name=${IMAGE_NAME}"
else
    echo "⚠️ Rota não encontrada. Verifique se o cluster suporta Routes."
    echo "💡 Para acessar localmente: oc port-forward service/${IMAGE_NAME}-service 8080:8080 -n ${NAMESPACE}"
fi

echo ""
echo "✅ Deploy concluído!"
echo ""
echo "🔧 Comandos úteis:"
echo " Ver logs: oc logs -f deployment/${IMAGE_NAME} -n ${NAMESPACE}"
echo " Port-forward: oc port-forward service/${IMAGE_NAME}-service 8080:8080 -n ${NAMESPACE}"
echo " Status: oc get pods -n ${NAMESPACE} -l app.kubernetes.io/name=${IMAGE_NAME}"

145
deploy-zero-downtime.sh Executable file
View File

@@ -0,0 +1,145 @@
#!/bin/bash
# Zero-downtime deploy script. It updates the existing deployment's image and
# waits for the rollout, i.e. it performs a rolling update (it does not run
# two parallel blue/green environments).
#
# Usage: ./deploy-zero-downtime.sh [TAG | IMAGE]
#   TAG    Image tag, expanded to REGISTRY/IMAGE_NAME:TAG (default: latest)
#   IMAGE  Full image reference (anything containing "/"), used as-is
set -e

# Settings
IMAGE_NAME="resource-governance"
REGISTRY="andersonid"
NAMESPACE="resource-governance"
TAG=${1:-"latest"}

# Accept a full image reference as well as a bare tag, so that an argument
# such as andersonid/resource-governance:<sha> is not prefixed a second time.
case "$TAG" in
    */*) FULL_IMAGE="$TAG" ;;
    *) FULL_IMAGE="$REGISTRY/$IMAGE_NAME:$TAG" ;;
esac

echo "🚀 Deploy ZERO DOWNTIME para OpenShift"
echo "======================================"
echo "Imagem: $FULL_IMAGE"
echo "Namespace: $NAMESPACE"
echo "Estratégia: Rolling Update (Zero Downtime)"
echo ""

# Make sure we are logged in to OpenShift
if ! oc whoami > /dev/null 2>&1; then
    echo "❌ Não está logado no OpenShift. Execute: oc login"
    exit 1
fi
echo "✅ Logado no OpenShift como: $(oc whoami)"
echo ""
# Waits until the rollout of a deployment completes.
# Arguments:
#   $1 - deployment name
#   $2 - namespace
#   $3 - timeout in seconds (default: 300)
# Returns: the exit status of `oc rollout status`.
check_pods_ready() {
    local deployment=$1
    local namespace=$2
    local timeout=${3:-300}

    echo "⏳ Aguardando pods do deployment $deployment ficarem prontos..."
    # Expansions are quoted so the arguments are never word-split or globbed.
    oc rollout status "deployment/$deployment" -n "$namespace" --timeout="${timeout}s"
}
# Checks that the application answers HTTP 200 on /api/v1/health.
# A temporary `oc port-forward` to the service is opened on the same local
# port, polled for up to ~10 seconds, and then shut down again.
# Arguments:
#   $1 - service name
#   $2 - namespace
#   $3 - port, used both locally and on the service (default: 8080)
# Returns: 0 when the health endpoint answered 200, 1 otherwise.
check_app_health() {
    local service=$1
    local namespace=$2
    local port=${3:-8080}
    local max_attempts=10
    local attempt
    local temp_pid
    local health_status="000"

    echo "🔍 Verificando saúde da aplicação..."

    # Temporary port-forward used only for this probe
    oc port-forward "service/$service" "$port:$port" -n "$namespace" > /dev/null 2>&1 &
    temp_pid=$!

    # Poll instead of relying on a single fixed sleep: the port-forward may
    # need more (or less) time to become usable.
    for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        sleep 1
        # Give up early if the port-forward process has already exited
        if ! kill -0 "$temp_pid" 2>/dev/null; then
            break
        fi
        # On connection failure curl itself prints "000" through -w, so the
        # fallback must not print a second code; it only absorbs the exit status.
        health_status=$(curl -s -o /dev/null --max-time 5 -w "%{http_code}" "http://localhost:$port/api/v1/health" 2>/dev/null || true)
        health_status=${health_status:-000}
        if [ "$health_status" = "200" ]; then
            break
        fi
    done

    # Stop the temporary port-forward and reap it
    kill "$temp_pid" 2>/dev/null || true
    wait "$temp_pid" 2>/dev/null || true

    if [ "$health_status" = "200" ]; then
        echo "✅ Aplicação saudável (HTTP $health_status)"
        return 0
    else
        echo "❌ Aplicação não saudável (HTTP $health_status)"
        return 1
    fi
}
# Apply the base manifests
echo "📋 Aplicando manifests básicos..."
oc apply -f k8s/namespace.yaml
oc apply -f k8s/rbac.yaml
oc apply -f k8s/configmap.yaml

# Update in place when the deployment already exists, otherwise create it
if oc get deployment "$IMAGE_NAME" -n "$NAMESPACE" > /dev/null 2>&1; then
    echo "🔄 Deployment existente encontrado. Iniciando atualização zero-downtime..."

    # Current replica count (informational only)
    CURRENT_REPLICAS=$(oc get deployment "$IMAGE_NAME" -n "$NAMESPACE" -o jsonpath='{.spec.replicas}')
    echo "📊 Réplicas atuais: $CURRENT_REPLICAS"

    # Point the deployment at the new image; this triggers the rollout
    echo "🔄 Atualizando imagem para: $FULL_IMAGE"
    oc set image "deployment/$IMAGE_NAME" "$IMAGE_NAME=$FULL_IMAGE" -n "$NAMESPACE"

    # Wait for the rollout with a longer timeout
    echo "⏳ Aguardando rollout (pode levar alguns minutos)..."
    if check_pods_ready "$IMAGE_NAME" "$NAMESPACE" 600; then
        echo "✅ Rollout concluído com sucesso!"

        # Probe the application; an unhealthy result is reported but not fatal
        if check_app_health "${IMAGE_NAME}-service" "$NAMESPACE"; then
            echo "🎉 Deploy zero-downtime concluído com sucesso!"
        else
            echo "⚠️ Deploy concluído, mas aplicação pode não estar saudável"
            echo "🔍 Verifique os logs: oc logs -f deployment/$IMAGE_NAME -n $NAMESPACE"
        fi
    else
        echo "❌ Rollout falhou ou timeout"
        echo "🔍 Verificando status dos pods:"
        oc get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=$IMAGE_NAME"
        exit 1
    fi
else
    echo "🆕 Deployment não existe. Criando novo deployment..."
    oc apply -f k8s/deployment.yaml
    oc apply -f k8s/service.yaml
    oc apply -f k8s/route.yaml

    # The manifest carries its own image reference; apply the requested image
    # so a first deploy honours the script argument too.
    echo "🔄 Atualizando imagem para: $FULL_IMAGE"
    oc set image "deployment/$IMAGE_NAME" "$IMAGE_NAME=$FULL_IMAGE" -n "$NAMESPACE"

    # Wait for the pods to become ready
    if check_pods_ready "$IMAGE_NAME" "$NAMESPACE" 300; then
        echo "✅ Novo deployment criado com sucesso!"
    else
        echo "❌ Falha ao criar deployment"
        exit 1
    fi
fi

# Final status
echo ""
echo "📊 STATUS FINAL:"
echo "================"
oc get deployment "$IMAGE_NAME" -n "$NAMESPACE"
echo ""
oc get pods -n "$NAMESPACE" -l "app.kubernetes.io/name=$IMAGE_NAME"
echo ""

# Print the route URL when a route is available
ROUTE_URL=$(oc get route "${IMAGE_NAME}-route" -n "$NAMESPACE" -o jsonpath='{.spec.host}' 2>/dev/null || echo "")
if [ -n "$ROUTE_URL" ]; then
    echo "🌐 URLs de acesso:"
    echo " OpenShift: https://$ROUTE_URL"
    echo " Port-forward: http://localhost:8080 (se ativo)"
    echo ""
    echo "💡 Para iniciar port-forward: oc port-forward service/${IMAGE_NAME}-service 8080:8080 -n $NAMESPACE"
fi

echo ""
echo "✅ Deploy zero-downtime concluído!"
echo "🔄 Estratégia: Rolling Update com maxUnavailable=0 (zero downtime)"

View File

@@ -18,7 +18,7 @@ data:
# Configurações de filtro de namespaces # Configurações de filtro de namespaces
INCLUDE_SYSTEM_NAMESPACES: "false" INCLUDE_SYSTEM_NAMESPACES: "false"
SYSTEM_NAMESPACE_PREFIXES: '["kube-", "openshift-", "default", "kube-system", "kube-public", "kube-node-lease"]' SYSTEM_NAMESPACE_PREFIXES: '["kube-", "openshift-", "knative-", "default", "kube-system", "kube-public", "kube-node-lease"]'
# URL do Prometheus # URL do Prometheus
PROMETHEUS_URL: "http://prometheus.openshift-monitoring.svc.cluster.local:9090" PROMETHEUS_URL: "http://prometheus.openshift-monitoring.svc.cluster.local:9090"

View File

@@ -11,8 +11,8 @@ spec:
strategy: strategy:
type: RollingUpdate type: RollingUpdate
rollingUpdate: rollingUpdate:
maxUnavailable: 1 maxUnavailable: 0 # Nunca derruba pods até o novo estar pronto
maxSurge: 1 maxSurge: 1 # Permite 1 pod extra durante o rollout
selector: selector:
matchLabels: matchLabels:
app.kubernetes.io/name: resource-governance app.kubernetes.io/name: resource-governance
@@ -50,10 +50,11 @@ spec:
httpGet: httpGet:
path: /api/v1/health path: /api/v1/health
port: 8080 port: 8080
initialDelaySeconds: 5 initialDelaySeconds: 15 # Aguarda mais tempo para inicializar
periodSeconds: 5 periodSeconds: 5
timeoutSeconds: 3 timeoutSeconds: 3
failureThreshold: 3 failureThreshold: 5 # Mais tentativas antes de falhar
successThreshold: 2 # Precisa de 2 sucessos consecutivos
securityContext: securityContext:
allowPrivilegeEscalation: false allowPrivilegeEscalation: false
capabilities: capabilities: