diff --git a/.gitignore b/.gitignore index d3db86ce..c90ffc76 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ sources.list.bak # Generated configs config/traefik/acme.json config/traefik/dynamic/*.generated.yml +.aider* diff --git a/config/cn-mirrors.yml b/config/cn-mirrors.yml new file mode 100644 index 00000000..2e37cf5d --- /dev/null +++ b/config/cn-mirrors.yml @@ -0,0 +1,26 @@ +# ============================================================================= +# CN Mirror Mapping — 国内镜像源映射表 +# Used by scripts/localize-images.sh to replace images for CN network. +# +# Format: original_image: cn_mirror_image +# Only gcr.io / ghcr.io images need replacement; Docker Hub images use +# the daemon-level mirror configured by scripts/setup-cn-mirrors.sh. +# ============================================================================= +mirrors: + # --- Monitoring / Observability --- + gcr.io/cadvisor/cadvisor: m.daocloud.io/gcr.io/cadvisor/cadvisor + + # --- SSO / Identity --- + ghcr.io/goauthentik/server: m.daocloud.io/ghcr.io/goauthentik/server + + # --- Home Automation --- + ghcr.io/home-assistant/home-assistant: m.daocloud.io/ghcr.io/home-assistant/home-assistant + + # --- AI Stack --- + ghcr.io/open-webui/open-webui: m.daocloud.io/ghcr.io/open-webui/open-webui + + # --- Registry prefixes (catch-all for future images) --- + # gcr.io: m.daocloud.io/gcr.io + # ghcr.io: m.daocloud.io/ghcr.io + # quay.io: m.daocloud.io/quay.io + # registry.k8s.io: m.daocloud.io/registry.k8s.io diff --git a/install.sh b/install.sh old mode 100644 new mode 100755 index e911d519..e72628f9 --- a/install.sh +++ b/install.sh @@ -1,6 +1,10 @@ #!/usr/bin/env bash # ============================================================================= -# HomeLab Stack — Installer +# HomeLab Stack — Robust Installer +# Handles Docker installation, system checks, CN network adaptation, and +# base infrastructure launch. 
+#
+# Usage: sudo ./install.sh
 # =============================================================================
 set -euo pipefail
 IFS=$'\n\t'
@@ -13,13 +17,67 @@ log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
 log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
 log_step() { echo -e "\n${BLUE}${BOLD}==> $*${NC}"; }
 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
+LOG_DIR="$HOME/.homelab"
+LOG_FILE="$LOG_DIR/install.log"
+
+mkdir -p "$LOG_DIR"
+exec > >(tee -a "$LOG_FILE") 2>&1
+
 cleanup() {
-  if [[ $? -ne 0 ]]; then
-    log_error "Installation failed. Check logs at ~/.homelab/install.log"
+  local exit_code=$?
+  if [[ $exit_code -ne 0 ]]; then
+    log_error "Installation failed (exit code: $exit_code). Check logs at $LOG_FILE"
   fi
 }
 trap cleanup EXIT
 
+# ---------------------------------------------------------------------------
+# Network request wrapper with exponential backoff
+# Usage: curl_retry <curl args...>
+# Runs curl up to 3 times (10s connect / 60s total timeout per attempt),
+# sleeping 5s, then 10s between attempts. Returns 0 on the first success,
+# 1 after the final failure.
+# ---------------------------------------------------------------------------
+curl_retry() {
+  local max_attempts=3
+  local delay=5
+  # Local IFS: the script-wide IFS is $'\n\t', which would make "$*" in the
+  # failure message print one curl argument per line. "$@" is unaffected.
+  local i IFS=' '
+  for ((i = 1; i <= max_attempts; i++)); do
+    if curl --connect-timeout 10 --max-time 60 "$@"; then
+      return 0
+    fi
+    if [[ $i -lt $max_attempts ]]; then
+      log_warn "Attempt $i failed, retrying in ${delay}s..."
+      sleep "$delay"
+      delay=$((delay * 2))
+    fi
+  done
+  log_error "All $max_attempts attempts failed for: curl $*"
+  return 1
+}
+
+# ---------------------------------------------------------------------------
+# Detect OS distribution
+# Sets globals OS_ID and OS_ID_LIKE from /etc/os-release (falling back to
+# /etc/redhat-release, then "unknown"). Sourcing os-release also leaves its
+# other variables (e.g. VERSION_CODENAME) defined in the current shell.
+# ---------------------------------------------------------------------------
+detect_os() {
+  if [[ -f /etc/os-release ]]; then
+    # shellcheck source=/dev/null
+    . /etc/os-release
+    OS_ID="${ID:-unknown}"
+    OS_ID_LIKE="${ID_LIKE:-}"
+  elif [[ -f /etc/redhat-release ]]; then
+    OS_ID="centos"
+    OS_ID_LIKE="rhel"
+  else
+    OS_ID="unknown"
+    export OS_ID_LIKE=""
+  fi
+}
+
 # ---------------------------------------------------------------------------
 # Banner
 # ---------------------------------------------------------------------------
@@ -33,57 +91,233 @@ echo -e "${BOLD} ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚
 echo -e "${BOLD} S T A C K v1.0.0${NC}"
 echo -e ""
 
-# ---------------------------------------------------------------------------
-# Step 1: Check dependencies
-# ---------------------------------------------------------------------------
-log_step "Checking dependencies"
-bash "$(dirname "$0")/scripts/check-deps.sh"
+detect_os
 
-# ---------------------------------------------------------------------------
-# Step 2: CN network detection
-# ---------------------------------------------------------------------------
-log_step "Network environment detection"
-bash "$(dirname "$0")/scripts/check-deps.sh" --network-check
+# =============================================================================
+# Step 1: System Resource Checks
+# =============================================================================
+log_step "Step 1/8: System resource checks"
 
-# ---------------------------------------------------------------------------
-# Step 3: Setup environment
-# ---------------------------------------------------------------------------
-log_step "Environment configuration"
-if [[ ! -f .env ]]; then
-  bash "$(dirname "$0")/scripts/setup-env.sh"
+# Disk space check
+free_gb=$(df -BG / | awk 'NR==2 {gsub(/G/,"",$4); print $4}')
+if [[ "$free_gb" -lt 5 ]]; then
+  log_error "Insufficient disk space: ${free_gb}GB free. Minimum 5GB required."
+  exit 1
+elif [[ "$free_gb" -lt 20 ]]; then
+  log_warn "Low disk space: ${free_gb}GB free. Recommended: >= 20GB."
+else
+  log_info "Disk space: ${free_gb}GB free ✓"
+fi
+
+# Memory check
+if command -v free &>/dev/null; then
+  mem_mb=$(free -m | awk '/^Mem:/ {print $2}')
+  if [[ "$mem_mb" -lt 2048 ]]; then
+    log_warn "Low memory: ${mem_mb}MB. Recommended: >= 2048MB (2GB)."
+  else
+    log_info "Memory: ${mem_mb}MB ✓"
+  fi
+fi
+
+# =============================================================================
+# Step 2: Docker Installation
+# =============================================================================
+log_step "Step 2/8: Docker installation"
+
+if command -v docker &>/dev/null; then
+  docker_ver=$(docker version --format '{{.Server.Version}}' 2>/dev/null || echo '0.0.0')
+  log_info "Docker already installed: v${docker_ver}"
+else
+  log_info "Docker not found. Installing..."
+
+  case "$OS_ID" in
+    ubuntu|debian|linuxmint|pop)
+      apt-get update -qq
+      apt-get install -y -qq ca-certificates curl gnupg lsb-release
+      # download.docker.com has apt repos for debian/ubuntu only; Mint and Pop!_OS use Ubuntu's repo and codename.
+      if [[ "$OS_ID" == "debian" ]]; then docker_distro="debian"; else docker_distro="ubuntu"; fi
+      docker_codename="${UBUNTU_CODENAME:-${VERSION_CODENAME:-$(lsb_release -cs)}}"
+      install -m 0755 -d /etc/apt/keyrings
+      curl_retry -fsSL "https://download.docker.com/linux/${docker_distro}/gpg" -o /etc/apt/keyrings/docker.asc
+      chmod a+r /etc/apt/keyrings/docker.asc
+      echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/${docker_distro} ${docker_codename} stable" > /etc/apt/sources.list.d/docker.list
+      apt-get update -qq
+      apt-get install -y -qq docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+      ;;
+    centos|rhel|rocky|almalinux|fedora)
+      PKG_MGR="yum"
+      if command -v dnf &>/dev/null; then PKG_MGR="dnf"; fi
+      # Docker publishes separate rpm repos for fedora and rhel;
+      # the remaining EL rebuilds (rocky, almalinux) use the centos repo.
+      case "$OS_ID" in fedora|rhel) rpm_distro="$OS_ID" ;; *) rpm_distro="centos" ;; esac
+      $PKG_MGR install -y -q yum-utils
+      yum-config-manager --add-repo "https://download.docker.com/linux/${rpm_distro}/docker-ce.repo"
+      $PKG_MGR install -y -q docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
+      ;;
+    arch|manjaro)
+      pacman -Sy --noconfirm docker docker-compose
+      ;;
+    *)
+      log_error "Unsupported OS: $OS_ID. Please install Docker manually."
+      log_info " https://docs.docker.com/get-docker/"
+      exit 1
+      ;;
+  esac
+
+  systemctl enable docker
+  systemctl start docker
+  log_info "Docker installed and started ✓"
+fi
+
+# =============================================================================
+# Step 3: Docker Compose v2 check
+# =============================================================================
+log_step "Step 3/8: Docker Compose check"
+
+if docker compose version &>/dev/null; then
+  compose_ver=$(docker compose version --short 2>/dev/null)
+  log_info "Docker Compose v2 found: $compose_ver ✓"
+elif command -v docker-compose &>/dev/null; then
+  log_warn "Docker Compose v1 detected. Please upgrade to v2 (plugin)."
+  log_info " https://docs.docker.com/compose/migrate/"
+  log_info " On Debian/Ubuntu: apt install docker-compose-plugin"
+  exit 1
+else
+  log_error "Docker Compose not found. Install docker-compose-plugin."
+  exit 1
+fi
+
+# =============================================================================
+# Step 4: User & Permissions
+# =============================================================================
+log_step "Step 4/8: User & permissions"
+
+if [[ $EUID -ne 0 ]]; then
+  # Non-root: check docker group membership (exact name match, one group per line)
+  if id -nG | tr ' ' '\n' | grep -qx docker; then
+    log_info "User $(whoami) is in docker group ✓"
+  else
+    log_info "Adding $(whoami) to docker group..."
+    sudo usermod -aG docker "$(whoami)"  # this branch is non-root, so usermod needs sudo
+    log_warn "Added to docker group. Please log out and back in, then re-run this script."
+    exit 0
+  fi
+else
+  log_info "Running as root ✓"
+fi
+
+# =============================================================================
+# Step 5: Port Conflict Detection
+# =============================================================================
+log_step "Step 5/8: Port conflict detection"
+
+port_conflict=false
+for port in 53 80 443 3000 8080 9090; do
+  listener=$(ss -tlnp 2>/dev/null | grep ":${port} " | head -1 || true)  # one ss call; no match must not trip set -e
+  [[ -n "$listener" ]] || continue
+  proc=$(sed -n 's/.*users:(("\([^"]*\)".*/\1/p' <<<"$listener")  # empty when ss shows no owner (e.g. non-root)
+  log_warn "Port $port is in use by: ${proc:-unknown}"
+  port_conflict=true
+done
+
+if [[ "$port_conflict" == "false" ]]; then
+  log_info "All required ports available ✓"
+else
+  log_warn "Some ports are in use. Services may fail to start. Resolve conflicts above."
+fi
+
+# =============================================================================
+# Step 6: Firewall Check
+# =============================================================================
+log_step "Step 6/8: Firewall check"
+
+if command -v ufw &>/dev/null && ufw status 2>/dev/null | grep -q "Status: active"; then
+  log_info "UFW is active. Checking rules..."
+  for port in 80 443; do
+    # Anchor the port so a rule for 8080 is not mistaken for port 80.
+    if ufw status | grep -qE "^${port}(/tcp)?[[:space:]].*ALLOW"; then
+      log_info " Port $port: allowed ✓"
+    else
+      log_warn " Port $port: not explicitly allowed. Run: ufw allow $port/tcp"
+    fi
+  done
+elif command -v firewall-cmd &>/dev/null && firewall-cmd --state 2>/dev/null | grep -q "running"; then
+  log_info "Firewalld is active. Checking rules..."
+  for port in 80 443; do
+    if firewall-cmd --query-port="${port}/tcp" &>/dev/null; then
+      log_info " Port $port: allowed ✓"
+    else
+      log_warn " Port $port: not open. Run: firewall-cmd --permanent --add-port=${port}/tcp && firewall-cmd --reload"
+    fi
+  done
+else
+  log_info "No active firewall detected (ufw/firewalld) ✓"
+fi
+
+# =============================================================================
+# Step 7: Network Environment & CN Detection
+# =============================================================================
+log_step "Step 7/8: Network environment detection"
+
+# The script is run through bash, so it only has to exist (-f), not be executable.
+if [[ -f "$SCRIPT_DIR/scripts/check-connectivity.sh" ]]; then
+  bash "$SCRIPT_DIR/scripts/check-connectivity.sh" || true
+fi
+
+# =============================================================================
+# Step 8: Environment Setup & Launch
+# =============================================================================
+log_step "Step 8/8: Environment configuration & launch"
+
+# Create proxy network if needed
+if ! docker network inspect proxy &>/dev/null; then
+  docker network create proxy
+  log_info "Created docker network 'proxy' ✓"
+else
+  log_info "Docker network 'proxy' exists ✓"
+fi
+
+# Setup .env (setup-env.sh is run through bash, so -f is sufficient)
+if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
+  if [[ -f "$SCRIPT_DIR/scripts/setup-env.sh" ]]; then
+    bash "$SCRIPT_DIR/scripts/setup-env.sh"
+  else
+    log_warn ".env not found. Copy .env.example and configure it."
+  fi
 else
   log_warn ".env already exists, skipping setup. Remove it to reconfigure."
fi -# --------------------------------------------------------------------------- -# Step 4: Create data directories -# --------------------------------------------------------------------------- -log_step "Creating data directories" +# Create data directories mkdir -p \ - data/traefik/certs \ - data/portainer \ - data/prometheus \ - data/grafana \ - data/loki \ - data/authentik/media \ - data/nextcloud \ - data/gitea \ - data/vaultwarden - -chmod 600 config/traefik/acme.json 2>/dev/null || touch config/traefik/acme.json && chmod 600 config/traefik/acme.json + "$SCRIPT_DIR/data/traefik/certs" \ + "$SCRIPT_DIR/data/portainer" \ + "$SCRIPT_DIR/data/prometheus" \ + "$SCRIPT_DIR/data/grafana" \ + "$SCRIPT_DIR/data/loki" \ + "$SCRIPT_DIR/data/authentik/media" \ + "$SCRIPT_DIR/data/nextcloud" \ + "$SCRIPT_DIR/data/gitea" \ + "$SCRIPT_DIR/data/vaultwarden" -# --------------------------------------------------------------------------- -# Step 5: Launch base infrastructure -# --------------------------------------------------------------------------- +# Setup acme.json +acme_path="$SCRIPT_DIR/config/traefik/acme.json" +if [[ ! -f "$acme_path" ]]; then + touch "$acme_path" +fi +chmod 600 "$acme_path" + +# Launch base infrastructure log_step "Launching base infrastructure" -docker compose -f docker-compose.base.yml up -d +cd "$SCRIPT_DIR/stacks/base" +docker compose up -d log_info "" log_info "${GREEN}${BOLD}✓ Base infrastructure is up!${NC}" log_info "" log_info "Next steps:" log_info " ./scripts/stack-manager.sh start sso # Set up SSO first (recommended)" -log_info " ./scripts/stack-manager.sh start monitoring # Launch monitoring" -log_info " ./scripts/stack-manager.sh list # See all available stacks" +log_info " ./scripts/stack-manager.sh start monitoring # Launch monitoring" +log_info " ./scripts/stack-manager.sh list # See all available stacks" log_info "" +log_info "CN users: run 'sudo ./scripts/setup-cn-mirrors.sh' for Docker acceleration." 
log_info "Documentation: docs/getting-started.md" +log_info "Logs saved to: $LOG_FILE" diff --git a/scripts/check-connectivity.sh b/scripts/check-connectivity.sh new file mode 100755 index 00000000..6112242b --- /dev/null +++ b/scripts/check-connectivity.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +# ============================================================================= +# Check Connectivity — 网络连通性检测 +# Tests reachability of all registries and external services used by HomeLab. +# +# Usage: ./scripts/check-connectivity.sh +# ============================================================================= +set -euo pipefail + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m' +BLUE='\033[0;34m'; BOLD='\033[1m'; NC='\033[0m' + +PASS=0; SLOW=0; FAIL=0 + +# --------------------------------------------------------------------------- +# Test a single endpoint +# --------------------------------------------------------------------------- +check_endpoint() { + local name="$1" + local host="$2" + local url="${3:-https://$host}" + + local start end elapsed_ms + start=$(date +%s%N) + + if curl -sf --connect-timeout 5 --max-time 10 -o /dev/null "$url" 2>/dev/null; then + end=$(date +%s%N) + elapsed_ms=$(( (end - start) / 1000000 )) + + if [[ $elapsed_ms -lt 500 ]]; then + echo -e " ${GREEN}[OK]${NC} $name ($host) — ${elapsed_ms}ms" + ((PASS++)) + else + echo -e " ${YELLOW}[SLOW]${NC} $name ($host) — ${elapsed_ms}ms ⚠️ suggest enabling mirror" + ((SLOW++)) + fi + else + echo -e " ${RED}[FAIL]${NC} $name ($host) — connection timeout ✗ needs CN mirror" + ((FAIL++)) + fi +} + +# --------------------------------------------------------------------------- +# DNS resolution check +# --------------------------------------------------------------------------- +check_dns() { + local host="$1" + if nslookup "$host" &>/dev/null || host "$host" &>/dev/null || dig +short "$host" &>/dev/null; then + echo -e " ${GREEN}[OK]${NC} DNS resolution ($host)" + ((PASS++)) + else + echo -e 
" ${RED}[FAIL]${NC} DNS resolution ($host) — DNS may be blocked or misconfigured" + ((FAIL++)) + fi +} + +# --------------------------------------------------------------------------- +# Outbound port check +# --------------------------------------------------------------------------- +check_port() { + local port="$1" + local host="${2:-hub.docker.com}" + + if timeout 5 bash -c "echo >/dev/tcp/$host/$port" 2>/dev/null; then + echo -e " ${GREEN}[OK]${NC} Outbound port $port ($host)" + ((PASS++)) + else + echo -e " ${RED}[FAIL]${NC} Outbound port $port ($host) — may be blocked by firewall" + ((FAIL++)) + fi +} + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- +echo "" +echo -e "${BLUE}${BOLD}=== HomeLab Stack — Network Connectivity Check ===${NC}" +echo "" + +echo -e "${BOLD}[1/4] Registry Reachability${NC}" +check_endpoint "Docker Hub" "hub.docker.com" "https://hub.docker.com" +check_endpoint "GitHub" "github.com" "https://github.com" +check_endpoint "gcr.io" "gcr.io" "https://gcr.io" +check_endpoint "ghcr.io" "ghcr.io" "https://ghcr.io" +check_endpoint "Quay.io" "quay.io" "https://quay.io" +echo "" + +echo -e "${BOLD}[2/4] CN Mirror Reachability${NC}" +check_endpoint "DaoCloud Mirror" "docker.m.daocloud.io" "https://docker.m.daocloud.io" +check_endpoint "Baidu Mirror" "mirror.baidubce.com" "https://mirror.baidubce.com" +check_endpoint "163 Mirror" "hub-mirror.c.163.com" "https://hub-mirror.c.163.com" +echo "" + +echo -e "${BOLD}[3/4] DNS Resolution${NC}" +check_dns "hub.docker.com" +check_dns "github.com" +echo "" + +echo -e "${BOLD}[4/4] Outbound Ports${NC}" +check_port 443 "hub.docker.com" +check_port 80 "hub.docker.com" +echo "" + +# --------------------------------------------------------------------------- +# Summary & Recommendations +# --------------------------------------------------------------------------- +echo -e "${BLUE}${BOLD}=== 
Summary ===${NC}" +echo -e " ${GREEN}OK: $PASS${NC} ${YELLOW}SLOW: $SLOW${NC} ${RED}FAIL: $FAIL${NC}" +echo "" + +if [[ $FAIL -gt 0 ]]; then + echo -e "${YELLOW}${BOLD}Recommendation:${NC} Detected $FAIL unreachable source(s)." + echo -e " Run: ${BOLD}sudo ./scripts/setup-cn-mirrors.sh${NC} to configure Docker mirror acceleration." + echo -e " Run: ${BOLD}./scripts/localize-images.sh --cn${NC} to replace gcr.io/ghcr.io images." +elif [[ $SLOW -gt 0 ]]; then + echo -e "${YELLOW}Recommendation:${NC} Some sources are slow. Consider running setup-cn-mirrors.sh for better performance." +else + echo -e "${GREEN}All endpoints reachable. No mirror configuration needed.${NC}" +fi diff --git a/scripts/diagnose.sh b/scripts/diagnose.sh new file mode 100755 index 00000000..4412ad3b --- /dev/null +++ b/scripts/diagnose.sh @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# ============================================================================= +# Diagnose — 一键诊断报告 +# Collects system info, container status, recent errors, and connectivity +# results into a report for issue submissions. 
+#
+# Usage:
+#   ./scripts/diagnose.sh          # Print to stdout
+#   ./scripts/diagnose.sh --file   # Write to diagnose-report.txt
+# =============================================================================
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.."; pwd)"
+REPORT_FILE="$PROJECT_DIR/diagnose-report.txt"
+OUTPUT_FILE=""
+
+if [[ "${1:-}" == "--file" ]]; then
+  OUTPUT_FILE="$REPORT_FILE"
+fi
+
+# ---------------------------------------------------------------------------
+# Output helper: append its arguments to $OUTPUT_FILE when set, else print to stdout
+# ---------------------------------------------------------------------------
+out() {
+  if [[ -n "$OUTPUT_FILE" ]]; then
+    echo "$@" >> "$OUTPUT_FILE"
+  else
+    echo "$@"
+  fi
+}
+
+# Clear report file if writing to file
+if [[ -n "$OUTPUT_FILE" ]]; then
+  true > "$OUTPUT_FILE"
+fi
+
+out "=============================================="
+out "HomeLab Stack — Diagnostic Report"
+out "Generated: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
+out "=============================================="
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 1: System Information
+# ---------------------------------------------------------------------------
+out "=== System Information ==="
+out "Hostname: $(hostname 2>/dev/null || echo 'unknown')"
+out "OS: $(cat /etc/os-release 2>/dev/null | grep PRETTY_NAME | cut -d'"' -f2 || uname -s)"
+out "Kernel: $(uname -r)"
+out "Architecture: $(uname -m)"
+out "CPU cores: $(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 'unknown')"
+
+# Memory
+if command -v free &>/dev/null; then
+  mem_total=$(free -h | awk '/^Mem:/ {print $2}')
+  mem_used=$(free -h | awk '/^Mem:/ {print $3}')
+  mem_avail=$(free -h | awk '/^Mem:/ {print $7}')
+  out "Memory: ${mem_used} used / ${mem_total} total (${mem_avail} available)"
+else
+  out "Memory: (free command not available)"
+fi
+
+# Disk
+disk_info=$(df -h / | awk 'NR==2 {printf "%s used / %s total (%s available, %s used)", $3, $2, $4, $5}')
+out "Disk (/): $disk_info"
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 2: Docker Information
+# ---------------------------------------------------------------------------
+out "=== Docker Information ==="
+if command -v docker &>/dev/null; then
+  out "Docker: $(docker --version 2>/dev/null || echo 'error getting version')"
+  out "Compose: $(docker compose version 2>/dev/null || echo 'not available')"
+  if docker info &>/dev/null; then
+    out "Daemon: running"
+    out "Storage: $(docker info --format '{{.Driver}}' 2>/dev/null || echo 'unknown')"
+    out "Images: $(docker images -q 2>/dev/null | wc -l) images"
+    out "Containers: $(docker ps -aq 2>/dev/null | wc -l) total, $(docker ps -q 2>/dev/null | wc -l) running"
+  else
+    out "Daemon: NOT RUNNING"
+  fi
+else
+  out "Docker: NOT INSTALLED"
+fi
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 3: Container Status
+# ---------------------------------------------------------------------------
+out "=== Container Status ==="
+if docker info &>/dev/null; then
+  docker ps -a --format "table {{.Names}}\t{{.Status}}\t{{.Image}}\t{{.Ports}}" 2>/dev/null | while IFS= read -r line; do
+    out "$line"
+  done
+else
+  out "(Docker daemon not running)"
+fi
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 4: Recent Error Logs (last 30 min)
+# ---------------------------------------------------------------------------
+out "=== Recent Error Logs (containers with errors) ==="
+if docker info &>/dev/null; then
+  containers=$(docker ps -a --format '{{.Names}}' 2>/dev/null)
+  found_errors=false
+  while IFS= read -r container; do
+    [[ -z "$container" ]] && continue
+    # "|| true": with pipefail a no-match grep (exit 1) would fail this assignment and abort the report under set -e.
+    errors=$(docker logs --since 30m "$container" 2>&1 | grep -iE '(error|fatal|panic|exception|fail)' | tail -5 || true)
+    if [[ -n "$errors" ]]; then
+      found_errors=true
+      out "--- $container ---"
+      out "$errors"
+      out ""
+    fi
+  done <<< "$containers"
+  if [[ "$found_errors" == "false" ]]; then
+    out "(No errors found in the last 30 minutes)"
+  fi
+else
+  out "(Docker daemon not running)"
+fi
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 5: Network Connectivity
+# ---------------------------------------------------------------------------
+out "=== Network Connectivity ==="
+for host in hub.docker.com github.com gcr.io ghcr.io; do
+  start=$(date +%s%N)
+  if curl -sf --connect-timeout 5 --max-time 10 -o /dev/null "https://$host" 2>/dev/null; then
+    end=$(date +%s%N)
+    ms=$(( (end - start) / 1000000 ))
+    out "[OK] $host — ${ms}ms"
+  else
+    out "[FAIL] $host — unreachable"
+  fi
+done
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 6: Port Usage
+# ---------------------------------------------------------------------------
+out "=== Port Usage (80, 443, 3000, 8080, 9090) ==="
+for port in 80 443 3000 8080 9090; do
+  proc=$(ss -tlnp 2>/dev/null | grep ":${port} " | head -1 || true)
+  if [[ -n "$proc" ]]; then
+    out "Port $port: IN USE — $proc"
+  else
+    out "Port $port: available"
+  fi
+done
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 7: Config File Validation
+# ---------------------------------------------------------------------------
+out "=== Config File Validation ==="
+config_files=(
+  "config/traefik/traefik.yml"
+  "config/prometheus/prometheus.yml"
+  "config/alertmanager/alertmanager.yml"
+  "config/loki/loki-config.yml"
+  "config/grafana/grafana.ini"
+)
+for cf in "${config_files[@]}"; do
+  full_path="$PROJECT_DIR/$cf"
+  if [[ -f "$full_path" ]]; then
+    out "[OK] $cf exists ($(wc -l < "$full_path") lines)"
+  else
+    out "[MISS] $cf not found"
+  fi
+done
+out ""
+
+# ---------------------------------------------------------------------------
+# Section 8: Docker Networks
+# ---------------------------------------------------------------------------
+out "=== Docker Networks ==="
+docker network ls --format "table {{.Name}}\t{{.Driver}}\t{{.Scope}}" 2>/dev/null | while IFS= read -r line; do
+  out "$line"
+done || out "(docker network ls failed: Docker missing or daemon not running)"
+out ""
+
+# ---------------------------------------------------------------------------
+# Footer
+# ---------------------------------------------------------------------------
+out "=============================================="
+out "End of diagnostic report"
+out "=============================================="
+
+if [[ -n "$OUTPUT_FILE" ]]; then
+  echo "Diagnostic report written to: $OUTPUT_FILE"
+  echo "Attach this file when submitting an issue."
+fi
diff --git a/scripts/localize-images.sh b/scripts/localize-images.sh
new file mode 100755
index 00000000..91aaba9d
--- /dev/null
+++ b/scripts/localize-images.sh
@@ -0,0 +1,182 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Localize Images — replace gcr.io/ghcr.io images in compose files with CN mirrors
+#
+# Usage:
+#   ./scripts/localize-images.sh --cn        # Replace with CN mirrors
+#   ./scripts/localize-images.sh --restore   # Restore original images
+#   ./scripts/localize-images.sh --dry-run   # Preview changes without modifying
+#   ./scripts/localize-images.sh --check     # Check if replacement is needed
+# =============================================================================
+set -euo pipefail
+
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
+BLUE='\033[0;34m'; NC='\033[0m'
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.."; pwd)"
+MIRROR_MAP="$PROJECT_DIR/config/cn-mirrors.yml"
+BACKUP_DIR="$PROJECT_DIR/.image-backup"
+
+# ---------------------------------------------------------------------------
+# Parse mirror mapping from
cn-mirrors.yml
+# ---------------------------------------------------------------------------
+declare -A MIRRORS=()
+
+load_mirrors() {
+  if [[ ! -f "$MIRROR_MAP" ]]; then
+    log_error "Mirror mapping not found: $MIRROR_MAP"
+    exit 1
+  fi
+  local line
+  while IFS= read -r line || [[ -n "$line" ]]; do
+    # Skip comments and empty lines
+    [[ "$line" =~ ^[[:space:]]*# ]] && continue
+    [[ "$line" =~ ^[[:space:]]*$ ]] && continue
+    [[ "$line" == "mirrors:" ]] && continue
+
+    # Parse " original: mirror" format
+    if [[ "$line" =~ ^[[:space:]]+([^:]+):[[:space:]]+(.+)$ ]]; then
+      local original="${BASH_REMATCH[1]}"
+      local mirror="${BASH_REMATCH[2]}"
+      # Trim whitespace
+      original=$(echo "$original" | xargs)
+      mirror=$(echo "$mirror" | xargs)
+      MIRRORS["$original"]="$mirror"
+    fi
+  done < "$MIRROR_MAP"
+}
+
+# ---------------------------------------------------------------------------
+# Find all compose files: prints sorted paths of docker-compose*.yml under stacks/
+# ---------------------------------------------------------------------------
+find_compose_files() {
+  find "$PROJECT_DIR/stacks" -name "docker-compose*.yml" -type f | sort
+}
+
+# ---------------------------------------------------------------------------
+# --cn / --dry-run: do_cn [dry_run=false] rewrites mapped images; re-running is a no-op
+# ---------------------------------------------------------------------------
+do_cn() {
+  local dry_run="${1:-false}"
+  local changed=0
+
+  load_mirrors
+
+  if [[ "$dry_run" == "false" ]]; then
+    mkdir -p "$BACKUP_DIR"
+  fi
+
+  while IFS= read -r file; do
+    local rel_path="${file#"$PROJECT_DIR"/}"
+    local file_changed=false
+
+    for original in "${!MIRRORS[@]}"; do
+      local mirror="${MIRRORS[$original]}"
+      # Each mirror contains its original as a substring, so strip mirrored refs before testing.
+      if sed "s|${mirror}||g" "$file" | grep -F -- "$original" >/dev/null; then
+        if [[ "$dry_run" == "true" ]]; then
+          echo -e " ${BLUE}$rel_path${NC}: $original → $mirror"
+        else
+          # Backup before first modification; never overwrite an existing pristine backup
+          if [[ "$file_changed" == "false" && ! -e "$BACKUP_DIR/$rel_path" ]]; then
+            local backup_path="$BACKUP_DIR/$rel_path"
+            mkdir -p "$(dirname "$backup_path")"
+            cp "$file" "$backup_path"
+          fi
+          sed -i -e "s|${mirror}|${original}|g" -e "s|${original}|${mirror}|g" "$file"
+        fi
+        file_changed=true
+        changed=$((changed + 1))  # not ((changed++)): that returns 1 when changed is 0 and trips set -e
+      fi
+    done
+  done < <(find_compose_files)
+
+  if [[ $changed -eq 0 ]]; then
+    log_info "No images to replace. All compose files are clean."
+  elif [[ "$dry_run" == "true" ]]; then
+    echo ""
+    log_info "Found $changed replacement(s). Run with --cn to apply."
+  else
+    log_info "Replaced $changed image reference(s). Backups saved to $BACKUP_DIR"
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# --restore: copy backed-up compose files over their targets, then delete the backup dir
+# ---------------------------------------------------------------------------
+do_restore() {
+  if [[ ! -d "$BACKUP_DIR" ]]; then
+    log_warn "No backup found at $BACKUP_DIR. Nothing to restore."
+    exit 0
+  fi
+
+  local restored=0
+  while IFS= read -r backup; do
+    local rel_path="${backup#"$BACKUP_DIR"/}"
+    local target="$PROJECT_DIR/$rel_path"
+    if [[ -f "$target" ]]; then
+      cp "$backup" "$target"
+      log_info "Restored: $rel_path"
+      restored=$((restored + 1))
+    fi
+  done < <(find "$BACKUP_DIR" -name "docker-compose*.yml" -type f)
+
+  if [[ $restored -eq 0 ]]; then
+    log_warn "No files restored."
+  else
+    log_info "Restored $restored file(s). Removing backup directory."
+    rm -rf -- "${BACKUP_DIR:?}"
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# --check: report images that still need replacement (already-mirrored refs are ignored)
+# ---------------------------------------------------------------------------
+do_check() {
+  load_mirrors
+  local needs_replace=0
+
+  while IFS= read -r file; do
+    local rel_path="${file#"$PROJECT_DIR"/}"
+    for original in "${!MIRRORS[@]}"; do
+      if sed "s|${MIRRORS[$original]}||g" "$file" | grep -F -- "$original" >/dev/null; then
+        echo -e " ${YELLOW}[NEEDS REPLACE]${NC} $rel_path: $original"
+        needs_replace=$((needs_replace + 1))
+      fi
+    done
+  done < <(find_compose_files)
+
+  if [[ $needs_replace -eq 0 ]]; then
+    log_info "All images are already localized (or no gcr.io/ghcr.io images found)."
+  else
+    echo ""
+    log_warn "Found $needs_replace image(s) that can be replaced. Run: $0 --cn"
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+usage() {
+  echo "Usage: $0 {--cn|--restore|--dry-run|--check}"
+  echo ""
+  echo " --cn Replace gcr.io/ghcr.io images with CN mirrors"
+  echo " --restore Restore original images from backup"
+  echo " --dry-run Preview replacements without modifying files"
+  echo " --check Check if any images need replacement"
+  exit 1
+}
+
+[[ $# -lt 1 ]] && usage
+
+case "$1" in
+  --cn) do_cn false ;;
+  --restore) do_restore ;;
+  --dry-run) do_cn true ;;
+  --check) do_check ;;
+  *) usage ;;
+esac
diff --git a/scripts/setup-cn-mirrors.sh b/scripts/setup-cn-mirrors.sh
new file mode 100755
index 00000000..7c32aba5
--- /dev/null
+++ b/scripts/setup-cn-mirrors.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Setup CN Mirrors — configure Docker registry mirror acceleration
+# Configures /etc/docker/daemon.json with CN mirror registries.
+# +# Usage: sudo ./scripts/setup-cn-mirrors.sh +# ============================================================================= +set -euo pipefail + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m' +BLUE='\033[0;34m'; BOLD='\033[1m'; NC='\033[0m' + +log_info() { echo -e "${GREEN}[INFO]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } +log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; } +log_step() { echo -e "\n${BLUE}${BOLD}==> $*${NC}"; } + +DAEMON_JSON="/etc/docker/daemon.json" + +# Mirror sources — primary + fallbacks +MIRRORS=( + "https://docker.m.daocloud.io" + "https://mirror.baidubce.com" + "https://hub-mirror.c.163.com" + "https://mirror.gcr.io" +) + +# --------------------------------------------------------------------------- +# Step 1: Interactive confirmation +# --------------------------------------------------------------------------- +log_step "Docker Mirror Accelerator Setup" +echo "" +echo "This script configures Docker daemon with CN mirror registries." +echo "It will modify ${DAEMON_JSON}." +echo "" +read -rp "Are you deploying in mainland China (y/N)? " answer +if [[ ! "$answer" =~ ^[Yy]$ ]]; then + log_info "Skipping CN mirror setup. No changes made." + exit 0 +fi + +# --------------------------------------------------------------------------- +# Step 2: Check root/sudo +# --------------------------------------------------------------------------- +if [[ $EUID -ne 0 ]]; then + log_error "This script must be run as root (or with sudo)." 
+ exit 1 +fi + +# --------------------------------------------------------------------------- +# Step 3: Build mirror JSON array +# --------------------------------------------------------------------------- +log_step "Testing mirror connectivity" + +available_mirrors=() +for mirror in "${MIRRORS[@]}"; do + host="${mirror#https://}" + if curl -sf --connect-timeout 5 --max-time 10 "$mirror" &>/dev/null; then + log_info "[OK] $host — reachable" + available_mirrors+=("$mirror") + else + log_warn "[SLOW] $host — not reachable, skipping" + fi +done + +if [[ ${#available_mirrors[@]} -eq 0 ]]; then + log_warn "No mirrors reachable. Using full list anyway (may work once DNS resolves)." + available_mirrors=("${MIRRORS[@]}") +fi + +# Build JSON array +mirror_json="[" +for i in "${!available_mirrors[@]}"; do + [[ $i -gt 0 ]] && mirror_json+="," + mirror_json+="\"${available_mirrors[$i]}\"" +done +mirror_json+="]" + +# --------------------------------------------------------------------------- +# Step 4: Write daemon.json (merge if exists) +# --------------------------------------------------------------------------- +log_step "Configuring Docker daemon" + +mkdir -p /etc/docker + +if [[ -f "$DAEMON_JSON" ]]; then + log_info "Existing ${DAEMON_JSON} found — creating backup" + cp "$DAEMON_JSON" "${DAEMON_JSON}.bak.$(date +%Y%m%d%H%M%S)" + + # Merge: add/replace registry-mirrors key + if command -v jq &>/dev/null; then + jq --argjson mirrors "$mirror_json" '."registry-mirrors" = $mirrors' "$DAEMON_JSON" > "${DAEMON_JSON}.tmp" + mv "${DAEMON_JSON}.tmp" "$DAEMON_JSON" + else + log_warn "jq not found — overwriting daemon.json (backup saved)" + cat > "$DAEMON_JSON" < "$DAEMON_JSON" </dev/null; then + log_info "${GREEN}${BOLD}✓ Mirror configuration verified — docker pull hello-world succeeded${NC}" + docker rmi hello-world &>/dev/null || true +else + log_error "docker pull hello-world failed. Check your network or mirror configuration." 
+  exit 1
+fi
+
+echo ""
+log_info "CN mirror setup complete. All docker pull commands will now use accelerated mirrors."
diff --git a/scripts/wait-healthy.sh b/scripts/wait-healthy.sh
new file mode 100755
index 00000000..5d60206d
--- /dev/null
+++ b/scripts/wait-healthy.sh
@@ -0,0 +1,181 @@
+#!/usr/bin/env bash
+# =============================================================================
+# Wait Healthy — wait for container health checks to pass
+# Waits for all containers in a stack to report healthy status.
+#
+# Usage:
+#   ./scripts/wait-healthy.sh --stack monitoring --timeout 300
+#   ./scripts/wait-healthy.sh --stack base
+# =============================================================================
+set -euo pipefail
+
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
+BLUE='\033[0;34m'; BOLD='\033[1m'; NC='\033[0m'
+
+log_info() { echo -e "${GREEN}[INFO]${NC} $*"; }
+log_warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd)"
+PROJECT_DIR="$(cd "$SCRIPT_DIR/.."; pwd)"
+
+STACK=""
+TIMEOUT=300
+INTERVAL=5
+
+# ---------------------------------------------------------------------------
+# Parse arguments
+# ---------------------------------------------------------------------------
+usage() {
+  echo "Usage: $0 --stack <name> [--timeout <seconds>]"
+  echo ""
+  echo "  --stack <name>       Stack name (e.g., base, monitoring, sso)"
+  echo "  --timeout <seconds>  Max wait time in seconds (default: 300)"
+  exit 1
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    # Each option needs a value; without the $# check a trailing flag would
+    # abort under set -u with "unbound variable" instead of printing usage.
+    --stack)   [[ $# -ge 2 ]] || usage; STACK="$2"; shift 2 ;;
+    --timeout) [[ $# -ge 2 ]] || usage; TIMEOUT="$2"; shift 2 ;;
+    *) usage ;;
+  esac
+done
+
+[[ -z "$STACK" ]] && usage
+
+# The wait loop compares TIMEOUT numerically, so reject anything that is not a
+# plain decimal number, then force base 10 so "08" is not parsed as octal.
+if [[ ! "$TIMEOUT" =~ ^[0-9]+$ ]]; then
+  log_error "Invalid --timeout value: $TIMEOUT (expected a non-negative integer)"
+  exit 1
+fi
+TIMEOUT=$((10#$TIMEOUT))
+
+STACK_DIR="$PROJECT_DIR/stacks/$STACK"
+if [[ ! -d "$STACK_DIR" ]]; then
+  log_error "Stack directory not found: $STACK_DIR"
+  exit 2
+fi
+
+# ---------------------------------------------------------------------------
+# Get containers for this stack
+# Prints one container name per line (container IDs when the fallback runs).
+# ---------------------------------------------------------------------------
+get_stack_containers() {
+  # Depending on the Compose version, `ps --format json` prints either a single
+  # JSON array or one JSON object per line; the jq filter accepts both shapes.
+  # NOTE(review): `ps` is called without --all, so containers that have already
+  # exited may not be listed at all — confirm whether that is intended.
+  docker compose -f "$STACK_DIR/docker-compose.yml" ps --format json 2>/dev/null \
+    | jq -r 'if type == "array" then .[] else . end | .Name // .name // empty' 2>/dev/null \
+    || docker compose -f "$STACK_DIR/docker-compose.yml" ps -q 2>/dev/null
+}
+
+# ---------------------------------------------------------------------------
+# Check health of a single container
+# Returns: healthy, unhealthy, starting, none (no healthcheck), exited
+# ---------------------------------------------------------------------------
+container_health() {
+  local container="$1"
+  local state
+  state=$(docker inspect --format='{{.State.Status}}' "$container" 2>/dev/null || echo "missing")
+
+  case "$state" in
+    running|paused) ;;  # continue to the healthcheck lookup below
+    created|restarting)
+      # Not up yet. Without this arm a container that has no healthcheck would
+      # be reported as "none" and counted as ready while still crash-looping.
+      echo "starting"
+      return
+      ;;
+    *)
+      echo "exited"
+      return
+      ;;
+  esac
+
+  local health
+  health=$(docker inspect --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}' "$container" 2>/dev/null || echo "unknown")
+  echo "$health"
+}
+
+# ---------------------------------------------------------------------------
+# Main wait loop
+# ---------------------------------------------------------------------------
+echo -e "${BLUE}${BOLD}Waiting for stack '$STACK' to become healthy (timeout: ${TIMEOUT}s)${NC}"
+
+# SECONDS is bash's wall-clock counter, so time spent inside the docker calls
+# counts toward the timeout, not only the sleeps between polls.
+SECONDS=0
+while (( SECONDS < TIMEOUT )); do
+  containers=()
+  while IFS= read -r c; do
+    if [[ -n "$c" ]]; then containers+=("$c"); fi
+  done < <(get_stack_containers)
+
+  if [[ ${#containers[@]} -eq 0 ]]; then
+    log_warn "No containers found for stack '$STACK'. Are they running?"
+    sleep "$INTERVAL"
+    continue
+  fi
+
+  all_healthy=true
+  status_line=""
+
+  for c in "${containers[@]}"; do
+    health=$(container_health "$c")
+    case "$health" in
+      healthy)          status_line+=" ${GREEN}✓${NC}$c" ;;
+      none)             status_line+=" ${GREEN}~${NC}$c" ;;  # No healthcheck defined
+      starting)         status_line+=" ${YELLOW}…${NC}$c"; all_healthy=false ;;
+      unhealthy|exited) status_line+=" ${RED}✗${NC}$c"; all_healthy=false ;;
+      *)                status_line+=" ${YELLOW}?${NC}$c"; all_healthy=false ;;
+    esac
+  done
+
+  echo -ne "\r  [${SECONDS}s]${status_line}   "
+
+  if [[ "$all_healthy" == "true" ]]; then
+    echo ""
+    echo ""
+    log_info "${GREEN}${BOLD}✓ All ${#containers[@]} container(s) in '$STACK' are healthy!${NC}"
+    exit 0
+  fi
+
+  sleep "$INTERVAL"
+done
+
+# ---------------------------------------------------------------------------
+# Timeout — print diagnostics
+# ---------------------------------------------------------------------------
+echo ""
+echo ""
+log_error "Timeout after ${TIMEOUT}s. Not all containers are healthy."
+echo ""
+
+echo -e "${RED}${BOLD}=== Unhealthy Containers ===${NC}"
+containers=()
+while IFS= read -r c; do
+  if [[ -n "$c" ]]; then containers+=("$c"); fi
+done < <(get_stack_containers)
+
+# Guard the expansion: on bash older than 4.4, "${containers[@]}" of an empty
+# array is an unbound-variable error under set -u.
+if [[ ${#containers[@]} -gt 0 ]]; then
+  for c in "${containers[@]}"; do
+    health=$(container_health "$c")
+    if [[ "$health" != "healthy" && "$health" != "none" ]]; then
+      echo ""
+      echo -e "${RED}--- $c (status: $health) ---${NC}"
+      echo "Last 50 lines of logs:"
+      # Best effort: with pipefail + set -e a failing `docker logs` would end
+      # the script here, skipping the other containers and the final exit 1.
+      docker logs --tail 50 "$c" 2>&1 | sed 's/^/  /' || true
+    fi
+  done
+fi
+
+exit 1