diff --git a/.github/workflows/cloudberry-backup-ci.yml b/.github/workflows/cloudberry-backup-ci.yml new file mode 100644 index 00000000..b4502e5d --- /dev/null +++ b/.github/workflows/cloudberry-backup-ci.yml @@ -0,0 +1,502 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# GitHub Actions Workflow: Cloudberry-backup CI +# -------------------------------------------------------------------- +# Description: +# +# Builds Apache Cloudberry from source, packages the installation, +# and runs Cloudberry-backup tests against a demo Cloudberry cluster. +# +# Workflow Overview: +# 1. Build Cloudberry from source and upload the installation as an artifact. +# 2. For each test target (unit, integration, end_to_end, s3_plugin_e2e, +# regression, scale), restore the Cloudberry installation, create a +# demo cluster, and run tests. +# +# Notes: +# - Each test job runs in an isolated environment and creates its own demo cluster. +# - Artifacts are used to avoid rebuilding Cloudberry for every test target. 
+# -------------------------------------------------------------------- + +name: cloudberry-backup-ci + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + types: [ opened, synchronize, reopened, edited, ready_for_review ] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +permissions: + contents: read + +env: + CLOUDBERRY_REPO: apache/cloudberry + CLOUDBERRY_REF: main + CLOUDBERRY_DIR: cloudberry + CLOUDBERRY_BACKUP_DIR: cloudberry-backup + +jobs: + build_cloudberry: + name: Build Cloudberry From Source + runs-on: ubuntu-22.04 + timeout-minutes: 180 + container: + image: apache/incubator-cloudberry:cbdb-build-rocky9-latest + options: >- + --user root + -h cdw + -v /usr/share:/host_usr_share + -v /usr/local:/host_usr_local + -v /opt:/host_opt + + steps: + - name: Free Disk Space + run: | + set -euo pipefail + echo "=== Disk space before cleanup ===" + df -h / + + rm -rf /host_opt/hostedtoolcache || true + rm -rf /host_usr_local/lib/android || true + rm -rf /host_usr_share/dotnet || true + rm -rf /host_opt/ghc || true + rm -rf /host_usr_local/.ghcup || true + rm -rf /host_usr_share/swift || true + rm -rf /host_usr_local/share/powershell || true + rm -rf /host_usr_local/share/chromium || true + rm -rf /host_usr_share/miniconda || true + rm -rf /host_opt/az || true + rm -rf /host_usr_share/sbt || true + + echo "=== Disk space after cleanup ===" + df -h / + + - name: Checkout Cloudberry Source + uses: actions/checkout@v4 + with: + repository: ${{ env.CLOUDBERRY_REPO }} + ref: ${{ env.CLOUDBERRY_REF }} + fetch-depth: 1 + submodules: true + path: ${{ env.CLOUDBERRY_DIR }} + + - name: Cloudberry Environment Initialization + env: + SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + run: | + set -euo pipefail + if ! 
su - gpadmin -c "/tmp/init_system.sh"; then + echo "::error::Container initialization failed" + exit 1 + fi + + mkdir -p "${SRC_DIR}/build-logs/details" + chown -R gpadmin:gpadmin . + chmod -R 755 . + chmod 777 "${SRC_DIR}/build-logs" + df -h / + rm -rf /__t/* + df -h / + + - name: Configure Cloudberry + env: + SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + run: | + set -euo pipefail + chmod +x "${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh" + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + echo "::error::Configure script failed" + exit 1 + fi + + - name: Build Cloudberry + env: + SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + run: | + set -euo pipefail + chmod +x "${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh" + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh"; then + echo "::error::Build script failed" + exit 1 + fi + + - name: Package Cloudberry Source + env: + SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + run: | + set -euo pipefail + tar -C "${GITHUB_WORKSPACE}" -czf cloudberry-src.tgz "${CLOUDBERRY_DIR}" + + - name: Package Cloudberry Installation + run: | + set -euo pipefail + tar -C /usr/local -czf cloudberry-db.tgz cloudberry-db + + - name: Upload Cloudberry Installation + uses: actions/upload-artifact@v4 + with: + name: cloudberry-db-install + path: cloudberry-db.tgz + if-no-files-found: error + retention-days: 7 + + - name: Upload Cloudberry Source + uses: actions/upload-artifact@v4 + with: + name: cloudberry-source + path: cloudberry-src.tgz + if-no-files-found: error + retention-days: 7 + + test_cloudberry_backup: + name: Cloudberry-backup Tests (${{ matrix.test_target }}) + needs: [build_cloudberry] + runs-on: ubuntu-22.04 + timeout-minutes: 180 + container: + 
image: apache/incubator-cloudberry:cbdb-build-rocky9-latest + options: >- + --user root + -h cdw + -v /usr/share:/host_usr_share + -v /usr/local:/host_usr_local + -v /opt:/host_opt + + strategy: + fail-fast: false + matrix: + test_target: [unit, integration, end_to_end, s3_plugin_e2e, regression, scale] + + steps: + - name: Free Disk Space + run: | + set -euo pipefail + echo "=== Disk space before cleanup ===" + df -h / + + rm -rf /host_opt/hostedtoolcache || true + rm -rf /host_usr_local/lib/android || true + rm -rf /host_usr_share/dotnet || true + rm -rf /host_opt/ghc || true + rm -rf /host_usr_local/.ghcup || true + rm -rf /host_usr_share/swift || true + rm -rf /host_usr_local/share/powershell || true + rm -rf /host_usr_local/share/chromium || true + rm -rf /host_usr_share/miniconda || true + rm -rf /host_opt/az || true + rm -rf /host_usr_share/sbt || true + + echo "=== Disk space after cleanup ===" + df -h / + + - name: Checkout Cloudberry-backup + uses: actions/checkout@v4 + with: + fetch-depth: 0 + path: ${{ env.CLOUDBERRY_BACKUP_DIR }} + + - name: Download Cloudberry Installation + uses: actions/download-artifact@v4 + with: + name: cloudberry-db-install + + - name: Download Cloudberry Source + uses: actions/download-artifact@v4 + with: + name: cloudberry-source + + - name: Restore Cloudberry Installation + run: | + set -euo pipefail + tar -C /usr/local -xzf cloudberry-db.tgz + + - name: Restore Cloudberry Source + run: | + set -euo pipefail + tar -C "${GITHUB_WORKSPACE}" -xzf cloudberry-src.tgz + + - name: Cloudberry Environment Initialization + env: + SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + run: | + set -euo pipefail + if ! su - gpadmin -c "/tmp/init_system.sh"; then + echo "::error::Container initialization failed" + exit 1 + fi + + mkdir -p "${SRC_DIR}/build-logs/details" + chown -R gpadmin:gpadmin . + chmod -R 755 . 
+ chmod 777 "${SRC_DIR}/build-logs" + df -h / + rm -rf /__t/* + df -h / + + - name: Setup Locale for Integration Tests + run: | + # Install German locale and recompile de_DE with UTF-8 encoding BEFORE + # the cluster starts. PostgreSQL memory-maps the locale archive at + # startup, so localedef must run before any PG process is launched. + dnf install -y glibc-langpack-de + localedef -i de_DE -f UTF-8 de_DE + + - name: Create Cloudberry Demo Cluster + env: + SRC_DIR: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + run: | + set -euo pipefail + chmod +x "${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh" + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"; then + echo "::error::Demo cluster creation failed" + exit 1 + fi + + - name: Cloudberry-backup Tests + env: + CLOUDBERRY_BACKUP_SRC: ${{ github.workspace }}/${{ env.CLOUDBERRY_BACKUP_DIR }} + CLOUDBERRY_SRC: ${{ github.workspace }}/${{ env.CLOUDBERRY_DIR }} + TEST_TARGET: ${{ matrix.test_target }} + run: | + set -euo pipefail + TEST_LOG_ROOT="${GITHUB_WORKSPACE}/test-logs/${TEST_TARGET}" + mkdir -p "${TEST_LOG_ROOT}" + chown -R gpadmin:gpadmin "${TEST_LOG_ROOT}" + + cat <<'SCRIPT' > /tmp/run_cloudberry_backup_tests.sh + #!/bin/bash + set -euo pipefail + + export GOPATH=/home/gpadmin/go + export PATH=/usr/local/go/bin:${GOPATH}/bin:${PATH} + + source /usr/local/cloudberry-db/cloudberry-env.sh + source ${CLOUDBERRY_SRC}/gpAux/gpdemo/gpdemo-env.sh + + pushd ${CLOUDBERRY_BACKUP_SRC} + make depend 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-depend.log" + make build 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-build.log" + make install 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-install.log" + + dummy_dir=$(find ${CLOUDBERRY_SRC} -name dummy_seclabel -type d | head -n 1 || true) + if [ -n "${dummy_dir}" ]; then + pushd "${dummy_dir}" + make install + popd + gpconfig -c 
shared_preload_libraries -v dummy_seclabel + gpstop -ra + gpconfig -s shared_preload_libraries | grep dummy_seclabel + else + echo "dummy_seclabel not found, skipping preload setup" + fi + + psql postgres -c 'DROP TABLESPACE IF EXISTS test_tablespace' + + set +e + case "${TEST_TARGET}" in + unit) + make unit 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-unit.log" + ;; + integration) + make integration 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-integration.log" + ;; + end_to_end) + make end_to_end 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-end_to_end.log" + ;; + s3_plugin_e2e) + curl -fsSL https://dl.min.io/server/minio/release/linux-amd64/minio -o /tmp/minio + chmod +x /tmp/minio + mkdir -p /tmp/minio-data + /tmp/minio server --address ":9000" /tmp/minio-data > "${TEST_LOG_ROOT}/minio.log" 2>&1 & + for i in {1..30}; do + if curl -fsS http://127.0.0.1:9000/minio/health/live >/dev/null; then + break + fi + sleep 1 + done + + curl -fsSL https://dl.min.io/client/mc/release/linux-amd64/mc -o /tmp/mc + chmod +x /tmp/mc + /tmp/mc alias set local http://127.0.0.1:9000 minioadmin minioadmin + /tmp/mc mb --ignore-existing local/cloudberry-backup-s3-test + + ${CLOUDBERRY_BACKUP_SRC}/plugins/generate_minio_config.sh + ${CLOUDBERRY_BACKUP_SRC}/plugins/plugin_test.sh "${GPHOME}/bin/gpbackup_s3_plugin" /tmp/minio_config.yaml 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-s3-plugin-commands.log" + + # Start test + test_db=plugin_test_db_ci + backup_log="${TEST_LOG_ROOT}/cloudberry-backup-s3-plugin-gpbackup.log" + restore_log="${TEST_LOG_ROOT}/cloudberry-backup-s3-plugin-gprestore.log" + psql -X -d postgres -qc "DROP DATABASE IF EXISTS ${test_db}" 2>/dev/null || true + createdb "${test_db}" + psql -X -d "${test_db}" -qc "CREATE TABLE test1(i int) DISTRIBUTED RANDOMLY; INSERT INTO test1 SELECT generate_series(1,1000)" + + # Store minio PID for cleanup + minio_pid=$! 
+ + gpbackup --dbname "${test_db}" --metadata-only --plugin-config /tmp/minio_config.yaml > "${backup_log}" 2>&1 + timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" "${backup_log}" | grep -Eo "[[:digit:]]{14}" | head -n 1) + if [ -z "${timestamp}" ]; then + latest_gpbackup_log=$(ls -1t "${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -n 1 || true) + if [ -n "${latest_gpbackup_log}" ]; then + timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" "${latest_gpbackup_log}" | grep -Eo "[[:digit:]]{14}" | head -n 1) + fi + fi + if [ -z "${timestamp}" ]; then + echo "Could not parse backup timestamp from gpbackup logs" + echo "----- ${backup_log} -----" + cat "${backup_log}" || true + latest_gpbackup_log=$(ls -1t "${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -n 1 || true) + if [ -n "${latest_gpbackup_log}" ]; then + echo "----- ${latest_gpbackup_log} -----" + cat "${latest_gpbackup_log}" || true + fi + exit 1 + fi + + dropdb "${test_db}" + gprestore --timestamp "${timestamp}" --plugin-config /tmp/minio_config.yaml --create-db > "${restore_log}" 2>&1 + + result=$(psql -X -d "${test_db}" -tc "SELECT count(*) FROM pg_class WHERE relname='test1'" | xargs) + if [ "${result}" != "1" ]; then + echo "Expected table test1 to exist after restore, got count=${result}" + exit 1 + fi + + # Cleanup + kill ${minio_pid} || true + wait ${minio_pid} 2>/dev/null || true + rm -rf /tmp/minio-data || true + exit 0 + ;; + regression) + pushd ${CLOUDBERRY_SRC}/src/test/regress + ./pg_regress --dbname=regression --host=localhost --port=7000 --init-file=init_file --schedule=./minimal_schedule || true + cat regression.diffs 2>/dev/null || true + popd + + psql -d postgres -c 'DROP TABLESPACE IF EXISTS test_tablespace' + + pg_dump regression -f /tmp/regression_schema_before.sql --schema-only + + backup_dir=/tmp/regression_backup + rm -rf "${backup_dir}" + mkdir -p "${backup_dir}" + + # Run gpbackup and capture output to extract timestamp + 
backup_log="${TEST_LOG_ROOT}/gpbackup_output.log" + gpbackup --dbname regression --backup-dir "${backup_dir}" --metadata-only 2>&1 | tee "${backup_log}" + + # Extract timestamp from backup command output (most reliable) + timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" "${backup_log}" | grep -Eo "[[:digit:]]{14}" | head -1) + + # Fallback: Check gpAdminLogs if not found in direct output + if [ -z "${timestamp}" ]; then + latest_gpbackup_log=$(ls -1t "${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -1 || true) + if [ -n "${latest_gpbackup_log}" ]; then + timestamp=$(grep -E "Backup Timestamp[[:space:]]*=" "${latest_gpbackup_log}" | grep -Eo "[[:digit:]]{14}" | head -1) + fi + fi + + # Check if timestamp is empty + if [ -z "${timestamp}" ]; then + echo "ERROR: Could not parse backup timestamp from gpbackup logs" + echo "=== Final backup directory structure ===" + find "${backup_dir}" -type f | sort + exit 1 + fi + + echo "backup timestamp: ${timestamp}" + + psql -d postgres -c 'DROP DATABASE IF EXISTS regression' + + set +e + gprestore --create-db --timestamp ${timestamp} --backup-dir "${backup_dir}" --with-globals --on-error-continue + set -e + + pg_dump regression -f /tmp/regression_schema_after.sql --schema-only + + set +e + diff -u /tmp/regression_schema_before.sql /tmp/regression_schema_after.sql > /tmp/regression_schema.diff + diff_status=$? 
+ set -e + + cp -a /tmp/regression_schema_before.sql "${TEST_LOG_ROOT}/regression_schema_before.sql" || true + cp -a /tmp/regression_schema_after.sql "${TEST_LOG_ROOT}/regression_schema_after.sql" || true + cp -a /tmp/regression_schema.diff "${TEST_LOG_ROOT}/regression_schema.diff" || true + ;; + scale) + export BACKUP_DIR=/tmp/scale_backup + export LOG_DIR="${TEST_LOG_ROOT}/scale" + mkdir -p "${LOG_DIR}" + chmod +x "${CLOUDBERRY_BACKUP_SRC}/.github/workflows/scale-tests-cloudberry-ci.bash" + "${CLOUDBERRY_BACKUP_SRC}/.github/workflows/scale-tests-cloudberry-ci.bash" 2>&1 | tee "${TEST_LOG_ROOT}/cloudberry-backup-scale.log" + ;; + *) + echo "unknown test target: ${TEST_TARGET}" + exit 2 + ;; + esac + test_status=${PIPESTATUS[0]} + set -e + popd + + if [ -n "${MASTER_DATA_DIRECTORY:-}" ] && [ -d "${MASTER_DATA_DIRECTORY}/log" ]; then + cp -a "${MASTER_DATA_DIRECTORY}/log" "${TEST_LOG_ROOT}/gpdb-log" || true + fi + if [ -d "${CLOUDBERRY_SRC}/build-logs" ]; then + cp -a "${CLOUDBERRY_SRC}/build-logs" "${TEST_LOG_ROOT}/cloudberry-build-logs" || true + fi + + exit ${test_status} + SCRIPT + + chmod +x /tmp/run_cloudberry_backup_tests.sh + set +e + su - gpadmin -c "TEST_LOG_ROOT=${TEST_LOG_ROOT} CLOUDBERRY_BACKUP_SRC=${CLOUDBERRY_BACKUP_SRC} CLOUDBERRY_SRC=${CLOUDBERRY_SRC} TEST_TARGET=${TEST_TARGET} /tmp/run_cloudberry_backup_tests.sh" + status=$? 
+ set -e + + { + echo "## Cloudberry-backup Test Summary" + echo "- Target: ${TEST_TARGET}" + if [ ${status} -eq 0 ]; then + echo "- Result: PASS" + else + echo "- Result: FAIL" + fi + echo "- Logs: ${TEST_LOG_ROOT}" + } >> "$GITHUB_STEP_SUMMARY" + + exit ${status} + + - name: Upload Test Logs (On Failure) + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cloudberry-backup-logs-${{ matrix.test_target }} + path: test-logs/${{ matrix.test_target }} + if-no-files-found: warn + retention-days: 7 diff --git a/.github/workflows/scale-tests-cloudberry-ci.bash b/.github/workflows/scale-tests-cloudberry-ci.bash new file mode 100644 index 00000000..75ed2f7c --- /dev/null +++ b/.github/workflows/scale-tests-cloudberry-ci.bash @@ -0,0 +1,175 @@ +#!/bin/bash + +# ------------------------------------------------------------------------------ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# ------------------------------------------------------------------------------ +# Non-perf scale tests for GitHub Actions Cloudberry demo cluster. +# This focuses on backup/restore correctness under moderate object/data scale. 
+# ------------------------------------------------------------------------------ + +set -euo pipefail + +BACKUP_DIR="${BACKUP_DIR:-/tmp/scale_backup}" +LOG_DIR="${LOG_DIR:-/tmp/scale-test-logs}" + +mkdir -p "${BACKUP_DIR}" "${LOG_DIR}" + +extract_timestamp() { + local log_file="$1" + local ts + ts="$(grep -E "Backup Timestamp[[:space:]]*=" "${log_file}" | grep -Eo "[[:digit:]]{14}" | head -n 1 || true)" + if [ -z "${ts}" ]; then + local latest_gpbackup_log + latest_gpbackup_log="$(ls -1t "${HOME}/gpAdminLogs"/gpbackup_*.log 2>/dev/null | head -n 1 || true)" + if [ -n "${latest_gpbackup_log}" ]; then + ts="$(grep -E "Backup Timestamp[[:space:]]*=" "${latest_gpbackup_log}" | grep -Eo "[[:digit:]]{14}" | head -n 1 || true)" + fi + fi + if [ -z "${ts}" ]; then + echo "Could not parse backup timestamp from ${log_file}" + return 1 + fi + echo "${ts}" +} + +validate_datascaledb_restore() { + local restore_db="$1" + local src_tables + local dst_tables + local src_big + local dst_big + + src_tables="$(psql -X -d datascaledb -Atc "SELECT count(*) FROM pg_class c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND c.relkind='r'")" + dst_tables="$(psql -X -d "${restore_db}" -Atc "SELECT count(*) FROM pg_class c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND c.relkind='r'")" + src_big="$(psql -X -d datascaledb -Atc "SELECT count(*) FROM tbl_big")" + dst_big="$(psql -X -d "${restore_db}" -Atc "SELECT count(*) FROM tbl_big")" + + if [ "${src_tables}" != "${dst_tables}" ] || [ "${src_big}" != "${dst_big}" ]; then + echo "Data scale restore validation failed for ${restore_db}" + echo "source tables=${src_tables}, restored tables=${dst_tables}" + echo "source tbl_big=${src_big}, restored tbl_big=${dst_big}" + return 1 + fi +} + +echo "## Preparing copy queue scale database ##" +psql -X -d postgres -qc "DROP DATABASE IF EXISTS copyqueuedb" +createdb copyqueuedb +for j in $(seq 1 300); do + psql -X -d copyqueuedb -q -c "CREATE TABLE 
tbl_1k_${j}(i int) DISTRIBUTED BY (i);" + psql -X -d copyqueuedb -q -c "INSERT INTO tbl_1k_${j} SELECT generate_series(1,1000)" +done + +echo "## Copy queue backup/restore matrix ##" +for q in 2 4 8; do + b_log="${LOG_DIR}/copyqueue_backup_q${q}.log" + echo "Running gpbackup copy queue size ${q}" + gpbackup --dbname copyqueuedb --backup-dir "${BACKUP_DIR}" --single-data-file --no-compression --copy-queue-size "${q}" \ + 2>&1 | tee "${b_log}" + timestamp="$(extract_timestamp "${b_log}")" + restore_db="copyqueue_restore_q${q}" + psql -X -d postgres -qc "DROP DATABASE IF EXISTS ${restore_db}" + gprestore --timestamp "${timestamp}" --backup-dir "${BACKUP_DIR}" --create-db --redirect-db "${restore_db}" --copy-queue-size "${q}" \ + 2>&1 | tee "${LOG_DIR}/copyqueue_restore_q${q}.log" + src_tbl_count="$(psql -X -d copyqueuedb -Atc "SELECT count(*) FROM pg_class c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND c.relkind='r'")" + dst_tbl_count="$(psql -X -d "${restore_db}" -Atc "SELECT count(*) FROM pg_class c JOIN pg_namespace n ON n.oid=c.relnamespace WHERE n.nspname='public' AND c.relkind='r'")" + if [ "${src_tbl_count}" != "${dst_tbl_count}" ]; then + echo "Copy queue restore validation failed for ${restore_db}: source tables=${src_tbl_count}, restored tables=${dst_tbl_count}" + exit 1 + fi +done + +echo "## Preparing data scale database ##" +psql -X -d postgres -qc "DROP DATABASE IF EXISTS datascaledb" +createdb datascaledb +for j in $(seq 1 200); do + psql -X -d datascaledb -q -c "CREATE TABLE tbl_1k_${j}(i int) DISTRIBUTED BY (i);" + psql -X -d datascaledb -q -c "INSERT INTO tbl_1k_${j} SELECT generate_series(1,1000)" +done + +psql -X -d datascaledb -q -c "CREATE TABLE tbl_big(i int) DISTRIBUTED BY (i);" +for j in $(seq 1 25); do + psql -X -d datascaledb -q -c "INSERT INTO tbl_big SELECT generate_series(1,100000)" +done + +psql -X -d datascaledb -q -c "CREATE TABLE big_partition(a int, b int, c int) DISTRIBUTED BY (a) PARTITION BY RANGE (b) 
(START (1) END (101) EVERY (1))" +psql -X -d datascaledb -q -c "INSERT INTO big_partition SELECT i, i, i FROM generate_series(1,100) i" +for j in $(seq 1 8); do + psql -X -d datascaledb -q -c "INSERT INTO big_partition SELECT * FROM big_partition" +done + +echo "## Running data scale backup/restore matrix ##" +run_data_scale_case() { + local case_name="$1" + local backup_flags="$2" + local restore_db="$3" + local jobs="$4" + local b_log="${LOG_DIR}/datascale_${case_name}_backup.log" + local r_log="${LOG_DIR}/datascale_${case_name}_restore.log" + + gpbackup --dbname datascaledb --backup-dir "${BACKUP_DIR}" ${backup_flags} 2>&1 | tee "${b_log}" + local ts + ts="$(extract_timestamp "${b_log}")" + psql -X -d postgres -qc "DROP DATABASE IF EXISTS ${restore_db}" + gprestore --timestamp "${ts}" --backup-dir "${BACKUP_DIR}" --create-db --redirect-db "${restore_db}" --jobs "${jobs}" \ + 2>&1 | tee "${r_log}" + validate_datascaledb_restore "${restore_db}" +} + +run_data_scale_case "multi_data_file" "--leaf-partition-data" "datascale_restore_multi" "4" +run_data_scale_case "multi_data_file_zstd" "--leaf-partition-data --compression-type zstd" "datascale_restore_multi_zstd" "4" +run_data_scale_case "single_data_file" "--leaf-partition-data --single-data-file" "datascale_restore_single" "1" +run_data_scale_case "single_data_file_zstd" "--leaf-partition-data --single-data-file --compression-type zstd" "datascale_restore_single_zstd" "1" + +echo "## Preparing metadata scale database ##" +psql -X -d postgres -qc "DROP DATABASE IF EXISTS metadatascaledb" +createdb metadatascaledb + +psql -X -d metadatascaledb <<'SQL' +DO $$ +DECLARE + i int; +BEGIN + FOR i IN 1..80 LOOP + EXECUTE format('CREATE SCHEMA IF NOT EXISTS s_%s', i); + EXECUTE format('CREATE TABLE s_%s.t_%s(id int, val text) DISTRIBUTED BY (id)', i, i); + EXECUTE format('CREATE VIEW s_%s.v_%s AS SELECT * FROM s_%s.t_%s', i, i, i, i); + END LOOP; +END$$; +SQL + +echo "## Running metadata-only backup/restore ##" 
+meta_backup_log="${LOG_DIR}/metadata_backup.log" +meta_restore_log="${LOG_DIR}/metadata_restore.log" +gpbackup --dbname metadatascaledb --backup-dir "${BACKUP_DIR}" --metadata-only --verbose 2>&1 | tee "${meta_backup_log}" +meta_ts="$(extract_timestamp "${meta_backup_log}")" +psql -X -d postgres -qc "DROP DATABASE IF EXISTS metadatascaledb_res" +gprestore --timestamp "${meta_ts}" --backup-dir "${BACKUP_DIR}" --redirect-db metadatascaledb_res --jobs 4 --create-db \ + 2>&1 | tee "${meta_restore_log}" + +echo "## Minimal correctness checks ##" +src_schema_count="$(psql -X -d metadatascaledb -Atc "SELECT count(*) FROM pg_namespace WHERE nspname LIKE 's_%'")" +dst_schema_count="$(psql -X -d metadatascaledb_res -Atc "SELECT count(*) FROM pg_namespace WHERE nspname LIKE 's_%'")" +if [ "${src_schema_count}" != "${dst_schema_count}" ]; then + echo "Metadata restore schema count mismatch: src=${src_schema_count} dst=${dst_schema_count}" + exit 1 +fi + +echo "Scale tests completed successfully" diff --git a/gpbackup_s3_plugin.go b/gpbackup_s3_plugin.go index b699e715..c511dc1b 100644 --- a/gpbackup_s3_plugin.go +++ b/gpbackup_s3_plugin.go @@ -19,7 +19,7 @@ func main() { Name: "version", Usage: "print version of gpbackup_s3_plugin", } - app.Version = s3plugin.Version + app.Version = s3plugin.GetVersion() app.Usage = "" app.UsageText = "Not supported as a standalone utility. " + "This plugin must be used in conjunction with gpbackup and gprestore." 
diff --git a/plugins/generate_minio_config.sh b/plugins/generate_minio_config.sh index b776e9ab..33b89f4e 100755 --- a/plugins/generate_minio_config.sh +++ b/plugins/generate_minio_config.sh @@ -6,7 +6,7 @@ options: endpoint: http://localhost:9000/ aws_access_key_id: minioadmin aws_secret_access_key: minioadmin - bucket: gpbackup-s3-test + bucket: cloudberry-backup-s3-test folder: test/backup backup_max_concurrent_requests: 2 backup_multipart_chunksize: 5MB diff --git a/plugins/plugin_test.sh b/plugins/plugin_test.sh index c793bcf0..9c1bf46c 100755 --- a/plugins/plugin_test.sh +++ b/plugins/plugin_test.sh @@ -40,8 +40,8 @@ testdatalarge="$testdir/testdatalarge_$time_second.txt" logdir="/tmp/test_bench_logs" text="this is some text" -data=`LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom 2>/dev/null | head -c 1000 ; echo` +data_large=`LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom 2>/dev/null | head -c 1000000 ; echo` mkdir -p $testdir mkdir -p $logdir echo $text > $testfile diff --git a/plugins/s3plugin/s3plugin.go b/plugins/s3plugin/s3plugin.go index 9d9f42f7..92f0e080 100644 --- a/plugins/s3plugin/s3plugin.go +++ b/plugins/s3plugin/s3plugin.go @@ -28,7 +28,15 @@ import ( "gopkg.in/yaml.v2" ) -var Version string +// version is set at build time via ldflags: +// +// go build -ldflags "-X github.com/apache/cloudberry-backup/plugins/s3plugin.version=..." +var version string + +// GetVersion returns the build version string injected via ldflags. 
+func GetVersion() string { + return version +} const apiVersion = "0.5.0" const Mebibyte = 1024 * 1024 diff --git a/utils/agent_remote.go b/utils/agent_remote.go index 39a019ab..aab33f78 100644 --- a/utils/agent_remote.go +++ b/utils/agent_remote.go @@ -155,6 +155,7 @@ func StartGpbackupHelpers(c *cluster.Cluster, fpInfo filepath.FilePathInfo, oper defer helperMutex.Unlock() gphomePath := operating.System.Getenv("GPHOME") + envSourceCommand := SourceClusterEnvCommand(gphomePath) pluginStr := "" if pluginConfigFile != "" { _, configFilename := path.Split(pluginConfigFile) @@ -188,12 +189,12 @@ func StartGpbackupHelpers(c *cluster.Cluster, fpInfo filepath.FilePathInfo, oper // we run these commands in sequence to ensure that any failure is critical; the last command ensures the agent process was successfully started return fmt.Sprintf(`cat << HEREDOC > %[1]s && chmod +x %[1]s && ( nohup %[1]s &> /dev/null &) #!/bin/bash -source %[2]s/greenplum_path.sh -%[2]s/bin/%s +%[3]s +%[2]s/bin/%[4]s HEREDOC -`, scriptFile, gphomePath, helperCmdStr) +`, scriptFile, gphomePath, envSourceCommand, helperCmdStr) }) c.CheckClusterError(remoteOutput, "Error starting gpbackup_helper agent", func(contentID int) string { return "Error starting gpbackup_helper agent" diff --git a/utils/plugin.go b/utils/plugin.go index d6f3259a..212ee7db 100644 --- a/utils/plugin.go +++ b/utils/plugin.go @@ -103,8 +103,8 @@ func (plugin *PluginConfig) CheckPluginExistsOnAllHosts(c *cluster.Cluster) stri } func (plugin *PluginConfig) checkPluginAPIVersion(c *cluster.Cluster) { - command := fmt.Sprintf("source %s/greenplum_path.sh && %s plugin_api_version", - operating.System.Getenv("GPHOME"), plugin.ExecutablePath) + command := fmt.Sprintf("%s && %s plugin_api_version", + SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), plugin.ExecutablePath) remoteOutput := c.GenerateAndExecuteCommand( "Checking plugin api version on all hosts", cluster.ON_HOSTS&cluster.INCLUDE_COORDINATOR, @@ -159,8 +159,8 @@ 
func (plugin *PluginConfig) checkPluginAPIVersion(c *cluster.Cluster) { } func (plugin *PluginConfig) getPluginNativeVersion(c *cluster.Cluster) string { - command := fmt.Sprintf("source %s/greenplum_path.sh && %s --version", - operating.System.Getenv("GPHOME"), plugin.ExecutablePath) + command := fmt.Sprintf("%s && %s --version", + SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), plugin.ExecutablePath) remoteOutput := c.GenerateAndExecuteCommand( "Checking plugin version on all hosts", cluster.ON_HOSTS|cluster.INCLUDE_COORDINATOR, @@ -284,8 +284,8 @@ func (plugin *PluginConfig) buildHookString(command string, } backupDir := fpInfo.GetDirForContent(contentID) - return fmt.Sprintf("source %s/greenplum_path.sh && %s %s %s %s %s %s", - operating.System.Getenv("GPHOME"), plugin.ExecutablePath, command, + return fmt.Sprintf("%s && %s %s %s %s %s %s", + SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), plugin.ExecutablePath, command, plugin.ConfigPath, backupDir, scope, contentIDStr) } @@ -426,8 +426,8 @@ func (plugin *PluginConfig) BackupSegmentTOCs(c *cluster.Cluster, fpInfo filepat remoteOutput = c.GenerateAndExecuteCommand("Processing segment TOC files with plugin", cluster.ON_SEGMENTS, func(contentID int) string { tocFile := fpInfo.GetSegmentTOCFilePath(contentID) - return fmt.Sprintf("source %s/greenplum_path.sh && %s backup_file %s %s && "+ - "chmod 0755 %s", operating.System.Getenv("GPHOME"), plugin.ExecutablePath, plugin.ConfigPath, tocFile, tocFile) + return fmt.Sprintf("%s && %s backup_file %s %s && "+ + "chmod 0755 %s", SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), plugin.ExecutablePath, plugin.ConfigPath, tocFile, tocFile) }) c.CheckClusterError(remoteOutput, "Unable to process segment TOC files using plugin", func(contentID int) string { return "See gpAdminLog for gpbackup_helper on segment host for details: Error occurred with plugin" @@ -445,8 +445,8 @@ func (plugin *PluginConfig) RestoreSegmentTOCs(c *cluster.Cluster, 
fpInfo filepa tocFile := fpInfo.GetSegmentTOCFilePath(contentID) // Restore the filename with the origin content to the directory with the destination content tocFile = strings.ReplaceAll(tocFile, fmt.Sprintf("gpbackup_%d", contentID), fmt.Sprintf("gpbackup_%d", origContent)) - command = fmt.Sprintf("mkdir -p %s && source %s/greenplum_path.sh && %s restore_file %s %s", - fpInfo.GetDirForContent(contentID), operating.System.Getenv("GPHOME"), + command = fmt.Sprintf("mkdir -p %s && %s && %s restore_file %s %s", + fpInfo.GetDirForContent(contentID), SourceClusterEnvCommand(operating.System.Getenv("GPHOME")), plugin.ExecutablePath, plugin.ConfigPath, tocFile) return command }) diff --git a/utils/util.go b/utils/util.go index f13b1c35..c2450c53 100644 --- a/utils/util.go +++ b/utils/util.go @@ -35,6 +35,25 @@ func CommandExists(cmd string) bool { return err == nil } +func ClusterEnvScriptPath(gphome string) string { + greenplumPath := path.Join(gphome, "greenplum_path.sh") + if FileExists(greenplumPath) { + return greenplumPath + } + + cloudberryPath := path.Join(gphome, "cloudberry-env.sh") + if FileExists(cloudberryPath) { + return cloudberryPath + } + + // Preserve previous behavior as fallback for clearer error messages upstream. + return greenplumPath +} + +func SourceClusterEnvCommand(gphome string) string { + return fmt.Sprintf("source %s", ClusterEnvScriptPath(gphome)) +} + func FileExists(filename string) bool { _, err := os.Stat(filename) return err == nil