Merge pull request #13 from TechnologyEnhancedLearning/staging-data-test-team-shared

Phil-NHS · web-flow · commit fd44032b2aa9 · 2025-12-23T17:43:26.000Z
Staging data test team shared
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -1,56 +1,68 @@
-# We want to run CI processes that can run independent of databricks as branch rules so that we dont # deploy at cost code that we already should know needs changing
-# such as linting, and unit test for python, maybe dab? verify
-# we run these on all pull request because if there is a hot fix it may not have passed through 
-# staging for example
-# qqqq check this is up to date
 name: CI - Pull Request Checks
 
-# Run CI on all pull requests
+# Run CI on every pull request, because a hotfix may not have passed through staging
 on:
   pull_request:
     branches:
       - '**'  # all branches
+  workflow_dispatch:
 
 jobs:
-  ci_checks:
-    name: "Linting, Unit Tests, DAB Verify"
+  lint:  # Placeholder job: only prints a warning until commitlint is wired in
+    name: "Linting"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Commit lint placeholder
+        # TODO: Implement commitlint using the config from the TELBlazor reference project
+        run: |
+          echo "WARNING: Commit lint step is currently a placeholder."
+          echo "Reference project: TELBlazor"
+          # Intentionally no `exit 1`: the placeholder must not block the rest of the pipeline yet.
+
+  pytest:
     runs-on: ubuntu-latest
 
     steps:
-      # Checkout code
-      - name: Checkout repository
+      - name: Check out repository
         uses: actions/checkout@v4
 
-      # Set up Python
-      - name: Setup Python
+      - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: "3.x"
-
-      # Install dependencies used for linting and unit tests
-      - name: Install dependencies
-        run: pip install -r requirements-dev.txt
-
-      # Run python unit tests
-      - name: Run Unit Tests
-        run: pytest tests/unit
-
-      # Run python lint
-      # qqqq on example used flake8 instead
-      # pyproject.toml will need configuring
-      - name: Run Linting
-        run: pylint src
-        
-      # qqqq to do run commit lint step and put in commit lint config
-      # see TELBlazor
-      - name: Commit lint
-      run: |
-          echo "Commit lint not implemented"
-          exit 1
-        
-      # qqqq to do run version generation step and put in commit lint config
-      # see TELBlazor
-      - name: Version Generation Test Run
-      run: |
-          echo "Version test run not implemented"
-          exit 1
+          python-version: "3.10"
+
+      - name: Upgrade pip
+        run: python -m pip install --upgrade pip
+
+      - name: Install project + test deps  # dev requirements, then the project itself as an editable install
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -e .
+      - name: Run pytest against unit tests (exclude Databricks tests)  # the "databricks" marker is deselected
+        run: |
+          pytest tests/unit-tests -m "not databricks" -v
+
+  dab-validate:
+    name: "Early warning of dab issues"
+    runs-on: ubuntu-latest
+    # Validation only, nothing is deployed, so the more privileged service principal is not needed here.
+    # TODO: this should use the dev environment, but its service principal must first be added to the GitHub environment.
+    environment: staging
+    env:  # Databricks CLI credentials, read from the selected GitHub environment's vars/secrets
+      DATABRICKS_HOST: ${{ vars.DBX_HOST }}
+      DATABRICKS_CLIENT_ID: ${{ vars.DBX_SP_ID }}
+      DATABRICKS_CLIENT_SECRET: ${{ secrets.DBX_SP_SECRET }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+    
+      - name: Set up Databricks CLI
+        uses: databricks/setup-cli@main  # NOTE(review): floating ref; consider pinning to a release tag
+
+      - name: Validate Bundle
+        # Open question: is an explicit target (-t) really required here? It appears to be resolved after databrickscfg, but should not need to be - TODO confirm.
+        run: databricks bundle validate -t staging
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,34 @@
+name: Just a POC Manual Integration Tests - Staging
+
+on:
+  workflow_dispatch:
+
+jobs:
+  integration_tests_staging:
+    name: "Deploy & Test in Staging"  # NOTE(review): no deploy step exists in this job; it only runs the bundle job
+    runs-on: ubuntu-latest
+    # The staging GitHub environment supplies the service principal vars and secrets used below
+    environment: staging
+    
+    env:
+      # Assumes the DAB bundle is already initialised in the repo. In a real pipeline this would run after the DAB deployment, unless the tests can be run directly against the repo files.
+      # The staging service principal credentials come from the GitHub environment's secrets/vars.
+      # Staging is used here because the environment variables for dev have not been set up in GitHub yet.
+      DATABRICKS_HOST: ${{ vars.DBX_HOST }}
+      DATABRICKS_CLIENT_ID: ${{ vars.DBX_SP_ID }}
+      DATABRICKS_CLIENT_SECRET: ${{ secrets.DBX_SP_SECRET }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Databricks CLI
+        uses: databricks/setup-cli@main  # NOTE(review): floating ref; consider pinning to a release tag
+
+      # Open question: should this step run after the DAB deploy, or against a dev deployment first?
+      # Ideally known-bad code never sits in the repo after a failure, so running these tests locally via a pre-push hook may be a better fit.
+      # As currently set up, the notebook actually runs both (presumably unit and integration tests - TODO confirm); acceptable for now.
+      - name: Run Integration Tests
+        run: |
+          echo "running the integration yml test job integration_test_job.yml"
+          databricks bundle run -t staging run_integration_tests
diff --git a/.github/workflows/manual-integration-test-poc.yml b/.github/workflows/manual-integration-test-poc.yml
@@ -0,0 +1,14 @@
+# POC: manual trigger intended to tell Databricks (dbx) to run the integration tests. NOTE(review): this file defines no `jobs:` section yet, so nothing consumes the `deploy_target` input.
+name: Trying to trigger dbx integration tests in dbx environ
+
+on:
+  workflow_dispatch:
+    inputs:
+      deploy_target:
+        description: 'Which DAB target to deploy to?'
+        required: true
+        default: 'dev'
+        type: choice
+        options:
+          - dev
+          - staging
diff --git a/.github/workflows/manual-trigger-test-poc.yml b/.github/workflows/manual-trigger-test-poc.yml
@@ -24,6 +24,6 @@ jobs:
           pip install -r requirements-dev.txt
           pip install -e .
 
-      - name: Run pytest (exclude Databricks tests)
+      - name: Run pytest against unit tests (exclude Databricks tests)
         run: |
-          pytest -m "not databricks" -v
+          pytest tests/unit-tests -m "not databricks" -v
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1 +1,2 @@
-pytest
+pytest
+pyspark
diff --git a/tests/unit-tests/transformations/test_date_transformations.py b/tests/unit-tests/transformations/test_date_transformations.py
@@ -25,13 +25,7 @@
 #     return session
 
 
-@pytest.fixture(scope="session")
-def spark():
-    """Provide Spark session for all tests"""
-    session = SparkSession.getActiveSession()
-    if session is None:
-        raise RuntimeError("No active Spark session found. Running in Databricks?")
-    return session
+
 
 
 @pytest.fixture(scope="function")
@@ -226,4 +220,4 @@ def test_working_days_values_are_reasonable(spark, sample_dataframe_multiple_mon
         assert 19 <= working_days <= 23, \
             f"Working days should be between 19-23, got {working_days} for {row['start_date']}"
     
-    print("✅ All working days values are reasonable")
+    print("✅ All working days values are reasonable")