diff --git a/.github/workflows/linguist.yml b/.github/workflows/linguist.yml
new file mode 100644
index 00000000..1c6d5f76
--- /dev/null
+++ b/.github/workflows/linguist.yml
@@ -0,0 +1,170 @@
+# GitHub Actions Workflow: Linguist
+# Keeps the pinned github-linguist commit SHA current: weekly (or on manual
+# dispatch) it resolves the latest upstream commit, rewrites the pin in
+# internal/generate/knownfiles/main.go, regenerates known_files_gen.go and
+# opens a pull request with the result.
+---
+name: Linguist update
+
+# The yamllint directive sits on the "on:" line itself (the key the truthy
+# rule reports) and carries no trailing text, so yamllint recognises it.
+on:  # yamllint disable-line rule:truthy
+  schedule:
+    - cron: '0 0 * * 1'  # every Monday at midnight (UTC)
+  workflow_dispatch:
+
+# Read-only by default; the job opts in to the write scopes it needs.
+permissions:
+  contents: read
+  pull-requests: read
+
+# Naming bash explicitly makes the runner invoke it with "-eo pipefail", so a
+# failure on the left-hand side of a pipe fails the step.
+defaults:
+  run:
+    shell: bash
+
+jobs:
+  update-sha:
+    name: Update Linguist SHA
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # push the update branch
+      pull-requests: write  # open the pull request
+    steps:
+      - name: Checkout
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 0
+
+      # Resolve the commit that upstream HEAD points at and validate it before
+      # any later step consumes it.
+      - name: Fetch latest Linguist SHA
+        id: fetch-sha
+        run: |
+          SHA=$(git ls-remote https://github.com/github-linguist/linguist.git HEAD | awk '{print $1}')
+          if [[ -z "$SHA" ]]; then
+            echo "Empty output"
+            exit 1
+          fi
+          if [[ ! "$SHA" =~ ^[0-9a-f]{40}$ ]]; then
+            echo "Invalid SHA format"
+            exit 1
+          fi
+
+          echo "sha=$SHA" >> "$GITHUB_OUTPUT"
+
+      # Look for an open PR that already targets this SHA. Step outputs reach the
+      # scripts through env rather than inline expressions, so they are never
+      # spliced into shell source.
+      - name: Check existing PR
+        id: check-pr
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          LINGUIST_SHA: ${{ steps.fetch-sha.outputs.sha }}
+        run: |
+          SHA_EXISTS=$(gh pr list \
+            --state open \
+            --search "in:title \"chore: update github-linguist SHA to ${LINGUIST_SHA}\"" \
+            --json number \
+            --jq 'length' \
+            --repo "$GITHUB_REPOSITORY")
+
+          if [[ "$SHA_EXISTS" != "0" ]]; then
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+            echo "Skipping PR creation - existing PR with same SHA found"
+          else
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Update pinned SHA in internal/generate/knownfiles/main.go
+        id: update-sha
+        if: steps.check-pr.outputs.skip != 'true'
+        env:
+          LINGUIST_SHA: ${{ steps.fetch-sha.outputs.sha }}
+        run: |
+          FILEPATH=internal/generate/knownfiles/main.go
+          SHA_REGEX='[0-9a-f]{40}'
+          FIELD_REGEX='linguistSHA\s*=\s*"'
+
+          if ! grep -qE "${FIELD_REGEX}${SHA_REGEX}\"" "$FILEPATH"; then
+            echo "Missing linguistSHA line"
+            exit 1
+          fi
+
+          EXISTING_SHA=$(grep -oE "${FIELD_REGEX}${SHA_REGEX}" "$FILEPATH" | grep -oE "$SHA_REGEX")
+
+          if [[ "$EXISTING_SHA" == "$LINGUIST_SHA" ]]; then
+            echo "sha_updated=false" >> "$GITHUB_OUTPUT"
+            echo "SHA already up to date, skipping"
+            exit 0
+          fi
+
+          sed -E -i "s/(${FIELD_REGEX})${SHA_REGEX}(\")/\1${LINGUIST_SHA}\2/" "$FILEPATH"
+
+          if git diff --quiet; then
+            echo "sha_updated=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "sha_updated=true" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Generate known_files_gen.go
+        if: steps.update-sha.outputs.sha_updated == 'true' && steps.check-pr.outputs.skip != 'true'
+        run: |
+          go generate ./pkg/filetype/file_type.go
+          git diff pkg/filetype/known_files_gen.go
+
+      - name: Commit changes
+        if: steps.update-sha.outputs.sha_updated == 'true' && steps.check-pr.outputs.skip != 'true'
+        run: |
+          git config --local user.email "action@github.com"
+          git config --local user.name "GitHub Action"
+          git add internal/generate/knownfiles/main.go pkg/filetype/known_files_gen.go
+          git commit -m "chore: update github-linguist SHA"
+
+      - name: Create branch and push
+        id: create-branch
+        if: steps.update-sha.outputs.sha_updated == 'true' && steps.check-pr.outputs.skip != 'true'
+        run: |
+          BRANCH="autopr/${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+          git checkout -b "$BRANCH"
+          git push --set-upstream origin "$BRANCH"
+          echo "branch=$BRANCH" >> "$GITHUB_OUTPUT"
+
+      - name: Create pull request
+        if: steps.update-sha.outputs.sha_updated == 'true' && steps.check-pr.outputs.skip != 'true'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          LINGUIST_SHA: ${{ steps.fetch-sha.outputs.sha }}
+          HEAD_BRANCH: ${{ steps.create-branch.outputs.branch }}
+          BASE_BRANCH: ${{ github.event.repository.default_branch }}
+        run: |
+          # The event payload may lack repository data (e.g. on scheduled runs),
+          # which would leave --base without a value; ask the API in that case.
+          BASE="$BASE_BRANCH"
+          if [[ -z "$BASE" ]]; then
+            BASE=$(gh repo view "$GITHUB_REPOSITORY" --json defaultBranchRef --jq '.defaultBranchRef.name')
+          fi
+
+          PR_BODY=$(cat <<EOF
+          ## Automated SHA bump
+
+          Bumps the pinned [github/linguist](https://github.com/github/linguist) SHA in the file [internal/generate/knownfiles/main.go](internal/generate/knownfiles/main.go) to the latest version.
+          Also regenerates the [pkg/filetype/known_files_gen.go](pkg/filetype/known_files_gen.go) file with the updated SHA and includes it in the PR.
+
+          > This PR was automatically created by workflow run [${GITHUB_RUN_ID}](${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID})
+          EOF
+          )
+
+          gh pr create \
+            --title "chore: update github-linguist SHA to ${LINGUIST_SHA}" \
+            --body "$PR_BODY" \
+            --base "$BASE" \
+            --head "$HEAD_BRANCH"
+
+      - name: Cleanup branch on failure
+        if: failure() && steps.create-branch.outputs.branch != ''
+        env:
+          HEAD_BRANCH: ${{ steps.create-branch.outputs.branch }}
+        run: |
+          git push origin --delete "$HEAD_BRANCH" || true
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2d1e9336..24f7ee9d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - JSON and JSONC treated as a family for `--file-types` and `--exclude-file-types`
 - `go generate` step in CI pipeline to keep Linguist data fresh
 - CI lint check to ensure generated files are committed up to date
+- Automated Linguist SHA updates via scheduled GitHub Actions workflow (`linguist.yml`) that checks SHA weekly
 
 ### Fixed
 
diff --git a/internal/generate/knownfiles/main.go b/internal/generate/knownfiles/main.go
index dc5cf359..e06db7dd 100644
--- a/internal/generate/knownfiles/main.go
+++ b/internal/generate/knownfiles/main.go
@@ -25,7 +25,8 @@ import (
 )
 
 const (
-	linguistURL = "https://raw.githubusercontent.com/github-linguist/linguist/refs/heads/main/lib/linguist/languages.yml"
+	linguistSHA = "e535c9adf5306132e9df0b75ffe1ce2679873fe8" // DevSkim: ignore DS173237 - this is a commit SHA, not a secret
+	linguistURL = "https://raw.githubusercontent.com/github-linguist/linguist/" + linguistSHA + "/lib/linguist/languages.yml"
 	outputFile  = "known_files_gen.go"
 )
 
diff --git a/pkg/filetype/known_files_gen.go b/pkg/filetype/known_files_gen.go
index 786d86a1..398bf0f2 100644
--- a/pkg/filetype/known_files_gen.go
+++ b/pkg/filetype/known_files_gen.go
@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// Source: https://raw.githubusercontent.com/github-linguist/linguist/refs/heads/main/lib/linguist/languages.yml
+// Source: https://raw.githubusercontent.com/github-linguist/linguist/e535c9adf5306132e9df0b75ffe1ce2679873fe8/lib/linguist/languages.yml
 
 package filetype
 