From b0afb594ccf419f7c62f5bdd3cf5f38e7da15d7c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 3 Oct 2025 23:35:17 +0000 Subject: [PATCH 1/7] Initial plan From 0d3d36704d6e59ec8e09b115905e7ce138600097 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 3 Oct 2025 23:40:21 +0000 Subject: [PATCH 2/7] Add PL HTML linter with GitHub Actions workflow Co-authored-by: pconrad <1119017+pconrad@users.noreply.github.com> --- .github/workflows/lint-html.yml | 27 +++++++ .gitignore | 40 ++++++++++ README.md | 48 ++++++++++++ lint_html.py | 127 ++++++++++++++++++++++++++++++++ 4 files changed, 242 insertions(+) create mode 100644 .github/workflows/lint-html.yml create mode 100644 .gitignore create mode 100644 README.md create mode 100755 lint_html.py diff --git a/.github/workflows/lint-html.yml b/.github/workflows/lint-html.yml new file mode 100644 index 0000000..9a231d0 --- /dev/null +++ b/.github/workflows/lint-html.yml @@ -0,0 +1,27 @@ +name: Lint PL HTML Files + +on: + workflow_dispatch: # Allow manual triggering + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint-html: + name: Lint HTML Files + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Run HTML linter + run: python3 lint_html.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5cb6f07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Temporary files +/tmp/ +*.tmp +*.bak diff --git a/README.md b/README.md new 
file mode 100644 index 0000000..41de44d --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +# PL HTML Linter + +This repository contains a linter for PL (PrairieLearn) HTML files. + +## Features + +The linter checks HTML files for: + +1. **XML Syntax Validation**: Ensures that HTML files have valid XML syntax, including: + - Properly formatted tags + - Properly nested elements + - Correct attribute syntax + +2. **Custom PL Rules**: Framework for adding PrairieLearn-specific validation rules (extensible) + +## Automated Linting + +The linter runs automatically via GitHub Actions on: +- Manual workflow dispatch +- Push to the `main` branch +- Pull requests to the `main` branch + +## Running Locally + +To run the linter locally: + +```bash +python3 lint_html.py +``` + +The script will: +1. Find all `.html` and `.HTML` files in the repository (excluding `.git` directory) +2. Validate each file for XML syntax +3. Apply any custom PL-specific rules +4. Report errors with line numbers and descriptions + +## Exit Codes + +- `0`: All files passed linting +- `1`: One or more files failed linting + +## Requirements + +- Python 3.x (uses standard library modules) + +## Extending the Linter + +To add custom validation rules, modify the `check_custom_rules()` function in `lint_html.py`. diff --git a/lint_html.py b/lint_html.py new file mode 100755 index 0000000..1ec595a --- /dev/null +++ b/lint_html.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +""" +Linter for PL HTML files. + +This script checks HTML files for: +1. General XML syntax (proper tags, nesting, attributes) +2. 
Custom PL-specific rules (extensible) +""" + +import sys +import os +import glob +from xml.etree import ElementTree as ET +from pathlib import Path + + +def find_html_files(root_dir="."): + """Find all HTML files in the repository.""" + html_files = [] + for pattern in ["**/*.html", "**/*.HTML"]: + html_files.extend(glob.glob(os.path.join(root_dir, pattern), recursive=True)) + + # Filter out .git directory + html_files = [f for f in html_files if ".git" not in f] + return sorted(html_files) + + +def check_xml_syntax(file_path): + """ + Check if the HTML file has valid XML syntax. + This includes: + - Properly formatted tags + - Properly nested elements + - Correct attribute syntax + """ + errors = [] + + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Try to parse as XML + try: + ET.fromstring(content) + except ET.ParseError as e: + errors.append(f"XML syntax error: {str(e)}") + + except FileNotFoundError: + errors.append(f"File not found: {file_path}") + except Exception as e: + errors.append(f"Error reading file: {str(e)}") + + return errors + + +def check_custom_rules(file_path): + """ + Check custom PL-specific rules. + + This function can be extended with additional rules as needed. 
+ """ + errors = [] + + # Placeholder for custom rules + # Add specific rule checks here as they are defined + + return errors + + +def lint_file(file_path): + """Lint a single HTML file.""" + all_errors = [] + + # Check XML syntax + syntax_errors = check_xml_syntax(file_path) + if syntax_errors: + all_errors.extend(syntax_errors) + + # Check custom rules + custom_errors = check_custom_rules(file_path) + if custom_errors: + all_errors.extend(custom_errors) + + return all_errors + + +def main(): + """Main entry point for the linter.""" + # Get the repository root directory + repo_root = os.getenv("GITHUB_WORKSPACE", ".") + + print(f"Scanning for HTML files in: {repo_root}") + html_files = find_html_files(repo_root) + + if not html_files: + print("No HTML files found.") + return 0 + + print(f"Found {len(html_files)} HTML file(s) to lint:") + for f in html_files: + print(f" - {f}") + print() + + has_errors = False + for file_path in html_files: + print(f"Linting: {file_path}") + errors = lint_file(file_path) + + if errors: + has_errors = True + print(f" โŒ FAILED with {len(errors)} error(s):") + for error in errors: + print(f" - {error}") + else: + print(f" โœ“ PASSED") + print() + + if has_errors: + print("โŒ Linting failed! Please fix the errors above.") + return 1 + else: + print("โœ“ All files passed linting!") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From d1904738e4471831400dd62e0688e23bc60c8e50 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 3 Oct 2025 23:41:40 +0000 Subject: [PATCH 3/7] Add example HTML file for testing Co-authored-by: pconrad <1119017+pconrad@users.noreply.github.com> --- example.html | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 example.html diff --git a/example.html b/example.html new file mode 100644 index 0000000..d4239bb --- /dev/null +++ b/example.html @@ -0,0 +1,12 @@ + + + Example PL HTML File + + +

PrairieLearn Question

+

This is an example HTML file that passes the linter.

+
+ 42 +
+ + From bb139c0ea2b3dd997966773874b34f3123e6f82d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 3 Oct 2025 23:42:37 +0000 Subject: [PATCH 4/7] Fix YAML syntax for workflow triggers Co-authored-by: pconrad <1119017+pconrad@users.noreply.github.com> --- .github/workflows/lint-html.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint-html.yml b/.github/workflows/lint-html.yml index 9a231d0..2490325 100644 --- a/.github/workflows/lint-html.yml +++ b/.github/workflows/lint-html.yml @@ -1,6 +1,6 @@ name: Lint PL HTML Files -on: +'on': workflow_dispatch: # Allow manual triggering push: branches: From bbfc9a9aeac2ffc0a8bbed6df1a220403767cf54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 3 Oct 2025 23:44:17 +0000 Subject: [PATCH 5/7] Enhance README with custom rule examples Co-authored-by: pconrad <1119017+pconrad@users.noreply.github.com> --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 41de44d..b0fd692 100644 --- a/README.md +++ b/README.md @@ -46,3 +46,28 @@ The script will: ## Extending the Linter To add custom validation rules, modify the `check_custom_rules()` function in `lint_html.py`. + +### Example: Adding a Custom Rule + +To add a new rule (e.g., checking for specific attributes or element patterns), edit the `check_custom_rules()` function: + +```python +def check_custom_rules(file_path): + """Check custom PL-specific rules.""" + errors = [] + + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Example: Check for specific pattern + # if 'pattern' not in content: + # errors.append("Missing required pattern") + + except Exception as e: + errors.append(f"Error checking custom rules: {str(e)}") + + return errors +``` + +The linter will automatically run your custom rules on all HTML files. 
From 93c608ef15d21766b32e3dfd0290b19e728fee2a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Oct 2025 00:08:24 +0000 Subject: [PATCH 6/7] Add invalid HTML example and pl-multiple-choice nesting rule Co-authored-by: pconrad <1119017+pconrad@users.noreply.github.com> --- README.md | 14 +++++++++++++- example_invalid.html | 12 ++++++++++++ example_pl_invalid.html | 13 +++++++++++++ example_pl_valid.html | 4 ++++ lint_html.py | 40 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 example_invalid.html create mode 100644 example_pl_invalid.html create mode 100644 example_pl_valid.html diff --git a/README.md b/README.md index b0fd692..ec3072b 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,10 @@ The linter checks HTML files for: - Properly nested elements - Correct attribute syntax -2. **Custom PL Rules**: Framework for adding PrairieLearn-specific validation rules (extensible) +2. **PrairieLearn-Specific Rules**: + - `<pl-multiple-choice>` elements must be the root element of the document (not nested inside any other element) + +3. 
**Extensible Framework**: Additional custom validation rules can be easily added ## Automated Linting @@ -39,6 +42,15 @@ The script will: - `0`: All files passed linting - `1`: One or more files failed linting +## Example Files + +The repository includes example HTML files to demonstrate the linter's functionality: + +- `example.html` - A valid HTML file that passes all checks +- `example_invalid.html` - An invalid HTML file with mismatched tags (XML syntax error) +- `example_pl_valid.html` - A valid PrairieLearn file with `<pl-multiple-choice>` as root element +- `example_pl_invalid.html` - An invalid PrairieLearn file with nested `<pl-multiple-choice>` element + ## Requirements - Python 3.x (uses standard library modules) diff --git a/example_invalid.html b/example_invalid.html new file mode 100644 index 0000000..c9edc7e --- /dev/null +++ b/example_invalid.html @@ -0,0 +1,12 @@ + + + Invalid HTML Example + + +

This heading is not closed properly +

This paragraph follows an unclosed heading tag

+
+ Properly nested content +
+ + diff --git a/example_pl_invalid.html b/example_pl_invalid.html new file mode 100644 index 0000000..fa55c7e --- /dev/null +++ b/example_pl_invalid.html @@ -0,0 +1,13 @@ + + + Invalid PL Multiple Choice - Nested + + +
+ + Option A + Option B + +
+ + diff --git a/example_pl_valid.html b/example_pl_valid.html new file mode 100644 index 0000000..c92d0d8 --- /dev/null +++ b/example_pl_valid.html @@ -0,0 +1,4 @@ + + Option A + Option B + diff --git a/lint_html.py b/lint_html.py index 1ec595a..d3a6175 100755 --- a/lint_html.py +++ b/lint_html.py @@ -61,8 +61,44 @@ def check_custom_rules(file_path): """ errors = [] - # Placeholder for custom rules - # Add specific rule checks here as they are defined + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Try to parse the file as XML + try: + tree = ET.fromstring(content) + except ET.ParseError: + # If XML parsing fails, we can't check custom rules + # The XML syntax error will be caught by check_xml_syntax + return errors + + # Rule: must NOT be nested inside another element + # It must be the root element (have no parent) + def check_pl_multiple_choice_nesting(element, is_root=True): + """Recursively check if pl-multiple-choice is properly placed.""" + local_errors = [] + + if element.tag == 'pl-multiple-choice' and not is_root: + # pl-multiple-choice found but it's not the root element + local_errors.append( + f" element must not be nested inside other elements. " + f"It must be the root element of the document." 
+ ) + + # Recursively check children (they are not root) + for child in element: + local_errors.extend(check_pl_multiple_choice_nesting(child, False)) + + return local_errors + + # Check the rule starting from the root + errors.extend(check_pl_multiple_choice_nesting(tree, True)) + + except FileNotFoundError: + errors.append(f"File not found: {file_path}") + except Exception as e: + errors.append(f"Error checking custom rules: {str(e)}") return errors From 03fb363baed49a48267e6b29cd1e8628a2607557 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 4 Oct 2025 00:23:49 +0000 Subject: [PATCH 7/7] Convert to reusable workflow with test mode support Co-authored-by: pconrad <1119017+pconrad@users.noreply.github.com> --- .github/workflows/lint-html.yml | 24 ++++++---- .github/workflows/test-linter.yml | 18 ++++++++ README.md | 76 ++++++++++++++++++++++++++++--- lint_html.py | 67 ++++++++++++++++++++++++--- 4 files changed, 164 insertions(+), 21 deletions(-) create mode 100644 .github/workflows/test-linter.yml diff --git a/.github/workflows/lint-html.yml b/.github/workflows/lint-html.yml index 2490325..cb60936 100644 --- a/.github/workflows/lint-html.yml +++ b/.github/workflows/lint-html.yml @@ -1,13 +1,18 @@ -name: Lint PL HTML Files +name: Lint PL HTML Files (Reusable) 'on': - workflow_dispatch: # Allow manual triggering - push: - branches: - - main - pull_request: - branches: - - main + workflow_call: + inputs: + test-mode: + description: 'Enable test mode (expects certain files to fail for validation)' + required: false + type: boolean + default: false + expected-failures: + description: 'Comma-separated list of filenames that should fail (only in test mode)' + required: false + type: string + default: '' jobs: lint-html: @@ -24,4 +29,7 @@ jobs: python-version: '3.x' - name: Run HTML linter + env: + TEST_MODE: ${{ inputs.test-mode }} + EXPECTED_FAILURES: ${{ inputs.expected-failures }} run: python3 
lint_html.py diff --git a/.github/workflows/test-linter.yml b/.github/workflows/test-linter.yml new file mode 100644 index 0000000..574273d --- /dev/null +++ b/.github/workflows/test-linter.yml @@ -0,0 +1,18 @@ +name: Test Linter + +'on': + workflow_dispatch: # Allow manual triggering + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test-lint: + name: Test Linter (with expected failures) + uses: ./.github/workflows/lint-html.yml + with: + test-mode: true + expected-failures: 'example_invalid.html,example_pl_invalid.html' diff --git a/README.md b/README.md index ec3072b..771ae3e 100644 --- a/README.md +++ b/README.md @@ -16,21 +16,80 @@ The linter checks HTML files for: 3. **Extensible Framework**: Additional custom validation rules can be easily added -## Automated Linting +## Using the Reusable Workflow -The linter runs automatically via GitHub Actions on: -- Manual workflow dispatch -- Push to the `main` branch -- Pull requests to the `main` branch +This repository provides a **reusable GitHub Actions workflow** that can be called from other repositories containing PrairieLearn content. + +### In Your Repository + +To use the linter in your own repository, create a workflow file (e.g., `.github/workflows/lint-html.yml`): + +```yaml +name: Lint HTML Files + +'on': + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint: + uses: ucsb-cs/pl-linter/.github/workflows/lint-html.yml@main +``` + +This will automatically lint all HTML files in your repository and fail the workflow if any errors are found. + +### In This Repository (Test Mode) + +This repository uses a special test mode to validate that the linter correctly detects errors. 
The workflow in this repo expects certain files to fail: + +```yaml +name: Test Linter + +'on': + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test-lint: + uses: ./.github/workflows/lint-html.yml + with: + test-mode: true + expected-failures: 'example_invalid.html,example_pl_invalid.html' +``` + +In test mode: +- Files listed in `expected-failures` must fail linting (to verify error detection works) +- All other files must pass linting +- The workflow succeeds only if this behavior is correct ## Running Locally -To run the linter locally: +### Normal Mode + +To run the linter locally in normal mode (all files must pass): ```bash python3 lint_html.py ``` +### Test Mode + +To run the linter in test mode (for testing the linter itself): + +```bash +TEST_MODE=true EXPECTED_FAILURES="example_invalid.html,example_pl_invalid.html" python3 lint_html.py +``` + The script will: 1. Find all `.html` and `.HTML` files in the repository (excluding `.git` directory) 2. 
Validate each file for XML syntax @@ -39,9 +98,14 @@ The script will: ## Exit Codes +**Normal Mode:** - `0`: All files passed linting - `1`: One or more files failed linting +**Test Mode:** +- `0`: Files that should fail did fail, and files that should pass did pass +- `1`: Unexpected pass/fail results + ## Example Files The repository includes example HTML files to demonstrate the linter's functionality: diff --git a/lint_html.py b/lint_html.py index d3a6175..05e351d 100755 --- a/lint_html.py +++ b/lint_html.py @@ -125,6 +125,17 @@ def main(): # Get the repository root directory repo_root = os.getenv("GITHUB_WORKSPACE", ".") + # Check if we're in test mode + test_mode = os.getenv("TEST_MODE", "").lower() in ["true", "1", "yes"] + expected_failures_str = os.getenv("EXPECTED_FAILURES", "") + expected_failures = set() + + if test_mode and expected_failures_str: + # Parse expected failures (comma-separated list of filenames) + expected_failures = set(f.strip() for f in expected_failures_str.split(",") if f.strip()) + print(f"๐Ÿงช TEST MODE: Expecting these files to fail: {', '.join(sorted(expected_failures))}") + print() + print(f"Scanning for HTML files in: {repo_root}") html_files = find_html_files(repo_root) @@ -137,13 +148,18 @@ def main(): print(f" - {f}") print() - has_errors = False + # Track results + results = {} + for file_path in html_files: print(f"Linting: {file_path}") errors = lint_file(file_path) + # Get just the filename for comparison + filename = os.path.basename(file_path) + results[filename] = {"has_errors": bool(errors), "errors": errors} + if errors: - has_errors = True print(f" โŒ FAILED with {len(errors)} error(s):") for error in errors: print(f" - {error}") @@ -151,12 +167,49 @@ def main(): print(f" โœ“ PASSED") print() - if has_errors: - print("โŒ Linting failed! 
Please fix the errors above.") - return 1 + # Determine overall pass/fail based on mode + if test_mode: + # In test mode: verify that expected failures actually fail + # and files not in expected failures pass + test_passed = True + + print("=" * 60) + print("TEST MODE VALIDATION") + print("=" * 60) + + for filename, result in sorted(results.items()): + should_fail = filename in expected_failures + did_fail = result["has_errors"] + + if should_fail and did_fail: + print(f"โœ“ {filename}: Correctly detected as INVALID") + elif not should_fail and not did_fail: + print(f"โœ“ {filename}: Correctly detected as VALID") + elif should_fail and not did_fail: + print(f"โœ— {filename}: Expected to FAIL but PASSED") + test_passed = False + else: # not should_fail and did_fail + print(f"โœ— {filename}: Expected to PASS but FAILED") + test_passed = False + + print("=" * 60) + + if test_passed: + print("โœ“ Test mode: All validations passed!") + return 0 + else: + print("โŒ Test mode: Some validations failed!") + return 1 else: - print("โœ“ All files passed linting!") - return 0 + # Normal mode: all files must pass + has_errors = any(result["has_errors"] for result in results.values()) + + if has_errors: + print("โŒ Linting failed! Please fix the errors above.") + return 1 + else: + print("โœ“ All files passed linting!") + return 0 if __name__ == "__main__":