diff --git a/.github/workflows/lint-html.yml b/.github/workflows/lint-html.yml new file mode 100644 index 0000000..cb60936 --- /dev/null +++ b/.github/workflows/lint-html.yml @@ -0,0 +1,35 @@ +name: Lint PL HTML Files (Reusable) + +'on': + workflow_call: + inputs: + test-mode: + description: 'Enable test mode (expects certain files to fail for validation)' + required: false + type: boolean + default: false + expected-failures: + description: 'Comma-separated list of filenames that should fail (only in test mode)' + required: false + type: string + default: '' + +jobs: + lint-html: + name: Lint HTML Files + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Run HTML linter + env: + TEST_MODE: ${{ inputs.test-mode }} + EXPECTED_FAILURES: ${{ inputs.expected-failures }} + run: python3 lint_html.py diff --git a/.github/workflows/test-linter.yml b/.github/workflows/test-linter.yml new file mode 100644 index 0000000..574273d --- /dev/null +++ b/.github/workflows/test-linter.yml @@ -0,0 +1,18 @@ +name: Test Linter + +'on': + workflow_dispatch: # Allow manual triggering + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test-lint: + name: Test Linter (with expected failures) + uses: ./.github/workflows/lint-html.yml + with: + test-mode: true + expected-failures: 'example_invalid.html,example_pl_invalid.html' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5cb6f07 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Temporary files +/tmp/ +*.tmp +*.bak diff --git a/README.md b/README.md new 
file mode 100644 index 0000000..771ae3e --- /dev/null +++ b/README.md @@ -0,0 +1,149 @@ +# PL HTML Linter + +This repository contains a linter for PL (PrairieLearn) HTML files. + +## Features + +The linter checks HTML files for: + +1. **XML Syntax Validation**: Ensures that HTML files have valid XML syntax, including: + - Properly formatted tags + - Properly nested elements + - Correct attribute syntax + +2. **PrairieLearn-Specific Rules**: + - `<pl-multiple-choice>` elements must be the root element of the document (not nested inside any other element) + +3. **Extensible Framework**: Additional custom validation rules can be easily added + +## Using the Reusable Workflow + +This repository provides a **reusable GitHub Actions workflow** that can be called from other repositories containing PrairieLearn content. + +### In Your Repository + +To use the linter in your own repository, create a workflow file (e.g., `.github/workflows/lint-html.yml`): + +```yaml +name: Lint HTML Files + +'on': + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + lint: + uses: ucsb-cs/pl-linter/.github/workflows/lint-html.yml@main +``` + +This will automatically lint all HTML files in your repository and fail the workflow if any errors are found. + +### In This Repository (Test Mode) + +This repository uses a special test mode to validate that the linter correctly detects errors. 
The workflow in this repo expects certain files to fail: + +```yaml +name: Test Linter + +'on': + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test-lint: + uses: ./.github/workflows/lint-html.yml + with: + test-mode: true + expected-failures: 'example_invalid.html,example_pl_invalid.html' +``` + +In test mode: +- Files listed in `expected-failures` must fail linting (to verify error detection works) +- All other files must pass linting +- The workflow succeeds only if this behavior is correct + +## Running Locally + +### Normal Mode + +To run the linter locally in normal mode (all files must pass): + +```bash +python3 lint_html.py +``` + +### Test Mode + +To run the linter in test mode (for testing the linter itself): + +```bash +TEST_MODE=true EXPECTED_FAILURES="example_invalid.html,example_pl_invalid.html" python3 lint_html.py +``` + +The script will: +1. Find all `.html` and `.HTML` files in the repository (excluding `.git` directory) +2. Validate each file for XML syntax +3. Apply any custom PL-specific rules +4. Report errors with line numbers and descriptions + +## Exit Codes + +**Normal Mode:** +- `0`: All files passed linting +- `1`: One or more files failed linting + +**Test Mode:** +- `0`: Files that should fail did fail, and files that should pass did pass +- `1`: Unexpected pass/fail results + +## Example Files + +The repository includes example HTML files to demonstrate the linter's functionality: + +- `example.html` - A valid HTML file that passes all checks +- `example_invalid.html` - An invalid HTML file with mismatched tags (XML syntax error) +- `example_pl_valid.html` - A valid PrairieLearn file with `<pl-multiple-choice>` as root element +- `example_pl_invalid.html` - An invalid PrairieLearn file with nested `<pl-multiple-choice>` element + +## Requirements + +- Python 3.x (uses standard library modules) + +## Extending the Linter + +To add custom validation rules, modify the `check_custom_rules()` function in `lint_html.py`. 
+ +### Example: Adding a Custom Rule + +To add a new rule (e.g., checking for specific attributes or element patterns), edit the `check_custom_rules()` function: + +```python +def check_custom_rules(file_path): + """Check custom PL-specific rules.""" + errors = [] + + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Example: Check for specific pattern + # if 'pattern' not in content: + # errors.append("Missing required pattern") + + except Exception as e: + errors.append(f"Error checking custom rules: {str(e)}") + + return errors +``` + +The linter will automatically run your custom rules on all HTML files. diff --git a/example.html b/example.html new file mode 100644 index 0000000..d4239bb --- /dev/null +++ b/example.html @@ -0,0 +1,12 @@ + + + Example PL HTML File + + +

PrairieLearn Question

+

This is an example HTML file that passes the linter.

+
+ 42 +
+ + diff --git a/example_invalid.html b/example_invalid.html new file mode 100644 index 0000000..c9edc7e --- /dev/null +++ b/example_invalid.html @@ -0,0 +1,12 @@ + + + Invalid HTML Example + + +

This heading is not closed properly +

This paragraph follows an unclosed heading tag

+
+ Properly nested content +
+ + diff --git a/example_pl_invalid.html b/example_pl_invalid.html new file mode 100644 index 0000000..fa55c7e --- /dev/null +++ b/example_pl_invalid.html @@ -0,0 +1,13 @@ + + + Invalid PL Multiple Choice - Nested + + +
+ + Option A + Option B + +
#!/usr/bin/env python3
# NOTE: the patch text below shared a physical line with the start of this
# module in the collapsed diff; it is retained verbatim so that no hunk is lost.
# + + diff --git a/example_pl_valid.html b/example_pl_valid.html new file mode 100644 index 0000000..c92d0d8 --- /dev/null +++ b/example_pl_valid.html @@ -0,0 +1,4 @@ + + Option A + Option B + diff --git a/lint_html.py b/lint_html.py new file mode 100755 index 0000000..05e351d --- /dev/null +++ b/lint_html.py @@ -0,0 +1,216 @@
"""
Linter for PL HTML files.

This script checks HTML files for:
1. General XML syntax (proper tags, nesting, attributes)
2. Custom PL-specific rules (extensible)
"""

import sys
import os
import glob
from xml.etree import ElementTree as ET
from pathlib import Path


# Tag that is only allowed as the document's root element.
ROOT_ONLY_TAG = "pl-multiple-choice"


def find_html_files(root_dir="."):
    """Return a sorted list of the HTML files found beneath ``root_dir``.

    The suffix is matched case-insensitively (``.html``, ``.HTML``,
    ``.Html`` ...) with a single glob pattern, so no file is returned twice.
    Anything located inside a ``.git`` directory is skipped.

    Args:
        root_dir: Directory to scan recursively.

    Returns:
        Sorted list of paths, each prefixed with ``root_dir`` exactly as
        ``glob.glob`` reports them.
    """
    pattern = os.path.join(root_dir, "**/*.[hH][tT][mM][lL]")
    html_files = []
    for candidate in set(glob.glob(pattern, recursive=True)):
        # Only regular files can be linted; a directory named "x.html" cannot.
        if not os.path.isfile(candidate):
            continue
        # Compare whole path components *relative to root_dir*.  A substring
        # test on the full path would also reject ".github/", "my.gitbook/"
        # or a checkout living in ".../user.github.io/".
        if ".git" in Path(os.path.relpath(candidate, root_dir)).parts:
            continue
        html_files.append(candidate)
    return sorted(html_files)


def check_xml_syntax(file_path):
    """Check that the file is well-formed XML.

    This covers properly formatted tags, properly nested elements and
    correct attribute syntax.

    Args:
        file_path: Path of the file to validate.

    Returns:
        A list of error strings; empty when the file parses cleanly.
    """
    errors = []

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            ET.fromstring(f.read())
    except ET.ParseError as e:
        errors.append(f"XML syntax error: {e}")
    except FileNotFoundError:
        errors.append(f"File not found: {file_path}")
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 input.
        errors.append(f"Error reading file: {e}")

    return errors


def check_custom_rules(file_path):
    """Check custom PL-specific rules.

    Current rule: a ``<pl-multiple-choice>`` element must be the root
    element of the document; one error is reported per nested occurrence.
    Files that are not well-formed XML produce no errors here, because
    ``check_xml_syntax`` is responsible for reporting those.

    This function can be extended with additional rules as needed.

    Args:
        file_path: Path of the file to validate.

    Returns:
        A list of error strings; empty when no rule is violated.
    """
    errors = []

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            root = ET.fromstring(f.read())
    except ET.ParseError:
        # Custom rules need a parsed tree; the syntax check reports this.
        return errors
    except FileNotFoundError:
        errors.append(f"File not found: {file_path}")
        return errors
    except (OSError, ValueError) as e:
        errors.append(f"Error checking custom rules: {e}")
        return errors

    # Element.iter() yields the root itself as well as all descendants, so
    # every match other than the root is a nested (invalid) occurrence.
    for element in root.iter(ROOT_ONLY_TAG):
        if element is not root:
            errors.append(
                "<pl-multiple-choice> element must not be nested inside "
                "other elements. It must be the root element of the document."
            )

    return errors


def lint_file(file_path):
    """Lint a single HTML file and return all error strings found.

    Custom rules are evaluated only when the syntax check succeeds: they
    need a parseable document, and running them on an unreadable file would
    merely repeat the same read error.

    Args:
        file_path: Path of the file to lint.

    Returns:
        A list of error strings; empty when the file passes.
    """
    syntax_errors = check_xml_syntax(file_path)
    if syntax_errors:
        return list(syntax_errors)
    return list(check_custom_rules(file_path))


def main():
    """Main entry point for the linter.

    Environment variables:
        GITHUB_WORKSPACE: directory to scan (defaults to ``.``).
        TEST_MODE: ``true``/``1``/``yes`` enables self-test mode.
        EXPECTED_FAILURES: comma-separated file names that must fail in
            test mode.

    Returns:
        Process exit code: 0 on success, 1 on failure.
    """
    # Get the repository root directory
    repo_root = os.getenv("GITHUB_WORKSPACE", ".")

    # Check if we're in test mode
    test_mode = os.getenv("TEST_MODE", "").lower() in ("true", "1", "yes")
    expected_failures = set()

    if test_mode:
        raw_expected = os.getenv("EXPECTED_FAILURES", "")
        expected_failures = {
            name.strip() for name in raw_expected.split(",") if name.strip()
        }
        if expected_failures:
            print(
                "🧪 TEST MODE: Expecting these files to fail: "
                f"{', '.join(sorted(expected_failures))}"
            )
            print()

    print(f"Scanning for HTML files in: {repo_root}")
    html_files = find_html_files(repo_root)

    if not html_files:
        print("No HTML files found.")
        return 0

    print(f"Found {len(html_files)} HTML file(s) to lint:")
    for f in html_files:
        print(f"  - {f}")
    print()

    # Keyed by full path: keying by basename would let "b/q.html" overwrite
    # the result of "a/q.html" and hide its failure.
    results = {}

    for file_path in html_files:
        print(f"Linting: {file_path}")
        errors = lint_file(file_path)
        results[file_path] = {"has_errors": bool(errors), "errors": errors}

        if errors:
            print(f"  ❌ FAILED with {len(errors)} error(s):")
            for error in errors:
                print(f"    - {error}")
        else:
            print("  ✓ PASSED")
        print()

    if not test_mode:
        # Normal mode: all files must pass
        if any(result["has_errors"] for result in results.values()):
            print("❌ Linting failed! Please fix the errors above.")
            return 1
        print("✓ All files passed linting!")
        return 0

    # Test mode: expected failures must fail, everything else must pass.
    test_passed = True

    print("=" * 60)
    print("TEST MODE VALIDATION")
    print("=" * 60)

    for file_path, result in sorted(results.items()):
        # Expected failures are given as bare file names.
        should_fail = os.path.basename(file_path) in expected_failures
        did_fail = result["has_errors"]

        if should_fail and did_fail:
            print(f"✓ {file_path}: Correctly detected as INVALID")
        elif not should_fail and not did_fail:
            print(f"✓ {file_path}: Correctly detected as VALID")
        elif should_fail:
            print(f"✗ {file_path}: Expected to FAIL but PASSED")
            test_passed = False
        else:
            print(f"✗ {file_path}: Expected to PASS but FAILED")
            test_passed = False

    # An expected failure that matches no scanned file proves nothing about
    # error detection, so treat it as a validation failure.
    scanned_names = {os.path.basename(path) for path in results}
    for name in sorted(expected_failures - scanned_names):
        print(f"✗ {name}: Expected to FAIL but no such file was scanned")
        test_passed = False

    print("=" * 60)

    if test_passed:
        print("✓ Test mode: All validations passed!")
        return 0
    print("❌ Test mode: Some validations failed!")
    return 1


if __name__ == "__main__":
    sys.exit(main())