diff --git a/.githooks/check-problematic-words.sh b/.githooks/check-problematic-words.sh
new file mode 100755
index 0000000..d96fcfc
--- /dev/null
+++ b/.githooks/check-problematic-words.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+#
+# Scan a commit message or the staged changes for problematic words.
+#
+# Usage:
+#   check-problematic-words.sh message <commit-msg-file>
+#   check-problematic-words.sh files
+#
+# Exits 1 when any pattern matches and 0 otherwise. Any other invocation
+# (including "message" with a missing file) prints the usage text and exits 0.
+
+# Define problematic words/patterns.
+# Each entry is an extended regular expression matched case-insensitively.
+PROBLEMATIC_PATTERNS=(
+  # AI/Assistant references
+  "claude|Claude"
+  "anthropic|Anthropic"
+  "AI-generated|ai-generated"
+  "AI generated|ai generated"
+  "artificial intelligence"
+  "machine learning model"
+  "language model"
+  "Co-Authored-By:.*Claude"
+  "noreply@anthropic"
+  "Generated with.*Claude"
+  "assistant|Assistant"
+  "chatbot|Chatbot"
+
+  # Generic/problematic code patterns (optional)
+  "TODO:.*fix.*later"
+  "HACK:"
+  "XXX:"
+  "FIXME:.*urgent"
+
+  # Security issues
+  "password.*=.*['\"]"
+  "api_key.*=.*['\"]"
+  "secret.*=.*['\"]"
+  "token.*=.*['\"]"
+
+  # Profanity/inappropriate content
+  "wtf|WTF"
+  "damn|DAMN"
+
+  # Company/personal info that shouldn't be committed
+  "internal only"
+  "confidential"
+  "do not distribute"
+)
+
+# Color codes for output. ANSI-C quoting stores the real escape character, so
+# messages can be printed verbatim with printf '%s' and backslashes in file
+# names or patterns are never reinterpreted.
+RED=$'\033[0;31m'
+YELLOW=$'\033[1;33m'
+NC=$'\033[0m' # No Color
+
+# Report every pattern in PROBLEMATIC_PATTERNS that matches the given text.
+# Arguments: $1 - text to scan
+#            $2 - label naming the text in the report
+# Outputs:   report on stdout, with at most three matching lines per pattern
+# Returns:   1 if any pattern matched, 0 otherwise
+check_content() {
+  local content="$1"
+  local context="$2"
+  local found_issues=0
+  local pattern
+
+  for pattern in "${PROBLEMATIC_PATTERNS[@]}"; do
+    # -e keeps a pattern with a leading dash from being parsed as an option.
+    if grep -qiE -e "$pattern" <<<"$content"; then
+      if [ "$found_issues" -eq 0 ]; then
+        printf '%s\n' "${RED}❌ Problematic content found in $context:${NC}"
+        found_issues=1
+      fi
+      printf '%s\n' "${YELLOW}  Pattern: $pattern${NC}"
+      grep -iE --color=always -e "$pattern" <<<"$content" | head -3
+      echo ""
+    fi
+  done
+
+  return "$found_issues"
+}
+
+# Check commit message
+if [ "$1" = "message" ]; then
+  COMMIT_MSG_FILE="$2"
+  if [ -f "$COMMIT_MSG_FILE" ]; then
+    COMMIT_MSG=$(<"$COMMIT_MSG_FILE")
+    check_content "$COMMIT_MSG" "commit message"
+    exit $?
+  fi
+fi
+
+# Check staged files
+if [ "$1" = "files" ]; then
+  echo "Checking staged files for problematic content..."
+
+  FOUND_ISSUES=0
+  # -z yields NUL-terminated, unquoted paths, so names containing spaces or
+  # other special characters arrive intact instead of being word-split.
+  while IFS= read -r -d '' file; do
+    case "$file" in
+      # Skip the .githooks directory itself (it contains the patterns we're
+      # checking for!)
+      .githooks/*)
+        continue
+        ;;
+      # Skip certain file types
+      *.jpg|*.png|*.gif|*.pdf|*.zip|*.tar|*.gz|*.pyc|*.so|*.dll)
+        continue
+        ;;
+    esac
+
+    # Collect the lines added by the staged diff. Only lines after the first
+    # hunk header are read, so the "+++ b/<path>" header is never taken for
+    # content and added lines that themselves begin with "+" are kept. Git
+    # prints no hunks for files it detects as binary, so they yield nothing.
+    CONTENT=$(git --literal-pathspecs diff --cached --no-color --no-ext-diff -- "$file" |
+      awk 'in_hunk && /^[+]/ { print substr($0, 2) } /^@@/ { in_hunk = 1 }')
+    if [ -n "$CONTENT" ]; then
+      if ! check_content "$CONTENT" "file: $file"; then
+        FOUND_ISSUES=1
+      fi
+    fi
+  done < <(git diff --cached --name-only --diff-filter=ACM -z)
+
+  exit $FOUND_ISSUES
+fi
+
+# Usage instructions if called without arguments
+echo "Usage:"
+echo "  $0 message <commit-msg-file>  - Check commit message"
+echo "  $0 files                      - Check staged files"
+echo ""
+echo "Checks for problematic words including:"
+echo "  - AI/Claude references"
+echo "  - Security issues (hardcoded passwords/keys)"
+echo "  - TODO/FIXME/HACK markers"
+echo "  - Inappropriate language"
+echo "  - Confidential markers"
diff --git a/.githooks/commit-msg b/.githooks/commit-msg
new file mode 100755
index 0000000..250ff92
--- /dev/null
+++ b/.githooks/commit-msg
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Git commit-msg hook to check for problematic words
+
+HOOK_DIR="$(dirname "$0")"
+"$HOOK_DIR/check-problematic-words.sh" message "$1"
+exit $?
\ No newline at end of file
diff --git a/.githooks/pre-commit b/.githooks/pre-commit
new file mode 100755
index 0000000..e0f2264
--- /dev/null
+++ b/.githooks/pre-commit
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Git pre-commit hook to check staged files for problematic words
+
+HOOK_DIR="$(dirname "$0")"
+"$HOOK_DIR/check-problematic-words.sh" files
+exit $?
\ No newline at end of file diff --git a/.github/allowed-licenses.txt b/.github/allowed-licenses.txt new file mode 100644 index 0000000..e6b8bbd --- /dev/null +++ b/.github/allowed-licenses.txt @@ -0,0 +1,29 @@ +# Allowed licenses for contributions +# One SPDX license identifier per line +# Lines starting with # are comments +# See https://spdx.org/licenses/ for full list + +# Permissive licenses +MIT +Apache-2.0 +BSD-3-Clause +BSD-2-Clause +ISC +0BSD +Unlicense + +# Python specific +Python-2.0 +PSF-2.0 + +# Compatible weak copyleft +MPL-2.0 +LGPL-2.1 +LGPL-3.0 + +# Public domain +CC0-1.0 + +# Special cases (review individually) +# GPL-3.0 - Only if project is also GPL-3.0 +# AGPL-3.0 - Only if project is also AGPL-3.0 \ No newline at end of file diff --git a/.github/workflows/license-check.yml b/.github/workflows/license-check.yml new file mode 100644 index 0000000..a85d963 --- /dev/null +++ b/.github/workflows/license-check.yml @@ -0,0 +1,226 @@ +name: License Check + +on: + pull_request: + paths: + - '**.py' + - '**.js' + - '**.go' + - '**.rs' + - '**.rb' + - '**.java' + - '**.c' + - '**.cpp' + - '**.h' + - '**.sh' + +permissions: + contents: read + pull-requests: write # For posting comments + +jobs: + scan-licenses: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install OSLiLi + run: | + pip install osslili + + - name: Get changed files + id: changed-files + run: | + # Get base branch + BASE_SHA="${{ github.event.pull_request.base.sha }}" + HEAD_SHA="${{ github.event.pull_request.head.sha }}" + + # Get list of added/modified files in PR + git diff --name-only --diff-filter=AM $BASE_SHA...$HEAD_SHA > changed_files.txt + + # Filter for source code files only + grep -E '\.(py|js|go|rs|rb|java|c|cpp|h|sh)$' changed_files.txt > source_files.txt || true + + if [ ! 
-s source_files.txt ]; then + echo "No source files changed" + echo "has_changes=false" >> $GITHUB_OUTPUT + else + echo "Found $(wc -l < source_files.txt) changed source files" + echo "has_changes=true" >> $GITHUB_OUTPUT + fi + + - name: Run OSLiLi on changed files + if: steps.changed-files.outputs.has_changes == 'true' + id: oslili-scan + run: | + # Create temporary directory for analysis + mkdir -p /tmp/pr_files + + # Copy changed files to temp directory preserving structure + while IFS= read -r file; do + if [ -f "$file" ]; then + mkdir -p "/tmp/pr_files/$(dirname "$file")" + cp "$file" "/tmp/pr_files/$file" + fi + done < source_files.txt + + # Run OSLiLi + echo "Running OSLiLi on changed files..." + if [ -d /tmp/pr_files ] && [ "$(ls -A /tmp/pr_files)" ]; then + oslili /tmp/pr_files --json > license_report_raw.json 2>/dev/null || echo "{\"licenses\": []}" > license_report_raw.json + else + echo "{\"licenses\": []}" > license_report_raw.json + fi + + # Validate and clean JSON output + cat > /tmp/validate_json.py << 'EOF' + import json + import sys + try: + with open('license_report_raw.json', 'r') as f: + content = f.read().strip() + if not content: + data = {'licenses': []} + else: + data = json.loads(content) + with open('license_report.json', 'w') as f: + json.dump(data, f, indent=2) + print('License report generated successfully') + except Exception as e: + print(f'Warning: Could not parse OSLiLi output: {e}') + with open('license_report.json', 'w') as f: + json.dump({'licenses': []}, f) + EOF + python3 /tmp/validate_json.py + + # Pretty print for logs + if [ -f license_report.json ]; then + cat license_report.json + fi + + - name: Check allowed licenses + if: steps.changed-files.outputs.has_changes == 'true' + id: check-licenses + run: | + # Check if allowed licenses file exists + if [ ! 
-f .github/allowed-licenses.txt ]; then + echo "No allowed-licenses.txt file, skipping license policy check" + echo "check_result=skipped" >> $GITHUB_OUTPUT + exit 0 + fi + + # Check licenses against allowed list + python3 << 'EOF' + import json + import sys + import os + + with open('license_report.json') as f: + report = json.load(f) + + with open('.github/allowed-licenses.txt') as f: + allowed = [line.strip() for line in f + if line.strip() and not line.startswith('#')] + + # Extract found licenses + found_licenses = {} + if 'licenses' in report and report['licenses']: + for lic in report['licenses']: + spdx_id = lic.get('spdx_id', 'Unknown') + file_path = lic.get('file', 'Unknown') + if spdx_id != 'Unknown': + if spdx_id not in found_licenses: + found_licenses[spdx_id] = [] + found_licenses[spdx_id].append(file_path) + + # Check for problematic licenses + problematic = {} + for lic, files in found_licenses.items(): + if lic not in allowed and lic != 'Unknown': + problematic[lic] = files + + # Generate report + report_lines = [] + report_lines.append("## License Check Report\n") + + if found_licenses: + report_lines.append("### Found Licenses\n") + for lic, files in sorted(found_licenses.items()): + status = "✅" if lic in allowed else "⚠️" + report_lines.append(f"- {status} **{lic}** ({len(files)} file(s))") + + if problematic: + report_lines.append("\n### ⚠️ Non-Allowed Licenses Detected\n") + for lic, files in sorted(problematic.items()): + report_lines.append(f"\n**{lic}:**") + for file in files[:5]: # Show max 5 files + report_lines.append(f" - {file.replace('/tmp/pr_files/', '')}") + if len(files) > 5: + report_lines.append(f" - ... 
and {len(files) - 5} more files") + + # Set outputs for GitHub Actions + with open(os.environ['GITHUB_OUTPUT'], 'a') as f: + f.write("check_result=failed\n") + f.write(f"report< + comment.user.type === 'Bot' && + comment.body.includes('## License Check Report') + ); + + if (botComment) { + // Update existing comment + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body: report + }); + } else { + // Create new comment + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body: report + }); + } + } \ No newline at end of file