Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,30 @@ ARTIFACTS_DIR=artifacts
# Self-documenting entry point: lists every public command.
# NOTE: recipe tabs restored — they were lost when this file was pasted.
.PHONY: help
help:
	@echo "Architecture Governance Toolkit - Available Commands:"
	@echo ""
	@echo "Core Analysis:"
	@echo "  make artifacts-dir        - Create artifacts directory"
	@echo "  make build-analysis-image - Build Docker analysis image"
	@echo "  make run-analysis         - Run analysis in Docker"
	@echo "  make hotspots             - Generate code hotspots"
	@echo "  make ownership            - Analyze code ownership"
	@echo "  make drift                - Detect architecture drift"
	@echo "  make risk                 - Generate consolidated risk register"
	@echo "  make full-analysis        - Run complete analysis pipeline"
	@echo "  make adr-new TITLE='...'  - Create new ADR"
	@echo "  make clean                - Remove artifacts directory"
	@echo ""
	@echo "Research System (NEW):"
	@echo "  make research-check-deps  - Check research system dependencies"
	@echo "  make research-profile     - Create organization profile"
	@echo "  make research-discover    - Discover similar repositories"
	@echo "  make research-similarity  - Calculate similarity scores"
	@echo "  make research-report      - Generate research summary"
	@echo "  make research-full        - Run complete research cycle"
	@echo "  make research-clean       - Remove research artifacts"
	@echo ""
	@echo "Environment Variables:"
	@echo "  GITHUB_TOKEN              - GitHub API token (for research-discover)"
	@echo ""

# Create the artifact output directories (idempotent via mkdir -p).
.PHONY: artifacts-dir
artifacts-dir:
	mkdir -p $(ARTIFACTS_DIR)/sbom $(ARTIFACTS_DIR)/timeseries
Expand Down Expand Up @@ -67,3 +85,92 @@ risk: artifacts-dir

# Aggregate pipeline: runs all four core analyses, then points at the output.
.PHONY: full-analysis
full-analysis: hotspots ownership drift risk
	@echo "Full analysis complete. Check artifacts/ directory."

# ============================================================================
# Research System Targets
# ============================================================================

# Derived artifact paths. Simple expansion (:=) so each is resolved exactly
# once at parse time instead of on every reference.
RESEARCH_ARTIFACTS := $(ARTIFACTS_DIR)/research
ORG_PROFILE       := $(RESEARCH_ARTIFACTS)/profiles/org_profile.json
DISCOVERED_REPOS  := $(RESEARCH_ARTIFACTS)/discoveries/discovered_repos.json
SIMILARITY_SCORES := $(RESEARCH_ARTIFACTS)/discoveries/similarity_scores.json

.PHONY: research-dirs research-profile research-discover research-similarity

# Create the research artifact directory tree (idempotent).
research-dirs:
	@mkdir -p $(RESEARCH_ARTIFACTS)/profiles
	@mkdir -p $(RESEARCH_ARTIFACTS)/discoveries
	@mkdir -p $(RESEARCH_ARTIFACTS)/analysis
	@mkdir -p $(RESEARCH_ARTIFACTS)/patterns
	@mkdir -p $(RESEARCH_ARTIFACTS)/recommendations
	@mkdir -p $(RESEARCH_ARTIFACTS)/feedback

# Build the organization profile from the local repo and prior analysis output.
research-profile: research-dirs
	@echo "========================================="
	@echo "Creating Organization Profile..."
	@echo "========================================="
	python3 scripts/research/profile_org.py \
		--path . \
		--artifacts $(ARTIFACTS_DIR) \
		--out $(ORG_PROFILE)
	@echo ""
	@echo "✓ Organization profile created: $(ORG_PROFILE)"

# Query GitHub for repositories similar to the org profile.
# Works without GITHUB_TOKEN, but unauthenticated API rate limits apply.
research-discover: research-profile
	@echo "========================================="
	@echo "Discovering Similar Repositories..."
	@echo "========================================="
	@if [ -z "$(GITHUB_TOKEN)" ]; then \
		echo "WARNING: GITHUB_TOKEN not set. Rate limits will be restrictive."; \
		echo "Set GITHUB_TOKEN environment variable for better results."; \
		echo ""; \
	fi
	python3 scripts/research/discover_repos.py \
		--config config/research/discovery_config.yaml \
		--profile $(ORG_PROFILE) \
		--out $(DISCOVERED_REPOS)
	@echo ""
	@echo "✓ Repository discovery complete: $(DISCOVERED_REPOS)"

# Score discovered repositories against the org profile using weighted metrics.
research-similarity: research-discover
	@echo "========================================="
	@echo "Calculating Similarity Scores..."
	@echo "========================================="
	python3 scripts/research/similarity_scorer.py \
		--discovered $(DISCOVERED_REPOS) \
		--profile $(ORG_PROFILE) \
		--weights config/research/similarity_weights.yaml \
		--out $(SIMILARITY_SCORES)
	@echo ""
	@echo "✓ Similarity scoring complete: $(SIMILARITY_SCORES)"

# Print a human-readable summary of the profile and similarity results.
# NOTE(review): the inline python one-liners below are hard to maintain and
# fail opaquely if the JSON schema changes; consider extracting them into
# scripts/research/generate_report.py in a follow-up.
.PHONY: research-report
research-report: research-similarity
	@echo "========================================="
	@echo "Research System Summary"
	@echo "========================================="
	@echo ""
	@echo "Organization Profile:"
	@python3 -c "import json; p=json.load(open('$(ORG_PROFILE)')); print(f\" Fingerprint: {p.get('fingerprint', 'unknown')}\"); print(f\" Languages: {', '.join(list(p.get('metrics', {}).get('primary_languages', []))[:5])}\"); print(f\" Research Areas: {len(p.get('challenges', {}).get('research_areas', []))}\"); print(f\" High Priority Challenges: {len(p.get('challenges', {}).get('high_priority', []))}\")"
	@echo ""
	@echo "Discovery Results:"
	@python3 -c "import json; d=json.load(open('$(SIMILARITY_SCORES)')); meta=d.get('similarity_metadata', {}); print(f\" Total Scored: {meta.get('total_scored', 0)}\"); print(f\" Above Threshold: {meta.get('above_threshold', 0)}\"); print(f\" Threshold: {meta.get('threshold', 0)}\"); repos=d.get('repositories', []); print(f\" Top 5 Matches:\"); [print(f\" {i+1}. {r.get('full_name', 'unknown')} (score: {r.get('similarity_score', 0):.4f})\") for i, r in enumerate(repos[:5])]"
	@echo ""
	@echo "========================================="

.PHONY: research-clean research-full research-check-deps

# Remove every research artifact (profiles, discoveries, analysis, ...).
# RESEARCH_ARTIFACTS is derived from ARTIFACTS_DIR, so this never expands empty.
research-clean:
	rm -rf $(RESEARCH_ARTIFACTS)

# Convenience target for the whole cycle. The prerequisites already chain
# (report -> similarity -> discover -> profile); listing them all keeps the
# intent explicit for readers.
research-full: research-profile research-discover research-similarity research-report
	@echo ""
	@echo "✓ Full research cycle complete!"
	@echo ""
	@echo "Next steps:"
	@echo "  1. Review discovered repositories in: $(SIMILARITY_SCORES)"
	@echo "  2. Analyze top matches manually"
	@echo "  3. Run 'make research-analyze' to analyze selected repositories (coming soon)"
	@echo ""

# Helper target to check research system dependencies.
# Each probe import exits non-zero (failing the recipe) with install guidance.
research-check-deps:
	@echo "Checking research system dependencies..."
	@python3 -c "import github" 2>/dev/null || (echo "ERROR: PyGithub not installed. Run: pip install PyGithub" && exit 1)
	@python3 -c "import yaml" 2>/dev/null || (echo "ERROR: PyYAML not installed. Run: pip install PyYAML" && exit 1)
	@echo "✓ All dependencies installed"
252 changes: 252 additions & 0 deletions config/research/analysis_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
# Analysis Pipeline Configuration
# Controls how discovered repositories are cloned and analyzed

version: "1.0"
last_updated: "2025-11-18"

# Cloning configuration
cloning:
  # Clone depth (shallow clone)
  depth: 1

  # Maximum repository size (MB)
  max_size_mb: 500

  # Timeout per clone (seconds)
  timeout_seconds: 300

  # Concurrent clones
  max_parallel: 3

  # Clone location
  # NOTE(review): a fixed /tmp path risks collisions between parallel runs
  # and leaves untrusted clones on the shared host; consider a per-run
  # unique directory and Docker isolation before Phase 3 — TODO confirm.
  workspace_dir: "/tmp/research_clones"

  # Auto-cleanup
  auto_cleanup: true
  cleanup_after_hours: 24

  # Sandbox mode
  use_docker: false # Set to true for production
  docker_image: "research-analysis:latest"

# Analysis modules to run
modules:
  structural:
    enabled: true
    analyze_directory_structure: true
    detect_config_files: true
    measure_doc_coverage: true
    analyze_test_organization: true

  quality:
    enabled: true
    run_complexity_analysis: true
    detect_coverage_config: true
    extract_linting_config: true
    analyze_code_review_practices: true

  architecture:
    enabled: true
    extract_dependency_graph: true
    identify_service_boundaries: true
    detect_api_patterns: true
    analyze_data_flows: false # Expensive, disable by default

  devops:
    enabled: true
    parse_ci_cd_configs: true
    detect_iac_patterns: true
    extract_monitoring_setup: true
    identify_security_tools: true

  documentation:
    enabled: true
    analyze_readme: true
    extract_adrs: true
    find_runbooks: true
    extract_contribution_guide: true

# Structural Analysis
structural:
  # Directory patterns to analyze
  analyze_patterns:
    - "src/**"
    - "lib/**"
    - "app/**"
    - "pkg/**"
    - "tests/**"
    - "docs/**"

  # Config files to detect
  config_patterns:
    - "*.yml"
    - "*.yaml"
    - "*.json"
    - "*.toml"
    - "*.ini"
    - "Dockerfile*"
    - "docker-compose*.yml"
    - "Makefile"
    - ".github/**"
    - ".gitlab-ci.yml"

  # Documentation coverage
  doc_indicators:
    - "README.md"
    - "CONTRIBUTING.md"
    - "docs/"
    - "*.md"
    - "LICENSE"

# Quality Analysis
quality:
  # Complexity tools by language
  complexity_tools:
    python: "radon"
    javascript: "eslint"
    go: "gocyclo"
    java: "checkstyle"

  # Coverage tools
  coverage_tools:
    python: ["pytest-cov", "coverage.py"]
    javascript: ["jest", "nyc", "istanbul"]
    go: ["go test -cover"]
    java: ["jacoco"]

  # Linters
  linters:
    python: ["pylint", "flake8", "black", "mypy"]
    javascript: ["eslint", "prettier"]
    go: ["golint", "gofmt"]
    java: ["checkstyle", "pmd"]

# DevOps Analysis
devops:
  # CI/CD platforms to detect
  ci_platforms:
    - ".github/workflows"     # GitHub Actions
    - ".gitlab-ci.yml"        # GitLab CI
    - ".travis.yml"           # Travis CI
    - "Jenkinsfile"           # Jenkins
    - ".circleci"             # CircleCI
    - "azure-pipelines.yml"   # Azure Pipelines

  # Infrastructure as Code
  iac_tools:
    - "terraform"
    - "cloudformation"
    - "kubernetes"
    - "helm"
    - "ansible"
    - "pulumi"

  # Monitoring/Observability
  observability_tools:
    - "prometheus"
    - "grafana"
    - "datadog"
    - "newrelic"
    - "sentry"
    - "opentelemetry"

  # Security tools
  security_tools:
    - "trivy"
    - "semgrep"
    - "snyk"
    - "sonarqube"
    - "dependabot"

# Documentation Analysis
documentation:
  # README quality metrics
  readme_quality:
    min_length: 500 # characters
    required_sections:
      - "installation"
      - "usage"
      - "license"
    bonus_sections:
      - "contributing"
      - "testing"
      - "architecture"

  # ADR detection
  adr_patterns:
    - "docs/adr/**/*.md"
    - "docs/decisions/**/*.md"
    - "adr/**/*.md"
    - "decisions/**/*.md"

  # Runbook detection
  runbook_patterns:
    - "runbooks/**"
    - "playbooks/**"
    - "docs/operations/**"
    - "docs/runbooks/**"

# Comparison with baseline
baseline_comparison:
  enabled: true

  # Gap identification
  identify_gaps:
    - missing_tools
    - missing_practices
    - better_configurations
    - superior_patterns

  # Impact scoring (weights sum to 1.00)
  impact_factors:
    risk_reduction: 0.35
    velocity_improvement: 0.30
    quality_improvement: 0.20
    cost_reduction: 0.15

# Effort estimation
effort_estimation:
  # Map pattern types to T-shirt sizes
  simple_patterns: "S"    # < 8 hours
  moderate_patterns: "M"  # 8-24 hours
  complex_patterns: "L"   # 1-5 days
  major_changes: "XL"     # > 5 days

# Performance
performance:
  # Timeouts
  module_timeout_seconds: 180
  total_analysis_timeout_seconds: 600

  # Resource limits
  max_memory_mb: 2048
  max_cpu_percent: 80

  # Parallel processing
  parallel_modules: false # Run modules sequentially by default

# Output
output:
  # Format: "json", "yaml"
  format: "json"

  # Compression
  compress: true

  # Include raw data
  include_raw: true

  # Include diffs
  include_diff: true

# Error handling
error_handling:
  # Continue on module failure
  continue_on_error: true

  # Retry failed modules
  retry_failed: true
  max_retries: 2

  # Log level: "DEBUG", "INFO", "WARNING", "ERROR"
  log_level: "INFO"
Loading