From fd0e6cbff2d9c44e7266a778039cea6ea76c6cc9 Mon Sep 17 00:00:00 2001 From: Tiago <913367+quintela@users.noreply.github.com> Date: Wed, 23 Jul 2025 21:52:19 +0100 Subject: [PATCH 1/3] feat: glab ci setup --- .env.example | 5 +- .github/workflows/tiw-review.yml | 2 +- .gitlab-ci.yml | 78 ++++++++++++++++++++++++++++ README.md | 88 ++++++++++++++++++++++++-------- src/config/config.ts | 2 +- 5 files changed, 147 insertions(+), 28 deletions(-) create mode 100644 .gitlab-ci.yml diff --git a/.env.example b/.env.example index 7de3ad7..5f15041 100644 --- a/.env.example +++ b/.env.example @@ -28,9 +28,6 @@ GIT_PLATFORM=gitlab # GitLab GITLAB_URL=https://git.boomstraat.de GITLAB_TOKEN=your_gitlab_personal_access_token -CI_PROJECT_ID= -CI_MERGE_REQUEST_IID= -CI_PIPELINE_SOURCE= # GitHub GITHUB_TOKEN=your_github_token @@ -39,4 +36,4 @@ GITHUB_TOKEN=your_github_token VERBOSE= # File Processing -IGNORE_LOCK_FILES= \ No newline at end of file +IGNORE_LOCK_FILES= diff --git a/.github/workflows/tiw-review.yml b/.github/workflows/tiw-review.yml index 9fd4502..d137721 100644 --- a/.github/workflows/tiw-review.yml +++ b/.github/workflows/tiw-review.yml @@ -47,7 +47,7 @@ jobs: GITHUB_TOKEN: ${{ github.token }} LLM_PROVIDER: anthropic ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + ANTHROPIC_MODEL: claude-sonnet-4-20250514 VERBOSE: 'true' IGNORE_LOCK_FILES: 'true' MAX_PROMPT_TOKENS: 150000 diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..8d3baba --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,78 @@ +stages: + - review + +variables: + # GitLab automatically provides CI_PROJECT_ID and CI_MERGE_REQUEST_IID + # in merge request pipelines when CI_PIPELINE_SOURCE=merge_request_event + LLM_PROVIDER: anthropic + ANTHROPIC_MODEL: claude-sonnet-4-20250514 + VERBOSE: 'true' + IGNORE_LOCK_FILES: 'true' + MAX_PROMPT_TOKENS: 150000 + +tiw-code-review: + stage: review + image: node:20-alpine + + # Only run on merge request events + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + + # Cache node_modules for faster builds + cache: + key: + files: + - yarn.lock + paths: + - node_modules/ + - .yarn/cache/ + + before_script: + # Install dependencies + - apk add --no-cache git + - yarn install --frozen-lockfile + + # Debug environment variables + - echo "šŸ” CI Environment Check" + - echo "CI_PROJECT_ID=$CI_PROJECT_ID" + - echo "CI_MERGE_REQUEST_IID=$CI_MERGE_REQUEST_IID" + - echo "CI_PIPELINE_SOURCE=$CI_PIPELINE_SOURCE" + - | + if [ -z "$ANTHROPIC_API_KEY" ]; then + echo "āŒ ANTHROPIC_API_KEY is not set or empty" + exit 1 + else + echo "āœ… ANTHROPIC_API_KEY is set (length: ${#ANTHROPIC_API_KEY})" + echo "āœ… First 20 chars: ${ANTHROPIC_API_KEY:0:20}..." + fi + - | + if [ -z "$GITLAB_TOKEN" ]; then + echo "āŒ GITLAB_TOKEN is not set or empty" + echo "šŸ’” Please add GITLAB_TOKEN as a CI/CD variable in project settings" + echo "šŸ’” Create a personal access token with 'api' scope at:" + echo " https://gitlab.com/-/profile/personal_access_tokens" + exit 1 + else + echo "āœ… GITLAB_TOKEN is set (length: ${#GITLAB_TOKEN})" + echo "āœ… First 20 chars: ${GITLAB_TOKEN:0:20}..." + fi + + script: + # Build the project + - echo "šŸ”Ø Building Tiw" + - yarn build + + # Run the code review + - echo "šŸ¤– Running Tiw Code Review" + - yarn start ci --platform gitlab --verbose + + # Allow the job to fail without failing the entire pipeline + # This prevents blocking MRs if the review service has issues + allow_failure: true + + # Only keep artifacts for failed jobs for debugging + artifacts: + when: on_failure + paths: + - reviews/ + expire_in: 1 week diff --git a/README.md b/README.md index 7f34127..1d1563d 100644 --- a/README.md +++ b/README.md @@ -205,55 +205,99 @@ Create a `.gitlab-ci.yml` file in your repository: stages: - review -mr-review: +variables: + LLM_PROVIDER: anthropic + ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + VERBOSE: 'true' + IGNORE_LOCK_FILES: 'true' + MAX_PROMPT_TOKENS: 150000 + +tiw-code-review: stage: review - image: node:latest + image: node:20-alpine + + # Only run on merge request events + rules: + - if: $CI_PIPELINE_SOURCE == "merge_request_event" + + # Cache node_modules for faster builds + cache: + key: + files: + - yarn.lock + paths: + - node_modules/ + + before_script: + - apk add --no-cache git + - yarn install --frozen-lockfile + - yarn build + script: - - npm install -g tiw - - tiw ci - only: - - merge_requests - variables: - LLM_PROVIDER: anthropic - IGNORE_LOCK_FILES: 'true' - VERBOSE: 'true' + - yarn start ci --platform gitlab --verbose + + # Allow failure to prevent blocking MRs + allow_failure: true ``` **Important:** Configure these secure variables in GitLab CI/CD settings: -- `GITLAB_TOKEN`: Your GitLab personal access token -- `ANTHROPIC_API_KEY` (or other LLM provider API key) +- `GITLAB_TOKEN`: Your GitLab personal access token with `api` scope +- `ANTHROPIC_API_KEY`: Your Anthropic API key (or other LLM provider API key) #### GitHub Actions -Create a `.github/workflows/review.yml` file: +Create a `.github/workflows/tiw-review.yml` file: ```yaml -name: Code Review +name: Tiw Code Review on: pull_request: - types: [opened, synchronize] + types: [opened, synchronize, reopened] jobs: review: runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: - - uses: actions/checkout@v3 + - name: Checkout code + uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-node@v3 + + - name: Setup Node.js + uses: actions/setup-node@v4 with: - node-version: '18' - - run: npm install -g tiw - - name: Run Tiw - run: tiw ci --platform github + node-version: '20' + cache: 'yarn' + + - name: Install dependencies + run: yarn install + + - name: Build Tiw + run: yarn build + + - name: Run Tiw Review + run: yarn start ci --platform github --verbose env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ github.token }} LLM_PROVIDER: anthropic ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + VERBOSE: 'true' + IGNORE_LOCK_FILES: 'true' + MAX_PROMPT_TOKENS: 150000 ``` +**Important:** Configure these secrets in GitHub repository settings: + +- `ANTHROPIC_API_KEY`: Your Anthropic API key (or other LLM provider API key) +- The `GITHUB_TOKEN` is automatically provided by GitHub Actions + ### Common Options The following options are available for all commands: diff --git a/src/config/config.ts b/src/config/config.ts index d859604..8ef6665 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -115,7 +115,7 @@ export class Config { return { // LLM provider configuration llmProvider: (this.getEnvVar('LLM_PROVIDER') as LLMProvider) || 'anthropic', - anthropicModel: this.getEnvVar('ANTHROPIC_MODEL') || 'claude-3-7-sonnet-20250219', + anthropicModel: this.getEnvVar('ANTHROPIC_MODEL') || 'claude-sonnet-4-20250514', openaiModel: this.getEnvVar('OPENAI_MODEL') || 'gpt-4', deepseekModel: this.getEnvVar('DEEPSEEK_MODEL') || 'deepseek-coder', copilotModel: this.getEnvVar('COPILOT_MODEL') || 'gpt-4', From 596cf90ae72c558686d7ec184daad0b8543c507a Mon Sep 17 00:00:00 2001 From: Tiago <913367+quintela@users.noreply.github.com> Date: Thu, 24 Jul 2025 10:41:28 +0100 Subject: [PATCH 2/3] fix: custom templates folder --- src/config/config.ts | 4 ++-- src/core/MRReviewer.ts | 28 ++++++++++++++++++++++++++++ src/index.ts | 6 ++++-- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/config/config.ts b/src/config/config.ts index 8ef6665..7ffb397 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -339,8 +339,8 @@ export class Config { if (this.options['templates'] && !this.options.formatterTemplate) { const templatesDir = this.options['templates'] as string; - updatedConfig.formatterTemplate = path.join(templatesDir, 'formatter.txt'); - updatedConfig.promptDir = templatesDir; + updatedConfig.formatterTemplate = path.join(templatesDir, 'formatters', 'markdown_format.md'); + updatedConfig.promptDir = path.join(templatesDir, 'prompts'); } return updatedConfig; diff --git a/src/core/MRReviewer.ts b/src/core/MRReviewer.ts index 10150bd..b327c26 100644 --- a/src/core/MRReviewer.ts +++ b/src/core/MRReviewer.ts @@ -403,6 +403,34 @@ export class MRReviewer { this.logger.debug(`Prompt length: ${prompt.length} characters`); } + // Debug mode: skip API call and return mock response + if (process.env['DEBUG_MODE'] === 'true' || process.env['SKIP_API'] === 'true') { + this.logger.info('🚧 DEBUG MODE: Skipping LLM API call'); + this.logger.debug('Generated prompt:'); + this.logger.debug('='.repeat(50)); + this.logger.debug(prompt); + this.logger.debug('='.repeat(50)); + + return JSON.stringify({ + overview: { + summary: "DEBUG MODE: Mock review response", + riskLevel: "low", + recommendedAction: "comment" + }, + fileReviews: [], + testReview: { + compliance: "unknown", + missingTests: ["Debug mode - no actual analysis performed"], + testQualityIssues: [] + }, + generalFeedback: { + strengths: ["Template system is working"], + concerns: ["This is a mock response for debugging"], + suggestions: ["Set DEBUG_MODE=false to use real LLM analysis"] + } + }, null, 2); + } + return await this.llmAdapter.analyzeCode(prompt); } catch (error) { this.logger.error('Error analyzing diff with LLM:', error as Error); diff --git a/src/index.ts b/src/index.ts index da03b66..e3302b0 100755 --- a/src/index.ts +++ b/src/index.ts @@ -184,9 +184,11 @@ function buildConfigOptions(command: Command): Record { llmProvider: mergedOptions.provider as LLMProvider, model: mergedOptions.model, gitPlatform: mergedOptions.platform as GitPlatform | undefined, - promptDir: mergedOptions.templates, + promptDir: mergedOptions.templates + ? path.join(mergedOptions.templates, 'prompts') + : undefined, formatterTemplate: mergedOptions.templates - ? path.join(mergedOptions.templates, 'formatter.txt') + ? path.join(mergedOptions.templates, 'formatters', 'markdown_format.md') : undefined, mrMode: mergedOptions.mrMode as MRMode, gitMrUrl: mergedOptions.gitMrUrl, From ce10a720dedbefc39b1ec121bba32dbeb3a0b484 Mon Sep 17 00:00:00 2001 From: Tiago <913367+quintela@users.noreply.github.com> Date: Thu, 24 Jul 2025 11:20:35 +0100 Subject: [PATCH 3/3] fix: prompts refactor --- README.md | 24 +- USAGE_EXAMPLES.md | 12 +- src/core/ReviewFormatter.ts | 2 +- src/templates/default-prompt.ts | 2 +- src/templates/formatters/markdown_format.md | 49 ++-- src/templates/prompts/00-lifecycle.md | 96 ++++++++ .../prompts/00-system_instructions.md | 48 ++++ .../prompts/01-workflow_methodology.md | 78 +++++++ src/templates/prompts/criteria.md | 213 ++++++++++++++---- src/templates/prompts/introduction.md | 23 +- src/templates/prompts/output_format.md | 59 +++-- src/templates/prompts/priorities.md | 103 ++++++++- tests/core/MRReviewer.test.ts | 2 +- tests/utils/fileUtils.test.ts | 6 +- 14 files changed, 590 insertions(+), 127 deletions(-) create mode 100644 src/templates/prompts/00-lifecycle.md create mode 100644 src/templates/prompts/00-system_instructions.md create mode 100644 src/templates/prompts/01-workflow_methodology.md diff --git a/README.md b/README.md index 1d1563d..a2d249e 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ cat > ~/.config/tiw/.env << 'EOF' # LLM Configuration LLM_PROVIDER=anthropic ANTHROPIC_API_KEY=your_anthropic_api_key -ANTHROPIC_MODEL=claude-3-7-sonnet-20250219 +ANTHROPIC_MODEL=claude-sonnet-4-20250514 # Git Platform Configuration GITLAB_URL=https://gitlab.com @@ -95,7 +95,7 @@ DEEPSEEK_API_KEY=your_deepseek_api_key COPILOT_API_KEY=your_copilot_api_key # LLM Model Selection -ANTHROPIC_MODEL=claude-3-7-sonnet-20250219 +ANTHROPIC_MODEL=claude-sonnet-4-20250514 OPENAI_MODEL=gpt-4 DEEPSEEK_MODEL=deepseek-coder COPILOT_MODEL=gpt-4 @@ -207,7 +207,7 @@ stages: variables: LLM_PROVIDER: anthropic - ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + ANTHROPIC_MODEL: claude-sonnet-4-20250514 VERBOSE: 'true' IGNORE_LOCK_FILES: 'true' MAX_PROMPT_TOKENS: 150000 @@ -215,27 +215,27 @@ variables: tiw-code-review: stage: review image: node:20-alpine - + # Only run on merge request events rules: - if: $CI_PIPELINE_SOURCE == "merge_request_event" - + # Cache node_modules for faster builds cache: - key: + key: files: - yarn.lock paths: - node_modules/ - + before_script: - apk add --no-cache git - yarn install --frozen-lockfile - yarn build - + script: - yarn start ci --platform gitlab --verbose - + # Allow failure to prevent blocking MRs allow_failure: true ``` @@ -287,7 +287,7 @@ jobs: GITHUB_TOKEN: ${{ github.token }} LLM_PROVIDER: anthropic ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + ANTHROPIC_MODEL: claude-sonnet-4-20250514 VERBOSE: 'true' IGNORE_LOCK_FILES: 'true' MAX_PROMPT_TOKENS: 150000 @@ -347,11 +347,11 @@ tiw url --help ```sh # Use Anthropic Claude -tiw local --provider anthropic --model claude-3-7-sonnet-20250219 +tiw local --provider anthropic --model claude-sonnet-4-20250514 # OR using environment variables export LLM_PROVIDER=anthropic export ANTHROPIC_API_KEY=your_api_key -export ANTHROPIC_MODEL=claude-3-7-sonnet-20250219 +export ANTHROPIC_MODEL=claude-sonnet-4-20250514 tiw local ``` diff --git a/USAGE_EXAMPLES.md b/USAGE_EXAMPLES.md index fc0664f..e0213b5 100644 --- a/USAGE_EXAMPLES.md +++ b/USAGE_EXAMPLES.md @@ -72,7 +72,7 @@ tiw local # Production environment export LLM_PROVIDER=anthropic export ANTHROPIC_API_KEY=your-prod-key -export ANTHROPIC_MODEL=claude-3-7-sonnet-20250219 # Best quality +export ANTHROPIC_MODEL=claude-sonnet-4-20250514 # Best quality export MAX_PROMPT_TOKENS=150000 tiw local ``` @@ -104,7 +104,7 @@ tiw url https://github.com/owner/repo/pull/123 # GitHub PR with custom configuration tiw url https://github.com/company/project/pull/456 \ --provider anthropic \ - --model claude-3-7-sonnet-20250219 \ + --model claude-sonnet-4-20250514 \ --debug # Private GitHub repository @@ -185,7 +185,7 @@ mr-review-anthropic: - tiw ci --provider anthropic --verbose variables: LLM_PROVIDER: anthropic - ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + ANTHROPIC_MODEL: claude-sonnet-4-20250514 MAX_PROMPT_TOKENS: 150000 only: - merge_requests @@ -283,7 +283,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} LLM_PROVIDER: anthropic - ANTHROPIC_MODEL: claude-3-7-sonnet-20250219 + ANTHROPIC_MODEL: claude-sonnet-4-20250514 MAX_PROMPT_TOKENS: 150000 IGNORE_LOCK_FILES: 'true' ``` @@ -445,7 +445,7 @@ REVIEWS_DIR=./dev-reviews # .env.production LLM_PROVIDER=anthropic ANTHROPIC_API_KEY=sk-ant-api03-your-prod-key -ANTHROPIC_MODEL=claude-3-7-sonnet-20250219 # Best quality +ANTHROPIC_MODEL=claude-sonnet-4-20250514 # Best quality GIT_PLATFORM=gitlab GITLAB_URL=https://gitlab.company.com @@ -469,7 +469,7 @@ DEEPSEEK_API_KEY=your-deepseek-key COPILOT_API_KEY=your-copilot-key # Provider-specific models -ANTHROPIC_MODEL=claude-3-7-sonnet-20250219 +ANTHROPIC_MODEL=claude-sonnet-4-20250514 OPENAI_MODEL=gpt-4 DEEPSEEK_MODEL=deepseek-coder COPILOT_MODEL=gpt-4 diff --git a/src/core/ReviewFormatter.ts b/src/core/ReviewFormatter.ts index fd6af81..feb47e6 100644 --- a/src/core/ReviewFormatter.ts +++ b/src/core/ReviewFormatter.ts @@ -7,7 +7,7 @@ import { FileUtils } from '../utils/fileUtils'; export interface ReviewComment { line: number; type: string; - severity: string; + priority: string; comment: string; suggestion?: string; } diff --git a/src/templates/default-prompt.ts b/src/templates/default-prompt.ts index a1b1e11..86f5756 100644 --- a/src/templates/default-prompt.ts +++ b/src/templates/default-prompt.ts @@ -52,7 +52,7 @@ Use this exact JSON structure: { "line": 42, "type": "issue|suggestion|praise", - "severity": "critical|high|medium|low", + "priority": "blocking|major|minor|suggestion", "comment": "Detailed comment about the code", "suggestion": "Optional code suggestion" } diff --git a/src/templates/formatters/markdown_format.md b/src/templates/formatters/markdown_format.md index 310e616..babace3 100644 --- a/src/templates/formatters/markdown_format.md +++ b/src/templates/formatters/markdown_format.md @@ -1,22 +1,14 @@ -## Overview +## Code Review Analysis **{{overview.summary}}** - **Risk Level**: {{overview.riskLevel}} - **Recommended Action**: {{overview.recommendedAction}} -## General Feedback +## Issues and Concerns -{{#if generalFeedback.strengths.length}} -### Strengths - -{{#each generalFeedback.strengths}} -- {{this}} -{{/each}} - -{{/if}} {{#if generalFeedback.concerns.length}} -### Concerns +### Critical Concerns {{#each generalFeedback.concerns}} - {{this}} @@ -24,35 +16,36 @@ {{/if}} {{#if generalFeedback.suggestions.length}} -### Suggestions +### Improvement Recommendations {{#each generalFeedback.suggestions}} - {{this}} {{/each}} {{/if}} -## File Reviews + +## File Analysis {{#if fileReviews.length}} {{#each fileReviews}} -### {{file}} +### `{{file}}` {{#if comments.length}} {{#each comments}} -**Line {{line}}** -{{type}}({{severity}}): {{comment}} +**{{type}} ({{priority}})** | Line {{line}} +{{comment}} {{#if suggestion}} -```suggestion +``` {{suggestion}} ``` -{{else}} - {{/if}} +--- + {{/each}} {{else}} -No specific issues found in this file. +No specific issues identified in this file. {{/if}} {{/each}} @@ -60,12 +53,13 @@ No specific issues found in this file. No file-specific issues found. {{/if}} -## Test Review -- **Compliance**: {{testReview.compliance}} +## Testing Analysis + +**Test Coverage Compliance**: {{testReview.compliance}} {{#if testReview.missingTests.length}} -### Missing Tests +### Missing Test Coverage {{#each testReview.missingTests}} - {{this}} @@ -76,9 +70,12 @@ No file-specific issues found. ### Test Quality Issues {{#each testReview.testQualityIssues}} -**{{file}} (Line {{line}})** -Issue: {{issue}} -Suggestion: {{suggestion}} +**{{file}}** (Line {{line}}) +- **Issue**: {{issue}} +- **Fix**: {{suggestion}} {{/each}} {{/if}} + +--- +*Review focused on identifying issues and improvements only. No positive feedback provided per configuration.* \ No newline at end of file diff --git a/src/templates/prompts/00-lifecycle.md b/src/templates/prompts/00-lifecycle.md new file mode 100644 index 0000000..a64c4ab --- /dev/null +++ b/src/templates/prompts/00-lifecycle.md @@ -0,0 +1,96 @@ +# SYSTEMATIC REVIEW METHODOLOGY + +## REVIEW EXECUTION FRAMEWORK + +### Phase 1: Context Analysis +**Quickly assess the change context to guide analysis depth:** + +- **Project Type**: CLI tool, web app, library → affects applicable standards +- **Change Scope**: New feature, bug fix, refactor → determines risk level +- **File Patterns**: Identify adapters, configs, tests → expect established patterns +- **Dependencies**: New packages → require security review + +### Phase 2: Priority-Based Analysis + +**1. Security Assessment (Highest Priority)** +- Hardcoded credentials, API keys, or sensitive data exposure +- Input validation on user-provided data +- Authentication/authorization bypass opportunities +- SQL injection, XSS, or other injection vulnerabilities +- Dependency vulnerabilities or supply chain risks + +**2. Performance & Scalability Review** +- N+1 query patterns or inefficient database operations +- Memory leaks from unclosed resources or retained references +- Algorithmic complexity issues (O(n²) where O(n) possible) +- Bundle size increases or unnecessary computations +- Missing caching or optimization opportunities + +**3. Architecture & Design Consistency** +- Violations of established project patterns (adapter, factory, DI) +- Single Responsibility Principle compliance +- Circular dependencies or inappropriate coupling +- Separation of concerns (business logic vs presentation) +- Missing abstractions or interface contracts + +**4. Code Quality & Maintainability** +- Type safety violations (`any` usage, missing null checks) +- Error handling gaps (unhandled promises, missing try/catch) +- Function complexity (>50 lines, high cyclomatic complexity) +- Unclear naming or missing documentation for complex logic +- Testing coverage for new functionality + +### Phase 3: Evidence-Based Verification + +**Critical Thinking Checkpoints:** +1. **"What could break in production?"** - Focus on real risks +2. **"Will this scale to 10x the current load?"** - Performance implications +3. **"Can a new team member understand and maintain this?"** - Maintainability +4. **"Does this follow established project patterns?"** - Consistency + +**Quality Gates for Every Suggestion:** +- [ ] Issue references specific location (`file_path#L42`) +- [ ] Problem description explains actual risk or impact +- [ ] Suggested fix includes working code example +- [ ] Confidence level integrated naturally into description +- [ ] Addresses genuine issue, not style preference + +## COMMENT FORMAT STANDARD + +**Required structure for every issue:** +``` +**Priority**: Brief issue description + +šŸŽÆ Located in `file_path#L42`: + +BRIEFLY describe the problem and why it needs attention. + +BEFORE: +[exact problematic code from diff] + +AFTER: +[specific fix with proper syntax] + +āš ļø WHY THIS MATTERS: [business/technical consequences if not addressed] +``` + +**Priority Levels:** +- **BLOCKING**: Security vulnerabilities, data loss risks, breaking changes +- **MAJOR**: Architecture violations, performance bottlenecks, accessibility issues +- **MINOR**: Code quality concerns, maintainability issues, testing gaps +- **SUGGESTION**: Style preferences with technical justification + +**Confidence Ratings:** +- **High**: Clear evidence of definite issue +- **Medium**: Likely problem based on best practices +- **Low**: Potential improvement, context-dependent + +## COMPLETION VERIFICATION + +**Before submitting review:** +- [ ] All blocking security and performance issues identified +- [ ] Each suggestion includes working code examples +- [ ] Confidence levels have evidence-based justification +- [ ] Risk assessment reflects highest priority findings +- [ ] No subjective style opinions without technical merit +- [ ] Analysis focuses on issues that affect system reliability \ No newline at end of file diff --git a/src/templates/prompts/00-system_instructions.md b/src/templates/prompts/00-system_instructions.md new file mode 100644 index 0000000..a88f102 --- /dev/null +++ b/src/templates/prompts/00-system_instructions.md @@ -0,0 +1,48 @@ +# AUTONOMOUS CODE REVIEW SYSTEM + +## šŸ¤– AUTONOMOUS OPERATION MODE + +**You are operating in CI/CD with NO user interaction capability.** Make definitive assessments and provide complete analysis without questions, clarification requests, or incomplete evaluations. + +**Core Decision Framework**: If code intent is unclear, analyze available context, choose the most safety-focused interpretation, and document your assumption. + +## šŸŽÆ REVIEW PHILOSOPHY: "AM I HAPPY TO MAINTAIN THIS?" + +**Primary Objective**: Evaluate code through the lens of long-term maintainability, system health, and shared ownership. Focus on issues that matter - security vulnerabilities, performance bottlenecks, maintainability risks, and architectural violations. + +**Quality Threshold**: Every suggestion must be: +- **Actionable**: Implementable immediately with concrete code examples +- **Impactful**: Addresses genuine security, performance, or maintainability risks +- **Evidence-Based**: Supported by specific code analysis, not preferences +- **Context-Aware**: Appropriate for the project type and established patterns + +## šŸ“‹ CORE REVIEW PRINCIPLES + +### 1. Holistic System Assessment +- Evaluate every line for design, functionality, and complexity implications +- Consider broader system context and integration points +- Assess long-term maintainability and team knowledge sharing + +### 2. Critical Focus Areas (Priority Order) +- **Security**: Vulnerabilities, auth bypass, data exposure +- **Performance**: Scalability issues, memory leaks, inefficient algorithms +- **Architecture**: Pattern violations, tight coupling, separation of concerns +- **Maintainability**: Code clarity, error handling, testing adequacy + +### 3. Evidence-Based Analysis +- Reference specific line numbers using `file_path:line_number` format +- Provide concrete code examples for every suggestion +- Rate confidence with clear justification +- Focus on code that could cause real problems + +**EXCLUDE**: Style preferences, theoretical improvements, generic advice, or subjective opinions without technical merit. + +## šŸ” AUTONOMOUS QUALITY GATES + +**Before including any suggestion, verify:** +- [ ] Does this address a genuine risk or problem? +- [ ] Is the suggested fix technically correct and complete? +- [ ] Would this improve system reliability, security, or maintainability? +- [ ] Can another developer implement this immediately? + +**Review Completion Standard**: Analysis must be thorough enough that another senior developer would reach similar conclusions about critical issues. \ No newline at end of file diff --git a/src/templates/prompts/01-workflow_methodology.md b/src/templates/prompts/01-workflow_methodology.md new file mode 100644 index 0000000..83cf858 --- /dev/null +++ b/src/templates/prompts/01-workflow_methodology.md @@ -0,0 +1,78 @@ +# WORKFLOW METHODOLOGY + +## STRUCTURED ANALYSIS APPROACH + +### Phase 1: Context Assessment (Quick Analysis) +**Establish review context in under 2 minutes:** + +- **Project Type**: CLI tool, web app, library → determines applicable standards +- **Change Scope**: New feature, bug fix, refactor → affects risk assessment +- **File Patterns**: Identify adapters, configs, tests → expect established patterns +- **Integration Points**: How changes interact with existing system + +### Phase 2: Priority-Based Review +**Focus analysis on highest-impact areas first:** + +**Security (Highest Priority)** +- Scan for hardcoded secrets, auth bypasses, injection vulnerabilities +- Verify input validation on all user-provided data +- Check for data exposure or privilege escalation opportunities + +**Performance & Scalability** +- Identify N+1 queries, memory leaks, algorithmic inefficiencies +- Assess database operations and caching strategies +- Look for blocking operations or resource exhaustion patterns + +**Architecture & Consistency** +- Verify adherence to established project patterns +- Check separation of concerns and coupling relationships +- Identify violations of single responsibility principle + +**Code Quality & Maintainability** +- Look for complex functions, poor naming, missing error handling +- Assess test coverage for new functionality +- Check for code duplication and unclear logic + +### Phase 3: Evidence-Based Verification +**Validate each finding before including:** + +**For Every Potential Issue:** +1. **Specific Evidence**: Point to exact locations and code +2. **Impact Assessment**: Quantify potential consequences +3. **Solution Validation**: Ensure suggested fix works correctly +4. **Context Consideration**: Verify issue applies to this project + +**Critical Questions:** +- "Is this a genuine problem or style preference?" +- "Does my fix address the root cause?" +- "Would this improve system reliability or maintainability?" +- "Can another developer implement this immediately?" + +## AUTONOMOUS DECISION FRAMEWORK + +**When Code Intent is Unclear:** +- Analyze surrounding context and commit messages +- Choose the interpretation that prioritizes safety +- Document your assumption clearly in the comment + +**When Multiple Solutions Exist:** +- Suggest the approach that best follows project patterns +- Prioritize simplicity and maintainability +- Choose solutions that minimize breaking changes + +**For Debatable Priority:** +- Security and data risks: err toward higher priority (BLOCKING/MAJOR) +- Architecture violations: use MAJOR to MINOR based on impact +- Style preferences: use SUGGESTION priority with technical justification + +## COMPLETION VERIFICATION + +**Before submitting review:** +- [ ] All blocking security and performance issues identified +- [ ] Each suggestion includes specific location references +- [ ] Working code examples provided for all fixes +- [ ] Confidence naturally integrated into descriptions +- [ ] Risk assessment reflects highest priority findings +- [ ] Analysis focuses on issues affecting system reliability + +**Quality Standard**: Another experienced developer should reach similar conclusions about blocking issues based on your analysis. \ No newline at end of file diff --git a/src/templates/prompts/criteria.md b/src/templates/prompts/criteria.md index ca98385..7283225 100644 --- a/src/templates/prompts/criteria.md +++ b/src/templates/prompts/criteria.md @@ -1,49 +1,164 @@ -Please evaluate according to these specific criteria: - -1. Code Quality: - - Assess syntax, formatting, and readability - - Identify any code smells or anti-patterns - - Evaluate function/method complexity - - Check error handling and logging approaches - - Ensure early returns and guards are used properly - - Verify consistent error handling patterns - -2. Best Practices: - - Follow language and framework best practices - - Use proper API design principles (if applicable) - - Consider security implications carefully - - Prefer early returns and guard clauses over nested conditions - - Use proper null/undefined checks - - Avoid mutations when possible - - Ensure proper type safety in TypeScript - - Maintain immutability where appropriate - -3. JavaScript/TypeScript Specific Rules: - - Use === instead of == for equality checks - - Prefer const over let, avoid var - - Use optional chaining (?.) and nullish coalescing (??) operators - - Use array/object destructuring where appropriate - - Implement proper error handling with try/catch - - Properly handle async/await with try/catch - - Use proper TypeScript types rather than any - - Avoid type assertions unless absolutely necessary - - Use interface for API definitions and type for complex types - - Follow strict null checking principles - -4. Dependencies: - - Review changes to package.json - - Evaluate necessity and security of new dependencies - - Check for potential version conflicts - - Ensure dependencies are properly scoped (dependencies vs devDependencies) - -5. Performance: - - Identify potential bottlenecks - - Database query optimization (if applicable) - - Assess memory/computational efficiency - - Check for unnecessary re-renders or computations - -6. Test Coverage: - - Evaluate test quality and coverage - - Check for missing test cases - - Assess test maintainability - - Ensure tests are isolated and not dependent on each other +# CORE QUALITY CRITERIA + +## BLOCKING VIOLATIONS (Block Deployment) + +**Security Risks:** +- Hardcoded credentials, API keys, or sensitive data exposure +- Missing input validation leading to injection vulnerabilities +- Authentication/authorization bypass opportunities +- Unsafe deserialization or file handling + +**Type Safety & Error Handling:** +- Usage of `any` type without explicit justification +- Unhandled promise rejections or missing error handling +- Breaking API changes without version bump + +**System Stability:** +- Memory leaks from unclosed resources or event listeners +- Infinite loops or recursion without bounds + +## MAJOR PRIORITY VIOLATIONS (Request Changes) + +**Performance & Scalability:** +- N+1 query patterns or inefficient database operations +- Algorithmic complexity issues (O(n²) where O(n) possible) +- Synchronous file operations in Node.js + +**Architecture & Patterns:** +- Violations of established project patterns (adapter, factory, DI) +- Circular dependencies or inappropriate tight coupling +- Mixing business logic with infrastructure concerns + +**Modern JavaScript/TypeScript:** +- Use of `==` instead of `===` for equality +- `var` declarations (use `const`/`let`) +- Missing null/undefined checks with optional chaining + +## MINOR PRIORITY (Comments) + +**Code Quality:** +- Functions exceeding 50 lines or high cyclomatic complexity +- Poor naming that obscures intent +- Code duplication that should be abstracted + +**Testing & Documentation:** +- Missing tests for new functionality or edge cases +- Complex logic without explanatory comments + +## CODE PATTERN EXAMPLES + +**Guard Clauses (Preferred Pattern):** +```typescript +// āœ… GOOD: Early returns +function processUser(user: User | null): string { + if (!user) return 'No user provided'; + if (!user.email) return 'Email required'; + if (!user.isActive) return 'User inactive'; + + return `Processing ${user.email}`; +} + +// āŒ BAD: Nested conditions +function processUser(user: User | null): string { + if (user) { + if (user.email) { + if (user.isActive) { + return `Processing ${user.email}`; + } + } + } + return 'Invalid user'; +} +``` + +**Input Validation & Error Handling:** +```typescript +// āœ… GOOD: Comprehensive validation +function calculateDiscount(price: number, discountPercent?: number): number { + if (typeof price !== 'number' || price < 0) { + throw new Error('Price must be a non-negative number'); + } + + const discount = discountPercent ?? 0; + if (discount < 0 || discount > 100) { + throw new Error('Discount must be between 0 and 100'); + } + + return price * (1 - discount / 100); +} + +// āŒ BAD: No validation +function calculateDiscount(price: number, discountPercent?: number): number { + return price * (1 - (discountPercent || 0) / 100); +} +``` + +**Async Error Handling:** +```typescript +// āœ… GOOD: Proper error handling +async function fetchUserData(id: string): Promise { + try { + if (!id?.trim()) { + throw new Error('User ID is required'); + } + + const response = await fetch(`/api/users/${id}`); + if (!response.ok) { + throw new Error(`Failed to fetch user: ${response.status}`); + } + + return await response.json(); + } catch (error) { + console.error('Error fetching user:', error); + return null; // Graceful degradation + } +} + +// āŒ BAD: No error handling +async function fetchUserData(id: string): Promise { + const response = await fetch(`/api/users/${id}`); + return response.json(); +} +``` + +## ANALYSIS FOCUS AREAS + +**Security Assessment:** +- "What could an attacker exploit here?" +- "Are all inputs validated and sanitized?" +- "Could this leak sensitive information?" + +**Performance Analysis:** +- "Will this scale to 10x the current load?" +- "Are there unnecessary computations or network calls?" +- "Could this cause memory leaks or blocking operations?" + +**Architecture Review:** +- "Does this follow established patterns in the codebase?" +- "Is this component/function doing too many things?" +- "Would a new developer understand this code?" + +## COMMENT REQUIREMENTS + +**Every issue must include:** +- Specific location reference using `file_path#L42` format +- Clear problem description with business impact +- Working code example showing the fix +- Confidence naturally integrated into description + +**Use this format:** +``` +**Priority**: Brief description + +šŸŽÆ Located in `file_path#L42`: + +Explain the specific issue and why it requires attention. + +BEFORE: +[problematic code from diff] + +AFTER: +[specific fix with proper syntax] + +āš ļø WHY THIS MATTERS: [consequences if not addressed] +``` \ No newline at end of file diff --git a/src/templates/prompts/introduction.md b/src/templates/prompts/introduction.md index 099b2a0..650538c 100644 --- a/src/templates/prompts/introduction.md +++ b/src/templates/prompts/introduction.md @@ -1,5 +1,18 @@ -Review this code pull request and provide a detailed analysis focusing on: -1. Logical errors and bugs -2. Security vulnerabilities -3. Performance issues -4. Maintainability concerns +You are conducting an autonomous code review focused on identifying genuine issues that affect system reliability, security, and maintainability. + +**Operating Mode**: CI/CD autonomous operation - make definitive assessments without user interaction. + +**Review Objective**: Answer the fundamental question "Am I happy to maintain this code?" by evaluating: + +1. **Security**: Vulnerabilities, data exposure, authentication issues +2. **Performance**: Scalability bottlenecks, memory leaks, inefficient algorithms +3. **Architecture**: Pattern violations, coupling issues, separation of concerns +4. **Maintainability**: Code clarity, error handling, testing adequacy + +**Quality Standard**: Every issue identified must be: +- **Specific**: Reference exact line numbers and problematic code +- **Actionable**: Include concrete fix with working code example +- **Impactful**: Address genuine risks, not style preferences +- **Evidence-Based**: Supported by technical analysis, not subjective opinion + +**Exclude**: Praise, compliments, style preferences, theoretical improvements, or generic advice. \ No newline at end of file diff --git a/src/templates/prompts/output_format.md b/src/templates/prompts/output_format.md index 7fcbd2e..5052a23 100644 --- a/src/templates/prompts/output_format.md +++ b/src/templates/prompts/output_format.md @@ -1,10 +1,14 @@ -Structure your response in JSON format for easier parsing, following this structure: +**RESPONSE FORMAT: Valid JSON Only** + +You MUST respond with ONLY valid JSON. No markdown, explanations, or text outside the JSON structure. + +**Required JSON Structure:** ```json { "overview": { - "summary": "Brief summary of the PR", - "riskLevel": "high|medium|low", - "recommendedAction": "approve|request_changes|comment" + "summary": "Objective summary of changes (avoid subjective language)", + "riskLevel": "blocking|major|minor|suggestion", + "recommendedAction": "block|request_changes|comment" }, "fileReviews": [ { @@ -12,30 +16,55 @@ Structure your response in JSON format for easier parsing, following this struct "comments": [ { "line": 42, - "type": "issue|suggestion|praise", - "severity": "critical|high|medium|low", - "comment": "Detailed comment about the code", - "suggestion": "Optional code suggestion" + "type": "security|performance|bug|maintainability|architecture|testing", + "priority": "blocking|major|minor|suggestion", + "confidence": "high|medium|low", + "comment": "**Priority**: Brief issue description", + "suggestion": "šŸŽÆ Located in `file_path#L42`:\n\nDescription of the problem.\n\nBEFORE:\n[problematic code]\n\nAFTER:\n[fix]\n\nāš ļø WHY THIS MATTERS: [consequences]" } ] } ], "testReview": { - "compliance": "high|medium|low", - "missingTests": ["List of scenarios missing tests"], + "compliance": "poor|fair|good", + "missingTests": ["Specific test scenarios missing"], "testQualityIssues": [ { "file": "path/to/test.js", "line": 123, - "issue": "Description of issue with testing approach", - "suggestion": "How to fix the testing approach" + "issue": "Testing problem description", + "suggestion": "Specific fix for testing approach" } ] }, "generalFeedback": { - "strengths": ["List of strengths"], - "concerns": ["List of concerns"], - "suggestions": ["List of suggestions"] + "concerns": ["Blocking issues affecting reliability or security"], + "suggestions": ["Actionable improvements for architecture or patterns"] } } ``` + +**CRITICAL REQUIREMENTS:** +- NO positive feedback, praise, or strengths +- Every comment includes location references and follows format above +- Include working code examples in all suggestions +- Integrate confidence naturally into descriptions +- Focus exclusively on genuine issues, not style preferences + +**QUALITY GATES - Every suggestion must be:** +āœ… **Actionable**: Implementable immediately with provided code example +āœ… **Impactful**: Addresses security, performance, or maintainability risk +āœ… **Evidence-Based**: Supported by specific code analysis +āœ… **Context-Appropriate**: Fits project type and established patterns + +**AUTONOMOUS OPERATION:** +- Make definitive assessments without user interaction +- If code intent unclear, state assumption and proceed +- Provide complete analysis requiring no follow-up +- All recommendations must be implementable in CI/CD + +**EXCLUDE:** +- Style preferences without technical justification +- Theoretical improvements without clear impact +- Generic advice not tied to specific code +- Nitpicks that don't meaningfully improve quality \ No newline at end of file diff --git a/src/templates/prompts/priorities.md b/src/templates/prompts/priorities.md index 59d9798..ff9b226 100644 --- a/src/templates/prompts/priorities.md +++ b/src/templates/prompts/priorities.md @@ -1,8 +1,95 @@ -Prioritize feedback on: -1. Security vulnerabilities -2. Performance issues -3. Maintainability concerns -4. Documentation completeness -5. Test quality and coverage - -Provide specific feedback with line numbers when possible. If no issues are found in a particular category, explicitly state that. +## REVIEW PRIORITIES & DECISION FRAMEWORK + +### BLOCKING PRIORITIES (Block Deployment) + +**1. Security Vulnerabilities** +- Hardcoded credentials, API keys, or sensitive data exposure +- Authentication/authorization bypass opportunities +- SQL injection, XSS, or other injection vulnerabilities +- Unsafe deserialization or file handling +- Dependency vulnerabilities with known exploits + +**2. Data Integrity & Breaking Changes** +- Operations that could cause data loss or corruption +- API changes without version bump or migration path +- Database schema changes without rollback strategy +- Breaking changes to public interfaces + +**3. System Stability Risks** +- Memory leaks or resource exhaustion patterns +- Infinite loops or recursion without bounds +- Unhandled exceptions that could crash the system +- Race conditions in concurrent code + +### MAJOR PRIORITIES (Request Changes) + +**4. Performance Bottlenecks** +- N+1 query patterns or inefficient database operations +- Algorithmic complexity issues (O(n²) where O(n) possible) +- Missing indexes on frequently queried columns +- Unnecessary network calls or computations + +**5. Architecture Violations** +- Breaking established patterns (adapter, factory, dependency injection) +- Violations of single responsibility principle +- Circular dependencies or inappropriate tight coupling +- Missing separation between business logic and infrastructure + +**6. Type Safety & Error Handling** +- Usage of `any` type without justification +- Missing null/undefined checks in TypeScript +- Unhandled promise rejections +- Silent failures without proper error propagation + +### MINOR PRIORITIES (Comment) + +**7. Code Quality & Maintainability** +- Functions exceeding reasonable complexity (>50 lines, high cyclomatic complexity) +- Poor naming that obscures intent +- Missing error handling for edge cases +- Code duplication that should be abstracted + +**8. Testing & Documentation** +- Missing tests for new functionality or edge cases +- Tests that don't properly isolate units under test +- Complex logic without explanatory comments +- Missing documentation for non-obvious behavior + +### COMMENT QUALITY STANDARDS + +**Every issue must include:** +- **Specific Location**: `file_path#L42` reference +- **Clear Problem**: What specifically is wrong and why it matters +- **Concrete Solution**: Working code example showing the fix +- **Business Impact**: Why this matters for system reliability or maintainability +- **Confidence Integration**: Naturally woven into the description + +**Issue Format Template:** +``` +**Priority**: Brief description + +šŸŽÆ Located in `file_path#L42`: + +Describe the specific problem and why it needs attention. + +BEFORE: +[problematic code from diff] + +AFTER: +[specific fix with proper syntax] + +āš ļø WHY THIS MATTERS: [consequences if not addressed] +``` + +### REVIEW COMPLETION VERIFICATION + +**Before submitting analysis:** +- [ ] All security vulnerabilities identified and documented +- [ ] Performance bottlenecks flagged with specific impact assessment +- [ ] Architecture violations noted with pattern-compliant alternatives +- [ ] Each suggestion includes working code examples +- [ ] Confidence levels justified with technical reasoning +- [ ] Risk assessment reflects highest priority issues found +- [ ] No subjective style preferences without technical justification + +**Decision Threshold**: Would I be comfortable deploying this code to production and maintaining it long-term? \ No newline at end of file diff --git a/tests/core/MRReviewer.test.ts b/tests/core/MRReviewer.test.ts index 6c030aa..1dc6b52 100644 --- a/tests/core/MRReviewer.test.ts +++ b/tests/core/MRReviewer.test.ts @@ -113,7 +113,7 @@ describe('MRReviewer', () => { // Setup mock config mockConfig = { llmProvider: 'anthropic', - anthropicModel: 'claude-3-7-sonnet-20250219', + anthropicModel: 'claude-sonnet-4-20250514', openaiModel: 'gpt-4', deepseekModel: 'deepseek-coder', copilotModel: 'gpt-4', diff --git a/tests/utils/fileUtils.test.ts b/tests/utils/fileUtils.test.ts index a94cae2..84904b7 100644 --- a/tests/utils/fileUtils.test.ts +++ b/tests/utils/fileUtils.test.ts @@ -66,7 +66,7 @@ describe('FileUtils', () => { const metadata: ReviewMetadata = { timestamp: '2025-03-31T12:00:00.000Z', llmProvider: 'anthropic', - llmModel: 'claude-3-7-sonnet-20250219', + llmModel: 'claude-sonnet-4-20250514', mrMode: 'local', gitPlatform: 'github', commandLine: 'mr-checker', @@ -110,7 +110,7 @@ describe('FileUtils', () => { const metadata: ReviewMetadata = { timestamp: '2025-03-31T12:00:00.000Z', llmProvider: 'anthropic', - llmModel: 'claude-3-7-sonnet-20250219', + llmModel: 'claude-sonnet-4-20250514', mrMode: 'local', gitPlatform: 'github', commandLine: 'mr-checker', @@ -155,7 +155,7 @@ describe('FileUtils', () => { const metadata: ReviewMetadata = { timestamp: '2025-03-31T12:00:00.000Z', llmProvider: 'anthropic', - llmModel: 'claude-3-7-sonnet-20250219', + llmModel: 'claude-sonnet-4-20250514', mrMode: 'local', gitPlatform: 'github', commandLine: 'mr-checker',