diff --git a/.ralph/prd.json b/.ralph/prd.json index 4101d9a..92d221b 100644 --- a/.ralph/prd.json +++ b/.ralph/prd.json @@ -806,8 +806,8 @@ "000", "010" ], - "passes": false, - "iterations_taken": 0, + "passes": true, + "iterations_taken": 1, "blocked_reason": null, "test_files": [ "tests/ralph-failure-learning.bats" diff --git a/.ralph/progress.txt b/.ralph/progress.txt index 4312721..fe8f0a1 100644 --- a/.ralph/progress.txt +++ b/.ralph/progress.txt @@ -1356,3 +1356,61 @@ Notes for next iteration: - Next available high-priority features: 001 (auto-PR), 018 (spike mode) - Next available features: 001, 002, 005, 006, 016, 018 --- + +--- 2026-01-28 15:30:00 --- +Feature: [029] Persist failure context to progress.txt after rollback so next iteration can learn from mistakes +Status: Completed + +Implementation: +- Created log_failure_context() function in ralph.sh (lines 1600-1722) +- Function captures feature info from PRD using get_prd_data and Python JSON parsing +- Detects which quality gates failed by checking temp log files: + - /tmp/ralph_format_check.log for formatting errors + - /tmp/ralph_lint.log for linting errors + - /tmp/ralph_typecheck.log and /tmp/ralph_tsc.log for type checking errors + - /tmp/ralph_test.log for test failures (uses extract_failing_tests for details) +- Extracts actual error messages from temp files (tail -20/-30) for actionable context +- Formats failure context as ROLLBACK entry with timestamp, feature info, failed gates, and error details +- Appends to progress.txt AFTER rollback (so it survives git reset --hard) +- Updated run_single_iteration() to call log_failure_context after rollback_last_commit (line 1779) +- Updated run_continuous_loop() to call log_failure_context after rollback_last_commit (line 1860) +- Updated AGENT_PROMPT.md "Get Your Bearings" section to instruct agents to check for ROLLBACK entries +- Added guidance explaining what ROLLBACK entries contain and why they're important + +Testing: +- Created tests/ralph-failure-learning.bats with 20 comprehensive tests +- All 278 tests pass (added 20 new tests) +- Tests verify: function exists, called after rollback, checks all temp log files, appends to progress.txt +- Tests verify: ROLLBACK header format, feature info inclusion, error detail extraction +- Tests verify: AGENT_PROMPT.md documentation of ROLLBACK entries +- Bash syntax validation passed: bash -n ralph.sh + +Key Files Modified: +- ralph.sh: Added log_failure_context() function, updated 2 rollback call sites +- AGENT_PROMPT.md: Added ROLLBACK learning guidance to "Get Your Bearings" section +- tests/ralph-failure-learning.bats: New test file with 20 tests +- .ralph/prd.json: Marked feature 029 as complete with iterations_taken=1 + +Challenges: +- Initial test failures due to grep -A 100 not capturing full 123-line function +- Fixed by updating tests to use grep -A 150 to capture complete function + +Technical Solution: +- log_failure_context() is called AFTER rollback, so the progress.txt update survives the git reset +- Function dynamically detects which gates failed by checking for errors in temp log files +- Captures specific error messages, not just generic "failed" status +- Next iteration will read progress.txt and see exactly what went wrong +- This breaks the failure loop where agents repeatedly attempt the same failing approach + +Benefits: +- Agents now learn from previous failures instead of repeating mistakes +- Specific error messages guide the fix (e.g., "ESLint error on line 42: unused variable") +- Failure context persists across rollbacks, enabling incremental debugging +- Addresses core problem: rollback was destroying all failure context + +Notes for next iteration: +- Feature 029 is now complete +- Next available features to work on: 001, 002, 005, 006, 016, 018 +- Feature 001 (auto-PR creation) is medium priority and ready to implement +- The failure learning mechanism is now in place for continuous improvement +--- diff --git a/AGENT_PROMPT.md b/AGENT_PROMPT.md index d563214..7a9ebd5 100644 --- a/AGENT_PROMPT.md +++ b/AGENT_PROMPT.md @@ -35,6 +35,14 @@ cat .ralph/prd.json **Critical**: Understand what was recently worked on before starting new work. +**Important - Learning from Failures**: Check for ROLLBACK entries in progress.txt. These contain critical learning context from previous iterations where quality gates failed and changes were reverted. ROLLBACK entries include: +- Which feature was attempted +- Which quality gates failed (linting, tests, type checking, formatting) +- Specific error messages from the failures +- Guidance for next attempt + +If you see a ROLLBACK entry for the feature you're about to work on, READ IT CAREFULLY. It shows exactly what went wrong last time so you can avoid repeating the same mistakes. + ### 2. Verify Existing Functionality Before implementing anything new, verify core functionality still works: diff --git a/ralph.sh b/ralph.sh index d3067e8..47c7d98 100755 --- a/ralph.sh +++ b/ralph.sh @@ -1595,6 +1595,132 @@ rollback_last_commit() { fi } +# Log failure context to progress.txt after rollback (Feature 029) +# This preserves learning context so next iteration knows what failed +log_failure_context() { + local rolled_back_commit="$1" + + log_info "Capturing failure context for next iteration..." + + # Get current feature info + local prd_data=$(get_prd_data) + local feature_info=$(echo "$prd_data" | python3 -c " +import json +import sys + +try: + prd = json.load(sys.stdin) + features = prd.get('features', []) + + # Find next incomplete feature with met dependencies + for feature in features: + if feature.get('passes', False): + continue + if feature.get('blocked_reason'): + continue + + depends_on = feature.get('depends_on', []) + deps_met = True + for dep_id in depends_on: + dep_feature = next((f for f in features if f.get('id') == dep_id), None) + if not dep_feature or not dep_feature.get('passes', False): + deps_met = False + break + + if deps_met: + print(json.dumps({ + 'id': feature.get('id', 'unknown'), + 'type': feature.get('type', 'feature'), + 'description': feature.get('description', 'Unknown feature') + })) + break +except: + pass +" 2>/dev/null) + + local feature_id="unknown" + local feature_type="unknown" + local feature_desc="Unknown feature" + + if [ -n "$feature_info" ]; then + feature_id=$(echo "$feature_info" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id', 'unknown'))") + feature_type=$(echo "$feature_info" | python3 -c "import json,sys; print(json.load(sys.stdin).get('type', 'unknown'))") + feature_desc=$(echo "$feature_info" | python3 -c "import json,sys; print(json.load(sys.stdin).get('description', 'Unknown'))") + fi + + # Determine which quality gates failed + local failed_gates="" + local error_details="" + + # Check formatting + if [ -f "/tmp/ralph_format_check.log" ]; then + if grep -q "error" /tmp/ralph_format_check.log || grep -q "FAIL" /tmp/ralph_format_check.log; then + failed_gates="${failed_gates}\n - Formatting" + error_details="${error_details}\n\nFormatting Errors:\n" + error_details="${error_details}$(tail -20 /tmp/ralph_format_check.log)" + fi + fi + + # Check linting + if [ -f "/tmp/ralph_lint.log" ]; then + if grep -qE "error|Error|ERROR" /tmp/ralph_lint.log; then + failed_gates="${failed_gates}\n - Linting" + error_details="${error_details}\n\nLinting Errors:\n" + error_details="${error_details}$(tail -30 /tmp/ralph_lint.log)" + fi + fi + + # Check type checking + if [ -f "/tmp/ralph_typecheck.log" ]; then + if grep -qE "error|Error|ERROR" /tmp/ralph_typecheck.log; then + failed_gates="${failed_gates}\n - Type Checking" + error_details="${error_details}\n\nType Checking Errors:\n" + error_details="${error_details}$(tail -30 /tmp/ralph_typecheck.log)" + fi + fi + + if [ -f "/tmp/ralph_tsc.log" ]; then + if grep -qE "error|Error|ERROR" /tmp/ralph_tsc.log; then + failed_gates="${failed_gates}\n - Type Checking (tsc)" + error_details="${error_details}\n\nTypeScript Errors:\n" + error_details="${error_details}$(tail -30 /tmp/ralph_tsc.log)" + fi + fi + + # Check tests + if [ -f "/tmp/ralph_test.log" ]; then + # Use extract_failing_tests to get specific failures + local test_failures=$(extract_failing_tests "/tmp/ralph_test.log" 2>/dev/null || echo "") + if [ -n "$test_failures" ]; then + failed_gates="${failed_gates}\n - Tests" + error_details="${error_details}\n\nTest Failures:\n" + error_details="${error_details}${test_failures}" + fi + fi + + # If no specific gates identified, add generic message + if [ -z "$failed_gates" ]; then + failed_gates="\n - Unknown (quality gates failed but details not captured)" + fi + + # Append to progress.txt (AFTER rollback, so it survives) + { + echo "" + echo "--- ROLLBACK: $(date '+%Y-%m-%d %H:%M:%S') ---" + echo "Feature: [$feature_id] ($feature_type) $feature_desc" + echo "Rolled Back Commit: $rolled_back_commit" + echo "Reason: Quality gates failed" + echo -e "Failed Gates:${failed_gates}" + echo -e "${error_details}" + echo "" + echo "Next Iteration: Review these errors before re-attempting this feature." + echo "Consider: 1) Different approach, 2) Mark as blocked if impossible, 3) Fix specific errors listed above" + echo "---" + } >> "$PROGRESS_FILE" + + log_success "Failure context saved to $PROGRESS_FILE" +} + # Check for unauthorized git push operations check_for_git_push() { if [ "$ALLOW_GIT_PUSH" != "true" ]; then @@ -1677,6 +1803,7 @@ run_single_iteration() { if [ "$ROLLBACK_ON_FAILURE" = "true" ]; then log_error "Verification failed - rolling back changes" rollback_last_commit + log_failure_context "$LAST_COMMIT_MESSAGE" echo "" log_warning "Feature may need to be reworked or marked as blocked" else @@ -1758,6 +1885,7 @@ run_continuous_loop() { if [ "$ROLLBACK_ON_FAILURE" = "true" ]; then log_error "Verification failed - rolling back changes" rollback_last_commit + log_failure_context "$LAST_COMMIT_MESSAGE" log_warning "Continuing to next iteration (feature may be blocked)" else log_warning "Verification failed but rollback is disabled" diff --git a/tests/ralph-failure-learning.bats b/tests/ralph-failure-learning.bats new file mode 100644 index 0000000..244833a --- /dev/null +++ b/tests/ralph-failure-learning.bats @@ -0,0 +1,114 @@ +#!/usr/bin/env bats + +# Tests for Feature 029: Persist failure context after rollback + +setup() { + # Source ralph.sh functions for testing + source ralph.sh +} + +# Test 1: log_failure_context function exists +@test "ralph.sh has log_failure_context function" { + grep -q "^log_failure_context()" ralph.sh +} + +# Test 2: log_failure_context is called after rollback in run_single_iteration +@test "run_single_iteration calls log_failure_context after rollback" { + # Check that log_failure_context is called after rollback_last_commit + grep -A 3 "rollback_last_commit" ralph.sh | grep -q "log_failure_context" +} + +# Test 3: log_failure_context is called after rollback in run_continuous_loop +@test "run_continuous_loop calls log_failure_context after rollback" { + # Check continuous loop also calls log_failure_context after rollback + local count=$(grep -c "log_failure_context" ralph.sh) + [ "$count" -ge 3 ] # Function definition + 2 call sites +} + +# Test 4: log_failure_context captures feature information +@test "log_failure_context extracts feature information from PRD" { + # Check that function uses get_prd_data to fetch feature info + grep -A 50 "^log_failure_context()" ralph.sh | grep -q "get_prd_data" +} + +# Test 5: log_failure_context checks for formatting errors +@test "log_failure_context checks /tmp/ralph_format_check.log" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "/tmp/ralph_format_check.log" +} + +# Test 6: log_failure_context checks for linting errors +@test "log_failure_context checks /tmp/ralph_lint.log" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "/tmp/ralph_lint.log" +} + +# Test 7: log_failure_context checks for type checking errors +@test "log_failure_context checks /tmp/ralph_typecheck.log" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "/tmp/ralph_typecheck.log" +} + +# Test 8: log_failure_context checks for test failures +@test "log_failure_context checks /tmp/ralph_test.log" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "/tmp/ralph_test.log" +} + +# Test 9: log_failure_context uses extract_failing_tests for test errors +@test "log_failure_context uses extract_failing_tests function" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "extract_failing_tests" +} + +# Test 10: log_failure_context appends to PROGRESS_FILE +@test "log_failure_context appends to progress file" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q ">> \"\$PROGRESS_FILE\"" +} + +# Test 11: log_failure_context creates ROLLBACK header +@test "log_failure_context writes ROLLBACK header with timestamp" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "ROLLBACK:" +} + +# Test 12: log_failure_context includes feature ID +@test "log_failure_context includes feature ID in output" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "Feature:" +} + +# Test 13: log_failure_context includes rolled back commit message +@test "log_failure_context includes rolled back commit message" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "Rolled Back Commit:" +} + +# Test 14: log_failure_context lists failed quality gates +@test "log_failure_context lists failed quality gates" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "Failed Gates:" +} + +# Test 15: log_failure_context provides guidance for next iteration +@test "log_failure_context provides guidance for next iteration" { + grep -A 150 "^log_failure_context()" ralph.sh | grep -q "Next Iteration:" +} + +# Test 16: log_failure_context passes commit message as parameter +@test "log_failure_context accepts commit message parameter" { + # Check call sites pass LAST_COMMIT_MESSAGE + grep "log_failure_context" ralph.sh | grep -v "^log_failure_context()" | grep -q "LAST_COMMIT_MESSAGE" +} + +# Test 17: AGENT_PROMPT.md mentions ROLLBACK entries +@test "AGENT_PROMPT.md documents ROLLBACK entries" { + grep -q "ROLLBACK" AGENT_PROMPT.md +} + +# Test 18: AGENT_PROMPT.md tells agent to check for failure context +@test "AGENT_PROMPT.md instructs agent to check for ROLLBACK entries" { + grep -A 5 "ROLLBACK" AGENT_PROMPT.md | grep -qE "(check|read|learn|failure)" +} + +# Test 19: AGENT_PROMPT.md explains what ROLLBACK entries contain +@test "AGENT_PROMPT.md explains ROLLBACK entry contents" { + grep -A 10 "ROLLBACK" AGENT_PROMPT.md | grep -qE "(quality gates|error|failure)" +} + +# Test 20: log_failure_context captures error details from temp files +@test "log_failure_context captures error details not just gate names" { + # Check that function reads content from temp files (tail, grep, etc) + grep -A 150 "^log_failure_context()" ralph.sh | grep -qE "(tail|head|cat).*ralph_.*\.log" +}