From 38a297a451e580ceede542b20e323cd4e195bce8 Mon Sep 17 00:00:00 2001 From: ArthurCRodrigues Date: Tue, 26 May 2026 07:43:55 -0300 Subject: [PATCH 1/6] feat(static_analysis): add AI algorithm tests (#332) --- .../template_library/static_analysis.py | 87 +++++++++++++++++++ autograder/translations/en.json | 18 ++++ autograder/translations/pt_br.json | 18 ++++ tests/unit/test_ai_algorithm_tests.py | 71 +++++++++++++++ 4 files changed, 194 insertions(+) create mode 100644 tests/unit/test_ai_algorithm_tests.py diff --git a/autograder/template_library/static_analysis.py b/autograder/template_library/static_analysis.py index f6426dcb..3805005e 100644 --- a/autograder/template_library/static_analysis.py +++ b/autograder/template_library/static_analysis.py @@ -3,6 +3,7 @@ from pydantic import BaseModel, Field from autograder.models.abstract.template import Template +from autograder.models.abstract.ai_test_function import AiTestFunction from autograder.models.abstract.test_function import TestFunction from autograder.models.dataclass.param_description import ParamDescription from autograder.models.dataclass.submission import SubmissionFile @@ -330,6 +331,89 @@ def execute(self, files: Optional[List[SubmissionFile]], sandbox: Optional[Sandb ) +class AiAlgorithmConfig(BaseModel): + algorithm_name: str = Field(..., min_length=1) + + +class AiAlgorithmTestBase(AiTestFunction): + algorithm_family: str = "" + test_name: str = "" + + @property + def name(self) -> str: + return self.test_name + + @property + def description(self) -> str: + return t(f"static_analysis.{self.name}.description") + + @property + def parameter_description(self) -> List[ParamDescription]: + return [ + ParamDescription( + "algorithm_name", + t(f"static_analysis.{self.name}.params.algorithm_name"), + "string", + ) + ] + + @property + def config_schema(self) -> Type[BaseModel]: + return AiAlgorithmConfig + + def build_prompt( + self, + files: Optional[List[SubmissionFile]], + **kwargs, + ) -> str: + algorithm_name = (kwargs.get("algorithm_name") or "").strip() + file_names = ", ".join(f.filename for f in files) if files else "" + + if file_names: + file_scope = f"Focus only on these files: {file_names}." + else: + file_scope = "No submission files were provided for this test." + + algo_label = algorithm_name or "Unknown algorithm" + + return ( + f"You are verifying a {self.algorithm_family} algorithm implementation.\n" + f"Requested algorithm: {algo_label}.\n" + f"{file_scope}\n\n" + "Analyze the provided code and determine whether it is a correct and faithful " + "implementation of the requested algorithm.\n" + "Be strict: only accept if the algorithm is clearly implemented as specified.\n\n" + "Criteria:\n" + "1. The implementation must follow the specific logic and complexity " + "characteristics of the requested algorithm.\n" + "2. It must NOT be a wrapper around a built-in library function or standard " + "library implementation.\n" + "3. If it implements a different algorithm, it is incorrect.\n\n" + "Scoring rules:\n" + "- Score 100 only if the implementation is correct and faithful.\n" + "- Otherwise score 0.\n\n" + "In your feedback, briefly justify the decision and cite relevant code " + "evidence. If the required algorithm is missing or there is no relevant " + "code, score 0.\n" + f"Use subject '{algo_label}'." + ) + + +class AiSortingAlgorithmTest(AiAlgorithmTestBase): + algorithm_family = "sorting" + test_name = "ai_sorting_algorithm" + + +class AiSearchAlgorithmTest(AiAlgorithmTestBase): + algorithm_family = "search" + test_name = "ai_search_algorithm" + + +class AiGraphAlgorithmTest(AiAlgorithmTestBase): + algorithm_family = "graph" + test_name = "ai_graph_algorithm" + + class StaticAnalysisTemplate(Template): """ A template for static analysis of code submissions. @@ -341,6 +425,9 @@ def __init__(self): self.tests = { "forbidden_import": ForbiddenImportTest(), "forbidden_keyword": ForbiddenKeywordTest(), + "ai_sorting_algorithm": AiSortingAlgorithmTest(), + "ai_search_algorithm": AiSearchAlgorithmTest(), + "ai_graph_algorithm": AiGraphAlgorithmTest(), } @property diff --git a/autograder/translations/en.json b/autograder/translations/en.json index 212c3315..713710b0 100644 --- a/autograder/translations/en.json +++ b/autograder/translations/en.json @@ -506,6 +506,24 @@ "success": "No forbidden language constructs found." } }, + "ai_sorting_algorithm": { + "description": "Uses AI to verify that the submission implements the requested sorting algorithm rather than a library shortcut.", + "params": { + "algorithm_name": "Name of the sorting algorithm to verify (e.g., 'Quick Sort')." + } + }, + "ai_search_algorithm": { + "description": "Uses AI to verify that the submission implements the requested search algorithm rather than a library shortcut.", + "params": { + "algorithm_name": "Name of the search algorithm to verify (e.g., 'Binary Search')." + } + }, + "ai_graph_algorithm": { + "description": "Uses AI to verify that the submission implements the requested graph algorithm rather than a library shortcut.", + "params": { + "algorithm_name": "Name of the graph algorithm to verify (e.g., 'Dijkstra')." + } + }, "template": { "name": "Static Analysis", "description": "A template for evaluating assignments through static and structural code analysis." diff --git a/autograder/translations/pt_br.json b/autograder/translations/pt_br.json index df57c025..1784d4f2 100644 --- a/autograder/translations/pt_br.json +++ b/autograder/translations/pt_br.json @@ -506,6 +506,24 @@ "success": "Nenhuma construção de linguagem proibida encontrada." } }, + "ai_sorting_algorithm": { + "description": "Usa IA para verificar se a submissão implementa o algoritmo de ordenação solicitado, sem atalhos de biblioteca.", + "params": { + "algorithm_name": "Nome do algoritmo de ordenação a verificar (ex: 'Quick Sort')." + } + }, + "ai_search_algorithm": { + "description": "Usa IA para verificar se a submissão implementa o algoritmo de busca solicitado, sem atalhos de biblioteca.", + "params": { + "algorithm_name": "Nome do algoritmo de busca a verificar (ex: 'Binary Search')." + } + }, + "ai_graph_algorithm": { + "description": "Usa IA para verificar se a submissão implementa o algoritmo de grafos solicitado, sem atalhos de biblioteca.", + "params": { + "algorithm_name": "Nome do algoritmo de grafos a verificar (ex: 'Dijkstra')." + } + }, "template": { "name": "Análise Estática", "description": "Um modelo para avaliar tarefas através de análise estática e estrutural de código." diff --git a/tests/unit/test_ai_algorithm_tests.py b/tests/unit/test_ai_algorithm_tests.py new file mode 100644 index 00000000..9dd4ea60 --- /dev/null +++ b/tests/unit/test_ai_algorithm_tests.py @@ -0,0 +1,71 @@ +"""Tests for AI-based algorithm verification tests.""" + +import pytest +from pydantic import ValidationError + +from autograder.models.dataclass.submission import SubmissionFile +from autograder.template_library.static_analysis import ( + AiAlgorithmConfig, + AiGraphAlgorithmTest, + AiSearchAlgorithmTest, + AiSortingAlgorithmTest, + StaticAnalysisTemplate, +) + + +@pytest.mark.parametrize( + "test_name,test_cls", + [ + ("ai_sorting_algorithm", AiSortingAlgorithmTest), + ("ai_search_algorithm", AiSearchAlgorithmTest), + ("ai_graph_algorithm", AiGraphAlgorithmTest), + ], +) +def test_ai_algorithm_tests_registered_in_template(test_name, test_cls): + template = StaticAnalysisTemplate() + test = template.get_test(test_name) + assert isinstance(test, test_cls) + assert test.name == test_name + + +@pytest.mark.parametrize( + "test_cls,expected_name", + [ + (AiSortingAlgorithmTest, "ai_sorting_algorithm"), + (AiSearchAlgorithmTest, "ai_search_algorithm"), + (AiGraphAlgorithmTest, "ai_graph_algorithm"), + ], +) +def test_ai_algorithm_metadata(test_cls, expected_name): + test_fn = test_cls() + assert test_fn.name == expected_name + assert len(test_fn.description) > 0 + params = test_fn.parameter_description + assert len(params) == 1 + assert params[0].name == "algorithm_name" + + +def test_ai_algorithm_config_requires_algorithm_name(): + with pytest.raises(ValidationError): + AiAlgorithmConfig() + + +def test_ai_algorithm_config_accepts_algorithm_name(): + config = AiAlgorithmConfig(algorithm_name="Quick Sort") + assert config.algorithm_name == "Quick Sort" + + +def test_build_prompt_includes_algorithm_and_files(): + test_fn = AiSortingAlgorithmTest() + files = [SubmissionFile("sort.py", "def quicksort(arr): return arr")] + prompt = test_fn.build_prompt(files, algorithm_name="Quick Sort") + assert "Quick Sort" in prompt + assert "sort.py" in prompt + assert "Score 100" in prompt + + +def test_build_prompt_handles_missing_files(): + test_fn = AiSearchAlgorithmTest() + prompt = test_fn.build_prompt(None, algorithm_name="Binary Search") + assert "Binary Search" in prompt + assert "No submission files were provided" in prompt From d60a1371a4dd348f731d3f41bda0fc50ee582b2a Mon Sep 17 00:00:00 2001 From: arthurcrodrigues-hotmart Date: Tue, 26 May 2026 08:17:22 -0300 Subject: [PATCH 2/6] ci: trigger checks From 4033534913730f5108059e4b94a0ad568167060d Mon Sep 17 00:00:00 2001 From: Arthur Carvalho Date: Tue, 26 May 2026 17:18:03 -0300 Subject: [PATCH 3/6] trigger --- autograder/template_library/static_analysis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autograder/template_library/static_analysis.py b/autograder/template_library/static_analysis.py index 3805005e..879dc1da 100644 --- a/autograder/template_library/static_analysis.py +++ b/autograder/template_library/static_analysis.py @@ -1,4 +1,4 @@ -import logging +import logging from typing import List, Optional, Dict, Any, Type from pydantic import BaseModel, Field From abfc917449f00e1f9a207074b12cfdfe4a6b6e06 Mon Sep 17 00:00:00 2001 From: ArthurCRodrigues Date: Tue, 26 May 2026 22:43:42 -0300 Subject: [PATCH 4/6] docs: document static analysis tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/API.md | 7 ++++++- docs/core/template-library.md | 16 ++++++++++++++++ docs/index.md | 1 + docs/pipeline/01-load-template.md | 3 ++- 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/API.md b/docs/API.md index f9905c47..328f6e50 100644 --- a/docs/API.md +++ b/docs/API.md @@ -141,7 +141,7 @@ Content-Type: application/json | Field | Type | Required | Description | |-------|------|----------|-------------| | `external_assignment_id` | string | ✓ | External assignment ID from your LMS/platform | -| `template_name` | string | ✓ | Template to use (`input_output`, `web_dev`, `api_testing`) | +| `template_name` | string | ✓ | Template to use (`input_output`, `web_dev`, `api_testing`, `static_analysis`) | | `criteria_config` | object | ✓ | Grading criteria tree configuration | | `languages` | list[string] | ✓ | Supported languages: `python`, `java`, `node`, `cpp` | | `setup_config` | object | ✗ | Setup configuration for preflight checks | @@ -791,6 +791,11 @@ GET /api/v1/templates "name": "web_dev", "description": "...", "test_functions": [ ... ] + }, + { + "name": "static_analysis", + "description": "...", + "test_functions": [ ... ] } ] } diff --git a/docs/core/template-library.md b/docs/core/template-library.md index 09177bdf..90ba9d2d 100644 --- a/docs/core/template-library.md +++ b/docs/core/template-library.md @@ -19,6 +19,7 @@ Current registry keys: - `input_output` - `api` - `webdev` +- `static_analysis` Each key resolves to a template class with: @@ -27,6 +28,20 @@ Each key resolves to a template class with: - `requires_sandbox` - `tests` map of available `TestFunction` objects +## Static Analysis Template (`static_analysis`) + +Static analysis runs on submission files without executing code (no sandbox required). It includes both rule-based checks and AI-assisted algorithm validation. + +Available tests: + +- `forbidden_import` — Parameters: `forbidden_imports` (list of strings), `submission_language` (string or Language enum). +- `forbidden_keyword` — Parameters: `forbidden_keywords` (list of strings), `custom_ast_grep_rules` (list of rule dicts). +- `ai_sorting_algorithm` — Parameters: `algorithm_name` (string). +- `ai_search_algorithm` — Parameters: `algorithm_name` (string). +- `ai_graph_algorithm` — Parameters: `algorithm_name` (string). + +AI algorithm tests require a valid `OPENAI_API_KEY` to be configured in the API environment. + ## How it integrates with the pipeline 1. **Load Template** resolves the chosen template. @@ -59,3 +74,4 @@ To add a new template: - [Input/Output Template](../template-library/input_output.md) - [API Testing Template](../template-library/api_testing.md) - [Web Development Template](../template-library/web_dev.md) +- [Static Analysis Template](#static-analysis-template) diff --git a/docs/index.md b/docs/index.md index 872382ff..e7e6b339 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,6 +31,7 @@ This documentation is organized to be both practical for first-time users and de - [Input/Output Template](template-library/input_output.md) - [API Testing Template](template-library/api_testing.md) - [Web Development Template](template-library/web_dev.md) +- [Static Analysis Template](core/template-library.md#static-analysis-template) ### GitHub Classroom / CI integrator diff --git a/docs/pipeline/01-load-template.md b/docs/pipeline/01-load-template.md index fb3b7bf4..a49c1f0a 100644 --- a/docs/pipeline/01-load-template.md +++ b/docs/pipeline/01-load-template.md @@ -8,7 +8,7 @@ The Load Template step is the entry point of the pipeline. It loads the grading The step uses the `TemplateLibraryService` singleton to load a template by name. There are two paths: -1. **Built-in template** — Loaded from the template registry by identifier (e.g., `"input_output"`, `"web_dev"`, `"api_testing"`). +1. **Built-in template** — Loaded from the template registry by identifier (e.g., `"input_output"`, `"web_dev"`, `"api_testing"`, `"static_analysis"`). 2. **Custom template** — User-provided template object. This path is planned but not yet implemented; it will require sandboxed loading for security. The loaded `Template` object is stored in the step result's `data` field, making it available to all subsequent steps. @@ -47,6 +47,7 @@ Available built-in templates: | `input_output` | Input/Output Testing | Yes | Command-line programs with stdin/stdout | | `web_dev` | Web Development | No | HTML/CSS/JS file validation | | `api_testing` | API Testing | Yes | HTTP endpoint validation | +| `static_analysis` | Static Analysis | No | Code-quality checks and AI algorithm validation | ## Failure Scenarios From f3df862a8a6020a3c19cdcb48a84d5b7aa54a0ed Mon Sep 17 00:00:00 2001 From: ArthurCRodrigues Date: Tue, 26 May 2026 22:55:47 -0300 Subject: [PATCH 5/6] test: fix broken mocks and update to new GradingRequest API --- tests/web/test_core.py | 11 ++++--- tests/web/test_grading_service.py | 52 ++++++++++++++++++++++--------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/tests/web/test_core.py b/tests/web/test_core.py index 84aeb790..665a15af 100644 --- a/tests/web/test_core.py +++ b/tests/web/test_core.py @@ -89,17 +89,18 @@ async def test_lifespan_with_pending_tasks(): mock_task2 = Mock() mock_task2.done = Mock(return_value=True) - # Initially use real set, then replace with tasks during shutdown - import web.core.lifespan as lifespan_module - mock_sandbox_mgr = Mock() mock_sandbox_mgr.shutdown = Mock() mock_get_sandbox.return_value = mock_sandbox_mgr async with lifespan(mock_app): # Add tasks to the grading_tasks set - lifespan_module.grading_tasks.add(mock_task1) - lifespan_module.grading_tasks.add(mock_task2) + mock_tasks.add(mock_task1) + mock_tasks.add(mock_task2) + + # Also make the mock iterable so the loop in lifespan works + mock_tasks.__iter__.return_value = [mock_task1, mock_task2] + mock_tasks.__bool__.return_value = True # Verify only non-done tasks were cancelled mock_task1.cancel.assert_called_once() diff --git a/tests/web/test_grading_service.py b/tests/web/test_grading_service.py index 01bcff5c..b9eff52f 100644 --- a/tests/web/test_grading_service.py +++ b/tests/web/test_grading_service.py @@ -1,6 +1,7 @@ """Unit tests for the grading service.""" import pytest +import time from unittest.mock import Mock, AsyncMock, patch from web.service.grading_service import grade_submission, _node_to_dict @@ -22,17 +23,20 @@ async def test_grade_submission_success(): mock_result.focus = Mock() mock_result.focus.to_dict = Mock(return_value={"areas": ["testing"]}) + from autograder.models.pipeline_execution import PipelineStatus as AutograderPipelineStatus mock_execution = Mock() mock_execution.result = mock_result + mock_execution.start_time = time.time() + mock_execution.status = AutograderPipelineStatus.SUCCESS + mock_execution.step_results = [] mock_execution.get_pipeline_execution_summary = Mock(return_value={ "status": "success", "steps": ["pre_flight", "build_tree", "grade"] }) - mock_pipeline.run = Mock(return_value=mock_execution) - with patch("web.services.grading_service.build_pipeline", return_value=mock_pipeline), \ - patch("web.services.grading_service.get_session") as mock_session, \ + with patch("web.service.grading_service.build_pipeline", return_value=mock_pipeline), \ + patch("web.service.grading_service.get_session") as mock_session, \ patch("asyncio.to_thread", return_value=mock_execution): # Mock session and repositories @@ -46,20 +50,24 @@ async def test_grade_submission_success(): mock_result_repo.create = AsyncMock() mock_session_instance.commit = AsyncMock() - with patch("web.services.grading_service.SubmissionRepository", return_value=mock_submission_repo), \ - patch("web.services.grading_service.ResultRepository", return_value=mock_result_repo): + with patch("web.service.grading_service.SubmissionRepository", return_value=mock_submission_repo), \ + patch("web.service.grading_service.ResultRepository", return_value=mock_result_repo): - await grade_submission( + from web.service.grading_service import GradingRequest + request = GradingRequest( submission_id=1, grading_config_id=1, template_name="input_output", criteria_config={"tests": []}, setup_config={}, + feedback_config={}, + include_feedback=True, language="python", username="student1", external_user_id="user_001", submission_files={"main.py": {"filename": "main.py", "content": "print('hello')"}} ) + await grade_submission(request) # Verify status updates assert mock_submission_repo.update_status.call_count == 1 @@ -77,10 +85,22 @@ async def test_grade_submission_success(): async def test_grade_submission_pipeline_failure(): """Test grading when pipeline fails.""" mock_pipeline = Mock() + from autograder.models.pipeline_execution import PipelineStatus as AutograderPipelineStatus + from autograder.models.dataclass.step_result import StepStatus, StepName + mock_execution = Mock() mock_execution.result = None # Pipeline failed - mock_execution.step_results = [Mock()] - mock_execution.get_previous_step = Mock(return_value=Mock(error="Syntax error")) + mock_execution.start_time = time.time() + mock_execution.status = AutograderPipelineStatus.FAILED + + mock_failed_step = Mock() + mock_failed_step.status = StepStatus.FAIL + mock_failed_step.step = StepName.PRE_FLIGHT + mock_failed_step.error = "Syntax error" + mock_failed_step.error_data = [] + + mock_execution.step_results = [mock_failed_step] + mock_execution.get_previous_step = Mock(return_value=mock_failed_step) mock_execution.get_pipeline_execution_summary = Mock(return_value={ "status": "failed", "failed_at_step": "PreFlightStep" @@ -88,9 +108,9 @@ async def test_grade_submission_pipeline_failure(): mock_pipeline.run = Mock(return_value=mock_execution) - with patch("web.services.grading_service.build_pipeline", return_value=mock_pipeline), \ - patch("web.services.grading_service.get_session") as mock_session, \ - patch("web.services.grading_service.generate_preflight_feedback", return_value="Fix syntax errors"), \ + with patch("web.service.grading_service.build_pipeline", return_value=mock_pipeline), \ + patch("web.service.grading_service.get_session") as mock_session, \ + patch("web.service.grading_service.generate_preflight_feedback", return_value="Fix syntax errors"), \ patch("asyncio.to_thread", return_value=mock_execution): mock_session_instance = AsyncMock() @@ -102,20 +122,24 @@ async def test_grade_submission_pipeline_failure(): mock_result_repo.create = AsyncMock() mock_session_instance.commit = AsyncMock() - with patch("web.services.grading_service.SubmissionRepository", return_value=mock_submission_repo), \ - patch("web.services.grading_service.ResultRepository", return_value=mock_result_repo): + with patch("web.service.grading_service.SubmissionRepository", return_value=mock_submission_repo), \ + patch("web.service.grading_service.ResultRepository", return_value=mock_result_repo): - await grade_submission( + from web.service.grading_service import GradingRequest + request = GradingRequest( submission_id=2, grading_config_id=1, template_name="input_output", criteria_config={"tests": []}, setup_config={}, + feedback_config={}, + include_feedback=True, language="python", username="student2", external_user_id="user_002", submission_files={"main.py": {"filename": "main.py", "content": "invalid code"}} ) + await grade_submission(request) # Verify failure was recorded create_call = mock_result_repo.create.call_args[1] From aec34926b16d8167d7d0b222a296d13c57849af9 Mon Sep 17 00:00:00 2001 From: ArthurCRodrigues Date: Tue, 26 May 2026 22:58:57 -0300 Subject: [PATCH 6/6] docs: add detailed static analysis documentation and fix cross-references --- docs/core/template-library.md | 2 +- docs/index.md | 2 +- docs/template-library/input_output.md | 35 +----- docs/template-library/static_analysis.md | 138 +++++++++++++++++++++++ 4 files changed, 141 insertions(+), 36 deletions(-) create mode 100644 docs/template-library/static_analysis.md diff --git a/docs/core/template-library.md b/docs/core/template-library.md index 90ba9d2d..d0d78613 100644 --- a/docs/core/template-library.md +++ b/docs/core/template-library.md @@ -74,4 +74,4 @@ To add a new template: - [Input/Output Template](../template-library/input_output.md) - [API Testing Template](../template-library/api_testing.md) - [Web Development Template](../template-library/web_dev.md) -- [Static Analysis Template](#static-analysis-template) +- [Static Analysis Template](../template-library/static_analysis.md) diff --git a/docs/index.md b/docs/index.md index e7e6b339..18b58837 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,7 +31,7 @@ This documentation is organized to be both practical for first-time users and de - [Input/Output Template](template-library/input_output.md) - [API Testing Template](template-library/api_testing.md) - [Web Development Template](template-library/web_dev.md) -- [Static Analysis Template](core/template-library.md#static-analysis-template) +- [Static Analysis Template](template-library/static_analysis.md) ### GitHub Classroom / CI integrator diff --git a/docs/template-library/input_output.md b/docs/template-library/input_output.md index 3aa922c3..337c32fe 100644 --- a/docs/template-library/input_output.md +++ b/docs/template-library/input_output.md @@ -81,40 +81,7 @@ Executes a program with a specific input and verifies it completes **without cra --- -### `forbidden_keyword` - -Tests that a submission does **not** use specific forbidden keywords or language constructs. This test performs structural analysis using `ast-grep`, which is more reliable than regex because it correctly ignores matches inside comments or string literals. - -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `forbidden_keywords` | list[string] | ✗ | List of high-level keywords to forbid (e.g., `"for_loop"`, `"while_loop"`) | -| `custom_ast_grep_rules` | list[dict] | ✗ | Custom `ast-grep` rules for advanced structural matching | - -**Supported Keywords for Predefined Rules:** - -| Language | Supported Keywords | -|----------|--------------------| -| **Python** | `for_loop`, `while_loop`, `eval_call`, `exec_call` | -| **Java** | `for_loop`, `while_loop` | -| **Node.js** | `for_loop`, `while_loop`, `eval_call` | -| **C++ / C** | `for_loop`, `while_loop`, `do_while_loop` | - -**Scoring:** 100 if no forbidden constructs are found, 0 otherwise. - -**Example:** -```json -{ - "name": "forbidden_keyword", - "parameters": { - "forbidden_keywords": ["for_loop", "eval_call"] - }, - "weight": 100 -} -``` - ---- - -## Usage Example +## Multi-Language Command Resolution ```json { diff --git a/docs/template-library/static_analysis.md b/docs/template-library/static_analysis.md new file mode 100644 index 00000000..e964e7f4 --- /dev/null +++ b/docs/template-library/static_analysis.md @@ -0,0 +1,138 @@ +# Static Analysis Template (`static_analysis`) + +The Static Analysis template provides test functions for evaluating code quality, structure, and faithfulness to specific algorithms without executing the code. It leverages regular expressions, structural analysis (AST), and AI-based verification. + +> **Template name for configs:** `static_analysis` +> **Requires sandbox:** No (currently performed server-side on file contents) +> **Supported languages:** Python, Java, Node.js, C, C++ + +--- + +## Test Functions + +### `forbidden_import` + +Statically analyzes submission files to check if any forbidden libraries were imported. This is useful for ensuring students implement logic themselves rather than using high-level libraries. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `forbidden_imports` | list[string] | ✓ | List of library/module names whose import is forbidden. | +| `submission_language` | string | ✓ | Submission language (e.g., `python`, `java`). Required to identify correct import patterns. | + +**Scoring:** 100 if no forbidden imports are found, 0 if any violation is detected. + +**Example:** +```json +{ + "name": "forbidden_import", + "parameters": { + "forbidden_imports": ["math", "numpy"], + "submission_language": "python" + }, + "weight": 50 +} +``` + +--- + +### `forbidden_keyword` + +Statically analyzes submission files using structural analysis (`ast-grep`) to detect forbidden language constructs. Unlike regex, this is context-aware and won't flag keywords inside comments or strings. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `forbidden_keywords` | list[string] | ✗ | List of predefined constructs to forbid (e.g., `for_loop`, `eval_call`). | +| `custom_ast_grep_rules` | list[dict] | ✗ | List of custom `ast-grep` rules to apply for advanced matching. | + +**Supported Predefined Keywords:** + +| Language | Supported Keywords | +|----------|--------------------| +| **Python** | `for_loop`, `while_loop`, `eval_call`, `exec_call` | +| **Java** | `for_loop`, `while_loop` | +| **Node.js** | `for_loop`, `while_loop`, `eval_call` | +| **C++ / C** | `for_loop`, `while_loop`, `do_while_loop` | + +**Scoring:** 100 if no forbidden constructs are found, 0 otherwise. + +**Example:** +```json +{ + "name": "forbidden_keyword", + "parameters": { + "forbidden_keywords": ["while_loop"] + }, + "weight": 50 +} +``` + +--- + +### `ai_sorting_algorithm` / `ai_search_algorithm` / `ai_graph_algorithm` + +Uses AI to verify that the submission correctly implements a specific algorithm (e.g., "Quick Sort", "Dijkstra") and ensures it is not just a wrapper around a built-in library function. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `algorithm_name` | string | ✓ | Specific name of the algorithm to verify. | + +**Requires:** A valid `OPENAI_API_KEY` configured in the environment. + +**Scoring:** 100 if the AI confirms a faithful implementation, 0 otherwise. + +**Example:** +```json +{ + "name": "ai_sorting_algorithm", + "parameters": { + "algorithm_name": "Quick Sort" + }, + "weight": 100 +} +``` + +--- + +## Usage Example + +```json +{ + "external_assignment_id": "sorting-logic-assignment", + "template_name": "static_analysis", + "languages": ["python"], + "criteria_config": { + "base": { + "weight": 100, + "subjects": [ + { + "subject_name": "Constraints", + "weight": 40, + "tests": [ + { + "name": "forbidden_import", + "parameters": { "forbidden_imports": ["bisect"], "submission_language": "python" }, + "weight": 50 + }, + { + "name": "forbidden_keyword", + "parameters": { "forbidden_keywords": ["for_loop"] }, + "weight": 50 + } + ] + }, + { + "subject_name": "Implementation", + "weight": 60, + "tests": [ + { + "name": "ai_sorting_algorithm", + "parameters": { "algorithm_name": "Recursive Merge Sort" }, + "weight": 100 + } + ] + } + ] + } + } +} +```