diff --git a/autograder/autograder.py b/autograder/autograder.py index f6d6d1ea..715eb6ee 100644 --- a/autograder/autograder.py +++ b/autograder/autograder.py @@ -169,6 +169,7 @@ def build_pipeline( StepName.SANDBOX, StepName.PRE_FLIGHT, StepName.AI_BATCH, + StepName.STRUCTURAL_ANALYSIS, StepName.GRADE, StepName.FOCUS, StepName.FEEDBACK, diff --git a/autograder/models/abstract/test_function.py b/autograder/models/abstract/test_function.py index fa6207ec..5d126ee4 100644 --- a/autograder/models/abstract/test_function.py +++ b/autograder/models/abstract/test_function.py @@ -1,5 +1,7 @@ from abc import ABC, abstractmethod -from typing import List, Optional +from typing import List, Optional, Type + +from pydantic import BaseModel from autograder.models.dataclass.submission import SubmissionFile from autograder.models.dataclass.test_result import TestResult @@ -12,6 +14,11 @@ class TestFunction(ABC): An abstract base class for a single, executable test function. """ + @property + def config_schema(self) -> Optional[Type[BaseModel]]: + """Optional Pydantic model to validate test parameters during tree building.""" + return None + @property @abstractmethod def name(self) -> str: diff --git a/autograder/models/dataclass/step_result.py b/autograder/models/dataclass/step_result.py index 456921aa..ab14b4d7 100644 --- a/autograder/models/dataclass/step_result.py +++ b/autograder/models/dataclass/step_result.py @@ -28,6 +28,7 @@ class StepName(Enum): PRE_FLIGHT = "PreFlightStep" SANDBOX = "SandboxStep" AI_BATCH = "AiBatchStep" + STRUCTURAL_ANALYSIS = "StructuralAnalysisStep" GRADE = "GradeStep" FOCUS = "FocusStep" FEEDBACK = "FeedbackStep" diff --git a/autograder/models/dataclass/structural_analysis_result.py b/autograder/models/dataclass/structural_analysis_result.py new file mode 100644 index 00000000..c4143b1a --- /dev/null +++ b/autograder/models/dataclass/structural_analysis_result.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass +from typing import Dict, Optional, TYPE_CHECKING + +if TYPE_CHECKING: + from ast_grep_py import SgRoot + +@dataclass +class StructuralAnalysisResult: + """ + Holds the results of structural analysis for a submission. + + Attributes: + roots: A dictionary mapping filenames to their corresponding ast-grep root nodes. + If a file could not be parsed, the value is None. + """ + roots: Dict[str, Optional['SgRoot']] diff --git a/autograder/models/pipeline_execution.py b/autograder/models/pipeline_execution.py index 355a98ba..0941eeae 100644 --- a/autograder/models/pipeline_execution.py +++ b/autograder/models/pipeline_execution.py @@ -12,6 +12,7 @@ from autograder.models.criteria_tree import CriteriaTree from autograder.models.dataclass.focus import Focus from autograder.models.dataclass.grade_step_result import GradeStepResult + from autograder.models.dataclass.structural_analysis_result import StructuralAnalysisResult from autograder.models.result_tree import ResultTree from sandbox_manager.sandbox_container import SandboxContainer @@ -116,6 +117,18 @@ def get_built_criteria_tree(self) -> "CriteriaTree": self._require_step_data(StepName.BUILD_TREE, "criteria tree"), ) + def get_structural_analysis_result(self) -> Optional["StructuralAnalysisResult"]: + """ + Retrieves the StructuralAnalysisResult object if it was produced during the pipeline. + """ + if not self.has_step_result(StepName.STRUCTURAL_ANALYSIS): + return None + from autograder.models.dataclass.structural_analysis_result import StructuralAnalysisResult + return cast( + StructuralAnalysisResult, + self._require_step_data(StepName.STRUCTURAL_ANALYSIS, "structural analysis result"), + ) + def get_sandbox(self) -> Optional["SandboxContainer"]: """ Retrieves the SandboxContainer object if it was created during the pipeline. diff --git a/autograder/services/criteria_tree_service.py b/autograder/services/criteria_tree_service.py index 945d82e7..c3d5e53c 100644 --- a/autograder/services/criteria_tree_service.py +++ b/autograder/services/criteria_tree_service.py @@ -1,6 +1,8 @@ import logging from typing import List, Optional +from pydantic import ValidationError + from autograder.models.abstract.template import Template from autograder.models.abstract.test_function import TestFunction from autograder.models.config.category import CategoryConfig @@ -99,17 +101,26 @@ def __parse_test(self, config: TestConfig) -> TestNode: raise ValueError(f"Couldn't find test function '{function_name}'") file_target = [config.file] if config.file else None + test_params = config.get_kwargs_dict() or {} + + # Perform early validation if the test function provides a schema. + if test_function.config_schema: + try: + # We use model_validate to leverage Pydantic's validation logic. + test_function.config_schema(**test_params) + except ValidationError as e: + raise ValueError( + f"Invalid parameters for test '{config.name}' ({function_name}): {e}" + ) from e test = TestNode( config.name, test_function, - config.get_kwargs_dict() or {}, + test_params, file_target, config.weight if config.weight is not None else 100.0, ) - - return test def __parse_category(self, category_name, config: CategoryConfig) -> CategoryNode: diff --git a/autograder/services/grader_service.py b/autograder/services/grader_service.py index fad2b63e..78de48a9 100644 --- a/autograder/services/grader_service.py +++ b/autograder/services/grader_service.py @@ -34,6 +34,7 @@ def grade_from_tree( submission_language=None, locale: str = "en", pre_computed_results: Optional[Dict[str, TestResult]] = None, + structural_analysis=None, ) -> ResultTree: """Traverse the generic built criteria tree to resolve inputs, grades and report to ResultTree.""" base_result = self.process_category( @@ -43,6 +44,7 @@ def grade_from_tree( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) root = RootResultNode(name="root", base=base_result) @@ -54,6 +56,7 @@ def grade_from_tree( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) root.bonus = bonus_result @@ -65,6 +68,7 @@ def grade_from_tree( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) root.penalty = penalty_result @@ -104,6 +108,7 @@ def __process_holder( submission_language=None, locale: str = "en", pre_computed_results: Optional[Dict[str, TestResult]] = None, + structural_analysis=None, ) -> CategoryResultNode | SubjectResultNode: """Process a category or subject node and create corresponding result node.""" @@ -128,6 +133,7 @@ def __process_holder( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) for inner_subject in holder.subjects ] @@ -144,6 +150,7 @@ def __process_holder( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) for test in holder.tests ] @@ -174,6 +181,7 @@ def process_subject( submission_language=None, locale: str = "en", pre_computed_results: Optional[Dict[str, TestResult]] = None, + structural_analysis=None, ) -> SubjectResultNode: """Process a subject node from criteria tree and create result node.""" return self.__process_holder( @@ -183,6 +191,7 @@ def process_subject( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) def process_test( @@ -193,6 +202,7 @@ def process_test( submission_language=None, locale: str = "en", pre_computed_results: Optional[Dict[str, TestResult]] = None, + structural_analysis=None, ) -> TestResultNode: """Execute a test and create a test result node. @@ -230,6 +240,8 @@ def process_test( sandbox=sandbox, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, + submission_language=submission_language, **test_params, ) return TestResultNode( @@ -274,6 +286,7 @@ def process_category( submission_language=None, locale: str = "en", pre_computed_results: Optional[Dict[str, TestResult]] = None, + structural_analysis=None, ) -> CategoryResultNode: """Process a category node from criteria tree and create result node.""" return self.__process_holder( @@ -283,4 +296,5 @@ def process_category( submission_language=submission_language, locale=locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) diff --git a/autograder/steps/grade_step.py b/autograder/steps/grade_step.py index ca875d2f..683b14a7 100644 --- a/autograder/steps/grade_step.py +++ b/autograder/steps/grade_step.py @@ -56,6 +56,8 @@ def _execute(self, pipeline_exec: PipelineExecution) -> PipelineExecution: if pipeline_exec.has_step_result(StepName.AI_BATCH): pre_computed_results = pipeline_exec.get_step_result(StepName.AI_BATCH).data + structural_analysis = pipeline_exec.get_structural_analysis_result() + result_tree = self._grader_service.grade_from_tree( criteria_tree=criteria_tree, submission_files=pipeline_exec.submission.submission_files, @@ -63,6 +65,7 @@ def _execute(self, pipeline_exec: PipelineExecution) -> PipelineExecution: submission_language=pipeline_exec.submission.language, locale=pipeline_exec.locale, pre_computed_results=pre_computed_results, + structural_analysis=structural_analysis, ) # Create grading result diff --git a/autograder/steps/step_registry.py b/autograder/steps/step_registry.py index 3425b105..a228811b 100644 --- a/autograder/steps/step_registry.py +++ b/autograder/steps/step_registry.py @@ -10,6 +10,7 @@ from autograder.steps.pre_flight_step import PreFlightStep from autograder.steps.sandbox_step import SandboxStep from autograder.steps.ai_batch_step import AiBatchStep +from autograder.steps.structural_analysis_step import StructuralAnalysisStep from autograder.steps.grade_step import GradeStep from autograder.steps.focus_step import FocusStep from autograder.steps.feedback_step import FeedbackStep @@ -37,6 +38,7 @@ def __init__(self, config: Dict[str, Any]): StepName.PRE_FLIGHT: self._build_pre_flight, StepName.SANDBOX: self._build_sandbox, StepName.AI_BATCH: self._build_ai_batch, + StepName.STRUCTURAL_ANALYSIS: self._build_structural_analysis, StepName.GRADE: self._build_grade, StepName.FOCUS: self._build_focus, StepName.FEEDBACK: self._build_feedback, @@ -65,6 +67,9 @@ def _build_sandbox(self) -> Optional[Step]: def _build_ai_batch(self) -> Optional[Step]: return AiBatchStep() + def _build_structural_analysis(self) -> Optional[Step]: + return StructuralAnalysisStep() + def _build_grade(self) -> Optional[Step]: return GradeStep() diff --git a/autograder/steps/structural_analysis_step.py b/autograder/steps/structural_analysis_step.py new file mode 100644 index 00000000..aa498457 --- /dev/null +++ b/autograder/steps/structural_analysis_step.py @@ -0,0 +1,73 @@ +import logging +from typing import Dict, Optional + +from autograder.models.abstract.step import Step +from autograder.models.pipeline_execution import PipelineExecution +from autograder.models.dataclass.step_result import StepResult, StepName +from autograder.models.dataclass.structural_analysis_result import StructuralAnalysisResult +from sandbox_manager.models.sandbox_models import Language + +logger = logging.getLogger(__name__) + +try: + from ast_grep_py import SgRoot +except ImportError: + SgRoot = None + +class StructuralAnalysisStep(Step): + """ + Parses submission files into ast-grep SgRoot objects. + This enables structural pattern matching in subsequent grading steps. + """ + + @property + def step_name(self) -> StepName: + return StepName.STRUCTURAL_ANALYSIS + + def _execute(self, pipeline_exec: PipelineExecution) -> PipelineExecution: + submission = pipeline_exec.submission + language = submission.language + + if not language: + logger.warning("No language specified for submission; skipping structural analysis.") + return pipeline_exec.add_step_result(StepResult.success(self.step_name, StructuralAnalysisResult(roots={}))) + + if SgRoot is None: + logger.error("ast-grep-py is not installed; structural analysis will be skipped.") + return pipeline_exec.add_step_result(StepResult.fail(self.step_name, "ast-grep-py not installed")) + + ast_grep_lang = self._map_language(language) + if not ast_grep_lang: + logger.warning(f"Language {language.value} is not supported by ast-grep; skipping.") + return pipeline_exec.add_step_result(StepResult.success(self.step_name, StructuralAnalysisResult(roots={}))) + + roots: Dict[str, Optional[SgRoot]] = {} + for filename, sub_file in submission.submission_files.items(): + # Only parse files that likely contain code + if not self._is_code_file(filename): + continue + + try: + roots[filename] = SgRoot(sub_file.content, ast_grep_lang) + except Exception as e: + logger.warning(f"Failed to parse {filename} with ast-grep: {e}") + roots[filename] = None + + result = StructuralAnalysisResult(roots=roots) + return pipeline_exec.add_step_result(StepResult.success(self.step_name, result)) + + def _map_language(self, language: Language) -> Optional[str]: + mapping = { + Language.PYTHON: "python", + Language.JAVA: "java", + Language.NODE: "javascript", + Language.CPP: "cpp", + Language.C: "c", + } + return mapping.get(language) + + def _is_code_file(self, filename: str) -> bool: + """Heuristic to avoid parsing non-code files.""" + # Common binary/config/doc extensions to ignore + ignored_extensions = {'.png', '.jpg', '.jpeg', '.gif', '.pdf', '.zip', '.tar', '.gz', '.json', '.yaml', '.yml', '.md', '.txt'} + return not any(filename.lower().endswith(ext) for ext in ignored_extensions) diff --git a/autograder/template_library/input_output.py b/autograder/template_library/input_output.py index 822760f7..e5c98e27 100644 --- a/autograder/template_library/input_output.py +++ b/autograder/template_library/input_output.py @@ -1,12 +1,15 @@ import logging import re -from typing import List, Optional +from typing import List, Optional, Dict, Any, Type + +from pydantic import BaseModel, Field from autograder.models.abstract.template import Template from autograder.models.abstract.test_function import TestFunction from autograder.models.dataclass.param_description import ParamDescription from autograder.models.dataclass.submission import SubmissionFile from autograder.models.dataclass.test_result import TestResult +from autograder.models.dataclass.structural_analysis_result import StructuralAnalysisResult from autograder.translations import t from sandbox_manager.sandbox_container import SandboxContainer from sandbox_manager.models.sandbox_models import Language, ResponseCategory @@ -528,6 +531,170 @@ def execute(self, files: Optional[List[SubmissionFile]], sandbox: Optional[Sandb ) +# =============================================================== +# TestFunction for Forbidden Keyword/Construct Detection +# =============================================================== + +class ForbiddenKeywordConfig(BaseModel): + """Configuration schema for ForbiddenKeywordTest.""" + forbidden_keywords: List[str] = Field(default_factory=list) + custom_ast_grep_rules: List[Dict[str, Any]] = Field(default_factory=list) + +class ForbiddenKeywordTest(TestFunction): + """ + Tests that a submission does NOT use any of the specified forbidden + keywords or language constructs. + + Uses ast-grep for structural analysis, which accurately detects + constructs even when they are not at the start of a line and + correctly ignores comments and string literals. + """ + + # Mapping of high-level keywords to language-specific ast-grep rules. + PREDEFINED_RULES: Dict[Language, Dict[str, Dict[str, Any]]] = { + Language.PYTHON: { + "for_loop": {"kind": "for_statement"}, + "while_loop": {"kind": "while_statement"}, + "eval_call": {"pattern": "eval($$$)"}, + "exec_call": {"pattern": "exec($$$)"}, + }, + Language.JAVA: { + "for_loop": {"kind": "for_statement"}, + "while_loop": {"kind": "while_statement"}, + }, + Language.NODE: { + "for_loop": {"kind": "for_statement"}, + "while_loop": {"kind": "while_statement"}, + "eval_call": {"pattern": "eval($$$)"}, + }, + Language.CPP: { + "for_loop": {"kind": "for_statement"}, + "while_loop": {"kind": "while_statement"}, + "do_while_loop": {"kind": "do_statement"}, + }, + Language.C: { + "for_loop": {"kind": "for_statement"}, + "while_loop": {"kind": "while_statement"}, + "do_while_loop": {"kind": "do_statement"}, + }, + } + + @property + def name(self): + return "forbidden_keyword" + + @property + def description(self): + return t("io.forbidden_keyword.description") + + @property + def parameter_description(self): + return [ + ParamDescription("forbidden_keywords", t("io.forbidden_keyword.params.keywords"), "list of strings"), + ParamDescription("custom_ast_grep_rules", t("io.forbidden_keyword.params.custom_rules"), "list of dicts"), + ] + + @property + def config_schema(self) -> Type[BaseModel]: + """Optional Pydantic model to validate test parameters during tree building.""" + return ForbiddenKeywordConfig + + def execute(self, files: Optional[List[SubmissionFile]], sandbox: Optional[SandboxContainer], + *args, forbidden_keywords: List[str] = None, + custom_ast_grep_rules: List[Dict[str, Any]] = None, + structural_analysis: Optional[StructuralAnalysisResult] = None, + submission_language: Optional[Language] = None, + **kwargs) -> TestResult: + """ + Scan target files for forbidden keywords or structural patterns. + """ + locale = kwargs.get("locale") + forbidden_keywords = forbidden_keywords or [] + custom_ast_grep_rules = custom_ast_grep_rules or [] + + if not forbidden_keywords and not custom_ast_grep_rules: + return TestResult( + test_name=self.name, + score=100.0, + report=t("io.forbidden_keyword.report.no_rules", locale=locale) + ) + + if structural_analysis is None: + # Fallback if structural analysis was not executed or failed + return TestResult( + test_name=self.name, + score=0.0, + report=t("io.forbidden_keyword.report.no_analysis", locale=locale) + ) + + if submission_language is None: + return TestResult( + test_name=self.name, + score=0.0, + report=t("io.forbidden_keyword.report.no_lang", locale=locale) + ) + + # Consolidate rules to run + active_rules: List[Dict[str, Any]] = list(custom_ast_grep_rules) + lang_predefined = self.PREDEFINED_RULES.get(submission_language, {}) + + for kw in forbidden_keywords: + if kw in lang_predefined: + active_rules.append(lang_predefined[kw]) + else: + # If a keyword is not predefined for this language, we skip it + # but might want to log it or report it as an internal warning. + pass + + if not active_rules: + return TestResult( + test_name=self.name, + score=100.0, + report=t("io.forbidden_keyword.report.success", locale=locale) + ) + + all_violations: List[str] = [] + + # Only check files passed to this test + if not files: + return TestResult( + test_name=self.name, + score=100.0, + report=t("io.forbidden_keyword.report.no_files", locale=locale) + ) + + for sub_file in files: + root = structural_analysis.roots.get(sub_file.filename) + if root is None: + continue + + for rule in active_rules: + matches = root.root().find_all(**rule) + if matches: + # Provide some feedback about what was found + rule_desc = rule.get("kind") or rule.get("pattern") or str(rule) + all_violations.append( + t("io.forbidden_keyword.report.violation", + locale=locale, + rule=rule_desc, + file=sub_file.filename) + ) + + if all_violations: + details = "\n".join(all_violations) + return TestResult( + test_name=self.name, + score=0.0, + report=t("io.forbidden_keyword.report.failure", locale=locale, details=details) + ) + + return TestResult( + test_name=self.name, + score=100.0, + report=t("io.forbidden_keyword.report.success", locale=locale) + ) + + class InputOutputTemplate(Template): """ A template for command-line I/O assignments. It uses the SandboxExecutor @@ -542,6 +709,7 @@ def __init__(self): "dont_fail": DontFailTest(), "expect_file_artifact": ExpectFileArtifactTest(), "forbidden_import": ForbiddenImportTest(), + "forbidden_keyword": ForbiddenKeywordTest(), } @property diff --git a/autograder/translations/en.json b/autograder/translations/en.json index 5655aae0..830256cc 100644 --- a/autograder/translations/en.json +++ b/autograder/translations/en.json @@ -487,6 +487,22 @@ "invalid_match_mode": "CONFIGURATION ERROR: Invalid match_mode '{mode}'. Must be 'exact', 'contains', or 'regex'.", "invalid_regex": "CONFIGURATION ERROR: Invalid regex pattern.\nDetails: {error}" } + }, + "forbidden_keyword": { + "description": "Statically analyzes submission files using structural analysis to detect forbidden language constructs (like loops or eval calls).", + "params": { + "keywords": "List of predefined constructs to forbid (e.g., 'for_loop', 'while_loop', 'eval_call').", + "custom_rules": "List of custom ast-grep rules to apply." + }, + "report": { + "no_rules": "No forbidden constructs or rules specified — nothing to check.", + "no_analysis": "FAILURE: Structural analysis data is missing. Cannot check for forbidden constructs.", + "no_lang": "FAILURE: Submission language is not defined. Cannot check for forbidden constructs.", + "no_files": "No submission files to analyze.", + "violation": " - Forbidden construct '{rule}' found in '{file}'", + "failure": "FAILURE: Forbidden language constructs detected:\n{details}", + "success": "No forbidden language constructs found." + } } }, "api_testing": { diff --git a/autograder/translations/pt_br.json b/autograder/translations/pt_br.json index 0b311101..7a95df3e 100644 --- a/autograder/translations/pt_br.json +++ b/autograder/translations/pt_br.json @@ -487,6 +487,22 @@ "invalid_match_mode": "ERRO DE CONFIGURAÇÃO: match_mode '{mode}' inválido. Deve ser 'exact', 'contains' ou 'regex'.", "invalid_regex": "ERRO DE CONFIGURAÇÃO: Padrão regex inválido.\nDetalhes: {error}" } + }, + "forbidden_keyword": { + "description": "Analisa estaticamente os arquivos de submissão usando análise estrutural para detectar construções de linguagem proibidas (como loops ou chamadas eval).", + "params": { + "keywords": "Lista de construções predefinidas a proibir (ex: 'for_loop', 'while_loop', 'eval_call').", + "custom_rules": "Lista de regras ast-grep personalizadas para aplicar." + }, + "report": { + "no_rules": "Nenhuma construção ou regra proibida especificada — nada para verificar.", + "no_analysis": "FALHA: Dados de análise estrutural ausentes. Não é possível verificar construções proibidas.", + "no_lang": "FALHA: A linguagem da submissão não foi definida. Não é possível verificar construções proibidas.", + "no_files": "Nenhum arquivo de submissão para analisar.", + "violation": " - Construção proibida '{rule}' encontrada em '{file}'", + "failure": "FALHA: Construções de linguagem proibidas detectadas:\n{details}", + "success": "Nenhuma construção de linguagem proibida encontrada." + } } }, "api_testing": { diff --git a/docker-compose.yml b/docker-compose.yml index d63d2ec6..727ce6b3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,11 +10,11 @@ services: POSTGRES_PASSWORD: autograder_password POSTGRES_DB: autograder ports: - - "5432:5432" + - '5432:5432' volumes: - postgres_data:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready -U autograder"] + test: ['CMD-SHELL', 'pg_isready -U autograder'] interval: 10s timeout: 5s retries: 5 @@ -28,13 +28,13 @@ services: environment: # Database URL - use PostgreSQL DATABASE_URL: postgresql+asyncpg://autograder:autograder_password@postgres:5432/autograder - DATABASE_ECHO: "False" + DATABASE_ECHO: 'False' DATABASE_POOL_SIZE: 10 DATABASE_MAX_OVERFLOW: 20 - + # Sandbox Configuration SANDBOX_POOL_SIZE: 2 - + # API Configuration API_HOST: 0.0.0.0 API_PORT: 8000 @@ -45,13 +45,13 @@ services: AWS_SECRET_ACCESS_KEY: test AWS_REGION: us-east-1 S3_ENDPOINT_URL: http://ministack:4566 - + # Optional: Add your API keys here # OPENAI_API_KEY: ${OPENAI_API_KEY} # UPSTASH_REDIS_URL: ${UPSTASH_REDIS_URL} # UPSTASH_REDIS_TOKEN: ${UPSTASH_REDIS_TOKEN} ports: - - "8000:8000" + - '8000:8000' volumes: # Mount Docker socket for sandbox management - /var/run/docker.sock:/var/run/docker.sock @@ -62,7 +62,7 @@ services: condition: service_healthy restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"] + test: ['CMD', 'curl', '-f', 'http://localhost:8000/api/v1/health'] interval: 30s timeout: 10s retries: 3 @@ -70,12 +70,10 @@ services: # Ministack for S3 emulation (AWS-compatible services) ministack: - # Pin to a specific Ministack version for reproducible local/dev environments. - # Upgrade this tag intentionally after validating compatibility. - image: ministackorg/ministack:1.0.0 + image: ministackorg/ministack:1.3 container_name: autograder-ministack ports: - - "4566:4566" + - '4566:4566' environment: - GATEWAY_PORT=4566 - MINISTACK_HOST=ministack diff --git a/docs/pipeline/04.8-structural-analysis.md b/docs/pipeline/04.8-structural-analysis.md new file mode 100644 index 00000000..feb71c77 --- /dev/null +++ b/docs/pipeline/04.8-structural-analysis.md @@ -0,0 +1,58 @@ +# Step 4.8: Structural Analysis + +## Purpose + +The Structural Analysis step parses student submission files into Abstract Syntax Trees (ASTs) using `ast-grep`. This step runs **after** AI Batch (if applicable) and **before** Grade, enabling subsequent test functions to perform structural pattern matching on the source code without re-parsing files multiple times. + +## How It Works + +1. **Detect Language** — The step identifies the submission language (Python, Java, Node.js, C++, or C). +2. **Heuristic File Filtering** — It scans the submission files and identifies those likely to contain source code, skipping binary files, images, and non-code configurations (e.g., `.png`, `.json`, `.yaml`, `.md`). +3. **Parse ASTs** — For each identified code file, it uses `ast-grep-py` to parse the content into an `SgRoot` object. +4. **Store Results** — The resulting mapping of `Dict[filename, SgRoot]` is stored in `StepResult.data` under `StepName.STRUCTURAL_ANALYSIS`. + +If `ast-grep-py` is not installed or the language is not supported, the step logs a warning and proceeds with an empty result set, allowing the pipeline to continue. + +## Dependencies + +| Step | What It Needs | +|------|---------------| +| **Bootstrap** | The raw `Submission` object containing files and language metadata | + +## Input + +| Source | Data | +|--------|------| +| Pipeline | `pipeline_exec.submission` → submission files and language | + +## Output + +| Field | Type | Description | +|-------|------|-------------| +| `data` | `StructuralAnalysisResult` | Contains a mapping of filenames to `ast-grep` `SgRoot` objects | +| `status` | `StepStatus.SUCCESS` | Usually succeeds even if parsing fails for some files (stores `None` for those files) | + +## How It Integrates with Grade + +The `GradeStep` reads the `STRUCTURAL_ANALYSIS` step result from the pipeline and passes it as `structural_analysis` to `GraderService.grade_from_tree()`. The grader threads this object through the tree traversal into every `process_test()` call, which forwards it as a kwarg to `test_function.execute()`. + +Test functions like `ForbiddenKeywordTest` can then use this pre-computed AST to perform efficient and accurate queries using `ast-grep`'s pattern matching syntax. + +## Key Design Decisions + +- **Pre-computed ASTs** — Parsing is done once at the pipeline level rather than inside individual tests to ensure efficiency when multiple structural tests are defined. +- **Fail-safe** — If parsing fails for a specific file (e.g., due to syntax errors), the step stores `None` for that file instead of failing the entire pipeline. +- **Library Choice** — `ast-grep` was chosen for its performance (Rust-based) and its ability to provide high-level, language-agnostic pattern matching queries. + +## Source Files + +| File | Contents | +|------|----------| +| `autograder/steps/structural_analysis_step.py` | The `StructuralAnalysisStep` pipeline step | +| `autograder/models/dataclass/structural_analysis_result.py` | Data container for parsed AST roots | + +## Next Step + +After structural analysis is complete, the pipeline proceeds to **[Step 5: Grade](05-grade.md)** to execute all tests. + +--- diff --git a/docs/pipeline/README.md b/docs/pipeline/README.md index 27952a30..88e0f1e1 100644 --- a/docs/pipeline/README.md +++ b/docs/pipeline/README.md @@ -10,7 +10,7 @@ The pipeline is: - **Fail-fast**: If any step fails, the pipeline stops immediately and reports the failure point. ``` -Submission ──▶ [LOAD_TEMPLATE] ──▶ [BUILD_TREE] ──▶ [SANDBOX] ──▶ [PRE_FLIGHT] ──▶ [AI_BATCH] ──▶ [GRADE] ──▶ [FOCUS] ──▶ [FEEDBACK] ──▶ [EXPORT] +Submission ──▶ [LOAD_TEMPLATE] ──▶ [BUILD_TREE] ──▶ [SANDBOX] ──▶ [PRE_FLIGHT] ──▶ [AI_BATCH] ──▶ [STRUCTURAL_ANALYSIS] ──▶ [GRADE] ──▶ [FOCUS] ──▶ [FEEDBACK] ──▶ [EXPORT] │ PipelineExecution (with GradingResult) @@ -96,6 +96,7 @@ The `data` field is polymorphic — each step stores a different type: | BUILD_TREE | `CriteriaTree` | | PRE_FLIGHT | `None` | | SANDBOX | `SandboxContainer | None` | +| STRUCTURAL_ANALYSIS | `StructuralAnalysisResult` | | GRADE | `GradeStepResult` | | FOCUS | `Focus` | | FEEDBACK | `str` (Markdown feedback) | @@ -126,7 +127,8 @@ Steps read data from previous steps via `pipeline_exec.get_step_result(StepName. | **Build Tree** | Load Template (needs the `Template` to match test functions) | | **Sandbox** | Load Template (checks if template requires sandbox) | | **Pre-Flight** | Load Template, Sandbox (requires sandbox to run setup commands) | -| **Grade** | Load Template, Build Tree, Sandbox (optional — only if template requires sandbox) | +| **Structural Analysis** | — (needs `Submission` from context) | +| **Grade** | Load Template, Build Tree, Sandbox*, Structural Analysis* | | **Focus** | Grade (needs the `ResultTree` from grading) | | **Feedback** | Grade, Focus (needs the `ResultTree` and `Focus` objects) | | **Export** | Grade (needs the final score) | @@ -162,7 +164,7 @@ The `StepRegistry` applies the specific conditionality parameters natively for s These instantiated elements are natively pushed into the `AutograderPipeline` linearly along their static `execution_order` mapping: ``` -LOAD_TEMPLATE → BUILD_TREE → SANDBOX → PRE_FLIGHT → GRADE → FOCUS → FEEDBACK → EXPORT +LOAD_TEMPLATE → BUILD_TREE → SANDBOX → PRE_FLIGHT → STRUCTURAL_ANALYSIS → GRADE → FOCUS → FEEDBACK → EXPORT ``` --- @@ -175,6 +177,7 @@ LOAD_TEMPLATE → BUILD_TREE → SANDBOX → PRE_FLIGHT → GRADE → FOCUS → | [Build Tree](02-build-tree.md) | `build_tree_step.py` | Constructs the `CriteriaTree` from JSON config, matching test functions from the template | Load Template | | [Sandbox](03-sandbox.md) | `sandbox_step.py` | Creates the sandbox environment and prepares the workspace | Load Template | | [Pre-Flight](04-pre-flight.md) | `pre_flight_step.py` | Validates required files and executes setup commands (compilation, etc.) | Sandbox | +| [Structural Analysis](04.8-structural-analysis.md) | `structural_analysis_step.py` | Parses submission files into ast-grep SgRoot objects | None | | [Grade](05-grade.md) | `grade_step.py` | Executes all tests against the submission and produces the scored `ResultTree` | Load Template, Build Tree, Sandbox* | | [Focus](06-focus.md) | `focus_step.py` | Ranks all tests by their impact on the final score | Grade | | [Feedback](07-feedback.md) | `feedback_step.py` | Generates student-facing feedback reports (default or AI-powered) | Focus | diff --git a/docs/template-library/input_output.md b/docs/template-library/input_output.md index 19b8f534..3aa922c3 100644 --- a/docs/template-library/input_output.md +++ b/docs/template-library/input_output.md @@ -81,6 +81,39 @@ Executes a program with a specific input and verifies it completes **without cra --- +### `forbidden_keyword` + +Tests that a submission does **not** use specific forbidden keywords or language constructs. This test performs structural analysis using `ast-grep`, which is more reliable than regex because it correctly ignores matches inside comments or string literals. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `forbidden_keywords` | list[string] | ✗ | List of high-level keywords to forbid (e.g., `"for_loop"`, `"while_loop"`) | +| `custom_ast_grep_rules` | list[dict] | ✗ | Custom `ast-grep` rules for advanced structural matching | + +**Supported Keywords for Predefined Rules:** + +| Language | Supported Keywords | +|----------|--------------------| +| **Python** | `for_loop`, `while_loop`, `eval_call`, `exec_call` | +| **Java** | `for_loop`, `while_loop` | +| **Node.js** | `for_loop`, `while_loop`, `eval_call` | +| **C++ / C** | `for_loop`, `while_loop`, `do_while_loop` | + +**Scoring:** 100 if no forbidden constructs are found, 0 otherwise. + +**Example:** +```json +{ + "name": "forbidden_keyword", + "parameters": { + "forbidden_keywords": ["for_loop", "eval_call"] + }, + "weight": 100 +} +``` + +--- + ## Usage Example ```json diff --git a/requirements.txt b/requirements.txt index 58c8280d..fbe39405 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ alembic~=1.13.0 asyncpg>=0.30.0 aiosqlite~=0.20.0 pyyaml~=6.0.2 +ast-grep-py>=0.30.0 diff --git a/tests/unit/pipeline/test_structural_analysis_step.py b/tests/unit/pipeline/test_structural_analysis_step.py new file mode 100644 index 00000000..de3d85df --- /dev/null +++ b/tests/unit/pipeline/test_structural_analysis_step.py @@ -0,0 +1,68 @@ +import pytest +from unittest.mock import MagicMock, patch +from autograder.steps.structural_analysis_step import StructuralAnalysisStep +from autograder.models.pipeline_execution import PipelineExecution +from autograder.models.dataclass.submission import Submission, SubmissionFile +from autograder.models.dataclass.step_result import StepName, StepStatus +from sandbox_manager.models.sandbox_models import Language + +@pytest.fixture +def mock_pipeline_exec(): + submission = Submission( + username="testuser", + user_id=1, + assignment_id=1, + submission_files={ + "main.py": SubmissionFile(filename="main.py", content="print('hello')"), + "data.txt": SubmissionFile(filename="data.txt", content="some data") + }, + language=Language.PYTHON + ) + pipeline_exec = PipelineExecution.start_execution(submission) + return pipeline_exec + +def test_structural_analysis_step_name(): + step = StructuralAnalysisStep() + assert step.step_name == StepName.STRUCTURAL_ANALYSIS + +@patch("autograder.steps.structural_analysis_step.SgRoot") +def test_structural_analysis_step_execution_success(mock_sg_root, mock_pipeline_exec): + # Setup mock + mock_root_instance = MagicMock() + mock_sg_root.return_value = mock_root_instance + + step = StructuralAnalysisStep() + result_exec = step.execute(mock_pipeline_exec) + + # Verify StepResult + step_result = result_exec.get_step_result(StepName.STRUCTURAL_ANALYSIS) + assert step_result.status == StepStatus.SUCCESS + assert "main.py" in step_result.data.roots + assert "data.txt" not in step_result.data.roots # Heuristic should skip .txt + assert step_result.data.roots["main.py"] == mock_root_instance + + # Verify SgRoot called correctly + mock_sg_root.assert_called_once_with("print('hello')", "python") + +@patch("autograder.steps.structural_analysis_step.SgRoot") +def test_structural_analysis_step_parsing_failure(mock_sg_root, mock_pipeline_exec): + # Setup mock to raise error for parsing + mock_sg_root.side_effect = Exception("Parsing failed") + + step = StructuralAnalysisStep() + result_exec = step.execute(mock_pipeline_exec) + + # Verify StepResult is still SUCCESS (graceful handling) + step_result = result_exec.get_step_result(StepName.STRUCTURAL_ANALYSIS) + assert step_result.status == StepStatus.SUCCESS + assert "main.py" in step_result.data.roots + assert step_result.data.roots["main.py"] is None # Failed parsing results in None + +def test_structural_analysis_step_no_language(mock_pipeline_exec): + mock_pipeline_exec.submission.language = None + step = StructuralAnalysisStep() + result_exec = step.execute(mock_pipeline_exec) + + step_result = result_exec.get_step_result(StepName.STRUCTURAL_ANALYSIS) + assert step_result.status == StepStatus.SUCCESS + assert step_result.data.roots == {} diff --git a/tests/unit/test_forbidden_keyword.py b/tests/unit/test_forbidden_keyword.py new file mode 100644 index 00000000..94d4ff2d --- /dev/null +++ b/tests/unit/test_forbidden_keyword.py @@ -0,0 +1,155 @@ +"""Tests for ForbiddenKeywordTest.""" + +from unittest.mock import MagicMock +import pytest +from pydantic import ValidationError + +from autograder.template_library.input_output import ForbiddenKeywordTest, InputOutputTemplate, ForbiddenKeywordConfig +from autograder.models.dataclass.submission import SubmissionFile +from autograder.models.dataclass.structural_analysis_result import StructuralAnalysisResult +from sandbox_manager.models.sandbox_models import Language + + +class TestForbiddenKeywordRegistration: + """Test that ForbiddenKeywordTest is properly registered in the template.""" + + def test_forbidden_keyword_registered_in_template(self): + """Test that the forbidden_keyword test is available in InputOutputTemplate.""" + template = InputOutputTemplate() + test = template.get_test("forbidden_keyword") + assert test is not None + assert test.name == "forbidden_keyword" + + +class TestForbiddenKeywordConfig: + """Test ForbiddenKeywordConfig validation.""" + + def test_valid_config(self): + """Test with valid parameters.""" + config = ForbiddenKeywordConfig( + forbidden_keywords=["for_loop"], + custom_ast_grep_rules=[{"kind": "if_statement"}] + ) + assert config.forbidden_keywords == ["for_loop"] + assert config.custom_ast_grep_rules == [{"kind": "if_statement"}] + + def test_empty_config(self): + """Test with empty parameters.""" + config = ForbiddenKeywordConfig() + assert config.forbidden_keywords == [] + assert config.custom_ast_grep_rules == [] + + +class TestForbiddenKeywordMetadata: + """Test ForbiddenKeywordTest metadata and properties.""" + + test_fn: ForbiddenKeywordTest + + def setup_method(self): + """Set up test fixtures.""" + self.test_fn = ForbiddenKeywordTest() + + def test_name(self): + """Test that the test name is 'forbidden_keyword'.""" + assert self.test_fn.name == "forbidden_keyword" + + def test_description_not_empty(self): + """Test that the description is not empty.""" + assert len(self.test_fn.description) > 0 + + def test_parameter_descriptions(self): + """Test that the test has correct parameter descriptions.""" + params = self.test_fn.parameter_description + assert len(params) == 2 + assert params[0].name == "forbidden_keywords" + assert params[1].name == "custom_ast_grep_rules" + + +class TestForbiddenKeywordExecution: + """Test execution logic for ForbiddenKeywordTest.""" + + test_fn: ForbiddenKeywordTest + + def setup_method(self): + """Set up test fixtures.""" + self.test_fn = ForbiddenKeywordTest() + + def test_no_rules_gives_100(self): + """Test that no keywords or rules returns full score.""" + result = self.test_fn.execute([], None) + assert result.score == 100.0 + + def test_no_analysis_gives_0(self): + """Test that missing structural analysis returns score 0.""" + result = self.test_fn.execute([], None, forbidden_keywords=["for_loop"]) + assert result.score == 0.0 + assert "analysis" in result.report.lower() + + def test_no_language_gives_0(self): + """Test that missing language returns score 0.""" + sa_result = StructuralAnalysisResult(roots={}) + result = self.test_fn.execute([], None, + forbidden_keywords=["for_loop"], + structural_analysis=sa_result) + assert result.score == 0.0 + assert "language" in result.report.lower() + + def test_no_files_gives_100(self): + """Test that empty file list returns full score.""" + sa_result = StructuralAnalysisResult(roots={}) + result = self.test_fn.execute([], None, + forbidden_keywords=["for_loop"], + structural_analysis=sa_result, + submission_language=Language.PYTHON) + assert result.score == 100.0 + + def test_violation_found(self): + """Test that finding a violation returns score 0.""" + # Mock ast-grep root and matches + mock_root = MagicMock() + mock_match = MagicMock() + mock_root.root().find_all.return_value = [mock_match] + + sa_result = StructuralAnalysisResult(roots={"main.py": mock_root}) + files = [SubmissionFile("main.py", "for i in range(10): pass")] + + result = self.test_fn.execute(files, None, + forbidden_keywords=["for_loop"], + structural_analysis=sa_result, + submission_language=Language.PYTHON) + + assert result.score == 0.0 + assert "main.py" in result.report + assert "for_statement" in result.report # The 'kind' for for_loop in Python + + def test_custom_rule_violation(self): + """Test that a custom ast-grep rule violation returns score 0.""" + mock_root = MagicMock() + mock_root.root().find_all.return_value = [MagicMock()] + + sa_result = StructuralAnalysisResult(roots={"main.py": mock_root}) + files = [SubmissionFile("main.py", "if True: pass")] + custom_rules = [{"kind": "if_statement"}] + + result = self.test_fn.execute(files, None, + custom_ast_grep_rules=custom_rules, + structural_analysis=sa_result, + submission_language=Language.PYTHON) + + assert result.score == 0.0 + assert "if_statement" in result.report + + def test_no_violation_found(self): + """Test that no violations returns full score.""" + mock_root = MagicMock() + mock_root.root().find_all.return_value = [] + + sa_result = StructuralAnalysisResult(roots={"main.py": mock_root}) + files = [SubmissionFile("main.py", "x = 1")] + + result = self.test_fn.execute(files, None, + forbidden_keywords=["for_loop"], + structural_analysis=sa_result, + submission_language=Language.PYTHON) + + assert result.score == 100.0 diff --git a/tests/unit/test_test_function_validation.py b/tests/unit/test_test_function_validation.py new file mode 100644 index 00000000..b38ce806 --- /dev/null +++ b/tests/unit/test_test_function_validation.py @@ -0,0 +1,123 @@ +"""Tests for TestFunction parameter validation in CriteriaTreeService.""" + +from typing import List, Optional, Type +from pydantic import BaseModel, Field, ValidationError +import pytest + +from autograder.services.criteria_tree_service import CriteriaTreeService +from autograder.models.abstract.test_function import TestFunction +from autograder.models.abstract.template import Template +from autograder.models.config.test import TestConfig +from autograder.models.config.criteria import CriteriaConfig +from autograder.models.config.category import CategoryConfig + + +class MockParams(BaseModel): + """Mock Pydantic model for testing validation.""" + required_str: str + optional_int: int = 42 + list_of_items: List[str] = Field(default_factory=list) + + +class ValidatedTestFunction(TestFunction): + """A mock test function that provides a config schema.""" + @property + def name(self): return "validated_test" + @property + def description(self): return "A test with validation" + @property + def parameter_description(self): return [] + @property + def config_schema(self) -> Type[BaseModel]: return MockParams + def execute(self, files, sandbox, *args, **kwargs): pass + + +class MockTemplate(Template): + """Mock template containing the validated test function.""" + def __init__(self): + self.tests = {"validated_test": ValidatedTestFunction()} + @property + def template_name(self): return "Mock Template" + @property + def template_description(self): return "Mock" + @property + def requires_sandbox(self): return False + def get_test(self, name): return self.tests.get(name) + + +class TestCriteriaTreeServiceValidation: + """Test suite for early validation logic in CriteriaTreeService.""" + + def setup_method(self): + self.service = CriteriaTreeService() + self.template = MockTemplate() + + def test_validation_success(self): + """Test that valid parameters pass validation during tree building.""" + criteria_dict = { + "base": { + "weight": 100, + "tests": [ + { + "name": "Test 1", + "type": "validated_test", + "parameters": [ + {"name": "required_str", "value": "hello"}, + {"name": "optional_int", "value": 100} + ] + } + ] + } + } + config = CriteriaConfig.from_dict(criteria_dict) + # Should NOT raise ValueError + tree = self.service.build_tree(config, self.template) + assert tree is not None + assert tree.base.tests[0].parameters["required_str"] == "hello" + + def test_validation_failure_missing_field(self): + """Test that missing required fields raise ValueError.""" + criteria_dict = { + "base": { + "weight": 100, + "tests": [ + { + "name": "Test Fail", + "type": "validated_test", + "parameters": [ + {"name": "optional_int", "value": 10} + ] + } + ] + } + } + config = CriteriaConfig.from_dict(criteria_dict) + with pytest.raises(ValueError) as excinfo: + self.service.build_tree(config, self.template) + + assert "Invalid parameters" in str(excinfo.value) + assert "required_str" in str(excinfo.value) + + def test_validation_failure_wrong_type(self): + """Test that incorrect field types raise ValueError.""" + criteria_dict = { + "base": { + "weight": 100, + "tests": [ + { + "name": "Test Fail Type", + "type": "validated_test", + "parameters": [ + {"name": "required_str", "value": "hello"}, + {"name": "optional_int", "value": "not-an-int"} + ] + } + ] + } + } + config = CriteriaConfig.from_dict(criteria_dict) + with pytest.raises(ValueError) as excinfo: + self.service.build_tree(config, self.template) + + assert "Invalid parameters" in str(excinfo.value) + assert "optional_int" in str(excinfo.value)