diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index d8859587..bcaab23b 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -1,32 +1,18 @@ name: Bug report -description: Report a reproducible defect affecting Autograder behavior. +description: Report a reproducible defect with minimal required information. title: "[Bug] " body: - type: markdown attributes: value: | - Thanks for reporting this issue. - Please include concrete reproduction details so maintainers can triage quickly. + Thanks for reporting this bug. Keep it short and focused. - type: textarea id: summary attributes: label: Summary - description: A short description of the bug. - placeholder: What is broken? - validations: - required: true - - - type: textarea - id: environment - attributes: - label: Environment - description: Runtime details where the issue happens. - placeholder: | - - Environment: local / staging / production - - Branch or commit: - - Language/template involved: - - API/Web client version: + description: What is broken? + placeholder: One short paragraph describing the problem. validations: required: true @@ -34,62 +20,37 @@ body: id: steps attributes: label: Steps to reproduce - description: Numbered steps to trigger the issue. + description: Optional, but helpful for reliable reproduction. placeholder: | 1. Go to ... 2. Configure ... 3. Run/submit ... 4. Observe ... - validations: - required: true - type: textarea id: observed attributes: label: Observed behavior description: What actually happened? - validations: - required: true - type: textarea id: expected attributes: label: Expected behavior description: What should have happened? - validations: - required: true - - - type: textarea - id: suspected_root_cause - attributes: - label: Suspected root cause (optional) - description: If you already have a hypothesis, add it here. - type: textarea - id: impact + id: environment attributes: - label: Impact - description: Why this matters (who is affected, severity, blocked flows). - placeholder: e.g. Teachers cannot publish assignments with `dont_fail` tests. - validations: - required: true + label: Environment (optional) + description: Version, branch, runtime, or deployment context if relevant. + placeholder: | + - Branch/commit: + - Runtime: + - Local/CI/Prod: - type: textarea id: references attributes: label: References (optional) description: Related issues/PRs/logs/screenshots. - placeholder: | - - Related: #123 - - API issue: webtech-network/api-grader-prisma#456 - - - type: checkboxes - id: checks - attributes: - label: Checklist - options: - - label: I searched for existing issues before opening this one. - required: true - - label: I added enough details for someone else to reproduce this bug. - required: true - diff --git a/.github/ISSUE_TEMPLATE/feature-proposal.yml b/.github/ISSUE_TEMPLATE/feature-proposal.yml index 5ad9e84f..bf3f40ee 100644 --- a/.github/ISSUE_TEMPLATE/feature-proposal.yml +++ b/.github/ISSUE_TEMPLATE/feature-proposal.yml @@ -1,101 +1,46 @@ name: Feature proposal -description: Propose a new feature with scope, rationale, and acceptance criteria. +description: Propose a new feature with lightweight structure. title: "[Feature] " body: - type: markdown attributes: value: | Use this template for new features and product capabilities. + Keep details concise. - type: textarea id: summary attributes: label: Feature overview - description: What is the feature? + description: What should be added or changed? placeholder: Describe the feature in a few lines. validations: required: true - type: textarea - id: why + id: motivation attributes: - label: Why - description: Problem/opportunity and expected value. - placeholder: Why do we need this now? - validations: - required: true - - - type: textarea - id: scope - attributes: - label: Scope (phase-based if applicable) - description: Define rollout phases, boundaries, and priorities. - placeholder: | - Phase 1: - - ... - - Phase 2: - - ... - validations: - required: true - - - type: textarea - id: functional_requirements - attributes: - label: Functional requirements summary - description: Core behaviors and constraints. - placeholder: List required capabilities and expected behavior. - validations: - required: true - - - type: textarea - id: architecture - attributes: - label: Architecture notes - description: Expected implementation direction, data model, or integration notes. + label: Motivation (optional) + description: Why is this feature needed? - type: textarea - id: deliverables + id: proposal attributes: - label: Deliverables - description: Concrete outputs expected across backend/frontend/docs/tests. - placeholder: | - - Backend: - - Frontend: - - Documentation: - - Tests: - validations: - required: true + label: Proposed solution (optional) + description: Suggested implementation direction. - type: textarea id: acceptance_criteria attributes: - label: Acceptance criteria + label: Acceptance criteria (optional) description: Use checklist format whenever possible. placeholder: | - [ ] Criterion 1 - [ ] Criterion 2 - [ ] Criterion 3 - validations: - required: true - type: textarea id: references attributes: - label: References + label: References (optional) description: Related issues/PRs/docs/specs. - placeholder: | - - Parent issue: - - Depends on: - - Spec doc: - - - type: checkboxes - id: checks - attributes: - label: Checklist - options: - - label: I checked for related existing feature requests. - required: true - - label: I included scope boundaries and acceptance criteria. - required: true - diff --git a/.github/ISSUE_TEMPLATE/hypothesis-rfc.yml b/.github/ISSUE_TEMPLATE/hypothesis-rfc.yml index 784f80aa..df2a8969 100644 --- a/.github/ISSUE_TEMPLATE/hypothesis-rfc.yml +++ b/.github/ISSUE_TEMPLATE/hypothesis-rfc.yml @@ -1,5 +1,5 @@ name: Hypothesis / RFC -description: Capture exploratory proposals and open design questions. +description: Capture exploratory proposals with minimal process overhead. title: "[Hypothesis] " body: - type: markdown @@ -7,14 +7,6 @@ body: value: | Use this template for early-stage ideas that still require validation. - - type: textarea - id: context - attributes: - label: Context - description: What current scenario motivated this hypothesis? - validations: - required: true - - type: textarea id: hypothesis attributes: @@ -24,60 +16,28 @@ body: required: true - type: textarea - id: constraints + id: value attributes: - label: Constraints and assumptions - description: Known constraints, decisions, or guardrails. - placeholder: | - Defined constraints: - - ... - - Assumptions: - - ... + label: Expected value (optional) + description: What outcome do we expect if this works? - type: textarea id: open_questions attributes: - label: Open questions + label: Open questions (optional) description: List unresolved decisions that need team input. placeholder: | - Should we...? - What is the impact of...? - validations: - required: true - type: textarea id: validation attributes: - label: Validation plan + label: Validation plan (optional) description: How this hypothesis will be evaluated (tests, spikes, metrics). - validations: - required: true - - - type: textarea - id: success_criteria - attributes: - label: Success criteria - description: Evidence that would validate or invalidate the hypothesis. - placeholder: | - - [ ] Signal 1 - - [ ] Signal 2 - validations: - required: true - type: textarea id: references attributes: label: References (optional) description: Related discussions/issues/PRs/docs. - - - type: checkboxes - id: checks - attributes: - label: Checklist - options: - - label: I clearly separated confirmed constraints from open questions. - required: true - - label: I included a validation plan and measurable success criteria. - required: true - diff --git a/.github/ISSUE_TEMPLATE/implementation-task.yml b/.github/ISSUE_TEMPLATE/implementation-task.yml index a96188e0..a4342a57 100644 --- a/.github/ISSUE_TEMPLATE/implementation-task.yml +++ b/.github/ISSUE_TEMPLATE/implementation-task.yml @@ -1,5 +1,5 @@ name: Implementation task -description: Track a concrete execution task (performance, security, refactor, docs, etc.). +description: Track a focused execution task with clear objective and done criteria. title: "[Task] " body: - type: markdown @@ -7,95 +7,36 @@ body: value: | Use this for focused execution issues, including work derived from umbrella issues. - - type: dropdown - id: category - attributes: - label: Category - options: - - Performance - - Security/API - - Refactor/Technical Debt - - Documentation - - Testing/Quality - - Integration - - Other - validations: - required: true - - - type: textarea - id: context - attributes: - label: Context - description: Relevant background and current behavior. - validations: - required: true - - - type: textarea - id: problem - attributes: - label: Problem - description: What is wrong with the current state? - validations: - required: true - - type: textarea - id: goal + id: objective attributes: - label: Goal - description: What outcome should this task deliver? + label: Objective + description: What should this task achieve? validations: required: true - type: textarea id: proposal attributes: - label: Proposal + label: Proposed approach (optional) description: Proposed implementation approach. - validations: - required: true - type: textarea id: scope attributes: - label: Scope and non-goals - description: Include what is explicitly in and out of scope. - placeholder: | - In scope: - - ... - - Out of scope: - - ... - validations: - required: true + label: Scope (optional) + description: What's in and out of this task. - type: textarea - id: acceptance_criteria + id: done_criteria attributes: - label: Acceptance criteria + label: Definition of done (optional) placeholder: | - - [ ] Criterion 1 - - [ ] Criterion 2 - - [ ] Criterion 3 - validations: - required: true + - [ ] Expected outcome 1 + - [ ] Expected outcome 2 - type: textarea id: references attributes: - label: References and dependencies + label: References and dependencies (optional) description: Parent issues, related PRs, or linked backend/frontend tasks. - placeholder: | - - Parent umbrella: - - Related issue: - - Dependency: - - - type: checkboxes - id: checks - attributes: - label: Checklist - options: - - label: I identified concrete acceptance criteria. - required: true - - label: I documented dependencies and scope boundaries. - required: true - diff --git a/.github/ISSUE_TEMPLATE/umbrella-initiative.yml b/.github/ISSUE_TEMPLATE/umbrella-initiative.yml index 33d16a67..09676c21 100644 --- a/.github/ISSUE_TEMPLATE/umbrella-initiative.yml +++ b/.github/ISSUE_TEMPLATE/umbrella-initiative.yml @@ -1,5 +1,5 @@ name: Umbrella initiative -description: Coordinate a multi-issue initiative with phased strategy and child tasks. +description: Coordinate multi-issue initiatives with a lightweight structure. title: "Umbrella: " body: - type: markdown @@ -15,73 +15,30 @@ body: validations: required: true - - type: textarea - id: why - attributes: - label: Why - description: Product/engineering rationale and urgency. - validations: - required: true - - - type: textarea - id: decisions - attributes: - label: Product and architecture decisions - description: Key decisions that constrain implementation. - placeholder: | - - Decision 1: - - Decision 2: - validations: - required: true - - type: textarea id: strategy attributes: - label: Implementation strategy (phased) + label: Plan / milestones (optional) description: Ordered phases and major milestones. placeholder: | 1. Phase one 2. Phase two 3. Phase three - validations: - required: true - type: textarea id: child_issues attributes: - label: Child issues - description: Add a checklist of linked issues. + label: Child issues (optional) + description: Add linked implementation issues when available. placeholder: | - [ ] #123 Child issue title - [ ] #124 Child issue title - validations: - required: true - type: textarea id: done attributes: - label: Definition of done + label: Definition of done (optional) description: Measurable completion criteria for the initiative. placeholder: | - [ ] Outcome 1 - [ ] Outcome 2 - - [ ] Outcome 3 - validations: - required: true - - - type: textarea - id: roadmap_reference - attributes: - label: Roadmap reference (optional) - description: Link roadmap docs or strategic planning artifacts. - - - type: checkboxes - id: checks - attributes: - label: Checklist - options: - - label: I split the initiative into child issues with clear ownership. - required: true - - label: I provided measurable completion criteria. - required: true - diff --git a/docs/architecture/sandbox_manager.md b/docs/architecture/sandbox_manager.md index 776caa6f..a46ae51a 100644 --- a/docs/architecture/sandbox_manager.md +++ b/docs/architecture/sandbox_manager.md @@ -131,7 +131,7 @@ sandbox.prepare_workdir(submission_files) ``` ### `inject_assets(resolved_assets)` -Injects static assets (datasets, fixtures) into the container's `/tmp` directory. Uses a secure Base64-encoded `exec_run` method compatible with gVisor. +Injects static assets (datasets, fixtures) into the container's `/tmp` directory using a shell-escaped Base64 `exec_run` write path compatible with gVisor. ```python sandbox.inject_assets(resolved_assets) @@ -303,4 +303,3 @@ The monitor thread logs load warnings automatically: - **≥90% utilization:** 🚨 `HIGH LOAD` warning - **≥70% utilization:** ⚠️ `MODERATE LOAD` warning - **Otherwise:** 📊 periodic stats - diff --git a/docs/features/setup_config_feature.md b/docs/features/setup_config_feature.md index c57d32df..68a8ad35 100644 --- a/docs/features/setup_config_feature.md +++ b/docs/features/setup_config_feature.md @@ -55,7 +55,7 @@ The root-level `assets` field allows injecting grader-owned files (e.g., dataset ### Secure Injection Method -Assets are resolved and injected **before** language-specific setup commands run. The autograder uses a secure **Base64-encoded `exec_run`** method to inject files, which provides several benefits: +Assets are resolved and injected **before** language-specific setup commands run. The autograder uses a **shell-escaped Base64 `exec_run`** method to inject files safely, which provides several benefits: - **gVisor Compatibility**: Works seamlessly with high-isolation runtimes like gVisor (`runsc`). - **Security**: Allows maintaining the `noexec` flag on `/tmp` while still supporting dynamic file injection. - **Root-to-Sandbox Handover**: Files are created as `root` to ensure correct placement, then `chmod`ed to be readable by the non-privileged `sandbox` user. @@ -346,4 +346,3 @@ Benefits: - [Command Resolver & Multi-Language Support](command_resolver.md) — How command resolution works across languages - [Configuration Examples](../guides/criteria_configuration_examples.md) — Full grading configuration examples - [Pipeline Execution Tracking](../architecture/pipeline_execution_tracking.md) — How preflight results appear in pipeline summaries - diff --git a/docs/pipeline/04-pre-flight.md b/docs/pipeline/04-pre-flight.md index 492a3b35..ceb62796 100644 --- a/docs/pipeline/04-pre-flight.md +++ b/docs/pipeline/04-pre-flight.md @@ -13,7 +13,7 @@ This step performs three main functions: The step execution follows these logic gates: -1. **Asset Injection**: If the `setup_config` contains an `assets` list, the `PreFlightService` resolves each asset via the `AssetSourceResolver`. Assets are fetched from S3 and injected into the container's `/tmp` directory using a secure Base64-encoded `exec_run` method (fully compatible with gVisor). +1. **Asset Injection**: If the `setup_config` contains an `assets` list, the `PreFlightService` resolves each asset via the `AssetSourceResolver`. Assets are fetched from S3 and injected into the container's `/tmp` directory using a shell-escaped Base64 `exec_run` write flow. 2. **Required Files Check**: It compares the files in the submission against the list provided in the `required_files` section of the `setup_config` for the submission's language. diff --git a/sandbox_manager/sandbox_container.py b/sandbox_manager/sandbox_container.py index 179ac37f..d6b0f89c 100644 --- a/sandbox_manager/sandbox_container.py +++ b/sandbox_manager/sandbox_container.py @@ -1,8 +1,6 @@ import base64 -import base64 -import io import os -import tarfile +import shlex import threading import time from datetime import datetime @@ -45,6 +43,27 @@ def release(self): self.state = SandboxState.IDLE self.last_updated = datetime.now() + @staticmethod + def _normalize_relative_path(path: str) -> str: + """Normalize user-provided relative paths and prevent traversal/absolute paths.""" + if not path: + raise ValueError("Path cannot be empty") + + normalized = os.path.normpath(path).replace("\\", "/") + if normalized in ("", ".", ".."): + raise ValueError(f"Invalid path: {path}") + if normalized.startswith("/") or normalized.startswith("../") or "/../" in f"/{normalized}": + raise ValueError(f"Path must stay within working directory: {path}") + return normalized + + @staticmethod + def _normalize_tmp_path(path: str) -> str: + """Normalize absolute asset paths and keep them constrained under /tmp.""" + normalized = os.path.normpath(path).replace("\\", "/") + if not normalized.startswith("/tmp/"): + raise ValueError(f"Asset target must be under /tmp: {path}") + return normalized + def prepare_workdir(self, submission_files: Dict[str, 'SubmissionFile']) -> None: """ Copy submission files into the container's /app directory with smart directory structure. @@ -64,7 +83,7 @@ def prepare_workdir(self, submission_files: Dict[str, 'SubmissionFile']) -> None try: for submission_file in submission_files.values(): - file_path = submission_file.filename + file_path = self._normalize_relative_path(submission_file.filename) file_content = submission_file.content # Get the directory path @@ -74,23 +93,20 @@ def prepare_workdir(self, submission_files: Dict[str, 'SubmissionFile']) -> None if dir_path: full_dir_path = f"/app/{dir_path}" result = self.container_ref.exec_run( - cmd=f"mkdir -p {full_dir_path}", + cmd=["mkdir", "-p", full_dir_path], user="sandbox" ) if result.exit_code != 0: raise RuntimeError(f"Failed to create directory {full_dir_path}") - # Encode content as base64 to safely pass through shell - content_b64 = base64.b64encode(file_content.encode('utf-8')).decode('ascii') - - # Create the file using base64 decode full_file_path = f"/app/{file_path}" - cmd = f"echo '{content_b64}' | base64 -d > {full_file_path}" + safe_file_path = shlex.quote(full_file_path) + content_b64 = base64.b64encode(file_content.encode('utf-8')).decode('ascii') + cmd = f"echo '{content_b64}' | base64 -d > {safe_file_path}" result = self.container_ref.exec_run( cmd=["/bin/sh", "-c", cmd], user="sandbox" ) - if result.exit_code != 0: raise RuntimeError(f"Failed to create file {full_file_path}: {result.output}") @@ -112,40 +128,41 @@ def inject_assets(self, resolved_assets: List['ResolvedAsset']) -> None: if not resolved_assets: return - import base64 - for asset in resolved_assets: # Ensure target path starts with /tmp/ target_path = asset.target if not target_path.startswith('/tmp/'): target_path = os.path.join('/tmp', target_path.lstrip('/')) + target_path = self._normalize_tmp_path(target_path) content = asset.content # Ensure parent directory exists in container and has correct permissions parent_dir = os.path.dirname(target_path) if parent_dir and parent_dir != '/': - self.container_ref.exec_run( - cmd=f"mkdir -p {parent_dir}", + mkdir_result = self.container_ref.exec_run( + cmd=["mkdir", "-p", parent_dir], user="root" ) + if mkdir_result.exit_code != 0: + output = mkdir_result.output.decode() if mkdir_result.output else "No output" + raise RuntimeError(f"Failed to create asset directory {parent_dir}: {output}") # Ensure world-readable so sandbox user can read injected assets - self.container_ref.exec_run( - cmd=f"chmod 755 {parent_dir}", + chmod_dir_result = self.container_ref.exec_run( + cmd=["chmod", "755", parent_dir], user="root" ) - - # Encode content as base64 to safely pass through shell - content_b64 = base64.b64encode(content).decode('ascii') + if chmod_dir_result.exit_code != 0: + output = chmod_dir_result.output.decode() if chmod_dir_result.output else "No output" + raise RuntimeError(f"Failed to set permissions on {parent_dir}: {output}") - # Create the file using base64 decode - # Using /bin/sh -c to support redirection and pipes - cmd = f"echo '{content_b64}' | base64 -d > {target_path}" + safe_target_path = shlex.quote(target_path) + content_b64 = base64.b64encode(content).decode('ascii') + cmd = f"echo '{content_b64}' | base64 -d > {safe_target_path}" result = self.container_ref.exec_run( cmd=["/bin/sh", "-c", cmd], user="root" ) - if result.exit_code != 0: output = result.output.decode() if result.output else "No output" raise RuntimeError(f"Failed to create asset file {target_path}: {output}") @@ -153,7 +170,7 @@ def inject_assets(self, resolved_assets: List['ResolvedAsset']) -> None: # Set file permissions (read-only if requested) mode = "444" if asset.read_only else "644" self.container_ref.exec_run( - cmd=f"chmod {mode} {target_path}", + cmd=["chmod", mode, target_path], user="root" ) @@ -292,9 +309,13 @@ def extract_file(self, path: str, max_bytes: int = 1_048_576) -> ExtractedFile: ValueError: If the file exceeds max_bytes. RuntimeError: If the extraction command fails. """ + if not path.startswith("/"): + raise ValueError(f"Path must be absolute inside container: {path}") + safe_path = shlex.quote(path) + # Check file exists and get its size check = self.container_ref.exec_run( - cmd=["/bin/sh", "-c", f"test -f {path} && stat -c %s {path} 2>/dev/null || stat -f %z {path} 2>/dev/null"], + cmd=["/bin/sh", "-c", f"test -f {safe_path} && stat -c %s {safe_path} 2>/dev/null || stat -f %z {safe_path} 2>/dev/null"], user="sandbox", ) if check.exit_code != 0: @@ -311,7 +332,7 @@ def extract_file(self, path: str, max_bytes: int = 1_048_576) -> ExtractedFile: # Read file content via base64 to safely transport binary data result = self.container_ref.exec_run( - cmd=["/bin/sh", "-c", f"base64 {path}"], + cmd=["/bin/sh", "-c", f"base64 {safe_path}"], user="sandbox", ) if result.exit_code != 0: diff --git a/tests/unit/test_asset_injection.py b/tests/unit/test_asset_injection.py index 8226549c..09d11c11 100644 --- a/tests/unit/test_asset_injection.py +++ b/tests/unit/test_asset_injection.py @@ -61,7 +61,7 @@ def test_asset_resolver(self, mock_s3_provider_cls): mock_s3_provider.get_asset.assert_called_once_with("src", "/tmp/dst", True) def test_sandbox_inject_assets(self): - """Test SandboxContainer.inject_assets refactored Base64 injection.""" + """Test SandboxContainer.inject_assets with shell-escaped Base64 injection.""" container_ref = MagicMock() # Mock success response mock_res = MagicMock() @@ -78,17 +78,16 @@ def test_sandbox_inject_assets(self): # Check if mkdir -p was called for parent dir container_ref.exec_run.assert_any_call( - cmd="mkdir -p /tmp", + cmd=["mkdir", "-p", "/tmp"], user="root" ) - - # Check if Base64 injection was called - # The command contains the base64 encoded string - # b"rawcontent" encoded is "cmF3Y29udGVudA==" + + # Check if Base64 write command was called call_args = container_ref.exec_run.call_args_list found_b64_call = False for call in call_args: - if "cmF3Y29udGVudA==" in str(call): + cmd = call.kwargs.get("cmd") + if isinstance(cmd, list) and len(cmd) == 3 and "cmF3Y29udGVudA==" in cmd[2]: found_b64_call = True break - assert found_b64_call, "Base64 injection call not found in exec_run calls" + assert found_b64_call, "Base64 write call not found in exec_run calls" diff --git a/tests/unit/test_extract_file.py b/tests/unit/test_extract_file.py index d5194451..0952fea5 100644 --- a/tests/unit/test_extract_file.py +++ b/tests/unit/test_extract_file.py @@ -12,8 +12,9 @@ """ import base64 +import shlex import unittest -from unittest.mock import MagicMock, call +from unittest.mock import MagicMock from sandbox_manager.sandbox_container import SandboxContainer from sandbox_manager.models.sandbox_models import Language, ExtractedFile @@ -118,6 +119,24 @@ def test_docker_api_generic_error(self): with self.assertRaises(Exception): self.sandbox.extract_file("/app/file.txt") + def test_rejects_relative_path(self): + """Reject non-absolute extraction paths before running container commands.""" + with self.assertRaises(ValueError): + self.sandbox.extract_file("relative.txt") + + self.mock_container.exec_run.assert_not_called() + + def test_shell_escapes_path(self): + """Use shell-escaped path to avoid command injection.""" + malicious = "/app/file.txt; id > /tmp/pwned #" + self.mock_container.exec_run.return_value = _exec_run_result(1, b"") + + with self.assertRaises(FileNotFoundError): + self.sandbox.extract_file(malicious) + + first_cmd = self.mock_container.exec_run.call_args_list[0].kwargs["cmd"][2] + assert shlex.quote(malicious) in first_cmd + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_issue_315_repro.py b/tests/unit/test_issue_315_repro.py new file mode 100644 index 00000000..54c76db3 --- /dev/null +++ b/tests/unit/test_issue_315_repro.py @@ -0,0 +1,53 @@ +import unittest +from unittest.mock import MagicMock +from sandbox_manager.sandbox_container import SandboxContainer +from sandbox_manager.models.sandbox_models import Language + +class TestIssue315Reproduction(unittest.TestCase): + def setUp(self): + self.mock_container = MagicMock() + self.sandbox = SandboxContainer( + language=Language.PYTHON, + container_ref=self.mock_container + ) + + # Setup mock result for exec_run + self.mock_result = MagicMock() + self.mock_result.exit_code = 0 + self.mock_result.output = (b"", b"") + self.mock_container.exec_run.return_value = self.mock_result + + def test_run_commands_injection(self): + """ + Demonstrate that program_command can be used for shell injection in run_commands. + """ + # A malicious program_command that escapes the intended command + malicious_command = "python3 main.py ) && touch /tmp/pwned && ( #" + + self.sandbox.run_commands(["input1"], program_command=malicious_command) + + # Verify the actual command sent to exec_run + exec_run_call = self.mock_container.exec_run.call_args + cmd_sent = exec_run_call[1]['cmd'] + + full_cmd_string = cmd_sent[2] + print(f"\nCommand sent to shell: {full_cmd_string}") + + # Check if the injected part is in the command outside of the parentheses it was supposed to be in + self.assertIn("&& touch /tmp/pwned", full_cmd_string) + + def test_run_command_injection(self): + """ + Check if run_command is also vulnerable. + """ + malicious_command = "echo hello && touch /tmp/pwned" + self.sandbox.run_command(malicious_command) + + exec_run_call = self.mock_container.exec_run.call_args + cmd_sent = exec_run_call[1]['cmd'] + + full_cmd_string = cmd_sent[2] + self.assertEqual(full_cmd_string, malicious_command) + +if __name__ == '__main__': + unittest.main() diff --git a/tests/unit/test_sandbox_manager.py b/tests/unit/test_sandbox_manager.py index 72e7ec32..015bbb92 100644 --- a/tests/unit/test_sandbox_manager.py +++ b/tests/unit/test_sandbox_manager.py @@ -114,7 +114,7 @@ def test_prepare_workdir_with_simple_files(self): self.sandbox.prepare_workdir(submission_files) self.assertTrue(self.sandbox._workdir_prepared) - # Verify exec_run was called for each file (2 times) + # Verify file creation command was called for each file. self.assertEqual(self.mock_container.exec_run.call_count, 2) def test_prepare_workdir_with_nested_structure(self): @@ -295,4 +295,3 @@ def test_build_container_name_increments(self): assert name2.endswith("-0002") # Same pool prefix assert name1[:-5] == name2[:-5] -