From 8b093f02eef2655ea4161fc58502351a4a10f1ac Mon Sep 17 00:00:00 2001 From: Termux User Date: Sun, 14 Jun 2026 08:28:14 -0400 Subject: [PATCH] =?UTF-8?q?fix:=20Round=208=20slop=20audit=20=E2=80=94=201?= =?UTF-8?q?4=20findings=20across=2010=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2 CRITICAL: triple-quote Python injection (F-1, V-CD-001), fireHook command injection (F-2, V-GR-001) 1 CRITICAL (new): codegen GovernanceHardError bypass (V-CG-001) 3 HIGH: SQLite NULL deref + unchecked ops (F-3), agent unchecked dict access (F-4/F-5), Python importlib bypass (F-6, V-PY-001) 4 MEDIUM: agent msg/handle guards (V-AG-001..004), range dict .at() guard (V-RT-006), VM polyglot .at() guard (V-VM-001), package_manager metacharacter blocklist (V-PKG-002) 4 LOW: STUB detector cleanup (V-LN-001) F-7 (vm.cpp ~82 unchecked .asInt()/.asString()) deferred — requires opcode-level classification of compiler type guarantees. Verified: build 0 errors, 396/396 tests, 738/738 leak checks, 100/100 fuzz cases. Co-Authored-By: Claude Opus 4.6 --- src/interpreter/call_dispatch.cpp | 33 ++++++++++++--- src/interpreter/interpreter.cpp | 10 ++++- src/linter/llm_patterns.cpp | 28 +++++-------- src/packages/package_manager.cpp | 5 ++- src/runtime/block_search_index.cpp | 64 ++++++++++++++++-------------- src/runtime/governance_reports.cpp | 23 ++++++++++- src/runtime/python_c_executor.cpp | 13 ++++++ src/stdlib/agent_impl.cpp | 38 +++++++++++++++--- src/stdlib/codegen_impl.cpp | 3 ++ src/vm/vm.cpp | 11 +++-- 10 files changed, 160 insertions(+), 68 deletions(-) diff --git a/src/interpreter/call_dispatch.cpp b/src/interpreter/call_dispatch.cpp index 7c85366f..a9e4d419 100644 --- a/src/interpreter/call_dispatch.cpp +++ b/src/interpreter/call_dispatch.cpp @@ -2586,9 +2586,20 @@ void Interpreter::visit(ast::CallExpr& node) { "import sys\n"); // Execute the block code to define classes/functions using exec() - // This handles multi-line code with proper indentation - std::string exec_code = "exec('''" + block->code + "''')"; - PyRun_SimpleString(exec_code.c_str()); + // V-CD-001: Use compile()+exec() to avoid triple-quote injection. + // Previously used exec('''...''') which allowed breakout via ''' in block->code. + { + PyObject* code_obj = Py_CompileString(block->code.c_str(), + block->metadata.name.c_str(), + Py_file_input); + if (code_obj) { + PyObject* result = PyEval_EvalCode(code_obj, nullptr, nullptr); + Py_XDECREF(result); + Py_DECREF(code_obj); + } else { + PyErr_Print(); + } + } // Handle member access calls if (!block->member_path.empty()) { @@ -3072,9 +3083,19 @@ void Interpreter::visit(ast::MemberExpr& node) { // Fallback: Legacy Python handling for blocks without executor if (block->metadata.language == "python") { #ifdef NAAB_HAS_PYTHON - // Execute the block code using exec() to handle multi-line properly - std::string exec_code = "exec('''" + block->code + "''')"; - PyRun_SimpleString(exec_code.c_str()); + // V-CD-001: Use compile()+exec() to avoid triple-quote injection + { + PyObject* code_obj = Py_CompileString(block->code.c_str(), + block->metadata.name.c_str(), + Py_file_input); + if (code_obj) { + PyObject* result = PyEval_EvalCode(code_obj, nullptr, nullptr); + Py_XDECREF(result); + Py_DECREF(code_obj); + } else { + PyErr_Print(); + } + } // Build member path std::string full_member_path = block->member_path.empty() diff --git a/src/interpreter/interpreter.cpp b/src/interpreter/interpreter.cpp index e6386f77..e7eb5871 100644 --- a/src/interpreter/interpreter.cpp +++ b/src/interpreter/interpreter.cpp @@ -2015,8 +2015,14 @@ void Interpreter::visit(ast::ForStmt& node) { auto it = dict.find("__is_range"); if (it != dict.end() && it->second.toBool()) { // This is a range - iterate from start to end - int start = dict.at("__range_start").toInt(); - int end_val = dict.at("__range_end").toInt(); + // V-RT-006: Guard range dict key access + auto rs_it = dict.find("__range_start"); + auto re_it = dict.find("__range_end"); + if (rs_it == dict.end() || re_it == dict.end()) { + throw std::runtime_error("Runtime error: malformed range object"); + } + int start = rs_it->second.toInt(); + int end_val = re_it->second.toInt(); bool inclusive = false; // Check for inclusive flag (..= operator) diff --git a/src/linter/llm_patterns.cpp b/src/linter/llm_patterns.cpp index 4ddb7481..f2e11514 100644 --- a/src/linter/llm_patterns.cpp +++ b/src/linter/llm_patterns.cpp @@ -102,30 +102,24 @@ std::vector LLMPatternDetector::detectPatterns(const ast::Program& p // ============================================================================ std::vector LLMPatternDetector::detectUnnecessaryTypeAnnotations(const ast::Program& program) { - // STUB: not implemented — requires AST traversal of VarDeclStmt type annotations + // V-LN-001: STUB — not yet implemented (requires AST traversal of VarDeclStmt type annotations) (void)program; - std::vector diagnostics; - return diagnostics; + return {}; // Returns empty: no false positives, but no detection either } // Removed: detectRedundantNullChecks, detectOveruseOfAny, detectIncorrectErrorHandling // — superseded by scanner checks (empty_catch, catch_and_ignore) or never dispatched std::vector LLMPatternDetector::detectPolyglotBlockMisuse(const ast::Program& program) { - // STUB: not implemented — requires AST traversal to detect: - // 1. Missing variable list: <> → <> - // 2. Wrong variable list syntax - // 3. Trying to use async in polyglot blocks + // V-LN-001: STUB — not yet implemented (requires AST traversal for variable list + async checks) (void)program; - std::vector diagnostics; - return diagnostics; + return {}; } std::vector LLMPatternDetector::detectModuleImportIssues(const ast::Program& program) { - // STUB: not implemented — requires AST traversal for JS/Python import syntax + // V-LN-001: STUB — not yet implemented (requires AST traversal for JS/Python import syntax) (void)program; - std::vector diagnostics; - return diagnostics; + return {}; } std::vector LLMPatternDetector::detectAsyncWithoutImplementation(const ast::Program& program) { @@ -177,17 +171,15 @@ std::vector LLMPatternDetector::detectIncorrectMainFunction(const as // Removed: detectUnquotedDictKeys — NAAb supports bare dict keys by design, not a bug std::vector LLMPatternDetector::detectJavaScriptIdioms(const ast::Program& program) { - // STUB: not implemented — requires AST traversal for const/var/===/ undefined patterns + // V-LN-001: STUB — not yet implemented (requires AST traversal for const/var/===/ undefined) (void)program; - std::vector diagnostics; - return diagnostics; + return {}; } std::vector LLMPatternDetector::detectPythonIdioms(const ast::Program& program) { - // STUB: not implemented — requires AST traversal for def/None/: type annotation patterns + // V-LN-001: STUB — not yet implemented (requires AST traversal for def/None/: annotations) (void)program; - std::vector diagnostics; - return diagnostics; + return {}; } // Removed: detectUnnecessaryComplexity — superseded by scanner (god_functions + deep_nesting) diff --git a/src/packages/package_manager.cpp b/src/packages/package_manager.cpp index ceda4c58..ec8fd4f2 100644 --- a/src/packages/package_manager.cpp +++ b/src/packages/package_manager.cpp @@ -381,11 +381,14 @@ PackageManager::ParsedSpec PackageManager::parseSpec(const std::string& spec) co } // V-PKG-001: Reject shell metacharacters in owner/repo/version + // V-PKG-002: Extended blocklist covers backslash, tilde, bang, hash, etc. for (const auto& s : {result.owner, result.repo, result.version}) { for (char c : s) { if (c == '\'' || c == '"' || c == ';' || c == '|' || c == '&' || c == '$' || c == '`' || c == '(' || c == ')' || c == '<' || - c == '>' || c == '\n' || c == '\r' || c == ' ' || c == '\t') { + c == '>' || c == '\n' || c == '\r' || c == ' ' || c == '\t' || + c == '\\' || c == '~' || c == '!' || c == '#' || c == '%' || + c == '^' || c == '[' || c == ']' || c == '{' || c == '}') { return ParsedSpec{}; // empty = invalid } } diff --git a/src/runtime/block_search_index.cpp b/src/runtime/block_search_index.cpp index a4e13421..24405879 100644 --- a/src/runtime/block_search_index.cpp +++ b/src/runtime/block_search_index.cpp @@ -148,8 +148,10 @@ class BlockSearchIndex::Impl { int buildIndex(const std::string& blocks_path) { // Building search index (silent) - // Begin transaction for faster inserts - sqlite3_exec(db_, "BEGIN TRANSACTION;", nullptr, nullptr, nullptr); + // V-SQ-001: Begin transaction with error check + if (sqlite3_exec(db_, "BEGIN TRANSACTION;", nullptr, nullptr, nullptr) != SQLITE_OK) { + fmt::print(stderr, "[block-search] Failed to begin transaction\n"); + } int indexed_count = 0; @@ -169,8 +171,10 @@ class BlockSearchIndex::Impl { return 0; } - // Commit transaction - sqlite3_exec(db_, "COMMIT;", nullptr, nullptr, nullptr); + // V-SQ-001: Commit transaction with error check + if (sqlite3_exec(db_, "COMMIT;", nullptr, nullptr, nullptr) != SQLITE_OK) { + fmt::print(stderr, "[block-search] Failed to commit transaction\n"); + } // Indexed blocks (silent) return indexed_count; @@ -433,21 +437,21 @@ std::vector BlockSearchIndex::search(const SearchQuery& query) { while (sqlite3_step(stmt) == SQLITE_ROW) { SearchResult result; - // Parse BlockMetadata - result.metadata.block_id = reinterpret_cast(sqlite3_column_text(stmt, 0)); - result.metadata.name = reinterpret_cast(sqlite3_column_text(stmt, 1)); - result.metadata.language = reinterpret_cast(sqlite3_column_text(stmt, 2)); - - const char* cat = reinterpret_cast(sqlite3_column_text(stmt, 3)); - result.metadata.category = cat ? cat : ""; - const char* subcat = reinterpret_cast(sqlite3_column_text(stmt, 4)); - result.metadata.subcategory = subcat ? subcat : ""; - - result.metadata.file_path = reinterpret_cast(sqlite3_column_text(stmt, 5)); - result.metadata.code_hash = reinterpret_cast(sqlite3_column_text(stmt, 6)); + // Parse BlockMetadata — V-SQ-002: NULL-guard all sqlite3_column_text results + auto col_text = [&](int col) -> std::string { + const char* s = reinterpret_cast(sqlite3_column_text(stmt, col)); + return s ? s : ""; + }; + result.metadata.block_id = col_text(0); + result.metadata.name = col_text(1); + result.metadata.language = col_text(2); + result.metadata.category = col_text(3); + result.metadata.subcategory = col_text(4); + result.metadata.file_path = col_text(5); + result.metadata.code_hash = col_text(6); result.metadata.token_count = sqlite3_column_int(stmt, 7); result.metadata.times_used = sqlite3_column_int(stmt, 8); - result.metadata.version = reinterpret_cast(sqlite3_column_text(stmt, 9)); + result.metadata.version = col_text(9); // AI discovery fields const char* desc = reinterpret_cast(sqlite3_column_text(stmt, 10)); @@ -518,22 +522,22 @@ std::optional BlockSearchIndex::getBlock(const std::string& block return std::nullopt; } - // Parse BlockMetadata + // Parse BlockMetadata — V-SQ-002: NULL-guard all sqlite3_column_text results + auto col_text = [&](int col) -> std::string { + const char* s = reinterpret_cast(sqlite3_column_text(stmt, col)); + return s ? s : ""; + }; BlockMetadata metadata; - metadata.block_id = reinterpret_cast(sqlite3_column_text(stmt, 0)); - metadata.name = reinterpret_cast(sqlite3_column_text(stmt, 1)); - metadata.language = reinterpret_cast(sqlite3_column_text(stmt, 2)); - - const char* cat = reinterpret_cast(sqlite3_column_text(stmt, 3)); - metadata.category = cat ? cat : ""; - const char* subcat = reinterpret_cast(sqlite3_column_text(stmt, 4)); - metadata.subcategory = subcat ? subcat : ""; - - metadata.file_path = reinterpret_cast(sqlite3_column_text(stmt, 5)); - metadata.code_hash = reinterpret_cast(sqlite3_column_text(stmt, 6)); + metadata.block_id = col_text(0); + metadata.name = col_text(1); + metadata.language = col_text(2); + metadata.category = col_text(3); + metadata.subcategory = col_text(4); + metadata.file_path = col_text(5); + metadata.code_hash = col_text(6); metadata.token_count = sqlite3_column_int(stmt, 7); metadata.times_used = sqlite3_column_int(stmt, 8); - metadata.version = reinterpret_cast(sqlite3_column_text(stmt, 9)); + metadata.version = col_text(9); // AI discovery fields const char* desc = reinterpret_cast(sqlite3_column_text(stmt, 10)); diff --git a/src/runtime/governance_reports.cpp b/src/runtime/governance_reports.cpp index 34dc84c7..b9a53cd8 100644 --- a/src/runtime/governance_reports.cpp +++ b/src/runtime/governance_reports.cpp @@ -310,6 +310,23 @@ void GovernanceEngine::emitRefusalAttestation( } // --- Hooks --- + +// V-HK-001: Shell-escape a value for safe interpolation into shell commands. +// Wraps in single quotes with internal quote escaping: val → 'val' +// Single quotes prevent all shell metacharacter interpretation. +static std::string shellEscape(const std::string& val) { + std::string escaped = "'"; + for (char c : val) { + if (c == '\'') { + escaped += "'\\''"; // end quote, literal quote, restart quote + } else { + escaped += c; + } + } + escaped += "'"; + return escaped; +} + void GovernanceEngine::fireHook(const HookConfig& hook, const std::unordered_map& vars) { if (hook.command.empty()) return; @@ -319,10 +336,12 @@ void GovernanceEngine::fireHook(const HookConfig& hook, std::string expanded = arg; for (const auto& [key, val] : vars) { std::string placeholder = "${" + key + "}"; + // V-HK-001: Shell-escape substituted values to prevent command injection + std::string safe_val = shellEscape(val); size_t pos = expanded.find(placeholder); while (pos != std::string::npos) { - expanded.replace(pos, placeholder.size(), val); - pos = expanded.find(placeholder, pos + val.size()); + expanded.replace(pos, placeholder.size(), safe_val); + pos = expanded.find(placeholder, pos + safe_val.size()); } } cmd += " " + expanded; diff --git a/src/runtime/python_c_executor.cpp b/src/runtime/python_c_executor.cpp index 5791b133..a2d7c2ad 100644 --- a/src/runtime/python_c_executor.cpp +++ b/src/runtime/python_c_executor.cpp @@ -236,6 +236,19 @@ interpreter::NaabVal PythonCExecutor::executeWithReturn(const std::string& code) << " raise ImportError('Import blocked by governance policy: ' + name)\n" << " return _naab_original_import(name, *args, **kwargs)\n" << "_naab_builtins.__import__ = _naab_safe_import\n" + // V-PY-001: Block importlib bypass and hide original import from user scope + << "try:\n" + << " import importlib as _naab_importlib\n" + << " _naab_orig_import_module = _naab_importlib.import_module\n" + << " def _naab_safe_import_module(name, package=None):\n" + << " _top = name.split('.')[0]\n" + << " if _top in _naab_blocked_modules:\n" + << " raise ImportError('Import blocked by governance policy: ' + name)\n" + << " return _naab_orig_import_module(name, package)\n" + << " _naab_importlib.import_module = _naab_safe_import_module\n" + << " del _naab_importlib, _naab_orig_import_module\n" + << "except Exception:\n" + << " pass\n" << "del _naab_builtins\n"; PyRun_SimpleString(hook.str().c_str()); } diff --git a/src/stdlib/agent_impl.cpp b/src/stdlib/agent_impl.cpp index e6e3f996..3808d129 100644 --- a/src/stdlib/agent_impl.cpp +++ b/src/stdlib/agent_impl.cpp @@ -914,13 +914,29 @@ static NaabVal agentSend(std::vector& args) { } // unlock before API call // Build messages array from handle history + new message + // V-AG-003: Guard messages key access json messages_json = json::array(); - auto& msg_list = handle["messages"].asList(); + auto msgs_it = handle.find("messages"); + if (msgs_it == handle.end() || !msgs_it->second.isList()) { + throw std::runtime_error( + "Agent error: handle missing 'messages' list\n\n" + " Help:\n - Use a handle returned by agent.create()\n"); + } + auto& msg_list = msgs_it->second.asList(); for (auto& msg : msg_list) { auto& msg_dict = msg.asDict(); + // V-AG-002: Guard message dict key access + auto role_it = msg_dict.find("role"); + auto content_it = msg_dict.find("content"); + if (role_it == msg_dict.end() || !role_it->second.isString() || + content_it == msg_dict.end() || !content_it->second.isString()) { + throw std::runtime_error( + "Agent error: each message must be a dict with 'role' and 'content' string keys\n\n" + " Help:\n - Use {role: \"user\", content: \"message\"}\n"); + } json msg_obj; - msg_obj["role"] = msg_dict["role"].asString(); - msg_obj["content"] = msg_dict["content"].asString(); + msg_obj["role"] = role_it->second.asString(); + msg_obj["content"] = content_it->second.asString(); messages_json.push_back(msg_obj); } // Append new user message @@ -2632,7 +2648,11 @@ static NaabVal agentRun(std::vector& args) { ge->setLastReturnTainted(true, "agent.run"); } - return response.asDict()["content"]; + // V-AG-004: Guard response content access + if (!response.isDict()) return NaabVal::makeNull(); + auto& resp_dict = response.asDictConst(); + auto c_it = resp_dict.find("content"); + return (c_it != resp_dict.end()) ? c_it->second : NaabVal::makeNull(); } // ============================================================================ @@ -2675,8 +2695,14 @@ static NaabVal agentUsage(std::vector& args) { " Help:\n - Use the handle returned by agent.create()\n"); } - // Read from server-side tracker (immune to handle mutation) - int handle_id = handle["id"].asInt(); + // V-AG-001: Read handle_id with type guard (same pattern as validateHandle) + auto id_it = handle.find("id"); + if (id_it == handle.end() || !id_it->second.isInt()) { + throw std::runtime_error( + "Agent error: Invalid agent handle\n\n" + " Help:\n - Use the handle returned by agent.create()\n"); + } + int handle_id = id_it->second.asInt(); std::lock_guard lock(s_agent_mutex); auto tracker_it = s_trackers.find(handle_id); if (tracker_it == s_trackers.end()) { diff --git a/src/stdlib/codegen_impl.cpp b/src/stdlib/codegen_impl.cpp index b42977c6..29ab78bf 100644 --- a/src/stdlib/codegen_impl.cpp +++ b/src/stdlib/codegen_impl.cpp @@ -504,6 +504,9 @@ interpreter::NaabVal CodegenModule::call( if (output.empty() && result.isString()) { output = result.asString(); } + } catch (const governance::GovernanceHardError&) { + security::ResourceLimiter::clearTimeout(); + throw; // V-CG-001: HARD blocks propagate without suppression } catch (const std::exception& e) { security::ResourceLimiter::clearTimeout(); exit_code = 1; diff --git a/src/vm/vm.cpp b/src/vm/vm.cpp index 32b83eee..bdffa528 100644 --- a/src/vm/vm.cpp +++ b/src/vm/vm.cpp @@ -2672,10 +2672,15 @@ interpreter::NaabVal VM::run() { } for (int i = 0; i < num_vars; i++) pop(); - // Extract info + // Extract info — V-VM-001: Guard polyglot info dict access auto& info = block_info.asDictConst(); - std::string language = info.at("language").toString(); - std::string raw_code = info.at("code").toString(); + auto lang_it = info.find("language"); + auto code_it = info.find("code"); + if (lang_it == info.end() || code_it == info.end()) { + runtimeError("Internal error: malformed polyglot block info"); + } + std::string language = lang_it->second.toString(); + std::string raw_code = code_it->second.toString(); std::string return_type; auto rt_it = info.find("return_type"); if (rt_it != info.end()) return_type = rt_it->second.toString();