From 578d36f868bfb4526531784bfdf3890af6658072 Mon Sep 17 00:00:00 2001 From: Thomas Dyar Date: Sun, 14 Jun 2026 11:57:02 -0400 Subject: [PATCH] feat(objectscript): add InterSystems IRIS ObjectScript language support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ObjectScript (InterSystems IRIS / Caché) as a supported language, covering the UDL class format (.cls), MAC/INT routines (.mac/.int/.rtn), include/macro files (.inc), and IRIS Studio Export XML. Definition extraction (extract_defs.c): Class, Method, ClassMethod, Property, Parameter, Index, Trigger (with body text), XData, Storage, and Query members as graph nodes; base classes from the Extends clause. Call dispatch resolution (extract_calls.c) — four ObjectScript patterns that are structurally invisible to text search: 1. ##class(Pkg.Class).Method() explicit cross-class call 2. ..Method() relative-dot self-call (the dominant intra-class form; large impact on CALLS completeness) 3. $$$Macro macro expansion via a per-project table built from .inc files 4. type inference from %New/%OpenId + declared return types Ensemble production topology (pass_ensemble_routing.c): EnsembleItem nodes per production component and ROUTES_TO edges resolved from ProductionDefinition XData, plus WorkMgr .Queue("##class(X).method") dispatch — all parsed statically at index time, no live IRIS required. Language detection (language.c): .mac/.int/.rtn map to ObjectScript routine directly; .cls (shared with Apex) and .inc (shared with BitBake) are disambiguated by content, defaulting to the existing language on any doubt so neither Apex nor BitBake detection regresses. The two new per-project tables (macros, return types) are threaded through a new internal cbm_extract_file_ex() so the public cbm_extract_file() signature is unchanged. 
The tree-sitter grammars are NOT vendored in this PR; they are a dependency to be vendored separately from https://github.com/intersystems/tree-sitter-objectscript (MIT). The build will not link until the grammars are present. Refs #462 Signed-off-by: Thomas Dyar --- Makefile.cbm | 3 + internal/cbm/cbm.c | 11 + internal/cbm/cbm.h | 53 +- internal/cbm/extract_calls.c | 208 +++++- internal/cbm/extract_defs.c | 325 +++++++- internal/cbm/extract_unified.c | 283 +++++++ internal/cbm/extract_unified.h | 16 + internal/cbm/grammar_objectscript_routine.c | 4 + internal/cbm/grammar_objectscript_udl.c | 4 + internal/cbm/iris_export_xml.c | 444 +++++++++++ internal/cbm/iris_export_xml.h | 19 + internal/cbm/lang_specs.c | 44 ++ internal/cbm/macro_table.c | 243 ++++++ internal/cbm/macro_table.h | 43 ++ src/discover/discover.c | 22 + src/discover/discover.h | 11 + src/discover/language.c | 94 ++- src/pipeline/pass_calls.c | 94 ++- src/pipeline/pass_definitions.c | 41 +- src/pipeline/pass_ensemble_routing.c | 645 ++++++++++++++++ src/pipeline/pass_ensemble_routing.h | 8 + src/pipeline/pass_parallel.c | 95 ++- src/pipeline/pipeline.c | 86 ++- src/pipeline/pipeline_internal.h | 8 + tests/test_extraction.c | 784 ++++++++++++++++++++ 25 files changed, 3564 insertions(+), 24 deletions(-) create mode 100644 internal/cbm/grammar_objectscript_routine.c create mode 100644 internal/cbm/grammar_objectscript_udl.c create mode 100644 internal/cbm/iris_export_xml.c create mode 100644 internal/cbm/iris_export_xml.h create mode 100644 internal/cbm/macro_table.c create mode 100644 internal/cbm/macro_table.h create mode 100644 src/pipeline/pass_ensemble_routing.c create mode 100644 src/pipeline/pass_ensemble_routing.h diff --git a/Makefile.cbm b/Makefile.cbm index 3ff50b81..729ef908 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -129,6 +129,8 @@ EXTRACTION_SRCS = \ $(CBM_DIR)/extract_k8s.c \ $(CBM_DIR)/helpers.c \ $(CBM_DIR)/lang_specs.c \ + $(CBM_DIR)/macro_table.c \ + $(CBM_DIR)/iris_export_xml.c 
\ $(CBM_DIR)/service_patterns.c # LSP resolvers (compiled as one unit via lsp_all.c) @@ -200,6 +202,7 @@ PIPELINE_SRCS = \ src/pipeline/pass_semantic_edges.c \ src/pipeline/pass_complexity.c \ src/pipeline/pass_cross_repo.c \ + src/pipeline/pass_ensemble_routing.c \ src/pipeline/artifact.c \ src/pipeline/pass_pkgmap.c diff --git a/internal/cbm/cbm.c b/internal/cbm/cbm.c index d611f186..42ae4398 100644 --- a/internal/cbm/cbm.c +++ b/internal/cbm/cbm.c @@ -497,6 +497,15 @@ static int count_params_from_signature(const char *sig) { CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage language, const char *project, const char *rel_path, int64_t timeout_micros, const char **extra_defines, const char **include_paths) { + return cbm_extract_file_ex(source, source_len, language, project, rel_path, timeout_micros, + extra_defines, include_paths, NULL, NULL); +} + +CBMFileResult *cbm_extract_file_ex(const char *source, int source_len, CBMLanguage language, + const char *project, const char *rel_path, + int64_t timeout_micros, const char **extra_defines, + const char **include_paths, const CBMMacroTable *macro_table, + const CBMReturnTypeTable *return_type_table) { // Allocate result on heap (arena inside for all string data) enum { SINGLE = 1 }; CBMFileResult *result = (CBMFileResult *)calloc(SINGLE, sizeof(CBMFileResult)); @@ -580,6 +589,8 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage .rel_path = rel_path, .module_qn = result->module_qn, .root = root, + .macro_table = macro_table, + .return_type_table = return_type_table, }; // Run extractors: defs + imports use separate walks (unique recursion patterns), diff --git a/internal/cbm/cbm.h b/internal/cbm/cbm.h index cc3607ee..9bbc60c7 100644 --- a/internal/cbm/cbm.h +++ b/internal/cbm/cbm.h @@ -164,12 +164,15 @@ typedef enum { CBM_LANG_APEX, CBM_LANG_SOQL, CBM_LANG_SOSL, - CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool - CBM_LANG_K8S, // 
Generic Kubernetes manifest (apiVersion: detected) - CBM_LANG_PINE, // Pine Script (TradingView indicator / strategy language) - CBM_LANG_QML, // Qt QML (Qt Modeling Language — declarative UI + embedded JS) - CBM_LANG_CFSCRIPT, // CFML script dialect (.cfc components — Lucee/ColdFusion) - CBM_LANG_CFML, // CFML tag dialect (.cfm templates — Lucee/ColdFusion) + CBM_LANG_KUSTOMIZE, // kustomization.yaml — Kubernetes overlay tool + CBM_LANG_K8S, // Generic Kubernetes manifest (apiVersion: detected) + CBM_LANG_PINE, // Pine Script (TradingView indicator / strategy language) + CBM_LANG_QML, // Qt QML (Qt Modeling Language — declarative UI + embedded JS) + CBM_LANG_CFSCRIPT, // CFML script dialect (.cfc components — Lucee/ColdFusion) + CBM_LANG_CFML, // CFML tag dialect (.cfm templates — Lucee/ColdFusion) + CBM_LANG_OBJECTSCRIPT_UDL, // InterSystems ObjectScript UDL (.cls class files) + CBM_LANG_OBJECTSCRIPT_ROUTINE, // InterSystems ObjectScript routine (.mac/.int/.rtn/.inc) + CBM_LANG_OBJECTSCRIPT_EXPORT, // InterSystems Studio Export XML () CBM_LANG_COUNT } CBMLanguage; @@ -485,6 +488,24 @@ typedef struct { int count; } CBMStringConstantMap; +// Forward declaration: ObjectScript macro table (defined in macro_table.h). +typedef struct CBMMacroTable CBMMacroTable; + +// Method-return-type table for ObjectScript variable type inference. Populated +// from definition nodes (method QN -> declared return type) so a later +// `Set x = obj.Method()` can resolve x's class. 
+#define CBM_RETURN_TYPE_TABLE_CAP 2048 + +typedef struct { + const char *method_qn; + const char *return_type; +} CBMReturnTypeEntry; + +typedef struct { + CBMReturnTypeEntry entries[CBM_RETURN_TYPE_TABLE_CAP]; + int count; +} CBMReturnTypeTable; + typedef struct { CBMArena *arena; CBMFileResult *result; @@ -495,9 +516,11 @@ typedef struct { const char *rel_path; const char *module_qn; TSNode root; - EFCache ef_cache; // enclosing function cache - const char *enclosing_class_qn; // for nested class QN computation - CBMStringConstantMap string_constants; // module-level NAME = "value" pairs + EFCache ef_cache; // enclosing function cache + const char *enclosing_class_qn; // for nested class QN computation + CBMStringConstantMap string_constants; // module-level NAME = "value" pairs + const CBMMacroTable *macro_table; // ObjectScript $$$macro table (NULL if none) + const CBMReturnTypeTable *return_type_table; // ObjectScript method return types (NULL if none) } CBMExtractCtx; // --- Public API --- @@ -524,6 +547,18 @@ CBMFileResult *cbm_extract_file(const char *source, int source_len, CBMLanguage const char **include_paths // NULL-terminated, or NULL ); +// Pipeline-internal variant of cbm_extract_file() carrying ObjectScript +// per-project tables (macro table + method-return-type table). The public +// cbm_extract_file() is a thin wrapper that passes NULL, NULL for both. +CBMFileResult *cbm_extract_file_ex( + const char *source, int source_len, CBMLanguage language, const char *project, + const char *rel_path, int64_t timeout_micros, + const char **extra_defines, // NULL-terminated, or NULL + const char **include_paths, // NULL-terminated, or NULL + const CBMMacroTable *macro_table, // ObjectScript macros, or NULL + const CBMReturnTypeTable *return_type_table // OS return types, or NULL +); + // Free all memory associated with a result. 
void cbm_free_result(CBMFileResult *result); diff --git a/internal/cbm/extract_calls.c b/internal/cbm/extract_calls.c index 98b924b8..f506af98 100644 --- a/internal/cbm/extract_calls.c +++ b/internal/cbm/extract_calls.c @@ -2,6 +2,7 @@ #include "arena.h" // CBMArena, cbm_arena_sprintf #include "helpers.h" #include "lang_specs.h" +#include "macro_table.h" #include "extract_unified.h" #include "foundation/constants.h" #include "extract_node_stack.h" @@ -592,6 +593,60 @@ static char *extract_callee_lang_specific(CBMArena *a, TSNode node, const char * if (lang == CBM_LANG_SWIFT) { return extract_swift_callee(a, node, source, nk); } + if (lang == CBM_LANG_OBJECTSCRIPT_UDL || lang == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + // ##class(Pkg.Class).Method() -> "Pkg.Class.Method" + if (strcmp(nk, "class_method_call") == 0) { + TSNode class_ref = cbm_find_child_by_kind(node, "class_ref"); + TSNode method_name = cbm_find_child_by_kind(node, "method_name"); + if (!ts_node_is_null(class_ref) && !ts_node_is_null(method_name)) { + TSNode cname = cbm_find_child_by_kind(class_ref, "class_name"); + if (ts_node_is_null(cname)) { + return NULL; + } + char *cls = cbm_node_text(a, cname, source); + if (!cls || !cls[0]) { + return NULL; + } + TSNode mname_ident = ts_node_named_child_count(method_name) > 0 + ? ts_node_named_child(method_name, 0) + : (TSNode){0}; + if (ts_node_is_null(mname_ident)) { + return cls; + } + char *meth = cbm_node_text(a, mname_ident, source); + if (!meth || !meth[0]) { + return cls; + } + return cbm_arena_sprintf(a, "%s.%s", cls, meth); + } + return NULL; + } + // $$label^routine extrinsic / routine tag call -> the line_ref text + if (strcmp(nk, "routine_tag_call") == 0) { + TSNode line_ref = cbm_find_child_by_kind(node, "line_ref"); + if (!ts_node_is_null(line_ref)) { + return cbm_node_text(a, line_ref, source); + } + return NULL; + } + // $$$Macro(...) 
-> raw "$$$Name" callee (expanded later in handle_calls) + if (strcmp(nk, "macro") == 0) { + char *raw = cbm_node_text(a, node, source); + if (!raw || raw[0] != '$' || raw[1] != '$' || raw[2] != '$') { + return NULL; + } + char *name_start = raw + 3; + char *paren = strchr(name_start, '('); + if (paren) { + *paren = '\0'; + } + if (!name_start[0]) { + return NULL; + } + return cbm_arena_sprintf(a, "$$$%s", name_start); + } + return NULL; + } return extract_scripting_callee(a, node, source, lang, nk); } @@ -1120,6 +1175,72 @@ static void extract_jsx_component_ref(CBMExtractCtx *ctx, TSNode node, const cha } } +// ObjectScript: resolve `var.Method(...)` / `..Property.Method(...)` instance +// calls against the per-method variable type map. Returns arena "Class.Method" +// or NULL if the receiver's type is unknown. +static char *resolve_objectscript_instance_call(CBMArena *a, TSNode node, const char *source, + os_type_map_t *type_map) { + TSNode receiver = {0}; + TSNode oref = {0}; + const char *nk_first = NULL; + for (uint32_t i = 0; i < ts_node_named_child_count(node); i++) { + TSNode child = ts_node_named_child(node, i); + const char *ck = ts_node_type(child); + if (strcmp(ck, "lvn") == 0 || strcmp(ck, "variable") == 0) { + receiver = child; + } else if (strcmp(ck, "relative_dot_property") == 0) { + receiver = child; + nk_first = "relative_dot_property"; + } else if (strcmp(ck, "oref_method") == 0) { + oref = child; + } + } + if (ts_node_is_null(oref)) { + return NULL; + } + TSNode method_name_node = cbm_find_child_by_kind(oref, "method_name"); + if (ts_node_is_null(method_name_node)) { + return NULL; + } + TSNode mn_ident = ts_node_named_child_count(method_name_node) > 0 + ? 
ts_node_named_child(method_name_node, 0) + : (TSNode){0}; + if (ts_node_is_null(mn_ident)) { + return NULL; + } + char *method = cbm_node_text(a, mn_ident, source); + if (!method || !method[0]) { + return NULL; + } + if (ts_node_is_null(receiver)) { + return NULL; + } + char *var_text = NULL; + if (nk_first && strcmp(nk_first, "relative_dot_property") == 0) { + TSNode prop_name = cbm_find_child_by_kind(receiver, "member_name"); + if (!ts_node_is_null(prop_name)) { + char *pname = cbm_node_text(a, prop_name, source); + if (pname && pname[0]) { + var_text = cbm_arena_sprintf(a, "..%s", pname); + } + } + if (!var_text) { + var_text = cbm_node_text(a, receiver, source); + } + } else { + var_text = cbm_node_text(a, receiver, source); + } + if (!var_text || !var_text[0]) { + return NULL; + } + for (int i = 0; i < type_map->count; i++) { + if (strcasecmp(type_map->entries[i].var_name, var_text) == 0) { + return cbm_arena_sprintf(a, "%s.%s", type_map->entries[i].class_name, method); + } + } + return NULL; +} + void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, WalkState *state) { if (!spec->call_node_types || !spec->call_node_types[0]) { return; @@ -1127,6 +1248,56 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk if (cbm_kind_in_set(node, spec->call_node_types)) { char *callee = extract_callee_name(ctx->arena, node, ctx->source, ctx->language); + + // ObjectScript: var.Method() / ..Property.Method() instance dispatch. + if (!callee && + (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) && + strcmp(ts_node_type(node), "instance_method_call") == 0) { + callee = resolve_objectscript_instance_call(ctx->arena, node, ctx->source, + &state->os_type_map); + } + + // ObjectScript: ..Method() oref self-call resolves against the enclosing class. 
+ if (!callee && + (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) && + strcmp(ts_node_type(node), "relative_dot_method") == 0 && state->enclosing_class_qn && + state->enclosing_class_qn[0]) { + TSNode oref = cbm_find_child_by_kind(node, "oref_method"); + if (!ts_node_is_null(oref)) { + TSNode mname_node = cbm_find_child_by_kind(oref, "method_name"); + if (!ts_node_is_null(mname_node)) { + TSNode ident = ts_node_named_child_count(mname_node) > 0 + ? ts_node_named_child(mname_node, 0) + : (TSNode){0}; + if (!ts_node_is_null(ident)) { + char *mname = cbm_node_text(ctx->arena, ident, ctx->source); + if (mname && mname[0]) { + callee = cbm_arena_sprintf(ctx->arena, "%s.%s", + state->enclosing_class_qn, mname); + } + } + } + } + } + + // ObjectScript: expand a $$$Macro callee via the macro table. + if (callee && callee[0] == '$' && callee[1] == '$' && callee[2] == '$' && + ctx->macro_table) { + const char *macro_name = callee + 3; + const CBMMacroEntry *entry = cbm_macro_table_find(ctx->macro_table, macro_name); + if (entry) { + if (entry->resolved_callee) { + callee = cbm_arena_strdup(ctx->arena, entry->resolved_callee); + } else if (entry->expansion) { + callee = cbm_macro_extract_callee(ctx->arena, entry->expansion); + } else { + callee = NULL; + } + } + } + if (callee && callee[0] && !cbm_is_keyword(callee, ctx->language)) { CBMCall call = {0}; call.callee_name = callee; @@ -1136,12 +1307,47 @@ void handle_calls(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, Walk call.start_line = (int)ts_node_start_point(node).row + TS_LINE_OFFSET; TSNode args = ts_node_child_by_field_name(node, TS_FIELD("arguments")); + // ObjectScript stores args under oref_method/method_args, not the + // generic "arguments" field. 
+ if (ts_node_is_null(args) && (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE)) { + TSNode oref = cbm_find_child_by_kind(node, "oref_method"); + if (!ts_node_is_null(oref)) { + args = cbm_find_child_by_kind(oref, "method_args"); + } + if (ts_node_is_null(args)) { + args = cbm_find_child_by_kind(node, "method_args"); + } + } if (!ts_node_is_null(args)) { call.first_string_arg = extract_url_or_topic_arg(ctx, args); if (call.first_string_arg && call.first_string_arg[0] == '/') { call.second_arg_name = extract_handler_arg(ctx, args); } - extract_call_args(ctx, args, &call); + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + for (uint32_t ai = 0; + ai < ts_node_named_child_count(args) && call.arg_count < CBM_MAX_CALL_ARGS; + ai++) { + TSNode achild = ts_node_named_child(args, ai); + const char *ack = ts_node_type(achild); + if (strcmp(ack, "bracket") == 0) { + continue; + } + if (strcmp(ack, "method_arg") != 0) { + continue; + } + CBMCallArg *ca = &call.args[call.arg_count]; + memset(ca, 0, sizeof(*ca)); + ca->index = call.arg_count; + ca->expr = cbm_node_text(ctx->arena, achild, ctx->source); + if (ca->expr && ca->expr[0]) { + call.arg_count++; + } + } + } else { + extract_call_args(ctx, args, &call); + } } cbm_calls_push(&ctx->result->calls, ctx->arena, call); diff --git a/internal/cbm/extract_defs.c b/internal/cbm/extract_defs.c index 913268d8..323de9d4 100644 --- a/internal/cbm/extract_defs.c +++ b/internal/cbm/extract_defs.c @@ -8,6 +8,7 @@ #include "semantic/ast_profile.h" #include "tree_sitter/api.h" // TSNode, ts_node_* #include // uint32_t +#include // snprintf (ObjectScript storage/trigger sidecars) #include #include @@ -92,7 +93,9 @@ static char *extract_body_ident_tokens(CBMExtractCtx *ctx, TSNode body) { if (nc == 0) { const char *k = ts_node_type(nd); if (strcmp(k, "identifier") == 0 || strcmp(k, "field_identifier") == 0 || - strcmp(k, 
"property_identifier") == 0) { + strcmp(k, "property_identifier") == 0 || + strcmp(k, "objectscript_identifier") == 0 || + strcmp(k, "identifier_segment_immediate") == 0) { uint32_t s = ts_node_start_byte(nd); int len = (int)(ts_node_end_byte(nd) - s); if (len > 0 && len < CBM_SZ_64 && s < (uint32_t)ctx->source_len) { @@ -693,6 +696,25 @@ static TSNode resolve_func_name(TSNode node, CBMLanguage lang) { return null_node; } + // ObjectScript routine tag is its own name node. + if (lang == CBM_LANG_OBJECTSCRIPT_ROUTINE && strcmp(kind, "tag") == 0) { + return node; + } + // ObjectScript method/classmethod: name lives under method_definition -> + // method_name -> first named child. + if (lang == CBM_LANG_OBJECTSCRIPT_UDL && + (strcmp(kind, "method") == 0 || strcmp(kind, "classmethod") == 0)) { + TSNode mdef = cbm_find_child_by_kind(node, "method_definition"); + if (!ts_node_is_null(mdef)) { + TSNode mname = cbm_find_child_by_kind(mdef, "method_name"); + if (!ts_node_is_null(mname) && ts_node_named_child_count(mname) > 0) { + return ts_node_named_child(mname, 0); + } + } + TSNode null_node = {0}; + return null_node; + } + TSNode name = func_name_node(node); if (lang == CBM_LANG_R && strcmp(kind, "function_definition") == 0) { @@ -1952,6 +1974,37 @@ static const char **extract_julia_base_classes(CBMArena *a, TSNode node, const c static const char **extract_base_classes(CBMArena *a, TSNode node, const char *source, CBMLanguage lang) { + // ObjectScript: `Class X Extends (A, B)` — bases are class_name children of + // the class_extends node. 
+ if (lang == CBM_LANG_OBJECTSCRIPT_UDL) { + TSNode ext = cbm_find_child_by_kind(node, "class_extends"); + if (!ts_node_is_null(ext)) { + const char *bases[MAX_BASES]; + int base_count = 0; + uint32_t nc = ts_node_named_child_count(ext); + for (uint32_t i = 0; i < nc && base_count < MAX_BASES_MINUS_1; i++) { + TSNode ch = ts_node_named_child(ext, i); + if (strcmp(ts_node_type(ch), "class_name") == 0) { + char *base = cbm_node_text(a, ch, source); + if (base && base[0]) { + bases[base_count++] = base; + } + } + } + if (base_count > 0) { + const char **result = + (const char **)cbm_arena_alloc(a, (base_count + 1) * sizeof(const char *)); + if (result) { + for (int i = 0; i < base_count; i++) { + result[i] = bases[i]; + } + result[base_count] = NULL; + return result; + } + } + } + return NULL; + } // Languages whose heritage is not exposed via a tree-sitter field need // dedicated walkers; the generic field/keyword path mis-captures them. if (lang == CBM_LANG_TYPESCRIPT || lang == CBM_LANG_TSX) { @@ -2977,6 +3030,10 @@ static void extract_class_def(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_OBJC) { name_node = cbm_find_child_by_kind(node, "identifier"); } + // ObjectScript UDL: class name is a `class_name` child (no "name" field). + if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + name_node = cbm_find_child_by_kind(node, "class_name"); + } // Swift and newer tree-sitter-kotlin: class/object name is a type_identifier // child (no "name" field). if (ts_node_is_null(name_node) && @@ -3473,6 +3530,22 @@ static TSNode resolve_method_name(TSNode child, CBMLanguage lang) { return cbm_find_child_by_kind(child, "identifier"); } + // ObjectScript method/classmethod: name under method_definition->method_name. 
+ if (lang == CBM_LANG_OBJECTSCRIPT_UDL && + (strcmp(ck, "method") == 0 || strcmp(ck, "classmethod") == 0)) { + TSNode mdef = cbm_find_child_by_kind(child, "method_definition"); + if (!ts_node_is_null(mdef)) { + TSNode mname = cbm_find_child_by_kind(mdef, "method_name"); + if (!ts_node_is_null(mname) && ts_node_named_child_count(mname) > 0) { + return ts_node_named_child(mname, 0); + } + } + } + // ObjectScript query member. + if (lang == CBM_LANG_OBJECTSCRIPT_UDL && strcmp(ck, "query") == 0) { + return cbm_find_child_by_kind(child, "query_name"); + } + if (strcmp(ck, "arrow_function") == 0) { return resolve_arrow_func_name(child); } @@ -3506,6 +3579,11 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_ def.is_exported = cbm_is_exported(name, ctx->language); TSNode params = ts_node_child_by_field_name(child, TS_FIELD("parameters")); + // ObjectScript exposes the parameter list under a `parameter_list` field. + if (ts_node_is_null(params) && (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE)) { + params = ts_node_child_by_field_name(child, TS_FIELD("parameter_list")); + } if (!ts_node_is_null(params)) { def.signature = cbm_node_text(a, params, ctx->source); def.param_types = extract_param_types(a, params, ctx->source, ctx->language); @@ -3523,6 +3601,22 @@ static void push_method_def(CBMExtractCtx *ctx, TSNode child, const char *class_ } } + // ObjectScript: return type is method_definition -> return_type -> typename. 
+ if (!def.return_type && (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE)) { + TSNode mdef = cbm_find_child_by_kind(child, "method_definition"); + if (ts_node_is_null(mdef)) { + mdef = child; + } + TSNode rt_node = cbm_find_child_by_kind(mdef, "return_type"); + if (!ts_node_is_null(rt_node)) { + TSNode tname = cbm_find_child_by_kind(rt_node, "typename"); + if (!ts_node_is_null(tname)) { + def.return_type = cbm_node_text(a, tname, ctx->source); + } + } + } + // C++: trailing return type (auto method() -> Type) if (def.return_type && strcmp(def.return_type, "auto") == 0 && (ctx->language == CBM_LANG_CPP || ctx->language == CBM_LANG_CUDA)) { @@ -3604,6 +3698,19 @@ static void extract_class_methods(CBMExtractCtx *ctx, TSNode class_node, const c method_node = def; } + // ObjectScript UDL wraps each method/classmethod in a class_statement. + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL && + strcmp(ts_node_type(child), "class_statement") == 0) { + if (ts_node_named_child_count(child) == 0) { + continue; + } + TSNode inner = ts_node_named_child(child, 0); + if (!cbm_kind_in_set(inner, spec->function_node_types)) { + continue; + } + method_node = inner; + } + if (!cbm_kind_in_set(method_node, spec->function_node_types)) { continue; } @@ -4762,6 +4869,14 @@ static void extract_class_fields(CBMExtractCtx *ctx, TSNode class_node, const ch uint32_t count = ts_node_named_child_count(body); for (uint32_t i = 0; i < count; i++) { TSNode child = ts_node_named_child(body, i); + + // ObjectScript UDL wraps each member in a class_statement node. 
+ if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL && + strcmp(ts_node_type(child), "class_statement") == 0 && + ts_node_named_child_count(child) > 0) { + child = ts_node_named_child(child, 0); + } + if (!cbm_kind_in_set(child, spec->field_node_types)) { continue; } @@ -4770,6 +4885,211 @@ static void extract_class_fields(CBMExtractCtx *ctx, TSNode class_node, const ch continue; } + // ObjectScript UDL member extraction. property/parameter -> Variable; + // index/trigger/xdata/storage/foreignkey -> labelled members with + // storage-XML and trigger-body sidecars. + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + if (strcmp(ts_node_type(child), "property") == 0 || + strcmp(ts_node_type(child), "parameter") == 0) { + TSNode pname = cbm_find_child_by_kind(child, "property_name"); + if (ts_node_is_null(pname)) { + pname = cbm_find_child_by_kind(child, "parameter_name"); + } + if (!ts_node_is_null(pname) && ts_node_named_child_count(pname) > 0) { + TSNode ident = ts_node_named_child(pname, 0); + char *pn = cbm_node_text(a, ident, ctx->source); + if (pn && pn[0]) { + CBMDefinition pdef; + memset(&pdef, 0, sizeof(pdef)); + pdef.name = pn; + pdef.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, pn); + pdef.label = "Variable"; + pdef.file_path = ctx->rel_path; + pdef.parent_class = class_qn; + pdef.start_line = ts_node_start_point(child).row + TS_LINE_OFFSET; + pdef.end_line = ts_node_end_point(child).row + TS_LINE_OFFSET; + cbm_defs_push(&ctx->result->defs, a, pdef); + } + } + continue; + } + + const char *ntype = ts_node_type(child); + const char *name_child_kind = NULL; + const char *member_label = NULL; + if (strcmp(ntype, "index") == 0) { + name_child_kind = "index_name"; + member_label = "Index"; + } else if (strcmp(ntype, "trigger") == 0) { + name_child_kind = "trigger_name"; + member_label = "Trigger"; + } else if (strcmp(ntype, "xdata") == 0) { + name_child_kind = "xdata_name"; + member_label = "XData"; + } else if (strcmp(ntype, "storage") == 0) { + 
name_child_kind = "storage_name"; + member_label = "Storage"; + } else if (strcmp(ntype, "foreignkey") == 0) { + name_child_kind = "foreignkey_name"; + member_label = "Variable"; + } + + if (name_child_kind) { + TSNode nname = cbm_find_child_by_kind(child, name_child_kind); + if (!ts_node_is_null(nname)) { + char *mn = cbm_node_text(a, nname, ctx->source); + if (mn && mn[0]) { + CBMDefinition mdef; + memset(&mdef, 0, sizeof(mdef)); + mdef.name = mn; + mdef.qualified_name = cbm_arena_sprintf(a, "%s.%s", class_qn, mn); + mdef.label = member_label; + mdef.file_path = ctx->rel_path; + mdef.parent_class = class_qn; + mdef.start_line = ts_node_start_point(child).row + TS_LINE_OFFSET; + mdef.end_line = ts_node_end_point(child).row + TS_LINE_OFFSET; + + if (strcmp(member_label, "Storage") == 0) { + TSNode sbody = cbm_find_child_by_kind(child, "storage_body"); + if (!ts_node_is_null(sbody)) { + char *xml = cbm_node_text(a, sbody, ctx->source); + if (xml) { + char props[CBM_SZ_2K]; + int pos = snprintf(props, sizeof(props), "{"); + static const struct { + const char *tag; + const char *key; + } kv[] = {{"ExtentSize", "extent_size"}, + {"DataLocation", "data_global"}, + {"IdLocation", "id_global"}, + {"IndexLocation", "index_global"}, + {"StreamLocation", "stream_global"}, + {"Type", "storage_type"}, + {NULL, NULL}}; + bool first = true; + for (int ki = 0; kv[ki].tag; ki++) { + char open[64], close[64], buf[256]; + snprintf(open, sizeof(open), "<%s>", kv[ki].tag); + snprintf(close, sizeof(close), "", kv[ki].tag); + const char *s = strstr(xml, open); + if (!s) { + continue; + } + s += strlen(open); + const char *e = strstr(s, close); + if (!e) { + continue; + } + size_t vlen = (size_t)(e - s); + if (vlen >= sizeof(buf)) { + vlen = sizeof(buf) - 1; + } + memcpy(buf, s, vlen); + buf[vlen] = '\0'; + char esc[300]; + int ei = 0; + for (size_t ci = 0; ci < vlen && ei < (int)sizeof(esc) - 2; + ci++) { + if (buf[ci] == '"' || buf[ci] == '\\') { + esc[ei++] = '\\'; + } + esc[ei++] = 
buf[ci]; + } + esc[ei] = '\0'; + if (pos < 0 || pos >= (int)sizeof(props) - 1) { + break; // buffer full — stop appending + } + pos += snprintf(props + pos, sizeof(props) - (size_t)pos, + "%s\"%s\":\"%s\"", first ? "" : ",", + kv[ki].key, esc); + if (pos >= (int)sizeof(props)) { + pos = (int)sizeof(props) - 1; // truncated + } + first = false; + } + const char *sql_tag = ""; + const char *sql_end = ""; + char sql_map_buf[512]; + int smi = 0; + const char *sp = xml; + bool sql_first = true; + while ((sp = strstr(sp, sql_tag)) != NULL) { + sp += strlen(sql_tag); + const char *ep = strstr(sp, sql_end); + if (!ep) { + break; + } + size_t glen = (size_t)(ep - sp); + if (smi + (int)glen + 2 < (int)sizeof(sql_map_buf) - 1) { + if (!sql_first) { + sql_map_buf[smi++] = ' '; + } + memcpy(sql_map_buf + smi, sp, glen); + smi += (int)glen; + sql_first = false; + } + sp = ep + strlen(sql_end); + } + sql_map_buf[smi] = '\0'; + if (smi > 0 && pos >= 0 && pos < (int)sizeof(props) - 1) { + pos += snprintf(props + pos, sizeof(props) - (size_t)pos, + "%s\"sql_map_globals\":\"%s\"", + first ? 
"" : ",", sql_map_buf); + if (pos >= (int)sizeof(props)) { + pos = (int)sizeof(props) - 1; // truncated + } + first = false; + } + if (pos < (int)sizeof(props) - 1) { + props[pos++] = '}'; + props[pos] = '\0'; + } + if (!first) { + mdef.docstring = cbm_arena_strdup(a, props); + } + } + } + } + + if (strcmp(member_label, "Trigger") == 0) { + TSNode tbody = cbm_find_child_by_kind(child, "core_trigger"); + if (ts_node_is_null(tbody)) { + tbody = cbm_find_child_by_kind(child, "external_trigger"); + } + if (!ts_node_is_null(tbody)) { + mdef.body_tokens = extract_body_ident_tokens(ctx, tbody); + char *raw = cbm_node_text(a, tbody, ctx->source); + if (raw && raw[0]) { + char esc[CBM_SZ_512]; + int ei = 0; + for (int ci = 0; raw[ci] && ei < (int)sizeof(esc) - 3; ci++) { + if (raw[ci] == '"' || raw[ci] == '\\') { + esc[ei++] = '\\'; + } else if (raw[ci] == '\n') { + esc[ei++] = '\\'; + esc[ei++] = 'n'; + continue; + } else if (raw[ci] == '\r') { + continue; + } + esc[ei++] = raw[ci]; + } + esc[ei] = '\0'; + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), "{\"trigger_body\":\"%s\"}", + esc); + mdef.docstring = cbm_arena_strdup(a, props); + } + } + } + + cbm_defs_push(&ctx->result->defs, a, mdef); + } + } + continue; + } + } + /* Locate the field's "type" + name node. 
Two shapes:
          *   - direct (Java/Go/Rust/C/C++):
          *       field_declaration .type=identifier .declarator=variable_declarator(.name)
@@ -4926,6 +5246,9 @@ static const char *compute_class_qn(CBMExtractCtx *ctx, TSNode node, const char
     if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_SWIFT) {
         name_node = cbm_find_child_by_kind(node, "type_identifier");
     }
+    if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) {
+        name_node = cbm_find_child_by_kind(node, "class_name");
+    }
     if (!ts_node_is_null(name_node)) {
         char *cname = cbm_node_text(ctx->arena, name_node, ctx->source);
         if (cname && cname[0]) {
diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c
index 7274158f..b5a6aad8 100644
--- a/internal/cbm/extract_unified.c
+++ b/internal/cbm/extract_unified.c
@@ -10,6 +10,7 @@ enum { MAX_INFRA_BINDINGS = 8 };
 
 #include <stdint.h> // uint32_t, uint8_t
 #include <string.h>
+#include <strings.h> // strcasecmp (ObjectScript type inference)
 
 // --- Scope stack management ---
 
@@ -104,6 +105,234 @@ static TSNode resolve_func_name_node(TSNode node) {
     return name_node;
 }
 
+// --- ObjectScript variable type inference (instance_method_call resolution) ---
+
+// Insert or update var_name -> class_name. A name already in the map is always
+// updated in place; a new name is dropped silently once the map is full.
+static void os_type_map_add(os_type_map_t *map, const char *var_name, const char *class_name) {
+    if (!var_name || !class_name) {
+        return;
+    }
+    for (int i = 0; i < map->count; i++) {
+        if (strcmp(map->entries[i].var_name, var_name) == 0) {
+            map->entries[i].class_name = class_name;
+            return;
+        }
+    }
+    if (map->count >= OS_TYPE_MAP_CAP) {
+        return; // full: only the update path above may still change the map
+    }
+    map->entries[map->count].var_name = var_name;
+    map->entries[map->count].class_name = class_name;
+    map->count++;
+}
+
+// Locate the class_method_call inside an RHS expression (peeking through a
+// couple of common ObjectScript expression container node types).
+static TSNode find_class_method_call(TSNode root, const char *end) {
+    (void)end;
+    if (strcmp(ts_node_type(root), "class_method_call") == 0) {
+        return root;
+    }
+    static const char *containers[] = {"expression", "expr_atom", NULL};
+    for (const char **c = containers; *c; c++) {
+        TSNode inner = cbm_find_child_by_kind(root, *c);
+        if (!ts_node_is_null(inner)) {
+            TSNode hit = cbm_find_child_by_kind(inner, "class_method_call");
+            if (!ts_node_is_null(hit)) {
+                return hit;
+            }
+            TSNode inner2 = cbm_find_child_by_kind(inner, "expr_atom");
+            if (!ts_node_is_null(inner2)) {
+                hit = cbm_find_child_by_kind(inner2, "class_method_call");
+                if (!ts_node_is_null(hit)) {
+                    return hit;
+                }
+            }
+        }
+    }
+    return cbm_find_child_by_kind(root, "class_method_call");
+}
+
+// On a `Set var = ##class(X).%New()` (or %OpenId/%Open, or a method whose
+// return type is known) map var -> X. On a class `Property`/`Relationship`,
+// map `..PropName -> typename` (surviving method-scope resets).
+static void handle_objectscript_type_map(CBMExtractCtx *ctx, TSNode node, WalkState *state) {
+    if (ctx->language != CBM_LANG_OBJECTSCRIPT_UDL &&
+        ctx->language != CBM_LANG_OBJECTSCRIPT_ROUTINE) {
+        return;
+    }
+
+    const char *nk = ts_node_type(node);
+
+    if (strcmp(nk, "command_set") == 0) {
+        for (uint32_t i = 0; i < ts_node_named_child_count(node); i++) {
+            TSNode set_arg = ts_node_named_child(node, i);
+            const char *sak = ts_node_type(set_arg);
+            if (strcmp(sak, "set_argument") != 0 && strcmp(sak, "assignment") != 0) {
+                continue;
+            }
+            TSNode lhs = {0};
+            TSNode rhs = {0};
+            for (uint32_t j = 0; j < ts_node_named_child_count(set_arg); j++) {
+                TSNode achild = ts_node_named_child(set_arg, j);
+                const char *ak = ts_node_type(achild);
+                if (strcmp(ak, "set_target") == 0 || strcmp(ak, "lvn") == 0 ||
+                    strcmp(ak, "variable") == 0 || strcmp(ak, "glvn") == 0) {
+                    lhs = achild;
+                } else if (strcmp(ak, "expression") == 0 || strcmp(ak, "expr_atom") == 0 ||
+                           strcmp(ak, "class_method_call") == 0) {
+                    rhs = achild;
+                }
+            }
+            if (ts_node_is_null(lhs) || ts_node_is_null(rhs)) {
+                continue;
+            }
+
+            TSNode cm_call = find_class_method_call(rhs, NULL);
+            if (ts_node_is_null(cm_call)) {
+                continue;
+            }
+
+            TSNode method_name_node = cbm_find_child_by_kind(cm_call, "method_name");
+            if (ts_node_is_null(method_name_node)) {
+                continue;
+            }
+            TSNode mn_ident = ts_node_named_child_count(method_name_node) > 0
+                                  ? ts_node_named_child(method_name_node, 0)
+                                  : (TSNode){0};
+            if (ts_node_is_null(mn_ident)) {
+                continue;
+            }
+            char *method_text = cbm_node_text(ctx->arena, mn_ident, ctx->source);
+            if (!method_text) {
+                continue;
+            }
+
+            TSNode class_ref = cbm_find_child_by_kind(cm_call, "class_ref");
+            if (ts_node_is_null(class_ref)) {
+                continue;
+            }
+            TSNode cname = cbm_find_child_by_kind(class_ref, "class_name");
+            if (ts_node_is_null(cname)) {
+                continue;
+            }
+            char *cls = cbm_node_text(ctx->arena, cname, ctx->source);
+            if (!cls || !cls[0]) {
+                continue;
+            }
+
+            bool is_constructor =
+                (strcasecmp(method_text, "%New") == 0 || strcasecmp(method_text, "%OpenId") == 0 ||
+                 strcasecmp(method_text, "%Open") == 0);
+            // Not a constructor: look the method up in ctx->return_type_table instead.
+            if (!is_constructor) {
+                if (!ctx->return_type_table) {
+                    continue;
+                }
+                char *method_qn = cbm_arena_sprintf(ctx->arena, "%s.%s", cls, method_text);
+                for (int rti = 0; rti < ctx->return_type_table->count; rti++) {
+                    if (strcasecmp(ctx->return_type_table->entries[rti].method_qn, method_qn) ==
+                        0) {
+                        cls = cbm_arena_strdup(ctx->arena,
+                                               ctx->return_type_table->entries[rti].return_type);
+                        is_constructor = true; // cls now holds the declared return type
+                        break;
+                    }
+                }
+                if (!is_constructor) {
+                    continue;
+                }
+            }
+
+            TSNode var_node = lhs;
+            TSNode inner = cbm_find_child_by_kind(lhs, "glvn");
+            if (!ts_node_is_null(inner)) {
+                var_node = inner;
+            }
+            inner = cbm_find_child_by_kind(var_node, "lvn");
+            if (!ts_node_is_null(inner)) {
+                var_node = inner;
+            }
+            char *var = cbm_node_text(ctx->arena, var_node, ctx->source);
+            if (!var || !var[0]) {
+                continue;
+            }
+
+            os_type_map_add(&state->os_type_map, var, cls);
+        }
+    }
+
+    if (strcmp(nk, "property") == 0 || strcmp(nk, "relationship") == 0) {
+        TSNode prop_name_node = cbm_find_child_by_kind(node, "property_name");
+        if (ts_node_is_null(prop_name_node)) {
+            prop_name_node = cbm_find_child_by_kind(node, "relationship_name");
+        }
+        TSNode ret_type = cbm_find_child_by_kind(node, "return_type");
+        if (!ts_node_is_null(prop_name_node) && !ts_node_is_null(ret_type)) {
+            TSNode tname = cbm_find_child_by_kind(ret_type, "typename");
+            if (!ts_node_is_null(tname)) {
+                char *pname = cbm_node_text(ctx->arena, prop_name_node, ctx->source);
+                char *ptype = cbm_node_text(ctx->arena, tname, ctx->source);
+                if (pname && pname[0] && ptype && ptype[0]) {
+                    char *dot_name = cbm_arena_sprintf(ctx->arena, "..%s", pname);
+                    // Entries above class_base_count were added while walking an
+                    // earlier method body. Drop them first, otherwise advancing the
+                    // base mark below would keep them alive for every later method.
+                    state->os_type_map.count = state->os_type_map.class_base_count;
+                    os_type_map_add(&state->os_type_map, dot_name, ptype);
+                    state->os_type_map.class_base_count = state->os_type_map.count;
+                }
+            }
+        }
+    }
+}
+
+// Resolve the FQN of an ObjectScript class_definition node (via its class_name).
+static const char *objectscript_get_class_name(CBMExtractCtx *ctx, TSNode node) {
+    for (uint32_t i = 0; i < ts_node_named_child_count(node); i++) {
+        TSNode child = ts_node_named_child(node, i);
+        if (strcmp(ts_node_type(child), "class_name") == 0) {
+            char *name = cbm_node_text(ctx->arena, child, ctx->source);
+            if (name && name[0]) {
+                return cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name);
+            }
+        }
+    }
+    return NULL;
+}
+
+// Resolve the QN of an ObjectScript method/classmethod node for scope tracking.
+static const char *objectscript_get_method_qn(CBMExtractCtx *ctx, TSNode node,
+                                              const char *enclosing_class_qn) {
+    const char *nk = ts_node_type(node);
+    if (strcmp(nk, "method") != 0 && strcmp(nk, "classmethod") != 0) {
+        return NULL;
+    }
+    for (uint32_t i = 0; i < ts_node_named_child_count(node); i++) {
+        TSNode child = ts_node_named_child(node, i);
+        if (strcmp(ts_node_type(child), "method_definition") == 0) {
+            for (uint32_t j = 0; j < ts_node_named_child_count(child); j++) {
+                TSNode mchild = ts_node_named_child(child, j);
+                if (strcmp(ts_node_type(mchild), "method_name") == 0) {
+                    if (ts_node_named_child_count(mchild) > 0) {
+                        TSNode ident = ts_node_named_child(mchild, 0);
+                        char *name = cbm_node_text(ctx->arena, ident, ctx->source);
+                        if (name && name[0]) {
+                            if (enclosing_class_qn) {
+                                return cbm_arena_sprintf(ctx->arena, "%s.%s", enclosing_class_qn,
+                                                         name);
+                            }
+                            return cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name);
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return NULL;
+}
+
 // Compute function QN for scope tracking (mirrors cbm_enclosing_func_qn logic).
static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLangSpec *spec, WalkState *state) { @@ -111,6 +331,18 @@ static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLan if (ctx->language == CBM_LANG_WOLFRAM) { return compute_wolfram_func_qn(ctx, node); } + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + return objectscript_get_method_qn(ctx, node, state->enclosing_class_qn); + } + if (ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + if (strcmp(ts_node_type(node), "tag") == 0) { + char *name = cbm_node_text(ctx->arena, node, ctx->source); + if (name && name[0]) { + return cbm_fqn_compute(ctx->arena, ctx->project, ctx->rel_path, name); + } + } + return NULL; + } TSNode name_node = resolve_func_name_node(node); if (ts_node_is_null(name_node)) { @@ -130,6 +362,9 @@ static const char *compute_func_qn(CBMExtractCtx *ctx, TSNode node, const CBMLan // Compute class QN for scope tracking. static const char *compute_class_qn(CBMExtractCtx *ctx, TSNode node) { + if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL) { + return objectscript_get_class_name(ctx, node); + } TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name")); /* Newer tree-sitter-kotlin: class/object name is a type_identifier child. */ if (ts_node_is_null(name_node) && ctx->language == CBM_LANG_KOTLIN) { @@ -793,11 +1028,58 @@ static void push_boundary_scopes(CBMExtractCtx *ctx, TSNode node, const CBMLangS const char *fqn = compute_func_qn(ctx, node, spec, state); if (fqn) { push_scope(state, SCOPE_FUNC, depth, fqn); + // ObjectScript: entering a method resets local var types (keeping + // class-level property types) and seeds the declared parameter types. 
+ if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + state->os_type_map.count = state->os_type_map.class_base_count; + TSNode mdef = cbm_find_child_by_kind(node, "method_definition"); + if (ts_node_is_null(mdef)) { + mdef = node; + } + TSNode args_node = cbm_find_child_by_kind(mdef, "arguments"); + if (!ts_node_is_null(args_node)) { + for (uint32_t ai = 0; ai < ts_node_named_child_count(args_node); ai++) { + TSNode arg = ts_node_named_child(args_node, ai); + if (strcmp(ts_node_type(arg), "argument") != 0) { + continue; + } + TSNode param_name_node = {0}; + TSNode type_node = {0}; + for (uint32_t pi = 0; pi < ts_node_named_child_count(arg); pi++) { + TSNode pchild = ts_node_named_child(arg, pi); + const char *pk = ts_node_type(pchild); + if (strcmp(pk, "method_arg") == 0) { + param_name_node = pchild; + } else if (strcmp(pk, "return_type") == 0) { + type_node = cbm_find_child_by_kind(pchild, "typename"); + } + } + if (!ts_node_is_null(param_name_node) && !ts_node_is_null(type_node)) { + TSNode lvn = cbm_find_child_by_kind(param_name_node, "expr_atom"); + if (ts_node_is_null(lvn)) { + lvn = param_name_node; + } + char *pname = cbm_node_text(ctx->arena, lvn, ctx->source); + char *ptype = cbm_node_text(ctx->arena, type_node, ctx->source); + if (pname && pname[0] && ptype && ptype[0]) { + os_type_map_add(&state->os_type_map, pname, ptype); + } + } + } + } + } } } else if (spec->class_node_types && cbm_kind_in_set(node, spec->class_node_types)) { const char *cqn = compute_class_qn(ctx, node); if (cqn) { push_scope(state, SCOPE_CLASS, depth, cqn); + // ObjectScript: a new class clears the type map entirely. 
+ if (ctx->language == CBM_LANG_OBJECTSCRIPT_UDL || + ctx->language == CBM_LANG_OBJECTSCRIPT_ROUTINE) { + state->os_type_map.count = 0; + state->os_type_map.class_base_count = 0; + } } } else if (ctx->language == CBM_LANG_RUST && strcmp(ts_node_type(node), "impl_item") == 0) { TSNode type_node = ts_node_child_by_field_name(node, TS_FIELD("type")); @@ -848,6 +1130,7 @@ void cbm_extract_unified(CBMExtractCtx *ctx) { recompute_state(&state, ctx->module_qn); handle_string_constants(ctx, node, &state); + handle_objectscript_type_map(ctx, node, &state); handle_calls(ctx, node, spec, &state); handle_usages(ctx, node, spec, &state); handle_throws(ctx, node, spec, &state); diff --git a/internal/cbm/extract_unified.h b/internal/cbm/extract_unified.h index 6e2eb60f..ac9d6b64 100644 --- a/internal/cbm/extract_unified.h +++ b/internal/cbm/extract_unified.h @@ -14,6 +14,20 @@ #define MAX_SCOPES 64 +// ObjectScript type map: variable name → class name (for instance_method_call +// resolution). Stack-allocated, per-method scope. Overflow is silent (no crash). +#define OS_TYPE_MAP_CAP 64 +typedef struct { + const char *var_name; + const char *class_name; +} os_type_entry_t; + +typedef struct { + os_type_entry_t entries[OS_TYPE_MAP_CAP]; + int count; + int class_base_count; // entries [0,class_base_count) survive method-scope resets +} os_type_map_t; + // WalkState tracks scope context during the unified cursor walk. // Replaces parent-chain walks for enclosing_func_qn, inside_call, etc. typedef struct { @@ -30,6 +44,8 @@ typedef struct { uint8_t kind; } scopes[MAX_SCOPES]; int scope_top; + + os_type_map_t os_type_map; // ObjectScript variable → type mapping } WalkState; // Per-node handler prototypes. 
Each is called once per node during the diff --git a/internal/cbm/grammar_objectscript_routine.c b/internal/cbm/grammar_objectscript_routine.c new file mode 100644 index 00000000..9006eda3 --- /dev/null +++ b/internal/cbm/grammar_objectscript_routine.c @@ -0,0 +1,4 @@ +// Vendored tree-sitter grammar: objectscript_routine +// Each grammar compiled as separate unit (conflicting static symbols). +#include "vendored/grammars/objectscript_routine/parser.c" +#include "vendored/grammars/objectscript_routine/scanner.c" diff --git a/internal/cbm/grammar_objectscript_udl.c b/internal/cbm/grammar_objectscript_udl.c new file mode 100644 index 00000000..5aea58c4 --- /dev/null +++ b/internal/cbm/grammar_objectscript_udl.c @@ -0,0 +1,4 @@ +// Vendored tree-sitter grammar: objectscript_udl +// Each grammar compiled as separate unit (conflicting static symbols). +#include "vendored/grammars/objectscript_udl/parser.c" +#include "vendored/grammars/objectscript_udl/scanner.c" diff --git a/internal/cbm/iris_export_xml.c b/internal/cbm/iris_export_xml.c new file mode 100644 index 00000000..148d4a3a --- /dev/null +++ b/internal/cbm/iris_export_xml.c @@ -0,0 +1,444 @@ +#include "iris_export_xml.h" +#include "arena.h" +#include +#include +#include +#include + +#define EXPORT_MARKER "= sz) + vl = sz - 1; + memcpy(out, v, vl); + out[vl] = '\0'; + return; + } + } +} +static const char *elem_content(const char *p, const char *end, const char *tag, char *buf, + size_t bufsz) { + buf[0] = '\0'; + char open[MAX_NAME + 2]; + snprintf(open, sizeof(open), "<%s", tag); + const char *start = find_s(p, end, open); + if (!start) + return NULL; + const char *gt = find_s(start, end, ">"); + if (!gt) + return NULL; + if (is_self_closing(start, gt)) + return gt + 1; + const char *cs = gt + 1; + if (sw(cs, end, ""); + if (!ce) + return NULL; + size_t l = (size_t)(ce - cs); + if (l >= bufsz) + l = bufsz - 1; + memcpy(buf, cs, l); + buf[l] = '\0'; + return ce + 3; + } + char close[MAX_NAME + 4]; + 
snprintf(close, sizeof(close), "", tag); + const char *cl = find_s(cs, end, close); + if (!cl) + return NULL; + size_t l = (size_t)(cl - cs); + if (l >= bufsz) + l = bufsz - 1; + memcpy(buf, cs, l); + buf[l] = '\0'; + return cl + strlen(close); +} +static bool tag_is_one(const char *p, const char *end, const char *tag) { + char buf[8]; + return elem_content(p, end, tag, buf, sizeof(buf)) && strcmp(buf, "1") == 0; +} + +static void ub_init(UdlBuf *b, CBMArena *arena) { + b->buf = (char *)cbm_arena_alloc(arena, BUF_CAP); + b->pos = 0; + b->cap = BUF_CAP; + if (b->buf) + b->buf[0] = '\0'; +} +static void ub_app(UdlBuf *b, const char *s) { + if (!b->buf || !s) + return; + size_t n = strlen(s); + if (b->pos + (int)n + 1 >= b->cap) + return; + memcpy(b->buf + b->pos, s, n); + b->pos += (int)n; + b->buf[b->pos] = '\0'; +} + +static void emit_header(UdlBuf *b, const char *cs, const char *ce) { + char name[MAX_NAME]; + extract_attr(cs, ce, "name", name, sizeof(name)); + if (!name[0]) + return; + ub_app(b, "Class "); + ub_app(b, name); + char sup[MAX_NAME * 4] = ""; + elem_content(cs, ce, "Super", sup, sizeof(sup)); + if (sup[0]) { + if (strchr(sup, ',')) { + ub_app(b, " Extends ("); + ub_app(b, sup); + ub_app(b, ")"); + } else { + ub_app(b, " Extends "); + ub_app(b, sup); + } + } + char pragma[64] = ""; + if (tag_is_one(cs, ce, "Abstract")) + strncat(pragma, "Abstract,", sizeof(pragma) - strlen(pragma) - 1); + if (tag_is_one(cs, ce, "Final")) + strncat(pragma, "Final,", sizeof(pragma) - strlen(pragma) - 1); + if (pragma[0]) { + pragma[strlen(pragma) - 1] = '\0'; + ub_app(b, " [ "); + ub_app(b, pragma); + ub_app(b, " ]"); + } + ub_app(b, "\n{\n\n"); +} + +static void emit_method(UdlBuf *b, const char *ms, const char *me) { + char mn[MAX_NAME]; + extract_attr(ms, me, "name", mn, sizeof(mn)); + if (!mn[0]) + return; + bool cm = tag_is_one(ms, me, "ClassMethod"); + char formal[1024] = ""; + elem_content(ms, me, "FormalSpec", formal, sizeof(formal)); + char ret[MAX_NAME] = ""; + 
elem_content(ms, me, "ReturnType", ret, sizeof(ret)); + char desc[4096] = ""; + elem_content(ms, me, "Description", desc, sizeof(desc)); + if (desc[0]) { + ub_app(b, "/// "); + for (char *c = desc; *c; c++) { + if (*c == '\n') + ub_app(b, "\n/// "); + else { + char t[2] = {*c, 0}; + ub_app(b, t); + } + } + ub_app(b, "\n"); + } + ub_app(b, cm ? "ClassMethod " : "Method "); + ub_app(b, mn); + ub_app(b, "("); + ub_app(b, formal); + ub_app(b, ")"); + if (ret[0]) { + ub_app(b, " As "); + ub_app(b, ret); + } + ub_app(b, "\n{\n"); + char impl[1024 * 32] = ""; + elem_content(ms, me, "Implementation", impl, sizeof(impl)); + ub_app(b, impl); + ub_app(b, "}\n\n"); +} + +static void emit_property(UdlBuf *b, const char *ps, const char *pe) { + char pn[MAX_NAME]; + extract_attr(ps, pe, "name", pn, sizeof(pn)); + if (!pn[0]) + return; + char pt[MAX_NAME] = ""; + elem_content(ps, pe, "Type", pt, sizeof(pt)); + PropParam params[MAX_PARAMS]; + int np = 0; + const char *pp = ps; + while (pp < pe && np < MAX_PARAMS) { + const char *po = find_s(pp, pe, ""); + if (!pg) + break; + extract_attr(po, pg, "name", params[np].param_name, MAX_NAME); + extract_attr(po, pg, "value", params[np].param_value, MAX_NAME); + if (!params[np].param_value[0]) { + char db[MAX_NAME]; + const char *a = elem_content(po, pe, "Parameter", db, MAX_NAME); + if (a && db[0]) + strncpy(params[np].param_value, db, MAX_NAME - 1); + } + if (params[np].param_name[0]) + np++; + pp = pg + 1; + } + ub_app(b, "Property "); + ub_app(b, pn); + if (pt[0]) { + ub_app(b, " As "); + ub_app(b, pt); + } + if (np > 0) { + ub_app(b, "("); + for (int i = 0; i < np; i++) { + if (i > 0) + ub_app(b, ", "); + ub_app(b, params[i].param_name); + if (params[i].param_value[0]) { + ub_app(b, " = "); + ub_app(b, params[i].param_value); + } + } + ub_app(b, ")"); + } + ub_app(b, ";\n\n"); +} + +static void emit_parameter(UdlBuf *b, const char *ps, const char *pe) { + char pn[MAX_NAME]; + extract_attr(ps, pe, "name", pn, sizeof(pn)); + if (!pn[0]) 
+ return; + char dv[MAX_NAME] = ""; + elem_content(ps, pe, "Default", dv, sizeof(dv)); + ub_app(b, "Parameter "); + ub_app(b, pn); + if (dv[0]) { + ub_app(b, " = \""); + ub_app(b, dv); + ub_app(b, "\""); + } + ub_app(b, ";\n\n"); +} + +static void emit_index(UdlBuf *b, const char *is_, const char *ie) { + char in_[MAX_NAME]; + extract_attr(is_, ie, "name", in_, sizeof(in_)); + if (!in_[0]) + return; + char props[MAX_NAME * 4] = ""; + elem_content(is_, ie, "Properties", props, sizeof(props)); + bool uniq = tag_is_one(is_, ie, "Unique"); + bool pkey = tag_is_one(is_, ie, "PrimaryKey"); + ub_app(b, "Index "); + ub_app(b, in_); + if (props[0]) { + ub_app(b, " On "); + ub_app(b, props); + } + if (uniq || pkey) { + ub_app(b, " [ "); + if (pkey) + ub_app(b, "PrimaryKey, "); + if (uniq) + ub_app(b, "Unique"); + ub_app(b, " ]"); + } + ub_app(b, ";\n\n"); +} + +static void emit_xdata(UdlBuf *b, const char *xs, const char *xe) { + char xn[MAX_NAME]; + extract_attr(xs, xe, "name", xn, sizeof(xn)); + if (!xn[0]) + return; + char data[1024 * 32] = ""; + elem_content(xs, xe, "Data", data, sizeof(data)); + ub_app(b, "XData "); + ub_app(b, xn); + ub_app(b, "\n{\n"); + ub_app(b, data); + ub_app(b, "\n}\n\n"); +} + +static char *transcode_class(CBMArena *arena, const char *cs, const char *ce) { + UdlBuf b; + ub_init(&b, arena); + if (!b.buf) + return NULL; + emit_header(&b, cs, ce); + const char *p = cs; + while (p < ce) { + p = skip_ws(p, ce); + if (p >= ce || *p != '<') { + if (p < ce) + p++; + continue; + } + if (sw(p, ce, "")) { + const char *gt = find_s(p, ce, ">"); + if (!gt) + break; + const char *me = find_s(gt + 1, ce, ""); + if (!me) { + p = gt + 1; + continue; + } + emit_method(&b, p, me + strlen("")); + p = me + strlen(""); + continue; + } + if (sw(p, ce, ""); + if (!gt) + break; + const char *pe = find_s(gt + 1, ce, ""); + if (!pe) { + p = gt + 1; + continue; + } + emit_property(&b, p, pe + strlen("")); + p = pe + strlen(""); + continue; + } + if (sw(p, ce, ""); + if 
(!gt) + break; + if (is_self_closing(p, gt)) { + p = gt + 1; + continue; + } + const char *pe = find_s(gt + 1, ce, ""); + if (!pe) { + p = gt + 1; + continue; + } + emit_parameter(&b, p, pe + strlen("")); + p = pe + strlen(""); + continue; + } + if (sw(p, ce, "")) { + const char *gt = find_s(p, ce, ">"); + if (!gt) + break; + const char *ie = find_s(gt + 1, ce, ""); + if (!ie) { + p = gt + 1; + continue; + } + emit_index(&b, p, ie + strlen("")); + p = ie + strlen(""); + continue; + } + if (sw(p, ce, "")) { + const char *gt = find_s(p, ce, ">"); + if (!gt) + break; + const char *xe = find_s(gt + 1, ce, ""); + if (!xe) { + p = gt + 1; + continue; + } + emit_xdata(&b, p, xe + strlen("")); + p = xe + strlen(""); + continue; + } + p = skip_tag(p, ce); + } + ub_app(&b, "}\n"); + return b.buf; +} + +char **cbm_iris_export_to_udl(CBMArena *arena, const char *xml, int xml_len, int *class_count) { + if (class_count) + *class_count = 0; + if (!arena || !xml || xml_len <= 0) + return NULL; + const char *end = xml + xml_len; + if (!find_s(xml, end, EXPORT_MARKER)) + return NULL; + char *results[MAX_CLASSES]; + int count = 0; + const char *p = xml; + while (p < end && count < MAX_CLASSES) { + const char *co = find_s(p, end, ""); + if (!gt) + break; + const char *cc = find_s(gt + 1, end, ""); + if (!cc) + break; + char *udl = transcode_class(arena, co, cc); + if (udl && udl[0]) + results[count++] = udl; + p = cc + strlen(""); + } + if (!count) + return NULL; + char **arr = (char **)cbm_arena_alloc(arena, (size_t)(count + 1) * sizeof(char *)); + if (!arr) + return NULL; + for (int i = 0; i < count; i++) + arr[i] = results[i]; + arr[count] = NULL; + if (class_count) + *class_count = count; + return arr; +} diff --git a/internal/cbm/iris_export_xml.h b/internal/cbm/iris_export_xml.h new file mode 100644 index 00000000..30c289ff --- /dev/null +++ b/internal/cbm/iris_export_xml.h @@ -0,0 +1,19 @@ +#pragma once +#include "arena.h" + +/* + * IRIS Studio Export XML transcoder. 
+ * + * Converts XML files to equivalent UDL text so + * they can be fed to the existing ObjectScript UDL extraction pipeline. + * The XML-to-UDL mapping is 1:1; no new extraction logic is needed. + * + * One Export file may contain multiple blocks. Each produces a + * separate UDL string. The caller iterates the returned array and calls + * cbm_extract_file(..., CBM_LANG_OBJECTSCRIPT_UDL, ...) for each entry. + * + * Returns arena-allocated array of NUL-terminated UDL strings, or NULL + * if the file is not an Export file or parsing fails gracefully. + * *class_count is set to the number of classes found (0 on failure). + */ +char **cbm_iris_export_to_udl(CBMArena *arena, const char *xml, int xml_len, int *class_count); diff --git a/internal/cbm/lang_specs.c b/internal/cbm/lang_specs.c index 68d2afad..0af2b3e9 100644 --- a/internal/cbm/lang_specs.c +++ b/internal/cbm/lang_specs.c @@ -164,6 +164,8 @@ extern const TSLanguage *tree_sitter_apex(void); extern const TSLanguage *tree_sitter_soql(void); extern const TSLanguage *tree_sitter_sosl(void); extern const TSLanguage *tree_sitter_pine(void); +extern const TSLanguage *tree_sitter_objectscript_udl(void); +extern const TSLanguage *tree_sitter_objectscript_routine(void); // -- Empty sentinel -- static const char *empty_types[] = {NULL}; @@ -1562,6 +1564,25 @@ static const char *pine_var_types[] = {"variable_definition_statement", static const char *pine_branch_types[] = {"if_statement", "switch_statement", "for_statement", "for_in_statement", "while_statement", NULL}; static const char *pine_assign_types[] = {"reassignment_statement", NULL}; + +// InterSystems ObjectScript. Node names verified against +// intersystems/tree-sitter-objectscript grammar. 
+static const char *objectscript_udl_func_types[] = {"method", "classmethod", "query", NULL}; +static const char *objectscript_udl_class_types[] = {"class_definition", NULL}; +static const char *objectscript_udl_field_types[] = { + "property", "parameter", "index", "trigger", "xdata", "storage", "foreignkey", NULL}; +static const char *objectscript_udl_call_types[] = {"class_method_call", "instance_method_call", + "relative_dot_method", "macro", NULL}; +static const char *objectscript_udl_module_types[] = {"source_file", NULL}; +/* Branching nodes for cyclomatic complexity (verified against grammar node-types) */ +static const char *objectscript_udl_branch_types[] = { + "command_if", "command_for", "command_while", "elseif_block", "catch_block", NULL}; + +static const char *objectscript_routine_func_types[] = {"tag", NULL}; +static const char *objectscript_routine_call_types[] = {"extrinsic_function", "routine_tag_call", + NULL}; +static const char *objectscript_routine_module_types[] = {"source_file", NULL}; + // ==================== SPEC TABLE ==================== static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = { @@ -2537,6 +2558,29 @@ static const CBMLangSpec lang_specs[CBM_LANG_COUNT] = { pine_branch_types, pine_var_types, pine_assign_types, empty_types, NULL, empty_types, NULL, NULL, tree_sitter_pine, NULL}, + // CBM_LANG_OBJECTSCRIPT_UDL — InterSystems ObjectScript class (.cls) UDL. + // intersystems/tree-sitter-objectscript. + [CBM_LANG_OBJECTSCRIPT_UDL] = {CBM_LANG_OBJECTSCRIPT_UDL, objectscript_udl_func_types, + objectscript_udl_class_types, objectscript_udl_field_types, + objectscript_udl_module_types, objectscript_udl_call_types, + empty_types, empty_types, objectscript_udl_branch_types, + empty_types, empty_types, empty_types, NULL, empty_types, NULL, + NULL, tree_sitter_objectscript_udl, NULL}, + + // CBM_LANG_OBJECTSCRIPT_ROUTINE — InterSystems ObjectScript routine (.mac/.int/.rtn/.inc). 
+    [CBM_LANG_OBJECTSCRIPT_ROUTINE] = {CBM_LANG_OBJECTSCRIPT_ROUTINE,
+                                       objectscript_routine_func_types, empty_types, empty_types,
+                                       objectscript_routine_module_types,
+                                       objectscript_routine_call_types, empty_types, empty_types,
+                                       empty_types, empty_types, empty_types, empty_types, NULL,
+                                       empty_types, NULL, NULL, tree_sitter_objectscript_routine,
+                                       NULL},
+
+    // CBM_LANG_OBJECTSCRIPT_EXPORT — Studio Export XML. No grammar row: the
+    // pipeline transcodes Export XML to UDL (iris_export_xml.c) and re-extracts
+    // each class as CBM_LANG_OBJECTSCRIPT_UDL, so this language never reaches
+    // cbm_lang_spec()/cbm_ts_language() directly. Left as a zero spec.
+
 };
 
 _Static_assert(sizeof(lang_specs) / sizeof(lang_specs[0]) == CBM_LANG_COUNT,
diff --git a/internal/cbm/macro_table.c b/internal/cbm/macro_table.c
new file mode 100644
index 00000000..cca63679
--- /dev/null
+++ b/internal/cbm/macro_table.c
@@ -0,0 +1,275 @@
+#include "macro_table.h"
+#include "arena.h"
+#include <stdbool.h>
+#include <string.h>
+#include <strings.h>
+
+// Hardcoded system macros loaded by cbm_macro_table_init_system(). `callee`
+// is copied into CBMMacroEntry.resolved_callee (NULL when the row names none).
+static const struct {
+    const char *name;
+    int param_count;
+    const char *callee;
+} SYSTEM_MACROS[] = {{"OK", 0, NULL},
+                     {"ISERR", 1, "%SYSTEM.Status.IsError"},
+                     {"ISOK", 1, "%SYSTEM.Status.IsOK"},
+                     {"GETERRORTEXT", 1, "%SYSTEM.Status.GetErrorText"},
+                     {"ADDSC", 2, "%SYSTEM.Status.AppendStatus"},
+                     {"ThrowStatus", 1, "%SYSTEM.Status.ThrowStatus"},
+                     {"ThrowOnError", 1, "%SYSTEM.Status.ThrowStatus"},
+                     {"ERROR", 2, "%SYSTEM.Status.Error"},
+                     {"NULLOREF", 0, NULL},
+                     {"LISTBUILD", -1, NULL},
+                     {"LISTGET", 2, NULL},
+                     {"LISTNEXT", 3, NULL},
+                     {"LISTLENGTH", 1, NULL},
+                     {"SORTBEGIN", 1, NULL},
+                     {"SORTEND", 0, NULL},
+                     {"AUDITSTART", 3, "%SYSTEM.Audit.Event"},
+                     {"logoutput", 1, NULL},
+                     {"objExists", 1, NULL},
+                     {"traceStatus", 1, NULL},
+                     {NULL, 0, NULL}};
+
+// True when c is an ASCII letter or digit, i.e. can continue an identifier.
+static bool os_ident_char(char c) {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
+}
+
+// Reset t and load the SYSTEM_MACROS rows (at most CBM_MACRO_TABLE_CAP of them).
+void cbm_macro_table_init_system(CBMMacroTable *t) {
+    t->count = 0;
+    for (int i = 0; SYSTEM_MACROS[i].name; i++) {
+        if (t->count >= CBM_MACRO_TABLE_CAP)
+            break;
+        CBMMacroEntry *e = &t->entries[t->count++];
+        e->name = SYSTEM_MACROS[i].name;
+        e->param_count = SYSTEM_MACROS[i].param_count;
+        e->expansion = NULL;
+        e->resolved_callee = SYSTEM_MACROS[i].callee;
+        for (int p = 0; p < CBM_MACRO_MAX_PARAMS; p++)
+            e->param_names[p] = NULL;
+    }
+}
+
+// Append a macro, copying its strings into arena. Nothing is added when the
+// table is full or an entry with the same name (case-insensitive) exists.
+void cbm_macro_table_add(CBMMacroTable *t, CBMArena *arena, const char *name, int param_count,
+                         const char **param_names, const char *expansion,
+                         const char *resolved_callee) {
+    if (t->count >= CBM_MACRO_TABLE_CAP || !name)
+        return;
+    for (int i = 0; i < t->count; i++) {
+        if (strcasecmp(t->entries[i].name, name) == 0)
+            return;
+    }
+    CBMMacroEntry *e = &t->entries[t->count++];
+    e->name = cbm_arena_strdup(arena, name);
+    e->param_count = param_count;
+    e->expansion = expansion ? cbm_arena_strdup(arena, expansion) : NULL;
+    e->resolved_callee = resolved_callee ? cbm_arena_strdup(arena, resolved_callee) : NULL;
+    for (int p = 0; p < CBM_MACRO_MAX_PARAMS; p++) {
+        e->param_names[p] =
+            (param_names && p < param_count) ? cbm_arena_strdup(arena, param_names[p]) : NULL;
+    }
+}
+
+// Case-insensitive lookup by name; NULL when t or name is NULL or no entry matches.
+const CBMMacroEntry *cbm_macro_table_find(const CBMMacroTable *t, const char *name) {
+    if (!t || !name)
+        return NULL;
+    for (int i = 0; i < t->count; i++) {
+        if (strcasecmp(t->entries[i].name, name) == 0)
+            return &t->entries[i];
+    }
+    return NULL;
+}
+
+// Scan content line by line and register every `#define NAME[(params)] body`.
+// Macros written without a parameter list are stored with param_count -1.
+void cbm_parse_inc_file(CBMMacroTable *t, CBMArena *arena, const char *content) {
+    if (!content)
+        return;
+    const char *line = content;
+    while (*line) {
+        const char *end = strchr(line, '\n');
+        if (!end)
+            end = line + strlen(line);
+
+        const char *p = line;
+        while (*p == ' ' || *p == '\t')
+            p++;
+
+        if (strncmp(p, "#define", 7) == 0 && (p[7] == ' ' || p[7] == '\t')) {
+            p += 8;
+            while (*p == ' ' || *p == '\t')
+                p++;
+
+            // '\r' ends the name too, so a CRLF line such as "#define FLAG\r\n"
+            // does not yield a name with a trailing carriage return.
+            const char *name_start = p;
+            while (*p && *p != '(' && *p != ' ' && *p != '\t' && *p != '\r' && p < end)
+                p++;
+            if (p == name_start)
+                goto next_line;
+
+            char name[256];
+            int nlen = (int)(p - name_start);
+            if (nlen >= (int)sizeof(name))
+                goto next_line;
+            memcpy(name, name_start, nlen);
+            name[nlen] = '\0';
+
+            int param_count = -1;
+            char param_names_buf[CBM_MACRO_MAX_PARAMS][64];
+            const char *param_name_ptrs[CBM_MACRO_MAX_PARAMS] = {NULL};
+
+            if (*p == '(') {
+                param_count = 0;
+                p++;
+                while (*p && *p != ')' && p < end) {
+                    while (*p == ' ' || *p == '\t')
+                        p++;
+                    if (*p == ')')
+                        break;
+                    const char *pn_start = p;
+                    while (*p && *p != ',' && *p != ')' && p < end)
+                        p++;
+                    int plen = (int)(p - pn_start);
+                    while (plen > 0 && (pn_start[plen - 1] == ' ' || pn_start[plen - 1] == '\t'))
+                        plen--;
+                    if (plen > 0 && param_count < CBM_MACRO_MAX_PARAMS) {
+                        memcpy(param_names_buf[param_count], pn_start, plen < 63 ? plen : 63);
+                        param_names_buf[param_count][plen < 63 ? plen : 63] = '\0';
+                        param_name_ptrs[param_count] = param_names_buf[param_count];
+                        param_count++;
+                    }
+                    if (*p == ',')
+                        p++;
+                }
+                if (*p == ')')
+                    p++;
+            }
+
+            while (*p == ' ' || *p == '\t')
+                p++;
+            int explen = (int)(end - p);
+            while (explen > 0 &&
+                   (p[explen - 1] == '\r' || p[explen - 1] == ' ' || p[explen - 1] == '\t'))
+                explen--;
+            char *expansion = NULL;
+            if (explen > 0) {
+                expansion = cbm_arena_strndup(arena, p, explen);
+            }
+
+            cbm_macro_table_add(t, arena, name, param_count,
+                                param_count > 0 ? param_name_ptrs : NULL, expansion, NULL);
+        }
+
+    next_line:
+        if (!*end)
+            break;
+        line = end + 1;
+    }
+}
+
+// Substitute args for the macro's %parameters in its expansion text. A
+// parameter matches only as a whole token, so `%a` does not fire inside `%ab`.
+// A missing or NULL argument expands to "". Output is capped by buf below.
+char *cbm_macro_expand(CBMArena *arena, const CBMMacroEntry *entry, const char **args,
+                       int arg_count) {
+    if (!entry || !entry->expansion)
+        return NULL;
+    const char *tmpl = entry->expansion;
+    char buf[1024];
+    int out = 0;
+    const char *p = tmpl;
+    while (*p && out < (int)sizeof(buf) - 1) {
+        if (*p == '%') {
+            bool matched = false;
+            for (int i = 0; i < entry->param_count && i < CBM_MACRO_MAX_PARAMS; i++) {
+                if (!entry->param_names[i])
+                    continue;
+                int pnlen = (int)strlen(entry->param_names[i]);
+                // Skip empty names (they would match without consuming input)
+                // and prefix-only hits that continue into a longer identifier.
+                if (pnlen == 0 || strncasecmp(p, entry->param_names[i], pnlen) != 0 ||
+                    os_ident_char(p[pnlen])) {
+                    continue;
+                }
+                const char *arg = (args && i < arg_count && args[i]) ? args[i] : "";
+                int alen = (int)strlen(arg);
+                if (out + alen < (int)sizeof(buf) - 1) {
+                    memcpy(buf + out, arg, alen);
+                    out += alen;
+                }
+                p += pnlen;
+                matched = true;
+                break;
+            }
+            if (!matched)
+                buf[out++] = *p++;
+        } else {
+            buf[out++] = *p++;
+        }
+    }
+    buf[out] = '\0';
+    return cbm_arena_strdup(arena, buf);
+}
+
+// Pull a callee out of macro text: "Cls.Method" for `##class(Cls).Method`,
+// otherwise "Tag^Routine" for `$$Tag^Routine`. NULL when neither shape is found.
+char *cbm_macro_extract_callee(CBMArena *arena, const char *expansion) {
+    if (!expansion)
+        return NULL;
+
+    const char *p = strstr(expansion, "##class(");
+    if (p) {
+        p += 8;
+        const char *cls_end = strchr(p, ')');
+        if (!cls_end)
+            return NULL;
+        int clen = (int)(cls_end - p);
+        const char *dot = cls_end + 1;
+        if (*dot != '.')
+            return NULL;
+        dot++;
+        const char *method_start = dot;
+        const char *method_end = method_start;
+        while (*method_end && *method_end != '(' && *method_end != ' ')
+            method_end++;
+        int mlen = (int)(method_end - method_start);
+        if (clen <= 0 || mlen <= 0)
+            return NULL;
+        return cbm_arena_sprintf(arena, "%.*s.%.*s", clen, p, mlen, method_start);
+    }
+
+    // A "$$$Name" macro reference is not a `$$Tag^Routine` call: step over each
+    // one so a later `$$Tag^Routine` in the same text is still found.
+    p = strstr(expansion, "$$");
+    while (p && p[2] == '$') {
+        p += 3;
+        while (*p == '$')
+            p++;
+        p = strstr(p, "$$");
+    }
+    if (p) {
+        p += 2;
+        const char *tag_end = p;
+        while (*tag_end && *tag_end != '^' && *tag_end != '(' && *tag_end != ' ')
+            tag_end++;
+        if (*tag_end == '^') {
+            const char *rtn = tag_end + 1;
+            const char *rtn_end = rtn;
+            while (*rtn_end && *rtn_end != '(' && *rtn_end != ' ')
+                rtn_end++;
+            int tlen = (int)(tag_end - p);
+            int rlen = (int)(rtn_end - rtn);
+            if (tlen > 0 && rlen > 0) {
+                return cbm_arena_sprintf(arena, "%.*s^%.*s", tlen, p, rlen, rtn);
+            }
+        }
+    }
+
+    return NULL;
+}
diff --git a/internal/cbm/macro_table.h b/internal/cbm/macro_table.h
new file mode 100644
index 00000000..661475cb
--- /dev/null
+++ b/internal/cbm/macro_table.h
@@ -0,0 +1,43 @@
+#pragma once
+#include <stddef.h>
+#include "arena.h"
+
+#define CBM_MACRO_MAX_PARAMS 4
+#define CBM_MACRO_TABLE_CAP 4096
+
+typedef struct {
+    const char *name;
+    int param_count;
+    const char *param_names[CBM_MACRO_MAX_PARAMS];
+    const char *expansion;
+    const char *resolved_callee;
+} CBMMacroEntry;
+
+typedef struct CBMMacroTable {
+    CBMMacroEntry entries[CBM_MACRO_TABLE_CAP];
+    int count;
+} CBMMacroTable;
+
+// Add an entry. Silently drops on overflow or if the name is already present.
+void cbm_macro_table_add(CBMMacroTable *t, CBMArena *arena, const char *name, int param_count,
+                         const char **param_names, const char *expansion,
+                         const char *resolved_callee);
+
+// Look up by name. Returns NULL if not found.
+const CBMMacroEntry *cbm_macro_table_find(const CBMMacroTable *t, const char *name);
+
+// Parse a single .inc file content into the table (arena-allocated strings).
+void cbm_parse_inc_file(CBMMacroTable *t, CBMArena *arena, const char *content);
+
+// Expand a macro call: substitute args into expansion text.
+// Returns arena-allocated expanded text, or NULL if no expansion.
+char *cbm_macro_expand(CBMArena *arena, const CBMMacroEntry *entry, const char **args,
+                       int arg_count);
+
+// Extract a callee name from expanded text (looks for ##class(X).Method or $$Label^Routine).
+// Returns arena-allocated "X.Method" or "Label^Routine", or NULL.
+char *cbm_macro_extract_callee(CBMArena *arena, const char *expansion);
+
+// Reset the table and populate it with the hardcoded system macros (no allocation).
+// Caller owns the table (stack or heap).
+void cbm_macro_table_init_system(CBMMacroTable *t); diff --git a/src/discover/discover.c b/src/discover/discover.c index 314c00c5..a6a8bc49 100644 --- a/src/discover/discover.c +++ b/src/discover/discover.c @@ -337,6 +337,28 @@ static CBMLanguage detect_file_language(const char *entry_name, const char *abs_ if (dot && strcmp(dot, ".m") == 0) { lang = cbm_disambiguate_m(abs_path); } + /* Special: .cls is shared by ObjectScript UDL and Apex */ + if (dot && strcmp(dot, ".cls") == 0) { + lang = cbm_disambiguate_cls(abs_path); + } + /* Special: .inc is shared by BitBake and ObjectScript include files */ + if (dot && strcmp(dot, ".inc") == 0) { + lang = cbm_disambiguate_inc(abs_path); + } + /* Special: ObjectScript Studio Export XML () is + * detected by content; otherwise .xml stays XML. */ + if (lang == CBM_LANG_XML) { + FILE *xf = fopen(abs_path, "r"); + if (xf) { + char xbuf[CBM_SZ_256]; + size_t xn = fread(xbuf, SKIP_ONE, sizeof(xbuf) - SKIP_ONE, xf); + (void)fclose(xf); + xbuf[xn] = '\0'; + if (strstr(xbuf, "", + * otherwise CBM_LANG_APEX. On read failure, defaults to CBM_LANG_APEX. */ +CBMLanguage cbm_disambiguate_cls(const char *path); + +/* Disambiguate .inc files by reading first 4KB of content. + * Returns CBM_LANG_OBJECTSCRIPT_ROUTINE if it looks like an ObjectScript + * include (a "ROUTINE " header), otherwise CBM_LANG_BITBAKE. + * On read failure, defaults to CBM_LANG_BITBAKE. */ +CBMLanguage cbm_disambiguate_inc(const char *path); + /* ── Gitignore pattern matching ──────────────────────────────────── */ typedef struct cbm_gitignore cbm_gitignore_t; diff --git a/src/discover/language.c b/src/discover/language.c index a0254306..b4b80d1e 100644 --- a/src/discover/language.c +++ b/src/discover/language.c @@ -264,9 +264,17 @@ static const ext_entry_t EXT_TABLE[] = { {"WORKSPACE", CBM_LANG_STARLARK}, {"WORKSPACE.bazel", CBM_LANG_STARLARK}, - /* BitBake include fragments — `require/include foo.inc` target files. 
*/ + /* BitBake include fragments — `require/include foo.inc` target files. + * NOTE: .inc is also used by ObjectScript include (macro) files; the + * ambiguity is resolved by content in cbm_disambiguate_inc(). */ {".inc", CBM_LANG_BITBAKE}, + /* InterSystems ObjectScript routines (.mac/.int/.rtn unambiguous; .cls is + * shared with Apex and resolved by content in cbm_disambiguate_cls()). */ + {".mac", CBM_LANG_OBJECTSCRIPT_ROUTINE}, + {".int", CBM_LANG_OBJECTSCRIPT_ROUTINE}, + {".rtn", CBM_LANG_OBJECTSCRIPT_ROUTINE}, + /* Vue */ {".vue", CBM_LANG_VUE}, @@ -834,6 +842,9 @@ static const char *LANG_NAMES[CBM_LANG_COUNT] = { [CBM_LANG_APEX] = "Apex", [CBM_LANG_SOQL] = "SOQL", [CBM_LANG_SOSL] = "SOSL", + [CBM_LANG_OBJECTSCRIPT_UDL] = "ObjectScript UDL", + [CBM_LANG_OBJECTSCRIPT_ROUTINE] = "ObjectScript Routine", + [CBM_LANG_OBJECTSCRIPT_EXPORT] = "ObjectScript Export XML", }; @@ -1025,3 +1036,84 @@ CBMLanguage cbm_disambiguate_m(const char *path) { return CBM_LANG_MATLAB; } + +/* Disambiguate .cls files: shared by InterSystems ObjectScript UDL and + * Salesforce Apex. ObjectScript class files begin with a line of the form + * "Class ...". Defaults to Apex on any doubt. */ +CBMLanguage cbm_disambiguate_cls(const char *path) { + if (!path) { + return CBM_LANG_APEX; + } + + FILE *f = fopen(path, "r"); + if (!f) { + return CBM_LANG_APEX; + } + + char buf[CBM_SZ_4K + SKIP_ONE]; + size_t n = fread(buf, SKIP_ONE, CBM_SZ_4K, f); + buf[n] = '\0'; + (void)fclose(f); + + const char *line = buf; + while (*line) { + if (strncmp(line, "Class ", SLEN("Class ")) == 0 && + isupper((unsigned char)line[SLEN("Class ")])) { + return CBM_LANG_OBJECTSCRIPT_UDL; + } + const char *nl = strchr(line, '\n'); + if (!nl) { + break; + } + line = nl + SKIP_ONE; + } + return CBM_LANG_APEX; +} + +/* Disambiguate .inc files: shared by BitBake include fragments and + * InterSystems ObjectScript include (macro) files. ObjectScript .inc files are + * predominantly macro definitions ("#define NAME ..." 
/ "#def1arg NAME ..."); + * some also carry a "ROUTINE " header. The macro-preprocessor directives + * are the strongest signal because that is the primary content of an .inc file, + * whereas BitBake uses '#' only for "# comment" lines (always '#' + space). + * We therefore match ObjectScript preprocessor directives ('#' immediately + * followed by 'def'/';'), which BitBake never produces. Defaults to BitBake on + * any doubt (preserves existing behaviour). */ +CBMLanguage cbm_disambiguate_inc(const char *path) { + if (!path) { + return CBM_LANG_BITBAKE; + } + + FILE *f = fopen(path, "r"); + if (!f) { + return CBM_LANG_BITBAKE; + } + + char buf[CBM_SZ_4K + SKIP_ONE]; + size_t n = fread(buf, SKIP_ONE, CBM_SZ_4K, f); + buf[n] = '\0'; + (void)fclose(f); + + const char *line = buf; + while (*line) { + /* ObjectScript include header: a line beginning "ROUTINE ". */ + if (strncmp(line, "ROUTINE ", SLEN("ROUTINE ")) == 0 && + isupper((unsigned char)line[SLEN("ROUTINE ")])) { + return CBM_LANG_OBJECTSCRIPT_ROUTINE; + } + /* ObjectScript macro directives — the primary content of .inc files. + * "#define"/"#def1arg" (macro defs) and "#;" (line comment). BitBake's + * only '#' use is "# comment" (hash + space), so these never collide. */ + if (strncmp(line, "#define", SLEN("#define")) == 0 || + strncmp(line, "#def1arg", SLEN("#def1arg")) == 0 || + strncmp(line, "#;", SLEN("#;")) == 0) { + return CBM_LANG_OBJECTSCRIPT_ROUTINE; + } + const char *nl = strchr(line, '\n'); + if (!nl) { + break; + } + line = nl + SKIP_ONE; + } + return CBM_LANG_BITBAKE; +} diff --git a/src/pipeline/pass_calls.c b/src/pipeline/pass_calls.c index 15d691d3..ad9b7601 100644 --- a/src/pipeline/pass_calls.c +++ b/src/pipeline/pass_calls.c @@ -375,6 +375,85 @@ static int resolve_single_call(cbm_pipeline_ctx_t *ctx, CBMCall *call, return SKIP_ONE; } +/* ObjectScript: build a method-QN -> return-type table from the Method nodes + * already in the graph buffer (definitions pass ran first). 
Scalar return types + * (%String, %Integer, ...) are skipped since they cannot host method dispatch. + * Returns NULL when no usable entries exist. Caller owns the heap table. */ +static CBMReturnTypeTable *build_return_type_table(const cbm_gbuf_t *gbuf) { + if (!gbuf) { + return NULL; + } + const cbm_gbuf_node_t **method_nodes = NULL; + int method_count = 0; + if (cbm_gbuf_find_by_label(gbuf, "Method", &method_nodes, &method_count) != 0 || + method_count <= 0 || !method_nodes) { + return NULL; + } + + CBMReturnTypeTable *rtt = (CBMReturnTypeTable *)calloc(1, sizeof(CBMReturnTypeTable)); + if (!rtt) { + free((void *)method_nodes); + return NULL; + } + + static const char *scalar_types[] = {"%String", "%Integer", "%Float", "%Boolean", + "%Status", "%Numeric", "%Date", "%Time", + "%TimeStamp", "%Binary", NULL}; + + for (int i = 0; i < method_count && rtt->count < CBM_RETURN_TYPE_TABLE_CAP; i++) { + const cbm_gbuf_node_t *n = method_nodes[i]; + if (!n->qualified_name || !n->properties_json) { + continue; + } + + const char *p = strstr(n->properties_json, "\"return_type\":"); + if (!p) { + continue; + } + p += 14; /* strlen("\"return_type\":") */ + while (*p == ' ') { + p++; + } + if (*p != '"') { + continue; + } + p++; + const char *end = strchr(p, '"'); + if (!end) { + continue; + } + int rtlen = (int)(end - p); + if (rtlen <= 0 || rtlen > 255) { + continue; + } + + char rt_buf[256]; + memcpy(rt_buf, p, (size_t)rtlen); + rt_buf[rtlen] = '\0'; + + bool is_scalar = false; + for (int si = 0; scalar_types[si]; si++) { + if (strcmp(rt_buf, scalar_types[si]) == 0) { + is_scalar = true; + break; + } + } + if (is_scalar) { + continue; + } + + rtt->entries[rtt->count].method_qn = n->qualified_name; + rtt->entries[rtt->count].return_type = strdup(rt_buf); + rtt->count++; + } + free((void *)method_nodes); + if (rtt->count == 0) { + free(rtt); + return NULL; + } + return rtt; +} + static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, const cbm_file_info_t 
*fi, bool *owned) { *owned = false; @@ -386,8 +465,9 @@ static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, if (!src) { return NULL; } - CBMFileResult *r = cbm_extract_file(src, slen, fi->language, ctx->project_name, fi->rel_path, - CBM_EXTRACT_BUDGET, NULL, NULL); + CBMFileResult *r = cbm_extract_file_ex(src, slen, fi->language, ctx->project_name, fi->rel_path, + CBM_EXTRACT_BUDGET, NULL, NULL, ctx->macro_table, + ctx->return_type_table); free(src); if (r) { *owned = true; @@ -398,6 +478,16 @@ static CBMFileResult *calls_get_or_extract(cbm_pipeline_ctx_t *ctx, int idx, int cbm_pipeline_pass_calls(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count) { cbm_log_info("pass.start", "pass", "calls", "files", itoa_log(file_count)); + /* ObjectScript: build the method-return-type table from the definitions + * already in the graph buffer so `Set x = obj.Method()` can resolve x's + * class for subsequent x.Method() dispatch. NULL if no Method nodes. */ + if (!ctx->return_type_table) { + CBMReturnTypeTable *rtt = build_return_type_table(ctx->gbuf); + if (rtt) { + ctx->return_type_table = rtt; + } + } + int total_calls = 0; int resolved = 0; int unresolved = 0; diff --git a/src/pipeline/pass_definitions.c b/src/pipeline/pass_definitions.c index 676f1b16..2aedbda4 100644 --- a/src/pipeline/pass_definitions.c +++ b/src/pipeline/pass_definitions.c @@ -23,6 +23,8 @@ enum { PD_JSON_FIELD_OVERHEAD = 6 }; #include "foundation/log.h" #include "foundation/compat.h" #include "cbm.h" +#include "arena.h" +#include "iris_export_xml.h" #include "simhash/minhash.h" #include "semantic/ast_profile.h" @@ -489,11 +491,42 @@ int cbm_pipeline_pass_definitions(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t continue; } + /* ObjectScript Studio Export XML: transcode each to UDL and + * extract it as CBM_LANG_OBJECTSCRIPT_UDL. The XML→UDL mapping is 1:1, + * so the same UDL extractor handles the result. 
These files are not + * cached (their defs/edges are emitted directly here). */ + if (lang == CBM_LANG_OBJECTSCRIPT_EXPORT) { + CBMArena export_arena; + cbm_arena_init(&export_arena); + int class_count = 0; + char **udl_strings = + cbm_iris_export_to_udl(&export_arena, source, source_len, &class_count); + free(source); + for (int ci = 0; ci < class_count; ci++) { + CBMFileResult *xr = cbm_extract_file_ex( + udl_strings[ci], (int)strlen(udl_strings[ci]), CBM_LANG_OBJECTSCRIPT_UDL, + ctx->project_name, rel, CBM_EXTRACT_BUDGET, NULL, NULL, ctx->macro_table, NULL); + if (!xr) { + continue; + } + for (int d = 0; d < xr->defs.count; d++) { + process_def(ctx, &xr->defs.items[d], rel); + total_defs++; + } + total_calls += xr->calls.count; + total_imports += create_import_edges_for_file(ctx, xr, rel, NULL); + create_channel_edges_for_file(ctx, xr, rel); + create_env_configures_for_file(ctx, xr, rel); + cbm_free_result(xr); + } + cbm_arena_destroy(&export_arena); + continue; + } + /* Extract */ - CBMFileResult *result = - cbm_extract_file(source, source_len, lang, ctx->project_name, rel, CBM_EXTRACT_BUDGET, - NULL, NULL /* no extra defines or include paths */ - ); + CBMFileResult *result = cbm_extract_file_ex( + source, source_len, lang, ctx->project_name, rel, CBM_EXTRACT_BUDGET, NULL, + NULL /* no extra defines or include paths */, ctx->macro_table, NULL); free(source); if (!result) { diff --git a/src/pipeline/pass_ensemble_routing.c b/src/pipeline/pass_ensemble_routing.c new file mode 100644 index 00000000..a3ded5a5 --- /dev/null +++ b/src/pipeline/pass_ensemble_routing.c @@ -0,0 +1,645 @@ +#include "pipeline/pass_ensemble_routing.h" +#include "pipeline/pipeline_internal.h" +#include "graph_buffer/graph_buffer.h" +#include "foundation/log.h" +#include "foundation/compat.h" +#include "foundation/compat_fs.h" +#include "foundation/constants.h" +#include "foundation/str_util.h" + +#include +#include +#include +#include +#include + +#define CONF_LITERAL 0.95 +#define 
/*
 * Copy the value of attribute `attr` from the XML start tag that begins at
 * `xml + offset` into `out` (NUL-terminated, truncated to outsz - 1 bytes).
 * `out` is set to "" when the attribute is absent or its value is unterminated.
 *
 * Only the tag at `offset` is inspected: scanning stops at the first '>' that
 * is not inside a double-quoted value, so a missing attribute is never picked
 * up from a later element. The name must follow whitespace, so asking for
 * "Name" does not match the tail of ClassName="...". As before, only the exact
 * form  attr="value"  (no spaces around '=', double quotes) is recognised.
 */
static void extract_xml_attr(const char *xml, int offset, const char *attr, char *out, int outsz) {
    if (outsz <= 0)
        return;
    out[0] = '\0';

    const size_t alen = strlen(attr);
    bool in_value = false;

    for (const char *p = xml + offset; *p; p++) {
        if (in_value) {
            /* Skip attribute values wholesale; they may contain '>' or names. */
            if (*p == '"')
                in_value = false;
            continue;
        }
        if (*p == '>')
            break; /* end of this start tag */
        if (*p == '"') {
            in_value = true;
            continue;
        }
        if (!isspace((unsigned char)*p))
            continue;

        const char *name = p + 1;
        if (strncmp(name, attr, alen) != 0 || name[alen] != '=' || name[alen + 1] != '"')
            continue;

        const char *val = name + alen + 2;
        const char *end = strchr(val, '"');
        if (!end)
            return;
        int len = (int)(end - val);
        if (len >= outsz)
            len = outsz - 1;
        memcpy(out, val, (size_t)len);
        out[len] = '\0';
        return;
    }
}
/* Segment-anchored suffix test: true when `qn` is exactly `seg`, or when `qn`
 * ends with `seg` and the character just before that suffix is '.'.
 * NULL arguments, an empty `seg`, or a `seg` longer than `qn` give false.
 * This keeps "MyService" from matching "NotMyService". */
static bool qn_ends_with_segment(const char *qn, const char *seg) {
    if (qn == NULL || seg == NULL)
        return false;

    const size_t qn_len = strlen(qn);
    const size_t seg_len = strlen(seg);
    if (seg_len == 0 || qn_len < seg_len)
        return false;

    /* `tail` holds exactly seg_len characters before its terminator. */
    const char *tail = qn + (qn_len - seg_len);
    if (memcmp(tail, seg, seg_len) != 0)
        return false;

    return tail == qn || tail[-1] == '.';
}
/*
 * Read a whole file into a freshly malloc'd, NUL-terminated buffer.
 *
 * Returns NULL when the file cannot be opened or sized, is empty, is larger
 * than 8 MiB, allocation fails, or a read error occurs. The caller frees the
 * result with free().
 *
 * The terminator is placed after the bytes actually read, so a short read
 * never exposes uninitialised memory as text.
 */
static char *read_file(const char *full_path) {
    enum { READ_FILE_MAX_BYTES = 8 * 1024 * 1024 };

    FILE *f = fopen(full_path, "rb");
    if (!f)
        return NULL;

    char *buf = NULL;
    long sz = -1;

    /* Size the file; any positioning failure counts as "unreadable". */
    if (fseek(f, 0, SEEK_END) == 0)
        sz = ftell(f);
    if (sz > 0 && sz <= READ_FILE_MAX_BYTES && fseek(f, 0, SEEK_SET) == 0)
        buf = malloc((size_t)sz + 1);

    if (buf) {
        size_t got = fread(buf, 1, (size_t)sz, f);
        if (got != (size_t)sz && ferror(f)) {
            free(buf);
            buf = NULL;
        } else {
            buf[got] = '\0';
        }
    }

    fclose(f);
    return buf;
}
const ens_item_t *item, + const char *via, double confidence, const char *production_class) { + int64_t tgt_id = find_entry_point(ctx, item->class_name); + if (!tgt_id) { + char cls_qn[CBM_SZ_512]; + snprintf(cls_qn, sizeof(cls_qn), "%s.%s", production_class, item->item_name); + const cbm_gbuf_node_t *cls = cbm_gbuf_find_by_qn(ctx->gbuf, cls_qn); + if (!cls) + return; + tgt_id = cls->id; + confidence -= 0.10; + } + char conf_str[32]; + snprintf(conf_str, sizeof(conf_str), "%.2f", confidence); + char props[CBM_SZ_512]; + snprintf(props, sizeof(props), + "{\"via\":\"%s\",\"production\":\"%s\",\"item_name\":\"%s\"," + "\"confidence\":%s,\"enabled\":%s}", + via, production_class, item->item_name, conf_str, item->enabled ? "true" : "false"); + cbm_gbuf_insert_edge(ctx->gbuf, src_id, tgt_id, "ROUTES_TO", props); +} + +/* Scan a .cls source file for SendRequestSync call targets and + * InitialExpression values for a given method/property name. */ +static void scan_source_for_send_targets(const char *source, const char *method_name, + char *literal_out, int lit_sz, char *prop_name_out, + int prop_sz) { + literal_out[0] = '\0'; + prop_name_out[0] = '\0'; + if (!source || !method_name) + return; + + const char *p = source; + while ((p = strstr(p, "SendRequestSync")) != NULL) { + p += 15; + while (*p == ' ' || *p == '\t') + p++; + if (*p != '(') + continue; + p++; + while (*p == ' ' || *p == '\t') + p++; + + if (*p == '"') { + const char *ns = p + 1, *ne = strchr(ns, '"'); + if (ne) { + int len = (int)(ne - ns); + if (len > 0 && len < lit_sz) { + memcpy(literal_out, ns, (size_t)len); + literal_out[len] = '\0'; + return; + } + } + } else if (p[0] == '.' 
/* Locate the first `Property <prop_name><follow>` in `source`, or NULL. */
static const char *find_property_decl(const char *source, const char *prop_name, char follow) {
    static const char kw[] = "Property ";
    const size_t nlen = strlen(prop_name);
    for (const char *p = source; (p = strstr(p, kw)) != NULL; p++) {
        const char *name = p + sizeof(kw) - 1;
        if (strncmp(name, prop_name, nlen) == 0 && name[nlen] == follow)
            return p;
    }
    return NULL;
}

/* Return the ';' that ends the declaration starting at `decl`, ignoring any
 * ';' inside a double-quoted literal; returns the terminating NUL if none. */
static const char *property_decl_end(const char *decl) {
    bool in_string = false;
    const char *p = decl;
    for (; *p; p++) {
        if (*p == '"')
            in_string = !in_string;
        else if (*p == ';' && !in_string)
            break;
    }
    return p;
}

/*
 * Find the quoted InitialExpression value of Property `prop_name` in `source`.
 *
 * `out` receives the text between the first pair of double quotes after
 * "InitialExpression =" (truncated to outsz - 1 bytes), or "" when the
 * property is not declared, has no InitialExpression, or the value is not a
 * quoted literal.
 *
 * The search is confined to the property's own declaration (up to its
 * terminating ';'). Without that bound, a property lacking an
 * InitialExpression would pick up the value of a later property.
 */
static void scan_initial_expression(const char *source, const char *prop_name, char *out,
                                    int outsz) {
    out[0] = '\0';
    if (!source || !prop_name)
        return;

    /* Same preference as before: "Property X " first, then "Property X[". */
    const char *decl = find_property_decl(source, prop_name, ' ');
    if (!decl)
        decl = find_property_decl(source, prop_name, '[');
    if (!decl)
        return;

    const char *decl_end = property_decl_end(decl);

    const char *ie = strstr(decl, "InitialExpression =");
    if (!ie || ie >= decl_end)
        return;
    const char *val = strchr(ie, '"');
    if (!val || val >= decl_end)
        return; /* e.g. a numeric or {expression} value: no quoted literal */
    val++;
    const char *val_end = strchr(val, '"');
    if (!val_end)
        return;

    int len = (int)(val_end - val);
    if (len >= outsz)
        len = outsz - 1;
    memcpy(out, val, (size_t)len);
    out[len] = '\0';
}
(int)(dot - xd->qualified_name); + if (len > 0 && len < CBM_SZ_256) { + memcpy(class_qn, xd->qualified_name, (size_t)len); + class_qn[len] = '\0'; + } + } + } + if (!class_qn[0]) { + free(source); + continue; + } + + const char *xml_start = strstr(source, "file_path); + free(source); + if (!def) + continue; + + char n_items_buf[32]; + snprintf(n_items_buf, sizeof(n_items_buf), "%d", def->n_items); + cbm_log_info("ensemble_routing.parse", "class", class_qn, "items", n_items_buf); + + for (int i = 0; i < def->n_items; i++) { + ens_item_t *item = &def->items[i]; + char item_qn[CBM_SZ_512]; + snprintf(item_qn, sizeof(item_qn), "%s.%s", class_qn, item->item_name); + char iprops[CBM_SZ_512]; + snprintf(iprops, sizeof(iprops), + "{\"class_name\":\"%s\",\"enabled\":%s,\"production\":\"%s\"}", + item->class_name, item->enabled ? "true" : "false", class_qn); + cbm_gbuf_upsert_node(ctx->gbuf, "EnsembleItem", item->item_name, item_qn, xd->file_path, + xd->start_line, 0, iprops); + } + + ens_prod_def_t **tmp = realloc(defs, (size_t)(n + 1) * sizeof(ens_prod_def_t *)); + if (!tmp) { + free(def); + continue; + } + defs = tmp; + defs[n++] = def; + } + *defs_out = defs; + *count_out = n; +} + +static bool method_belongs_to_production(const cbm_gbuf_node_t *method, const ens_prod_def_t *def) { + if (!method->properties_json) + return false; + char parent_class[CBM_SZ_512]; + if (!jstr(method->properties_json, "parent_class", parent_class, sizeof(parent_class))) + return false; + for (int i = 0; i < def->n_items; i++) { + /* parent_class may carry a project/package prefix; match the class as a + * trailing dotted segment rather than a bare substring to avoid e.g. + * "Service" matching "MyDataService". 
*/ + if (qn_ends_with_segment(parent_class, def->items[i].class_name)) + return true; + } + return false; +} + +static void resolve_method_routes(cbm_pipeline_ctx_t *ctx, const cbm_gbuf_node_t *method, + const char *source, const ens_prod_def_t *def) { + if (!method->properties_json) + return; + if (!method_belongs_to_production(method, def)) + return; + if (!strstr(source, "SendRequestSync")) + return; + + char literal[CBM_SZ_256], prop_name[CBM_SZ_256]; + scan_source_for_send_targets(source, method->name, literal, sizeof(literal), prop_name, + sizeof(prop_name)); + + if (literal[0]) { + const ens_item_t *item = find_item(def, literal); + if (item) + emit_route(ctx, method->id, item, "literal", CONF_LITERAL, def->production_class); + } else if (prop_name[0]) { + char init_expr[CBM_SZ_256]; + scan_initial_expression(source, prop_name, init_expr, sizeof(init_expr)); + if (init_expr[0]) { + const ens_item_t *item = find_item(def, init_expr); + if (item) + emit_route(ctx, method->id, item, prop_name, CONF_PROP, def->production_class); + } + } +} + +#define CONF_WORKMGR 0.90 + +/* Scan source for WorkMgr parallel dispatch: .Queue("##class(X).method", ...) + * Pattern: any receiver .Queue() call where first arg is "##class(Cls).Meth" */ +static void scan_workmgr_dispatch(cbm_pipeline_ctx_t *ctx, const cbm_gbuf_node_t *method, + const char *source) { + if (!source) + return; + const char *p = source; + const char *needle = ".Queue(\"##class("; + while ((p = strstr(p, needle)) != NULL) { + p += strlen(needle); + /* Extract class name up to ')' */ + const char *cls_end = strchr(p, ')'); + if (!cls_end) + continue; + int cls_len = (int)(cls_end - p); + if (cls_len <= 0 || cls_len >= CBM_SZ_256) { + p = cls_end; + continue; + } + char cls_name[CBM_SZ_256]; + memcpy(cls_name, p, (size_t)cls_len); + cls_name[cls_len] = '\0'; + + /* Expect '.' 
after ')' then method name up to '"' */ + const char *dot = cls_end + 1; + if (*dot != '.') { + p = dot; + continue; + } + const char *meth_start = dot + 1; + const char *meth_end = strchr(meth_start, '"'); + if (!meth_end) + continue; + int meth_len = (int)(meth_end - meth_start); + if (meth_len <= 0 || meth_len >= CBM_SZ_256) { + p = meth_end; + continue; + } + char meth_name[CBM_SZ_256]; + memcpy(meth_name, meth_start, (size_t)meth_len); + meth_name[meth_len] = '\0'; + + /* Find the target method in the gbuf by name within cls_name */ + char target_qn_suffix[CBM_SZ_512]; + snprintf(target_qn_suffix, sizeof(target_qn_suffix), "%s.%s", cls_name, meth_name); + + const cbm_gbuf_node_t **candidates = NULL; + int ccount = 0; + cbm_gbuf_find_by_name(ctx->gbuf, meth_name, (const cbm_gbuf_node_t ***)&candidates, + &ccount); + for (int ci = 0; ci < ccount; ci++) { + if (candidates[ci]->qualified_name && + qn_ends_with_segment(candidates[ci]->qualified_name, target_qn_suffix)) { + char props[CBM_SZ_256]; + snprintf(props, sizeof(props), "{\"via\":\"WorkMgr.Queue\",\"confidence\":%.2f}", + CONF_WORKMGR); + cbm_gbuf_insert_edge(ctx->gbuf, method->id, candidates[ci]->id, "CALLS", props); + break; + } + } + p = meth_end; + } +} + +void cbm_pipeline_pass_ensemble_routing(cbm_pipeline_ctx_t *ctx) { + if (!ctx || !ctx->gbuf || !ctx->repo_path) + return; + + const cbm_gbuf_node_t **method_nodes = NULL; + int method_count = 0; + cbm_gbuf_find_by_label(ctx->gbuf, "Method", (const cbm_gbuf_node_t ***)&method_nodes, + &method_count); + + /* Pass A: WorkMgr parallel dispatch — CALLS edges, independent of productions */ + int workmgr_edges = 0; + char last_path[CBM_SZ_1K] = {0}; + char *last_source = NULL; + for (int mi = 0; mi < method_count; mi++) { + const cbm_gbuf_node_t *m = method_nodes[mi]; + if (!m->file_path) + continue; + char full_path[CBM_SZ_1K]; + snprintf(full_path, sizeof(full_path), "%s/%s", ctx->repo_path, m->file_path); + if (strcmp(full_path, last_path) != 0) { + 
free(last_source); + last_source = read_file(full_path); + snprintf(last_path, sizeof(last_path), "%s", full_path); + } + if (!last_source || !strstr(last_source, ".Queue(\"##class(")) + continue; + /* Scope scan to this method's line range to avoid cross-method false positives */ + char *method_slice = NULL; + if (m->start_line > 0 && m->end_line >= m->start_line) { + const char *p = last_source; + int line = 1; + const char *method_start = NULL, *method_end = NULL; + while (*p) { + if (line == m->start_line) + method_start = p; + if (line == m->end_line + 1) { + method_end = p; + break; + } + if (*p == '\n') + line++; + p++; + } + if (!method_end) + method_end = p; + if (method_start && method_end > method_start) { + int slen = (int)(method_end - method_start); + method_slice = malloc((size_t)slen + 1); + if (method_slice) { + memcpy(method_slice, method_start, (size_t)slen); + method_slice[slen] = '\0'; + } + } + } + const char *scan_src = method_slice ? method_slice : last_source; + int before_w = cbm_gbuf_edge_count_by_type(ctx->gbuf, "CALLS"); + scan_workmgr_dispatch(ctx, m, scan_src); + free(method_slice); + workmgr_edges += cbm_gbuf_edge_count_by_type(ctx->gbuf, "CALLS") - before_w; + } + free(last_source); + if (workmgr_edges > 0) { + char wbuf[32]; + snprintf(wbuf, sizeof(wbuf), "%d", workmgr_edges); + cbm_log_info("ensemble_routing.workmgr", "edges", wbuf); + } + + /* Pass B: Ensemble production routing — ROUTES_TO edges */ + ens_prod_def_t **defs = NULL; + int n_defs = 0; + collect_prod_defs(ctx, &defs, &n_defs); + if (n_defs == 0) + return; + + int before = cbm_gbuf_edge_count_by_type(ctx->gbuf, "ROUTES_TO"); + + /* 1-deep file cache shared across the def/method loops below: the same .cls + * source is re-visited once per method per production, so caching the last + * file read avoids re-reading it from disk each time (mirrors Pass A). 
*/ + char rb_last_path[CBM_SZ_1K] = {0}; + char *rb_last_source = NULL; + + for (int di = 0; di < n_defs; di++) { + ens_prod_def_t *def = defs[di]; + + for (int mi = 0; mi < method_count; mi++) { + const cbm_gbuf_node_t *m = method_nodes[mi]; + if (!m->properties_json || !m->file_path) + continue; + if (!method_belongs_to_production(m, def)) + continue; + + char meth_full_path[CBM_SZ_1K]; + snprintf(meth_full_path, sizeof(meth_full_path), "%s/%s", ctx->repo_path, m->file_path); + if (strcmp(meth_full_path, rb_last_path) != 0) { + free(rb_last_source); + rb_last_source = read_file(meth_full_path); + snprintf(rb_last_path, sizeof(rb_last_path), "%s", meth_full_path); + } + if (!rb_last_source) + continue; + resolve_method_routes(ctx, m, rb_last_source, def); + } + + for (int ii = 0; ii < def->n_items; ii++) { + const ens_item_t *item = &def->items[ii]; + for (int si = 0; si < item->n_settings; si++) { + const ens_setting_t *setting = &item->settings[si]; + if (!setting->value[0]) + continue; + const ens_item_t *target = find_item(def, setting->value); + if (!target) + continue; + char item_qn[CBM_SZ_512]; + snprintf(item_qn, sizeof(item_qn), "%s.%s", def->production_class, item->item_name); + const cbm_gbuf_node_t *item_node = cbm_gbuf_find_by_qn(ctx->gbuf, item_qn); + if (!item_node) + continue; + emit_route(ctx, item_node->id, target, setting->setting_name, CONF_PROP, + def->production_class); + } + } + + free(defs[di]); + } + free(rb_last_source); + free(defs); + + int routes = cbm_gbuf_edge_count_by_type(ctx->gbuf, "ROUTES_TO") - before; + char n_defs_buf[32], n_routes_buf[32]; + snprintf(n_defs_buf, sizeof(n_defs_buf), "%d", n_defs); + snprintf(n_routes_buf, sizeof(n_routes_buf), "%d", routes); + cbm_log_info("ensemble_routing.done", "productions", n_defs_buf, "routes", n_routes_buf); +} diff --git a/src/pipeline/pass_ensemble_routing.h b/src/pipeline/pass_ensemble_routing.h new file mode 100644 index 00000000..3017bf6a --- /dev/null +++ 
b/src/pipeline/pass_ensemble_routing.h @@ -0,0 +1,8 @@ +#ifndef CBM_PASS_ENSEMBLE_ROUTING_H +#define CBM_PASS_ENSEMBLE_ROUTING_H + +#include "pipeline/pipeline_internal.h" + +void cbm_pipeline_pass_ensemble_routing(cbm_pipeline_ctx_t *ctx); + +#endif /* CBM_PASS_ENSEMBLE_ROUTING_H */ diff --git a/src/pipeline/pass_parallel.c b/src/pipeline/pass_parallel.c index 180ee85f..b480b183 100644 --- a/src/pipeline/pass_parallel.c +++ b/src/pipeline/pass_parallel.c @@ -66,6 +66,9 @@ enum { PP_CSHARP_M_PREFIX_LEN = 2 }; #include "foundation/profile.h" #include "foundation/compat_regex.h" #include "cbm.h" +#include "arena.h" +#include "macro_table.h" +#include "iris_export_xml.h" #include "simhash/minhash.h" #include "semantic/ast_profile.h" @@ -485,6 +488,9 @@ typedef struct { cbm_pkg_entries_t *pkg_entries; /* per-worker manifest arrays (separate allocation) */ _Atomic int64_t retained_bytes; /* total source bytes copied into result arenas */ + + const CBMMacroTable *macro_table; /* ObjectScript $$$macros (NULL if none) */ + const CBMReturnTypeTable *return_type_table; /* ObjectScript return types (NULL if none) */ } extract_ctx_t; /* Insert one definition node (and its route if present) into the local gbuf. */ @@ -586,8 +592,38 @@ static void extract_worker(int worker_id, void *ctx_ptr) { uint64_t file_t0 = extract_now_ns(); - CBMFileResult *result = cbm_extract_file(source, source_len, fi->language, ec->project_name, - fi->rel_path, CBM_EXTRACT_BUDGET, NULL, NULL); + /* ObjectScript Studio Export XML: transcode each to UDL and + * extract directly into the local gbuf (the per-file cache holds a single + * result, so multi-class Export files are processed inline here). 
*/ + if (fi->language == CBM_LANG_OBJECTSCRIPT_EXPORT) { + CBMArena ea; + cbm_arena_init(&ea); + int cc = 0; + char **udls = cbm_iris_export_to_udl(&ea, source, source_len, &cc); + for (int ci = 0; ci < cc; ci++) { + CBMFileResult *xr = + cbm_extract_file_ex(udls[ci], (int)strlen(udls[ci]), CBM_LANG_OBJECTSCRIPT_UDL, + ec->project_name, fi->rel_path, CBM_EXTRACT_BUDGET, NULL, + NULL, ec->macro_table, ec->return_type_table); + if (!xr) { + continue; + } + for (int d = 0; d < xr->defs.count; d++) { + CBMDefinition *def = &xr->defs.items[d]; + if (def->qualified_name && def->name) { + insert_def_into_gbuf(ws, fi, def); + } + } + cbm_free_result(xr); + } + cbm_arena_destroy(&ea); + free_source(source); + continue; + } + + CBMFileResult *result = cbm_extract_file_ex( + source, source_len, fi->language, ec->project_name, fi->rel_path, CBM_EXTRACT_BUDGET, + NULL, NULL, ec->macro_table, ec->return_type_table); uint64_t file_elapsed_ms = (extract_now_ns() - file_t0) / PP_USEC_PER_MS; @@ -707,6 +743,55 @@ static void log_extract_mem_stats(int worker_count) { } } +/* ObjectScript: build the $$$macro table from .inc files (parallel path). + * Returns NULL when no ObjectScript include files exist. Caller owns it. 
*/ +static CBMMacroTable *pp_build_macro_table(const cbm_file_info_t *files, int count) { + bool has_inc = false; + for (int i = 0; i < count; i++) { + if (files[i].language == CBM_LANG_OBJECTSCRIPT_ROUTINE && files[i].path && + strstr(files[i].path, ".inc")) { + has_inc = true; + break; + } + } + if (!has_inc) { + return NULL; + } + CBMMacroTable *mt = (CBMMacroTable *)calloc(1, sizeof(CBMMacroTable)); + if (!mt) { + return NULL; + } + CBMArena mt_arena; + cbm_arena_init(&mt_arena); + cbm_macro_table_init_system(mt); + for (int i = 0; i < count; i++) { + if (files[i].language != CBM_LANG_OBJECTSCRIPT_ROUTINE) { + continue; + } + if (!files[i].path || !strstr(files[i].path, ".inc")) { + continue; + } + FILE *f = fopen(files[i].path, "rb"); + if (!f) { + continue; + } + fseek(f, 0, SEEK_END); + long fsize = ftell(f); + rewind(f); + if (fsize > 0) { + char *src = (char *)malloc((size_t)fsize + 1); + if (src) { + size_t nread = fread(src, 1, (size_t)fsize, f); + src[nread] = '\0'; + cbm_parse_inc_file(mt, &mt_arena, src); + free(src); + } + } + (void)fclose(f); + } + return mt; +} + int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count, CBMFileResult **result_cache, _Atomic int64_t *shared_ids, int worker_count) { @@ -756,6 +841,9 @@ int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, /* Per-worker manifest entry arrays (separate from cache-line-aligned worker state) */ cbm_pkg_entries_t *pkg_entries = calloc(worker_count, sizeof(cbm_pkg_entries_t)); + /* ObjectScript macro table (NULL when no .inc include files present). 
*/ + CBMMacroTable *pp_macro_table = pp_build_macro_table(files, file_count); + extract_ctx_t ec = { .files = files, .sorted = sorted, @@ -768,6 +856,8 @@ int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, .shared_ids = shared_ids, .cancelled = ctx->cancelled, .pkg_entries = pkg_entries, + .macro_table = pp_macro_table, + .return_type_table = ctx->return_type_table, }; atomic_init(&ec.next_worker_id, 0); atomic_init(&ec.next_file_idx, 0); @@ -797,6 +887,7 @@ int cbm_parallel_extract(cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, cbm_aligned_free(workers); free(sorted); + free(pp_macro_table); /* ObjectScript macro table (NULL-safe) */ if (atomic_load(ctx->cancelled)) { return CBM_NOT_FOUND; diff --git a/src/pipeline/pipeline.c b/src/pipeline/pipeline.c index c080a285..3fa0b63f 100644 --- a/src/pipeline/pipeline.c +++ b/src/pipeline/pipeline.c @@ -18,9 +18,12 @@ enum { CBM_DIR_PERMS = 0755, PL_RING = 4, PL_RING_MASK = 3, PL_SEQ_PASSES = 6, P #include "pipeline/artifact.h" #include "pipeline/pipeline_internal.h" #include "pipeline/pass_lsp_cross.h" +#include "pipeline/pass_ensemble_routing.h" #include "pipeline/worker_pool.h" #include "graph_buffer/graph_buffer.h" #include "store/store.h" +#include "macro_table.h" +#include "arena.h" #include "discover/discover.h" #include "discover/userconfig.h" #include "foundation/platform.h" @@ -489,6 +492,9 @@ static void predump_cfg(cbm_pipeline_ctx_t *ctx) { static void predump_complexity(cbm_pipeline_ctx_t *ctx) { cbm_pipeline_pass_complexity(ctx); } +static void predump_ensemble(cbm_pipeline_ctx_t *ctx) { + cbm_pipeline_pass_ensemble_routing(ctx); +} static void run_predump_passes(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx) { static const struct { @@ -496,11 +502,12 @@ static void run_predump_passes(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx) { const char *name; bool moderate_only; /* true = skip in fast mode */ } passes[] = { - {predump_deco, "decorator_tags", false}, {predump_cfg, 
"configlink", false}, - {predump_route, "route_match", false}, {predump_sim, "similarity", true}, - {predump_sem, "semantic_edges", true}, {predump_complexity, "complexity", false}, + {predump_deco, "decorator_tags", false}, {predump_cfg, "configlink", false}, + {predump_route, "route_match", false}, {predump_sim, "similarity", true}, + {predump_sem, "semantic_edges", true}, {predump_complexity, "complexity", false}, + {predump_ensemble, "ensemble_routing", false}, }; - enum { PREDUMP_PASS_COUNT = 6 }; + enum { PREDUMP_PASS_COUNT = 7 }; struct timespec t; for (int i = 0; i < PREDUMP_PASS_COUNT && !check_cancel(p); i++) { /* "moderate_only" passes (similarity/semantic edges) run in FULL, @@ -533,6 +540,61 @@ static int seq_pass_lsp_cross_dispatch(cbm_pipeline_ctx_t *ctx, const cbm_file_i } /* Run the sequential pipeline path: definitions, k8s, lsp_cross, calls, usages, semantic. */ +/* Build the ObjectScript $$$macro table from .inc include files in the repo. + * Returns NULL (and does no work) when no ObjectScript include files exist. + * Caller owns the returned heap table. 
*/ +static CBMMacroTable *cbm_build_macro_table_from_files(const cbm_file_info_t *files, int count, + const char *repo_path) { + (void)repo_path; + bool has_inc = false; + for (int i = 0; i < count; i++) { + if (files[i].language == CBM_LANG_OBJECTSCRIPT_ROUTINE && files[i].path && + strstr(files[i].path, ".inc")) { + has_inc = true; + break; + } + } + if (!has_inc) { + return NULL; + } + + CBMMacroTable *mt = (CBMMacroTable *)calloc(1, sizeof(CBMMacroTable)); + if (!mt) { + return NULL; + } + + CBMArena arena; + cbm_arena_init(&arena); + cbm_macro_table_init_system(mt); + + for (int i = 0; i < count; i++) { + if (files[i].language != CBM_LANG_OBJECTSCRIPT_ROUTINE) { + continue; + } + if (!files[i].path || !strstr(files[i].path, ".inc")) { + continue; + } + FILE *f = fopen(files[i].path, "rb"); + if (!f) { + continue; + } + fseek(f, 0, SEEK_END); + long fsize = ftell(f); + rewind(f); + if (fsize > 0) { + char *src = (char *)malloc((size_t)fsize + 1); + if (src) { + size_t nread = fread(src, 1, (size_t)fsize, f); + src[nread] = '\0'; + cbm_parse_inc_file(mt, &arena, src); + free(src); + } + } + (void)fclose(f); + } + return mt; +} + static int run_sequential_pipeline(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx, const cbm_file_info_t *files, int file_count, struct timespec *t) { @@ -549,6 +611,13 @@ static int run_sequential_pipeline(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx, if (seq_cache) { ctx->result_cache = seq_cache; } + + /* ObjectScript: build the $$$macro table from .inc include files so that + * pass_calls can resolve macro-mediated dispatch. NULL when not present. 
*/ + CBMMacroTable *mt = cbm_build_macro_table_from_files(files, file_count, ctx->repo_path); + if (mt) { + ctx->macro_table = mt; + } typedef int (*seq_pass_fn)(cbm_pipeline_ctx_t *, const cbm_file_info_t *, int); static const struct { seq_pass_fn fn; @@ -592,6 +661,15 @@ static int run_sequential_pipeline(cbm_pipeline_t *p, cbm_pipeline_ctx_t *ctx, free(seq_cache); ctx->result_cache = NULL; } + /* ObjectScript: free the macro / return-type tables built for this run. */ + if (ctx->macro_table) { + free((void *)ctx->macro_table); + ctx->macro_table = NULL; + } + if (ctx->return_type_table) { + free((void *)ctx->return_type_table); + ctx->return_type_table = NULL; + } return rc; } diff --git a/src/pipeline/pipeline_internal.h b/src/pipeline/pipeline_internal.h index 1eb10842..0beb2f57 100644 --- a/src/pipeline/pipeline_internal.h +++ b/src/pipeline/pipeline_internal.h @@ -69,6 +69,14 @@ typedef struct { * configs are an easy follow-on). NULL when no usable configs were found. * Owned by pipeline.c / pipeline_incremental.c. */ const cbm_path_alias_collection_t *path_aliases; + + /* ObjectScript $$$macro table built from .inc files in the repo (NULL if + * no ObjectScript include files were found). Owned by pipeline.c. */ + const CBMMacroTable *macro_table; + + /* ObjectScript method-return-type table built from extracted definitions + * (NULL until pass_calls builds it). Owned by pipeline.c. */ + const CBMReturnTypeTable *return_type_table; } cbm_pipeline_ctx_t; /* Get the current pipeline's package map (NULL if none). 
*/ diff --git a/tests/test_extraction.c b/tests/test_extraction.c index 0308372c..4006d1a2 100644 --- a/tests/test_extraction.c +++ b/tests/test_extraction.c @@ -7,6 +7,8 @@ */ #include "test_framework.h" #include "cbm.h" +#include "macro_table.h" +#include "iris_export_xml.h" /* ── Helpers ───────────────────────────────────────────────────── */ @@ -64,6 +66,14 @@ static CBMFileResult *extract(const char *src, CBMLanguage lang, const char *pro return r; } +/* As extract(), but threads an ObjectScript macro table through. */ +static CBMFileResult *extract_with_macros(const char *src, CBMLanguage lang, const char *proj, + const char *path, const CBMMacroTable *mt) { + CBMFileResult *r = + cbm_extract_file_ex(src, (int)strlen(src), lang, proj, path, 0, NULL, NULL, mt, NULL); + return r; +} + /* ═══════════════════════════════════════════════════════════════════ * Group A: OOP Languages * ═══════════════════════════════════════════════════════════════════ */ @@ -2818,10 +2828,784 @@ TEST(complexity_access_depth_and_params) { * Suite * ═══════════════════════════════════════════════════════════════════ */ +/* =================================================================== + * Group H3: ObjectScript return type extraction + * =================================================================== */ + +TEST(objectscript_udl_method_return_type) { + CBMFileResult *r = extract("Class MyApp.Factory Extends %RegisteredObject\n" + "{\n" + "Method GetAdapter() As EnsLib.SQL.OutboundAdapter\n" + "{\n" + " Quit ##class(EnsLib.SQL.OutboundAdapter).%New()\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Factory.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + bool found_rt = false; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "GetAdapter") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].return_type); + ASSERT(strstr(r->defs.items[i].return_type, "EnsLib.SQL.OutboundAdapter") != NULL); + found_rt = true; + } + } + ASSERT(found_rt); + 
cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_scalar_return_type_not_resolved) { + CBMFileResult *r = extract("Class MyApp.Counter Extends %RegisteredObject\n" + "{\n" + "Method GetName() As %String\n" + "{\n" + " Quit \"hello\"\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Counter.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "GetName") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].return_type); + ASSERT(strstr(r->defs.items[i].return_type, "%String") != NULL); + } + } + cbm_free_result(r); + PASS(); +} + +/* =================================================================== + * Group H2: ObjectScript macro expansion + * =================================================================== */ + +TEST(objectscript_udl_class) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n{\n}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Patient")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_methods_after_goto_label) { + CBMFileResult *r = extract("Class Graph.KG.Test Extends %RegisteredObject\n" + "{\n" + "ClassMethod First() As %String\n" + "{\n" + " If 1 { Goto Done }\n" + "Done\n" + " Quit \"x\"\n" + "}\n" + "ClassMethod Second() As %String\n" + "{\n" + " Quit \"y\"\n" + "}\n" + "ClassMethod Third() As %String\n" + "{\n" + " Quit \"z\"\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Test.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "Graph.KG.Test")); + ASSERT(has_def(r, "Method", "First")); + ASSERT(has_def(r, "Method", "Second")); + ASSERT(has_def(r, "Method", "Third")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_methods) { + CBMFileResult *r = extract("Class MyApp.Utils Extends %RegisteredObject\n" + "{\n" + "ClassMethod Format(pVal As %String) As %String\n" + "{\n" 
+ " Quit pVal\n" + "}\n" + "Method Save() As %Status\n" + "{\n" + " Quit ..%Save()\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Utils.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Utils")); + ASSERT(has_def(r, "Method", "Format")); + ASSERT(has_def(r, "Method", "Save")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_base_classes) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n" + "{\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Patient")); + int found = 0; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "MyApp.Patient") == 0) { + found = 1; + ASSERT_NOT_NULL(r->defs.items[i].base_classes); + ASSERT_NOT_NULL(r->defs.items[i].base_classes[0]); + ASSERT_STR_EQ(r->defs.items[i].base_classes[0], "%Persistent"); + } + } + ASSERT_TRUE(found); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_multiple_bases) { + CBMFileResult *r = extract("Class MyApp.Dual Extends (MyApp.Base, %RegisteredObject)\n" + "{\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Dual.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + int found = 0; + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].name, "MyApp.Dual") == 0) { + found = 1; + ASSERT_NOT_NULL(r->defs.items[i].base_classes); + ASSERT_NOT_NULL(r->defs.items[i].base_classes[0]); + ASSERT_NOT_NULL(r->defs.items[i].base_classes[1]); + } + } + ASSERT_TRUE(found); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_properties) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n" + "{\n" + "Property Name As %String;\n" + "Property DOB As %Date;\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Patient")); + ASSERT(has_def(r, "Variable", "Name")); 
+ ASSERT(has_def(r, "Variable", "DOB")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_routine_tags) { + CBMFileResult *r = extract("UTILS\n" + " Quit\n" + "\n" + "Format(value,fmt)\n" + " Set result = $ZDate(value, fmt)\n" + " Quit result\n" + "\n" + "Log(msg)\n" + " Write msg,!\n" + " Quit\n", + CBM_LANG_OBJECTSCRIPT_ROUTINE, "t", "Utils.mac"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Function", "Format")); + ASSERT(has_def(r, "Function", "Log")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_query_member) { + CBMFileResult *r = + extract("Class MyApp.Repo Extends %Persistent\n" + "{\n" + "Query FindAll(name As %String) As %SQLQuery { SELECT * FROM MyApp_Repo }\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Repo.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Class", "MyApp.Repo")); + ASSERT(has_def(r, "Method", "FindAll")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_index_member) { + CBMFileResult *r = extract("Class MyApp.Repo Extends %Persistent\n" + "{\n" + "Property Name As %String;\n" + "Index NameIdx On Name;\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Repo.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Index", "NameIdx")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_xdata_member) { + CBMFileResult *r = extract("Class MyApp.Service Extends %CSP.REST\n" + "{\n" + "XData UrlMap { }\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Service.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "XData", "UrlMap")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_trigger_member) { + CBMFileResult *r = extract("Class MyApp.Log Extends %Persistent\n" + "{\n" + "Trigger AfterInsert [ Event = INSERT ] { }\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Log.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Trigger", "AfterInsert")); + 
cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_trigger_body_quit) { + CBMFileResult *r = extract("Class MyApp.Patient Extends %Persistent\n" + "{\n" + "Trigger OnDeleteSQL [ Event = DELETE, Time = AFTER ] {\n" + " Quit\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Patient.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Trigger", "OnDeleteSQL")); + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].label, "Trigger") == 0 && + strcmp(r->defs.items[i].name, "OnDeleteSQL") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].docstring); + ASSERT(strstr(r->defs.items[i].docstring, "trigger_body") != NULL); + ASSERT(strstr(r->defs.items[i].docstring, "Quit") != NULL); + break; + } + } + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_trigger_body_tokens) { + CBMFileResult *r = extract("Class MyApp.Order Extends %Persistent\n" + "{\n" + "Trigger AfterInsert [ Event = INSERT, Time = AFTER ] {\n" + " Set id = ..%Id()\n" + " Do ##class(MyApp.Audit).Log(id)\n" + " Quit\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Order.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Trigger", "AfterInsert")); + for (int i = 0; i < r->defs.count; i++) { + if (strcmp(r->defs.items[i].label, "Trigger") == 0 && + strcmp(r->defs.items[i].name, "AfterInsert") == 0) { + ASSERT_NOT_NULL(r->defs.items[i].docstring); + ASSERT(strstr(r->defs.items[i].docstring, "trigger_body") != NULL); + ASSERT_NOT_NULL(r->defs.items[i].body_tokens); + ASSERT(strstr(r->defs.items[i].body_tokens, "Log") != NULL || + strstr(r->defs.items[i].body_tokens, "Audit") != NULL || + strstr(r->defs.items[i].body_tokens, "id") != NULL); + break; + } + } + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_self_call_relative_dot_method) { + CBMFileResult *r = + extract("Class HS.Flash.UpdateManager Extends Ens.BusinessProcess\n" + "{\n" + "Method MakeMRNUpToDate(pRequest As HS.Message.FlashQueueUpdate) As 
%Status\n" + "{\n" + " Set tSC = ..processStreamlet(pSession, pTS, tMPIID, tSourceMRN, ii)\n" + " Quit tSC\n" + "}\n" + "Method processStreamlet(pSession As %Integer) As %Status\n" + "{\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "UpdateManager.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "Method", "MakeMRNUpToDate")); + ASSERT(has_call(r, "HS.Flash.UpdateManager.processStreamlet")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_calls_typed_new) { + CBMFileResult *r = extract("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Status\n" + "{\n" + " Set adapter = ##class(EnsLib.SQL.OutboundAdapter).%New()\n" + " Do adapter.ExecuteQuery(\"SELECT 1\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "EnsLib.SQL.OutboundAdapter.ExecuteQuery")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_ensemble_production_def_parses_items) { + CBMFileResult *r = + extract("Class Sample.Production Extends Ens.Production\n" + "{\n" + "XData ProductionDefinition\n" + "{\n" + "\n" + " \n" + " \n" + " \n" + " \n" + "\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Production.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "XData", "ProductionDefinition")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_ensemble_production_def_hs_settings) { + CBMFileResult *r = extract( + "Class HS.Flash.Production Extends Ens.Production\n" + "{\n" + "XData ProductionDefinition\n" + "{\n" + "\n" + " \n" + " FHIROps\n" + " PatientOps\n" + " ConformOps\n" + " \n" + "\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "HSProduction.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_def(r, "XData", "ProductionDefinition")); + cbm_free_result(r); + PASS(); +} + 
+TEST(objectscript_udl_ensemble_production_def_absent_no_error) { + CBMFileResult *r = extract("Class Sample.NonProduction Extends %Persistent\n" + "{\n" + "Method DoSomething() As %Status\n" + "{\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "NonProduction.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(!has_def(r, "XData", "ProductionDefinition")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_calls_typed_param) { + CBMFileResult *r = extract("Class MyApp.Handler Extends %RegisteredObject\n" + "{\n" + "Method Process(req As Ens.Request) As %Status\n" + "{\n" + " Do req.Send()\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Handler.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "Ens.Request.Send")); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_udl_calls_typed_property) { + CBMFileResult *r = extract("Class MyApp.Service Extends Ens.BusinessService\n" + "{\n" + "Property Adapter As EnsLib.SQL.InboundAdapter;\n" + "Method OnProcessInput() As %Status\n" + "{\n" + " Do ..Adapter.ExecuteQuery(\"SELECT 1\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Service.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "EnsLib.SQL.InboundAdapter.ExecuteQuery")); + cbm_free_result(r); + PASS(); +} + +/* =================================================================== + * Group H2: ObjectScript macro expansion + * =================================================================== */ + +TEST(objectscript_macro_expand_system) { + CBMMacroTable mt; + cbm_macro_table_init_system(&mt); + CBMFileResult *r = extract_with_macros("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run(sc As %Status) As %Status\n" + "{\n" + " If $$$ISERR(sc) { Quit sc }\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls", &mt); + ASSERT_NOT_NULL(r); + 
ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "%SYSTEM.Status.IsError")); + cbm_free_result(r); + PASS(); +} + +/* ═══════════════════════════════════════════════════════════════════ + * Group H3: ObjectScript DATA_FLOWS argument extraction + * ═══════════════════════════════════════════════════════════════════ */ + +static int find_call_args(const CBMFileResult *r, const char *callee, const char **out_arg0, + const char **out_arg1) { + if (out_arg0) + *out_arg0 = NULL; + if (out_arg1) + *out_arg1 = NULL; + for (int i = 0; i < r->calls.count; i++) { + if (strstr(r->calls.items[i].callee_name, callee)) { + if (out_arg0 && r->calls.items[i].arg_count > 0) + *out_arg0 = r->calls.items[i].args[0].expr; + if (out_arg1 && r->calls.items[i].arg_count > 1) + *out_arg1 = r->calls.items[i].args[1].expr; + return r->calls.items[i].arg_count; + } + } + return -1; +} + +TEST(objectscript_data_flows_class_method_args) { + CBMFileResult *r = extract("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Status\n" + "{\n" + " Set sql = \"SELECT 1\"\n" + " Do ##class(MyApp.Utils).Transform(sql, \"JSON\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "MyApp.Utils.Transform")); + const char *arg0 = NULL; + const char *arg1 = NULL; + int argc = find_call_args(r, "MyApp.Utils.Transform", &arg0, &arg1); + ASSERT(argc == 2); + ASSERT_NOT_NULL(arg0); + ASSERT(strstr(arg0, "sql") != NULL); + cbm_free_result(r); + PASS(); +} + +TEST(objectscript_macro_expand_local) { + CBMMacroTable mt; + cbm_macro_table_init_system(&mt); + CBMArena arena; + cbm_arena_init(&arena); + const char *inc_content = "ROUTINE MyApp.Include [Type=INC]\n" + "#define MyCheck(%sc) ##class(MyApp.Utils).Validate(%sc)\n"; + cbm_parse_inc_file(&mt, &arena, inc_content); + CBMFileResult *r = extract_with_macros("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run(sc As 
%Status) As %Status\n" + "{\n" + " If $$$MyCheck(sc) { Quit $$$OK }\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls", &mt); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "MyApp.Utils.Validate")); + cbm_free_result(r); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(objectscript_macro_constant_no_extra_call) { + CBMMacroTable mt; + cbm_macro_table_init_system(&mt); + CBMArena arena; + cbm_arena_init(&arena); + const char *inc_content = "ROUTINE MyApp.Include [Type=INC]\n" + "#define MyConst 42\n"; + cbm_parse_inc_file(&mt, &arena, inc_content); + CBMFileResult *r = extract_with_macros("Class MyApp.Caller Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Integer\n" + "{\n" + " Set x = $$$MyConst\n" + " Quit x\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Caller.cls", &mt); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(!has_call(r, "$$$MyConst")); + cbm_free_result(r); + cbm_arena_destroy(&arena); + PASS(); +} + +TEST(objectscript_data_flows_instance_method_args) { + CBMFileResult *r = extract("Class MyApp.Service Extends %RegisteredObject\n" + "{\n" + "Method Run() As %Status\n" + "{\n" + " Set adapter = ##class(EnsLib.SQL.OutboundAdapter).%New()\n" + " Do adapter.ExecuteQuery(\"SELECT 1\")\n" + " Quit $$$OK\n" + "}\n" + "}\n", + CBM_LANG_OBJECTSCRIPT_UDL, "t", "Service.cls"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "EnsLib.SQL.OutboundAdapter.ExecuteQuery")); + const char *arg0 = NULL; + int argc = find_call_args(r, "EnsLib.SQL.OutboundAdapter.ExecuteQuery", &arg0, NULL); + ASSERT(argc == 1); + ASSERT_NOT_NULL(arg0); + cbm_free_result(r); + PASS(); +} + +/* =================================================================== + * Group H4: IRIS Export XML → UDL transcoder + * =================================================================== */ + +#define SIMPLE_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "%RegisteredObject\n" \ + "\n" \ + 
"%String\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_simple_class) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, SIMPLE_EXPORT, (int)strlen(SIMPLE_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + ASSERT_NOT_NULL(udl[0]); + ASSERT(strstr(udl[0], "Test.Simple") != NULL); + ASSERT(strstr(udl[0], "%RegisteredObject") != NULL); + ASSERT(strstr(udl[0], "Hello") != NULL); + ASSERT(strstr(udl[0], "Quit \"hello\"") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + +#define CLASSMETHOD_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "1\n" \ + "pArg:%String,pFlag:%Boolean=0\n" \ + "%Status\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_classmethod) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = + cbm_iris_export_to_udl(&arena, CLASSMETHOD_EXPORT, (int)strlen(CLASSMETHOD_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + ASSERT(strstr(udl[0], "ClassMethod") != NULL); + ASSERT(strstr(udl[0], "pArg") != NULL); + ASSERT(strstr(udl[0], "pFlag") != NULL); + ASSERT(strstr(udl[0], "%Status") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + +#define MEMBER_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "%String\n" \ + "\n" \ + "\n" \ + "\n" \ + "1\n" \ + "\n" \ + "\n" \ + "Name\n" \ + "1\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_property_parameter_index) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, MEMBER_EXPORT, (int)strlen(MEMBER_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + ASSERT(strstr(udl[0], "Property Name") != NULL); + ASSERT(strstr(udl[0], "%String") != NULL); + ASSERT(strstr(udl[0], "Parameter VERSION") != NULL); + ASSERT(strstr(udl[0], "Index NameIdx") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + +#define CALLS_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "%RegisteredObject\n" \ + "\n" \ + "1\n" \ + "%Status\n" 
\ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_calls_extracted) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, CALLS_EXPORT, (int)strlen(CALLS_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 1); + CBMFileResult *r = cbm_extract_file(udl[0], (int)strlen(udl[0]), CBM_LANG_OBJECTSCRIPT_UDL, "t", + "Caller.cls", 0, NULL, NULL); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT(has_call(r, "Target.Worker.Execute")); + cbm_free_result(r); + cbm_arena_destroy(&arena); + PASS(); +} + +#define MULTI_EXPORT \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" \ + "\n" + +TEST(iris_export_xml_multi_class) { + CBMArena arena; + cbm_arena_init(&arena); + int count = 0; + char **udl = cbm_iris_export_to_udl(&arena, MULTI_EXPORT, (int)strlen(MULTI_EXPORT), &count); + ASSERT_NOT_NULL(udl); + ASSERT(count == 2); + ASSERT(strstr(udl[0], "Test.First") != NULL || strstr(udl[1], "Test.First") != NULL); + ASSERT(strstr(udl[0], "Test.Second") != NULL || strstr(udl[1], "Test.Second") != NULL); + cbm_arena_destroy(&arena); + PASS(); +} + SUITE(extraction) { /* Initialize extraction library */ cbm_init(); + /* InterSystems ObjectScript (UDL / routine / Export XML). + * NOTE: the *_udl / *_routine / ensemble / self-call / type-inference / + * macro-expand tests require the tree-sitter-objectscript grammar to be + * linked. The grammar is vendored separately; until then these tests will + * fail to link. The pure cbm_iris_export_to_udl transcoder and macro_table + * unit assertions are grammar-free. 
*/ + RUN_TEST(objectscript_udl_class); + RUN_TEST(objectscript_udl_methods_after_goto_label); + RUN_TEST(objectscript_udl_methods); + RUN_TEST(objectscript_udl_base_classes); + RUN_TEST(objectscript_udl_multiple_bases); + RUN_TEST(objectscript_udl_properties); + RUN_TEST(objectscript_routine_tags); + RUN_TEST(objectscript_udl_query_member); + RUN_TEST(objectscript_udl_index_member); + RUN_TEST(objectscript_udl_xdata_member); + RUN_TEST(objectscript_udl_trigger_member); + RUN_TEST(objectscript_udl_trigger_body_quit); + RUN_TEST(objectscript_udl_trigger_body_tokens); + RUN_TEST(objectscript_udl_ensemble_production_def_parses_items); + RUN_TEST(objectscript_udl_ensemble_production_def_hs_settings); + RUN_TEST(objectscript_udl_ensemble_production_def_absent_no_error); + RUN_TEST(objectscript_udl_self_call_relative_dot_method); + RUN_TEST(objectscript_udl_calls_typed_new); + RUN_TEST(objectscript_udl_calls_typed_param); + RUN_TEST(objectscript_udl_calls_typed_property); + RUN_TEST(objectscript_macro_expand_system); + RUN_TEST(objectscript_macro_expand_local); + RUN_TEST(objectscript_macro_constant_no_extra_call); + RUN_TEST(objectscript_udl_method_return_type); + RUN_TEST(objectscript_udl_scalar_return_type_not_resolved); + RUN_TEST(objectscript_data_flows_class_method_args); + RUN_TEST(objectscript_data_flows_instance_method_args); + RUN_TEST(iris_export_xml_simple_class); + RUN_TEST(iris_export_xml_classmethod); + RUN_TEST(iris_export_xml_property_parameter_index); + RUN_TEST(iris_export_xml_calls_extracted); + RUN_TEST(iris_export_xml_multi_class); + /* R box-module imports + member calls */ RUN_TEST(extract_r_box_use_imports_issue218); RUN_TEST(extract_r_dollar_call_issue219);