# From patch 0c95cd4a98a28d63cbe7ea6339fa0544df32ef2b (JCS-15031: refactor ds discovery)
# Reconstructed post-patch version of bin/owm.sh::process_datasources.
#
# Discover JDBC datasources from a WDT model file by delegating to the
# pure-Python discover_ds.py script (replaces the old discoverDatasources.sh).
#
# Arguments:
#   $1 - model file; either a full path or a bare file name under $toolHome/out
# Globals read:  toolHome, ON_PREM_ENV_FILE
# Globals set:   exit_code (intentionally left global; other steps inspect it)
# Exits:         2 if the model file cannot be found; the python exit code on
#                any failure other than 0 or 1.
process_datasources(){
    local wls_inventory_file="$1"
    local input_file

    # Resolve the model file: prefer the path exactly as given, then fall
    # back to the tool's output directory.
    if [[ -f "$wls_inventory_file" ]]; then
        input_file="$wls_inventory_file"
    elif [[ -f "$toolHome/out/$wls_inventory_file" ]]; then
        input_file="$toolHome/out/$wls_inventory_file"
    else
        log "error" " Model file [$wls_inventory_file] not found in [$toolHome/out]. Exiting."
        exit 2
    fi

    # Python implementation of the datasource discovery step.
    # local: avoid leaking the helper path into the global environment.
    local PYTHON_SCRIPT="$toolHome/lib/python/discover_ds.py"

    log "info" "Executing discover_ds.py on [$input_file]"
    python3 "$PYTHON_SCRIPT" \
        --input_model "$input_file" \
        --env_file "$ON_PREM_ENV_FILE"
    exit_code=$?
    log "info" "Executed discover_ds.py with exit code [$exit_code]"

    # Exit code 1 is tolerated (partial/benign result — TODO confirm against
    # discover_ds.py semantics); anything else non-zero is a hard failure.
    # $exit_code is quoted so an unexpectedly empty value fails loudly
    # instead of producing a malformed test expression.
    if [ "$exit_code" -ne 0 ] && [ "$exit_code" -ne 1 ]; then
        log "error" " Error executing datasource discovery with exit code [$exit_code]."
        exit "$exit_code"
    fi
    log "info" ""
}
"""
Copyright (c) 2025, Oracle Corporation and/or its affiliates.
Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.

Process a WebLogic Deploy Tooling (WDT) model file and extract unique JDBC
connection URLs from all JDBCSystemResource entries. Replicates the WDT
discoverDomain behavior without requiring any Oracle/WLS imports and without
any third-party template libraries.

The script:
  - Loads a WDT model (JSON or YAML; PyYAML is only needed for YAML input)
  - Extracts JDBC URLs from JDBCSystemResource/JDBCDriverParams
  - Ensures uniqueness and order preservation
  - Reads bucket_name from on-prem.env
  - Renders Mustache templates (pure Python)
  - Outputs Terraform-compatible files

Usage:
    python discover_ds.py --input_model <discovered.json> --env_file <on-prem.env>

Output (written to <toolHome>/oci/generated):
    - db-connection-string.auto.tfvars
    - schema.yaml
    - locals-db-connection-string.tf
    - data-oci-db-resources.tf
    - variables-db-connection-string.tf
"""

import argparse
import json
import os
import re
from collections import OrderedDict

# Resolve toolHome from the script location: lib/python/ -> two levels up.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
TOOL_HOME = os.path.abspath(os.path.join(SCRIPT_DIR, "..", ".."))

# Templates directory (toolHome-relative).
TEMPLATE_DIR = os.path.join(TOOL_HOME, "lib", "python", "templates")

# ---------------------------------------------------------
# Pure-Python Mustache-like template renderer
# ---------------------------------------------------------

def render_template_string(template, context):
    """
    Render a Mustache-like template string using pure Python.

    Supported syntax:
      - {{var}}                        -> str(context['var'])
      - {{{var}}}                      -> str(context['var']) (literal)
      - {{#section}} ... {{/section}}  -> conditional / repeated block:
          * bool: rendered iff True
          * str:  rendered iff value.lower() == "true"
          * list of dicts: rendered once per item, item keys merged into
            the context; nested sections resolved recursively

    Key behaviors: indentation inside sections is preserved, standalone
    section lines vanish when a section is empty, nested sections are
    handled by repeated passes, and runs of 3+ blank lines are collapsed
    so the output stays YAML-safe.

    Arguments:
        template (str): Raw template text with Mustache-like placeholders.
        context (dict): Variables and section values for rendering.

    Returns:
        str: Rendered text, always ending in exactly one newline.
    """

    # ^[ \t]*        leading indentation of the section-open line
    # {{#(\w+)}}\s*\n section opener on its own line
    # (.*?)          non-greedy body (DOTALL: may span lines)
    # \n[ \t]*{{/\1}}\s*$  matching closer on its own line
    section_re = re.compile(
        r'(?m)^[ \t]*{{#(\w+)}}\s*\n(.*?)\n[ \t]*{{/\1}}\s*$',
        re.DOTALL
    )

    def render_section(match):
        """Render one {{#name}}...{{/name}} section; '' if falsy/unknown."""
        section = match.group(1)
        block = match.group(2)
        value = context.get(section)

        # Boolean flag: render the block only when True.
        if isinstance(value, bool):
            return render_template_string(block, context) if value else ""

        # String flag: only the literal (case-insensitive) "true" renders.
        if isinstance(value, str):
            return render_template_string(block, context) if value.lower() == "true" else ""

        # List of dicts: render once per item with the item merged into
        # a copy of the context so outer variables stay visible.
        if isinstance(value, list):
            rendered = []
            for item in value:
                sub_ctx = context.copy()
                if isinstance(item, dict):
                    sub_ctx.update(item)
                rendered.append(render_template_string(block, sub_ctx))
            return "\n".join(rendered)

        # Missing key or unsupported type: drop the section entirely.
        return ""

    # Repeat until stable so nested sections introduced by substitution
    # are resolved too.
    prev = None
    while prev != template:
        prev = template
        template = section_re.sub(render_section, template)

    # Triple braces first ({{{var}}} is literal/unescaped) so the double
    # brace pass below cannot mangle them.
    template = re.sub(
        r'{{{\s*(\w+)\s*}}}',
        lambda m: str(context.get(m.group(1), "")),
        template
    )

    # Plain {{var}} substitution; unknown names become "".
    template = re.sub(
        r'{{\s*(\w+)\s*}}',
        lambda m: str(context.get(m.group(1), "")),
        template
    )

    # Collapse 3+ consecutive newlines to keep the output YAML-safe.
    template = re.sub(r'\n{3,}', '\n\n', template)

    # Normalize to exactly one trailing newline.
    return template.rstrip() + "\n"


def render_template_file(template_path, output_path, context):
    """
    Render a .mustache template file and write the result to output_path.

    Arguments:
        template_path (str): Path to the template file.
        output_path (str): Destination path for the rendered output.
        context (dict): Variables for rendering.
    """
    with open(template_path, "r") as f:
        tmpl = f.read()

    rendered = render_template_string(tmpl, context)

    # Guard: dirname is "" for a bare filename, and makedirs("") raises.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_path, "w") as f:
        f.write(rendered)


# ---------------------------------------------------------
# Load model
# ---------------------------------------------------------

def load_model(path):
    """
    Load a WDT model file (YAML or JSON, selected by file extension).

    PyYAML is imported lazily so JSON models work without it installed.
    Files are opened via context managers so handles are always closed.

    Arguments:
        path (str): Path to the WDT model file.

    Returns:
        dict: Parsed WDT model.
    """
    if path.lower().endswith((".yaml", ".yml")):
        import yaml  # third-party; only required for YAML models
        with open(path, "r") as f:
            return yaml.safe_load(f)
    with open(path, "r") as f:
        return json.load(f)


# ---------------------------------------------------------
# Read bucket_name from on-prem.env
# ---------------------------------------------------------

def get_bucket_name(env_file):
    """
    Read bucket_name (honoring skip_transfer) from the on-prem.env file.

    Follows the WDT tool logic:
      - skip_transfer in (true/1/yes)  -> return ""
      - otherwise                      -> return the bucket_name value

    Arguments:
        env_file (str): Path to the on-prem.env file.

    Returns:
        str: bucket_name, or "" when missing/skipped/file absent.
    """
    if not os.path.exists(env_file):
        return ""

    bucket = ""
    skip = False

    with open(env_file, "r") as f:
        for raw in f:
            # Strip first so indented comment lines are also skipped.
            line = raw.strip()
            if "=" not in line or line.startswith("#"):
                continue

            # Accept KEY=VALUE with optional single/double quotes.
            k, v = [x.strip().strip('"').strip("'") for x in line.split("=", 1)]

            if k == "skip_transfer" and v.lower() in ("true", "1", "yes"):
                skip = True
            elif k == "bucket_name":
                bucket = v

    return "" if skip else bucket


# ---------------------------------------------------------
# WDT-accurate JDBC URL extraction
# ---------------------------------------------------------

def extract_jdbc_urls_from_driverparams(params):
    """
    Extract JDBC connection URL(s) from a JDBCDriverParams block.

    Accepts the key spelled URL, Url, or url (first match wins); the value
    may be a single string or a list (non-string items are dropped).

    Arguments:
        params (dict | None): JDBCDriverParams object.

    Returns:
        list[str]: JDBC URLs (possibly empty).
    """
    if not params:
        return []

    for key in ("URL", "Url", "url"):
        if key in params:
            val = params[key]

            if isinstance(val, list):
                return [v for v in val if isinstance(v, str)]

            if isinstance(val, str):
                return [val]

            # Key present but neither str nor list: nothing usable.
            return []

    return []


def extract_datasources(model):
    """
    Extract all JDBC datasources from the WDT model.

    Arguments:
        model (dict): Loaded WDT model.

    Returns:
        dict with:
            datasources: list of {datasourceName, datasourceUrl, url}
                         ("url" is kept as an alias for template
                         compatibility)
            hasDatasources: bool
            is_mds: "true"/"false" (string flags drive the renderer)
    """
    resources = model.get("resources", {})
    jdbc_sys = resources.get("JDBCSystemResource", {})

    # OrderedDict used as an ordered set: dedupe while preserving the
    # first-seen order of URLs.
    unique_urls = OrderedDict()
    is_mds = False

    for name, ds in jdbc_sys.items():
        jdbc_res = ds.get("JdbcResource", {})
        driver_params = jdbc_res.get("JDBCDriverParams", {})
        data_params = jdbc_res.get("JDBCDataSourceParams", {})

        # A DataSourceList marks a WebLogic multi-datasource (MDS).
        if "DataSourceList" in data_params:
            is_mds = True

        for u in extract_jdbc_urls_from_driverparams(driver_params):
            unique_urls[u] = True

    # BUGFIX: entries were previously keyed "url" while main() and the
    # documented contract read "datasourceUrl", causing a KeyError on every
    # successful run. Emit both keys so older templates keep working.
    entries = [
        {"datasourceName": str(i), "datasourceUrl": url, "url": url}
        for i, url in enumerate(unique_urls.keys())
    ]

    return {
        "datasources": entries,
        "hasDatasources": bool(entries),
        "is_mds": "true" if is_mds else "false"
    }


# ---------------------------------------------------------
# GENERIC PLACEHOLDER NORMALIZATION
# ---------------------------------------------------------

def normalize_placeholders(obj):
    """
    Replace WDT/ORM placeholders of the form {value} with "".

    Applied recursively to dicts and lists; non-placeholder values are
    returned unchanged.
    NOTE(review): currently not called by main() — kept as a public helper.
    """
    if isinstance(obj, dict):
        return {k: normalize_placeholders(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [normalize_placeholders(v) for v in obj]
    if isinstance(obj, str):
        # A string that is exactly "{...}" is an unresolved placeholder.
        if re.fullmatch(r"\{[^}]+\}", obj):
            return ""
        return obj
    return obj


# ---------------------------------------------------------
# Main
# ---------------------------------------------------------

def main(input_model, env_file):
    """
    Orchestrate discovery: load model, extract URLs, render templates.

    Arguments:
        input_model (str): Path to the WDT model file.
        env_file (str): Path to the on-prem.env file.
    """
    # Fixed output directory matching the WDT tool layout.
    out_dir = os.path.join(TOOL_HOME, "oci", "generated")
    os.makedirs(out_dir, exist_ok=True)

    # Load WDT model.
    model = load_model(input_model)

    # Read bucket name (empty when skip_transfer is enabled).
    bucket = get_bucket_name(env_file)

    # Extract datasource URLs and build the template context.
    ds_info = extract_datasources(model)
    ds_info["oci_bucket_name"] = bucket

    # Output file -> template file, matching the WDT structure.
    template_map = {
        "db-connection-string.auto.tfvars": "db-connection-string.auto.tfvars.mustache",
        "schema.yaml": "schema.yaml.mustache",
        "locals-db-connection-string.tf": "locals-db-connection-string.tf.mustache",
        "data-oci-db-resources.tf": "data-oci-db-resources.tf.mustache",
        "variables-db-connection-string.tf": "variables-db-connection-string.tf.mustache"
    }

    # Render each mustache template into its output file.
    for outfile, tmpl in template_map.items():
        render_template_file(
            os.path.join(TEMPLATE_DIR, tmpl),
            os.path.join(out_dir, outfile),
            ds_info
        )

    # Summary output.
    print("\nGenerated Terraform datasource artifacts in:")
    print(os.path.abspath(out_dir))

    print("\nDiscovered JDBC URLs:")
    for d in ds_info["datasources"]:
        print(" -", d["datasourceUrl"])


# ---------------------------------------------------------
# CLI entry
# ---------------------------------------------------------

if __name__ == "__main__":
    # argparse is imported once at module level (the previous in-guard
    # re-import was redundant).
    p = argparse.ArgumentParser(description="WDT-compatible JDBC discovery (no Jinja2)")
    p.add_argument("--input_model", required=True, help="Path to input discovered.json")
    p.add_argument("--env_file", required=True, help="Path to the on-prem.env file")
    args = p.parse_args()

    main(
        args.input_model,
        args.env_file
    )