diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c0c0eb1..e4a59ab 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,10 +1,79 @@ -# .github/dependabot.yml version: 2 + updates: - package-ecosystem: "pip" - directory: "/" # location of requirements.txt or pyproject.toml - target-branch: "staging" # open PRs against staging instead of main + directory: "/" + target-branch: "staging" schedule: - interval: "weekly" # check for updates once a week - open-pull-requests-limit: 5 # max concurrent Dependabot PRs - rebase-strategy: "auto" # auto-rebase PRs when they fall out of date + interval: "weekly" + day: "monday" + time: "04:00" + timezone: "Etc/UTC" + open-pull-requests-limit: 5 + rebase-strategy: "auto" + labels: + - "dependencies" + - "python" + commit-message: + prefix: "deps" + include: "scope" + groups: + python-runtime: + patterns: + - "networkx" + - "pandas" + - "rdkit" + - "regex" + - "requests" + - "scikit-learn" + - "seaborn" + python-optional: + patterns: + - "numpy" + - "sympy" + - "torch" + docs: + patterns: + - "sphinx*" + - "pydata-sphinx-theme" + - "sphinx-rtd-theme" + - "graphviz" + - "myst-parser" + + - package-ecosystem: "github-actions" + directory: "/" + target-branch: "staging" + schedule: + interval: "weekly" + day: "monday" + time: "04:30" + timezone: "Etc/UTC" + open-pull-requests-limit: 5 + rebase-strategy: "auto" + labels: + - "dependencies" + - "github-actions" + commit-message: + prefix: "ci" + include: "scope" + groups: + github-actions: + patterns: + - "*" + + - package-ecosystem: "docker" + directory: "/" + target-branch: "staging" + schedule: + interval: "weekly" + day: "monday" + time: "05:00" + timezone: "Etc/UTC" + open-pull-requests-limit: 2 + rebase-strategy: "auto" + labels: + - "dependencies" + - "docker" + commit-message: + prefix: "deps" + include: "scope" diff --git a/.github/workflows/conda-forge-publish.yml b/.github/workflows/conda-forge-publish.yml index d6e1bf8..e0cb9fd 100644 --- 
a/.github/workflows/conda-forge-publish.yml +++ b/.github/workflows/conda-forge-publish.yml @@ -21,13 +21,14 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: + miniconda-version: "latest" channels: conda-forge auto-update-conda: true auto-activate-base: true @@ -53,13 +54,14 @@ jobs: pkg_paths: ${{ steps.build.outputs.paths }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: + miniconda-version: "latest" channels: conda-forge auto-update-conda: true auto-activate-base: true diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 363e899..18b04c8 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -20,16 +20,16 @@ jobs: steps: - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up QEMU (optional, for multi‑arch) uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Log in to Docker Hub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USER }} password: ${{ secrets.DOCKERHUB_TOKEN }} diff --git a/.github/workflows/test-and-lint.yml b/.github/workflows/test-and-lint.yml index d18ff2a..52934be 100644 --- a/.github/workflows/test-and-lint.yml +++ b/.github/workflows/test-and-lint.yml @@ -2,7 +2,7 @@ name: Test & Lint on: push: - branches: [ "main", "staging", "fix_query" ] + branches: [ "main", "staging", "mech" ] pull_request: branches: [ "main" ] @@ -20,11 +20,11 @@ jobs: steps: # 0) Check out the code - - uses: actions/checkout@v3 + - uses: 
actions/checkout@v4 # 1) Install Miniconda (downloaded — the “bundled” version was removed) - name: Set up Miniconda - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniconda-version: "latest" # <<–‑‑ mandatory or the action fails python-version: "3.11" diff --git a/.github/workflows/verify-pypi-install.yml b/.github/workflows/verify-pypi-install.yml index 4306e76..201bc7e 100644 --- a/.github/workflows/verify-pypi-install.yml +++ b/.github/workflows/verify-pypi-install.yml @@ -1,65 +1,96 @@ -# .github/workflows/verify-synkit-pypi-install.yml -name: Verify SynKit PyPI install +name: Verify PyPI install on: workflow_dispatch: inputs: - branches: + package-version: + description: "Optional exact SynKit version to install, for example 1.4.0" + required: false type: string - required: true - default: refractor - - # Scheduled test every Monday at 03:00 UTC schedule: - - cron: '0 3 * * 1' + - cron: "0 3 * * 1" + +permissions: + contents: read + +concurrency: + group: verify-pypi-install-${{ github.event_name }}-${{ github.event.inputs['package-version'] || 'latest' }} + cancel-in-progress: false jobs: verify: - name: Verify PyPI install on ${{ matrix.os }} + name: ${{ matrix.os }} / Python ${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.11", "3.12"] steps: - - name: Setup Python - uses: actions/setup-python@v4 + - name: Set up Python + uses: actions/setup-python@v6 with: - python-version: '3.x' + python-version: ${{ matrix.python-version }} + cache: "pip" - - name: Create & activate virtualenv, upgrade pip, install SynKit + - name: Install SynKit from PyPI + shell: bash run: | - python -m venv venv - source venv/bin/activate python -m pip install --upgrade pip - pip install synkit[all] - - name: Show installed SynKit version + version="${{ github.event.inputs['package-version'] 
}}" + if [ -n "$version" ]; then + python -m pip install "synkit==$version" + else + python -m pip install synkit + fi + python -m pip install packaging + + - name: Show installed package metadata + shell: bash run: | - source venv/bin/activate - python -c "import importlib.metadata as m; print('SynKit version:', m.version('synkit'))" + python - <<'PY' + import importlib.metadata as metadata + import sys + + print("Python:", sys.version) + print("SynKit:", metadata.version("synkit")) + PY - - name: Write smoke-test script + - name: Run import and smoke tests + shell: bash run: | - cat << 'EOF' > test_synkit.py - from synkit.IO import rsmi_to_rsmarts + python - <<'PY' + import importlib.metadata as metadata - template = ( - '[C:2]=[O:3].[C:4]([H:7])[H:8]' - '>>' - '[C:2]=[C:4].[O:3]([H:7])[H:8]' - ) + from packaging.version import Version - smart = rsmi_to_rsmarts(template) - print("Reaction SMARTS:", smart) - EOF + from synkit.IO import rsmi_to_its, rsmi_to_rsmarts - - name: Run smoke-test - run: | - source venv/bin/activate - python test_synkit.py + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + smarts = rsmi_to_rsmarts(rsmi) + assert ">>" in smarts + + version = Version(metadata.version("synkit")) + if version >= Version("1.4.0"): + import networkx as nx + + from synkit.Graph.MTG.mtg import MTG + + its = rsmi_to_its(rsmi, core=False, format="tuple") + assert isinstance(its, nx.Graph) + assert not any("typesGH" in attrs for _, attrs in its.nodes(data=True)) + + mtg_steps = [ + "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]", + "[CH3:1][NH3+:3].[Cl-:2]>>[CH3:1][NH2:3].[Cl:2][H]", + ] + mtg = MTG(mtg_steps, mcs_mol=True) + graph = mtg.get_mtg() + assert mtg._tuple_its + assert graph.number_of_nodes() > 0 - - name: Success message - run: echo "✅ synkit[all] installed and smoke-test passed" + print("PyPI SynKit smoke tests passed.") + PY diff --git a/.gitignore b/.gitignore index 914dfa2..00269bf 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,8 @@ 
synkit/Graph/dev/* *.json *.txt *.log +sprint/ +test_syn.py +.gitignore +measure_candidate_stages.py +run_valid_bug_cases.py diff --git a/Test/CRN/Props/test_dynamics.py b/Test/CRN/Props/test_dynamics.py index b01b4b6..ac7ebfa 100644 --- a/Test/CRN/Props/test_dynamics.py +++ b/Test/CRN/Props/test_dynamics.py @@ -206,6 +206,9 @@ def test_jacobian_pattern_bipartite(self) -> None: self.assertTrue(B.has_edge("row:1", "col:1")) +@unittest.skipUnless( + dynamics._SYMPY_AVAILABLE, "sympy is required for symbolic dynamics tests" +) class TestDynamicsMatrices(unittest.TestCase): def test_symbolic_reactivity_matrix_cycle(self) -> None: G = make_cycle_crn() @@ -429,6 +432,9 @@ def test_to_dict_and_str(self) -> None: self.assertIn("classification", text) +@unittest.skipUnless( + dynamics._SYMPY_AVAILABLE, "sympy is required for exact singularity tests" +) class TestStructuralSingularitySummary(unittest.TestCase): def test_single_step_is_singular_by_pattern(self) -> None: G = make_single_step_crn() diff --git a/Test/Graph/FG/__init__.py b/Test/Graph/FG/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Test/Graph/FG/__init__.py @@ -0,0 +1 @@ + diff --git a/Test/Graph/FG/test_api.py b/Test/Graph/FG/test_api.py new file mode 100644 index 0000000..c71f73e --- /dev/null +++ b/Test/Graph/FG/test_api.py @@ -0,0 +1,24 @@ +import pytest + +from synkit.Graph.FG import smiles_to_graph_and_functional_groups + + +def test_smiles_to_graph_and_functional_groups_supports_unmapped_smiles(): + graph, groups = smiles_to_graph_and_functional_groups("CC(=O)O") + + assert tuple(graph.nodes) == (1, 2, 3, 4) + assert groups == [("carboxylic_acid", (2, 3, 4))] + + +def test_smiles_to_graph_and_functional_groups_preserves_atom_map_node_ids(): + graph, groups = smiles_to_graph_and_functional_groups( + "[CH3:10][C:20](=[O:30])[OH:40]" + ) + + assert tuple(graph.nodes) == (10, 20, 30, 40) + assert groups == [("carboxylic_acid", (20, 30, 40))] + + +def 
test_smiles_to_graph_and_functional_groups_rejects_invalid_smiles(): + with pytest.raises(ValueError): + smiles_to_graph_and_functional_groups("not_smiles") diff --git a/Test/Graph/FG/test_audit.py b/Test/Graph/FG/test_audit.py new file mode 100644 index 0000000..2b25cb9 --- /dev/null +++ b/Test/Graph/FG/test_audit.py @@ -0,0 +1,21 @@ +from synkit.Graph.FG.audit import audit_reaction_smiles + + +def test_audit_reaction_smiles_summarizes_small_corpus(): + report = audit_reaction_smiles( + [ + "CCO>>CC=O", + "c1ncnnc1>>c1ncnnc1", + ] + ) + + assert report.reactions == 2 + assert report.molecules == 4 + assert report.parse_failures == 0 + assert report.label_counts["primary_alcohol"] == 1 + assert report.label_counts["aldehyde"] == 1 + assert report.label_counts["heteroaromatic_ring"] == 2 + assert report.label_counts["triazine"] == 2 + assert report.heteroaromatic_systems == 2 + assert report.named_heteroaromatic_systems == 2 + assert report.unnamed_heteroaromatic_count == 0 diff --git a/Test/Graph/FG/test_detector.py b/Test/Graph/FG/test_detector.py new file mode 100644 index 0000000..5c6777e --- /dev/null +++ b/Test/Graph/FG/test_detector.py @@ -0,0 +1,288 @@ +import pytest +from rdkit import Chem + +from synkit.Graph.FG import FunctionalGroupDetector +from synkit.IO.mol_to_graph import MolToGraph + + +def _detect(smiles: str): + mol = Chem.MolFromSmiles(smiles) + graph = MolToGraph().transform(mol) + return FunctionalGroupDetector().detect(graph) + + +@pytest.mark.parametrize( + "smiles, expected", + [ + ("C=O", [("aldehyde", (1, 2))]), + ("C(=O)N", [("amide", (1, 2, 3))]), + ("CC(=O)O", [("carboxylic_acid", (2, 3, 4))]), + ("COC(C)=O", [("ester", (2, 3, 5))]), + ("NCC(=O)O", [("amine", (1,)), ("carboxylic_acid", (3, 4, 5))]), + ("CCSCC", [("thioether", (3,))]), + ("CSC(=O)c1ccccc1", [("thioester", (2, 3, 4))]), + ( + "O=C(C)Oc1ccccc1C(=O)O", + [("ester", (1, 2, 4)), ("carboxylic_acid", (11, 12, 13))], + ), + ("CC(C)(C)OO", [("peroxide", (5, 6))]), + ("CC(=O)OO", 
[("peroxy_acid", (2, 3, 4, 5))]), + ("CCO", [("primary_alcohol", (2, 3))]), + ("CC(C)O", [("secondary_alcohol", (2, 4))]), + ("CC(C)(C)O", [("tertiary_alcohol", (2, 5))]), + ("C=CO", [("enol", (1, 2, 3))]), + ("C1CO1", [("epoxide", (1, 2, 3))]), + ("c1ccccc1O", [("phenol", (7,))]), + ("c1ccccc1N", [("aniline", (7,))]), + ("CC(=O)Cl", [("acyl_chloride", (2, 3, 4))]), + ("CC#N", [("nitrile", (2, 3))]), + ("CN=O", [("nitroso", (2, 3))]), + ("C[N+](=O)[O-]", [("nitro", (2, 3, 4))]), + ("COC(=O)N", [("carbamate", (2, 3, 4, 5))]), + ("CC(=O)OC(C)=O", [("anhydride", (2, 3, 4, 5, 7))]), + ("COC(O)(C)C", [("hemiketal", (2, 3, 4))]), + ("CO[CH](O)C", [("hemiacetal", (2, 3, 4))]), + ("COC(OC)(C)C", [("ketal", (2, 3, 4))]), + ("CO[CH](OC)C", [("acetal", (2, 3, 4))]), + ( + "n1ccccc1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6)), + ("pyridine", (1, 2, 3, 4, 5, 6)), + ], + ), + ( + "[nH]1cccc1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("pyrrole", (1, 2, 3, 4, 5)), + ], + ), + ( + "Cn1cccc1", + [ + ("heteroaromatic_ring", (2, 3, 4, 5, 6)), + ("pyrrole", (2, 3, 4, 5, 6)), + ], + ), + ( + "o1cccc1", + [ + ("furan", (1, 2, 3, 4, 5)), + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ], + ), + ( + "s1cccc1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("thiophene", (1, 2, 3, 4, 5)), + ], + ), + ( + "n1ccncc1", + [ + ("diazine", (1, 2, 3, 4, 5, 6)), + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6)), + ], + ), + ( + "c1ncc[nH]1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("imidazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1cn[nH]c1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("pyrazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1ccc2[nH]cnc2c1", + [ + ("benzimidazole", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("imidazole", (4, 5, 6, 7, 8)), + ], + ), + ( + "c1ccc2[nH]ccc2c1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("indole", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("pyrrole", (4, 5, 6, 7, 8)), + ], + 
), + ( + "c1ccc2[nH]ncc2c1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("indazole", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("pyrazole", (4, 5, 6, 7, 8)), + ], + ), + ( + "c1ccc2ocnc2c1", + [ + ("benzoxazole", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("oxazole", (4, 5, 6, 7, 8)), + ], + ), + ( + "c1ccc2scnc2c1", + [ + ("benzothiazole", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6, 7, 8, 9)), + ("thiazole", (4, 5, 6, 7, 8)), + ], + ), + ( + "c1nccs1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("thiazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1ccns1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("isothiazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1ncco1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("oxazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1ccon1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("isoxazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1n[nH]cn1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("triazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1nocn1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("oxadiazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1nn[nH]n1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("tetrazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1nscn1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5)), + ("thiadiazole", (1, 2, 3, 4, 5)), + ], + ), + ( + "c1ncnnc1", + [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6)), + ("triazine", (1, 2, 3, 4, 5, 6)), + ], + ), + ("CCCl", [("organohalide", (2, 3))]), + ("c1ccccc1Cl", [("aryl_halide", (6, 7))]), + ("CS(=O)C", [("sulfoxide", (2, 3))]), + ("CS(=O)(=O)C", [("sulfone", (2, 3, 4))]), + ("CS(=O)(=O)N", [("sulfonamide", (2, 3, 4, 5))]), + ("OB(O)c1ccccc1", [("boronic_acid", (1, 2, 3))]), + ("B(OC)(OC)c1ccccc1", [("boronate_ester", (1, 2, 4))]), + ("CO[Si](C)(C)C", [("silyl_ether", (2, 3))]), + ("COP(=O)(OC)OC", [("phosphate", (2, 3, 4, 5, 7))]), + ("CP(=O)(OC)OC", [("phosphonate", (1, 2, 
3, 4, 6))]), + ("CP(=O)(C)C", [("phosphine_oxide", (1, 2, 3, 4, 5))]), + ("COP(OC)OC", [("phosphite", (2, 3, 4, 6))]), + ("O=C=Nc1ccccc1", [("isocyanate", (1, 2, 3))]), + ("ON=Cc1ccccc1", [("oxime", (1, 2, 3))]), + ("CNN=C(C)C", [("hydrazone", (2, 3, 4))]), + ("CC=NC", [("imine", (2, 3))]), + ("N=C(N)c1ccccc1", [("amidine", (1, 2, 3))]), + ("NC(=NO)c1ccccc1", [("amidoxime", (1, 2, 3, 4))]), + ("CN=[N+]=[N-]", [("azide", (2, 3, 4))]), + ("c1ccccc1N=Nc1ccccc1", [("azo", (7, 8))]), + ("S=C=Nc1ccccc1", [("isothiocyanate", (1, 2, 3))]), + ("NC(=S)N", [("thiourea", (1, 2, 3, 4))]), + ("CC(=S)N", [("thioamide", (2, 3, 4))]), + ], +) +def test_detects_compatibility_groups(smiles, expected): + assert _detect(smiles) == expected + + +def test_raw_matches_keep_parent_before_resolution(): + mol = Chem.MolFromSmiles("CC(=O)O") + graph = MolToGraph().transform(mol) + detector = FunctionalGroupDetector() + + raw_names = {match.name for match in detector.raw_matches(graph)} + assert {"carbonyl", "carboxylic_acid"}.issubset(raw_names) + assert detector.detect(graph) == [("carboxylic_acid", (2, 3, 4))] + + +def test_internal_prerequisite_patterns_do_not_leak_into_public_results(): + mol = Chem.MolFromSmiles("COC") + graph = MolToGraph().transform(mol) + detector = FunctionalGroupDetector() + + assert {match.name for match in detector.raw_matches(graph)} == {"ether"} + assert { + match.name for match in detector.raw_matches(graph, include_internal=True) + } == {"ether", "oxygen_link"} + + +def test_water_is_not_alcohol(): + assert _detect("O") == [] + + +def test_heteroaromatic_ring_suppresses_generic_amine(): + assert _detect("n1ccccc1") == [ + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6)), + ("pyridine", (1, 2, 3, 4, 5, 6)), + ] + + +def test_diazine_keeps_generic_heteroaromatic_coverage(): + assert _detect("n1ccncc1") == [ + ("diazine", (1, 2, 3, 4, 5, 6)), + ("heteroaromatic_ring", (1, 2, 3, 4, 5, 6)), + ] + + +@pytest.mark.parametrize( + "implicit_smiles, explicit_smiles, 
expected", + [ + ("CCO", "[CH3][CH2][OH]", [("primary_alcohol", (2, 3))]), + ("C=O", "[CH2]=O", [("aldehyde", (1, 2))]), + ("CC(=O)O", "[CH3][C](=O)[OH]", [("carboxylic_acid", (2, 3, 4))]), + ], +) +def test_implicit_and_explicit_hydrogen_inputs_agree( + implicit_smiles, + explicit_smiles, + expected, +): + assert _detect(implicit_smiles) == expected + assert _detect(explicit_smiles) == expected diff --git a/Test/Graph/FG/test_ring_system.py b/Test/Graph/FG/test_ring_system.py new file mode 100644 index 0000000..6af5b9a --- /dev/null +++ b/Test/Graph/FG/test_ring_system.py @@ -0,0 +1,42 @@ +from rdkit import Chem + +from synkit.Graph.FG.ring_system import AromaticRingSystemDetector +from synkit.IO.mol_to_graph import MolToGraph + + +def _systems(smiles: str): + mol = Chem.MolFromSmiles(smiles) + graph = MolToGraph().transform(mol) + return AromaticRingSystemDetector.detect(graph) + + +def test_detects_single_heteroaromatic_ring(): + system = _systems("n1ccccc1")[0] + + assert system.nodes == (1, 2, 3, 4, 5, 6) + assert system.hetero_nodes == (1,) + assert system.element_counts == {"C": 5, "N": 1} + assert system.ring_sizes == (6,) + assert system.is_fused is False + assert system.hetero_pattern == "1N-6ring" + + +def test_detects_multi_hetero_ring(): + system = _systems("n1ccncc1")[0] + + assert system.element_counts == {"C": 4, "N": 2} + assert system.ring_sizes == (6,) + assert system.hetero_pattern == "2N-6ring" + + +def test_detects_fused_aromatic_system(): + system = _systems("c1ccc2ncccc2c1")[0] + + assert system.element_counts == {"C": 9, "N": 1} + assert system.ring_sizes == (6, 6) + assert system.is_fused is True + assert system.hetero_sequence is None + assert tuple(ring.nodes for ring in system.subrings) == ( + (1, 2, 3, 4, 9, 10), + (4, 5, 6, 7, 8, 9), + ) diff --git a/Test/Graph/ITS/test_electron_aware_its.py b/Test/Graph/ITS/test_electron_aware_its.py new file mode 100644 index 0000000..69cbc2d --- /dev/null +++ 
b/Test/Graph/ITS/test_electron_aware_its.py @@ -0,0 +1,206 @@ +import unittest + +import networkx as nx + +from synkit.Graph.Hyrogen._misc import h_to_explicit, normalize_h_pair_graph +from synkit.Graph.ITS.its_construction import ITSConstruction +from synkit.Graph.ITS.its_reverter import ITSReverter +from synkit.Graph.ITS.rc_extractor import RCExtractor + + +class TestElectronAwareITS(unittest.TestCase): + def setUp(self): + self.reactant = nx.Graph() + self.reactant.add_node( + 1, + element="N", + aromatic=False, + hcount=2, + charge=0, + neighbors=["C"], + lone_pairs=1, + radical=0, + valence_electrons=5, + ) + self.reactant.add_node( + 2, + element="C", + aromatic=False, + hcount=3, + charge=0, + neighbors=["N"], + lone_pairs=0, + radical=0, + valence_electrons=4, + ) + self.reactant.add_edge( + 1, + 2, + order=1.0, + kekule_order=1.0, + sigma_order=1.0, + pi_order=0.0, + ) + + self.product = nx.Graph() + self.product.add_node( + 1, + element="N", + aromatic=False, + hcount=1, + charge=0, + neighbors=["C"], + lone_pairs=1, + radical=1, + valence_electrons=5, + ) + self.product.add_node( + 2, + element="C", + aromatic=False, + hcount=3, + charge=0, + neighbors=["N"], + lone_pairs=0, + radical=0, + valence_electrons=4, + ) + self.product.add_edge( + 1, + 2, + order=2.0, + kekule_order=2.0, + sigma_order=1.0, + pi_order=1.0, + ) + + def test_construct_stores_default_electron_pairs(self): + its = ITSConstruction.construct(self.reactant, self.product) + + self.assertEqual(its.nodes[1]["lone_pairs"], (1, 1)) + self.assertEqual(its.nodes[1]["radical"], (0, 1)) + self.assertEqual(its.nodes[1]["valence_electrons"], (5, 5)) + self.assertEqual(its.edges[1, 2]["sigma_order"], (1.0, 1.0)) + self.assertEqual(its.edges[1, 2]["pi_order"], (0.0, 1.0)) + + def test_rc_extractor_marks_radical_change(self): + its = ITSConstruction.construct(self.reactant, self.product) + + rc = RCExtractor().extract(its) + + self.assertIn(1, rc) + self.assertIn("radical", 
rc.graph["rc"]["node_reasons"][1]) + self.assertEqual(rc.edges[1, 2]["standard_order"], -1.0) + + def test_reverter_restores_electron_fields(self): + its = ITSConstruction.construct(self.reactant, self.product) + reactant, product = ( + ITSReverter(its).to_reactant_graph(), + ITSReverter(its).to_product_graph(), + ) + + self.assertEqual(reactant.nodes[1]["radical"], 0) + self.assertEqual(product.nodes[1]["radical"], 1) + self.assertEqual(reactant.edges[1, 2]["pi_order"], 0.0) + self.assertEqual(product.edges[1, 2]["pi_order"], 1.0) + + def test_normalize_h_pair_graph_supports_named_pair_storage(self): + its = ITSConstruction.construct(self.reactant, self.product) + + normalized = normalize_h_pair_graph(its) + + self.assertEqual(normalized.nodes[1]["hcount"], (1, 0)) + + def test_rc_extraction_survives_named_hcount_normalization(self): + its = ITSConstruction.construct(self.reactant, self.product) + normalized = normalize_h_pair_graph(its) + + before = RCExtractor().extract(its) + after = RCExtractor().extract(normalized) + + self.assertEqual(before.graph["rc"]["nodes"], after.graph["rc"]["nodes"]) + self.assertEqual( + before.graph["rc"]["node_reasons"], + after.graph["rc"]["node_reasons"], + ) + + def test_preserve_full_attrs_exports_unfiltered_rc_snapshots(self): + its = ITSConstruction.construct(self.reactant, self.product) + its.nodes[1]["custom_marker"] = "kept" + + rc = RCExtractor(preserve_full_attrs=True).extract(its) + + self.assertEqual(rc.graph["rc"]["node_attrs"][1]["custom_marker"], "kept") + + def test_reverter_drops_nodes_absent_on_one_side(self): + self.reactant.add_node( + 3, + element="H", + aromatic=False, + hcount=0, + charge=0, + neighbors=["N"], + lone_pairs=0, + radical=0, + valence_electrons=1, + ) + self.reactant.add_edge( + 1, + 3, + order=1.0, + kekule_order=1.0, + sigma_order=1.0, + pi_order=0.0, + ) + + its = ITSConstruction.construct(self.reactant, self.product) + reactant, product = ( + ITSReverter(its).to_reactant_graph(), + 
ITSReverter(its).to_product_graph(), + ) + + self.assertIn(3, reactant) + self.assertNotIn(3, product) + + def test_h_to_explicit_expands_named_pair_hcounts_by_side(self): + its = ITSConstruction.construct(self.reactant, self.product) + + expanded = h_to_explicit(its, [1], its=True) + hydrogens = [ + node + for node, attrs in expanded.nodes(data=True) + if attrs.get("element") == ("H", "H") + ] + + self.assertEqual(expanded.nodes[1]["hcount"], (0, 0)) + self.assertEqual(len(hydrogens), 2) + self.assertEqual( + {expanded.nodes[node]["present"] for node in hydrogens}, + {(True, True), (True, False)}, + ) + self.assertEqual( + {expanded.edges[1, node]["order"] for node in hydrogens}, + {(1.0, 1.0), (1.0, 0.0)}, + ) + + def test_explicit_tuple_hydrogens_revert_to_correct_side_graphs(self): + its = ITSConstruction.construct(self.reactant, self.product) + expanded = h_to_explicit(its, [1], its=True) + reactant, product = ( + ITSReverter(expanded).to_reactant_graph(), + ITSReverter(expanded).to_product_graph(), + ) + + reactant_hydrogens = [ + node for node, attrs in reactant.nodes(data=True) if attrs["element"] == "H" + ] + product_hydrogens = [ + node for node, attrs in product.nodes(data=True) if attrs["element"] == "H" + ] + + self.assertEqual(len(reactant_hydrogens), 2) + self.assertEqual(len(product_hydrogens), 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Graph/ITS/test_its_destruction.py b/Test/Graph/ITS/test_its_destruction.py new file mode 100644 index 0000000..3c77f9b --- /dev/null +++ b/Test/Graph/ITS/test_its_destruction.py @@ -0,0 +1,31 @@ +import unittest + +from synkit.Graph.ITS.its_construction import ITSConstruction +from synkit.Graph.ITS.its_destruction import ITSDestruction +from synkit.IO.chem_converter import rsmi_to_graph + + +class TestITSDestruction(unittest.TestCase): + def test_direct_tuple_mode_preserves_electron_fields(self): + reactant, product = rsmi_to_graph( + "[NH2:1][CH3:2]>>[NH:1]=[CH2:2]", + ) + 
reactant.nodes[1]["lone_pairs"] = 1 + reactant.nodes[1]["radical"] = 0 + reactant.nodes[1]["valence_electrons"] = 5 + product.nodes[1]["lone_pairs"] = 1 + product.nodes[1]["radical"] = 1 + product.nodes[1]["valence_electrons"] = 5 + + its = ITSConstruction.construct(reactant, product) + left, right = ITSDestruction(its).decompose() + + self.assertEqual(left.nodes[1]["lone_pairs"], 1) + self.assertEqual(right.nodes[1]["radical"], 1) + self.assertEqual(right.nodes[1]["valence_electrons"], 5) + self.assertEqual(left.edges[1, 2]["sigma_order"], 1.0) + self.assertEqual(right.edges[1, 2]["pi_order"], 1.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Graph/MTG/test_group_comp.py b/Test/Graph/MTG/test_group_comp.py deleted file mode 100644 index 90be4bc..0000000 --- a/Test/Graph/MTG/test_group_comp.py +++ /dev/null @@ -1,52 +0,0 @@ -import unittest -from synkit.IO.chem_converter import rsmi_to_its -from synkit.Graph.MTG.groupoid import node_constraint -from synkit.Graph.MTG.group_comp import GroupComp - - -class TestGroupComp(unittest.TestCase): - - def setUp(self) -> None: - test_1 = [ - "[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH:4]([H:8])=[CH:5][O:6]([H:7])", - "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", - ] - self.test_graph_1 = [rsmi_to_its(var) for var in test_1] - test_2 = [ - "[CH2:1]=[CH:2]-[CH2+:3]>>[CH2+:1]-[CH:2]=[CH2:3]", - "[H:1]-[CH2:2]-[CH2+:3]>>[CH2:2]=[CH2:3].[H+:1]", - ] - self.test_graph_2 = [rsmi_to_its(var) for var in test_2] - - def test_get_mapping(self): - g = GroupComp(self.test_graph_1[0], self.test_graph_1[1]) - m = g.get_mapping(include_singleton=False) - self.assertEqual(len(m), 4) - - def test_get_mapping_singleton(self): - g = GroupComp(self.test_graph_1[0], self.test_graph_1[1]) - m = g.get_mapping(include_singleton=True) - self.assertEqual(len(m), 10) - - def test_get_mapping_from_nodes(self): - m0 = node_constraint( - self.test_graph_2[0].nodes(data=True), 
self.test_graph_2[1].nodes(data=True) - ) - g = GroupComp(self.test_graph_2[0], self.test_graph_2[1]) - m = g.get_mapping_from_nodes( - m0, - self.test_graph_2[0].edges(data=True), - self.test_graph_2[1].edges(data=True), - ) - self.assertEqual(len(m), 1) - - def test_get_mapping_fallback(self): - g = GroupComp(self.test_graph_2[0], self.test_graph_2[1]) - m = g.get_mapping( - include_singleton=False - ) # even False if cannot find candidate will fall back - self.assertEqual(len(m), 1) - - -if __name__ == "__main__": - unittest.main() diff --git a/Test/Graph/MTG/test_groupoid.py b/Test/Graph/MTG/test_groupoid.py deleted file mode 100644 index 3f34501..0000000 --- a/Test/Graph/MTG/test_groupoid.py +++ /dev/null @@ -1,181 +0,0 @@ -import unittest -from synkit.IO.chem_converter import rsmi_to_its -from synkit.Graph.MTG.groupoid import charge_tuple, node_constraint, edge_constraint - - -class TestGroupoid(unittest.TestCase): - - def setUp(self) -> None: - test_1 = [ - "[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH:4]([H:8])=[CH:5][O:6]([H:7])", - "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", - ] - self.test_graph_1 = [rsmi_to_its(var) for var in test_1] - test_2 = [ - "[CH2:1]=[CH:2]-[CH2+:3]>>[CH2+:1]-[CH:2]=[CH2:3]", - "[H:1]-[CH2:2]-[CH2+:3]>>[CH2:2]=[CH2:3].[H+:1]", - ] - self.test_graph_2 = [rsmi_to_its(var) for var in test_2] - - def test_direct_charges(self): - attrs = {"charges": (0, 1)} - self.assertEqual(charge_tuple(attrs), (0, 1)) - - def test_both_fields_prioritize_charges(self): - attrs = { - "charges": (1, 2), - "typesGH": ((None, None, None, 3, None), (None, None, None, 4, None)), - } - # 'charges' should take precedence over 'typesGH' - self.assertEqual(charge_tuple(attrs), (1, 2)) - - def test_charges_not_tuple(self): - attrs = { - "charges": [0, 1], # not a tuple - "typesGH": ((None, None, None, 2, None), (None, None, None, 3, None)), - } - # Non-tuple 'charges' should be ignored in favor of 
typesGH - self.assertEqual(charge_tuple(attrs), (2, 3)) - - def test_charges_wrong_length(self): - attrs = { - "charges": (0,), # length != 2 - "typesGH": ((None, None, None, 5, None), (None, None, None, 6, None)), - } - # Invalid 'charges' length => fallback to typesGH - self.assertEqual(charge_tuple(attrs), (5, 6)) - - def test_typesGH_valid(self): - attrs = {"typesGH": ((None, None, None, 7, None), (None, None, None, 8, None))} - self.assertEqual(charge_tuple(attrs), (7, 8)) - - def test_typesGH_too_short(self): - attrs = {"typesGH": ((None, None, None, 9, None),)} # only one tuple - # Not enough entries => (None, None) - self.assertEqual(charge_tuple(attrs), (None, None)) - - def test_typesGH_inner_exception(self): - attrs = {"typesGH": ((None, None), (None,))} # inner tuples too short - # Should catch exception and return (None, None) - self.assertEqual(charge_tuple(attrs), (None, None)) - - def test_no_fields(self): - # No relevant keys => (None, None) - self.assertEqual(charge_tuple({}), (None, None)) - - def test_node_constraint(self): - m1 = node_constraint( - self.test_graph_1[0].nodes(data=True), self.test_graph_1[1].nodes(data=True) - ) - self.assertEqual(len(m1.keys()), 5) - - m2 = node_constraint( - self.test_graph_2[0].nodes(data=True), self.test_graph_2[1].nodes(data=True) - ) - self.assertEqual(len(m2.keys()), 3) - - def test_edge_constraint_no_map(self): - m1 = edge_constraint( - self.test_graph_1[0].edges(data=True), - self.test_graph_1[1].edges(data=True), - algorithm="bt", - ) - self.assertEqual(len(m1), 46) # backtracking - - self.assertEqual( - len( - edge_constraint( - self.test_graph_1[0].edges(data=True), - self.test_graph_1[1].edges(data=True), - algorithm="vf2", - ) - ), - 30, - ) # vf2 - - self.assertEqual( - len( - edge_constraint( - self.test_graph_1[0].edges(data=True), - self.test_graph_1[1].edges(data=True), - algorithm="vf3", - ) - ), - 30, - ) # vf3 - - m2 = edge_constraint( - self.test_graph_2[0].edges(data=True), 
self.test_graph_2[1].edges(data=True) - ) - self.assertEqual(len(m2), 2) # backtracking - - self.assertEqual( - len( - edge_constraint( - self.test_graph_2[0].edges(data=True), - self.test_graph_2[1].edges(data=True), - algorithm="vf2", - ) - ), - 2, - ) - - self.assertEqual( - len( - edge_constraint( - self.test_graph_2[0].edges(data=True), - self.test_graph_2[1].edges(data=True), - algorithm="vf3", - ) - ), - 2, - ) - - def test_edge_constraint_map(self): - m0 = node_constraint( - self.test_graph_1[0].nodes(data=True), self.test_graph_1[1].nodes(data=True) - ) - m1 = edge_constraint( - self.test_graph_1[0].edges(data=True), - self.test_graph_1[1].edges(data=True), - m0, - ) - self.assertEqual(len(m1), 4) - - self.assertEqual( - len( - edge_constraint( - self.test_graph_1[0].edges(data=True), - self.test_graph_1[1].edges(data=True), - m0, - algorithm="vf2", - ) - ), - 1, - ) - - self.assertEqual( - len( - edge_constraint( - self.test_graph_1[0].edges(data=True), - self.test_graph_1[1].edges(data=True), - m0, - algorithm="vf3", - ) - ), - 1, - ) - - m0 = node_constraint( - self.test_graph_2[0].nodes(data=True), self.test_graph_2[1].nodes(data=True) - ) - m2 = edge_constraint( - self.test_graph_2[0].edges(data=True), - self.test_graph_2[1].edges(data=True), - m0, - ) - self.assertEqual(len(m2), 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/Test/Graph/MTG/test_mtg.py b/Test/Graph/MTG/test_mtg.py index 0f8827d..55098b2 100644 --- a/Test/Graph/MTG/test_mtg.py +++ b/Test/Graph/MTG/test_mtg.py @@ -1,7 +1,6 @@ import unittest from synkit.IO.chem_converter import rsmi_to_its from synkit.Graph.ITS.its_decompose import get_rc -from synkit.Graph.MTG.group_comp import GroupComp from synkit.Graph.MTG.mtg import MTG @@ -13,25 +12,12 @@ def setUp(self) -> None: "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", ] self.test_graph_1 = [get_rc(rsmi_to_its(var)) for var in test_1] - test_2 = [ - 
"[CH2:1]=[CH:2]-[CH2+:3]>>[CH2+:1]-[CH:2]=[CH2:3]", - "[H:1]-[CH2:2]-[CH2+:3]>>[CH2:2]=[CH2:3].[H+:1]", - ] - self.test_graph_2 = [get_rc(rsmi_to_its(var)) for var in test_2] def test_MTG_1(self): mtg = MTG(self.test_graph_1[0:2], mcs_mol=True) self.assertEqual(mtg._graph.number_of_nodes(), 6) self.assertEqual(mtg._graph.number_of_edges(), 7) - def test_MTG_2(self): - grp = GroupComp(self.test_graph_2[0], self.test_graph_2[1]) - candidates = grp.get_mapping() - # print(candidates) - mtg = MTG(self.test_graph_2[0:], candidates) - self.assertEqual(mtg._graph.number_of_nodes(), 5) - self.assertEqual(mtg._graph.number_of_edges(), 4) - if __name__ == "__main__": unittest.main() diff --git a/Test/Graph/MTG/test_mtg_tuple.py b/Test/Graph/MTG/test_mtg_tuple.py new file mode 100644 index 0000000..a60eaab --- /dev/null +++ b/Test/Graph/MTG/test_mtg_tuple.py @@ -0,0 +1,370 @@ +import unittest + +import networkx as nx + +from synkit.Graph.ITS.its_reverter import ITSReverter +from synkit.Graph.ITS.its_construction import ITSConstruction +from synkit.Graph.Mech.electron_accounting import refresh_electron_fields +from synkit.Graph.MTG.mtg import MTG +from synkit.IO import load_database, rsmi_to_its + + +class TestTupleMTG(unittest.TestCase): + def setUp(self): + g0 = nx.Graph() + g0.add_node( + 1, + element="N", + aromatic=False, + hcount=2, + charge=0, + lone_pairs=1, + radical=0, + valence_electrons=5, + ) + g0.add_node( + 2, + element="C", + aromatic=False, + hcount=3, + charge=0, + lone_pairs=0, + radical=0, + valence_electrons=4, + ) + g0.add_edge( + 1, + 2, + order=1.0, + kekule_order=1.0, + sigma_order=1.0, + pi_order=0.0, + ) + + g1 = g0.copy() + g1.nodes[1]["hcount"] = 1 + g1.nodes[1]["radical"] = 1 + g1.edges[1, 2].update( + order=2.0, + kekule_order=2.0, + sigma_order=1.0, + pi_order=1.0, + ) + + g2 = g0.copy() + g2.nodes[1]["charge"] = 1 + g2.nodes[1]["lone_pairs"] = 0 + + self.step_1 = ITSConstruction.construct(g0, g1) + self.step_2 = ITSConstruction.construct(g1, 
g2) + + def test_tuple_its_detection_preserves_electron_fields(self): + mtg = MTG([self.step_1, self.step_2], mappings=[{1: 1, 2: 2}]) + + self.assertTrue(mtg._tuple_its) + self.assertEqual(mtg._graphs[0].nodes[1]["hcount"], (2, 1)) + self.assertEqual(mtg._graphs[0].nodes[1]["radical"], (0, 1)) + self.assertEqual(mtg._graphs[1].nodes[1]["lone_pairs"], (1, 0)) + + def test_tuple_composed_its_keeps_tuple_node_state(self): + mtg = MTG([self.step_1, self.step_2], mappings=[{1: 1, 2: 2}]) + + composed = mtg.get_compose_its() + + self.assertEqual(composed.nodes[1]["hcount"], (2, 2)) + self.assertEqual(composed.nodes[1]["lone_pairs"], (1, 0)) + self.assertEqual(composed.edges[1, 2]["order"], (1.0, 1.0)) + self.assertEqual(composed.edges[1, 2]["kekule_order"], (1.0, 1.0)) + self.assertEqual(composed.edges[1, 2]["sigma_order"], (1.0, 1.0)) + self.assertEqual(composed.edges[1, 2]["pi_order"], (0.0, 0.0)) + + def test_tuple_mtg_round_trips_ordered_its_steps(self): + mtg = MTG([self.step_1, self.step_2]) + + rebuilt = mtg.get_its_steps() + + self.assertEqual(len(rebuilt), 2) + self.assertEqual(rebuilt[0].nodes[1]["hcount"], (2, 1)) + self.assertEqual(rebuilt[0].edges[1, 2]["pi_order"], (0.0, 1.0)) + self.assertEqual(rebuilt[1].nodes[1]["lone_pairs"], (1, 0)) + self.assertEqual(rebuilt[1].edges[1, 2]["pi_order"], (1.0, 0.0)) + + def test_tuple_mtg_keeps_node_timelines(self): + mtg = MTG([self.step_1, self.step_2], mappings=[{1: 1, 2: 2}]) + + graph = mtg.get_mtg() + + self.assertEqual(graph.nodes[1]["element"], "N") + self.assertEqual(graph.nodes[1]["valence_electrons"], 5) + self.assertEqual(graph.nodes[1]["hcount"], (2, 1, 2)) + self.assertEqual(graph.nodes[1]["radical"], (0, 1, 0)) + self.assertEqual(graph.nodes[1]["lone_pairs"], (1, 1, 0)) + self.assertNotIn("typesGH", graph.nodes[1]) + self.assertNotIn("neighbors", graph.nodes[1]) + self.assertNotIn("hcount_history", graph.nodes[1]) + + def test_tuple_mtg_keeps_electron_authoritative_edge_timelines(self): + mtg = 
MTG([self.step_1, self.step_2], mappings=[{1: 1, 2: 2}]) + + edge = mtg.get_mtg().edges[1, 2] + + self.assertEqual(edge["kekule_order"], (1.0, 2.0, 1.0)) + self.assertEqual(edge["sigma_order"], (1.0, 1.0, 1.0)) + self.assertEqual(edge["pi_order"], (0.0, 1.0, 0.0)) + self.assertNotIn("pi_order_history", edge) + self.assertNotIn("pi_order_step_history", edge) + + +class TestCuratedTupleMTGMechanisms(unittest.TestCase): + @staticmethod + def _atom( + element, + *, + hcount=0, + charge=0, + lone_pairs=0, + radical=0, + valence_electrons=None, + ): + valence = { + "H": 1, + "C": 4, + "N": 5, + "O": 6, + "Cl": 7, + } + return { + "element": element, + "aromatic": False, + "hcount": hcount, + "charge": charge, + "lone_pairs": lone_pairs, + "radical": radical, + "valence_electrons": valence_electrons or valence[element], + } + + @staticmethod + def _add_bond(graph, u, v, sigma=1.0, pi=0.0): + graph.add_edge( + u, + v, + order=sigma + pi, + kekule_order=sigma + pi, + sigma_order=sigma, + pi_order=pi, + ) + + def _graph(self, nodes, edges): + graph = nx.Graph() + for node, attrs in nodes.items(): + graph.add_node(node, **attrs) + for edge in edges: + self._add_bond(graph, *edge) + return refresh_electron_fields(graph) + + def test_lone_pair_donation_history_recomputes_charge_path(self): + g0 = self._graph( + { + 1: self._atom("N", hcount=3, lone_pairs=1), + 2: self._atom("C", hcount=3), + 3: self._atom("Cl", lone_pairs=3), + }, + [(2, 3, 1.0, 0.0)], + ) + g1 = self._graph( + { + 1: self._atom("N", hcount=3, charge=1, lone_pairs=0), + 2: self._atom("C", hcount=3), + 3: self._atom("Cl", charge=-1, lone_pairs=4), + }, + [(1, 2, 1.0, 0.0)], + ) + g2 = self._graph( + { + 1: self._atom("N", hcount=2, lone_pairs=1), + 2: self._atom("C", hcount=3), + 3: self._atom("Cl", hcount=1, lone_pairs=3), + }, + [(1, 2, 1.0, 0.0), (3, 1, 1.0, 0.0)], + ) + + mtg = MTG( + [ITSConstruction.construct(g0, g1), ITSConstruction.construct(g1, g2)], + mappings=[{1: 1, 2: 2, 3: 3}], + ) + + 
self.assertEqual(mtg.get_mtg().nodes[1]["lone_pairs"], (1, 0, 1)) + self.assertEqual(mtg.get_mtg().nodes[1]["charge"], (0, 1, 0)) + self.assertEqual( + mtg.get_mtg().edges[1, 2]["sigma_order"], + (0.0, 1.0, 1.0), + ) + self.assertEqual(mtg.get_compose_its().edges[1, 2]["sigma_order"], (0.0, 1.0)) + + def test_radical_progression_keeps_unpaired_electron_timeline(self): + g0 = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + g1 = self._graph( + { + 1: self._atom("C", hcount=3, radical=1), + 2: self._atom("Cl", radical=1, lone_pairs=3), + }, + [], + ) + g2 = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + + mtg = MTG( + [ITSConstruction.construct(g0, g1), ITSConstruction.construct(g1, g2)], + mappings=[{1: 1, 2: 2}], + ) + + self.assertEqual(mtg.get_mtg().nodes[1]["radical"], (0, 1, 0)) + self.assertEqual( + mtg.get_mtg().edges[1, 2]["sigma_order"], + (1.0, 0.0, 1.0), + ) + self.assertEqual(mtg.get_compose_its().edges[1, 2]["sigma_order"], (1.0, 1.0)) + + def test_rsmi_tuple_mtg_composes_back_to_outer_states(self): + step_1 = rsmi_to_its( + "[CH2:1]=[CH2:2].[H:3][H:4]>>[CH3:1][CH2:2][H:4]", + format="tuple", + ) + step_2 = rsmi_to_its( + "[CH3:1][CH2:2][H:4]>>[CH3:1][CH3:2]", + format="tuple", + ) + + mtg = MTG([step_1, step_2], mappings=[{1: 1, 2: 2, 4: 4}]) + composed = mtg.get_compose_its() + + left = ITSReverter(composed).to_reactant_graph() + right = ITSReverter(composed).to_product_graph() + + self.assertTrue(left.has_edge(1, 2)) + self.assertTrue(right.has_edge(1, 2)) + self.assertEqual(composed.edges[1, 2]["pi_order"], (1.0, 0.0)) + self.assertEqual(composed.nodes[2]["hcount"], (2, 3)) + + def test_mech_fixture_round_trips_ordered_tuple_rsmi_steps(self): + data = load_database("./Data/Testcase/mech.json.gz") + mech = data[0]["mechanisms"][1] + steps = [step["smart_string"] for step in mech["steps"]] + its_steps = 
[rsmi_to_its(step, format="tuple", core=False) for step in steps] + mtg = MTG(its_steps) + rebuilt = mtg.get_its_steps() + + self.assertEqual(len(rebuilt), len(its_steps)) + for original, recovered in zip(its_steps, rebuilt): + self.assertEqual(set(original.nodes()), set(recovered.nodes())) + self.assertEqual( + {tuple(sorted(edge)) for edge in original.edges()}, + {tuple(sorted(edge)) for edge in recovered.edges()}, + ) + + for node in original.nodes(): + for key in ( + "element", + "atom_map", + "hcount", + "charge", + "radical", + "lone_pairs", + "valence_electrons", + ): + self.assertEqual( + recovered.nodes[node].get(key), + original.nodes[node].get(key), + ) + for u, v in original.edges(): + edge = tuple(sorted((u, v))) + for key in ("order", "kekule_order", "sigma_order", "pi_order"): + self.assertEqual( + recovered.edges[edge].get(key), + original.edges[edge].get(key), + ) + + exported = mtg.get_rsmi_steps() + self.assertEqual(len(exported), len(steps)) + self.assertTrue(all(">>" in step for step in exported)) + + def test_string_sequences_default_to_lewis_state_graph(self): + data = load_database("./Data/Testcase/mech.json.gz") + mech = data[0]["mechanisms"][1] + steps = [step["smart_string"] for step in mech["steps"]] + + mtg = MTG(steps, mcs_mol=True) + graph = mtg.get_mtg() + + self.assertTrue(mtg._tuple_its) + self.assertFalse(any("typesGH" in attrs for _, attrs in graph.nodes(data=True))) + self.assertTrue( + all("sigma_order" in attrs for _, _, attrs in graph.edges(data=True)) + ) + + def test_string_sequences_can_request_legacy_typesgh(self): + data = load_database("./Data/Testcase/mech.json.gz") + mech = data[0]["mechanisms"][1] + steps = [step["smart_string"] for step in mech["steps"][:2]] + + mtg = MTG(steps, mcs_mol=True, its_format="typesGH") + + self.assertFalse(mtg._tuple_its) + self.assertTrue( + any("typesGH" in attrs for _, attrs in mtg.get_mtg().nodes(data=True)) + ) + + def 
test_mech_fixture_tuple_mtg_automatic_mapping_matches_identity_mapping(self): + data = load_database("./Data/Testcase/mech.json.gz") + mech = data[0]["mechanisms"][1] + its_steps = [ + rsmi_to_its(step["smart_string"], format="tuple", core=False) + for step in mech["steps"] + ] + identity_mappings = [ + {n: n for n in sorted(set(left.nodes()) & set(right.nodes()))} + for left, right in zip(its_steps, its_steps[1:]) + ] + + auto = MTG(its_steps) + explicit = MTG(its_steps, mappings=identity_mappings) + + self.assertEqual(auto.node_mapping, explicit.node_mapping) + self.assertEqual( + set(auto.get_mtg().edges()), + set(explicit.get_mtg().edges()), + ) + + def test_all_mech_fixture_mechanisms_round_trip_ordered_tuple_its(self): + data = load_database("./Data/Testcase/mech.json.gz") + + for mech in data[0]["mechanisms"]: + with self.subTest(mech=mech["mech_name"]): + its_steps = [ + rsmi_to_its(step["smart_string"], format="tuple", core=False) + for step in mech["steps"] + ] + rebuilt = MTG(its_steps).get_its_steps() + + self.assertEqual(len(rebuilt), len(its_steps)) + for original, recovered in zip(its_steps, rebuilt): + self.assertEqual(set(original.nodes()), set(recovered.nodes())) + self.assertEqual( + {tuple(sorted(edge)) for edge in original.edges()}, + {tuple(sorted(edge)) for edge in recovered.edges()}, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Graph/Matcher/test_graph_matcher.py b/Test/Graph/Matcher/test_graph_matcher.py index 996b017..6c58d5d 100644 --- a/Test/Graph/Matcher/test_graph_matcher.py +++ b/Test/Graph/Matcher/test_graph_matcher.py @@ -148,6 +148,34 @@ def test_edge_attribute_mismatch(self): g2.nodes["b"]["charge"] = 0 self.assertFalse(self.gm.isomorphic(g1, g2)) + def test_lone_pairs_use_host_greater_or_equal_semantics(self): + host = nx.Graph() + host.add_node(1, element="O", lone_pairs=3, radical=0, hcount=0) + + pattern = nx.Graph() + pattern.add_node(10, element="O", lone_pairs=2, radical=0, hcount=0) + + gm = 
GraphMatcherEngine( + node_attrs=["element", "lone_pairs", "radical"], + edge_attrs=[], + max_mappings=None, + ) + self.assertEqual(gm.get_mappings(host, pattern), [{10: 1}]) + + def test_radical_requires_exact_match(self): + host = nx.Graph() + host.add_node(1, element="O", lone_pairs=3, radical=1, hcount=0) + + pattern = nx.Graph() + pattern.add_node(10, element="O", lone_pairs=2, radical=0, hcount=0) + + gm = GraphMatcherEngine( + node_attrs=["element", "lone_pairs", "radical"], + edge_attrs=[], + max_mappings=None, + ) + self.assertEqual(gm.get_mappings(host, pattern), []) + def test_available_backends(self): # available_backends should list at least 'nx' backends = GraphMatcherEngine.available_backends() diff --git a/Test/Graph/Matcher/test_subgraph_matcher.py b/Test/Graph/Matcher/test_subgraph_matcher.py index 6f27674..28c9806 100644 --- a/Test/Graph/Matcher/test_subgraph_matcher.py +++ b/Test/Graph/Matcher/test_subgraph_matcher.py @@ -1,8 +1,15 @@ import unittest +import networkx as nx from synkit.IO.data_io import load_from_pickle from synkit.IO.chem_converter import rsmi_to_its from synkit.Graph.ITS.its_decompose import get_rc -from synkit.Graph.Matcher.subgraph_matcher import SubgraphMatch, SubgraphSearchEngine +from synkit.Graph.Matcher.subgraph_matcher import ( + SubgraphMatch, + SubgraphSearchEngine, + diagnose_candidate_node_match, + electron_aware_edge_match, + resolve_template_match_attrs, +) # Determine if the rule backend is available try: @@ -131,6 +138,134 @@ def test_graph_subgraph_morphism_false(self): ) self.assertEqual(len(mapping), 0) + def test_electron_aware_node_matching(self): + host = nx.Graph() + host.add_node(1, element="O", lone_pairs=3, radical=0, hcount=1) + + pattern = nx.Graph() + pattern.add_node(10, element="O", lone_pairs=2, radical=0, hcount=0) + + matches = self.gm.find_subgraph_mappings( + host, + pattern, + node_attrs=["element", "lone_pairs", "radical"], + edge_attrs=[], + ) + self.assertEqual(matches, [{10: 1}]) + + def 
test_electron_aware_node_matching_rejects_low_lone_pairs(self): + host = nx.Graph() + host.add_node(1, element="O", lone_pairs=1, radical=0, hcount=0) + + pattern = nx.Graph() + pattern.add_node(10, element="O", lone_pairs=2, radical=0, hcount=0) + + matches = self.gm.find_subgraph_mappings( + host, + pattern, + node_attrs=["element", "lone_pairs", "radical"], + edge_attrs=[], + ) + self.assertEqual(matches, []) + + def test_resolve_template_match_attrs_keeps_legacy_template_legacy(self): + pattern = nx.Graph() + pattern.add_node(1, element="O", charge=0) + pattern.add_edge(1, 2, order=1.0) + + node_attrs, edge_attrs = resolve_template_match_attrs(pattern) + + self.assertEqual(node_attrs, ["element", "charge"]) + self.assertEqual(edge_attrs, ["order"]) + + def test_resolve_template_match_attrs_uses_new_template_fields(self): + pattern = nx.Graph() + pattern.add_node( + 1, + element="O", + charge=0, + aromatic=False, + hcount=0, + lone_pairs=2, + radical=0, + ) + pattern.add_node( + 2, + element="C", + charge=0, + aromatic=False, + hcount=3, + lone_pairs=0, + radical=0, + ) + pattern.add_edge(1, 2, order=2.0, sigma_order=1.0, pi_order=1.0) + + node_attrs, edge_attrs = resolve_template_match_attrs(pattern) + + self.assertEqual( + node_attrs, + [ + "element", + "charge", + "aromatic", + "hcount", + "lone_pairs", + "radical", + ], + ) + self.assertEqual(edge_attrs, ["order", "sigma_order", "pi_order"]) + + def test_resolve_template_match_attrs_uses_aromatic_n_pi_role(self): + pattern = nx.Graph() + pattern.add_node( + 1, + element="N", + charge=0, + aromatic=True, + hcount=0, + lone_pairs=1, + radical=0, + aromatic_n_pi_count=1, + ) + + node_attrs, _ = resolve_template_match_attrs(pattern) + + self.assertIn("aromatic_n_pi_count", node_attrs) + + def test_diagnose_candidate_node_match_reports_electron_reason(self): + diagnostic = diagnose_candidate_node_match( + {"element": "O", "lone_pairs": 1, "radical": 0}, + {"element": "O", "lone_pairs": 2, "radical": 1}, + 
["element", "lone_pairs", "radical"], + ) + + self.assertFalse(diagnostic["matched"]) + self.assertEqual( + diagnostic["reasons"], + [ + "lone_pairs: host 1 < pattern 2", + "radical: host 0 != pattern 1", + ], + ) + + def test_electron_aware_edge_matching_ignores_aromatic_kekule_phase(self): + self.assertTrue( + electron_aware_edge_match( + {"order": 1.5, "sigma_order": 1.0, "pi_order": 1.0}, + {"order": 1.5, "sigma_order": 1.0, "pi_order": 0.0}, + ["order", "sigma_order", "pi_order"], + ) + ) + + def test_electron_aware_edge_matching_keeps_non_aromatic_sigma_pi_exact(self): + self.assertFalse( + electron_aware_edge_match( + {"order": 2.0, "sigma_order": 1.0, "pi_order": 1.0}, + {"order": 2.0, "sigma_order": 1.0, "pi_order": 0.0}, + ["order", "sigma_order", "pi_order"], + ) + ) + if __name__ == "__main__": unittest.main() diff --git a/Test/Graph/Mech/test_conversion.py b/Test/Graph/Mech/test_conversion.py new file mode 100644 index 0000000..4f86560 --- /dev/null +++ b/Test/Graph/Mech/test_conversion.py @@ -0,0 +1,33 @@ +import networkx as nx + +from synkit.Graph.Mech.conversion import ( + extract_atom_maps_from_smiles, + typed_convert_arrow_code, +) + + +class CountingGraph(nx.Graph): + def __init__(self): + super().__init__() + self.nodes_calls = 0 + + def nodes(self, *args, **kwargs): + self.nodes_calls += 1 + return super().nodes(*args, **kwargs) + + +def test_extract_atom_maps_from_smiles(): + assert extract_atom_maps_from_smiles("[CH:10][N+:61]") == [10, 61] + + +def test_typed_convert_arrow_code_reuses_atom_map_index(): + its = CountingGraph() + its.add_node("a", atom_map=1) + its.add_node("b", atom_map=2) + its.add_edge("a", "b", order=(1.0, 2.0)) + + assert typed_convert_arrow_code("1=1,2;1,2=1", its) == [ + ["LP-/Pi+", [1], [1, 2]], + ["Sigma-/LP+", [1, 2], [1]], + ] + assert its.nodes_calls == 1 diff --git a/Test/Graph/Mech/test_electron_accounting.py b/Test/Graph/Mech/test_electron_accounting.py new file mode 100644 index 0000000..b1614cb --- /dev/null 
+++ b/Test/Graph/Mech/test_electron_accounting.py @@ -0,0 +1,170 @@ +import unittest + +import networkx as nx +from rdkit import Chem + +from synkit.Graph.Mech.electron_accounting import ( + bond_order_sum, + graph_to_sanitized_kekule_mol, + recompute_charge, + refresh_electron_fields, +) + + +class TestElectronAccounting(unittest.TestCase): + @staticmethod + def _graph_from_kekule_smiles(smiles): + mol = Chem.MolFromSmiles(smiles) + kekule = Chem.Mol(mol) + Chem.Kekulize(kekule, clearAromaticFlags=True) + + valence_electrons = { + "C": 4, + "N": 5, + "O": 6, + } + graph = nx.Graph() + for atom in kekule.GetAtoms(): + graph.add_node( + atom.GetIdx(), + element=atom.GetSymbol(), + charge=atom.GetFormalCharge(), + hcount=atom.GetTotalNumHs(), + lone_pairs=0, + radical=atom.GetNumRadicalElectrons(), + valence_electrons=valence_electrons[atom.GetSymbol()], + ) + for bond in kekule.GetBonds(): + order = bond.GetBondTypeAsDouble() + graph.add_edge( + bond.GetBeginAtomIdx(), + bond.GetEndAtomIdx(), + sigma_order=1.0, + pi_order=order - 1.0, + ) + return graph + + def test_refresh_recomputes_kekule_order_and_charge(self): + graph = nx.Graph() + graph.add_node( + 1, + element="O", + charge=0, + hcount=0, + lone_pairs=2, + radical=0, + valence_electrons=6, + ) + graph.add_node( + 2, + element="C", + charge=0, + hcount=2, + lone_pairs=0, + radical=0, + valence_electrons=4, + ) + graph.add_edge(1, 2, sigma_order=1.0, pi_order=1.0) + + refreshed = refresh_electron_fields(graph) + + self.assertEqual(refreshed.edges[1, 2]["kekule_order"], 2.0) + self.assertEqual(bond_order_sum(refreshed, 1), 2.0) + self.assertEqual(recompute_charge(refreshed, 1), 0.0) + self.assertFalse(refreshed.nodes[1]["charge_mismatch"]) + + def test_refresh_detects_charge_mismatch(self): + graph = nx.Graph() + graph.add_node( + 1, + element="O", + charge=0, + hcount=1, + lone_pairs=3, + radical=0, + valence_electrons=6, + ) + graph.add_node( + 2, + element="H", + charge=0, + hcount=0, + lone_pairs=0, + 
radical=0, + valence_electrons=1, + ) + graph.add_edge(1, 2, sigma_order=1.0, pi_order=0.0) + + refreshed = refresh_electron_fields(graph) + + self.assertEqual(refreshed.nodes[1]["recomputed_charge"], -2.0) + self.assertTrue(refreshed.nodes[1]["charge_mismatch"]) + + def test_radical_count_prevents_false_charge_on_hydroxyl_radical(self): + graph = nx.Graph() + graph.add_node( + 1, + element="O", + charge=0, + hcount=1, + lone_pairs=2, + radical=1, + valence_electrons=6, + ) + + refreshed = refresh_electron_fields(graph) + + self.assertEqual(refreshed.nodes[1]["recomputed_charge"], 0) + self.assertFalse(refreshed.nodes[1]["charge_mismatch"]) + + def test_radical_count_prevents_false_charge_on_methyl_radical(self): + graph = nx.Graph() + graph.add_node( + 1, + element="C", + charge=0, + hcount=3, + lone_pairs=0, + radical=1, + valence_electrons=4, + ) + + refreshed = refresh_electron_fields(graph) + + self.assertEqual(refreshed.nodes[1]["recomputed_charge"], 0) + self.assertFalse(refreshed.nodes[1]["charge_mismatch"]) + + def test_kekule_reconstruction_reperceives_aromatic_examples(self): + cases = { + "c1ccccc1": "c1ccccc1", + "n1ccccc1": "c1ccncc1", + "[nH]1cccc1": "c1cc[nH]c1", + "o1cccc1": "c1ccoc1", + "[nH+]1ccccc1": "c1cc[nH+]cc1", + } + + for input_smiles, expected_smiles in cases.items(): + with self.subTest(smiles=input_smiles): + graph = self._graph_from_kekule_smiles(input_smiles) + mol = graph_to_sanitized_kekule_mol(graph) + + self.assertEqual(Chem.MolToSmiles(mol), expected_smiles) + self.assertTrue(all(bond.GetIsAromatic() for bond in mol.GetBonds())) + + def test_kekule_reconstruction_does_not_invent_aromaticity(self): + cases = { + "C=CC=C": "C=CC=C", + "C1=CC=CCC1": "C1=CCCC=C1", + } + + for input_smiles, expected_smiles in cases.items(): + with self.subTest(smiles=input_smiles): + graph = self._graph_from_kekule_smiles(input_smiles) + mol = graph_to_sanitized_kekule_mol(graph) + + self.assertEqual(Chem.MolToSmiles(mol), expected_smiles) + 
self.assertFalse(any(bond.GetIsAromatic() for bond in mol.GetBonds())) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Graph/test_canon_graph.py b/Test/Graph/test_canon_graph.py index 0c0c164..0e6ad21 100644 --- a/Test/Graph/test_canon_graph.py +++ b/Test/Graph/test_canon_graph.py @@ -46,6 +46,25 @@ def test_canonical_signature_difference(self): sigG, sigH, "Different graphs should have different signatures" ) + def test_electron_state_changes_canonical_signature(self): + neutral = nx.Graph() + neutral.add_node(1, element="N", charge=0, lone_pairs=1, radical=0) + + radical = nx.Graph() + radical.add_node(1, element="N", charge=0, lone_pairs=1, radical=1) + + lone_pair_changed = nx.Graph() + lone_pair_changed.add_node(1, element="N", charge=0, lone_pairs=0, radical=0) + + self.assertNotEqual( + self.canon.canonical_signature(neutral), + self.canon.canonical_signature(radical), + ) + self.assertNotEqual( + self.canon.canonical_signature(neutral), + self.canon.canonical_signature(lone_pair_changed), + ) + def test_make_canonical_graph_structure(self): G_can = self.canon.make_canonical_graph(self.G_swapped) # Canonical graph should have nodes labeled 1 and 2 diff --git a/Test/IO/test_chemical_converter.py b/Test/IO/test_chemical_converter.py index 52a75f4..d0f32e5 100644 --- a/Test/IO/test_chemical_converter.py +++ b/Test/IO/test_chemical_converter.py @@ -235,6 +235,14 @@ def test_rsmi_to_rc(self): rc = rsmi_to_its(smart, core=True) self.assertFalse(graph_isomorphism(its, rc)) + def test_tuple_rsmi_to_rc(self): + smart = "[CH3:5][CH:1]=[CH2:2].[H:3][H:4]>>[CH3:5][CH2:1][CH3:2]" + its = rsmi_to_its(smart, format="tuple") + rc = rsmi_to_its(smart, core=True, format="tuple") + + self.assertFalse(graph_isomorphism(its, rc)) + self.assertIn("radical", next(iter(its.nodes(data=True)))[1]) + def test_its_to_rsmi(self): smart = "[CH3:5][CH:1]=[CH2:2].[H:3][H:4]>>[CH3:5][CH:1]([H:3])[CH2:2][H:4]" its = rsmi_to_its(smart) @@ -244,6 +252,39 @@ def 
test_its_to_rsmi(self): CanonRSMI().canonicalise(new_smart).canonical_rsmi, ) + def test_tuple_its_to_rsmi(self): + smart = "[CH3:5][CH:1]=[CH2:2].[H:3][H:4]>>[CH3:5][CH2:1][CH3:2]" + its = rsmi_to_its(smart, format="tuple") + new_smart = its_to_rsmi(its, format="tuple") + + self.assertEqual( + CanonRSMI().canonicalise(smart).canonical_rsmi, + CanonRSMI().canonicalise(new_smart).canonical_rsmi, + ) + + def test_tuple_its_to_rsmi_reperceives_aromatic_product(self): + smart = "[CH2:1]1[CH:2]=[CH:3][CH:4]=[CH:5][CH2:6]1>>[cH:1]1[cH:2][cH:3][cH:4][cH:5][cH:6]1" + its = rsmi_to_its(smart, format="tuple") + new_smart = its_to_rsmi(its, format="tuple") + + self.assertEqual( + CanonRSMI().canonicalise(smart).canonical_rsmi, + CanonRSMI().canonicalise(new_smart).canonical_rsmi, + ) + + def test_tuple_rsmi_to_its_explicit_hydrogen(self): + smart = "[CH3:1][CH2:2]>>[CH2:1]=[CH2:2]" + its = rsmi_to_its(smart, explicit_hydrogen=True, format="tuple") + + hydrogens = [ + attrs + for _, attrs in its.nodes(data=True) + if attrs.get("element") == ("H", "H") + ] + + self.assertTrue(hydrogens) + self.assertTrue(any(attrs["present"] != (True, True) for attrs in hydrogens)) + def test_rsmi_to_rsmarts_and_back(self): rsmi = "[H:3][O:4].[N:1][C:2]>>[C:2][O:4].[N:1][H:3]" diff --git a/Test/IO/test_graph_to_mol.py b/Test/IO/test_graph_to_mol.py index 5cdf84a..9608a0c 100644 --- a/Test/IO/test_graph_to_mol.py +++ b/Test/IO/test_graph_to_mol.py @@ -54,6 +54,25 @@ def test_molecule_with_charges(self): mol = self.converter.graph_to_mol(graph) self.assertEqual(Chem.CanonSmiles(Chem.MolToSmiles(mol)), "[NH4+]") + def test_molecule_with_radical(self): + graph = nx.Graph() + graph.add_node(0, element="C", charge=0, radical=1) + + mol = self.converter.graph_to_mol(graph, sanitize=False) + + self.assertEqual(mol.GetAtomWithIdx(0).GetNumRadicalElectrons(), 1) + + def test_aromatic_order_is_reperceived_on_sanitized_output(self): + graph = nx.cycle_graph(6) + nx.set_node_attributes(graph, "C", 
"element") + nx.set_node_attributes(graph, 0, "charge") + nx.set_edge_attributes(graph, 1.5, "order") + + mol = self.converter.graph_to_mol(graph) + + self.assertEqual(Chem.MolToSmiles(mol), "c1ccccc1") + self.assertTrue(all(bond.GetIsAromatic() for bond in mol.GetBonds())) + if __name__ == "__main__": unittest.main() diff --git a/Test/IO/test_mol_to_graph.py b/Test/IO/test_mol_to_graph.py index 8d827f4..c262a8c 100644 --- a/Test/IO/test_mol_to_graph.py +++ b/Test/IO/test_mol_to_graph.py @@ -90,6 +90,7 @@ def test_transform_node_keys_minimal(self): "aromatic", "radical", "lone_pairs", + "valence_electrons", "available_lp", "oxidation_state", ): @@ -112,6 +113,8 @@ def test_transform_edge_keys(self): "bond_type", "aromatic", "kekule_order", + "sigma_order", + "pi_order", "kekule_bond_type", "ez_isomer", "conjugated", @@ -157,6 +160,28 @@ def test_transform_edge_whitelist(self): for _, _, data in g.edges(data=True): self.assertEqual(set(data.keys()), {"order"}) + def test_sigma_pi_order_split_matches_kekule_order(self): + mol = Chem.MolFromSmiles("C#CC=C") + g = MolToGraph().transform(mol) + for _, _, data in g.edges(data=True): + self.assertEqual( + data["kekule_order"], + data["sigma_order"] + data["pi_order"], + ) + + def test_aromatic_bonds_preserve_matching_and_rewrite_views(self): + mol = Chem.MolFromSmiles("c1ccccc1") + g = MolToGraph().transform(mol) + + self.assertEqual({data["order"] for _, _, data in g.edges(data=True)}, {1.5}) + self.assertEqual( + { + (data["kekule_order"], data["sigma_order"], data["pi_order"]) + for _, _, data in g.edges(data=True) + }, + {(1.0, 1.0, 0.0), (2.0, 1.0, 1.0)}, + ) + def test_drop_non_aam_requires_use_index(self): with self.assertRaises(ValueError): self.converter.transform( @@ -239,6 +264,66 @@ def test_radical_in_light_weight_graph(self): for _, data in g.nodes(data=True): self.assertIn("radical", data) + # ------------------------------------------------------------------ + # Lone-pair chemistry audit + # 
------------------------------------------------------------------ + + def test_lone_pair_audit_matrix(self): + cases = { + "O": [("O", 2)], + "[OH-]": [("O", 3)], + "N": [("N", 1)], + "[NH4+]": [("N", 0)], + "[Cl-]": [("Cl", 4)], + "n1ccccc1": [("N", 1)], + "[nH]1cccc1": [("N", 1)], + "C=O": [("O", 2)], + "[CH3]": [("C", 0)], + "S": [("S", 2)], + "[SH-]": [("S", 3)], + "[SH3+]": [("S", 1)], + "P": [("P", 1)], + "[PH4+]": [("P", 0)], + "P(=O)(O)(O)O": [("P", 0)], + "S(=O)(=O)(O)O": [("S", 0)], + } + + for smiles, expected_atoms in cases.items(): + with self.subTest(smiles=smiles): + mol = Chem.MolFromSmiles(smiles) + observed = [ + (atom.GetSymbol(), MolToGraph.estimate_lone_pairs(atom)) + for atom in mol.GetAtoms() + if atom.GetSymbol() in {symbol for symbol, _ in expected_atoms} + ] + self.assertEqual(observed[: len(expected_atoms)], expected_atoms) + + def test_lone_pairs_match_for_explicit_and_implicit_hydrogen_forms(self): + equivalent_pairs = [ + ("O", "[OH2]"), + ("N", "[NH3]"), + ("[nH]1cccc1", "[n]1([H])cccc1"), + ("Oc1ccccc1", "[OH]c1ccccc1"), + ("Nc1ccccc1", "[NH2]c1ccccc1"), + ] + + for implicit_smiles, explicit_smiles in equivalent_pairs: + with self.subTest( + implicit_smiles=implicit_smiles, + explicit_smiles=explicit_smiles, + ): + implicit_mol = Chem.MolFromSmiles(implicit_smiles) + explicit_mol = Chem.MolFromSmiles(explicit_smiles) + implicit_values = [ + MolToGraph.estimate_lone_pairs(atom) + for atom in implicit_mol.GetAtoms() + ] + explicit_values = [ + MolToGraph.estimate_lone_pairs(atom) + for atom in explicit_mol.GetAtoms() + ] + self.assertEqual(implicit_values, explicit_values) + # ------------------------------------------------------------------ # Lone-pair estimation # ------------------------------------------------------------------ @@ -255,6 +340,14 @@ def test_estimate_lone_pairs_pyrrolic_n(self): lp = MolToGraph.estimate_lone_pairs(n_atom) self.assertGreater(lp, 0) + def test_estimate_lone_pairs_fused_aromatic_bridgehead_n(self): + 
mol = Chem.MolFromSmiles("c1nc2cnccn2c1") + n_atoms = [atom for atom in mol.GetAtoms() if atom.GetSymbol() == "N"] + self.assertEqual( + [MolToGraph.estimate_lone_pairs(atom) for atom in n_atoms], + [1, 1, 1], + ) + def test_estimate_available_lone_pairs_pyrrolic_n_zero(self): # [nH] lone pair is conjugated into the ring — not available for donation mol = Chem.MolFromSmiles("c1cc[nH]c1") diff --git a/Test/Rule/Apply/test_rule_matcher.py b/Test/Rule/Apply/test_rule_matcher.py index e9649be..b8a5fae 100644 --- a/Test/Rule/Apply/test_rule_matcher.py +++ b/Test/Rule/Apply/test_rule_matcher.py @@ -31,6 +31,16 @@ def test_rule_match_balance(self): # The returned rule graph should be isomorphic to the input rule self.assertTrue(nx.is_isomorphic(returned_rule, rule)) + def test_tuple_rule_match_balance(self): + input_rsmi = "CC[CH2:3][Cl:1].[NH2:2][H:4]>>CC[CH2:3][NH2:2].[Cl:1][H:4]" + rule = rsmi_to_its(input_rsmi, core=True, format="tuple") + rsmi_std = Standardize().fit(input_rsmi) + matcher = RuleMatcher(rsmi_std, rule) + smarts, returned_rule = matcher.get_result() + + self.assertEqual(Standardize().fit(smarts), rsmi_std) + self.assertIs(returned_rule, rule) + def test_rbl_missing_product(self): """Partial (RBL) match when product fragments are missing in rule.""" rsmi = "CC(Br)C.CB(O)O>>CC(C)C" @@ -89,6 +99,17 @@ def test_help_output(self): self.assertIn("RuleMatcher for RSMI", out) self.assertIn("Candidate SMARTS patterns:", out) + def test_diagnostics_passthrough_is_opt_in(self): + input_rsmi = "CC[CH2:3][Cl:1].[NH2:2][H:4]>>CC[CH2:3][NH2:2].[Cl:1][H:4]" + rule = rsmi_to_its(input_rsmi, core=True) + matcher = RuleMatcher( + Standardize().fit(input_rsmi), + rule, + electron_diagnostics=True, + ) + + self.assertTrue(matcher.diagnostics) + if __name__ == "__main__": unittest.main() diff --git a/Test/Rule/test_syn_rule.py b/Test/Rule/test_syn_rule.py index 497694a..674e906 100644 --- a/Test/Rule/test_syn_rule.py +++ b/Test/Rule/test_syn_rule.py @@ -94,6 +94,43 @@ def 
test_str_repr(self): self.assertIn("left=(|V|=", r) self.assertIn("right=(|V|=", r) + def test_tuple_rule_preserves_tuple_representation(self): + smart = "[CH3:1][CH3:2]>>[CH2:1]=[CH2:2]" + + rule = SynRule.from_smart( + smart, + canon=False, + implicit_h=False, + format="tuple", + ) + + self.assertEqual(rule._format, "tuple") + self.assertEqual(rule.rc.raw.nodes[1]["element"], ("C", "C")) + self.assertEqual(rule.rc.raw.edges[1, 2]["pi_order"], (0.0, 1.0)) + self.assertEqual(rule.left.raw.edges[1, 2]["pi_order"], 0.0) + self.assertEqual(rule.right.raw.edges[1, 2]["pi_order"], 1.0) + + def test_tuple_rule_implicit_h_strips_removable_explicit_hydrogens(self): + smart = "[CH3:1][Cl:2].[O:3]([H:4])[H:5]>>[CH3:1][O:3][H:4].[Cl:2][H:5]" + rule = SynRule.from_smart( + smart, + canon=False, + implicit_h=True, + format="tuple", + ) + + self.assertFalse( + any(data["element"] == "H" for _, data in rule.left.raw.nodes(data=True)) + ) + self.assertFalse( + any(data["element"] == "H" for _, data in rule.right.raw.nodes(data=True)) + ) + self.assertEqual(rule.rc.raw.nodes[1]["hcount"], (0, 0)) + self.assertEqual(rule.rc.raw.nodes[2]["hcount"], (0, 1)) + self.assertEqual(rule.rc.raw.nodes[3]["hcount"], (2, 1)) + self.assertTrue(rule.rc.raw.nodes[2]["h_pairs"]) + self.assertTrue(rule.rc.raw.nodes[3]["h_pairs"]) + if __name__ == "__main__": unittest.main() diff --git a/Test/Synthesis/Reactor/test_imba_engine.py b/Test/Synthesis/Reactor/test_imba_engine.py index 4e895bf..5ee5962 100644 --- a/Test/Synthesis/Reactor/test_imba_engine.py +++ b/Test/Synthesis/Reactor/test_imba_engine.py @@ -60,7 +60,7 @@ def test_pipeline_backward(self): partial=True, ) out = engine.smarts_list - self.assertEqual(len(out), 3) + self.assertEqual(len(out), 2) out_rsmi = Standardize().fit(out[0], remove_aam=True) self.assertIn("*", out_rsmi) self.assertNotEqual(out_rsmi, self.rsmi) @@ -76,7 +76,7 @@ def test_pipeline_backward(self): ) out_clean = engine_clean.smarts_list - self.assertEqual(len(out_clean), 
3) + self.assertEqual(len(out_clean), 2) # outs = [Standardize().fit(o, remove_aam=True) for o in out_clean] # self.assertIn(self.rsmi, outs) @@ -89,6 +89,18 @@ def test_invalid_rsmi(self): wild = WildCard().rsmi_with_wildcards(rsmi) _ = rsmi_to_its(wild, core=True) + def test_diagnostics_passthrough_is_opt_in(self): + engine = ImbaEngine( + "[CH3:1][CH3:2]", + "[CH3:1][CH3:2]>>[CH2:1]=[CH2:2]", + add_wildcard=False, + electron_diagnostics=True, + ) + + self.assertEqual(len(engine.smarts_list), 1) + self.assertEqual(len(engine.diagnostics), 1) + self.assertEqual(engine.diagnostics[0]["mismatch_count"], 0) + if __name__ == "__main__": unittest.main() diff --git a/Test/Synthesis/Reactor/test_partial_engine.py b/Test/Synthesis/Reactor/test_partial_engine.py index b3b1422..3400a5b 100644 --- a/Test/Synthesis/Reactor/test_partial_engine.py +++ b/Test/Synthesis/Reactor/test_partial_engine.py @@ -43,6 +43,18 @@ def test_backward_direction_example(self): ] self.assertEqual(result, expected) + def test_diagnostics_passthrough_is_opt_in(self): + engine = PartialEngine( + "CCC(=O)OC", + "[C:1][O:2].[O:3][H:4]>>[C:1][O:3].[O:2][H:4]", + electron_diagnostics=True, + ) + + result = engine.fit(invert=False) + + self.assertTrue(result) + self.assertTrue(engine.diagnostics) + if __name__ == "__main__": unittest.main() diff --git a/Test/Synthesis/Reactor/test_rbl_engine.py b/Test/Synthesis/Reactor/test_rbl_engine.py index 13bb67a..b8fca4a 100644 --- a/Test/Synthesis/Reactor/test_rbl_engine.py +++ b/Test/Synthesis/Reactor/test_rbl_engine.py @@ -126,6 +126,20 @@ def test_repr_contains_key_info(self) -> None: self.assertIn("wildcard_element='X*'", rep) self.assertIn("reactor_cls=", rep) + def test_diagnostics_are_grouped_by_reactor_stage(self) -> None: + engine = RBLEngine(early_stop=False, electron_diagnostics=True) + engine.process( + "CCC(=O)OC>>CCC(=O)OCC", + "[C:1][O:2].[O:3][H:4]>>[C:1][O:3].[O:2][H:4]", + ) + + self.assertEqual( + set(engine.diagnostics), {"forward", "backward", 
"quick_check"} + ) + self.assertTrue(engine.diagnostics["forward"]) + self.assertTrue(engine.diagnostics["backward"]) + self.assertIn("diagnostics", engine.result) + def test_quick_check_short_circuits_pipeline(self) -> None: """ When early_stop=True and _quick_check succeeds, process() diff --git a/Test/Synthesis/Reactor/test_rule_filter.py b/Test/Synthesis/Reactor/test_rule_filter.py new file mode 100644 index 0000000..f407412 --- /dev/null +++ b/Test/Synthesis/Reactor/test_rule_filter.py @@ -0,0 +1,22 @@ +import unittest + +from synkit.IO.chem_converter import rsmi_to_graph, rsmi_to_its +from synkit.Synthesis.Reactor.rule_filter import RuleFilter + + +class TestRuleFilter(unittest.TestCase): + def test_tuple_rule_uses_tuple_decomposition(self): + host, _ = rsmi_to_graph("[CH3:1][Cl:2]>>[CH3:1][Cl:2]") + rule = rsmi_to_its( + "[CH3:1][Cl:2]>>[CH3:1].[Cl:2]", + core=True, + format="tuple", + ) + + filtered = RuleFilter(host, [rule], engine="nx") + + self.assertEqual(filtered.new_rules, [rule]) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Synthesis/Reactor/test_syn_reactor_bug_cases.py b/Test/Synthesis/Reactor/test_syn_reactor_bug_cases.py new file mode 100644 index 0000000..828a120 --- /dev/null +++ b/Test/Synthesis/Reactor/test_syn_reactor_bug_cases.py @@ -0,0 +1,51 @@ +import unittest + +from synkit.Chem.Reaction.standardize import Standardize +from synkit.IO.chem_converter import rsmi_to_its +from synkit.Synthesis.Reactor.syn_reactor import SynReactor + + +class TestSynReactorBugCases(unittest.TestCase): + def test_tuple_backward_cross_coupling_keeps_aromatic_role_context(self): + smart = ( + "[CH3:10][CH2:11][O:12][C:13](=[O:14])[c:15]1[cH:16][cH:18][cH:19]" + "[c:20]([B:21]([OH:22])[OH:23])[cH:17]1." 
+ "[CH3:1][c:2]1[cH:3][cH:5][cH:6][c:7]([Br:8])[c:4]1[I:9]" + ">>" + "[CH3:1][c:2]1[cH:3][cH:5][cH:6][c:7]([Br:8])[c:4]1-" + "[c:20]1[cH:17][c:15]([C:13]([O:12][CH2:11][CH3:10])=[O:14])" + "[cH:16][cH:18][cH:19]1.[I:9][B:21]([OH:22])[OH:23]" + ) + expected = Standardize().fit(smart) + reactants, products = expected.split(">>") + rc = rsmi_to_its(smart, core=True, format="tuple") + + forward = SynReactor( + substrate=reactants, + template=rc, + implicit_temp=False, + explicit_h=False, + ) + backward = SynReactor( + substrate=products, + template=rc, + implicit_temp=False, + explicit_h=False, + invert=True, + ) + + forward_smis = [ + Standardize().fit(candidate, remove_aam=True) + for candidate in forward.smarts + ] + backward_smis = [ + Standardize().fit(candidate, remove_aam=True) + for candidate in backward.smarts + ] + + self.assertIn(expected, forward_smis) + self.assertIn(expected, backward_smis) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Synthesis/Reactor/test_syn_reactor_electron_cases.py b/Test/Synthesis/Reactor/test_syn_reactor_electron_cases.py new file mode 100644 index 0000000..bddc3cc --- /dev/null +++ b/Test/Synthesis/Reactor/test_syn_reactor_electron_cases.py @@ -0,0 +1,436 @@ +import unittest + +from synkit.Graph.ITS.its_reverter import ITSReverter +from synkit.Chem.Reaction.aam_validator import AAMValidator +from synkit.Chem.Reaction.standardize import Standardize +from synkit.IO.chem_converter import rsmi_to_its +from synkit.Synthesis.Reactor.syn_reactor import SynReactor + + +class TestSynReactorElectronCases(unittest.TestCase): + @staticmethod + def _has_equivalent_candidate(smart: str, candidates: list[str]) -> bool: + return any( + AAMValidator().smiles_check(smart, candidate) for candidate in candidates + ) + + @staticmethod + def _has_standardized_candidate(smart: str, candidates: list[str]) -> bool: + standardizer = Standardize() + expected = standardizer.fit(smart) + return expected in [ + 
standardizer.fit(candidate, remove_aam=True) for candidate in candidates + ] + + @staticmethod + def _tuple_reactors( + smart: str, + *, + core: bool, + implicit_temp: bool, + explicit_h: bool, + ) -> tuple[SynReactor, SynReactor]: + substrate, product = Standardize().fit(smart, remove_aam=True).split(">>") + rc = rsmi_to_its(smart, core=core, format="tuple") + return ( + SynReactor( + substrate=substrate, + template=rc, + electron_diagnostics=True, + implicit_temp=implicit_temp, + explicit_h=explicit_h, + ), + SynReactor( + substrate=product, + template=rc, + electron_diagnostics=True, + implicit_temp=implicit_temp, + explicit_h=explicit_h, + invert=True, + ), + ) + + def _assert_bidirectional_equivalent( + self, + smart: str, + *, + core: bool, + implicit_temp: bool, + explicit_h: bool, + ) -> tuple[SynReactor, SynReactor]: + forward, backward = self._tuple_reactors( + smart, + core=core, + implicit_temp=implicit_temp, + explicit_h=explicit_h, + ) + self.assertTrue(forward.smarts) + self.assertTrue(backward.smarts) + self.assertTrue(self._has_equivalent_candidate(smart, forward.smarts)) + self.assertTrue(self._has_equivalent_candidate(smart, backward.smarts)) + return forward, backward + + def _assert_bidirectional_standardized( + self, + smart: str, + *, + core: bool, + implicit_temp: bool, + explicit_h: bool, + ) -> tuple[SynReactor, SynReactor]: + forward, backward = self._tuple_reactors( + smart, + core=core, + implicit_temp=implicit_temp, + explicit_h=explicit_h, + ) + self.assertTrue(forward.smarts) + self.assertTrue(backward.smarts) + self.assertTrue(self._has_standardized_candidate(smart, forward.smarts)) + self.assertTrue(self._has_standardized_candidate(smart, backward.smarts)) + return forward, backward + + def test_lone_pair_donation_recomputes_product_charge(self): + smart = "[NH3:1].[CH3:2][Cl:3]>>[NH3+:1][CH3:2].[Cl-:3]" + rc = rsmi_to_its(smart, core=True, format="tuple") + + # The rewrite should not need the product charge labels from the RC. 
+ # Keep the electron-state changes, but erase direct product charge. + rc.nodes[1]["charge"] = (0, 0) + rc.nodes[3]["charge"] = (0, 0) + n_types = list(rc.nodes[1]["typesGH"]) + cl_types = list(rc.nodes[3]["typesGH"]) + rc.nodes[1]["typesGH"] = ( + n_types[0], + n_types[1][:3] + (0,) + n_types[1][4:], + ) + rc.nodes[3]["typesGH"] = ( + cl_types[0], + cl_types[1][:3] + (0,) + cl_types[1][4:], + ) + + reactor = SynReactor( + "[NH3:1].[CH3:2][Cl:3]", + rc, + implicit_temp=False, + explicit_h=False, + ) + product = ITSReverter(reactor.its_list[0]).to_product_graph() + + self.assertEqual(product.nodes[1]["lone_pairs"], 0) + self.assertEqual(product.nodes[3]["lone_pairs"], 4) + self.assertEqual(product.nodes[1]["charge"], 1.0) + self.assertEqual(product.nodes[3]["charge"], -1.0) + self.assertEqual( + reactor.smarts, + ["[CH3:2][Cl:3].[NH3:1]>>[Cl-:3].[NH3+:1][CH3:2]"], + ) + + reverse_rc = rsmi_to_its(smart, core=True, format="tuple") + backward = SynReactor( + "[NH3+:1][CH3:2].[Cl-:3]", + reverse_rc, + implicit_temp=False, + explicit_h=False, + invert=True, + ) + self.assertTrue(self._has_equivalent_candidate(smart, backward.smarts)) + + def test_tuple_reactor_assigns_fresh_atom_maps_for_unmapped_substrate(self): + smart = "[NH3:1].[CH3:2][Cl:3]>>[NH3+:1][CH3:2].[Cl-:3]" + forward, backward = self._assert_bidirectional_equivalent( + smart, + core=False, + implicit_temp=True, + explicit_h=False, + ) + + for reactor in (forward, backward): + self.assertEqual(len(reactor.smarts), 1) + self.assertTrue( + all( + pair[0] > 0 and pair[1] > 0 + for _, data in reactor.its_list[0].nodes(data=True) + for pair in [data["atom_map"]] + ) + ) + + def test_tuple_reactor_assigns_fresh_atom_maps_to_expanded_hydrogens(self): + smart = "[CH3:1][Cl:2].[O:3]([H:4])[H:5]" ">>[CH3:1][O:3][H:4].[Cl:2][H:5]" + forward, backward = self._assert_bidirectional_standardized( + smart, + core=False, + implicit_temp=False, + explicit_h=True, + ) + + for reactor in (forward, backward): + 
self.assertEqual(len(reactor.smarts), 1) + self.assertTrue(all("[H]" not in smarts for smarts in reactor.smarts)) + hydrogen_maps = [ + data["atom_map"] + for _, data in reactor.its_list[0].nodes(data=True) + if data["element"] == ("H", "H") + ] + self.assertTrue(hydrogen_maps) + self.assertTrue(all(pair[0] > 0 and pair[1] > 0 for pair in hydrogen_maps)) + + def test_tuple_explicit_h_only_reconstructs_template_explicit_hydrogens(self): + smart = "[H:4][NH2:1].[CH3:2][Cl:3]>>[NH2:1][CH3:2].[Cl:3][H:4]" + forward, backward = self._assert_bidirectional_equivalent( + smart, + core=True, + implicit_temp=False, + explicit_h=True, + ) + + for reactor in (forward, backward): + self.assertEqual(len(reactor.smarts), 1) + self.assertIn("[H:4]", reactor.smarts[0]) + + def test_tuple_implicit_output_omits_mapped_explicit_hydrogens(self): + smart = "[H:4][NH2:1].[CH3:2][Cl:3]>>[NH2:1][CH3:2].[Cl:3][H:4]" + forward, backward = self._tuple_reactors( + smart, + core=True, + implicit_temp=False, + explicit_h=False, + ) + + for reactor in (forward, backward): + self.assertEqual(len(reactor.smarts), 1) + self.assertNotIn("[H:", reactor.smarts[0]) + + def test_tuple_reactor_keeps_removable_hydrogens_implicit_when_requested(self): + smart = "[CH3:1][Cl:2].[O:3]([H:4])[H:5]" ">>[CH3:1][O:3][H:4].[Cl:2][H:5]" + forward, backward = self._tuple_reactors( + smart, + core=False, + implicit_temp=False, + explicit_h=False, + ) + + self.assertEqual( + forward.smarts, ["[CH3:1][Cl:2].[OH2:3]>>[CH3:1][OH:3].[ClH:2]"] + ) + self.assertTrue(self._has_standardized_candidate(smart, backward.smarts)) + self.assertTrue(all("[H:" not in candidate for candidate in backward.smarts)) + + def test_tuple_explicit_h_renders_remaining_water_hydrogens(self): + smart = ( + "[cH:1]1[cH:2][cH:3][cH:4][cH:5][c:6]1[C:7]([H:23])=[O:8]." + "[cH:9]1[cH:10][cH:11][cH:12][cH:13][c:14]1[C:15]([H:19])=[O:16]." 
+ "[C-:17]#[N:18].[O:20]([H:21])[H:22]" + ">>" + "[cH:1]1[cH:2][cH:3][cH:4][cH:5][c:6]1[C:7]([H:23])([O:8][H:21])" + "[C:15](=[O:16])[c:14]1[cH:13][cH:12][cH:11][cH:10][cH:9]1." + "[C-:17]#[N:18].[O:20]([H:19])[H:22]" + ) + forward, backward = self._tuple_reactors( + smart, + core=False, + implicit_temp=False, + explicit_h=True, + ) + + self.assertEqual(len(forward.smarts), 1) + self.assertEqual(len(backward.smarts), 1) + for reactor in (forward, backward): + self.assertTrue(all("[OH:1]" in smarts for smarts in reactor.smarts)) + self.assertTrue(all("[cH:" in smarts for smarts in reactor.smarts)) + atom_maps = [ + pair + for _, data in reactor.its_list[0].nodes(data=True) + if data["element"] == ("H", "H") + for pair in [data["atom_map"]] + ] + self.assertEqual(len(atom_maps), len(set(atom_maps))) + + def test_tuple_explicit_h_has_equivalent_candidate_for_aromatic_case(self): + smart = ( + "[cH:1]1[cH:2][cH:3][cH:4][cH:5][c:6]1[CH:7]=[O:8]." + "[cH:9]1[cH:10][cH:11][cH:12][cH:13][c:14]1[C:15]([H:19])=[O:16]." + "[C-:17]#[N:18].[OH:20]([H:21])>>" + "[cH:1]1[cH:2][cH:3][cH:4][cH:5][c:6]1[CH:7]([O:8][H:21])" + "[C:15](=[O:16])[c:14]1[cH:13][cH:12][cH:11][cH:10][cH:9]1." 
+ "[C-:17]#[N:18].[OH:20]([H:19])" + ) + forward, backward = self._assert_bidirectional_equivalent( + smart, + core=False, + implicit_temp=False, + explicit_h=True, + ) + self.assertTrue(forward.smarts) + self.assertTrue(backward.smarts) + + def test_tuple_hh_reaction_keeps_molecular_hydrogen_explicit(self): + smart = ( + "[C:1](#[C:2][CH3:6])[CH3:5].[H:3][H:4]" + ">>[C:1](=[C:2]([H:4])[CH3:6])([H:3])[CH3:5]" + ) + for explicit_h in (False, True): + with self.subTest(explicit_h=explicit_h): + forward, backward = self._assert_bidirectional_equivalent( + smart, + core=True, + implicit_temp=False, + explicit_h=explicit_h, + ) + + for reactor in (forward, backward): + self.assertTrue( + all("[H:" in candidate for candidate in reactor.smarts) + ) + + def test_tuple_implicit_hh_reaction_consumes_hydrogen_into_hcount(self): + smart = ( + "[C:1](#[C:2][CH3:6])[CH3:5].[H:3][H:4]" ">>[CH:1](=[CH:2][CH3:6])[CH3:5]" + ) + forward, backward = self._assert_bidirectional_equivalent( + smart, + core=True, + implicit_temp=True, + explicit_h=False, + ) + + self.assertTrue(forward.smarts) + self.assertTrue(backward.smarts) + + def test_tuple_implicit_hydrogen_permutations_are_pruned_before_rewrite(self): + smart = ( + "[CH3:1][C:2]#[C:3][CH3:4].[H:5][H:6].[H:7][H:8]" + ">>[CH3:1][CH2:2][CH2:3][CH3:4]" + ) + forward, backward = self._tuple_reactors( + smart, + core=True, + implicit_temp=True, + explicit_h=False, + ) + + # Swapping equivalent H atoms within one H2 or swapping two equivalent + # H2 reagent components is provenance only and is pruned. 
+ self.assertEqual(len(forward.mappings), 2) + self.assertEqual(len(forward.its_list), 1) + self.assertTrue(self._has_standardized_candidate(smart, forward.smarts)) + self.assertTrue(self._has_standardized_candidate(smart, backward.smarts)) + + def test_tuple_real_backward_explicit_h_case_is_reproducible(self): + smart = ( + "[CH3:1][CH2:2][C:3]([CH2:4][CH3:7])([c:5]1[cH:8][cH:10][c:11]" + "([O:12][CH2:14][CH:15]([O:16][Si:18]([CH3:19])([CH3:20])[C:21]" + "([CH3:22])([CH3:23])[CH3:24])[C:17]([CH3:25])([CH3:26])[CH3:27])" + "[c:13]([CH3:28])[cH:9]1)[c:6]1[cH:29][c:31]([CH3:32])[c:33]" + "([CH:34]=[O:35])[s:30]1.[CH3:36][O:37][C:38](=[O:39])[CH2:40]" + "[NH:41][H:42].[H:43][H:44]>>[CH3:1][CH2:2][C:3]([CH2:4][CH3:7])" + "([c:5]1[cH:8][cH:10][c:11]([O:12][CH2:14][CH:15]([O:16][Si:18]" + "([CH3:19])([CH3:20])[C:21]([CH3:22])([CH3:23])[CH3:24])[C:17]" + "([CH3:25])([CH3:26])[CH3:27])[c:13]([CH3:28])[cH:9]1)[c:6]1[cH:29]" + "[c:31]([CH3:32])[c:33]([CH:34]([NH:41][CH2:40][C:38]([O:37][CH3:36])" + "=[O:39])[H:43])[s:30]1.[O:35]([H:42])[H:44]" + ) + standardizer = Standardize() + expected = standardizer.fit(smart) + substrate, product = standardizer.fit(smart, remove_aam=True).split(">>") + rc = rsmi_to_its(smart, core=True, format="tuple") + + forward = SynReactor( + substrate=substrate, + template=rc, + electron_diagnostics=True, + implicit_temp=False, + explicit_h=False, + ) + backward = SynReactor( + substrate=product, + template=rc, + electron_diagnostics=True, + implicit_temp=False, + explicit_h=False, + invert=True, + ) + + self.assertIn( + expected, + [ + standardizer.fit(candidate, remove_aam=True) + for candidate in forward.smarts + ], + ) + self.assertIn( + expected, + [ + standardizer.fit(candidate, remove_aam=True) + for candidate in backward.smarts + ], + ) + self.assertTrue(backward.its_list) + product_graph = ITSReverter(backward.its_list[0]).to_product_graph() + self.assertTrue( + any( + attrs.get("element") == "O" and attrs.get("hcount") == -1 + for _, 
attrs in product_graph.nodes(data=True) + ) + ) + + def test_radical_homolytic_cc_cleavage(self): + self._assert_radicals( + "[CH3:1][CH3:2]>>[CH3:1].[CH3:2]", + expected_product_radicals={1: 1, 2: 1}, + ) + + def test_radical_cc_recombination(self): + self._assert_radicals( + "[CH3:1].[CH3:2]>>[CH3:1][CH3:2]", + expected_product_radicals={1: 0, 2: 0}, + ) + + def test_radical_homolytic_cbr_cleavage(self): + self._assert_radicals( + "[CH3:1][Br:2]>>[CH3:1].[Br:2]", + expected_product_radicals={1: 1, 2: 1}, + ) + + def test_radical_cbr_recombination(self): + self._assert_radicals( + "[CH3:1].[Br:2]>>[CH3:1][Br:2]", + expected_product_radicals={1: 0, 2: 0}, + ) + + def _assert_radicals( + self, + smart: str, + *, + expected_product_radicals: dict[int, int], + ) -> None: + reactants, _ = smart.split(">>") + rc = rsmi_to_its(smart, core=True, format="tuple") + reactor = SynReactor( + reactants, + rc, + implicit_temp=False, + explicit_h=False, + electron_diagnostics=True, + ) + product = ITSReverter(reactor.its_list[0]).to_product_graph() + + for node, radical in expected_product_radicals.items(): + self.assertEqual(product.nodes[node]["radical"], radical) + self.assertEqual(product.nodes[node]["charge"], 0.0) + + _, products = smart.split(">>") + backward = SynReactor( + products, + rc, + implicit_temp=False, + explicit_h=False, + electron_diagnostics=True, + invert=True, + ) + self.assertTrue(self._has_equivalent_candidate(smart, reactor.smarts)) + self.assertTrue(self._has_equivalent_candidate(smart, backward.smarts)) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Synthesis/Reactor/test_syn_reactor_real_cases.py b/Test/Synthesis/Reactor/test_syn_reactor_real_cases.py new file mode 100644 index 0000000..b5c9c86 --- /dev/null +++ b/Test/Synthesis/Reactor/test_syn_reactor_real_cases.py @@ -0,0 +1,149 @@ +import unittest +import logging +from pathlib import Path + +from rdkit import Chem + +from synkit.Chem.Reaction.standardize import Standardize 
+from synkit.IO import load_database
+from synkit.IO.chem_converter import detect_its_format, rsmi_to_its
+from synkit.Synthesis.Reactor.syn_reactor import SynReactor
+
+
+class TestSynReactorRealCases(unittest.TestCase):
+    """Replay database reactions forward and backward through tuple RC templates."""
+    REAL_CASE_BATCH_SIZE = 100  # batch-test size; 34393 presumably = full sweep
+    PROGRESS_STEPS = 10  # number of progress log lines emitted per batch run
+    ERROR_LOG = Path("error.txt")  # relative path: resolved against the CWD
+
+    @classmethod
+    def setUpClass(cls):
+        cls.data = load_database("./Data/smart.json.gz")  # loaded once per class
+        cls.standardizer = Standardize()
+
+    @staticmethod
+    def _canonical_fragments(side: str) -> list[str]:
+        fragments = []
+        for fragment in side.split("."):
+            mol = Chem.MolFromSmiles(fragment)
+            if mol is None:
+                raise AssertionError(f"Could not parse fragment: {fragment}")
+            for atom in mol.GetAtoms():
+                atom.SetAtomMapNum(0)  # strip atom maps before canonical output
+            fragments.append(Chem.MolToSmiles(Chem.RemoveHs(mol)))
+        return sorted(fragments)  # order-independent comparison of fragments
+
+    def _round_trip_tuple_rc(self, index: int) -> None:
+        smart = self.data[index]["smart"]
+        rsmi = self.standardizer.fit(smart)
+        reactants, products = rsmi.split(">>")
+        rc = rsmi_to_its(smart, core=True, format="tuple")
+
+        forward = SynReactor(
+            substrate=reactants,
+            template=rc,
+            implicit_temp=False,
+            explicit_h=False,
+        )
+        backward = SynReactor(
+            substrate=products,
+            template=rc,
+            implicit_temp=False,
+            explicit_h=False,
+            invert=True,
+        )
+
+        forward_smis = [
+            self.standardizer.fit(item, remove_aam=True) for item in forward.smarts
+        ]
+        backward_smis = [
+            self.standardizer.fit(item, remove_aam=True) for item in backward.smarts
+        ]
+
+        self.assertEqual(detect_its_format(rc), "tuple")
+        self.assertIn(rsmi, forward_smis)
+        self.assertIn(rsmi, backward_smis)
+
+    def _write_case_error(self, index: int, exc: BaseException) -> None:
+        """Append one reproducible failed real-case record to ``error.txt``."""
+        entry = self.data[index]
+        smart = entry["smart"]
+        try:
+            rsmi = self.standardizer.fit(smart)
+        except Exception as standardize_exc:
+            rsmi = f"<standardization failed: {standardize_exc!r}>"
+
+        with self.ERROR_LOG.open("a", encoding="utf-8") as handle:
+            handle.write(
+                "\n".join(
+                    [
+                        "=" * 88,
+                        f"index: {index}",
+                        f"reaction_id: {entry.get('R-id')}",
+                        f"error_type: {type(exc).__name__}",
+                        f"error: {exc}",
+                        "smart:",
+                        smart,
+                        "standardized_rsmi:",
+                        rsmi,
+                        "",
+                    ]
+                )
+            )
+
+    def test_first_fixture_runs_through_tuple_template(self):
+        smart = self.data[0]["smart"]
+        substrate, expected_product = smart.split(">>")
+
+        reactor = SynReactor(
+            substrate,
+            smart,
+            explicit_h=False,
+            template_format="tuple",
+        )
+
+        self.assertEqual(detect_its_format(reactor.rule.rc.raw), "tuple")
+        self.assertTrue(reactor.its_list)
+        self.assertEqual(len(reactor.smarts), 1)
+        actual_product = reactor.smarts[0].split(">>")[1]
+        self.assertEqual(
+            self._canonical_fragments(actual_product),
+            self._canonical_fragments(expected_product),
+        )
+
+    def test_first_fixture_round_trips_with_tuple_rc(self):
+        self._round_trip_tuple_rc(0)
+
+    def test_curated_tuple_rc_matrix_round_trips(self):
+        for index in (0, 1, 2, 10, 25, 100):
+            with self.subTest(index=index, reaction_id=self.data[index]["R-id"]):
+                self._round_trip_tuple_rc(index)
+
+    def test_backward_role_regression_from_index_33(self):
+        self._round_trip_tuple_rc(33)
+
+    def test_tuple_rc_round_trip_batch(self):
+        logger = logging.getLogger(__name__)
+        total = self.REAL_CASE_BATCH_SIZE
+        progress_every = max(1, total // self.PROGRESS_STEPS)
+        self.ERROR_LOG.unlink(missing_ok=True)  # start every run with a fresh log
+
+        for index in range(total):
+            completed = index + 1
+            if completed == 1 or completed % progress_every == 0 or completed == total:
+                logger.info(
+                    "tuple RC real-case progress: %d/%d (%.0f%%)",
+                    completed,
+                    total,
+                    completed / total * 100,
+                )
+            with self.subTest(index=index, reaction_id=self.data[index]["R-id"]):
+                try:
+                    self._round_trip_tuple_rc(index)
+                except Exception as exc:
+                    self._write_case_error(index, exc)  # persist a repro record
+                    raise  # subTest still reports this index as failed
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Test/Synthesis/Reactor/test_syn_reactor_rewrite_modes.py b/Test/Synthesis/Reactor/test_syn_reactor_rewrite_modes.py new
file mode 100644 index 0000000..41ec7aa --- /dev/null +++ b/Test/Synthesis/Reactor/test_syn_reactor_rewrite_modes.py @@ -0,0 +1,199 @@ +import unittest + +import networkx as nx +from synkit.IO.chem_converter import detect_its_format, rsmi_to_its +from synkit.Synthesis.Reactor.syn_reactor import SynReactor + + +class TestSynReactorRewriteModes(unittest.TestCase): + def test_detects_legacy_template(self): + rc = nx.Graph() + rc.add_edge(1, 2, order=(1.0, 2.0)) + + self.assertFalse(SynReactor._is_electron_aware_template(rc)) + + def test_detects_electron_aware_template(self): + rc = nx.Graph() + rc.add_edge( + 1, + 2, + order=(1.0, 2.0), + sigma_order=(1.0, 1.0), + pi_order=(0.0, 1.0), + ) + + self.assertTrue(SynReactor._is_electron_aware_template(rc)) + + def test_invert_tuple_template_preserves_tuple_representation(self): + template = "[CH3:1][CH3:2]>>[CH2:1]=[CH2:2]" + reactor = SynReactor( + "[CH2:1]=[CH2:2]", + template, + invert=True, + explicit_h=False, + template_format="tuple", + ) + + self.assertEqual(detect_its_format(reactor.rule.rc.raw), "tuple") + self.assertEqual(reactor.rule.rc.raw.edges[1, 2]["pi_order"], (1.0, 0.0)) + + def test_electron_aware_rewrite_refreshes_product_accounting(self): + host = nx.Graph() + host.add_node( + 1, + element="C", + charge=0, + hcount=3, + lone_pairs=0, + radical=0, + valence_electrons=4, + ) + host.add_node( + 2, + element="C", + charge=0, + hcount=3, + lone_pairs=0, + radical=0, + valence_electrons=4, + ) + host.add_edge(1, 2, order=1.0, sigma_order=1.0, pi_order=0.0) + + rc = nx.Graph() + rc.add_node(10, typesGH=(("C", False, 3, 0, []), ("C", False, 2, 0, []))) + rc.add_node(20, typesGH=(("C", False, 3, 0, []), ("C", False, 2, 0, []))) + rc.add_edge( + 10, + 20, + order=(1.0, 2.0), + sigma_order=(1.0, 1.0), + pi_order=(0.0, 1.0), + standard_order=-1.0, + ) + + rewritten = SynReactor._glue_graph(host, rc, {10: 1, 20: 2})[0] + + self.assertEqual(rewritten.edges[1, 2]["sigma_order"], (1.0, 1.0)) + 
self.assertEqual(rewritten.edges[1, 2]["pi_order"], (0.0, 1.0)) + self.assertEqual(rewritten.edges[1, 2]["kekule_order"][1], 2.0) + self.assertEqual(rewritten.nodes[1]["hcount"], (3, 2)) + self.assertEqual(rewritten.nodes[1]["recomputed_charge"][1], 0.0) + + def test_electron_aware_to_smarts_uses_kekule_product_reconstruction(self): + its = nx.Graph() + for node in range(6): + its.add_node( + node, + element=("C", "C"), + charge=(0, 0), + hcount=(1, 1), + lone_pairs=(0, 0), + radical=(0, 0), + valence_electrons=(4, 4), + present=(True, True), + ) + cycle_edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)] + for idx, edge in enumerate(cycle_edges): + order = 1.0 if idx % 2 == 0 else 2.0 + its.add_edge( + *edge, + order=(1.5, 1.5), + kekule_order=(order, order), + sigma_order=(1.0, 1.0), + pi_order=(order - 1.0, order - 1.0), + standard_order=0.0, + ) + its.graph["electron_aware_rewrite"] = True + + self.assertEqual(SynReactor._to_smarts(its), "c1ccccc1>>c1ccccc1") + + def test_legacy_output_does_not_switch_modes_from_host_sigma_pi(self): + its = nx.Graph() + its.add_node( + 1, + element="C", + charge=0, + hcount=3, + typesGH=(("C", False, 3, 0, []), ("C", False, 3, 0, [])), + ) + its.add_node( + 2, + element="C", + charge=0, + hcount=3, + typesGH=(("C", False, 3, 0, []), ("C", False, 3, 0, [])), + ) + its.add_edge( + 1, + 2, + order=(1.0, 1.0), + sigma_order=(1.0, 1.0), + pi_order=(0.0, 0.0), + standard_order=0.0, + ) + its.graph["electron_aware_rewrite"] = False + + self.assertEqual( + SynReactor._to_smarts(its), + "[CH3:1][CH3:2]>>[CH3:1][CH3:2]", + ) + + def test_public_tuple_template_reaches_electron_aware_rewrite(self): + reactor = SynReactor( + "[CH3:1][CH3:2]", + "[CH3:1][CH3:2]>>[CH2:1]=[CH2:2]", + explicit_h=False, + template_format="tuple", + ) + + self.assertEqual(detect_its_format(reactor.rule.rc.raw), "tuple") + self.assertTrue(reactor.its_list) + self.assertTrue(reactor.its_list[0].graph["electron_aware_rewrite"]) + 
self.assertEqual(reactor.smarts, ["[CH3:1][CH3:2]>>[CH2:1]=[CH2:2]"]) + + def test_diagnostics_are_opt_in_and_do_not_change_products(self): + template = "[CH3:1][CH3:2]>>[CH2:1]=[CH2:2]" + baseline = SynReactor( + "[CH3:1][CH3:2]", + template, + explicit_h=False, + template_format="tuple", + ) + diagnosed = SynReactor( + "[CH3:1][CH3:2]", + template, + explicit_h=False, + template_format="tuple", + electron_diagnostics=True, + ) + + self.assertEqual(baseline.diagnostics, []) + self.assertEqual(diagnosed.smarts, baseline.smarts) + self.assertEqual(len(diagnosed.diagnostics), 1) + self.assertTrue(diagnosed.diagnostics[0]["electron_aware_rewrite"]) + self.assertEqual(diagnosed.diagnostics[0]["mismatch_count"], 0) + + def test_diagnostics_report_public_nonzero_mismatch(self): + template = rsmi_to_its( + "[OH:1][CH3:2]>>[O+:1]=[CH2:2]", + format="tuple", + ) + template.nodes[1]["lone_pairs"] = (2, 0) + reactor = SynReactor( + "[OH:1][CH3:2]", + template, + explicit_h=False, + electron_diagnostics=True, + ) + + self.assertEqual(len(reactor.smarts), 1) + self.assertEqual(len(reactor.diagnostics), 1) + self.assertEqual(reactor.diagnostics[0]["mismatch_count"], 1) + self.assertEqual( + reactor.diagnostics[0]["mismatches"][1], + {"charge": 1, "recomputed_charge": 3.0}, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Vis/test_its_drawer.py b/Test/Vis/test_its_drawer.py new file mode 100644 index 0000000..78d452c --- /dev/null +++ b/Test/Vis/test_its_drawer.py @@ -0,0 +1,134 @@ +import unittest + +import matplotlib + +matplotlib.use("Agg") + +from synkit.IO import rsmi_to_its # noqa: E402 +from synkit.Vis.its_drawer import ( # noqa: E402 + draw_its_from_rsmi, + draw_its_graph, + draw_its_only, +) + + +class TestITSDrawer(unittest.TestCase): + rsmi = ( + "[Cl:1][Cl:2].[H:9][c:3]1[cH:4][cH:5][cH:6][cH:7][cH:8]1" + ">>" + "[Cl:1][H:9].[Cl:2][c:3]1[cH:4][cH:5][cH:6][cH:7][cH:8]1" + ) + + def test_draw_tuple_its_from_rsmi(self): + fig, axes = 
draw_its_from_rsmi( + self.rsmi, + format="tuple", + core=False, + title="chlorination ITS", + ) + + self.assertIs(fig, axes[0].figure) + self.assertEqual(len(axes), 1) + self.assertEqual(axes[0].get_title(), "chlorination ITS") + + def test_draw_tuple_its_graph(self): + its = rsmi_to_its(self.rsmi, core=False, format="tuple") + + fig, axes = draw_its_graph(its, title="tuple ITS") + + self.assertIs(fig, axes[0].figure) + self.assertEqual(len(axes), 1) + self.assertEqual(axes[0].get_title(), "tuple ITS") + + def test_draw_its_only_can_show_changed_edge_labels(self): + its = rsmi_to_its(self.rsmi, core=False, format="tuple") + + ax = draw_its_only( + its, + title="pretty ITS", + show_edge_labels=True, + edge_label_mode="kekule", + ) + + self.assertEqual(ax.get_title(), "pretty ITS") + + def test_draw_its_only_supports_sigma_pi_labels(self): + its = rsmi_to_its(self.rsmi, core=False, format="tuple") + + ax = draw_its_only( + its, + title="sigma/pi ITS", + show_edge_labels=True, + edge_label_mode="sigma_pi", + ) + + self.assertEqual(ax.get_title(), "sigma/pi ITS") + + def test_sigma_pi_labels_only_include_changed_components(self): + from synkit.Vis.its_drawer import _its_display_graph + + its = rsmi_to_its(self.rsmi, core=False, format="tuple") + display = _its_display_graph(its) + labels = [ + attrs["its_label_sigma_pi"] + for _, _, attrs in display.edges(data=True) + if attrs["its_state"] != "unchanged" + ] + + self.assertTrue(labels) + self.assertTrue(all(label.startswith("σ") for label in labels)) + self.assertTrue(all("π" not in label for label in labels)) + + def test_electron_labels_capture_charge_and_lone_pair_changes_separately(self): + from synkit.Vis.its_drawer import _its_display_graph + + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + its = rsmi_to_its(rsmi, core=False, format="tuple") + display = _its_display_graph(its) + + self.assertEqual(display.nodes[2]["its_electron_label_charge"], "q0→-1") + 
self.assertEqual(display.nodes[2]["its_electron_label_lone_pair"], "λ3→4") + self.assertEqual(display.nodes[3]["its_electron_label_charge"], "q0→+1") + self.assertEqual(display.nodes[3]["its_electron_label_lone_pair"], "λ1→0") + + def test_draw_its_only_can_show_electron_labels(self): + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + its = rsmi_to_its(rsmi, core=False, format="tuple") + + ax = draw_its_only( + its, + title="SN2 ITS", + show_electron_labels=True, + electron_label_mode="lone_pair", + ) + + self.assertEqual(ax.get_title(), "SN2 ITS") + + def test_invalid_its_label_modes_raise(self): + its = rsmi_to_its(self.rsmi, core=False, format="tuple") + + with self.assertRaises(ValueError): + draw_its_only(its, edge_label_mode="verbose") + with self.assertRaises(ValueError): + draw_its_only(its, show_electron_labels=True, electron_label_mode="both") + + def test_draw_tuple_its_projection_when_requested(self): + its = rsmi_to_its(self.rsmi, core=False, format="tuple") + + fig, axes = draw_its_graph(its, title="tuple ITS", projection=True) + + self.assertIs(fig, axes[-1].figure) + self.assertGreaterEqual(len(axes), 4) + self.assertEqual(axes[-1].get_title(), "ITS delta") + + def test_draw_legacy_its_graph_without_delta(self): + its = rsmi_to_its(self.rsmi, core=False, format="typesGH") + + fig, axes = draw_its_graph(its, include_delta_panel=False, projection=True) + + self.assertIs(fig, axes[0].figure) + self.assertGreaterEqual(len(axes), 4) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Vis/test_molecule_drawer.py b/Test/Vis/test_molecule_drawer.py new file mode 100644 index 0000000..6f76ae7 --- /dev/null +++ b/Test/Vis/test_molecule_drawer.py @@ -0,0 +1,73 @@ +import unittest + +import matplotlib +import networkx as nx + +matplotlib.use("Agg") + +from synkit.IO.chem_converter import smiles_to_graph # noqa: E402 +from synkit.Vis.molecule_drawer import draw_molecule_graph # noqa: E402 + + +class TestMoleculeDrawer(unittest.TestCase): 
+ def test_draw_molecule_graph_returns_axes_without_mutation(self): + graph = nx.Graph() + graph.add_node(1, element="C", charge=0, atom_map=1) + graph.add_node(2, element="O", charge=0, atom_map=2) + graph.add_edge(1, 2, order=2) + before_nodes = dict(graph.nodes(data=True)) + before_edges = list(graph.edges(data=True)) + + ax = draw_molecule_graph(graph, label_mode="all", show_atom_map=True) + + self.assertEqual(ax.get_aspect(), 1.0) + self.assertEqual(dict(graph.nodes(data=True)), before_nodes) + self.assertEqual(list(graph.edges(data=True)), before_edges) + + def test_draw_aromatic_molecule(self): + graph = nx.cycle_graph(6) + mapping = {node: node + 1 for node in graph.nodes} + graph = nx.relabel_nodes(graph, mapping) + nx.set_node_attributes(graph, "C", "element") + nx.set_node_attributes(graph, 0, "charge") + nx.set_node_attributes(graph, True, "aromatic") + nx.set_edge_attributes(graph, 1.5, "order") + nx.set_edge_attributes(graph, True, "aromatic") + + ax = draw_molecule_graph(graph, aromatic_style="circle") + + self.assertEqual(ax.get_aspect(), 1.0) + + def test_draw_with_rdkit_panel(self): + graph = nx.Graph() + graph.add_node(1, element="N", charge=1, atom_map=1) + graph.add_node(2, element="C", charge=0, atom_map=2) + graph.add_edge(1, 2, order=1) + + fig, axes = draw_molecule_graph(graph, include_rdkit_panel=True) + + self.assertEqual(len(axes), 2) + self.assertIs(fig, axes[0].figure) + + def test_draw_real_smiles_graph_aspirin_like_case(self): + graph = smiles_to_graph( + "CC(=O)OC1=CC=CC=C1C(=O)O", + sanitize=True, + use_index_as_atom_map=True, + ) + + ax = draw_molecule_graph( + graph, + label_mode="hetero", + show_atom_map=True, + aromatic_style="circle", + title="Aspirin-like SMILES graph", + ) + + self.assertEqual(graph.number_of_nodes(), 13) + self.assertEqual(graph.number_of_edges(), 13) + self.assertEqual(ax.get_title(), "Aspirin-like SMILES graph") + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Vis/test_mtg_drawer.py 
b/Test/Vis/test_mtg_drawer.py new file mode 100644 index 0000000..6e6fb40 --- /dev/null +++ b/Test/Vis/test_mtg_drawer.py @@ -0,0 +1,165 @@ +import unittest + +import matplotlib +import networkx as nx + +matplotlib.use("Agg") + +from synkit.Graph.ITS.its_construction import ITSConstruction # noqa: E402 +from synkit.Graph.MTG.mtg import MTG # noqa: E402 +from synkit.IO import load_database # noqa: E402 +from synkit.Vis.mtg_drawer import ( # noqa: E402 + _mtg_display_graph, + draw_mtg_graph, + draw_mtg_steps, +) + + +class TestMTGDrawer(unittest.TestCase): + @staticmethod + def _atom(element, *, hcount=0, charge=0, lone_pairs=0, radical=0): + return { + "element": element, + "aromatic": False, + "hcount": hcount, + "charge": charge, + "lone_pairs": lone_pairs, + "radical": radical, + "valence_electrons": {"H": 1, "C": 4, "N": 5, "O": 6, "Cl": 7}[element], + } + + @staticmethod + def _bond(graph, u, v, sigma=1.0, pi=0.0): + graph.add_edge( + u, + v, + order=sigma + pi, + kekule_order=sigma + pi, + sigma_order=sigma, + pi_order=pi, + ) + + def _graph(self, nodes, edges): + graph = nx.Graph() + for node, attrs in nodes.items(): + graph.add_node(node, **attrs) + for edge in edges: + self._bond(graph, *edge) + return graph + + def _mtg(self): + g0 = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + g1 = self._graph( + { + 1: self._atom("C", hcount=3, radical=1), + 2: self._atom("Cl", radical=1, lone_pairs=3), + }, + [], + ) + g2 = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + return MTG( + [ITSConstruction.construct(g0, g1), ITSConstruction.construct(g1, g2)], + mappings=[{1: 1, 2: 2}], + ) + + def test_draw_mtg_graph_accepts_mtg_object(self): + mtg = self._mtg() + + fig, ax = draw_mtg_graph(mtg, title="radical rebound") + + self.assertIs(fig, ax.figure) + self.assertEqual(ax.get_title(), "radical rebound") + + def 
test_draw_mtg_graph_accepts_raw_graph_without_mutation(self): + graph = self._mtg().get_mtg() + before_nodes = dict(graph.nodes(data=True)) + before_edges = list(graph.edges(data=True)) + + fig, ax = draw_mtg_graph(graph) + + self.assertIs(fig, ax.figure) + self.assertEqual(dict(graph.nodes(data=True)), before_nodes) + self.assertEqual(list(graph.edges(data=True)), before_edges) + + def test_draw_mtg_graph_supports_3d_layout(self): + mtg = self._mtg() + + fig, ax = draw_mtg_graph(mtg, dimension="3d", layout="spring") + + self.assertIs(fig, ax.figure) + self.assertEqual(getattr(ax, "name", None), "3d") + + def test_mtg_edge_labels_compress_by_default(self): + graph = self._mtg().get_mtg() + + compact = _mtg_display_graph( + graph, + mode="timeline", + show_atom_map=True, + show_node_badges=False, + hydrogen_mode="changed", + changed_only=True, + compress=True, + ) + full = _mtg_display_graph( + graph, + mode="timeline", + show_atom_map=True, + show_node_badges=False, + hydrogen_mode="changed", + changed_only=True, + compress=False, + ) + + self.assertEqual(compact.edges[1, 2]["label"], "1→1") + self.assertEqual(full.edges[1, 2]["label"], "1→0→1") + + def test_draw_mtg_steps_draws_ordered_its_panels_and_composed_panel(self): + mtg = self._mtg() + + fig, axes = draw_mtg_steps(mtg, include_composed=True, show_edge_labels=True) + + self.assertIs(fig, axes[0].figure) + self.assertEqual(len(axes), 3) + self.assertEqual( + [ax.get_title() for ax in axes], ["Step 1", "Step 2", "Composed"] + ) + + def test_draw_mtg_steps_validates_indices(self): + with self.assertRaises(IndexError): + draw_mtg_steps(self._mtg(), steps=[2]) + + def test_draw_mtg_graph_handles_real_neutral_mechanism(self): + data = load_database("Data/Testcase/mech.json.gz")[0] + neutral = data["mechanisms"][1] + steps = [step["smart_string"] for step in neutral["steps"]] + mtg = MTG(steps, mcs_mol=True) + graph = mtg.get_mtg() + + fig, ax = draw_mtg_graph( + mtg, + title=neutral["mech_name"], + 
hydrogen_mode="changed", + show_edge_labels=True, + ) + + self.assertIs(fig, ax.figure) + self.assertEqual(ax.get_title(), "Aldol reaction (neutral cat)") + self.assertTrue(mtg._tuple_its) + self.assertFalse(any("typesGH" in attrs for _, attrs in graph.nodes(data=True))) + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Vis/test_reaction_drawer.py b/Test/Vis/test_reaction_drawer.py new file mode 100644 index 0000000..ed242c1 --- /dev/null +++ b/Test/Vis/test_reaction_drawer.py @@ -0,0 +1,54 @@ +import unittest + +import matplotlib + +matplotlib.use("Agg") + +# flake8: noqa: E402 +from synkit.IO.chem_converter import rsmi_to_graph # noqa: E402 +from synkit.Vis.reaction_drawer import ( # noqa: E402 + draw_reaction_graph, + draw_reaction_graphs, + find_reaction_highlights, +) + + +class TestReactionDrawer(unittest.TestCase): + def test_find_reaction_highlights_detects_broken_and_formed_bonds(self): + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + reactant, product = rsmi_to_graph( + rsmi, + drop_non_aam=False, + use_index_as_atom_map=True, + ) + + highlights = find_reaction_highlights(reactant, product) + + self.assertIn(frozenset({1, 2}), highlights.broken_bonds) + self.assertIn(frozenset({1, 3}), highlights.formed_bonds) + self.assertEqual(highlights.changed_atoms, frozenset({1, 2, 3})) + + def test_draw_reaction_graph_from_rsmi(self): + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + + fig, axes = draw_reaction_graph(rsmi, title="SN2") + + self.assertIs(fig, axes[0].figure) + self.assertEqual(len(axes), 5) + + def test_draw_reaction_graphs_accepts_prebuilt_graphs(self): + rsmi = "[C:1]=[O:2].[O:3]>>[C:1]([O:2])[O:3]" + reactant, product = rsmi_to_graph( + rsmi, + drop_non_aam=False, + use_index_as_atom_map=True, + ) + + fig, axes = draw_reaction_graphs(reactant, product, title="addition") + + self.assertIs(fig, axes[-1].figure) + self.assertEqual(len(axes), 4) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/Test/Vis/test_visual_drawer.py b/Test/Vis/test_visual_drawer.py new file mode 100644 index 0000000..b20e5b6 --- /dev/null +++ b/Test/Vis/test_visual_drawer.py @@ -0,0 +1,48 @@ +import unittest + +import matplotlib +import networkx as nx + +matplotlib.use("Agg") + +from synkit.Vis.visual_drawer import draw_graph # noqa: E402 +from synkit.Vis.visual_model import to_visual_graph # noqa: E402 + + +class TestVisualDrawer(unittest.TestCase): + def test_draw_graph_returns_figure_and_axes_without_mutating_input(self): + graph = nx.Graph() + graph.add_node(1, element="C", atom_map=1, charge=0) + graph.add_node(2, element="O", atom_map=2, charge=0) + graph.add_edge(1, 2, order=(1.0, 2.0)) + before_nodes = dict(graph.nodes(data=True)) + before_edges = list(graph.edges(data=True)) + + fig, ax = draw_graph(graph) + + self.assertIs(fig, ax.figure) + self.assertEqual(dict(graph.nodes(data=True)), before_nodes) + self.assertEqual(list(graph.edges(data=True)), before_edges) + + def test_draw_graph_accepts_visual_graph(self): + graph = nx.Graph() + graph.add_node(1, element="N", atom_map=1, hcount=(2, 1)) + graph.add_node(2, element="C", atom_map=2, hcount=(3, 3)) + graph.add_edge( + 1, + 2, + order=(1.0, 2.0), + kekule_order=(1.0, 2.0), + sigma_order=(1.0, 1.0), + pi_order=(0.0, 1.0), + ) + visual = to_visual_graph(graph, mode="sigma_pi") + + fig, ax = draw_graph(visual, mode="sigma_pi") + + self.assertIs(fig, ax.figure) + self.assertEqual(ax.get_title(), "tuple_its") + + +if __name__ == "__main__": + unittest.main() diff --git a/Test/Vis/test_visual_model.py b/Test/Vis/test_visual_model.py new file mode 100644 index 0000000..f2fdc8c --- /dev/null +++ b/Test/Vis/test_visual_model.py @@ -0,0 +1,137 @@ +import unittest + +import networkx as nx + +from synkit.Graph.ITS.its_construction import ITSConstruction +from synkit.Graph.MTG.mtg import MTG +from synkit.Vis.visual_model import ( + detect_visual_kind, + iter_changed_edges, + iter_changed_nodes, + summarize_visual_graph, + 
to_visual_graph, +) + + +class TestVisualModel(unittest.TestCase): + @staticmethod + def _atom(element, *, hcount=0, charge=0, lone_pairs=0, radical=0): + return { + "element": element, + "aromatic": False, + "hcount": hcount, + "charge": charge, + "lone_pairs": lone_pairs, + "radical": radical, + "valence_electrons": {"H": 1, "C": 4, "N": 5, "O": 6, "Cl": 7}[element], + } + + @staticmethod + def _bond(graph, u, v, sigma=1.0, pi=0.0): + graph.add_edge( + u, + v, + order=sigma + pi, + kekule_order=sigma + pi, + sigma_order=sigma, + pi_order=pi, + ) + + def _graph(self, nodes, edges): + graph = nx.Graph() + for node, attrs in nodes.items(): + graph.add_node(node, **attrs) + for edge in edges: + self._bond(graph, *edge) + return graph + + def test_detects_molecule(self): + graph = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + + visual = to_visual_graph(graph) + + self.assertEqual(detect_visual_kind(graph), "molecule") + self.assertEqual(visual.kind, "molecule") + self.assertEqual(visual.edges[0].label, "—") + + def test_detects_legacy_its(self): + its = nx.Graph() + its.add_node(1, element="C", atom_map=1) + its.add_node(2, element="O", atom_map=2) + its.add_edge(1, 2, order=(1.0, 2.0), standard_order=-1.0) + + visual = to_visual_graph(its) + + self.assertEqual(detect_visual_kind(its), "legacy_its") + self.assertEqual(visual.edges[0].state, "order_changed") + self.assertEqual(visual.edges[0].label, "—>=") + + def test_detects_tuple_its_and_sigma_pi_labels(self): + reactant = self._graph( + { + 1: self._atom("N", hcount=2, lone_pairs=1), + 2: self._atom("C", hcount=3), + }, + [(1, 2, 1.0, 0.0)], + ) + product = self._graph( + { + 1: self._atom("N", hcount=1, lone_pairs=1, radical=1), + 2: self._atom("C", hcount=3), + }, + [(1, 2, 1.0, 1.0)], + ) + its = ITSConstruction.construct(reactant, product) + + visual = to_visual_graph(its, mode="sigma_pi") + + self.assertEqual(detect_visual_kind(its), 
"tuple_its") + self.assertIn("π0>1", visual.edges[0].label) + self.assertEqual(visual.edges[0].state, "order_changed") + self.assertEqual([node.node_id for node in iter_changed_nodes(visual)], [1]) + + def test_detects_compact_mtg_and_transient_edges(self): + g0 = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + g1 = self._graph( + { + 1: self._atom("C", hcount=3, radical=1), + 2: self._atom("Cl", radical=1, lone_pairs=3), + }, + [], + ) + g2 = self._graph( + { + 1: self._atom("C", hcount=3), + 2: self._atom("Cl", lone_pairs=3), + }, + [(1, 2, 1.0, 0.0)], + ) + mtg = MTG( + [ITSConstruction.construct(g0, g1), ITSConstruction.construct(g1, g2)], + mappings=[{1: 1, 2: 2}], + ).get_mtg() + + visual = to_visual_graph(mtg, mode="timeline") + changed_edges = list(iter_changed_edges(visual)) + + self.assertEqual(detect_visual_kind(mtg), "compact_mtg") + self.assertEqual(changed_edges[0].state, "transient") + self.assertIn("σ:1-0-1", changed_edges[0].label) + summary = summarize_visual_graph(visual) + self.assertEqual(summary["kind"], "compact_mtg") + + +if __name__ == "__main__": + unittest.main() diff --git a/doc/_static/custom.css b/doc/_static/custom.css index e79ec76..b9ce14a 100644 --- a/doc/_static/custom.css +++ b/doc/_static/custom.css @@ -21,6 +21,8 @@ --sk-header-height: 64px; --sk-accent: #1f77b4; + --sk-accent-2: #0f766e; + --sk-accent-3: #7c3aed; --sk-radius: 10px; } @@ -45,6 +47,23 @@ padding-bottom: 0.35rem; } +/* Keep the PyData header tools slightly farther to the right. 
*/ +@media (min-width: 992px) { + .navbar-header-items { + justify-content: flex-end; + } + + .navbar-header-items__end { + margin-left: auto; + padding-right: 0.45rem; + column-gap: 0.35rem; + } + + .navbar-header-items__end .navbar-persistent--container { + margin-right: 0.65rem; + } +} + /* ========================================================================= Sidebar widths ========================================================================= */ @@ -113,6 +132,74 @@ box-shadow: 0 1px 6px rgba(15, 23, 42, 0.04); } +.sk-feature-card { + border: 1px solid rgba(31, 119, 180, 0.16); + border-radius: 14px; + background: + linear-gradient(180deg, rgba(255, 255, 255, 0.92), rgba(248, 250, 252, 0.88)); + box-shadow: 0 10px 24px rgba(15, 23, 42, 0.06); +} + +.sk-feature-card .sd-card-title { + color: var(--sk-accent); +} + +html[data-theme="dark"] .sk-feature-card { + background: + linear-gradient(180deg, rgba(30, 41, 59, 0.78), rgba(15, 23, 42, 0.78)); + border-color: rgba(125, 211, 252, 0.18); +} + +.sk-badge-row { + display: flex; + flex-wrap: wrap; + gap: 0.45rem; + margin: 0.75rem 0 1.1rem; +} + +.sk-badge { + display: inline-flex; + align-items: center; + min-height: 1.75rem; + padding: 0.18rem 0.62rem; + border-radius: 999px; + background: rgba(31, 119, 180, 0.1); + color: var(--sk-accent); + font-weight: 650; + font-size: 0.82rem; + letter-spacing: 0.01em; +} + +.sk-badge.green { + background: rgba(15, 118, 110, 0.1); + color: var(--sk-accent-2); +} + +.sk-badge.purple { + background: rgba(124, 58, 237, 0.1); + color: var(--sk-accent-3); +} + +.bd-content .list-table table { + border: 1px solid rgba(15, 23, 42, 0.08); + border-radius: 10px; + overflow: hidden; + display: table; +} + +.bd-content .list-table th { + background: rgba(31, 119, 180, 0.07); + font-size: 0.84rem; +} + +html[data-theme="dark"] .bd-content .list-table table { + border-color: rgba(255, 255, 255, 0.09); +} + +html[data-theme="dark"] .bd-content .list-table th { + background: 
rgba(125, 211, 252, 0.09); +} + div.highlight pre, pre.literal-block, .codehilite pre, @@ -314,4 +401,4 @@ html[data-theme="dark"] .sk-globaltoc.card { html[data-theme="dark"] .sk-bibliography .bibtex-bibliography { background: rgba(255, 255, 255, 0.05); border-color: rgba(255, 255, 255, 0.08); -} \ No newline at end of file +} diff --git a/doc/api/graph.rst b/doc/api/graph.rst index c61e8a0..c2e739b 100644 --- a/doc/api/graph.rst +++ b/doc/api/graph.rst @@ -76,6 +76,33 @@ Features :members: :show-inheritance: +Functional groups +----------------- + +.. automodule:: synkit.Graph.FG.api + :members: + :show-inheritance: + +.. automodule:: synkit.Graph.FG.audit + :members: + :show-inheritance: + +.. automodule:: synkit.Graph.FG.catalog + :members: + :show-inheritance: + +.. automodule:: synkit.Graph.FG.detector + :members: + :show-inheritance: + +.. automodule:: synkit.Graph.FG.model + :members: + :show-inheritance: + +.. automodule:: synkit.Graph.FG.ring_system + :members: + :show-inheritance: + Hydrogen utilities ------------------ @@ -126,6 +153,17 @@ ITS :members: :show-inheritance: +Mechanistic and Lewis-state utilities +------------------------------------- + +.. automodule:: synkit.Graph.Mech.conversion + :members: + :show-inheritance: + +.. automodule:: synkit.Graph.Mech.electron_accounting + :members: + :show-inheritance: + Matcher ------- @@ -196,14 +234,6 @@ Matcher MTG --- -.. automodule:: synkit.Graph.MTG.group_comp - :members: - :show-inheritance: - -.. automodule:: synkit.Graph.MTG.groupoid - :members: - :show-inheritance: - .. automodule:: synkit.Graph.MTG.mcs_matcher :members: :show-inheritance: diff --git a/doc/api/vis.rst b/doc/api/vis.rst index 1023a17..d285a51 100644 --- a/doc/api/vis.rst +++ b/doc/api/vis.rst @@ -1,8 +1,41 @@ Visualization ============= -Visualization utilities for reactions, rules, graphs, CRNs, embeddings, -and output export helpers. 
+Visualization utilities for molecule graphs, reactions, ITS graphs, diagnostic +graph adapters, CRNs, embeddings, and output export helpers. + +Modern molecule/reaction/ITS renderers +-------------------------------------- + +.. automodule:: synkit.Vis.molecule_drawer + :members: + :show-inheritance: + +.. automodule:: synkit.Vis.reaction_drawer + :members: + :show-inheritance: + +.. automodule:: synkit.Vis.its_drawer + :members: + :show-inheritance: + +.. automodule:: synkit.Vis.mtg_drawer + :members: + :show-inheritance: + +Diagnostic adapter layer +------------------------ + +.. automodule:: synkit.Vis.visual_model + :members: + :show-inheritance: + +.. automodule:: synkit.Vis.visual_drawer + :members: + :show-inheritance: + +Legacy and utility visualizers +------------------------------ .. automodule:: synkit.Vis.rxn_vis :members: @@ -16,15 +49,15 @@ and output export helpers. :members: :show-inheritance: -.. automodule:: synkit.Vis.crn_vis +.. automodule:: synkit.Vis.chemical_space :members: :show-inheritance: -.. automodule:: synkit.Vis.embedding +.. automodule:: synkit.Vis.crn_vis :members: :show-inheritance: -.. automodule:: synkit.Vis.chemical_space +.. automodule:: synkit.Vis.embedding :members: :show-inheritance: diff --git a/doc/changelog.rst b/doc/changelog.rst index 742fd7f..d40d263 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,94 @@ Changelog ========= +Version 1.4.0 +------------- + +**Highlights** + +- Added the Lewis State Graph (LSG) framework for ``SynReactor``. LSG + templates carry ``lone_pairs``, ``radical``, ``valence_electrons``, + ``sigma_order``, ``pi_order``, and ``kekule_order`` so the NetworkX reactor + can rewrite from explicit valence-state information while keeping the legacy + ``typesGH`` path available. +- Added graph-native functional-group detection under ``synkit.Graph.FG``. 
+ The detector works directly on SynKit molecular ``networkx`` graphs and + provides a SMILES convenience API returning both the graph and detected + ``(name, atom_indices)`` labels. +- Added compact MTG and visualization helpers for LSG/ITS and MTG timeline + inspection. The modern Vis API now covers molecule graphs, reaction panels, + ITS-only drawings, Lewis-state labels, and MTG step/timeline panels. + +**Lewis State Graph reactor** + +- LSG matching now uses explicit valence-state fields for new-mode templates: + element, charge, lone-pair count, radical count, and bond changes represented + by ``sigma_order`` / ``pi_order`` / ``kekule_order``. +- Product charge recomputation is driven from Lewis-state accounting in + new-mode rewrites, with ``kekule_order = sigma_order + pi_order`` used + instead of aromatic ``order`` values. +- Hydrogen handling was tightened for explicit-H reaction centers, implicit-H + templates, and simple ``H-H`` transfer cases. +- Atom-map preservation for LSG-reactor SMARTS output was fixed by using graph + node identity where the template does not carry original AAM. +- Real-case regression tooling was added around the first smart-database + fixture, batch round trips, and previously failing LSG rewrite examples. + +**Functional groups** + +- Added ``FunctionalGroupDetector``, ``FunctionalGroupRegistry``, + ``FunctionalGroupAudit``, and + ``smiles_to_graph_and_functional_groups``. +- Added hierarchical family handling so more specific labels such as + ``carboxylic_acid`` suppress generic nested labels such as ``carbonyl`` when + appropriate. +- Added aromatic ring-system detection, selected fused heteroaromatic public + names, and transform-relevant families across carbonyl/acyl, oxygen, + nitrogen/C=N, sulfur, boron, silicon, and phosphorus chemistry. +- Replaced the previous ``fgutils`` usage in tautomerization support with the + SynKit-native functional-group API. 
+ +**MTG** + +- MTG construction from RSMI strings now defaults to Lewis State Graph ITS, + producing compact atom and bond timelines without ``typesGH``. Use + ``its_format="typesGH"`` to request legacy string conversion. +- Reworked the MTG plan around LSG/ITS representation: invariant atom fields + are stored once, while temporal fields store compact histories across + mechanism snapshots. +- Added round-trip coverage for converting reaction sequences to MTG and back + to ordered ITS steps / composed ITS views. +- Marked aromatic relabeling and partial-order mechanism DAGs as active design + areas rather than solved MTG semantics. + +**Visualization** + +- Added ``draw_molecule_graph``, ``draw_reaction_graph``, + ``draw_its_from_rsmi``, ``draw_its_only``, ``draw_mtg_graph``, and + ``draw_mtg_steps`` as the preferred modern rendering helpers. +- Added compact LSG/ITS labels for ``kekule_order`` transitions and optional + ``sigma/pi`` labels that suppress unchanged components. +- Added selectable Lewis-state labels for charge, lone-pair, and radical + changes. +- Added Matplotlib ``Agg`` smoke tests for molecule, reaction, ITS, visual + adapter, and MTG drawing paths. + +**Compatibility and known limits** + +- Legacy ITS / ``typesGH`` behavior remains available for existing workflows. +- MØD-backed workflows remain separate from the new SynKit LSG reactor path. +- Aromatic LSG matching is still conservative. Some aromatic false-positive + or false-negative cases require a future aromatic-system relabeling policy + rather than a local matcher tweak. +- Functional-group fused positional isomers such as quinoline vs isoquinoline + are not fully distinguished yet. + +**Infrastructure** + +- Added ``networkx>=3.3`` to ``requirements.txt`` so non-Linux CI jobs do not + rely on the Linux-only ``mod`` install to pull in NetworkX indirectly. 
+ + Version 1.1.1 ------------- diff --git a/doc/chem.rst b/doc/chem.rst index 0a63db4..88a7593 100644 --- a/doc/chem.rst +++ b/doc/chem.rst @@ -120,6 +120,27 @@ and downstream CRN construction. 'CC=O.CC=O>>CC=CC=O.O' +Tautomerization and functional-group support +-------------------------------------------- + +``Tautomerize`` now uses SynKit's native functional-group detector instead of +an external FG utility. The detector works on the same molecular graph +representation used elsewhere in SynKit, so tautomer targets and graph-indexed +functional-group labels stay aligned. + +.. code-block:: python + :caption: Detecting tautomer-relevant functional groups + :linenos: + + from synkit.Graph.FG import smiles_to_graph_and_functional_groups + + graph, groups = smiles_to_graph_and_functional_groups("C=C(O)C") + print(groups) + +The tautomerization helper still keeps a small local compatibility rule for +geminal diols. Those are treated as hydrated-carbonyl repair targets, not as a +general public functional-group label. 
+ See Also -------- diff --git a/doc/figures/mtg_lsg_changed_core.png b/doc/figures/mtg_lsg_changed_core.png new file mode 100644 index 0000000..e1d9bae Binary files /dev/null and b/doc/figures/mtg_lsg_changed_core.png differ diff --git a/doc/figures/vis_lsg_sn2.png b/doc/figures/vis_lsg_sn2.png new file mode 100644 index 0000000..ae91a5c Binary files /dev/null and b/doc/figures/vis_lsg_sn2.png differ diff --git a/doc/figures/vis_molecule_aspirin.png b/doc/figures/vis_molecule_aspirin.png new file mode 100644 index 0000000..0ff8066 Binary files /dev/null and b/doc/figures/vis_molecule_aspirin.png differ diff --git a/doc/figures/vis_mtg_steps.png b/doc/figures/vis_mtg_steps.png new file mode 100644 index 0000000..3777d24 Binary files /dev/null and b/doc/figures/vis_mtg_steps.png differ diff --git a/doc/figures/vis_mtg_timeline.png b/doc/figures/vis_mtg_timeline.png new file mode 100644 index 0000000..e1d9bae Binary files /dev/null and b/doc/figures/vis_mtg_timeline.png differ diff --git a/doc/figures/vis_reaction_sn2.png b/doc/figures/vis_reaction_sn2.png new file mode 100644 index 0000000..899aef8 Binary files /dev/null and b/doc/figures/vis_reaction_sn2.png differ diff --git a/doc/graph.rst b/doc/graph.rst index fb5c645..c13cbb5 100644 --- a/doc/graph.rst +++ b/doc/graph.rst @@ -12,6 +12,7 @@ Key submodules include: - **Matcher** — graph isomorphism and subgraph search engines - **ITS** — Internal Transition State (ITS) graph construction and decomposition - **MTG** — Mechanistic Transition Graph generation and exploration +- **FG** — graph-native functional-group detection and audit tooling - **Context** — reaction-center expansion for context-aware matching and analysis .. raw:: html @@ -49,6 +50,12 @@ Key submodules include: Build **Mechanistic Transition Graphs** from reaction-center ITS graphs to represent stepwise mechanisms and compare pathways. + .. 
grid-item-card:: :octicon:`filter` FG + :class-card: sd-shadow-sm + + Detect functional groups directly on SynKit molecular graphs, with + hierarchical labels and aromatic ring-system reporting. + Graph Canonicalization ---------------------- @@ -178,10 +185,58 @@ ITS The ``synkit.Graph.ITS`` package supports the construction and decomposition of **Internal Transition State (ITS)** graphs: -- :py:class:`~synkit.Graph.ITS.its_construction.ITSConstructor` — build ITS graphs from reactant/product graphs +- :py:class:`~synkit.Graph.ITS.its_construction.ITSConstruction` — build ITS graphs from reactant/product graphs - :py:func:`~synkit.Graph.ITS.its_decompose.get_rc` — extract the minimal reaction-center subgraph - :py:func:`~synkit.Graph.ITS.its_decompose.its_decompose` — split an ITS graph into reactant/product graphs +Lewis State Graph fields +~~~~~~~~~~~~~~~~~~~~~~~~ + +SynKit 1.4 introduces the Lewis State Graph (LSG) framework for the +pure-Python reactor and new mechanistic work. Legacy ITS remains available, +but LSG is the preferred representation when valence-state information must be +explicit. In the current API this representation is requested with +``format="tuple"``. + +Important LSG fields: + +.. list-table:: + :header-rows: 1 + + * - Field + - Meaning + * - ``sigma_order`` / ``pi_order`` + - Authoritative bond components for Lewis-state rewriting. + * - ``kekule_order`` + - Integer-like bond order used for product reconstruction; normally + ``sigma_order + pi_order``. + * - ``lone_pairs`` / ``radical`` + - Valence-state fields used by LSG matching and product accounting. + * - ``valence_electrons`` + - Element valence-shell reference used when recomputing charge. + * - ``order`` + - Legacy or presentation order. Aromatic ``1.5`` values are useful for + matching and visualization, but not the LSG-authoritative rewrite + source. + +.. 
code-block:: python + :caption: Building an LSG/ITS graph with Lewis-state fields + :linenos: + + from synkit.IO import rsmi_to_its + + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + its = rsmi_to_its(rsmi, format="tuple", core=False) + + print(its.nodes[2]["lone_pairs"]) + print(its.edges[1, 2]["sigma_order"]) + +.. note:: + + Aromatic LSG matching is intentionally conservative. Aromaticity is still + useful for presentation and pruning, but full aromatic-system relabeling is + tracked as ongoing work. + Example: Construct and Visualize an ITS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -232,61 +287,172 @@ The ``synkit.Graph.MTG`` package provides tools for constructing and analyzing - :py:class:`~synkit.Graph.MTG.mcs_matcher.MCSMatcher` — maximum common substructure mappings - :py:class:`~synkit.Graph.MTG.mtg.MTG` — MTG construction from ITS graphs and MCS mapping -Example: Generate an MTG (with Composite Reaction Visualization) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This example builds reaction-center ITS graphs for two mechanistic sequences, constructs -MTGs, and visualizes both MTG-style centers and minimal centers (without MTG annotations). +The current MTG direction is aligned with LSG/ITS. Invariant atom data such +as ``element`` and ``atom_map`` should be stored once, while temporal fields +such as ``charge``, ``hcount``, ``lone_pairs``, ``radical``, +``sigma_order``, and ``pi_order`` store compact histories across snapshots. +This avoids redundant ``*_step_history`` attributes and makes MTG-to-ITS +round trips easier to inspect. .. 
code-block:: python - :caption: Building and visualizing MTGs for aldol mechanisms + :caption: MTG to ordered ITS steps :linenos: from synkit.Graph.MTG.mtg import MTG - from synkit.Graph.ITS.its_decompose import get_rc - from synkit.examples import load_example - import matplotlib.pyplot as plt - from synkit.Vis.graph_visualizer import GraphVisualizer - data = load_example("aldol") + mtg = MTG([step_1_its, step_2_its]) + step_its = mtg.get_its_steps() + composed = mtg.get_compose_its() - mech_neutral = data[0]['mechanisms'][1]['steps'] - smart_neutral = [i['smart_string'] for i in mech_neutral] +When an MTG is built from RSMI strings, SynKit 1.4.0 converts those strings +to Lewis State Graph ITS by default: - mech_acid = data[0]['mechanisms'][2]['steps'] - smart_acid = [i['smart_string'] for i in mech_acid] +.. code-block:: python + :caption: RSMI sequence to LSG MTG + :linenos: - # neutral - mtg = MTG(smart_neutral, mcs_mol=True) - its_neutral = mtg.get_compose_its() - mtg_rc_neutral = get_rc(its_neutral, keep_mtg=True) - rc_neutral = get_rc(its_neutral, keep_mtg=False) + mtg = MTG(step_rsmis, mcs_mol=True) - # acid - mtg = MTG(smart_acid, mcs_mol=True) - its_acid = mtg.get_compose_its() - mtg_rc_acid = get_rc(its_acid, keep_mtg=True) - rc_acid = get_rc(its_acid, keep_mtg=False) +Legacy string conversion is still available for compatibility: - fig, ax = plt.subplots(2, 2, figsize=(16, 8)) - vis = GraphVisualizer() +.. code-block:: python + :caption: Legacy MTG from RSMI strings + :linenos: - vis.plot_its(mtg_rc_neutral, ax=ax[0, 0], use_edge_color=True, og=True, title='A. MTG (neutral)') - vis.plot_its(rc_neutral, ax=ax[0, 1], use_edge_color=True, og=True, title='B. Reaction center (neutral)') - vis.plot_its(mtg_rc_acid, ax=ax[1, 0], use_edge_color=True, og=True, title='C. MTG (acid)') - vis.plot_its(rc_acid, ax=ax[1, 1], use_edge_color=True, og=True, title='D. 
Reaction center (acid)') + mtg = MTG(step_rsmis, mcs_mol=True, its_format="typesGH") + +Compact MTG data model +~~~~~~~~~~~~~~~~~~~~~~ + +An LSG-backed MTG is a normal ``networkx.Graph``. Node attributes split into +two categories: + +.. list-table:: + :header-rows: 1 + + * - Attribute type + - Examples + - Meaning + * - Invariant atom fields + - ``element``, ``atom_map``, ``valence_electrons`` + - Stored once because the atom identity does not change across the + mechanism. + * - State timelines + - ``hcount``, ``charge``, ``radical``, ``lone_pairs``, ``present`` + - Tuples with one value per mechanism state. For ``n`` elementary + steps, these timelines have length ``n + 1``. + * - Bond timelines + - ``kekule_order``, ``sigma_order``, ``pi_order`` + - Tuples with one bond state per mechanism state. ``None`` means the + bond or one endpoint is outside that state; ``0`` means both atoms are + present but no bond exists. + +This compact form intentionally avoids legacy ``typesGH`` and redundant +``*_step_history`` attributes in the new Lewis State Graph path. + +Example: LSG MTG changed core +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example reads a stepwise aldol mechanism, constructs an LSG-backed MTG +directly from the RSMI strings, and visualizes the changed core. The default +MTG string conversion uses ``format="tuple"`` internally, so the result stores +Lewis-state timelines rather than legacy ``typesGH`` fields. - plt.tight_layout() - plt.show() +.. 
code-block:: python + :caption: Building and visualizing a compact LSG MTG + :linenos: + + from synkit.IO import load_database + from synkit.Graph.MTG.mtg import MTG + from synkit.Vis import draw_mtg_graph + + data = load_database("Data/Testcase/mech.json.gz")[0] + neutral = data["mechanisms"][1] + steps = [step["smart_string"] for step in neutral["steps"]] + + mtg = MTG(steps, mcs_mol=True) + graph = mtg.get_mtg() + + assert mtg._tuple_its + assert not any("typesGH" in attrs for _, attrs in graph.nodes(data=True)) + + fig, ax = draw_mtg_graph( + mtg, + title=f"{neutral['mech_name']} - changed core", + changed_only=True, + show_edge_labels=True, + compress=True, + ) + +``compress=True`` labels only the first and final state of each changed edge. +Use ``compress=False`` when debugging the full mechanism-state sequence. .. container:: figure - .. image:: ./figures/mtg_mechanism.png - :alt: Composite ITS and MTG visualization + .. image:: ./figures/mtg_lsg_changed_core.png + :alt: Compact LSG MTG changed-core visualization :align: center - :width: 1000px + :width: 760px + + *Figure:* LSG MTG changed-core view for the neutral aldol mechanism. + Green edges are net formed, red edges are net broken, and pink dashed edges + are transient timelines that change internally but have the same compressed + first/final state. + +Round-trip helpers +~~~~~~~~~~~~~~~~~~ + +MTGs can be projected back to their ordered ITS steps or to a composed +outer-state ITS: + +.. code-block:: python + :caption: MTG projections + :linenos: + + step_its = mtg.get_its_steps() + step_rsmi = mtg.get_rsmi_steps() + composed = mtg.get_compose_its() + +Use ``get_its_steps()`` when validating temporal history. Use +``get_compose_its()`` when you need the net start/end reaction encoded as a +single ITS graph. + +Functional Groups +----------------- + +The ``synkit.Graph.FG`` package detects functional groups directly on SynKit +molecular ``networkx`` graphs. 
It avoids an external FG representation and +returns labels in graph/node-index space. + +Core APIs: + +- :py:class:`~synkit.Graph.FG.detector.FunctionalGroupDetector` +- :py:func:`~synkit.Graph.FG.api.smiles_to_graph_and_functional_groups` +- :py:class:`~synkit.Graph.FG.audit.FunctionalGroupAudit` + +.. code-block:: python + :caption: Functional groups from SMILES + :linenos: + + from synkit.Graph.FG import smiles_to_graph_and_functional_groups + + graph, groups = smiles_to_graph_and_functional_groups( + "CC(=O)OC1=CC=CC=C1C(=O)O" + ) + + print(groups) + +.. admonition:: Example output + :class: note synkit-example-output + + .. code-block:: text + + [('ester', (2, 3, 4)), ('carboxylic_acid', (11, 12, 13))] - *Figure:* Composite MTG visualization for aldol addition under neutral and acidic conditions. +Detection is hierarchical: specific labels such as ``carboxylic_acid`` can +suppress generic nested labels such as ``carbonyl`` when the broader label +would be less useful. Public labels cover common carbonyl/acyl, oxygen, +nitrogen/C=N, sulfur, boron, silicon, phosphorus, and heteroaromatic families. Context graph ------------- diff --git a/doc/index.rst b/doc/index.rst index 32be310..eb9f17c 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -34,6 +34,45 @@ production workflows. :align: center :width: 100% +Version 1.4.0 highlights +------------------------ + +.. raw:: html + +
+ Lewis State Graph + graph-native FG + MTG timelines + modern Vis +
+ +.. grid:: 1 1 2 4 + :gutter: 2 + + .. grid-item-card:: :octicon:`zap` Lewis State Graph + :class-card: sk-feature-card + + LSG stores sigma/pi orders, lone pairs, radicals, valence electrons, and + charge recomputation metadata for the pure-Python reactor. + + .. grid-item-card:: :octicon:`filter` Functional Groups + :class-card: sk-feature-card + + Native ``networkx`` functional-group detection now replaces the previous + external FG helper and returns graph-indexed labels. + + .. grid-item-card:: :octicon:`git-branch` MTG + :class-card: sk-feature-card + + Mechanistic Transition Graphs keep temporal bond histories and can be + projected back to ordered ITS steps or composed transformations. + + .. grid-item-card:: :octicon:`eye` Visualization + :class-card: sk-feature-card + + Modern drawers cover molecule graphs, reaction panels, ITS-only views, + Lewis-state labels, and compact MTG timelines. + .. Core features .. ------------- @@ -115,6 +154,12 @@ Documentation quick links ITS/MTG construction, WL hashing, and cluster/search primitives. + .. grid-item-card:: :octicon:`eye` Visualization + :link: vis + :link-type: doc + + Molecule, reaction, ITS, and diagnostic graph rendering. + .. grid-item-card:: :octicon:`terminal` API Reference :link: api/index :link-type: doc @@ -159,7 +204,7 @@ Funded by the European Union Horizon Europe Doctoral Network (Marie-Skłodowska- rule synthesis crn + vis api/index reference changelog - diff --git a/doc/synthesis.rst b/doc/synthesis.rst index d96b8ad..2edbfca 100644 --- a/doc/synthesis.rst +++ b/doc/synthesis.rst @@ -70,6 +70,22 @@ Reactor parameters - ``'comp'``: component-aware matching (fastest; recommended for multi-component SMILES) - ``'all'``: exhaustive arbitrary subgraph search (most expensive) - ``'bt'``: fallback strategy (tries ``comp`` first, then ``all`` if no match is found) + * - ``template_format`` + - str + - ``'typesGH'`` + - ITS representation used when the template is a reaction string. 
+ Use ``'tuple'`` for the Lewis State Graph representation. + * - ``electron_diagnostics`` + - bool + - ``False`` + - When ``True``, keep Lewis-state accounting diagnostics on generated ITS + objects. This is useful when inspecting charge, lone-pair, or radical + recomputation. The option name remains ``electron_diagnostics`` for API + compatibility. + * - ``automorphism`` + - bool + - ``True`` + - Deduplicate symmetry-equivalent matches before rewriting. Example: Forward Prediction (NetworkX) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -174,6 +190,75 @@ while keeping ``explicit_h=False``. '[CH3:1][CH3:2].[CH:3]([CH:4]=[O:5])=[O:6]>>[CH3:1][CH:2]=[CH:3][CH:4]=[O:5].[OH2:6]' ] +Lewis State Graph Templates +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The NetworkX reactor can consume Lewis State Graph (LSG) templates. This is +the SynKit-native path for transformations where valence-state information +matters: lone pairs, radicals, valence electrons, and sigma/pi bond components +are stored in the template and used during matching/rewrite. In the current API +LSG construction is requested with ``format="tuple"``. + +There are two common entry points: + +.. code-block:: python + :caption: Build the LSG template explicitly + :linenos: + + from synkit.IO import rsmi_to_its + from synkit.Synthesis.Reactor.syn_reactor import SynReactor + + smart = "[NH3:1].[CH3:2][Cl:3]>>[NH3+:1][CH3:2].[Cl-:3]" + substrate = "CCl.N" + template = rsmi_to_its(smart, core=False, format="tuple") + + reactor = SynReactor( + substrate=substrate, + template=template, + implicit_temp=True, + explicit_h=False, + electron_diagnostics=True, + ) + + print(reactor.smarts) + +.. code-block:: python + :caption: Let SynReactor build an LSG template from a reaction string + :linenos: + + reactor = SynReactor( + substrate="CCl.N", + template="[NH3:1].[CH3:2][Cl:3]>>[NH3+:1][CH3:2].[Cl-:3]", + template_format="tuple", + implicit_temp=True, + explicit_h=False, + electron_diagnostics=True, + ) + +LSG rewrite policy: + +.. 
list-table:: + :header-rows: 1 + + * - Concept + - Policy + * - Bond truth + - ``sigma_order`` and ``pi_order`` are authoritative in new mode. + * - Product reconstruction + - ``kekule_order`` is computed from ``sigma_order + pi_order`` before + conversion through RDKit. + * - Charge + - Charge is recomputed from valence electrons, lone pairs, hydrogen count, + radical count, and Kekule bond-order sum. + * - Aromaticity + - Aromatic flags are still useful for matching and display, but aromatic + ``order=1.5`` is not used as the LSG-authoritative rewrite value. + +.. note:: + + LSG rewriting is currently a SynKit ``SynReactor`` path. MØD-backed + reactors remain on the legacy rule representation. + Example: Forward Prediction (MØD) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/vis.rst b/doc/vis.rst new file mode 100644 index 0000000..2f17018 --- /dev/null +++ b/doc/vis.rst @@ -0,0 +1,327 @@ +.. _vis: + +Visualization +============= + +SynKit's visualization layer has two roles: + +* chemistry-first drawings for molecular graphs, reaction panels, and ITS graphs; +* diagnostic graph drawings for raw NetworkX objects, adapters, and future MTG + development. + +For normal chemistry work, prefer the molecule, reaction, and ITS helpers from +``synkit.Vis``. The generic graph drawer is useful when debugging attributes or +new graph representations, but it is intentionally less polished. + +.. raw:: html + +
+ molecule graph + reaction panel + ITS-only + MTG timeline +
+ +Molecular Graphs From SMILES +---------------------------- + +Start from a real SMILES, convert it to a SynKit molecular graph, and draw it +with atom-map-aware labels. + +.. code-block:: python + + from synkit.IO.chem_converter import smiles_to_graph + from synkit.Vis import draw_molecule_graph + + smiles = "CC(=O)OC1=CC=CC=C1C(=O)O" + graph = smiles_to_graph( + smiles, + sanitize=True, + use_index_as_atom_map=True, + ) + + ax = draw_molecule_graph( + graph, + title=smiles, + label_mode="hetero", + show_atom_map=True, + aromatic_style="circle", + ) + ax.figure + +.. container:: figure + + .. image:: ./figures/vis_molecule_aspirin.png + :alt: Molecular graph visualization of aspirin + :align: center + :width: 760px + + *Figure:* A molecular graph rendered from aspirin SMILES with aromatic rings + drawn compactly and atom indices visible. + +Useful molecule options: + +.. list-table:: + :header-rows: 1 + + * - Option + - Use + * - ``label_mode="hetero"`` + - Keep carbon labels compact while showing hetero atoms explicitly. + * - ``show_atom_map=True`` + - Show atom-map numbers when present. + * - ``aromatic_style="circle"`` + - Draw aromatic rings with a compact ring marker instead of cluttered edge labels. + +Reaction Panels +--------------- + +Reaction drawings show reactants and products side by side and highlight atoms +and bonds that change. + +.. code-block:: python + + from synkit.Vis import draw_reaction_graph + + rsmi = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + + fig, axes = draw_reaction_graph( + rsmi, + title="SN2 reaction", + show_atom_map=True, + ) + +.. container:: figure + + .. image:: ./figures/vis_reaction_sn2.png + :alt: Reaction panel visualization for an SN2 reaction + :align: center + :width: 820px + + *Figure:* Reactant/product panels with the reaction center highlighted. + +ITS Graphs +---------- + +ITS visualization is centered on the transformation graph itself, not the full +reactant/product panels. 
By default, changed bonds are shown as compact +``kekule_order`` transitions such as ``1->0`` or ``0->1``. + +.. code-block:: python + + from synkit.Vis import draw_its_from_rsmi + + rsmi = ( + "[Cl:1][Cl:2].[H:9][c:3]1[cH:4][cH:5][cH:6][cH:7][cH:8]1" + ">>" + "[Cl:1][H:9].[Cl:2][c:3]1[cH:4][cH:5][cH:6][cH:7][cH:8]1" + ) + + fig, axes = draw_its_from_rsmi( + rsmi, + format="tuple", + core=False, + title="ITS: chlorine transfer to arene", + edge_label_mode="kekule", + ) + +ITS edge-label modes: + +.. list-table:: + :header-rows: 1 + + * - Mode + - Meaning + * - ``edge_label_mode="kekule"`` + - Show changed ``kekule_order`` only. This is the recommended compact view. + * - ``edge_label_mode="sigma_pi"`` + - Show changed sigma/pi components. Unchanged components are suppressed. + * - ``edge_label_mode="none"`` + - Hide edge labels and use only edge color/style. + +Lewis-State Labels +------------------ + +For Lewis State Graph / ITS graphs, node labels can show charge, lone-pair, or +radical changes. Use one signal at a time for readable figures. + +.. code-block:: python + + from synkit.Vis import draw_its_from_rsmi + + sn2 = "[CH3:1][Cl:2].[NH3:3]>>[CH3:1][NH3+:3].[Cl-:2]" + + fig, axes = draw_its_from_rsmi( + sn2, + format="tuple", + core=False, + title="ITS: SN2 lone-pair changes", + show_electron_labels=True, + electron_label_mode="lone_pair", + ) + +.. container:: figure + + .. image:: ./figures/vis_lsg_sn2.png + :alt: Lewis State Graph visualization for SN2 lone-pair changes + :align: center + :width: 820px + + *Figure:* LSG/ITS view of the SN2 example. Bond colors show broken/formed + edges and the node badges show a lone-pair transfer. + +Lewis-state label modes: + +.. list-table:: + :header-rows: 1 + + * - Mode + - Meaning + * - ``electron_label_mode="charge"`` + - Show charge changes, for example ``q0->+1``. + * - ``electron_label_mode="lone_pair"`` + - Show lone-pair changes, for example ``lambda1->0``. 
+ * - ``electron_label_mode="radical"`` + - Show radical changes. + * - ``electron_label_mode="all"`` + - Show every changed Lewis-state attribute. This is useful for debugging + but can be busy. + +Reactant/Product Projections +---------------------------- + +ITS helpers can also render reactant and product projections when needed for +debugging. + +.. code-block:: python + + fig, axes = draw_its_from_rsmi( + rsmi, + format="tuple", + core=False, + projection=True, + title="ITS with reactant/product projections", + ) + +Use ``projection=True`` when you need to inspect how an ITS decomposes back into +left and right molecular graphs. Use the default ITS-only view for reports and +notebooks. + +MTG Timelines +------------- + +Compact MTG visualization has two complementary views: + +* ``draw_mtg_graph`` shows the fused MTG as a timeline graph; +* ``draw_mtg_steps`` reconstructs ordered ITS steps and draws each step with + the ITS renderer. + +.. code-block:: python + + from synkit.Graph.MTG.mtg import MTG + from synkit.Vis import draw_mtg_graph, draw_mtg_steps + + # Step RSMI strings are converted to Lewis State Graph ITS by default. + mtg = MTG(step_rsmis, mcs_mol=True) + + fig, ax = draw_mtg_graph( + mtg, + title="MTG changed core", + mode="timeline", + changed_only=True, + show_edge_labels=True, + compress=True, + ) + + fig, ax = draw_mtg_graph( + mtg, + title="MTG timeline 3D", + dimension="3d", + layout="spring", + ) + + fig, axes = draw_mtg_steps( + mtg, + include_composed=True, + show_edge_labels=True, + ) + +.. container:: figure + + .. image:: ./figures/vis_mtg_timeline.png + :alt: Compact MTG timeline visualization + :align: center + :width: 760px + + *Figure:* Compact MTG changed-core view for the neutral aldol mechanism. + Green edges are net formed, red edges are net broken, and pink dashed edges + are transient timelines. + +.. container:: figure + + .. 
image:: ./figures/vis_mtg_steps.png + :alt: MTG step projection visualization + :align: center + :width: 900px + + *Figure:* Ordered ITS step panels reconstructed from the MTG, plus the + composed outer-state view. + +Use the timeline graph to see transient bonds and Lewis-state paths across +the mechanism. Use the step panels when you need to check each reconstructed +ITS independently. + +The 2D view is the default and gives a flattened changed-core drawing. The 3D +view is optional and is useful when a dense MTG has too many overlapping +timeline edges in a single plane. + +MTG display conventions: + +.. list-table:: + :header-rows: 1 + + * - Signal + - Display + * - Edge timeline + - Compressed first/final state by default, such as ``1-1``. Set + ``compress=False`` to show a full state path such as ``1-2-1-2-1``. + ``∅`` means the edge or atom is outside that state. + * - Formed / broken edge + - Green for net formation, red for net loss. + * - Transient edge + - Pink dashed edge for any changing timeline that is not simple net + formation or net loss. + * - Step panels + - Reuse the ITS-only renderer so a step can be checked with the same + visual language as a normal LSG/ITS graph. + +Diagnostic Graph View +--------------------- + +The visual model adapter normalizes molecule, reaction, ITS, and MTG-like graph +objects into a common drawing model. This layer is mainly for development and +debugging. + +.. 
code-block:: python + + from synkit.Vis import detect_visual_kind, summarize_visual_graph, to_visual_graph + from synkit.Vis import draw_graph + + kind = detect_visual_kind(graph) + visual_graph = to_visual_graph(graph) + summary = summarize_visual_graph(visual_graph) + + ax = draw_graph(graph, title=f"{kind}: {summary.kind}") + +Legacy Helpers +-------------- + +The older visualization classes are still exported for compatibility: + +* ``RXNVis`` for reaction image grids; +* ``RuleVis`` for rule/ITS style drawings; +* ``GraphVisualizer`` for general NetworkX graph visualization. + +New code should use ``draw_molecule_graph``, ``draw_reaction_graph``, and +``draw_its_from_rsmi`` unless a legacy workflow specifically depends on the +class-based API. diff --git a/lint.sh b/lint.sh index 4b0e23b..2ee614a 100755 --- a/lint.sh +++ b/lint.sh @@ -25,12 +25,13 @@ its_destruction.py:C901, conversion.py:C901, injectivity.py:C901, deficiency.py:C901, -mol_to_graph.py:C901" \ +mol_to_graph.py:C901, +detector.py:C901, +mtg.py:C901" \ --exclude=venv,\ core_engine.py,\ rule_apply.py,\ reactor_engine.py,\ -groupoid.py,\ syn_rule.py,\ __init__.py,\ wl_mapper.py,\ diff --git a/pyproject.toml b/pyproject.toml index 2826742..eb4edbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "synkit" -version = "1.3.2" +version = "1.4.0" description = "Utility for reaction modeling using graph grammar" readme = "README.md" long-description = { file = "CHANGELOG.md" } diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 99851a8..ba84e89 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,6 +1,6 @@ package: name: synkit - version: "1.3.2" + version: "1.4.0" source: path: .. 
diff --git a/requirements.txt b/requirements.txt index 296d0a5..b050ee8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ scikit-learn>=1.4.0 seaborn>=0.13.2 -fgutils rdkit>=2025.3.1 pandas>=2.2.0 +networkx>=3.3 requests>=2.32.3 numpy>=2.2.0 regex>=2024.11.6 -sympy>=1.13.3 \ No newline at end of file +sympy>=1.13.3 diff --git a/synkit/Chem/Reaction/tautomerize.py b/synkit/Chem/Reaction/tautomerize.py index 2da5b2d..0518180 100644 --- a/synkit/Chem/Reaction/tautomerize.py +++ b/synkit/Chem/Reaction/tautomerize.py @@ -1,8 +1,9 @@ from typing import List, Dict, Optional from rdkit import Chem -from fgutils import FGQuery from joblib import Parallel, delayed +from synkit.Graph.FG import smiles_to_graph_and_functional_groups + class Tautomerize: """Standardize molecules by converting enol and hemiketal tautomers into @@ -103,18 +104,60 @@ def fix_smiles(smiles: str) -> str: :returns: Canonical SMILES of the standardized molecule. :rtype: str """ - query = FGQuery() - fg = query.get(smiles) - for item in fg: - label, indices = item + while True: + targets = Tautomerize._tautomer_targets(smiles) + if not targets: + break + label, indices = targets[0] if label == "hemiketal": smiles = Tautomerize.standardize_hemiketal(smiles, indices) - fg = query.get(smiles) elif label == "enol": smiles = Tautomerize.standardize_enol(smiles, indices) - fg = query.get(smiles) return Chem.CanonSmiles(smiles) + @staticmethod + def _tautomer_targets(smiles: str) -> list[tuple[str, List[int]]]: + """Return RDKit-index targets used by the tautomer repair helpers.""" + mol = Chem.MolFromSmiles(smiles) + if mol is None: + return [] + graph, groups = smiles_to_graph_and_functional_groups(smiles) + node_to_idx = { + ( + atom.GetAtomMapNum() if atom.GetAtomMapNum() else atom.GetIdx() + 1 + ): atom.GetIdx() + for atom in mol.GetAtoms() + } + + targets = [ + (label, [node_to_idx[node] for node in nodes]) + for label, nodes in groups + if label in {"hemiketal", "enol"} + ] + 
targets.extend( + ("hemiketal", [node_to_idx[node] for node in nodes]) + for nodes in Tautomerize._geminal_diol_nodes(graph) + ) + return targets + + @staticmethod + def _geminal_diol_nodes(graph) -> list[tuple[int, ...]]: + """Legacy tautomerization compatibility for hydrated carbonyls.""" + targets: list[tuple[int, ...]] = [] + for carbon, data in graph.nodes(data=True): + if data.get("element") != "C": + continue + hydroxyls = [ + neighbor + for neighbor in graph.neighbors(carbon) + if graph.nodes[neighbor].get("element") == "O" + and graph.nodes[neighbor].get("hcount", 0) >= 1 + and graph.edges[carbon, neighbor].get("order") == 1.0 + ] + if len(hydroxyls) >= 2: + targets.append((carbon, hydroxyls[0], hydroxyls[1])) + return targets + @staticmethod def fix_dict(data: Dict[str, str], reaction_column: str) -> Dict[str, str]: """Standardize the reactant and product SMILES in a reaction diff --git a/synkit/Graph/Canon/canon_graph.py b/synkit/Graph/Canon/canon_graph.py index 53e19a7..eb8d8a0 100644 --- a/synkit/Graph/Canon/canon_graph.py +++ b/synkit/Graph/Canon/canon_graph.py @@ -316,8 +316,8 @@ def _serialise(self, g: nx.Graph) -> str: nodes = sorted(g.nodes(data=True), key=lambda x: self._node_key(*x)) edges = sorted(g.edges(data=True), key=lambda x: self._edge_key(*x)) - node_str = ";".join(f"{n}:{self._node_key(n,d)}" for n, d in nodes) - edge_str = ";".join(f"{(u,v)}:{self._edge_key(u,v,d)}" for u, v, d in edges) + node_str = ";".join(f"{n}:{self._node_key(n, d)}" for n, d in nodes) + edge_str = ";".join(f"{(u, v)}:{self._edge_key(u, v, d)}" for u, v, d in edges) return f"N[{node_str}]|E[{edge_str}]" # ------------------------------------------------------------------ # diff --git a/synkit/Graph/FG/__init__.py b/synkit/Graph/FG/__init__.py new file mode 100644 index 0000000..bf415bf --- /dev/null +++ b/synkit/Graph/FG/__init__.py @@ -0,0 +1,19 @@ +"""Functional-group detection on SynKit molecular graphs.""" + +from .catalog import default_registry +from .audit 
import FunctionalGroupAudit, audit_reaction_smiles +from .api import FunctionalGroupLabels, smiles_to_graph_and_functional_groups +from .detector import FunctionalGroupDetector +from .model import FunctionalGroupMatch, FunctionalGroupPattern, FunctionalGroupRegistry + +__all__ = [ + "FunctionalGroupDetector", + "FunctionalGroupLabels", + "FunctionalGroupAudit", + "FunctionalGroupMatch", + "FunctionalGroupPattern", + "FunctionalGroupRegistry", + "default_registry", + "audit_reaction_smiles", + "smiles_to_graph_and_functional_groups", +] diff --git a/synkit/Graph/FG/api.py b/synkit/Graph/FG/api.py new file mode 100644 index 0000000..2ae5086 --- /dev/null +++ b/synkit/Graph/FG/api.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import networkx as nx + +from synkit.IO.chem_converter import smiles_to_graph + +from .detector import FunctionalGroupDetector + +FunctionalGroupLabels = list[tuple[str, tuple[int, ...]]] + + +def smiles_to_graph_and_functional_groups( + smiles: str, + *, + sanitize: bool = True, +) -> tuple[nx.Graph, FunctionalGroupLabels]: + """Convert SMILES to a molecular graph and detect functional groups. + + Atom-mapped SMILES keep their non-zero atom-map numbers as graph node IDs. + Unmapped atoms use their 1-based atom order as node IDs, so both mapped and + unmapped SMILES can be passed to the same API. + + :param smiles: Input SMILES, with or without atom-map labels. + :type smiles: str + :param sanitize: If ``True``, sanitize the RDKit molecule during conversion. + :type sanitize: bool + :return: Molecular graph and detected ``(name, node_ids)`` FG labels. + :rtype: tuple[nx.Graph, list[tuple[str, tuple[int, ...]]]] + :raises ValueError: If the SMILES cannot be converted to a molecular graph. 
+ """ + graph = smiles_to_graph( + smiles, + drop_non_aam=False, + sanitize=sanitize, + use_index_as_atom_map=True, + ) + if graph is None: + raise ValueError(f"Could not convert SMILES to molecular graph: {smiles!r}") + return graph, FunctionalGroupDetector().detect(graph) diff --git a/synkit/Graph/FG/audit.py b/synkit/Graph/FG/audit.py new file mode 100644 index 0000000..5e3088c --- /dev/null +++ b/synkit/Graph/FG/audit.py @@ -0,0 +1,155 @@ +from __future__ import annotations + +from collections import Counter +from dataclasses import dataclass +from time import perf_counter +from typing import Iterable + +import networkx as nx + +from synkit.Chem.Reaction.standardize import Standardize +from synkit.IO.chem_converter import smiles_to_graph + +from .detector import FunctionalGroupDetector +from .ring_system import AromaticRingSystemDetector + + +@dataclass(frozen=True) +class FunctionalGroupAudit: + """Aggregated detector coverage over a reaction-SMILES corpus.""" + + reactions: int + molecules: int + parse_failures: int + elapsed_seconds: float + label_counts: Counter[str] + heteroaromatic_systems: int + named_heteroaromatic_systems: int + unnamed_heteroaromatic_systems: Counter[tuple] + uncovered_atom_signatures: Counter[tuple] + uncovered_edge_signatures: Counter[tuple] + + @property + def unnamed_heteroaromatic_count(self) -> int: + return self.heteroaromatic_systems - self.named_heteroaromatic_systems + + +def audit_reaction_smiles( + reactions: Iterable[str], + *, + standardizer: Standardize | None = None, +) -> FunctionalGroupAudit: + """Audit FG coverage for an iterable of reaction SMILES strings.""" + std = Standardize() if standardizer is None else standardizer + detector = FunctionalGroupDetector() + + reaction_count = 0 + molecule_count = 0 + parse_failures = 0 + heteroaromatic_systems = 0 + named_heteroaromatic_systems = 0 + label_counts: Counter[str] = Counter() + unnamed_systems: Counter[tuple] = Counter() + uncovered_atoms: Counter[tuple] = 
Counter() + uncovered_edges: Counter[tuple] = Counter() + + started = perf_counter() + for reaction in reactions: + reaction_count += 1 + standardized = std.fit(reaction, remove_aam=True) + for side in standardized.split(">>"): + for smiles in side.split("."): + graph = smiles_to_graph( + smiles, + drop_non_aam=False, + use_index_as_atom_map=True, + ) + if graph is None: + parse_failures += 1 + continue + molecule_count += 1 + matches = detector.matches(graph) + label_counts.update(match.name for match in matches) + covered = {node for match in matches for node in match.group_nodes} + _count_uncovered_signatures( + graph, + covered, + uncovered_atoms, + uncovered_edges, + ) + + named_ring_nodes = { + match.group_nodes + for match in matches + if match.name != "heteroaromatic_ring" + and match.pattern.priority == 70 + } + for system in AromaticRingSystemDetector.detect(graph): + if not system.hetero_nodes: + continue + heteroaromatic_systems += 1 + has_named_subring = any( + set(nodes).issubset(system.nodes) for nodes in named_ring_nodes + ) + if has_named_subring: + named_heteroaromatic_systems += 1 + continue + unnamed_systems[ + ( + system.hetero_pattern, + system.is_fused, + system.ring_sizes, + tuple(sorted(system.element_counts.items())), + ) + ] += 1 + + return FunctionalGroupAudit( + reactions=reaction_count, + molecules=molecule_count, + parse_failures=parse_failures, + elapsed_seconds=perf_counter() - started, + label_counts=label_counts, + heteroaromatic_systems=heteroaromatic_systems, + named_heteroaromatic_systems=named_heteroaromatic_systems, + unnamed_heteroaromatic_systems=unnamed_systems, + uncovered_atom_signatures=uncovered_atoms, + uncovered_edge_signatures=uncovered_edges, + ) + + +def _count_uncovered_signatures( + graph: nx.Graph, + covered: set[int], + atom_counts: Counter[tuple], + edge_counts: Counter[tuple], +) -> None: + for node, data in graph.nodes(data=True): + if data.get("element") == "H" or node in covered: + continue + neighbors = 
tuple( + sorted( + graph.nodes[neighbor].get("element") + for neighbor in graph.neighbors(node) + if graph.nodes[neighbor].get("element") != "H" + ) + ) + atom_counts[ + ( + data.get("element"), + data.get("aromatic", False), + data.get("hcount", 0), + neighbors, + ) + ] += 1 + + for left, right, data in graph.edges(data=True): + if left in covered or right in covered: + continue + left_element = graph.nodes[left].get("element") + right_element = graph.nodes[right].get("element") + if "H" in {left_element, right_element}: + continue + edge_counts[ + tuple(sorted((left_element, right_element))) + + (data.get("order"), data.get("aromatic", False)) + ] += 1 diff --git a/synkit/Graph/FG/catalog.py b/synkit/Graph/FG/catalog.py new file mode 100644 index 0000000..1bf3196 --- /dev/null +++ b/synkit/Graph/FG/catalog.py @@ -0,0 +1,1168 @@ +from __future__ import annotations + +from collections.abc import Iterable + +import networkx as nx + +from .model import FunctionalGroupPattern, FunctionalGroupRegistry, Mapping +from .model import FunctionalGroupMatch + + +def _graph( + nodes: Iterable[tuple[int, dict]], + edges: Iterable[tuple[int, int, dict]], +) -> nx.Graph: + graph = nx.Graph() + graph.add_nodes_from(nodes) + graph.add_edges_from(edges) + return graph + + +def _single_heavy_neighbors( + graph: nx.Graph, node: int, *, exclude: set[int] +) -> list[int]: + return [ + neighbor + for neighbor in graph.neighbors(node) + if neighbor not in exclude and graph.nodes[neighbor].get("element") != "H" + ] + + +def _alcohol_carbon_heavy_degree(expected: int): + def validator(graph: nx.Graph, mapping: Mapping) -> bool: + carbon, oxygen = mapping[1], mapping[2] + if not _alcohol_like(graph, mapping): + return False + return len(_single_heavy_neighbors(graph, carbon, exclude={oxygen})) == expected + + return validator + + +def _alcohol_like(graph: nx.Graph, mapping: Mapping) -> bool: + carbon, oxygen = mapping[1], mapping[2] + if graph.nodes[carbon].get("aromatic"): + return False + 
if graph.nodes[oxygen].get("hcount", 0) < 1: + return False + return all( + graph.edges[carbon, neighbor].get("order") == 1.0 + for neighbor in graph.neighbors(carbon) + ) + + +def _aldehyde(graph: nx.Graph, mapping: Mapping) -> bool: + carbon, oxygen = mapping[1], mapping[2] + if graph.nodes[carbon].get("hcount", 0) < 1: + return False + others = _single_heavy_neighbors(graph, carbon, exclude={oxygen}) + return all(graph.nodes[node].get("element") == "C" for node in others) + + +def _amine(graph: nx.Graph, mapping: Mapping) -> bool: + nitrogen = mapping[1] + if graph.nodes[nitrogen].get("aromatic"): + return False + return all( + graph.edges[nitrogen, neighbor].get("order") == 1.0 + for neighbor in graph.neighbors(nitrogen) + ) + + +def _phenol(graph: nx.Graph, mapping: Mapping) -> bool: + carbon, oxygen = mapping[1], mapping[2] + return ( + graph.nodes[carbon].get("aromatic") is True + and graph.nodes[oxygen].get("hcount", 0) >= 1 + ) + + +def _enol(graph: nx.Graph, mapping: Mapping) -> bool: + return graph.nodes[mapping[3]].get("hcount", 0) >= 1 + + +def _epoxide(graph: nx.Graph, mapping: Mapping) -> bool: + return all(graph.nodes[mapping[node]].get("in_ring") for node in (1, 2, 3)) + + +def _phosphite(graph: nx.Graph, mapping: Mapping) -> bool: + phosphorus = mapping[1] + return all( + not ( + graph.nodes[neighbor].get("element") == "O" + and graph.edges[phosphorus, neighbor].get("order") == 2.0 + ) + for neighbor in graph.neighbors(phosphorus) + ) + + +def _azide(graph: nx.Graph, mapping: Mapping) -> bool: + middle, terminal = mapping[2], mapping[3] + return ( + graph.nodes[middle].get("charge") == 1 + and graph.nodes[terminal].get("charge") == -1 + ) + + +def _hydrazone(graph: nx.Graph, mapping: Mapping) -> bool: + imine_nitrogen = mapping[2] + hydrazine_nitrogen = mapping[3] + return not ( + graph.nodes[imine_nitrogen].get("charge") == 1 + and graph.nodes[hydrazine_nitrogen].get("charge") == -1 + ) + + +def _amidine(graph: nx.Graph, mapping: Mapping) -> 
bool: + carbon = mapping[1] + imine_nitrogen = mapping[2] + amino_nitrogen = mapping[3] + if any( + graph.nodes[neighbor].get("element") == "O" + for neighbor in graph.neighbors(imine_nitrogen) + if neighbor != carbon + ): + return False + if any( + graph.nodes[neighbor].get("element") == "O" + for neighbor in graph.neighbors(amino_nitrogen) + if neighbor != carbon + ): + return False + return True + + +def _imine(graph: nx.Graph, mapping: Mapping) -> bool: + carbon = mapping[1] + nitrogen = mapping[2] + if graph.nodes[carbon].get("aromatic") or graph.nodes[nitrogen].get("aromatic"): + return False + carbon_neighbors = { + graph.nodes[neighbor].get("element") + for neighbor in graph.neighbors(carbon) + if neighbor != nitrogen + } + nitrogen_neighbors = { + graph.nodes[neighbor].get("element") + for neighbor in graph.neighbors(nitrogen) + if neighbor != carbon + } + if carbon_neighbors & {"O", "S", "N"}: + return False + if nitrogen_neighbors & {"O", "N"}: + return False + return True + + +def _aniline(graph: nx.Graph, mapping: Mapping) -> bool: + return graph.nodes[mapping[1]].get("aromatic") is True and _amine( + graph, {1: mapping[2]} + ) + + +def _aryl_halide(graph: nx.Graph, mapping: Mapping) -> bool: + return graph.nodes[mapping[1]].get("aromatic") is True + + +def _oxygen_has_h(graph: nx.Graph, node: int) -> bool: + return graph.nodes[node].get("hcount", 0) >= 1 + + +def _carbon_substituent_count(graph: nx.Graph, carbon: int, excluded: set[int]) -> int: + return len(_single_heavy_neighbors(graph, carbon, exclude=excluded)) + + +def _aromatic_ring_nodes(graph: nx.Graph, mapping: Mapping) -> set[int]: + return {mapping[node] for node in mapping} + + +def _all_aromatic(graph: nx.Graph, nodes: set[int]) -> bool: + return all(graph.nodes[node].get("aromatic") for node in nodes) + + +def _single_node_recognizer(element: str): + def recognize( + graph: nx.Graph, pattern: FunctionalGroupPattern + ) -> list[FunctionalGroupMatch]: + matches: list[FunctionalGroupMatch] 
= [] + for node, data in graph.nodes(data=True): + if data.get("element") != element: + continue + mapping = {1: node} + if pattern.validator is not None and not pattern.validator(graph, mapping): + continue + matches.append( + FunctionalGroupMatch( + name=pattern.name, + group_nodes=(node,), + mapping=mapping, + pattern=pattern, + ) + ) + return matches + + return recognize + + +def _two_node_bond_recognizer( + left_element: str, + right_elements: tuple[str, ...], + order: float, +): + def recognize( + graph: nx.Graph, pattern: FunctionalGroupPattern + ) -> list[FunctionalGroupMatch]: + matches: list[FunctionalGroupMatch] = [] + seen: set[tuple[int, ...]] = set() + for left, right, data in graph.edges(data=True): + if data.get("order") != order: + continue + pairs = ((left, right), (right, left)) + for first, second in pairs: + if graph.nodes[first].get("element") != left_element: + continue + if graph.nodes[second].get("element") not in right_elements: + continue + mapping = {1: first, 2: second} + if pattern.validator is not None and not pattern.validator( + graph, mapping + ): + continue + group_nodes = tuple( + sorted(mapping[node] for node in pattern.group_nodes) + ) + if group_nodes in seen: + continue + seen.add(group_nodes) + matches.append( + FunctionalGroupMatch( + name=pattern.name, + group_nodes=group_nodes, + mapping=mapping, + pattern=pattern, + ) + ) + return matches + + return recognize + + +def _symmetric_two_node_bond_recognizer(element: str, order: float): + def recognize( + graph: nx.Graph, pattern: FunctionalGroupPattern + ) -> list[FunctionalGroupMatch]: + matches: list[FunctionalGroupMatch] = [] + for left, right, data in graph.edges(data=True): + if data.get("order") != order: + continue + if graph.nodes[left].get("element") != element: + continue + if graph.nodes[right].get("element") != element: + continue + mapping = {1: left, 2: right} + if pattern.validator is not None and not pattern.validator(graph, mapping): + continue + 
def default_registry() -> FunctionalGroupRegistry:
    """Build the default graph-native functional-group registry.

    Each entry is a ``FunctionalGroupPattern`` given positionally as
    ``(name, pattern_graph, group_nodes)`` followed by keyword metadata:

    * ``parents`` / ``requires`` / ``suppresses`` -- names of other patterns
      this one is related to.
    * ``anchor_node`` -- one pattern-graph node id singled out for the pattern.
    * ``priority`` -- integer rank of the pattern.
    * ``validator`` -- optional ``(graph, mapping) -> bool`` post-filter.
    * ``recognizer`` -- optional hand-written matcher built by one of the
      ``_*_recognizer`` factories.

    Pattern-graph node ids (1, 2, ...) are local to each pattern; the numbers
    in ``group_nodes`` and ``anchor_node`` refer to those ids.

    Returns:
        A ``FunctionalGroupRegistry`` holding every pattern listed below, in
        the order written.
    """
    patterns = [
        # --- Carbonyl family (C=O and derivatives) -------------------------
        FunctionalGroupPattern(
            "carbonyl",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O"})],
                [(1, 2, {"order": 2.0})],
            ),
            (1, 2),
            anchor_node=2,
            priority=10,
            recognizer=_two_node_bond_recognizer("C", ("O",), 2.0),
        ),
        FunctionalGroupPattern(
            "aldehyde",
            _graph(
                [(1, {"element": "C", "hcount_min": 1}), (2, {"element": "O"})],
                [(1, 2, {"order": 2.0})],
            ),
            (1, 2),
            parents=("carbonyl",),
            requires=("carbonyl",),
            anchor_node=2,
            priority=30,
            validator=_aldehyde,
            recognizer=_two_node_bond_recognizer("C", ("O",), 2.0),
        ),
        FunctionalGroupPattern(
            "ketone",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O"})],
                [(1, 2, {"order": 2.0})],
            ),
            (1, 2),
            parents=("carbonyl",),
            requires=("carbonyl",),
            anchor_node=2,
            priority=20,
            # Only carbonyl carbons without attached hydrogens qualify.
            validator=lambda graph, mapping: graph.nodes[mapping[1]].get("hcount", 0)
            == 0,
            recognizer=_two_node_bond_recognizer("C", ("O",), 2.0),
        ),
        FunctionalGroupPattern(
            "carboxylic_acid",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O", "hcount_min": 1}),
                ],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            parents=("ester",),
            requires=("carbonyl",),
            anchor_node=3,
            priority=60,
        ),
        FunctionalGroupPattern(
            "amide",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O"}), (3, {"element": "N"})],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            parents=("ketone", "amine"),
            requires=("carbonyl",),
            anchor_node=3,
            priority=50,
        ),
        FunctionalGroupPattern(
            "carbamate",
            _graph(
                [
                    (1, {"element": "O"}),
                    (2, {"element": "C"}),
                    (3, {"element": "O"}),
                    (4, {"element": "N"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 2.0}),
                    (2, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4),
            parents=("amide", "ester"),
            requires=("carbonyl",),
            anchor_node=4,
            priority=60,
        ),
        FunctionalGroupPattern(
            "ester",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (3, 4, {"order": 1.0}),
                ],
            ),
            # Node 4 (the alkoxy carbon) is matched but not part of the group.
            (1, 2, 3),
            parents=("ketone", "ether"),
            requires=("carbonyl",),
            anchor_node=3,
            priority=50,
        ),
        # --- Alcohols -------------------------------------------------------
        FunctionalGroupPattern(
            "alcohol",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O", "hcount_min": 1})],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            parents=("ether",),
            anchor_node=2,
            priority=20,
            validator=_alcohol_like,
            recognizer=_two_node_bond_recognizer("C", ("O",), 1.0),
        ),
        FunctionalGroupPattern(
            "primary_alcohol",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O", "hcount_min": 1})],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            parents=("alcohol",),
            anchor_node=2,
            priority=30,
            validator=_alcohol_carbon_heavy_degree(1),
            recognizer=_two_node_bond_recognizer("C", ("O",), 1.0),
        ),
        FunctionalGroupPattern(
            "secondary_alcohol",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O", "hcount_min": 1})],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            parents=("primary_alcohol",),
            requires=("alcohol",),
            anchor_node=2,
            priority=40,
            validator=_alcohol_carbon_heavy_degree(2),
            recognizer=_two_node_bond_recognizer("C", ("O",), 1.0),
        ),
        FunctionalGroupPattern(
            "tertiary_alcohol",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O", "hcount_min": 1})],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            parents=("secondary_alcohol",),
            requires=("alcohol",),
            anchor_node=2,
            priority=50,
            validator=_alcohol_carbon_heavy_degree(3),
            recognizer=_two_node_bond_recognizer("C", ("O",), 1.0),
        ),
        # --- Ethers, acetals and ketals ------------------------------------
        # Internal helper pattern (``public=False``): any O-C single bond.
        # Several patterns below name it in ``requires``.
        FunctionalGroupPattern(
            "oxygen_link",
            _graph(
                [(1, {"element": "O"}), (2, {"element": "C"})],
                [(1, 2, {"order": 1.0})],
            ),
            (1,),
            priority=0,
            recognizer=_two_node_bond_recognizer("O", ("C",), 1.0),
            public=False,
        ),
        FunctionalGroupPattern(
            "ether",
            _graph(
                [(1, {"element": "O"}), (2, {"element": "C"}), (3, {"element": "C"})],
                [(1, 2, {"order": 1.0}), (1, 3, {"order": 1.0})],
            ),
            (1,),
            requires=("oxygen_link",),
            anchor_node=1,
            priority=10,
        ),
        FunctionalGroupPattern(
            "acetal",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "C"}),
                    (4, {"element": "O"}),
                    (5, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                    (4, 5, {"order": 1.0}),
                ],
            ),
            (1, 2, 4),
            parents=("ketal",),
            requires=("oxygen_link",),
            anchor_node=1,
            priority=50,
            # Central carbon must carry at least one hydrogen.
            validator=lambda graph, mapping: graph.nodes[mapping[1]].get("hcount", 0)
            >= 1,
        ),
        FunctionalGroupPattern(
            "ketal",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "C"}),
                    (4, {"element": "O"}),
                    (5, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                    (4, 5, {"order": 1.0}),
                ],
            ),
            (1, 2, 4),
            parents=("ether",),
            requires=("oxygen_link",),
            anchor_node=1,
            priority=40,
            # Central carbon has no hydrogens and at least two substituents
            # besides the two matched oxygens.
            validator=lambda graph, mapping: graph.nodes[mapping[1]].get("hcount", 0)
            == 0
            and _carbon_substituent_count(graph, mapping[1], {mapping[2], mapping[4]})
            >= 2,
        ),
        FunctionalGroupPattern(
            "hemiacetal",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "C"}),
                    (4, {"element": "O"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 4),
            parents=("hemiketal",),
            requires=("oxygen_link",),
            suppresses=(
                "alcohol",
                "primary_alcohol",
                "secondary_alcohol",
                "tertiary_alcohol",
            ),
            anchor_node=1,
            priority=60,
            # Central carbon bears a hydrogen and oxygen 4 is a hydroxyl.
            validator=lambda graph, mapping: graph.nodes[mapping[1]].get("hcount", 0)
            >= 1
            and _oxygen_has_h(graph, mapping[4]),
        ),
        FunctionalGroupPattern(
            "hemiketal",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "C"}),
                    (4, {"element": "O"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 4),
            parents=("ketal", "alcohol"),
            requires=("oxygen_link",),
            suppresses=(
                "alcohol",
                "primary_alcohol",
                "secondary_alcohol",
                "tertiary_alcohol",
            ),
            anchor_node=1,
            priority=60,
            # Oxygen 4 is a hydroxyl; central carbon has no hydrogens and at
            # least two substituents besides the two matched oxygens.
            validator=lambda graph, mapping: _oxygen_has_h(graph, mapping[4])
            and graph.nodes[mapping[1]].get("hcount", 0) == 0
            and _carbon_substituent_count(graph, mapping[1], {mapping[2], mapping[4]})
            >= 2,
        ),
        # --- Nitrogen groups (single N, C-N, N-O) --------------------------
        FunctionalGroupPattern(
            "amine",
            _graph([(1, {"element": "N"})], []),
            (1,),
            anchor_node=1,
            priority=10,
            validator=_amine,
            recognizer=_single_node_recognizer("N"),
        ),
        FunctionalGroupPattern(
            "aniline",
            _graph(
                [(1, {"element": "C", "aromatic": True}), (2, {"element": "N"})],
                [(1, 2, {"order": 1.0})],
            ),
            # Only the nitrogen is reported as the group.
            (2,),
            parents=("amine",),
            requires=("amine",),
            anchor_node=2,
            priority=30,
            validator=_aniline,
            recognizer=_two_node_bond_recognizer("C", ("N",), 1.0),
        ),
        FunctionalGroupPattern(
            "nitrile",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "N"})],
                [(1, 2, {"order": 3.0})],
            ),
            (1, 2),
            anchor_node=2,
            priority=30,
            recognizer=_two_node_bond_recognizer("C", ("N",), 3.0),
        ),
        FunctionalGroupPattern(
            "nitroso",
            _graph(
                [(1, {"element": "N"}), (2, {"element": "O"})],
                [(1, 2, {"order": 2.0})],
            ),
            (1, 2),
            anchor_node=1,
            priority=30,
            recognizer=_two_node_bond_recognizer("N", ("O",), 2.0),
        ),
        FunctionalGroupPattern(
            "nitro",
            _graph(
                [(1, {"element": "N"}), (2, {"element": "O"}), (3, {"element": "O"})],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            parents=("nitroso",),
            requires=("nitroso",),
            anchor_node=1,
            priority=40,
        ),
        # --- Sulfur groups --------------------------------------------------
        FunctionalGroupPattern(
            "thioether",
            _graph(
                [(1, {"element": "S"}), (2, {"element": "C"}), (3, {"element": "C"})],
                [(1, 2, {"order": 1.0}), (1, 3, {"order": 1.0})],
            ),
            (1,),
            anchor_node=1,
            priority=20,
        ),
        FunctionalGroupPattern(
            "sulfoxide",
            _graph(
                [
                    (1, {"element": "S"}),
                    (2, {"element": "O"}),
                ],
                [(1, 2, {"order": 2.0})],
            ),
            (1, 2),
            suppresses=("thioether",),
            anchor_node=1,
            priority=30,
        ),
        FunctionalGroupPattern(
            "sulfone",
            _graph(
                [
                    (1, {"element": "S"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                ],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 2.0})],
            ),
            (1, 2, 3),
            parents=("sulfoxide",),
            requires=("sulfoxide",),
            suppresses=("thioether",),
            anchor_node=1,
            priority=40,
        ),
        FunctionalGroupPattern(
            "sulfonamide",
            _graph(
                [
                    (1, {"element": "S"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "N"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 2.0}),
                    (1, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4),
            parents=("sulfone", "amine"),
            requires=("sulfone",),
            suppresses=("thioether",),
            anchor_node=1,
            priority=50,
        ),
        FunctionalGroupPattern(
            "thioester",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "S"}),
                    (4, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (3, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3),
            parents=("ketone", "thioether"),
            requires=("carbonyl",),
            anchor_node=3,
            priority=50,
        ),
        # --- Other oxygen groups -------------------------------------------
        FunctionalGroupPattern(
            "phenol",
            _graph(
                [(1, {"element": "C", "aromatic": True}), (2, {"element": "O"})],
                [(1, 2, {"order": 1.0})],
            ),
            # Only the oxygen is reported as the group.
            (2,),
            parents=("alcohol",),
            anchor_node=2,
            priority=50,
            validator=_phenol,
            recognizer=_two_node_bond_recognizer("C", ("O",), 1.0),
        ),
        FunctionalGroupPattern(
            "enol",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "C"}), (3, {"element": "O"})],
                [(1, 2, {"order": 2.0}), (2, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            parents=("alcohol",),
            anchor_node=3,
            priority=40,
            validator=_enol,
        ),
        FunctionalGroupPattern(
            "peroxide",
            _graph(
                [(1, {"element": "O"}), (2, {"element": "O"})],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            parents=("ether",),
            anchor_node=1,
            priority=30,
            recognizer=_symmetric_two_node_bond_recognizer("O", 1.0),
        ),
        FunctionalGroupPattern(
            "peroxy_acid",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "O", "hcount_min": 1}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (3, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4),
            parents=("ester", "peroxide"),
            requires=("carbonyl",),
            anchor_node=3,
            priority=60,
        ),
        FunctionalGroupPattern(
            "anhydride",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "C"}),
                    (5, {"element": "O"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (3, 4, {"order": 1.0}),
                    (4, 5, {"order": 2.0}),
                ],
            ),
            (1, 2, 3, 4, 5),
            parents=("ester",),
            requires=("carbonyl",),
            anchor_node=3,
            priority=60,
        ),
        FunctionalGroupPattern(
            "acyl_chloride",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "O"}), (3, {"element": "Cl"})],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            parents=("ketone",),
            requires=("carbonyl",),
            suppresses=("organohalide",),
            anchor_node=3,
            priority=50,
        ),
        FunctionalGroupPattern(
            "epoxide",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "C"}), (3, {"element": "O"})],
                # Three-membered C-C-O ring.
                [
                    (1, 2, {"order": 1.0}),
                    (1, 3, {"order": 1.0}),
                    (2, 3, {"order": 1.0}),
                ],
            ),
            (1, 2, 3),
            parents=("ether",),
            requires=("oxygen_link",),
            anchor_node=3,
            priority=40,
            validator=_epoxide,
        ),
        # --- Boron and silicon ----------------------------------------------
        FunctionalGroupPattern(
            "boronic_acid",
            _graph(
                [
                    (1, {"element": "B"}),
                    (2, {"element": "O", "hcount_min": 1}),
                    (3, {"element": "O", "hcount_min": 1}),
                ],
                [(1, 2, {"order": 1.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            anchor_node=1,
            priority=40,
        ),
        FunctionalGroupPattern(
            "boronate_ester",
            _graph(
                [
                    (1, {"element": "B"}),
                    (2, {"element": "O"}),
                    (3, {"element": "C"}),
                    (4, {"element": "O"}),
                    (5, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                    (4, 5, {"order": 1.0}),
                ],
            ),
            (1, 2, 4),
            anchor_node=1,
            priority=40,
        ),
        FunctionalGroupPattern(
            "silyl_ether",
            _graph(
                [(1, {"element": "O"}), (2, {"element": "Si"})],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            anchor_node=2,
            priority=30,
            recognizer=_two_node_bond_recognizer("O", ("Si",), 1.0),
        ),
        # --- Phosphorus groups ----------------------------------------------
        FunctionalGroupPattern(
            "phosphate",
            _graph(
                [
                    (1, {"element": "P"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "O"}),
                    (5, {"element": "O"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                    (1, 5, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4, 5),
            anchor_node=1,
            priority=40,
        ),
        FunctionalGroupPattern(
            "phosphonate",
            _graph(
                [
                    (1, {"element": "P"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "O"}),
                    (5, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                    (1, 5, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4, 5),
            anchor_node=1,
            priority=40,
        ),
        FunctionalGroupPattern(
            "phosphine_oxide",
            _graph(
                [
                    (1, {"element": "P"}),
                    (2, {"element": "O"}),
                    (3, {"element": "C"}),
                    (4, {"element": "C"}),
                    (5, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                    (1, 5, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4, 5),
            anchor_node=1,
            priority=40,
        ),
        FunctionalGroupPattern(
            "phosphite",
            _graph(
                [
                    (1, {"element": "P"}),
                    (2, {"element": "O"}),
                    (3, {"element": "O"}),
                    (4, {"element": "O"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (1, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4),
            anchor_node=1,
            priority=30,
            validator=_phosphite,
        ),
        # --- C=N / N=N / cumulated systems ---------------------------------
        FunctionalGroupPattern(
            "isocyanate",
            _graph(
                [
                    (1, {"element": "O"}),
                    (2, {"element": "C"}),
                    (3, {"element": "N"}),
                ],
                [(1, 2, {"order": 2.0}), (2, 3, {"order": 2.0})],
            ),
            (1, 2, 3),
            suppresses=("carbonyl", "ketone"),
            anchor_node=2,
            priority=40,
        ),
        FunctionalGroupPattern(
            "oxime",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "N"}),
                    (3, {"element": "O"}),
                ],
                [(1, 2, {"order": 2.0}), (2, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            anchor_node=2,
            priority=40,
        ),
        FunctionalGroupPattern(
            "hydrazone",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "N"}),
                    (3, {"element": "N"}),
                ],
                [(1, 2, {"order": 2.0}), (2, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            suppresses=("amine",),
            anchor_node=2,
            priority=40,
            validator=_hydrazone,
        ),
        FunctionalGroupPattern(
            "imine",
            _graph(
                [(1, {"element": "C"}), (2, {"element": "N"})],
                [(1, 2, {"order": 2.0})],
            ),
            (1, 2),
            anchor_node=2,
            priority=20,
            validator=_imine,
            recognizer=_two_node_bond_recognizer("C", ("N",), 2.0),
        ),
        FunctionalGroupPattern(
            "amidine",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "N"}),
                    (3, {"element": "N"}),
                ],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            suppresses=("amine",),
            anchor_node=1,
            priority=40,
            validator=_amidine,
        ),
        FunctionalGroupPattern(
            "amidoxime",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "N"}),
                    (3, {"element": "N"}),
                    (4, {"element": "O"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (2, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4),
            suppresses=("amine", "oxime"),
            anchor_node=1,
            priority=50,
        ),
        FunctionalGroupPattern(
            "azide",
            _graph(
                [
                    (1, {"element": "N"}),
                    (2, {"element": "N"}),
                    (3, {"element": "N"}),
                ],
                [(1, 2, {"order": 2.0}), (2, 3, {"order": 2.0})],
            ),
            (1, 2, 3),
            anchor_node=2,
            priority=40,
            validator=_azide,
        ),
        FunctionalGroupPattern(
            "azo",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "N"}),
                    (3, {"element": "N"}),
                    (4, {"element": "C"}),
                ],
                [
                    (1, 2, {"order": 1.0}),
                    (2, 3, {"order": 2.0}),
                    (3, 4, {"order": 1.0}),
                ],
            ),
            # Flanking carbons are matched but only the N=N pair is reported.
            (2, 3),
            anchor_node=2,
            priority=40,
        ),
        # --- Thiocarbonyl groups -------------------------------------------
        FunctionalGroupPattern(
            "isothiocyanate",
            _graph(
                [
                    (1, {"element": "S"}),
                    (2, {"element": "C"}),
                    (3, {"element": "N"}),
                ],
                [(1, 2, {"order": 2.0}), (2, 3, {"order": 2.0})],
            ),
            (1, 2, 3),
            anchor_node=2,
            priority=50,
        ),
        FunctionalGroupPattern(
            "thiourea",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "S"}),
                    (3, {"element": "N"}),
                    (4, {"element": "N"}),
                ],
                [
                    (1, 2, {"order": 2.0}),
                    (1, 3, {"order": 1.0}),
                    (1, 4, {"order": 1.0}),
                ],
            ),
            (1, 2, 3, 4),
            suppresses=("amine", "thioamide"),
            anchor_node=1,
            priority=50,
        ),
        FunctionalGroupPattern(
            "thioamide",
            _graph(
                [
                    (1, {"element": "C"}),
                    (2, {"element": "S"}),
                    (3, {"element": "N"}),
                ],
                [(1, 2, {"order": 2.0}), (1, 3, {"order": 1.0})],
            ),
            (1, 2, 3),
            suppresses=("amine",),
            anchor_node=1,
            priority=40,
        ),
        # --- Halides ----------------------------------------------------------
        FunctionalGroupPattern(
            "organohalide",
            _graph(
                [
                    (1, {"element": "C"}),
                    # A tuple lists the alternative elements accepted here.
                    (2, {"element": ("F", "Cl", "Br", "I")}),
                ],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            anchor_node=2,
            priority=20,
            recognizer=_two_node_bond_recognizer("C", ("F", "Cl", "Br", "I"), 1.0),
        ),
        FunctionalGroupPattern(
            "aryl_halide",
            _graph(
                [
                    (1, {"element": "C", "aromatic": True}),
                    (2, {"element": ("F", "Cl", "Br", "I")}),
                ],
                [(1, 2, {"order": 1.0})],
            ),
            (1, 2),
            parents=("organohalide",),
            requires=("organohalide",),
            anchor_node=2,
            priority=30,
            validator=_aryl_halide,
            recognizer=_two_node_bond_recognizer("C", ("F", "Cl", "Br", "I"), 1.0),
        ),
    ]
    return FunctionalGroupRegistry(patterns)
b/synkit/Graph/FG/detector.py new file mode 100644 index 0000000..ddfe197 --- /dev/null +++ b/synkit/Graph/FG/detector.py @@ -0,0 +1,288 @@ +from __future__ import annotations + +from collections import Counter +from typing import Any + +import networkx as nx +from networkx.algorithms.isomorphism import GraphMatcher + +from .catalog import default_registry +from .model import FunctionalGroupMatch, FunctionalGroupPattern, FunctionalGroupRegistry +from .ring_system import AromaticRingSystemDetector + + +def _node_match(host: dict[str, Any], pattern: dict[str, Any]) -> bool: + element = pattern.get("element") + if element is not None: + allowed = element if isinstance(element, tuple) else (element,) + if host.get("element") not in allowed: + return False + + for attr in ("aromatic", "charge", "radical", "in_ring"): + if attr in pattern and host.get(attr) != pattern[attr]: + return False + + hcount = host.get("hcount", 0) + if "hcount_min" in pattern and hcount < pattern["hcount_min"]: + return False + if "hcount_max" in pattern and hcount > pattern["hcount_max"]: + return False + return True + + +def _edge_match(host: dict[str, Any], pattern: dict[str, Any]) -> bool: + if "order" in pattern and host.get("order") != pattern["order"]: + return False + if "aromatic" in pattern and host.get("aromatic") != pattern["aromatic"]: + return False + return True + + +class FunctionalGroupDetector: + """Detect functional groups from an input molecular ``nx.Graph``.""" + + def __init__(self, registry: FunctionalGroupRegistry | None = None) -> None: + self.registry = default_registry() if registry is None else registry + self.profile_counts: Counter[str] = Counter() + + def raw_matches( + self, + graph: nx.Graph, + *, + include_internal: bool = False, + ) -> list[FunctionalGroupMatch]: + """Return raw public matches before hierarchy resolution.""" + matches: list[FunctionalGroupMatch] = [] + matched_names: set[str] = set() + for pattern in self.registry.execution_order(): + if 
pattern.requires and not any( + required in matched_names for required in pattern.requires + ): + self.profile_counts[f"skip:{pattern.name}"] += 1 + continue + self.profile_counts[f"attempt:{pattern.name}"] += 1 + if pattern.recognizer is not None: + found = pattern.recognizer(graph, pattern) + matches.extend(found) + if found: + matched_names.add(pattern.name) + continue + if pattern.anchor_node is not None: + anchor_data = pattern.graph.nodes[pattern.anchor_node] + anchor_candidates = [ + node + for node, host_data in graph.nodes(data=True) + if _node_match(host_data, anchor_data) + ] + if not anchor_candidates: + continue + matcher = GraphMatcher( + graph, + pattern.graph, + node_match=_node_match, + edge_match=_edge_match, + ) + seen: set[tuple[int, ...]] = set() + for host_to_pattern in matcher.subgraph_monomorphisms_iter(): + mapping = { + pattern_node: host_node + for host_node, pattern_node in host_to_pattern.items() + } + if pattern.validator is not None and not pattern.validator( + graph, mapping + ): + continue + group_nodes = tuple( + sorted(mapping[node] for node in pattern.group_nodes) + ) + if group_nodes in seen: + continue + seen.add(group_nodes) + matches.append( + FunctionalGroupMatch( + name=pattern.name, + group_nodes=group_nodes, + mapping=mapping, + pattern=pattern, + ) + ) + if any(match.name == pattern.name for match in matches): + matched_names.add(pattern.name) + matches.extend(self._heteroaromatic_matches(graph)) + if include_internal: + return matches + return [match for match in matches if match.pattern.public] + + def matches(self, graph: nx.Graph) -> list[FunctionalGroupMatch]: + """Return hierarchy-resolved matches.""" + raw = self.raw_matches(graph) + raw.sort( + key=lambda match: ( + match.pattern.priority, + len(match.group_nodes), + match.name, + match.group_nodes, + ), + reverse=True, + ) + + accepted: list[FunctionalGroupMatch] = [] + for candidate in raw: + if any(self._suppressed_by(candidate, chosen) for chosen in 
accepted): + continue + accepted.append(candidate) + return sorted(accepted, key=lambda match: (match.group_nodes, match.name)) + + def detect(self, graph: nx.Graph) -> list[tuple[str, tuple[int, ...]]]: + """Return simple ``(name, node_ids)`` functional-group labels.""" + return [(match.name, match.group_nodes) for match in self.matches(graph)] + + def _suppressed_by( + self, + candidate: FunctionalGroupMatch, + chosen: FunctionalGroupMatch, + ) -> bool: + if not self.registry.is_ancestor(candidate.name, chosen.name): + if candidate.name not in chosen.pattern.suppresses: + return False + return set(candidate.group_nodes).issubset(chosen.group_nodes) + + @staticmethod + def _heteroaromatic_matches(graph: nx.Graph) -> list[FunctionalGroupMatch]: + matches: list[FunctionalGroupMatch] = [] + for system in AromaticRingSystemDetector.detect(graph): + if not system.hetero_nodes: + continue + pattern_graph = nx.Graph() + pattern = FunctionalGroupPattern( + name="heteroaromatic_ring", + graph=pattern_graph, + group_nodes=(), + suppresses=("amine",), + priority=60, + ) + matches.append( + FunctionalGroupMatch( + name="heteroaromatic_ring", + group_nodes=system.nodes, + mapping={}, + pattern=pattern, + ) + ) + for ( + classifier_name, + group_nodes, + ) in FunctionalGroupDetector._classify_ring_system( + graph, + system, + ): + named_pattern = FunctionalGroupPattern( + name=classifier_name, + graph=nx.Graph(), + group_nodes=(), + priority=70, + ) + matches.append( + FunctionalGroupMatch( + name=classifier_name, + group_nodes=group_nodes, + mapping={}, + pattern=named_pattern, + ) + ) + return matches + + @staticmethod + def _classify_ring_system( + graph: nx.Graph, system + ) -> list[tuple[str, tuple[int, ...]]]: + labels: list[tuple[str, tuple[int, ...]]] = [] + for ring in system.subrings: + ring_size = len(ring.nodes) + counts = ring.element_counts + sequence = ring.hetero_sequence + if counts == {"C": 5, "N": 1} and ring_size == 6: + labels.append(("pyridine", 
ring.nodes)) + elif counts == {"C": 4, "N": 2} and ring_size == 6: + labels.append(("diazine", ring.nodes)) + elif counts == {"C": 4, "N": 1} and ring_size == 5: + labels.append(("pyrrole", ring.nodes)) + elif counts == {"C": 4, "O": 1} and ring_size == 5: + labels.append(("furan", ring.nodes)) + elif counts == {"C": 4, "S": 1} and ring_size == 5: + labels.append(("thiophene", ring.nodes)) + elif counts == {"C": 3, "N": 2} and ring_size == 5: + if sequence == ("C", "C", "C", "N", "N"): + labels.append(("pyrazole", ring.nodes)) + elif sequence == ("C", "C", "N", "C", "N"): + labels.append(("imidazole", ring.nodes)) + elif counts == {"C": 3, "N": 1, "S": 1} and ring_size == 5: + if sequence == ("C", "C", "C", "N", "S"): + labels.append(("isothiazole", ring.nodes)) + elif sequence == ("C", "C", "N", "C", "S"): + labels.append(("thiazole", ring.nodes)) + elif counts == {"C": 3, "N": 1, "O": 1} and ring_size == 5: + if sequence == ("C", "C", "C", "N", "O"): + labels.append(("isoxazole", ring.nodes)) + elif sequence == ("C", "C", "N", "C", "O"): + labels.append(("oxazole", ring.nodes)) + elif counts == {"C": 2, "N": 3} and ring_size == 5: + labels.append(("triazole", ring.nodes)) + elif counts == {"C": 2, "N": 2, "O": 1} and ring_size == 5: + labels.append(("oxadiazole", ring.nodes)) + elif counts == {"C": 1, "N": 4} and ring_size == 5: + labels.append(("tetrazole", ring.nodes)) + elif counts == {"C": 2, "N": 2, "S": 1} and ring_size == 5: + labels.append(("thiadiazole", ring.nodes)) + elif counts == {"C": 3, "N": 3} and ring_size == 6: + labels.append(("triazine", ring.nodes)) + labels.extend(FunctionalGroupDetector._classify_fused_ring_system(system)) + return labels + + @staticmethod + def _classify_fused_ring_system(system) -> list[tuple[str, tuple[int, ...]]]: + if not system.is_fused or system.ring_sizes != (5, 6): + return [] + sequences = {ring.hetero_sequence for ring in system.subrings} + if ( + system.element_counts == {"C": 8, "N": 1} + and ( + "C", + "C", + 
"C", + "C", + "N", + ) + in sequences + ): + return [("indole", system.nodes)] + if system.element_counts == {"C": 7, "N": 2}: + if ("C", "C", "N", "C", "N") in sequences: + return [("benzimidazole", system.nodes)] + if ("C", "C", "C", "N", "N") in sequences: + return [("indazole", system.nodes)] + if ( + system.element_counts == {"C": 7, "N": 1, "O": 1} + and ( + "C", + "C", + "N", + "C", + "O", + ) + in sequences + ): + return [("benzoxazole", system.nodes)] + if ( + system.element_counts == {"C": 7, "N": 1, "S": 1} + and ( + "C", + "C", + "N", + "C", + "S", + ) + in sequences + ): + return [("benzothiazole", system.nodes)] + return [] diff --git a/synkit/Graph/FG/model.py b/synkit/Graph/FG/model.py new file mode 100644 index 0000000..c012328 --- /dev/null +++ b/synkit/Graph/FG/model.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Callable, Iterable + +import networkx as nx + +Mapping = dict[int, int] +Validator = Callable[[nx.Graph, Mapping], bool] +Recognizer = Callable[ + [nx.Graph, "FunctionalGroupPattern"], list["FunctionalGroupMatch"] +] + + +@dataclass(frozen=True) +class FunctionalGroupPattern: + """Graph-native functional-group definition.""" + + name: str + graph: nx.Graph + group_nodes: tuple[int, ...] + parents: tuple[str, ...] = () + suppresses: tuple[str, ...] = () + requires: tuple[str, ...] = () + anchor_node: int | None = None + priority: int = 0 + validator: Validator | None = None + recognizer: Recognizer | None = None + public: bool = True + + +@dataclass(frozen=True) +class FunctionalGroupMatch: + """One matched functional group in a host graph.""" + + name: str + group_nodes: tuple[int, ...] 
@dataclass
class FunctionalGroupRegistry:
    """Container for functional-group patterns and hierarchy metadata.

    Patterns are kept in registration order. Name lookups (``by_name`` and
    the parent walk in ``is_ancestor``) resolve to the first pattern
    registered under a name.
    """

    patterns: list[FunctionalGroupPattern] = field(default_factory=list)

    def add(self, pattern: FunctionalGroupPattern) -> None:
        """Append one pattern to the registry."""
        self.patterns.append(pattern)

    def extend(self, patterns: Iterable[FunctionalGroupPattern]) -> None:
        """Append several patterns, preserving their order."""
        self.patterns.extend(patterns)

    def by_name(self, name: str) -> FunctionalGroupPattern:
        """Return the first registered pattern called ``name``.

        Raises:
            KeyError: If no pattern has that name.
        """
        for pattern in self.patterns:
            if pattern.name == name:
                return pattern
        raise KeyError(name)

    def is_ancestor(self, ancestor: str, child: str) -> bool:
        """Return whether ``ancestor`` is an ancestor of ``child``.

        Follows ``parents`` links transitively starting at ``child``. Names
        that are not registered are treated as having no parents, and cycles
        in the parent graph are tolerated.
        """
        # Index once per call instead of a linear ``by_name`` scan per node;
        # ``setdefault`` keeps the first-registered pattern, like ``by_name``.
        first_by_name: dict[str, FunctionalGroupPattern] = {}
        for pattern in self.patterns:
            first_by_name.setdefault(pattern.name, pattern)

        seen: set[str] = set()
        stack = [child]
        while stack:
            current = stack.pop()
            if current in seen:
                continue
            seen.add(current)
            known = first_by_name.get(current)
            parents = () if known is None else known.parents
            if ancestor in parents:
                return True
            stack.extend(parents)
        return False

    def execution_order(self) -> list[FunctionalGroupPattern]:
        """Return patterns in prerequisite-respecting order.

        Every registered pattern appears exactly once, after the registered
        patterns named in its ``requires``. Patterns that share a name are
        all kept (previously only the last one survived). Unknown required
        names are ignored and cycles in ``requires`` are tolerated.
        """
        providers: dict[str, list[FunctionalGroupPattern]] = {}
        for pattern in self.patterns:
            providers.setdefault(pattern.name, []).append(pattern)

        ordered: list[FunctionalGroupPattern] = []
        # Track by identity so distinct patterns with equal names both run.
        placed: set[int] = set()

        def visit(pattern: FunctionalGroupPattern) -> None:
            if id(pattern) in placed:
                return
            placed.add(id(pattern))
            for required in pattern.requires:
                for provider in providers.get(required, ()):
                    visit(provider)
            ordered.append(pattern)

        for pattern in self.patterns:
            visit(pattern)
        return ordered
class AromaticRingSystemDetector:
    """Extract aromatic connected components and characterize their rings."""

    @staticmethod
    def detect(graph: nx.Graph) -> list[AromaticRingSystem]:
        """Return one ``AromaticRingSystem`` per aromatic component with a cycle.

        The aromatic subgraph keeps nodes whose ``aromatic`` attribute is
        truthy and, between them, edges that are flagged aromatic or have
        ``order == 1.5``. Components without any cycle are ignored. Systems
        are returned sorted by their node tuple.
        """
        aromatic = nx.Graph()
        for atom, attrs in graph.nodes(data=True):
            if attrs.get("aromatic"):
                aromatic.add_node(atom, **attrs)
        for u, v, attrs in graph.edges(data=True):
            both_kept = u in aromatic and v in aromatic
            if both_kept and (attrs.get("aromatic") or attrs.get("order") == 1.5):
                aromatic.add_edge(u, v, **attrs)

        tally = AromaticRingSystemDetector._element_counts
        canonical = AromaticRingSystemDetector._canonical_cycle_sequence

        found: list[AromaticRingSystem] = []
        for members in nx.connected_components(aromatic):
            component = aromatic.subgraph(members).copy()
            cycles = nx.minimum_cycle_basis(component)
            if not cycles:
                continue

            rings = [
                AromaticSubring(
                    nodes=tuple(sorted(cycle)),
                    element_counts=tally(component, cycle),
                    hetero_sequence=canonical(component, cycle),
                )
                for cycle in cycles
            ]
            rings.sort(key=lambda ring: ring.nodes)
            subrings = tuple(rings)

            element_counts = tally(component, component)
            ring_sizes = tuple(sorted(len(cycle) for cycle in cycles))
            hetero_nodes = tuple(
                sorted(
                    atom
                    for atom in component.nodes()
                    if component.nodes[atom].get("element") not in {"C", "H"}
                )
            )
            # A system-level sequence only makes sense for a single ring.
            hetero_sequence = subrings[0].hetero_sequence if len(cycles) == 1 else None

            found.append(
                AromaticRingSystem(
                    nodes=tuple(sorted(component.nodes())),
                    edges=tuple(
                        sorted(tuple(sorted(edge)) for edge in component.edges())
                    ),
                    hetero_nodes=hetero_nodes,
                    element_counts=element_counts,
                    ring_sizes=ring_sizes,
                    subrings=subrings,
                    is_fused=len(cycles) > 1,
                    hetero_sequence=hetero_sequence,
                    hetero_pattern=AromaticRingSystemDetector._pattern(
                        element_counts,
                        ring_sizes,
                    ),
                )
            )
        found.sort(key=lambda system: system.nodes)
        return found

    @staticmethod
    def _element_counts(graph: nx.Graph, nodes) -> dict[str, int]:
        """Count ``element`` attributes over ``nodes``, keyed in sorted order."""
        histogram = Counter(graph.nodes[node].get("element") for node in nodes)
        return dict(sorted(histogram.items()))

    @staticmethod
    def _canonical_cycle_sequence(
        graph: nx.Graph,
        cycle_nodes: list[int],
    ) -> tuple[str, ...]:
        """Return a rotation- and direction-independent element sequence.

        The ring is walked from its smallest node in both directions; the
        lexicographically smallest rotation of either walk is returned. When
        the smallest node does not have exactly two neighbours inside the
        ring, the elements are listed in sorted node order instead.
        """
        members = list(cycle_nodes)
        ring = graph.subgraph(members)
        origin = min(members)
        first_steps = sorted(ring.neighbors(origin))
        if len(first_steps) != 2:
            return tuple(graph.nodes[node].get("element") for node in sorted(members))

        best = None
        for step in first_steps:
            path = [origin, step]
            while len(path) < len(members):
                # Keep moving away from the atom we just came from.
                onward = [node for node in ring.neighbors(path[-1]) if node != path[-2]]
                if not onward:
                    break
                path.append(onward[0])
            labels = tuple(graph.nodes[node].get("element") for node in path)
            for shift in range(len(labels)):
                rotated = labels[shift:] + labels[:shift]
                if best is None or rotated < best:
                    best = rotated
        return best

    @staticmethod
    def _pattern(element_counts: dict[str, int], ring_sizes: tuple[int, ...]) -> str:
        """Build a summary such as ``1N-6ring`` or ``carbocycle-6ring``."""
        hetero_parts = []
        for element, count in element_counts.items():
            if element in {"C", "H"}:
                continue
            hetero_parts.append(f"{count}{element}")
        prefix = "-".join(hetero_parts) if hetero_parts else "carbocycle"
        sizes = "-".join(map(str, ring_sizes))
        return f"{prefix}-{sizes}ring"
index b84ad5c..2568a01 100644 --- a/synkit/Graph/Hyrogen/_misc.py +++ b/synkit/Graph/Hyrogen/_misc.py @@ -134,6 +134,9 @@ def h_to_explicit(G: nx.Graph, nodes: List[int] = None, its: bool = False) -> nx if heavy not in H2: continue count = H2.nodes[heavy].get("hcount", 0) + if its and _is_pair_hcount(count): + max_node = _expand_paired_its_hydrogens(H2, heavy, max_node) + continue if count <= 0: continue @@ -164,6 +167,65 @@ def h_to_explicit(G: nx.Graph, nodes: List[int] = None, its: bool = False) -> nx return H2 +def _is_pair_hcount(value: Any) -> bool: + """Return whether a value is a 2-item integer hydrogen-count pair.""" + return ( + isinstance(value, (list, tuple)) + and len(value) == 2 + and all(isinstance(item, int) for item in value) + ) + + +def _paired_hydrogen_node_attrs(present: Tuple[bool, bool]) -> dict: + """Build named paired attrs for a tuple-style ITS hydrogen node.""" + return { + "element": ("H", "H"), + "aromatic": (False, False), + "hcount": (0, 0), + "charge": (0, 0), + "radical": (0, 0), + "lone_pairs": (0, 0), + "valence_electrons": (1, 1), + "neighbors": ([], []), + "atom_map": (0, 0), + "present": present, + } + + +def _expand_paired_its_hydrogens( + graph: nx.Graph, + heavy: int, + max_node: int, +) -> int: + """Expand paired ITS hydrogen counts into shared and side-only H nodes.""" + react_h, prod_h = graph.nodes[heavy]["hcount"] + shared = min(react_h, prod_h) + react_only = react_h - shared + prod_only = prod_h - shared + + expansion_plan = ( + [((True, True), (1.0, 1.0))] * shared + + [((True, False), (1.0, 0.0))] * react_only + + [((False, True), (0.0, 1.0))] * prod_only + ) + + for present, bond_pair in expansion_plan: + max_node += 1 + graph.add_node(max_node, **_paired_hydrogen_node_attrs(present)) + graph.add_edge( + heavy, + max_node, + order=bond_pair, + kekule_order=bond_pair, + sigma_order=bond_pair, + pi_order=(0.0, 0.0), + standard_order=bond_pair[0] - bond_pair[1], + ) + + graph.nodes[heavy]["hcount"] = (0, 0) + return 
max_node + + def implicit_hydrogen( graph: nx.Graph, preserve_atom_maps: Set[int], reindex: bool = False ) -> nx.Graph: @@ -464,16 +526,17 @@ def _normalize_h_pair(h_react: int, h_prod: int) -> Tuple[int, int]: def normalize_h_pair_graph(rc_graph: nx.Graph, inplace: bool = False) -> nx.Graph: """ - Normalize the hydrogen-count field inside ``typesGH`` for all nodes. + Normalize paired hydrogen counts for all ITS nodes. - Assumption: - ``typesGH`` is a 2-tuple: - (reactant_attr, product_attr) + New-style ITS nodes may store ``hcount`` directly as + ``(reactant_hcount, product_hcount)``. Legacy ITS nodes may instead store + hydrogen counts inside ``typesGH``: - and each attr tuple has the form: - (element, aromatic, hydrogen_count, charge, neighbors) + ``typesGH = (reactant_attr, product_attr)`` - Only the hydrogen_count field at index 2 is normalized. + where each side tuple has the form + ``(element, aromatic, hydrogen_count, charge, neighbors)``. + Both representations are normalized when present. :param rc_graph: Reaction-center graph. 
:type rc_graph: nx.Graph @@ -485,6 +548,14 @@ def normalize_h_pair_graph(rc_graph: nx.Graph, inplace: bool = False) -> nx.Grap graph = rc_graph if inplace else deepcopy(rc_graph) for node, data in graph.nodes(data=True): + hcount = data.get("hcount") + if ( + isinstance(hcount, (list, tuple)) + and len(hcount) == 2 + and all(isinstance(value, int) for value in hcount) + ): + data["hcount"] = _normalize_h_pair(hcount[0], hcount[1]) + typesgh = data.get("typesGH") if typesgh is None: continue diff --git a/synkit/Graph/ITS/its_construction.py b/synkit/Graph/ITS/its_construction.py index 0934776..8f00d65 100644 --- a/synkit/Graph/ITS/its_construction.py +++ b/synkit/Graph/ITS/its_construction.py @@ -24,11 +24,15 @@ class ITSConstruction: "partial_charge": 0, "hybridization": "", "lone_pairs": 0, + "radical": 0, "valence_electrons": 0, } CORE_EDGE_DEFAULTS: Dict[str, Any] = { "order": 0.0, + "kekule_order": 0.0, + "sigma_order": 0.0, + "pi_order": 0.0, "ez_isomer": "", "bond_type": "", "conjugated": False, @@ -235,6 +239,7 @@ def _populate_node_attributes( ) its.nodes[n]["typesGH"] = (g_tuple, h_tuple) + its.nodes[n]["present"] = (n in G, n in H) for i, attr in enumerate(node_attrs): its.nodes[n][attr] = (g_tuple[i], h_tuple[i]) if store else g_tuple[i] @@ -446,8 +451,16 @@ def construct( "hcount", "charge", "neighbors", + "lone_pairs", + "radical", + "valence_electrons", + ] + edge_attrs = edge_attrs or [ + "order", + "kekule_order", + "sigma_order", + "pi_order", ] - edge_attrs = edge_attrs or ["order"] node_defaults = ITSConstruction._resolve_defaults( attributes_defaults, ITSConstruction.CORE_NODE_DEFAULTS diff --git a/synkit/Graph/ITS/its_destruction.py b/synkit/Graph/ITS/its_destruction.py index 274a969..3e99ffc 100644 --- a/synkit/Graph/ITS/its_destruction.py +++ b/synkit/Graph/ITS/its_destruction.py @@ -35,13 +35,26 @@ def __init__( its_graph: nx.Graph, node_attrs: Optional[List[str]] = None, edge_share: str = "order", + edge_attrs: Optional[List[str]] = None, 
clean_wildcard: bool = False, ): if node_attrs is None: - node_attrs = ["element", "charge", "hcount", "aromatic", "atom_map"] + node_attrs = [ + "element", + "charge", + "hcount", + "aromatic", + "radical", + "lone_pairs", + "valence_electrons", + "atom_map", + ] + if edge_attrs is None: + edge_attrs = [edge_share, "kekule_order", "sigma_order", "pi_order"] self._its = its_graph self.node_attrs = node_attrs self.edge_share = edge_share + self.edge_attrs = edge_attrs self.clean_wildcard = clean_wildcard self._G: Optional[nx.Graph] = None self._H: Optional[nx.Graph] = None @@ -143,10 +156,19 @@ def _has_real(side_dict: Dict[str, Any]) -> bool: order_g, order_h = order_tuple else: order_g = order_h = 0.0 + g_edge_attrs: Dict[str, Any] = {} + h_edge_attrs: Dict[str, Any] = {} + for attr in self.edge_attrs: + value = data.get(attr) + if isinstance(value, tuple) and len(value) == 2: + g_edge_attrs[attr], h_edge_attrs[attr] = value + elif value is not None: + g_edge_attrs[attr] = value + h_edge_attrs[attr] = value if isinstance(order_g, (int, float)) and order_g > 0: - G.add_edge(u, v, order=order_g) + G.add_edge(u, v, **g_edge_attrs) if isinstance(order_h, (int, float)) and order_h > 0: - H.add_edge(u, v, order=order_h) + H.add_edge(u, v, **h_edge_attrs) # Apply wildcard cleaning if requested (without neighbor contraction) if self.clean_wildcard: diff --git a/synkit/Graph/ITS/its_expand.py b/synkit/Graph/ITS/its_expand.py index 0a23438..d8c5bf6 100644 --- a/synkit/Graph/ITS/its_expand.py +++ b/synkit/Graph/ITS/its_expand.py @@ -16,9 +16,17 @@ class ITSExpand: reaction center graph. This class identifies the reaction center from an RSMI, builds and - reconstructs the ITS graph, decomposes it back into reactants and - products, and standardizes atom mappings to produce a fully mapped - AAM RSMI. + reconstructs the ITS graph, decomposes it back into reactants and products, + and standardizes atom mappings to produce a fully mapped AAM RSMI. 
+ + The optional ``preserve_older_map`` mode keeps existing atom-map numbers + from the input RSMI by reindexing the side graph before ITS reconstruction. + + Notes + ----- + ``preserve_older_map=True`` is intended for the ITS expansion path only. + It should not be combined with ``relabel=True``, because ``ITSRelabel`` + globally renumbers atom maps. :cvar std: Standardize instance for reaction SMILES standardization. :type std: Standardize @@ -31,56 +39,406 @@ def __init__(self) -> None: """ pass + @staticmethod + def _split_rsmi(rsmi: str) -> tuple[str, str]: + """Split a reaction SMILES into reactant and product sides. + + :param rsmi: Reaction SMILES string in ``reactant>>product`` format. + :type rsmi: str + :returns: Reactant-side SMILES and product-side SMILES. + :rtype: tuple[str, str] + :raises ValueError: If the input is not a valid two-sided RSMI. + """ + try: + react_smi, prod_smi = rsmi.split(">>") + except ValueError as e: + raise ValueError("Input RSMI must be 'reactant>>product'") from e + + if not react_smi or not prod_smi: + raise ValueError("Input RSMI must contain both reactant and product sides") + + return react_smi, prod_smi + + @staticmethod + def _atom_map(data: dict) -> int: + """Safely extract an atom-map number from node attributes. + + :param data: Node attribute dictionary. + :type data: dict + :returns: Atom-map number. Returns ``0`` if absent or falsy. + :rtype: int + """ + return int(data.get("atom_map", 0) or 0) + + @staticmethod + def _nonzero_atom_maps(graph) -> list[int]: + """Collect all nonzero atom-map numbers from a graph. + + :param graph: Molecular graph. + :type graph: networkx.Graph + :returns: List of nonzero atom-map numbers. + :rtype: list[int] + """ + return [ + ITSExpand._atom_map(data) + for _, data in graph.nodes(data=True) + if ITSExpand._atom_map(data) != 0 + ] + + @staticmethod + def _validate_unique_atom_maps(atom_maps: list[int]) -> None: + """Validate that all nonzero atom-map numbers are unique. 
+ + :param atom_maps: Nonzero atom-map numbers. + :type atom_maps: list[int] + :raises ValueError: If duplicate nonzero atom-map numbers are found. + """ + if len(atom_maps) != len(set(atom_maps)): + raise ValueError( + "Duplicate nonzero atom_map values found in side graph. " + "Cannot safely reindex graph by atom_map." + ) + + @staticmethod + def _validate_atom_maps_within_range( + atom_maps: list[int], + n_nodes: int, + ) -> None: + """Validate that atom-map numbers can be used as contiguous node IDs. + + In the side graph, we want final node IDs to remain exactly ``1..N``. + Therefore, a mapped atom can only be moved to its atom-map number if + that number is within ``1..N``. + + :param atom_maps: Nonzero atom-map numbers. + :type atom_maps: list[int] + :param n_nodes: Number of nodes in the side graph. + :type n_nodes: int + :raises ValueError: If any atom-map number is outside ``1..N``. + """ + bad_targets = [ + atom_map for atom_map in atom_maps if atom_map < 1 or atom_map > n_nodes + ] + + if bad_targets: + raise ValueError( + "Cannot keep side graph node ids contiguous from 1..N while " + f"also using atom_map as node id. The following atom maps are " + f"outside 1..{n_nodes}: {bad_targets}" + ) + + @staticmethod + def _assign_mapped_nodes(graph) -> tuple[dict, set[int]]: + """Assign mapped atoms to node IDs equal to their atom-map numbers. + + For example, if a node has ``atom_map=20``, the returned mapping assigns + that old node to new node ID ``20``. + + :param graph: Molecular side graph. + :type graph: networkx.Graph + :returns: A partial old-node to new-node mapping and the used node IDs. 
+ :rtype: tuple[dict, set[int]] + """ + mapping = {} + used_ids = set() + + for node, data in graph.nodes(data=True): + atom_map = ITSExpand._atom_map(data) + + if atom_map == 0: + continue + + mapping[node] = atom_map + used_ids.add(atom_map) + + return mapping, used_ids + + @staticmethod + def _assign_unmapped_nodes( + graph, + mapping: dict, + used_ids: set[int], + ) -> dict: + """Assign unmapped atoms while preserving contiguous node IDs. + + Unmapped atoms keep their original node ID when possible. If an + unmapped atom's node ID conflicts with a mapped atom's target ID, it is + moved into one of the remaining free IDs inside ``1..N``. + + :param graph: Molecular side graph. + :type graph: networkx.Graph + :param mapping: Existing mapping from mapped atoms. + :type mapping: dict + :param used_ids: Node IDs already occupied by mapped atoms. + :type used_ids: set[int] + :returns: Complete old-node to new-node mapping. + :rtype: dict + """ + n_nodes = graph.number_of_nodes() + free_ids = set(range(1, n_nodes + 1)) - used_ids + pending_unmapped = [] + + for node, data in graph.nodes(data=True): + atom_map = ITSExpand._atom_map(data) + + if atom_map != 0: + continue + + if isinstance(node, int) and node in free_ids: + mapping[node] = node + free_ids.remove(node) + else: + pending_unmapped.append(node) + + for old_node, new_node in zip(pending_unmapped, sorted(free_ids)): + mapping[old_node] = new_node + + return mapping + + @staticmethod + def _validate_contiguous_mapping(mapping: dict, n_nodes: int) -> None: + """Validate that a mapping produces exactly node IDs ``1..N``. + + :param mapping: Old-node to new-node mapping. + :type mapping: dict + :param n_nodes: Number of nodes in the graph. + :type n_nodes: int + :raises RuntimeError: If the mapped node IDs are not exactly ``1..N``. 
+ """ + expected_ids = set(range(1, n_nodes + 1)) + actual_ids = set(mapping.values()) + + if actual_ids != expected_ids: + missing = sorted(expected_ids - actual_ids) + extra = sorted(actual_ids - expected_ids) + raise RuntimeError( + f"Reindexing failed. Missing node ids: {missing}; " + f"extra node ids: {extra}" + ) + + @staticmethod + def _build_side_graph_reindex_mapping(graph) -> dict: + """Build an old-node to new-node mapping for a side graph. + + The mapping satisfies two conditions: + + 1. Every atom with ``atom_map != 0`` is assigned to node ID + ``atom_map``. + 2. The final node IDs are exactly contiguous from ``1..N``. + + :param graph: Molecular side graph. + :type graph: networkx.Graph + :returns: Old-node to new-node mapping. + :rtype: dict + :raises ValueError: If atom-map values are duplicated or incompatible + with contiguous node IDs. + """ + n_nodes = graph.number_of_nodes() + atom_maps = ITSExpand._nonzero_atom_maps(graph) + + ITSExpand._validate_unique_atom_maps(atom_maps) + ITSExpand._validate_atom_maps_within_range(atom_maps, n_nodes) + + mapping, used_ids = ITSExpand._assign_mapped_nodes(graph) + mapping = ITSExpand._assign_unmapped_nodes(graph, mapping, used_ids) + + ITSExpand._validate_contiguous_mapping(mapping, n_nodes) + + return mapping + + @staticmethod + def _copy_nodes_with_mapping(graph, new_graph, mapping: dict) -> None: + """Copy graph nodes into a new graph using a node mapping. + + :param graph: Source graph. + :type graph: networkx.Graph + :param new_graph: Destination graph. + :type new_graph: networkx.Graph + :param mapping: Old-node to new-node mapping. + :type mapping: dict + """ + for old_node, new_node in mapping.items(): + attrs = dict(graph.nodes[old_node]) + new_graph.add_node(new_node, **attrs) + + @staticmethod + def _copy_edges_with_mapping(graph, new_graph, mapping: dict) -> None: + """Copy graph edges into a new graph using a node mapping. + + Supports both simple graphs and multigraphs. 
+ + :param graph: Source graph. + :type graph: networkx.Graph + :param new_graph: Destination graph. + :type new_graph: networkx.Graph + :param mapping: Old-node to new-node mapping. + :type mapping: dict + """ + if graph.is_multigraph(): + for u, v, key, attrs in graph.edges(keys=True, data=True): + new_graph.add_edge( + mapping[u], + mapping[v], + key=key, + **dict(attrs), + ) + return + + for u, v, attrs in graph.edges(data=True): + new_graph.add_edge( + mapping[u], + mapping[v], + **dict(attrs), + ) + + @staticmethod + def _rebuild_graph_with_mapping(graph, mapping: dict): + """Rebuild a graph with remapped node IDs. + + This avoids in-place relabeling collisions, for example when node ``27`` + must become node ``20`` while old node ``20`` must move elsewhere. + + :param graph: Source graph. + :type graph: networkx.Graph + :param mapping: Old-node to new-node mapping. + :type mapping: dict + :returns: Rebuilt graph with remapped node IDs. + :rtype: networkx.Graph + """ + new_graph = graph.__class__() + new_graph.graph.update(graph.graph) + + ITSExpand._copy_nodes_with_mapping(graph, new_graph, mapping) + ITSExpand._copy_edges_with_mapping(graph, new_graph, mapping) + + return new_graph + + @staticmethod + def reindex_side_graph_by_atom_map(graph): + """Reindex a side graph so mapped atoms use ``atom_map`` as node ID. + + The returned graph keeps node IDs contiguous from ``1..N``. + + This is useful because the reaction-center graph produced by + ``ITSConstruction().ITSGraph(...)`` uses atom-map numbers as node IDs, + whereas the side graph produced by ``smiles_to_graph(...)`` may use + RDKit-style atom indices as node IDs. + + Example + ------- + Before reindexing: + + .. code-block:: text + + Node 20: atom_map = 0 + Node 27: atom_map = 20 + + After reindexing: + + .. code-block:: text + + Node 20: atom_map = 20 + Node 27: atom_map = 0 + + or another unmapped atom may be moved into the freed node position. + + :param graph: Molecular side graph. 
+ :type graph: networkx.Graph + :returns: Reindexed side graph with contiguous node IDs. + :rtype: networkx.Graph + :raises ValueError: If atom-map numbers cannot be safely used as node + IDs while preserving ``1..N`` indexing. + """ + mapping = ITSExpand._build_side_graph_reindex_mapping(graph) + return ITSExpand._rebuild_graph_with_mapping(graph, mapping) + @staticmethod def expand_aam_with_its( rsmi: str, relabel: bool = False, use_G: bool = True, + preserve_older_map: bool = False, ) -> str: """Expand a partial reaction SMILES to a full AAM RSMI using ITS reconstruction. - :param rsmi: Reaction SMILES string in the format 'reactant>>product'. + :param rsmi: Reaction SMILES string in the format + ``reactant>>product``. :type rsmi: str - :param use_G: If True, expand using the reactant side; otherwise use the product side. + :param relabel: If True, directly apply ``ITSRelabel().fit(rsmi)``. + This globally renumbers atom maps. + :type relabel: bool + :param use_G: If True, expand using the reactant side. If False, + expand using the product side. :type use_G: bool - :param light_weight: Flag indicating whether to apply a lighter-weight standardization. - :type light_weight: bool - :returns: Fully atom-mapped reaction SMILES after ITS expansion and standardization. + :param preserve_older_map: If True, preserve existing nonzero atom-map + numbers by reindexing the side graph before ITS reconstruction. + This keeps old maps such as ``:20`` attached to the same atom. + This option is incompatible with ``relabel=True``. + :type preserve_older_map: bool + :returns: Fully atom-mapped reaction SMILES after ITS expansion and + standardization. :rtype: str - :raises ValueError: If input RSMI format is invalid or ITS reconstruction fails. + :raises ValueError: If input RSMI format is invalid, if incompatible + options are used, or if side-graph reindexing is unsafe. 
:example: >>> expander = ITSExpand() - >>> expander.expand_aam_with_its("CC[CH2:3][Cl:1].[N:2]>>CC[CH2:3][N:2].[Cl:1]") + >>> expander.expand_aam_with_its( + ... "CC[CH2:3][Cl:1].[N:2]>>CC[CH2:3][N:2].[Cl:1]", + ... preserve_older_map=True, + ... ) '[CH3:1][CH2:2][CH2:3][Cl:4].[N:5]>>[CH3:1][CH2:2][CH2:3][N:5].[Cl:4]' """ + if relabel and preserve_older_map: + raise ValueError( + "preserve_older_map=True cannot be combined with relabel=True. " + "ITSRelabel globally renumbers atom maps. Use relabel=False " + "with preserve_older_map=True." + ) + if relabel: return ITSRelabel().fit(rsmi) - # Validate and split reaction SMILES - try: - react_smi, prod_smi = rsmi.split(">>") - except ValueError as e: - raise ValueError("Input RSMI must be 'reactant>>product'") from e - # Build graphs for reactants and products + react_smi, prod_smi = ITSExpand._split_rsmi(rsmi) + + # Build graphs for reactants and products. react_graph, prod_graph = rsmi_to_graph(rsmi) - # Construct the ITS reaction center graph + # Construct the ITS reaction-center graph. + # + # Do NOT reindex rc_graph here. + # The reaction-center graph already uses atom-map numbers as node IDs, + # for example nodes 10, 11, 12, and 20. rc_graph = ITSConstruction().ITSGraph(react_graph, prod_graph) - # Choose which side to expand + # Choose which side to expand. smi_side = react_smi if use_G else prod_smi + side_graph = smiles_to_graph( - smi_side, sanitize=True, drop_non_aam=False, use_index_as_atom_map=False + smi_side, + sanitize=True, + drop_non_aam=False, + use_index_as_atom_map=False, ) - # Reconstruct the full ITS graph + # Node IDs remain contiguous from 1..N. + if preserve_older_map: + side_graph = ITSExpand.reindex_side_graph_by_atom_map(side_graph) + + # Reconstruct the full ITS graph. its_graph = ITSBuilder().ITSGraph(side_graph, rc_graph) - # Decompose ITS back into reactant and product graphs + # Decompose ITS back into reactant and product graphs. 
new_react, new_prod = its_decompose(its_graph) - # Convert graphs back to RSMI and standardize atom mappings - expanded_rsmi = graph_to_rsmi(new_react, new_prod, its_graph, True, False) + # Convert graphs back to RSMI and standardize atom mappings. + expanded_rsmi = graph_to_rsmi( + new_react, + new_prod, + its_graph, + True, + False, + ) + return std.fit(expanded_rsmi, remove_aam=False) diff --git a/synkit/Graph/ITS/its_reverter.py b/synkit/Graph/ITS/its_reverter.py index c69b5be..a8d6842 100644 --- a/synkit/Graph/ITS/its_reverter.py +++ b/synkit/Graph/ITS/its_reverter.py @@ -57,12 +57,15 @@ class ITSReverter: "neighbors", "atom_map", "lone_pairs", + "radical", "valence_electrons", ) #: edge attributes commonly stored in ITS and worth restoring DEFAULT_EDGE_ATTRS = ( "kekule_order", + "sigma_order", + "pi_order", "order", "bond_type", "conjugated", @@ -126,6 +129,10 @@ def _node_exists_on_side(cls, attrs: dict[str, Any], idx: int) -> bool: :returns: Whether the node exists on that side. 
:rtype: bool """ + present = attrs.get("present") + if isinstance(present, tuple) and len(present) == 2: + return bool(present[idx]) + element = cls._pick_side_value(attrs.get("element"), idx) return element not in (None, "") diff --git a/synkit/Graph/ITS/rc_extractor.py b/synkit/Graph/ITS/rc_extractor.py index 29466aa..98b309f 100644 --- a/synkit/Graph/ITS/rc_extractor.py +++ b/synkit/Graph/ITS/rc_extractor.py @@ -30,6 +30,7 @@ class RCExtractor: - ``hcount`` (after hydrogen-pair normalization) - ``charge`` - ``lone_pairs`` (or alias ``lp``) + - ``radical`` - ``valence_electrons`` Default exported attribute sets @@ -47,12 +48,15 @@ class RCExtractor: - ``hybridization`` - ``atom_map`` - ``lone_pairs`` + - ``radical`` - ``valence_electrons`` - ``partial_charge`` Default edge attributes: - ``kekule_order`` + - ``sigma_order`` + - ``pi_order`` - ``order`` - ``bond_type`` - ``conjugated`` @@ -123,6 +127,7 @@ class RCExtractor: "hcount", "charge", "lone_pairs", + "radical", "valence_electrons", ) LP_ALIASES = ("lone_pairs", "lp") @@ -136,12 +141,15 @@ class RCExtractor: "hybridization", "atom_map", "lone_pairs", + "radical", "valence_electrons", "partial_charge", ) DEFAULT_EDGE_ATTRS = ( "kekule_order", + "sigma_order", + "pi_order", "order", "bond_type", "conjugated", @@ -152,6 +160,7 @@ def __init__( self, node_attrs: Iterable[str] | None = None, edge_attrs: Iterable[str] | None = None, + preserve_full_attrs: bool = False, ) -> None: """ Initialize the reaction-center extractor. @@ -164,9 +173,14 @@ def __init__( ``graph.graph["rc"]["edge_attrs"]``. If ``None``, the class defaults are used. :type edge_attrs: Iterable[str] | None + :param preserve_full_attrs: If ``True``, export complete node and edge + attribute dictionaries in the RC metadata snapshots instead of the + configured attribute subset. 
+ :type preserve_full_attrs: bool """ self._node_attrs = tuple(node_attrs or self.DEFAULT_NODE_ATTRS) self._edge_attrs = tuple(edge_attrs or self.DEFAULT_EDGE_ATTRS) + self.preserve_full_attrs = preserve_full_attrs def __repr__(self) -> str: """ @@ -421,7 +435,11 @@ def _collect_node_attrs( """ collected: dict[int, dict[str, Any]] = {} for node in nodes: - selected = self._select_attrs(graph.nodes[node], self.node_attrs) + selected = ( + dict(graph.nodes[node]) + if self.preserve_full_attrs + else self._select_attrs(graph.nodes[node], self.node_attrs) + ) if selected: collected[node] = selected return collected @@ -445,7 +463,11 @@ def _collect_edge_attrs( collected: dict[tuple[int, int], dict[str, Any]] = {} for u, v in edges: edge_key = self._edge_key(u, v) - selected = self._select_attrs(graph.edges[u, v], self.edge_attrs) + selected = ( + dict(graph.edges[u, v]) + if self.preserve_full_attrs + else self._select_attrs(graph.edges[u, v], self.edge_attrs) + ) if selected: collected[edge_key] = selected return collected diff --git a/synkit/Graph/MTG/group_comp.py b/synkit/Graph/MTG/group_comp.py deleted file mode 100644 index dc83c53..0000000 --- a/synkit/Graph/MTG/group_comp.py +++ /dev/null @@ -1,157 +0,0 @@ -"""groupcomp.py -~~~~~~~~~~~~~~~~ -Orchestration utilities to discover *groupoid‑compatible* merge candidates between two -`networkx` graphs, mirroring the MTG public API style. - -* Single orchestration class – :class:`GroupComp` – instantiated with two graphs. -* Exposes high‑level methods to get **node candidates**, **edge candidates**, and a **mapping**. -* Lean – core node/edge logic lives in `groupoid.py`; this module coordinates and presents a clean API. 
-""" - -from __future__ import annotations - -from dataclasses import dataclass -from collections import defaultdict -from typing import Any, Dict, List, Iterable, Tuple - -import networkx as nx - -from synkit.Graph.MTG.groupoid import ( - node_constraint, - edge_constraint, -) - -# ============================================================================== -# Type Aliases -# ============================================================================== - -NodeId = int -Node = Tuple[NodeId, Dict[str, Any]] -Edge = Tuple[NodeId, NodeId, Dict[str, Any]] # (u, v, attribute-dict) -MappingList = List[Dict[NodeId, NodeId]] - -# ============================================================================== -# Public orchestration class -# ============================================================================== - - -@dataclass(slots=True) -class GroupComp: - """Compute node/edge merge mappings that respect the *groupoid* rule. - - Parameters - ---------- - G1, G2 : networkx.Graph or networkx.DiGraph - Graphs between which to find compatible merge candidates. - """ - - G1: nx.Graph - G2: nx.Graph - - # ................................................................. - # SINGLE‑NODE MAPPING (FALLBACK) - # ................................................................. 
- @staticmethod - def get_mapping_from_nodes( - node_mapping: Dict[NodeId, List[NodeId]], - edges1: Iterable[Edge], - edges2: Iterable[Edge], - ) -> MappingList: - """Return *single‑node* mappings ``[{v₁: v₂}, …]`` that obey the - groupoid order rule w.r.t **all** incident edges on each side.""" - # Index incident edges once – O(|E|) - inc1: Dict[NodeId, List[Edge]] = defaultdict(list) - inc2: Dict[NodeId, List[Edge]] = defaultdict(list) - for u, v, a in edges1: - inc1[u].append((u, v, a)) - inc1[v].append((u, v, a)) - for u, v, a in edges2: - inc2[u].append((u, v, a)) - inc2[v].append((u, v, a)) - - res: MappingList = [] - for v1, cand in node_mapping.items(): - E1 = inc1.get(v1, []) - for v2 in cand: - E2 = inc2.get(v2, []) - if not E1 and not E2: # isolate nodes – always compatible - res.append({v1: v2}) - continue - # Forward check: every e1 has partner e2 - fwd_ok = all( - any( - a1.get("order", (None, None))[1] - == a2.get("order", (None, None))[0] - for _, _, a2 in E2 - ) - for _, _, a1 in E1 - ) - if not fwd_ok: - continue - # Reverse check: every e2 has partner e1 - rev_ok = all( - any( - a2.get("order", (None, None))[0] - == a1.get("order", (None, None))[1] - for _, _, a1 in E1 - ) - for _, _, a2 in E2 - ) - if rev_ok: - res.append({v1: v2}) - return res - - def get_mapping( - self, - *, - include_singleton: bool = False, - algorithm: str = "bt", - mcs: bool = False, - ) -> MappingList: - """Return all *groupoid‑legal* node‑mappings between G1 and G2. - - Steps: - 1. :func:`node_constraint` – filter by element/charge. - 2. :func:`edge_constraint` – structural filter (pairwise edges). - 3. Optionally fallback to :func:`get_mapping_from_nodes` for isolated nodes - or if *include_singleton* is *True*. - """ - # 1. node candidates - node_map = node_constraint(self.G1.nodes(data=True), self.G2.nodes(data=True)) - # 2. 
edge‑based candidates - mappings = edge_constraint( - self.G1.edges(data=True), - self.G2.edges(data=True), - node_map, - algorithm=algorithm, - mcs=mcs, - ) - # 3. fallback single‑node mappings - if include_singleton or not mappings: - singletons = self.get_mapping_from_nodes( - node_map, self.G1.edges(data=True), self.G2.edges(data=True) - ) - mappings.extend(singletons) - return mappings - - def help(self) -> None: - """Print the class docstring and all public methods.""" - print(self.__class__.__doc__) - for name in dir(self): - if not name.startswith("_"): - print(name) - - def __repr__(self) -> str: - """Compact summary: GroupComp(|V|1_V2, |E|1_E2, |M|).""" - try: - v1 = self.G1.number_of_nodes() - v2 = self.G2.number_of_nodes() - e1 = self.G1.number_of_edges() - e2 = self.G2.number_of_edges() - m = len(self.get_mapping()) - except Exception: - v1 = v2 = e1 = e2 = m = 0 # type: ignore - return f"GroupComp(|V|={v1}_{v2}, |E|={e1}_{e2}, |M|={m})" - - -__all__ = ["GroupComp", "NodeId", "Node", "Edge", "MappingList"] diff --git a/synkit/Graph/MTG/groupoid.py b/synkit/Graph/MTG/groupoid.py deleted file mode 100644 index b5d473b..0000000 --- a/synkit/Graph/MTG/groupoid.py +++ /dev/null @@ -1,358 +0,0 @@ -from __future__ import annotations -import networkx as nx -from collections import defaultdict -from typing import Iterable, Mapping, List, Dict, Any, Tuple, Optional, Set, FrozenSet - -# ============================================================================== -# Type Aliases -# ============================================================================== - -NodeId = int -ChargeTuple = Tuple[int | None, int | None] -Node = Tuple[NodeId, Dict[str, Any]] # (id, attribute-dict) -Edge = Tuple[NodeId, NodeId, Dict[str, Any]] # (u, v, attribute-dict) -MappingList = List[Dict[NodeId, NodeId]] - -# ============================================================================== -# Public Groupoid Operations -# 
============================================================================== - - -def charge_tuple(attrs: Mapping[str, Any]) -> ChargeTuple: - """Extract the 2-tuple charge signature from node attributes. - - Supports both: - - attrs['charges'] as a tuple of two ints - - attrs['typesGH'] as an iterable of two tuples where the 3rd element - in each is an int charge. - - Returns - ------- - (charge0, charge1) or (None, None) if unavailable - """ - # Case 1: direct 'charges' field - ch = attrs.get("charges") - if isinstance(ch, tuple) and len(ch) == 2: - return ch[0], ch[1] - - # Case 2: 'typesGH' field - tg = attrs.get("typesGH") - if isinstance(tg, (list, tuple)) and len(tg) >= 2: - try: - return tg[0][3], tg[1][3] - except Exception: - pass - - return None, None - - -def node_constraint( - nodes1: Iterable[Node], - nodes2: Iterable[Node], -) -> Dict[NodeId, List[NodeId]]: - """Compute candidate node mappings based on element and groupoid charge - rule. - - For each node v1 in nodes1 and v2 in nodes2, v2 is a candidate if: - 1. v1.attrs['element'] == v2.attrs['element'], and - 2. charge_tuple(v1)[1] == charge_tuple(v2)[0]. 
- - Returns - ------- - mapping : dict mapping each G1 node_id to a list of G2 node_ids - """ - # Index G2 by (element, first_charge) - idx_g2: Dict[Tuple[Any, Any], List[NodeId]] = defaultdict(list) - for n2_id, attrs2 in nodes2: - elem2 = attrs2.get("element") - first_charge, _ = charge_tuple(attrs2) - if elem2 is not None: - idx_g2[(elem2, first_charge)].append(n2_id) - - # Build mapping for G1 - mapping: Dict[NodeId, List[NodeId]] = {} - for n1_id, attrs1 in nodes1: - elem1 = attrs1.get("element") - _, second_charge = charge_tuple(attrs1) - mapping[n1_id] = idx_g2.get((elem1, second_charge), []) - - return mapping - - -# --------------------------------------------------------------------------- -# Back‑tracking implementation (legacy / fallback) -# --------------------------------------------------------------------------- - - -def _edge_constraint_backtracking( - edges1: Iterable[Edge], - edges2: Iterable[Edge], - node_mapping: Optional[Mapping[NodeId, List[NodeId]]] = None, - *, - mcs: bool = True, -) -> MappingList: - """Explicit set‑packing search. - - Parameters - ---------- - mcs : bool, default ``True`` - If ``True`` return **only** mappings that maximise the number of matched - edges (MCS). If ``False`` return *all* disjoint edge‑set mappings. - """ - # 1. candidate edge pairs ------------------------------------------------ - candidates: List[Tuple[Edge, Edge]] = [] - for u1, v1, a1 in edges1: - o1 = a1.get("order", (None, None)) - if len(o1) < 2: - continue - needed = o1[1] - for u2, v2, a2 in edges2: - o2 = a2.get("order", (None, None)) - if len(o2) < 2 or o2[0] != needed: - continue - if node_mapping and ( - u2 not in node_mapping.get(u1, []) or v2 not in node_mapping.get(v1, []) - ): - continue - candidates.append(((u1, v1, a1), (u2, v2, a2))) - - # 2. 
DFS to enumerate *all* disjoint edge‑pair sets ---------------------- - pair_sets: List[List[Tuple[Edge, Edge]]] = [] - - def _dfs(chosen: List[Tuple[Edge, Edge]], rem: List[Tuple[Edge, Edge]]): - if not rem: - if chosen: - pair_sets.append(chosen.copy()) - return - first, *rest = rem - (u1, v1, _), (u2, v2, _) = first - # include if disjoint on both graphs - filt = [ - p - for p in rest - if p[0][0] not in (u1, v1) - and p[0][1] not in (u1, v1) - and p[1][0] not in (u2, v2) - and p[1][1] not in (u2, v2) - ] - _dfs(chosen + [first], filt) # include - _dfs(chosen, rest) # exclude - - _dfs([], candidates) - - # 3. select MCS (optional) ---------------------------------------------- - if mcs: - max_sz = max((len(s) for s in pair_sets), default=0) - pair_sets = [s for s in pair_sets if len(s) == max_sz] - - # 4. convert → mapping list & dedupe ------------------------------------ - mappings: MappingList = [] - seen: Set[FrozenSet] = set() - for match_set in pair_sets: - m: Dict[NodeId, NodeId] = {} - for (u1, v1, _), (u2, v2, _) in match_set: - m[u1] = u2 - m[v1] = v2 - key = frozenset(m.items()) - if key not in seen: - seen.add(key) - mappings.append(m) - return mappings - - -# --------------------------------------------------------------------------- -# VF2 -# --------------------------------------------------------------------------- - - -def _edge_constraint_vf2( - edges1: Iterable[Edge], - edges2: Iterable[Edge], - node_mapping: Optional[Mapping[NodeId, List[NodeId]]] = None, -) -> MappingList: - """VF2‐style routine, fully in Python (no NetworkX), seeded like VF3 but - relaxed so it returns the same maximal‐common‐subgraph mappings. - - The returned dicts will always have their keys sorted ascending. 
- """ - # --- build adjacency lists with valid 'order' tuples --- - adj1: Dict[NodeId, Dict[NodeId, Tuple[int, int]]] = {} - for u, v, data in edges1: - o = data.get("order", ()) - if isinstance(o, tuple) and len(o) >= 2: - adj1.setdefault(u, {})[v] = o - adj1.setdefault(v, {})[u] = (o[1], o[0]) - adj2: Dict[NodeId, Dict[NodeId, Tuple[int, int]]] = {} - for u, v, data in edges2: - o = data.get("order", ()) - if isinstance(o, tuple) and len(o) >= 2: - adj2.setdefault(u, {})[v] = o - adj2.setdefault(v, {})[u] = (o[1], o[0]) - - # --- seed exactly as VF3 does --- - seeds: List[Dict[NodeId, NodeId]] = [] - for u1, v1, d1 in edges1: - o1 = d1.get("order", ()) - if not (isinstance(o1, tuple) and len(o1) >= 2): - continue - need = o1[1] - for u2, v2, d2 in edges2: - o2 = d2.get("order", ()) - if not (isinstance(o2, tuple) and len(o2) >= 2) or o2[0] != need: - continue - if node_mapping and ( - u2 not in node_mapping.get(u1, []) or v2 not in node_mapping.get(v1, []) - ): - continue - seeds.append({u1: u2, v1: v2}) - if not seeds: - return [] - - # --- DFS grouping by using state dict --- - state: Dict[str, Any] = {"best": [], "max_edges": 0} - - def _dfs( - idx: int, - current: Dict[NodeId, NodeId], - mapped1: Set[NodeId], - mapped2: Set[NodeId], - edge_count: int, - ): - # mutate state - if idx == len(seeds): - if edge_count > state["max_edges"]: - state["max_edges"] = edge_count - state["best"] = [current.copy()] - elif edge_count == state["max_edges"]: - state["best"].append(current.copy()) - return - - cand = seeds[idx] - # try including if no node-ID conflict - if not (set(cand.keys()) & mapped1 or set(cand.values()) & mapped2): - _dfs( - idx + 1, - {**current, **cand}, - mapped1 | set(cand.keys()), - mapped2 | set(cand.values()), - edge_count + 1, - ) - # try skipping this seed - _dfs(idx + 1, current, mapped1, mapped2, edge_count) - - # kick off DFS from each seed - for i, seed in enumerate(seeds): - _dfs(i, seed.copy(), set(seed.keys()), set(seed.values()), 1) - - 
# --- dedupe automorphisms & sort keys --- - uniq: MappingList = [] - seen: Set[FrozenSet] = set() - for m in state["best"]: - key = frozenset(m.items()) - if key in seen: - continue - seen.add(key) - # rebuild with keys in ascending order - sorted_map = {u: m[u] for u in sorted(m.keys())} - uniq.append(sorted_map) - - return uniq - - -# --------------------------------------------------------------------------- -# VF3 – pairwise → grouped matching (hybrid) -# --------------------------------------------------------------------------- - - -def _edge_constraint_vf3( - edges1: Iterable[Edge], - edges2: Iterable[Edge], - node_mapping: Optional[Mapping[NodeId, List[NodeId]]] = None, -) -> MappingList: - """Hybrid strategy: single‑edge matches seeded, then grouped via DFS.""" - # 1. seed list - seeds: List[Dict[NodeId, NodeId]] = [] - for u1, v1, a1 in edges1: - o1 = a1.get("order", (None, None)) - if len(o1) < 2: - continue - need = o1[1] - for u2, v2, a2 in edges2: - o2 = a2.get("order", (None, None)) - if len(o2) < 2 or o2[0] != need: - continue - if node_mapping and ( - u2 not in node_mapping.get(u1, []) or v2 not in node_mapping.get(v1, []) - ): - continue - seeds.append({u1: u2, v1: v2}) - if not seeds: - return [] - - # 2. DFS grouping by using a state dict - state: Dict[str, Any] = {"best": [], "max_edges": 0} - - def _dfs(idx: int, current: Dict[NodeId, NodeId]): - if idx == len(seeds): - edges = len(current) // 2 - if edges == 0: - return - if edges > state["max_edges"]: - state["max_edges"] = edges - state["best"] = [current.copy()] - elif edges == state["max_edges"]: - state["best"].append(current.copy()) - return - - cand = seeds[idx] - # include this seed if no conflicts - if not ( - set(cand.keys()) & current.keys() - or set(cand.values()) & set(current.values()) - ): - _dfs(idx + 1, {**current, **cand}) - # always try skipping - _dfs(idx + 1, current) - - _dfs(0, {}) - - # 3. 
dedupe - uniq: MappingList = [] - seen: Set[FrozenSet] = set() - for m in state["best"]: - key = frozenset(m.items()) - if key not in seen: - seen.add(key) - uniq.append(m) - return uniq - - -# --------------------------------------------------------------------------- -# Public wrapper -# --------------------------------------------------------------------------- - - -def edge_constraint( - edges1: Iterable[Edge], - edges2: Iterable[Edge], - node_mapping: Optional[Mapping[NodeId, List[NodeId]]] = None, - *, - algorithm: str = "bt", - mcs: bool = False, -) -> MappingList: - """Return node‑mappings under the groupoid order rule. - - Parameters - ---------- - algorithm : {'vf2', 'vf3', 'bt'}, default 'bt' - Which internal strategy to use. - mcs : bool, default True - Only for ``algorithm='bt'`` – if ``True`` keep maximum‑edge mappings, else - return *all* disjoint mappings. - """ - alg = algorithm.lower() - if alg == "vf3": - return _edge_constraint_vf3(edges1, edges2, node_mapping) - if alg == "bt" or alg == "backtracking": - return _edge_constraint_backtracking(edges1, edges2, node_mapping, mcs=mcs) - return _edge_constraint_vf2(edges1, edges2, node_mapping) diff --git a/synkit/Graph/MTG/mtg.py b/synkit/Graph/MTG/mtg.py index 6cc773d..c2212cb 100644 --- a/synkit/Graph/MTG/mtg.py +++ b/synkit/Graph/MTG/mtg.py @@ -34,15 +34,24 @@ compute_standard_order, ) from synkit.Graph.canon_graph import GraphCanonicaliser -from synkit.IO import its_to_rsmi, rsmi_to_its +from synkit.IO import ITSFormat, its_to_rsmi, rsmi_to_its NodeID = int -OrderPair = Tuple[float, float] MissingOrder = Tuple[Set[float], Set[float]] GraphMapping = Dict[NodeID, NodeID] _PLACEHOLDER: MissingOrder = (set(), set()) _PLACEHOLDER_TYPESGH = (set(), set(), set(), set(), set()) +_TUPLE_EDGE_ATTRS = ("order", "kekule_order", "sigma_order", "pi_order") +_TUPLE_NODE_SCALAR_ATTRS = ("element", "atom_map", "valence_electrons") +_TUPLE_NODE_TIMELINE_ATTRS = ( + "aromatic", + "hcount", + "charge", + "radical", 
+ "lone_pairs", + "present", +) __all__ = ["MTG"] @@ -54,6 +63,9 @@ class MTG: :param mappings: Optional list of precomputed mappings; computed via MCS if None. :param node_label_names: Keys for node-label matching. :param canonicaliser: Optional GraphCanonicaliser for snapshot canonicalisation. + :param its_format: ITS format used when ``sequences`` contains RSMI strings. + Defaults to ``"tuple"`` for Lewis State Graph MTGs. Pass + ``"typesGH"`` to build legacy MTGs from strings. :raises ValueError: On invalid sequence or mapping lengths. :raises RuntimeError: On mapping failures. """ @@ -67,6 +79,7 @@ def __init__( canonicaliser: GraphCanonicaliser | None = None, mcs_mol: bool = False, mcs: bool = False, + its_format: ITSFormat = "tuple", ) -> None: if len(sequences) < 2: raise ValueError("Need at least two snapshots.") @@ -75,9 +88,11 @@ def __init__( self._canonicaliser = canonicaliser self.mcs_mol = mcs_mol self.mcs = mcs + self.its_format = its_format self._graphs = self._prepare_graph_sequence(sequences) self._k = len(self._graphs) + self._tuple_its = all(self._is_tuple_its(g) for g in self._graphs) self._mappings = ( mappings if mappings is not None else self._compute_mappings(self._graphs) @@ -116,11 +131,41 @@ def describe() -> str: def get_mtg(self, *, directed: bool = False) -> nx.Graph: return self._graph.to_directed() if directed else self._graph + def get_its_steps(self, *, directed: bool = False) -> List[nx.Graph]: + """Reconstruct the ordered list of per-step ITS graphs from the MTG.""" + if not self._tuple_its: + return [graph.copy() for graph in self._graphs] + graph = self.get_mtg(directed=directed) + return [self._tuple_step_its(graph, step) for step in range(self._k)] + + def get_rsmi_steps( + self, + *, + directed: bool = False, + explicit_hydrogen: bool = False, + sanitize: bool = True, + ) -> List[str]: + """Serialize reconstructed per-step ITS graphs to reaction SMILES.""" + fmt = "tuple" if self._tuple_its else "typesGH" + return [ + 
its_to_rsmi( + its, + format=fmt, + explicit_hydrogen=explicit_hydrogen, + sanitize=sanitize, + ) + for its in self.get_its_steps(directed=directed) + ] + def get_compose_its(self, *, directed: bool = False) -> nx.Graph: g = self.get_mtg(directed=directed) - g = label_mtg_edges(g, inplace=False) - g = normalize_order(g) - g = normalize_hcount_and_typesGH(g) + if self._tuple_its: + g = self._compose_tuple_node_attrs(g) + g = self._compose_tuple_edge_attrs(g) + else: + g = label_mtg_edges(g, inplace=False) + g = normalize_order(g) + g = normalize_hcount_and_typesGH(g) return compute_standard_order(g) def get_aam(self, *, directed: bool = False, explicit_h: bool = False) -> str: @@ -144,6 +189,10 @@ def _merge_attrs(lhs: MutableMapping[str, Any], rhs: Mapping[str, Any]) -> None: lhs[k] = v def _build_node_map_and_attributes(self) -> None: + if self._tuple_its and self._has_tuple_atom_maps(self._graphs): + self._build_tuple_node_map_and_attributes() + return + prod, node_map = {}, {} last = self._graphs[-1] for nid, attrs in last.nodes(data=True): @@ -174,76 +223,332 @@ def _build_node_map_and_attributes(self) -> None: else: first_idx[p] = gi - for p, attrs in prod.items(): - hist: List[Any] = [] - fi = first_idx[p] - for i in range(self._k): - if i < fi: - hist.append(_PLACEHOLDER_TYPESGH) - elif i == fi: - val = ( - self._graphs[i] - .nodes[ - next( - n - for (gi, n), pp in node_map.items() - if gi == i and pp == p - ) - ] - .get("typesGH", (_PLACEHOLDER_TYPESGH, _PLACEHOLDER_TYPESGH)) - ) - hist.append(val) - else: - originals = [ - n for (gi, n), pp in node_map.items() if gi == i and pp == p - ] - if originals: + if self._tuple_its: + self._simplify_tuple_node_attrs(prod, node_map) + else: + for p, attrs in prod.items(): + hist: List[Any] = [] + fi = first_idx[p] + for i in range(self._k): + if i < fi: + hist.append(_PLACEHOLDER_TYPESGH) + elif i == fi: val = ( self._graphs[i] - .nodes[originals[0]] + .nodes[ + next( + n + for (gi, n), pp in node_map.items() + if 
gi == i and pp == p + ) + ] .get( "typesGH", (_PLACEHOLDER_TYPESGH, _PLACEHOLDER_TYPESGH) - )[-1] + ) ) hist.append(val) else: - hist.append(_PLACEHOLDER_TYPESGH) - attrs["typesGH_history"] = tuple(hist) - attrs["typesGH"] = attrs["typesGH_history"] + originals = [ + n for (gi, n), pp in node_map.items() if gi == i and pp == p + ] + if originals: + val = ( + self._graphs[i] + .nodes[originals[0]] + .get( + "typesGH", + (_PLACEHOLDER_TYPESGH, _PLACEHOLDER_TYPESGH), + )[-1] + ) + hist.append(val) + else: + hist.append(_PLACEHOLDER_TYPESGH) + attrs["typesGH_history"] = tuple(hist) + attrs["typesGH"] = attrs["typesGH_history"] + + self._prod_nodes = prod + self._node_map = node_map + def _build_tuple_node_map_and_attributes(self) -> None: + prod: Dict[int, Dict[str, Any]] = {} + node_map: Dict[Tuple[int, NodeID], int] = {} + pid_counter = 0 + + for gi, graph in enumerate(self._graphs): + used_in_graph: Set[int] = set() + for nid, attrs in graph.nodes(data=True): + pid = self._tuple_node_pid(attrs) + if pid is None or pid in used_in_graph: + while pid_counter in prod: + pid_counter += 1 + pid = pid_counter + pid_counter += 1 + prod.setdefault(pid, {}) + node_map[(gi, nid)] = pid + used_in_graph.add(pid) + + self._simplify_tuple_node_attrs(prod, node_map) self._prod_nodes = prod self._node_map = node_map def _build_edge_history_and_graph(self) -> None: - hist: Dict[Tuple[int, int], List[MissingOrder]] = {} + hist: Dict[Tuple[int, int], Dict[str, List[MissingOrder]]] = {} for i, G in enumerate(self._graphs): for u, v, a in G.edges(data=True): pu, pv = self._node_map[(i, u)], self._node_map[(i, v)] key = tuple(sorted((pu, pv))) - lst = hist.setdefault(key, [_PLACEHOLDER] * self._k) - lst[i] = a.get("order", _PLACEHOLDER) + attr_hist = hist.setdefault( + key, + {name: [_PLACEHOLDER] * self._k for name in _TUPLE_EDGE_ATTRS}, + ) + for name in _TUPLE_EDGE_ATTRS: + attr_hist[name][i] = a.get(name, _PLACEHOLDER) g = nx.Graph() g.add_nodes_from(self._prod_nodes.items()) - for 
(u, v), lst in hist.items(): - g.add_edge(u, v, order=tuple(lst)) + for (u, v), attr_hist in hist.items(): + attrs: Dict[str, Any] = {"order": tuple(attr_hist["order"])} + if self._tuple_its: + attrs = {} + for name, values in attr_hist.items(): + attrs[name] = self._edge_pair_history_to_timeline( + tuple(values), + g.nodes[u].get("present"), + g.nodes[v].get("present"), + ) + attrs["steps"] = tuple( + i + for i, value in enumerate(attr_hist["order"]) + if self._is_observed_pair(value) + ) + g.add_edge(u, v, **attrs) if g.number_of_nodes() != len(self._prod_nodes): raise RuntimeError("Node count mismatch.") self._graph = g + def _simplify_tuple_node_attrs( + self, + prod: Dict[int, Dict[str, Any]], + node_map: Dict[Tuple[int, NodeID], int], + ) -> None: + """ + Replace tuple-ITS node attrs with compact MTG attrs. + + A path of ``k`` ITS steps has ``k + 1`` mechanism states: the first + step's left side followed by each step's right side. + """ + refs_by_pid: Dict[int, Dict[int, NodeID]] = {} + for (gi, nid), pid in node_map.items(): + refs_by_pid.setdefault(pid, {})[gi] = nid + + for pid, refs in refs_by_pid.items(): + simplified: Dict[str, Any] = {} + for key in _TUPLE_NODE_SCALAR_ATTRS: + timeline = self._node_attr_timeline(refs, key) + simplified[key] = next( + (value for value in timeline if value is not None), + None, + ) + + for key in _TUPLE_NODE_TIMELINE_ATTRS: + simplified[key] = self._node_attr_timeline(refs, key) + + simplified["steps"] = tuple(sorted(refs)) + prod[pid] = simplified + + def _node_attr_timeline( + self, + refs: Dict[int, NodeID], + key: str, + ) -> Tuple[Any, ...]: + timeline: List[Any] = [None] * (self._k + 1) + for gi in range(self._k): + nid = refs.get(gi) + if nid is None: + continue + value = self._graphs[gi].nodes[nid].get(key) + if self._is_pair(value): + timeline[gi] = value[0] + timeline[gi + 1] = value[1] + return tuple(timeline) + + def _compose_tuple_node_attrs(self, graph: nx.Graph) -> nx.Graph: + """ + Collapse tuple-ITS 
node histories to the outermost observed states. + + The fused MTG node attrs are initially copied from the last ITS step. + For a composed ITS we instead need the first available left-side value + and the last available right-side value across the whole trajectory. + """ + out = graph.copy() + for _, attrs in out.nodes(data=True): + for key in _TUPLE_NODE_SCALAR_ATTRS: + value = attrs.get(key) + attrs[key] = (value, value) + for key in _TUPLE_NODE_TIMELINE_ATTRS: + timeline = attrs.get(key) + if isinstance(timeline, tuple) and timeline: + attrs[key] = (timeline[0], timeline[-1]) + return out + + def _compose_tuple_edge_attrs(self, graph: nx.Graph) -> nx.Graph: + """Collapse tuple edge timelines to first-state / final-state pairs.""" + out = graph.copy() + for _, _, attrs in out.edges(data=True): + for name in _TUPLE_EDGE_ATTRS: + timeline = attrs.get(name) + if not isinstance(timeline, tuple): + continue + if timeline: + attrs[name] = (timeline[0], timeline[-1]) + return out + + def _tuple_step_its(self, graph: nx.Graph, step: int) -> nx.Graph: + """Extract one paired tuple ITS step from compact tuple-MTG timelines.""" + its = nx.Graph() + for node, attrs in graph.nodes(data=True): + node_attrs: Dict[str, Any] = {} + if step not in attrs.get("steps", ()): + continue + present_pair = self._timeline_pair(attrs.get("present"), step) + if present_pair[0] is None or present_pair[1] is None: + continue + for key in _TUPLE_NODE_SCALAR_ATTRS: + value = attrs.get(key) + node_attrs[key] = (value, value) + for key in _TUPLE_NODE_TIMELINE_ATTRS: + value = self._timeline_pair(attrs.get(key), step) + if value != (None, None): + node_attrs[key] = value + its.add_node(node, **node_attrs) + + for u, v, attrs in graph.edges(data=True): + if step not in attrs.get("steps", ()): + continue + edge_attrs: Dict[str, Any] = {} + has_edge = False + for key in _TUPLE_EDGE_ATTRS: + value = self._timeline_pair(attrs.get(key), step) + if value == (None, None): + continue + edge_attrs[key] = 
value + if ( + key == "order" + and value[0] is not None + and value[1] is not None + and value != (0, 0) + and value != (0.0, 0.0) + ): + has_edge = True + if has_edge and u in its and v in its: + its.add_edge(u, v, **edge_attrs) + return compute_standard_order(its) + def _prepare_graph_sequence( self, seq: List[nx.Graph] | List[str] ) -> List[nx.Graph]: out: List[nx.Graph] = [] for item in seq: - g = rsmi_to_its(item, core=False) if isinstance(item, str) else item + g = ( + rsmi_to_its(item, core=False, format=self.its_format) + if isinstance(item, str) + else item + ) if self._canonicaliser: g = self._canonicaliser.canonicalise_graph(g).canonical_graph + if self._is_tuple_its(g): + out.append(g) + continue g = h_to_explicit(g, its=True) - # out.append(g) out.append(normalize_hcount_and_typesGH(g)) return out + @staticmethod + def _is_tuple_its(graph: nx.Graph) -> bool: + """ + Detect paired-attribute ITS graphs produced by the newer tuple format. + + Tuple ITS nodes carry side-specific attributes directly, such as + ``element=("C", "C")`` and ``lone_pairs=(0, 0)``. Legacy ITS graphs + instead keep the paired state primarily in ``typesGH``. 
+ """ + if graph.number_of_nodes() == 0: + return False + _, attrs = next(iter(graph.nodes(data=True))) + element = attrs.get("element") + return isinstance(element, tuple) and len(element) == 2 + + @staticmethod + def _is_pair(value: Any) -> bool: + return isinstance(value, tuple) and len(value) == 2 + + @classmethod + def _is_observed_pair(cls, value: Any) -> bool: + return cls._is_pair(value) and not ( + isinstance(value[0], set) and isinstance(value[1], set) + ) + + @staticmethod + def _timeline_pair(timeline: Any, step: int) -> Tuple[Any, Any]: + if not isinstance(timeline, tuple) or len(timeline) <= step + 1: + return (None, None) + return (timeline[step], timeline[step + 1]) + + @classmethod + def _edge_pair_history_to_timeline( + cls, + history: Tuple[Any, ...], + u_present: Any, + v_present: Any, + ) -> Tuple[Any, ...]: + """ + Convert ITS step-pair history into mechanism-state timeline. + + Example: ``((2, 1), (1, 2))`` becomes ``(2, 1, 2)``. + Missing edge states are ``0`` when both endpoint atoms exist and + ``None`` when an endpoint is absent. 
+ """ + if not history: + return () + + timeline: List[Any] = [None] * (len(history) + 1) + for idx, value in enumerate(history): + if cls._is_pair(value) and not ( + isinstance(value[0], set) and isinstance(value[1], set) + ): + timeline[idx] = value[0] + timeline[idx + 1] = value[1] + return tuple( + cls._fill_missing_edge_state(value, idx, u_present, v_present) + for idx, value in enumerate(timeline) + ) + + @staticmethod + def _fill_missing_edge_state( + value: Any, + idx: int, + u_present: Any, + v_present: Any, + ) -> Any: + if value is not None: + return value + if ( + isinstance(u_present, tuple) + and isinstance(v_present, tuple) + and len(u_present) > idx + and len(v_present) > idx + and u_present[idx] is True + and v_present[idx] is True + ): + return 0.0 + return None + def _compute_mappings(self, graphs: List[nx.Graph]) -> List[GraphMapping]: + if self._tuple_its: + return [ + self._compute_tuple_mapping(graphs[i], graphs[i + 1]) + for i in range(len(graphs) - 1) + ] + mappings: List[GraphMapping] = [] for i in range(len(graphs) - 1): m = MCSMatcher(node_label_names=self._node_label_names) @@ -255,6 +560,48 @@ def _compute_mappings(self, graphs: List[nx.Graph]) -> List[GraphMapping]: mappings.append(m._mappings[0]) return mappings + @classmethod + def _compute_tuple_mapping(cls, left: nx.Graph, right: nx.Graph) -> GraphMapping: + left_by_map = cls._nodes_by_atom_map(left) + right_by_map = cls._nodes_by_atom_map(right) + common_maps = sorted(set(left_by_map) & set(right_by_map)) + mapping = {left_by_map[amap]: right_by_map[amap] for amap in common_maps} + + if mapping: + return mapping + + common_nodes = sorted(set(left.nodes()) & set(right.nodes())) + return {node: node for node in common_nodes} + + @classmethod + def _has_tuple_atom_maps(cls, graphs: List[nx.Graph]) -> bool: + return any( + cls._tuple_node_pid(attrs) is not None + for graph in graphs + for _, attrs in graph.nodes(data=True) + ) + + @staticmethod + def _tuple_node_pid(attrs: 
Mapping[str, Any]) -> int | None: + atom_map = attrs.get("atom_map") + if isinstance(atom_map, tuple) and len(atom_map) == 2: + atom_map = atom_map[1] if atom_map[1] not in (None, 0, "") else atom_map[0] + if atom_map in (None, 0, ""): + return None + return int(atom_map) + + @staticmethod + def _nodes_by_atom_map(graph: nx.Graph) -> Dict[int, NodeID]: + by_map: Dict[int, NodeID] = {} + for node, attrs in graph.nodes(data=True): + atom_map = MTG._tuple_node_pid(attrs) + if atom_map is None: + continue + if atom_map in by_map: + continue + by_map[atom_map] = node + return by_map + @property def node_mapping(self) -> Dict[Tuple[int, NodeID], int]: return dict(self._node_map) diff --git a/synkit/Graph/Matcher/graph_matcher.py b/synkit/Graph/Matcher/graph_matcher.py index 2fece56..8ee767a 100644 --- a/synkit/Graph/Matcher/graph_matcher.py +++ b/synkit/Graph/Matcher/graph_matcher.py @@ -77,9 +77,10 @@ class GraphMatcherEngine: :class:`~networkx.algorithms.isomorphism.GraphMatcher`. * ``"rule"`` – optional, requires the third‑party *mod* package. node_attrs, edge_attrs: - Lists of attribute keys that must match exactly between candidate - nodes/edges. ``hcount`` is treated specially – the host must be **≥** - the pattern (to allow aggregated counts). + Lists of attribute keys used for matching. ``hcount`` and + ``lone_pairs`` are treated specially: the host must be **≥** the + pattern. Other requested attributes, including ``radical``, match + exactly. wl1_filter: If *True*, a fast WL‑based colour refinement pre‑filter discards host graphs that cannot possibly contain the pattern. 
@@ -162,11 +163,15 @@ def nm(nh, np): # noqa: ANN001 – external signature return nm def nm(nh, np, _attrs=attrs): # noqa: ANN001 – external signature - # Strict equality for selected attributes … for k in _attrs: - if nh.get(k) != np.get(k): + host_value = nh.get(k, 0 if k in {"hcount", "lone_pairs"} else None) + pattern_value = np.get(k, 0 if k in {"hcount", "lone_pairs"} else None) + if k in {"hcount", "lone_pairs"}: + if host_value < pattern_value: + return False + continue + if host_value != pattern_value: return False - # … plus host‑≥‑pattern for "hcount" if present. return nh.get("hcount", 0) >= np.get("hcount", 0) return nm @@ -230,17 +235,19 @@ def _isomorphic_nx( if not isinstance(g1, nx.Graph) or not isinstance(g2, nx.Graph): raise TypeError("NX backend expects `networkx.Graph` objects.") - # Put the *smaller* graph first – helps GraphMatcher. - if g1.number_of_nodes() > g2.number_of_nodes(): - g1, g2 = g2, g1 # type: ignore[misc] + # Treat the larger graph as host so comparator semantics remain + # host-first for subgraph checks. + host, pattern = (g1, g2) + if g1.number_of_nodes() < g2.number_of_nodes(): + host, pattern = g2, g1 - if not self._pre_check(g2, g1): # g2 is the (larger) host + if not self._pre_check(host, pattern): return False - gm = _NXGraphMatcher(g1, g2, node_match=self._nm, edge_match=self._em) + gm = _NXGraphMatcher(host, pattern, node_match=self._nm, edge_match=self._em) return ( gm.is_isomorphic() - if g1.number_of_nodes() == g2.number_of_nodes() + if host.number_of_nodes() == pattern.number_of_nodes() else gm.subgraph_is_isomorphic() ) @@ -253,7 +260,7 @@ def _get_mappings_nx( if not self._pre_check(host, pattern): return [] - gm = _NXGraphMatcher(pattern, host, node_match=self._nm, edge_match=self._em) + gm = _NXGraphMatcher(host, pattern, node_match=self._nm, edge_match=self._em) # Full blow isomorphism (same #nodes / #edges)? Then a single call tells # us everything and is much faster than iterating via *isomorphisms_iter*. 
@@ -261,7 +268,16 @@ def _get_mappings_nx( pattern.number_of_nodes() == host.number_of_nodes() and pattern.number_of_edges() == host.number_of_edges() ): - return [gm.mapping] if gm.is_isomorphic() else [] + return ( + [ + { + pattern_node: host_node + for host_node, pattern_node in gm.mapping.items() + } + ] + if gm.is_isomorphic() + else [] + ) # Sub‑isomorphisms. iso_iter = gm.subgraph_isomorphisms_iter() @@ -271,7 +287,10 @@ def _get_mappings_nx( ) # local import – cheap and avoids polluting global namespace iso_iter = islice(iso_iter, self.max_mappings) - return list(iso_iter) + return [ + {pattern_node: host_node for host_node, pattern_node in mapping.items()} + for mapping in iso_iter + ] # ―――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――――—— # Rule (GML) backend – thin wrappers around ``mod.ruleGMLString`` diff --git a/synkit/Graph/Matcher/subgraph_matcher.py b/synkit/Graph/Matcher/subgraph_matcher.py index 8b78c00..8de9505 100644 --- a/synkit/Graph/Matcher/subgraph_matcher.py +++ b/synkit/Graph/Matcher/subgraph_matcher.py @@ -105,6 +105,126 @@ ] +def electron_aware_node_match( + host_data: EdgeAttr, + pattern_data: EdgeAttr, + node_attrs: Sequence[str], +) -> bool: + """Compare node attributes with chemistry-aware cardinality semantics. + + Attributes in ``node_attrs`` are exact matches except: + + - ``hcount``: host must be greater than or equal to pattern + - ``lone_pairs``: host must be greater than or equal to pattern + - ``aromatic_n_pi_count``: exact aromatic-N role label when present + ``radical`` therefore remains exact whenever the caller includes it in + ``node_attrs``. 
+ """ + for attr in node_attrs: + host_value = host_data.get( + attr, 0 if attr in {"hcount", "lone_pairs"} else None + ) + pattern_value = pattern_data.get( + attr, 0 if attr in {"hcount", "lone_pairs"} else None + ) + if attr in {"hcount", "lone_pairs"}: + if host_value < pattern_value: + return False + continue + if host_value != pattern_value: + return False + return True + + +def electron_aware_edge_match( + host_data: EdgeAttr, + pattern_data: EdgeAttr, + edge_attrs: Sequence[str], +) -> bool: + """Compare edge attrs while treating aromatic Kekule phase as non-semantic. + + Aromatic presentation bonds are matched by ``order == 1.5``. Their + particular ``sigma_order`` / ``pi_order`` split depends on the chosen + Kekule form and is not stable across independently parsed graphs. + """ + host_is_aromatic = host_data.get("order") == 1.5 + pattern_is_aromatic = pattern_data.get("order") == 1.5 + for attr in edge_attrs: + if ( + attr in {"sigma_order", "pi_order"} + and host_is_aromatic + and pattern_is_aromatic + ): + continue + if host_data.get(attr) != pattern_data.get(attr): + return False + return True + + +def explain_node_mismatch( + host_data: EdgeAttr, + pattern_data: EdgeAttr, + node_attrs: Sequence[str], +) -> list[str]: + """Return node-level mismatch reasons using matcher semantics.""" + reasons: list[str] = [] + for attr in node_attrs: + host_value = host_data.get( + attr, 0 if attr in {"hcount", "lone_pairs"} else None + ) + pattern_value = pattern_data.get( + attr, 0 if attr in {"hcount", "lone_pairs"} else None + ) + if attr in {"hcount", "lone_pairs"}: + if host_value < pattern_value: + reasons.append(f"{attr}: host {host_value} < pattern {pattern_value}") + continue + if host_value != pattern_value: + reasons.append(f"{attr}: host {host_value!r} != pattern {pattern_value!r}") + return reasons + + +def resolve_template_match_attrs( + pattern: nx.Graph, + *, + legacy_node_attrs: Sequence[str] = ("element", "charge"), + legacy_edge_attrs: 
Sequence[str] = ("order",), +) -> tuple[list[str], list[str]]: + """Choose match attrs from what the template actually carries. + + Legacy templates keep the legacy attribute set. Electron-aware templates opt + into extra constraints only when those attrs are present on the template. + """ + node_attrs = list(legacy_node_attrs) + edge_attrs = list(legacy_edge_attrs) + + for attr in ( + "aromatic", + "hcount", + "lone_pairs", + "radical", + "aromatic_n_pi_count", + ): + if any(attr in data for _, data in pattern.nodes(data=True)): + node_attrs.append(attr) + + for attr in ("sigma_order", "pi_order"): + if any(attr in data for _, _, data in pattern.edges(data=True)): + edge_attrs.append(attr) + + return node_attrs, edge_attrs + + +def diagnose_candidate_node_match( + host_data: EdgeAttr, + pattern_data: EdgeAttr, + node_attrs: Sequence[str], +) -> dict[str, Any]: + """Return a compact node-match diagnostic payload.""" + reasons = explain_node_mismatch(host_data, pattern_data, node_attrs) + return {"matched": not reasons, "reasons": reasons} + + # --------------------------------------------------------------------------- # Core engine class # --------------------------------------------------------------------------- @@ -315,8 +435,7 @@ def _quick_pre_filter( count = sum( 1 for _, host_data in host.nodes(data=True) - if all(host_data.get(a) == pat_data.get(a) for a in node_attrs) - and host_data.get("hcount", 0) >= pat_data.get("hcount", 0) + if electron_aware_node_match(host_data, pat_data, node_attrs) and host.degree(_) >= pat_deg ) # if no candidates; impossible match @@ -347,7 +466,8 @@ def find_subgraph_mappings( host, pattern NetworkX graphs (host ≥ pattern). node_attrs, edge_attrs - Keys of attributes to match exactly (plus `hcount` ≥). + Keys of attributes to match; ``hcount`` and ``lone_pairs`` use + host-greater-or-equal semantics, while the rest are exact. strategy Matching strategy code or enum ("all", "comp", "bt"). 
max_results @@ -426,12 +546,10 @@ def _find_all_subgraph_mappings( """Classic VF2 over the whole host graph.""" def node_match(nh: EdgeAttr, np: EdgeAttr) -> bool: - return all(nh.get(k) == np.get(k) for k in node_attrs) and nh.get( - "hcount", 0 - ) >= np.get("hcount", 0) + return electron_aware_node_match(nh, np, node_attrs) def edge_match(eh: EdgeAttr, ep: EdgeAttr) -> bool: - return all(eh.get(k) == ep.get(k) for k in edge_attrs) + return electron_aware_edge_match(eh, ep, edge_attrs) gm = GraphMatcher(host, pattern, node_match=node_match, edge_match=edge_match) results: List[MappingDict] = [] @@ -467,12 +585,10 @@ def _find_component_aware_subgraph_mappings( return [] def node_match(nh: EdgeAttr, np: EdgeAttr) -> bool: - if any(nh.get(a) != np.get(a) for a in node_attrs): - return False - return nh.get("hcount", 0) >= np.get("hcount", 0) + return electron_aware_node_match(nh, np, node_attrs) def edge_match(eh: EdgeAttr, ep: EdgeAttr) -> bool: - return all(eh.get(a) == ep.get(a) for a in edge_attrs) + return electron_aware_edge_match(eh, ep, edge_attrs) per_cc: List[List[Tuple[int, MappingDict]]] = [] for pc in pat_ccs: diff --git a/synkit/Graph/Mech/__init__.py b/synkit/Graph/Mech/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/synkit/Graph/Mech/conversion.py b/synkit/Graph/Mech/conversion.py new file mode 100644 index 0000000..3c1d846 --- /dev/null +++ b/synkit/Graph/Mech/conversion.py @@ -0,0 +1,1305 @@ +from __future__ import annotations + +import re +from collections import Counter +from typing import Any, Optional + +# ============================================================ +# Optional SynKit imports +# ============================================================ + +try: + from synkit.Chem.Reaction.canon_rsmi import CanonRSMI + from synkit.Graph.ITS.its_expand import ITSExpand + from synkit.IO import rsmi_to_its +except ImportError: + CanonRSMI = None + rsmi_to_its = None + ITSExpand = None + + +# 
============================================================ +# Regex helpers +# ============================================================ + +BRACKET_ATOM_RE = re.compile(r"\[[^\[\]]+\]") +ATOM_MAP_RE = re.compile(r":(\d+)(?=\])") + + +# ============================================================ +# Arrow-code parsing +# ============================================================ + + +def parse_atom_list(text: str) -> list[int]: + """Parse a comma-separated atom-map list. + + :param text: Atom-map text such as ``"10"`` or ``"10,11"``. + :type text: str + :returns: Parsed atom-map numbers. + :rtype: list[int] + """ + return [int(x.strip()) for x in text.split(",") if x.strip()] + + +def parse_arrow_step(step: str) -> tuple[list[int], list[int]]: + """ + Convert one arrow-code step. + + For example, ``"10=20"`` becomes ``([10], [20])`` and + ``"12=11,12"`` becomes ``([12], [11, 12])``. + + :param step: One arrow-code step containing a left and right side. + :type step: str + :returns: Parsed left-side and right-side atom-map lists. + :rtype: tuple[list[int], list[int]] + :raises ValueError: If ``step`` does not contain ``"="``. + """ + if "=" not in step: + raise ValueError(f"Invalid arrow step without '=': {step}") + + lhs, rhs = step.split("=", 1) + return parse_atom_list(lhs), parse_atom_list(rhs) + + +def split_arrow_code(arrow_code: str) -> list[str]: + """Split an arrow code into non-empty steps. + + :param arrow_code: Semicolon-separated arrow code. + :type arrow_code: str + :returns: Individual stripped arrow-code steps. + :rtype: list[str] + """ + return [s.strip() for s in arrow_code.split(";") if s.strip()] + + +def arrow_atom_maps(arrow_code: str) -> set[int]: + """Return all atom maps used in an arrow code. + + :param arrow_code: Semicolon-separated arrow code. + :type arrow_code: str + :returns: Atom-map numbers referenced by the code. 
+ :rtype: set[int] + """ + maps: set[int] = set() + + for step in split_arrow_code(arrow_code): + lhs, rhs = parse_arrow_step(step) + maps.update(lhs) + maps.update(rhs) + + return maps + + +def classify_arrow_shape(step: str) -> str: + """Classify one arrow-code step. + + :param step: One arrow-code step. + :type step: str + :returns: Shape label such as ``"a=b"`` or ``"a,b=c,d"``. + :rtype: str + """ + lhs, rhs = parse_arrow_step(step) + + if len(lhs) == 1 and len(rhs) == 1: + return "a=b" + + if len(lhs) == 1 and len(rhs) == 2: + return "a=b,c" + + if len(lhs) == 2 and len(rhs) == 1: + return "a,b=c" + + if len(lhs) == 2 and len(rhs) == 2: + return "a,b=c,d" + + return f"unsupported:{len(lhs)},{len(rhs)}" + + +def check_arrow_code_coverage(arrow_codes: list[str]) -> dict[str, Any]: + """Check which arrow-code shapes appear in a dataset. + + :param arrow_codes: Arrow codes to inspect. + :type arrow_codes: list[str] + :returns: Shape counts, unsupported steps, and an all-supported flag. + :rtype: dict[str, Any] + """ + shape_counter = Counter() + unsupported = [] + + for row_idx, arrow_code in enumerate(arrow_codes, start=1): + for step in split_arrow_code(arrow_code): + shape = classify_arrow_shape(step) + shape_counter[shape] += 1 + + if shape.startswith("unsupported"): + unsupported.append( + { + "row_index": row_idx, + "arrow_code": arrow_code, + "step": step, + "shape": shape, + } + ) + + return { + "shape_counts": dict(shape_counter), + "unsupported": unsupported, + "all_supported": len(unsupported) == 0, + } + + +# ============================================================ +# Atom-map preprocessing +# ============================================================ + + +def extract_atom_maps_from_smiles(smiles: str) -> list[int]: + """ + Extract atom-map numbers from bracket atoms. + + For example, ``"[CH:10]"`` yields ``10`` and ``"[N+:61]"`` yields ``61``. + + :param smiles: SMILES or SMIRKS fragment. 
+ :type smiles: str + :returns: Atom-map numbers found in bracket atoms. + :rtype: list[int] + """ + return [int(x) for x in ATOM_MAP_RE.findall(smiles)] + + +def duplicate_atom_maps_in_side(smiles: str) -> dict[int, int]: + """Find duplicated atom maps in one side of a reaction. + + :param smiles: Reactant-side or product-side SMILES text. + :type smiles: str + :returns: Mapping of duplicated atom-map numbers to occurrence counts. + :rtype: dict[int, int] + """ + counts = Counter(extract_atom_maps_from_smiles(smiles)) + return {atom_map: count for atom_map, count in counts.items() if count > 1} + + +def validate_arrow_maps( + rsmi: str, + arrow_code: str, + raise_on_arrow_duplicates: bool = True, + raise_on_missing_arrow_maps: bool = True, +) -> dict[str, Any]: + """ + Validate atom maps before SynKit. + + Rules + ----- + 1. Duplicated atom maps used by arrow_code are fatal. + 2. Missing atom maps used by arrow_code are fatal. + 3. Duplicated non-arrow atom maps are warnings only, because they + can be removed before SynKit expansion. + + :param rsmi: Reaction SMILES in ``reactants>>products`` format. + :type rsmi: str + :param arrow_code: Arrow code whose atom maps must be validated. + :type arrow_code: str + :param raise_on_arrow_duplicates: Whether duplicated arrow atom maps are fatal. + :type raise_on_arrow_duplicates: bool + :param raise_on_missing_arrow_maps: Whether missing arrow atom maps are fatal. + :type raise_on_missing_arrow_maps: bool + :returns: Validation diagnostics. + :rtype: dict[str, Any] + :raises ValueError: If the RSMI is malformed or enabled validation fails. 
+ """ + if ">>" not in rsmi: + raise ValueError("RSMI must contain '>>'") + + reactants, products = rsmi.split(">>", 1) + + arrow_maps = arrow_atom_maps(arrow_code) + + reactant_maps = extract_atom_maps_from_smiles(reactants) + product_maps = extract_atom_maps_from_smiles(products) + + all_raw_maps = set(reactant_maps) | set(product_maps) + + missing_arrow_maps = sorted(m for m in arrow_maps if m not in all_raw_maps) + + r_dupes = duplicate_atom_maps_in_side(reactants) + p_dupes = duplicate_atom_maps_in_side(products) + + arrow_dupes = { + "reactants": {m: c for m, c in r_dupes.items() if m in arrow_maps}, + "products": {m: c for m, c in p_dupes.items() if m in arrow_maps}, + } + + non_arrow_dupes = { + "reactants": {m: c for m, c in r_dupes.items() if m not in arrow_maps}, + "products": {m: c for m, c in p_dupes.items() if m not in arrow_maps}, + } + + diagnostics = { + "arrow_maps": sorted(arrow_maps), + "missing_arrow_maps": missing_arrow_maps, + "arrow_duplicate_maps": arrow_dupes, + "non_arrow_duplicate_maps": non_arrow_dupes, + "has_missing_arrow_maps": bool(missing_arrow_maps), + "has_arrow_duplicate_maps": bool( + arrow_dupes["reactants"] or arrow_dupes["products"] + ), + "has_non_arrow_duplicate_maps": bool( + non_arrow_dupes["reactants"] or non_arrow_dupes["products"] + ), + } + + if raise_on_missing_arrow_maps and diagnostics["has_missing_arrow_maps"]: + raise ValueError( + "Some atom maps used in arrow_code are missing from the reaction SMILES. " + f"Diagnostics: {diagnostics}" + ) + + if raise_on_arrow_duplicates and diagnostics["has_arrow_duplicate_maps"]: + raise ValueError( + "Some atom maps used in arrow_code are duplicated in the reaction SMILES. " + f"Diagnostics: {diagnostics}" + ) + + return diagnostics + + +def remove_non_arrow_atom_maps(rsmi: str, arrow_code: str) -> str: + """ + Keep only atom maps involved in arrow_code. + Remove every other atom map. 
+ + This is important because some source SMIRKS have duplicated + non-arrow atom maps, e.g. + + [N+:61]2=[CH:61] + + If 61 is not used by arrow_code, we remove it and let SynKit + CanonRSMI().expand_aam(...) generate clean full maps. + + :param rsmi: Reaction SMILES to clean. + :type rsmi: str + :param arrow_code: Arrow code whose atom maps should be preserved. + :type arrow_code: str + :returns: Reaction SMILES with non-arrow atom maps removed. + :rtype: str + """ + keep_maps = arrow_atom_maps(arrow_code) + + def clean_bracket_atom(match: re.Match) -> str: + token = match.group(0) + + map_match = ATOM_MAP_RE.search(token) + if map_match is None: + return token + + atom_map = int(map_match.group(1)) + + if atom_map in keep_maps: + return token + + return ATOM_MAP_RE.sub("", token) + + return BRACKET_ATOM_RE.sub(clean_bracket_atom, rsmi) + + +# ============================================================ +# Generic LP/B conversion +# ============================================================ + + +def generic_convert_step(step: str) -> list[Any]: + """ + Generic graph-independent conversion. + + Supported grammar + ----------------- + a=b + LP(a) forms bond a-b + -> ["LP-/B+", [a], [a, b]] + + a=b,c + LP(a) forms/increases bond b-c + -> ["LP-/B+", [a], [b, c]] + + a,b=c + bond a-b breaks; electrons end as LP on c + -> ["B-/LP+", [a, b], [c]] + + a,b=c,d + bond a-b becomes bond c-d + -> ["B-/B+", [a, b], [c, d]] + + :param step: One arrow-code step. + :type step: str + :returns: Generic LP/B conversion record. + :rtype: list[Any] + :raises ValueError: If the step shape is unsupported. 
+ """ + lhs, rhs = parse_arrow_step(step) + + # a=b + if len(lhs) == 1 and len(rhs) == 1: + a = lhs[0] + b = rhs[0] + return ["LP-/B+", [a], [a, b]] + + # a=b,c + if len(lhs) == 1 and len(rhs) == 2: + return ["LP-/B+", lhs, rhs] + + # a,b=c + if len(lhs) == 2 and len(rhs) == 1: + return ["B-/LP+", lhs, rhs] + + # a,b=c,d + if len(lhs) == 2 and len(rhs) == 2: + return ["B-/B+", lhs, rhs] + + raise ValueError(f"Unsupported arrow step: {step}") + + +def generic_convert_arrow_code(arrow_code: str) -> list[list[Any]]: + """Convert every step in an arrow code to generic LP/B form. + + :param arrow_code: Semicolon-separated arrow code. + :type arrow_code: str + :returns: Generic conversion records for each step. + :rtype: list[list[Any]] + """ + return [generic_convert_step(step) for step in split_arrow_code(arrow_code)] + + +# ============================================================ +# SynKit ITS construction +# ============================================================ + + +def build_its_from_rsmi( + rsmi: str, + arrow_code: str, + expand_aam: bool = True, + remove_non_arrow_maps: bool = True, +): + """ + Build SynKit ITS graph from reaction SMILES. + + Pipeline + -------- + raw SMIRKS + -> validate arrow atom maps + -> remove non-arrow atom maps + -> CanonRSMI().expand_aam(...) + -> rsmi_to_its(...) + + :param rsmi: Reaction SMILES in ``reactants>>products`` format. + :type rsmi: str + :param arrow_code: Arrow code used to preserve relevant atom maps. + :type arrow_code: str + :param expand_aam: Whether to expand atom mapping before ITS construction. + :type expand_aam: bool + :param remove_non_arrow_maps: Whether to remove atom maps not used by the arrow code. + :type remove_non_arrow_maps: bool + :returns: ITS graph, expanded RSMI, cleaned RSMI, and validation diagnostics. + :rtype: tuple + :raises ImportError: If required SynKit conversion helpers are unavailable. 
+ """ + if CanonRSMI is None or rsmi_to_its is None: + raise ImportError( + "SynKit is not available. Run this code inside your SynKit environment." + ) + + diagnostics = validate_arrow_maps( + rsmi=rsmi, + arrow_code=arrow_code, + raise_on_arrow_duplicates=True, + raise_on_missing_arrow_maps=True, + ) + + if remove_non_arrow_maps: + rsmi_for_its = remove_non_arrow_atom_maps(rsmi, arrow_code) + else: + rsmi_for_its = rsmi + + expanded_rsmi = ( + ITSExpand().expand_aam_with_its( + rsmi_for_its, relabel=False, preserve_older_map=True + ) + if expand_aam + else rsmi_for_its + ) + + its = rsmi_to_its(expanded_rsmi) + + return its, expanded_rsmi, rsmi_for_its, diagnostics + + +# ============================================================ +# ITS graph helpers +# ============================================================ + + +def atom_map_to_nodes(its) -> dict[int, list[Any]]: + """ + Build atom-map-number -> list of ITS node ids. + + This catches ambiguous duplicated atom maps after ITS construction. + + :param its: ITS graph. + :type its: networkx.Graph + :returns: Mapping from atom-map number to ITS node IDs. + :rtype: dict[int, list[Any]] + """ + mapping: dict[int, list[Any]] = {} + + for node, data in its.nodes(data=True): + atom_map = int(data.get("atom_map", node)) + mapping.setdefault(atom_map, []).append(node) + + return mapping + + +def get_unique_node_for_atom_map( + its, + atom_map: int, + strict: bool = True, + atom_map_nodes: Optional[dict[int, list[Any]]] = None, +) -> Optional[Any]: + """Get the unique ITS node corresponding to an atom map. + + :param its: ITS graph. + :type its: networkx.Graph + :param atom_map: Atom-map number to resolve. + :type atom_map: int + :param strict: Whether a missing atom map should raise. + :type strict: bool + :param atom_map_nodes: Optional precomputed atom-map to node index. + :type atom_map_nodes: Optional[dict[int, list[Any]]] + :returns: Unique ITS node ID, or ``None`` when missing and ``strict`` is false. 
+ :rtype: Optional[Any] + :raises ValueError: If the atom map is missing in strict mode or is ambiguous. + """ + mapping = atom_map_nodes if atom_map_nodes is not None else atom_map_to_nodes(its) + nodes = mapping.get(int(atom_map), []) + + if len(nodes) == 0: + if strict: + raise ValueError(f"Atom map {atom_map} is missing from ITS graph.") + return None + + if len(nodes) == 1: + return nodes[0] + + raise ValueError( + f"Atom map {atom_map} maps to multiple ITS nodes: {nodes}. " + "This means atom mapping is ambiguous." + ) + + +def extract_order_from_edge_data(edge_data: Any) -> tuple[float, float]: + """ + Extract SynKit ITS edge order. + + Expected normal edge format: + {"order": (reactant_order, product_order)} + + MultiGraph-like fallback: + {0: {"order": (reactant_order, product_order)}} + + :param edge_data: ITS edge attributes. + :type edge_data: Any + :returns: Reactant-side and product-side bond orders. + :rtype: tuple[float, float] + """ + if edge_data is None: + return 0.0, 0.0 + + if isinstance(edge_data, dict) and "order" in edge_data: + order = edge_data["order"] + return float(order[0]), float(order[1]) + + if isinstance(edge_data, dict): + for value in edge_data.values(): + if isinstance(value, dict) and "order" in value: + order = value["order"] + return float(order[0]), float(order[1]) + + return 0.0, 0.0 + + +def get_its_bond_order( + its, + atom_a: int, + atom_b: int, + strict: bool = True, + context: str = "", + atom_map_nodes: Optional[dict[int, list[Any]]] = None, +) -> tuple[float, float]: + """ + Return ITS bond order for atom-map pair. + + For example, an edge with order ``(0.0, 1.0)`` represents new bond + formation from reactants to products. + + :param its: ITS graph. + :type its: networkx.Graph + :param atom_a: First atom-map number. + :type atom_a: int + :param atom_b: Second atom-map number. + :type atom_b: int + :param strict: Whether missing nodes or edges should raise. 
+ :type strict: bool + :param context: Optional context appended to strict-mode edge errors. + :type context: str + :param atom_map_nodes: Optional precomputed atom-map to node index. + :type atom_map_nodes: Optional[dict[int, list[Any]]] + :returns: Reactant-side and product-side bond orders. + :rtype: tuple[float, float] + :raises ValueError: If strict lookup fails. + """ + node_a = get_unique_node_for_atom_map( + its, + atom_a, + strict=strict, + atom_map_nodes=atom_map_nodes, + ) + node_b = get_unique_node_for_atom_map( + its, + atom_b, + strict=strict, + atom_map_nodes=atom_map_nodes, + ) + + if node_a is None or node_b is None: + return 0.0, 0.0 + + if not its.has_edge(node_a, node_b): + if strict: + extra = f" Context: {context}" if context else "" + raise ValueError( + f"ITS graph has no edge for atom maps {atom_a}-{atom_b}." f"{extra}" + ) + return 0.0, 0.0 + + edge_data = its.get_edge_data(node_a, node_b) + return extract_order_from_edge_data(edge_data) + + +# ============================================================ +# Sigma/Pi typing +# ============================================================ + + +def is_zero(x: float, tol: float = 1e-6) -> bool: + """Return whether a value is approximately zero. + + :param x: Value to compare. + :type x: float + :param tol: Absolute tolerance. + :type tol: float + :returns: Whether ``x`` is within tolerance of zero. + :rtype: bool + """ + return abs(x) < tol + + +def is_one(x: float, tol: float = 1e-6) -> bool: + """Return whether a value is approximately one. + + :param x: Value to compare. + :type x: float + :param tol: Absolute tolerance. + :type tol: float + :returns: Whether ``x`` is within tolerance of one. + :rtype: bool + """ + return abs(x - 1.0) < tol + + +def bond_minus_type(reactant_order: float) -> str: + """ + Type consumed bond/electron-pair source. 
+ + Rules + ----- + reactant_order == 1.0 -> Sigma- + reactant_order > 1.0 -> Pi- + includes double, triple, aromatic 1.5 + + unknown -> B- + + :param reactant_order: Bond order on the reactant side. + :type reactant_order: float + :returns: Typed consumed-bond label. + :rtype: str + """ + if is_one(reactant_order): + return "Sigma-" + + if reactant_order > 1.0: + return "Pi-" + + return "B-" + + +def bond_plus_type( + reactant_order: float, + product_order: float, +) -> str: + """ + Type formed/increased bond destination. + + Rules + ----- + 0 -> 1 : Sigma+ + 0 -> 1.5 : Sigma+, because new connectivity starts as sigma + 0 -> 2 : Sigma+, because new connectivity starts as sigma + 1 -> 2 : Pi+ + 1.5 -> 2 : Pi+ + 2 -> 3 : Pi+ + + :param reactant_order: Bond order on the reactant side. + :type reactant_order: float + :param product_order: Bond order on the product side. + :type product_order: float + :returns: Typed formed-bond label. + :rtype: str + """ + if product_order <= 0: + return "B+" + + # New bond formation. First new connectivity is sigma. + if is_zero(reactant_order) and product_order > 0: + return "Sigma+" + + # Existing bond order increases. Added component is pi. + if product_order > reactant_order: + return "Pi+" + + # Fallbacks. + if is_one(product_order): + return "Sigma+" + + if product_order > 1.0: + return "Pi+" + + return "B+" + + +# ============================================================ +# Typed LP/Sigma/Pi conversion +# ============================================================ + + +def typed_convert_step( + step: str, + its, + strict_bond_lookup: bool = True, + atom_map_nodes: Optional[dict[int, list[Any]]] = None, +) -> list[Any]: + """ + Convert one arrow-code step into typed LP/Sigma/Pi format. + + Important + --------- + This function does NOT globally force Sigma/Pi from orbital_class. + Each step is typed from local ITS bond-order changes. + + :param step: One arrow-code step. 
+ :type step: str + :param its: ITS graph used for local bond-order lookup. + :type its: networkx.Graph + :param strict_bond_lookup: Whether missing bond lookups should raise. + :type strict_bond_lookup: bool + :param atom_map_nodes: Optional precomputed atom-map to node index. + :type atom_map_nodes: Optional[dict[int, list[Any]]] + :returns: Typed LP/Sigma/Pi conversion record. + :rtype: list[Any] + :raises ValueError: If the step shape is unsupported or strict lookup fails. + """ + lhs, rhs = parse_arrow_step(step) + + # -------------------------------------------------------- + # Case 1: a=b + # LP(a) forms bond a-b + # -------------------------------------------------------- + if len(lhs) == 1 and len(rhs) == 1: + a = lhs[0] + b = rhs[0] + + r_order, p_order = get_its_bond_order( + its, + a, + b, + strict=strict_bond_lookup, + context=step, + atom_map_nodes=atom_map_nodes, + ) + plus = bond_plus_type(r_order, p_order) + + return [f"LP-/{plus}", [a], [a, b]] + + # -------------------------------------------------------- + # Case 2: a=b,c + # LP(a) forms/increases bond b-c + # + # Example: + # 12=11,12 + # LP on 12 forms/increases 11-12 bond + # -------------------------------------------------------- + if len(lhs) == 1 and len(rhs) == 2: + a = lhs[0] + b, c = rhs + + r_order, p_order = get_its_bond_order( + its, + b, + c, + strict=strict_bond_lookup, + context=step, + atom_map_nodes=atom_map_nodes, + ) + plus = bond_plus_type(r_order, p_order) + + return [f"LP-/{plus}", [a], [b, c]] + + # -------------------------------------------------------- + # Case 3: a,b=c + # bond a-b breaks; electrons become LP on c + # -------------------------------------------------------- + if len(lhs) == 2 and len(rhs) == 1: + a, b = lhs + c = rhs[0] + + r_order, _p_order = get_its_bond_order( + its, + a, + b, + strict=strict_bond_lookup, + context=step, + atom_map_nodes=atom_map_nodes, + ) + minus = bond_minus_type(r_order) + + return [f"{minus}/LP+", [a, b], [c]] + + # 
-------------------------------------------------------- + # Case 4: a,b=c,d + # bond a-b becomes bond c-d + # -------------------------------------------------------- + if len(lhs) == 2 and len(rhs) == 2: + a, b = lhs + c, d = rhs + + src_r_order, _src_p_order = get_its_bond_order( + its, + a, + b, + strict=strict_bond_lookup, + context=f"source of {step}", + atom_map_nodes=atom_map_nodes, + ) + + dst_r_order, dst_p_order = get_its_bond_order( + its, + c, + d, + strict=strict_bond_lookup, + context=f"destination of {step}", + atom_map_nodes=atom_map_nodes, + ) + + minus = bond_minus_type(src_r_order) + plus = bond_plus_type(dst_r_order, dst_p_order) + + return [f"{minus}/{plus}", [a, b], [c, d]] + + raise ValueError(f"Unsupported arrow step: {step}") + + +def typed_convert_arrow_code( + arrow_code: str, + its, + strict_bond_lookup: bool = True, +) -> list[list[Any]]: + """Convert every step in an arrow code to typed LP/Sigma/Pi form. + + :param arrow_code: Semicolon-separated arrow code. + :type arrow_code: str + :param its: ITS graph used for local bond-order lookup. + :type its: networkx.Graph + :param strict_bond_lookup: Whether missing bond lookups should raise. + :type strict_bond_lookup: bool + :returns: Typed conversion records for each step. + :rtype: list[list[Any]] + """ + atom_map_nodes = atom_map_to_nodes(its) + + return [ + typed_convert_step( + step=step, + its=its, + strict_bond_lookup=strict_bond_lookup, + atom_map_nodes=atom_map_nodes, + ) + for step in split_arrow_code(arrow_code) + ] + + +# ============================================================ +# Main public conversion functions +# ============================================================ + + +def convert_arrow_code( + arrow_code: str, + its=None, + strict_bond_lookup: bool = True, +) -> dict[str, Any]: + """ + Convert arrow code into generic and typed formats. + + If ``its`` is ``None``, ``typed_converted`` is ``None``. + + :param arrow_code: Semicolon-separated arrow code. 
+ :type arrow_code: str + :param its: Optional ITS graph for typed conversion. + :type its: Optional[networkx.Graph] + :param strict_bond_lookup: Whether missing typed bond lookups should raise. + :type strict_bond_lookup: bool + :returns: Arrow code with generic and optional typed conversions. + :rtype: dict[str, Any] + """ + converted = generic_convert_arrow_code(arrow_code) + + if its is None: + typed_converted = None + else: + typed_converted = typed_convert_arrow_code( + arrow_code=arrow_code, + its=its, + strict_bond_lookup=strict_bond_lookup, + ) + + return { + "arrow_code": arrow_code, + "converted": converted, + "typed_converted": typed_converted, + } + + +def convert_reaction_arrow( + reaction_smiles: str, + arrow_code: str, + orbital_class: Optional[str] = None, + expand_aam: bool = True, + remove_non_arrow_maps: bool = True, + strict_bond_lookup: bool = True, +) -> dict[str, Any]: + """ + Complete wrapper. + + reaction SMILES + arrow code + -> clean non-arrow maps + -> expand AAM with SynKit + -> ITS graph + -> generic converted + -> typed converted + + orbital_class is stored as metadata only. + It is not used to force Sigma/Pi typing. + + :param reaction_smiles: Reaction SMILES in ``reactants>>products`` format. + :type reaction_smiles: str + :param arrow_code: Semicolon-separated arrow code. + :type arrow_code: str + :param orbital_class: Optional source-dataset orbital classification metadata. + :type orbital_class: Optional[str] + :param expand_aam: Whether to expand atom mapping before ITS construction. + :type expand_aam: bool + :param remove_non_arrow_maps: Whether to remove atom maps not used by the arrow code. + :type remove_non_arrow_maps: bool + :param strict_bond_lookup: Whether missing typed bond lookups should raise. + :type strict_bond_lookup: bool + :returns: Conversion result and ITS preparation metadata. 
+ :rtype: dict[str, Any] + """ + its, expanded_rsmi, rsmi_for_its, diagnostics = build_its_from_rsmi( + rsmi=reaction_smiles, + arrow_code=arrow_code, + expand_aam=expand_aam, + remove_non_arrow_maps=remove_non_arrow_maps, + ) + + result = convert_arrow_code( + arrow_code=arrow_code, + its=its, + strict_bond_lookup=strict_bond_lookup, + ) + + result["reaction_smiles"] = reaction_smiles + result["rsmi_for_its"] = rsmi_for_its + result["expanded_rsmi"] = expanded_rsmi + result["orbital_class"] = orbital_class + result["diagnostics"] = diagnostics + + return result + + +def convert_record( + record: dict[str, Any], + reaction_key: str = "SMIRKS", + arrow_key: str = "arrow_code", + orbital_key: str = "orbital pair classification", + expand_aam: bool = True, + remove_non_arrow_maps: bool = True, + strict_bond_lookup: bool = True, +) -> dict[str, Any]: + """ + Convert one dictionary record. + + Expected input keys + ------------------- + { + "SMIRKS": "...>>...", + "arrow_code": "...", + "orbital pair classification": "pi_empty" + } + + :param record: Source record to convert. + :type record: dict[str, Any] + :param reaction_key: Key containing reaction SMILES. + :type reaction_key: str + :param arrow_key: Key containing arrow code. + :type arrow_key: str + :param orbital_key: Key containing optional orbital classification metadata. + :type orbital_key: str + :param expand_aam: Whether to expand atom mapping before ITS construction. + :type expand_aam: bool + :param remove_non_arrow_maps: Whether to remove atom maps not used by the arrow code. + :type remove_non_arrow_maps: bool + :param strict_bond_lookup: Whether missing typed bond lookups should raise. + :type strict_bond_lookup: bool + :returns: Converted record with original metadata preserved. 
+ :rtype: dict[str, Any] + """ + reaction_smiles = record[reaction_key] + arrow_code = record[arrow_key] + orbital_class = record.get(orbital_key) + + result = convert_reaction_arrow( + reaction_smiles=reaction_smiles, + arrow_code=arrow_code, + orbital_class=orbital_class, + expand_aam=expand_aam, + remove_non_arrow_maps=remove_non_arrow_maps, + strict_bond_lookup=strict_bond_lookup, + ) + + # Preserve original metadata. + for key, value in record.items(): + if key not in result: + result[key] = value + + return result + + +def convert_records( + records: list[dict[str, Any]], + reaction_key: str = "SMIRKS", + arrow_key: str = "arrow_code", + orbital_key: str = "orbital pair classification", + expand_aam: bool = True, + remove_non_arrow_maps: bool = True, + strict_bond_lookup: bool = True, + keep_errors: bool = False, +) -> list[dict[str, Any]]: + """ + Batch conversion. + + keep_errors=False: + raise immediately on first error. + + keep_errors=True: + collect errors into result dictionaries. + + :param records: Source records to convert. + :type records: list[dict[str, Any]] + :param reaction_key: Key containing reaction SMILES. + :type reaction_key: str + :param arrow_key: Key containing arrow code. + :type arrow_key: str + :param orbital_key: Key containing optional orbital classification metadata. + :type orbital_key: str + :param expand_aam: Whether to expand atom mapping before ITS construction. + :type expand_aam: bool + :param remove_non_arrow_maps: Whether to remove atom maps not used by the arrow code. + :type remove_non_arrow_maps: bool + :param strict_bond_lookup: Whether missing typed bond lookups should raise. + :type strict_bond_lookup: bool + :param keep_errors: Whether to collect conversion failures instead of raising. + :type keep_errors: bool + :returns: Converted records, including failures when ``keep_errors`` is true. 
+ :rtype: list[dict[str, Any]] + """ + results = [] + + for idx, record in enumerate(records, start=1): + try: + result = convert_record( + record=record, + reaction_key=reaction_key, + arrow_key=arrow_key, + orbital_key=orbital_key, + expand_aam=expand_aam, + remove_non_arrow_maps=remove_non_arrow_maps, + strict_bond_lookup=strict_bond_lookup, + ) + result["row_index"] = idx + results.append(result) + + except Exception as e: + if not keep_errors: + print("=" * 100) + print(f"FAILED ROW {idx}") + print("=" * 100) + print("orbital_class:", record.get(orbital_key)) + print("arrow_code:", record.get(arrow_key)) + print("error:", repr(e)) + print("=" * 100) + raise + + failed = dict(record) + failed["row_index"] = idx + failed["error"] = repr(e) + results.append(failed) + + return results + + +# ============================================================ +# Debug helpers +# ============================================================ + + +def debug_arrow_bond_orders( + reaction_smiles: str, + arrow_code: str, + expand_aam: bool = True, + remove_non_arrow_maps: bool = True, + strict_bond_lookup: bool = True, +) -> None: + """Print the ITS bond orders used by each arrow step. + + :param reaction_smiles: Reaction SMILES in ``reactants>>products`` format. + :type reaction_smiles: str + :param arrow_code: Semicolon-separated arrow code. + :type arrow_code: str + :param expand_aam: Whether to expand atom mapping before ITS construction. + :type expand_aam: bool + :param remove_non_arrow_maps: Whether to remove atom maps not used by the arrow code. + :type remove_non_arrow_maps: bool + :param strict_bond_lookup: Whether missing typed bond lookups should raise. + :type strict_bond_lookup: bool + :returns: ``None``. 
+ :rtype: None + """ + its, expanded_rsmi, rsmi_for_its, diagnostics = build_its_from_rsmi( + rsmi=reaction_smiles, + arrow_code=arrow_code, + expand_aam=expand_aam, + remove_non_arrow_maps=remove_non_arrow_maps, + ) + + print("Diagnostics:") + print(diagnostics) + print() + + print("RSMI used for ITS:") + print(rsmi_for_its) + print() + + print("Expanded RSMI:") + print(expanded_rsmi) + print() + + for step in split_arrow_code(arrow_code): + lhs, rhs = parse_arrow_step(step) + + print(f"Step: {step}") + print(f" shape: {classify_arrow_shape(step)}") + + if len(lhs) == 1 and len(rhs) == 1: + a = lhs[0] + b = rhs[0] + print( + f" destination bond {a}-{b}: " + f"{get_its_bond_order(its, a, b, strict=strict_bond_lookup, context=step)}" + ) + + elif len(lhs) == 1 and len(rhs) == 2: + a = lhs[0] + b, c = rhs + print(f" LP source atom {a}") + print( + f" destination bond {b}-{c}: " + f"{get_its_bond_order(its, b, c, strict=strict_bond_lookup, context=step)}" + ) + + elif len(lhs) == 2 and len(rhs) == 1: + a, b = lhs + c = rhs[0] + print( + f" source bond {a}-{b}: " + f"{get_its_bond_order(its, a, b, strict=strict_bond_lookup, context=step)}" + ) + print(f" LP destination atom {c}") + + elif len(lhs) == 2 and len(rhs) == 2: + a, b = lhs + c, d = rhs + print( + f" source bond {a}-{b}: " + f"{get_its_bond_order(its, a, b, strict=strict_bond_lookup, context=step)}" + ) + print( + f" destination bond {c}-{d}: " + f"{get_its_bond_order(its, c, d, strict=strict_bond_lookup, context=step)}" + ) + + print() + + +def debug_record( + record: dict[str, Any], + reaction_key: str = "SMIRKS", + arrow_key: str = "arrow_code", + orbital_key: str = "orbital pair classification", +) -> dict[str, Any]: + """Print full debug output for one record. + + :param record: Source record to inspect. + :type record: dict[str, Any] + :param reaction_key: Key containing reaction SMILES. + :type reaction_key: str + :param arrow_key: Key containing arrow code. 
+ :type arrow_key: str + :param orbital_key: Key containing optional orbital classification metadata. + :type orbital_key: str + :returns: Converted record. + :rtype: dict[str, Any] + """ + from pprint import pprint + + rsmi = record[reaction_key] + arrow_code = record[arrow_key] + orbital_class = record.get(orbital_key) + + print("=" * 100) + print("DEBUG RECORD") + print("=" * 100) + + print("orbital_class:") + print(orbital_class) + print() + + print("arrow_code:") + print(arrow_code) + print() + + print("arrow shapes:") + for step in split_arrow_code(arrow_code): + print(f" {step:25s} -> {classify_arrow_shape(step)}") + print() + + print("raw map diagnostics:") + diagnostics = validate_arrow_maps( + rsmi=rsmi, + arrow_code=arrow_code, + raise_on_arrow_duplicates=False, + raise_on_missing_arrow_maps=False, + ) + pprint(diagnostics, width=160) + print() + + print("RSMI after removing non-arrow maps:") + cleaned = remove_non_arrow_atom_maps(rsmi, arrow_code) + print(cleaned) + print() + + print("Arrow bond orders:") + debug_arrow_bond_orders( + reaction_smiles=rsmi, + arrow_code=arrow_code, + expand_aam=True, + remove_non_arrow_maps=True, + strict_bond_lookup=True, + ) + + result = convert_reaction_arrow( + reaction_smiles=rsmi, + arrow_code=arrow_code, + orbital_class=orbital_class, + expand_aam=True, + remove_non_arrow_maps=True, + strict_bond_lookup=True, + ) + + print("converted:") + pprint(result["converted"], width=120) + print() + + print("typed_converted:") + pprint(result["typed_converted"], width=120) + print() + + return result + + +def check_typed_conversion_quality(results: list[dict[str, Any]]) -> dict[str, Any]: + """Check whether typed conversions still contain generic B-/B+ labels. + + :param results: Conversion results to inspect. + :type results: list[dict[str, Any]] + :returns: Error and untyped-step diagnostics. 
+ :rtype: dict[str, Any] + """ + errors = [] + untyped = [] + + for result in results: + if "error" in result: + errors.append(result) + continue + + typed = result.get("typed_converted") + + if typed is None: + untyped.append( + { + "row_index": result.get("row_index"), + "reason": "typed_converted is None", + } + ) + continue + + for step in typed: + label = step[0] + if "B-" in label or "B+" in label: + untyped.append( + { + "row_index": result.get("row_index"), + "orbital_class": result.get("orbital_class"), + "arrow_code": result.get("arrow_code"), + "step": step, + } + ) + + return { + "n_results": len(results), + "n_errors": len(errors), + "n_untyped_steps": len(untyped), + "all_fully_typed": len(errors) == 0 and len(untyped) == 0, + "errors": errors, + "untyped": untyped, + } diff --git a/synkit/Graph/Mech/electron_accounting.py b/synkit/Graph/Mech/electron_accounting.py new file mode 100644 index 0000000..42a1d0a --- /dev/null +++ b/synkit/Graph/Mech/electron_accounting.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from typing import Any + +import networkx as nx +from rdkit import Chem + +from synkit.IO.graph_to_mol import GraphToMol + + +def bond_order_sum(graph: nx.Graph, node: Any) -> float: + """Return the sigma-plus-pi bond-order sum around one node.""" + total = 0.0 + for _, _, data in graph.edges(node, data=True): + total += float(data.get("sigma_order", 0.0)) + float(data.get("pi_order", 0.0)) + return total + + +def recompute_charge(graph: nx.Graph, node: Any) -> int | float: + """Recompute formal charge from stored electron-state fields.""" + attrs = graph.nodes[node] + charge = float(attrs["valence_electrons"]) - ( + 2 * float(attrs.get("lone_pairs", 0)) + + float(attrs.get("radical", 0)) + + float(attrs.get("hcount", 0)) + + bond_order_sum(graph, node) + ) + return int(charge) if charge.is_integer() else charge + + +def refresh_electron_fields(graph: nx.Graph, *, in_place: bool = False) -> nx.Graph: + """Refresh derived electron 
bookkeeping on a molecular graph. + + The graph is expected to store scalar ``sigma_order`` and ``pi_order`` edge + fields plus node-level electron state. Presentation-facing ``order`` is not + rewritten here; RDKit reconstruction remains responsible for aromatic + re-perception at the product boundary. + """ + target = graph if in_place else graph.copy() + + for _, _, data in target.edges(data=True): + sigma = float(data.get("sigma_order", 0.0)) + pi = float(data.get("pi_order", 0.0)) + data["kekule_order"] = sigma + pi + + for node, attrs in target.nodes(data=True): + attrs["bond_order_sum"] = bond_order_sum(target, node) + if "valence_electrons" not in attrs: + continue + attrs["recomputed_charge"] = recompute_charge(target, node) + represented_charge = float(attrs.get("charge", 0)) + attrs["charge_mismatch"] = represented_charge != attrs["recomputed_charge"] + + return target + + +def graph_to_sanitized_kekule_mol(graph: nx.Graph) -> Chem.Mol: + """Reconstruct a product from ``kekule_order`` and let RDKit sanitize it.""" + refreshed = refresh_electron_fields(graph) + return GraphToMol(edge_attributes={"order": "kekule_order"}).graph_to_mol( + refreshed, + sanitize=True, + use_h_count=True, + ) diff --git a/synkit/Graph/canon_graph.py b/synkit/Graph/canon_graph.py index f134d88..5c49f62 100644 --- a/synkit/Graph/canon_graph.py +++ b/synkit/Graph/canon_graph.py @@ -85,6 +85,8 @@ def _default_node_key(node_id: NodeId, data: NodeData) -> Tuple[Any, ...]: return ( data.get("element", ""), data.get("charge", 0), + data.get("lone_pairs", 0), + data.get("radical", 0), data.get("aromatic", False), # data.get("atom_map", 0), data.get("hcount", 0), @@ -149,7 +151,14 @@ def __init__( backend: Literal["generic", "wl", "morgan", "nauty"] = "generic", wl_iterations: int = 3, morgan_radius: int = 3, - node_attrs: List[str] = ["element", "aromatic", "charge", "hcount"], + node_attrs: List[str] = [ + "element", + "aromatic", + "charge", + "lone_pairs", + "radical", + "hcount", + 
], node_sort_key: T_NodeSortKey = _default_node_key, edge_sort_key: T_EdgeSortKey = _default_edge_key, ) -> None: @@ -316,8 +325,8 @@ def _serialise(self, g: nx.Graph) -> str: nodes = sorted(g.nodes(data=True), key=lambda x: self._node_key(*x)) edges = sorted(g.edges(data=True), key=lambda x: self._edge_key(*x)) - node_str = ";".join(f"{n}:{self._node_key(n,d)}" for n, d in nodes) - edge_str = ";".join(f"{(u,v)}:{self._edge_key(u,v,d)}" for u, v, d in edges) + node_str = ";".join(f"{n}:{self._node_key(n, d)}" for n, d in nodes) + edge_str = ";".join(f"{(u, v)}:{self._edge_key(u, v, d)}" for u, v, d in edges) return f"N[{node_str}]|E[{edge_str}]" # ------------------------------------------------------------------ # diff --git a/synkit/Graph/utils.py b/synkit/Graph/utils.py index be44e69..51e7f8f 100644 --- a/synkit/Graph/utils.py +++ b/synkit/Graph/utils.py @@ -81,6 +81,7 @@ def add_wildcard_subgraph_for_unmapped( mapping: Dict[Any, Any], edge_keys: List[str] = ["order"], inplace: bool = False, + tuple_mode: bool = False, ) -> Tuple[nx.Graph, Dict[Any, Any]]: """Extend G with wildcard nodes/edges for every L-node not already mapped, preserving original L->G mapping and returning the full mapping. @@ -97,6 +98,9 @@ def add_wildcard_subgraph_for_unmapped( Edge attributes to copy (first element if list/tuple). Default ['order']. inplace : bool, optional If True, modify G in place; otherwise modify a copy. + tuple_mode : bool, optional + If True, scalarize tuple ITS node attrs onto the left side before + adding wildcard placeholders to the host graph. 
Returns ------- @@ -116,12 +120,38 @@ def add_wildcard_subgraph_for_unmapped( # Prepare new node IDs next_id = max(G_ext.nodes, default=-1) + 1 + used_atom_maps = { + data.get("atom_map") + for _, data in G_ext.nodes(data=True) + if data.get("atom_map") not in (None, 0) + } + + def _next_unused_atom_map(start: int) -> int: + candidate = start + while candidate in used_atom_maps: + candidate += 1 + return candidate # Add wildcard nodes for each unmapped L node for l_node in unmapped: attrs = L.nodes[l_node].copy() + if tuple_mode: + attrs = { + key: ( + value[0] if isinstance(value, tuple) and len(value) == 2 else value + ) + for key, value in attrs.items() + if key != "typesGH" + } + left_types = L.nodes[l_node].get("typesGH", (None, None))[0] + if left_types is not None: + attrs["typesGH"] = (left_types, left_types) attrs["element"] = "*" - attrs.setdefault("atom_map", next_id) + atom_map = attrs.get("atom_map") + if atom_map in (None, 0) or atom_map in used_atom_maps: + atom_map = _next_unused_atom_map(next_id) + attrs["atom_map"] = atom_map + used_atom_maps.add(atom_map) G_ext.add_node(next_id, **attrs) L_to_G[l_node] = next_id next_id += 1 diff --git a/synkit/IO/chem_converter.py b/synkit/IO/chem_converter.py index 5b4c32c..73d5c96 100644 --- a/synkit/IO/chem_converter.py +++ b/synkit/IO/chem_converter.py @@ -16,6 +16,7 @@ from synkit.Graph.ITS.its_decompose import get_rc, its_decompose from synkit.Graph.ITS.rc_extractor import RCExtractor from synkit.Graph.ITS.its_reverter import ITSReverter +from synkit.Graph.Mech.electron_accounting import graph_to_sanitized_kekule_mol _BRACKET_DIGIT_PATTERN: Pattern[str] = re.compile(r"\[([^\]]*?)\](\d+)") _BRACKET_MAP_PATTERN: Pattern[str] = re.compile(r"\[([^\]]+):(\d+)\]") @@ -27,11 +28,14 @@ "aromatic", "hcount", "charge", + "radical", + "lone_pairs", + "valence_electrons", "neighbors", "atom_map", ) -DEFAULT_EDGE_ATTRS = ("order",) +DEFAULT_EDGE_ATTRS = ("order", "kekule_order", "sigma_order", "pi_order") logger = 
setup_logging() @@ -73,6 +77,42 @@ def _validate_its_format(format: str) -> ITSFormat: return format +def detect_its_format(graph: nx.Graph) -> ITSFormat: + """ + Detect the ITS storage representation used by a graph. + + Legacy ITS graphs keep scalar node attributes and store side-specific + values only in ``typesGH``. Tuple ITS graphs store direct paired node and + edge attributes such as ``element=("C", "C")`` or + ``sigma_order=(1.0, 1.0)``. + + :param graph: ITS-like graph to inspect. + :type graph: nx.Graph + :return: Detected ITS format. + :rtype: ITSFormat + """ + tuple_node_keys = ( + "element", + "aromatic", + "hcount", + "charge", + "radical", + "lone_pairs", + "valence_electrons", + ) + tuple_edge_keys = ("kekule_order", "sigma_order", "pi_order") + + for _, attrs in graph.nodes(data=True): + if any(_is_pair(attrs.get(key)) for key in tuple_node_keys): + return "tuple" + + for _, _, attrs in graph.edges(data=True): + if any(_is_pair(attrs.get(key)) for key in tuple_edge_keys): + return "tuple" + + return "typesGH" + + def _split_rsmi(rsmi: str) -> tuple[str, str]: """ Split a reaction SMILES string into reactant and product parts. @@ -525,8 +565,8 @@ def rsmi_to_its( :type node_attrs: Optional[Sequence[str]] :param edge_attrs: Edge attributes to include in graph construction. :type edge_attrs: Optional[Sequence[str]] - :param explicit_hydrogen: If ``True`` and ``format="typesGH"``, - convert implicit hydrogens to explicit nodes. + :param explicit_hydrogen: If ``True``, convert implicit hydrogens to + explicit nodes for the selected ITS format. :type explicit_hydrogen: bool :param format: ITS format. 
:type format: ITSFormat @@ -568,6 +608,10 @@ def rsmi_to_its( node_attrs=resolved_node_attrs, edge_attrs=resolved_edge_attrs, ) + if explicit_hydrogen: + from synkit.Graph.Hyrogen._misc import h_to_explicit + + its_graph = h_to_explicit(its_graph, None, True) if core: its_graph = RCExtractor( node_attrs=resolved_node_attrs, @@ -611,13 +655,44 @@ def its_to_rsmi( validated_format = _validate_its_format(format) reactant_graph, product_graph = _decompose_its(its, validated_format) - rsmi = graph_to_rsmi( - reactant_graph, - product_graph, - its, - sanitize, - explicit_hydrogen, - ) + if validated_format == "tuple": + preserved_hydrogens = ( + [] + if explicit_hydrogen + else _get_preserved_hydrogen_maps(its, validated_format) + ) + reactant_smiles = graph_to_smi( + reactant_graph, + sanitize=sanitize, + preserve_atom_maps=preserved_hydrogens, + ) + try: + if explicit_hydrogen: + product = product_graph + else: + from synkit.Graph.Hyrogen._misc import implicit_hydrogen + + product = implicit_hydrogen( + product_graph, + set(preserved_hydrogens), + ) + product_smiles = Chem.MolToSmiles(graph_to_sanitized_kekule_mol(product)) + except Exception as exc: + logger.debug("Error generating tuple product SMILES: %s", exc) + product_smiles = None + rsmi = ( + f"{reactant_smiles}>>{product_smiles}" + if reactant_smiles is not None and product_smiles is not None + else None + ) + else: + rsmi = graph_to_rsmi( + reactant_graph, + product_graph, + its, + sanitize, + explicit_hydrogen, + ) if rsmi is None: raise ValueError("Failed to convert ITS graph to reaction SMILES.") diff --git a/synkit/IO/graph_to_mol.py b/synkit/IO/graph_to_mol.py index ddcf048..a0d946d 100644 --- a/synkit/IO/graph_to_mol.py +++ b/synkit/IO/graph_to_mol.py @@ -76,6 +76,7 @@ def graph_to_mol( for node, data in graph.nodes(data=True): element = data.get(self.node_attributes["element"], "*") charge = data.get(self.node_attributes["charge"], 0) + radical = data.get(self.node_attributes.get("radical", "radical"), 
0) atom_map = ( data.get(self.node_attributes["atom_map"], 0) if "atom_map" in data.keys() @@ -89,6 +90,7 @@ def graph_to_mol( atom = Chem.Atom(element) atom.SetFormalCharge(charge) + atom.SetNumRadicalElectrons(int(radical)) if atom_map is not None: atom.SetAtomMapNum(atom_map) if hcount is not None: diff --git a/synkit/IO/mol_to_graph.py b/synkit/IO/mol_to_graph.py index 54bd839..a96d6e2 100644 --- a/synkit/IO/mol_to_graph.py +++ b/synkit/IO/mol_to_graph.py @@ -558,6 +558,11 @@ def _bond_order_sum_for_lone_pairs(cls, atom: Chem.Atom) -> float: :rtype: float """ try: + if atom.GetIsAromatic(): + # Lone-pair bookkeeping needs the Kekule heavy-atom valence, + # not presentation bond orders such as three aromatic 1.5 bonds. + return float(atom.GetTotalValence() - cls._non_neighbor_h_count(atom)) + aromatic_lp_donor = cls._is_aromatic_lone_pair_donor(atom) total = 0.0 @@ -936,13 +941,13 @@ def _augment_atom_properties( new_props["available_lp"] = available_lone_pairs > 0 # Backward-compatible field used by SynEltra. new_props["lone_pairs"] = estimated_lone_pairs + new_props["valence_electrons"] = cls._safe_valence_electrons(atom) if profile == "full": new_props["bond_order_sum"] = round(cls._safe_bond_order_sum(atom), 3) new_props["lp_bond_order_sum"] = round( cls._bond_order_sum_for_lone_pairs(atom), 3 ) - new_props["valence_electrons"] = cls._safe_valence_electrons(atom) new_props["estimated_lone_pairs"] = estimated_lone_pairs new_props["available_lone_pairs"] = available_lone_pairs @@ -960,11 +965,12 @@ def _gather_atom_properties( Minimal profile keys: ``element``, ``aromatic``, ``hcount``, ``charge``, ``radical``, ``isomer``, ``partial_charge``, ``hybridization``, ``in_ring``, ``neighbors``, ``atom_map``, - ``oxidation_state``, ``available_lp``, ``lone_pairs``. + ``oxidation_state``, ``available_lp``, ``lone_pairs``, + ``valence_electrons``. 
Full profile additionally includes ``bond_order_sum``, - ``lp_bond_order_sum``, ``valence_electrons``, - ``estimated_lone_pairs``, ``available_lone_pairs``. + ``lp_bond_order_sum``, ``estimated_lone_pairs``, + ``available_lone_pairs``. :param atom: RDKit atom. :type atom: Chem.Atom @@ -1010,6 +1016,7 @@ def _gather_atom_properties( ), "available_lp": available_lone_pairs > 0, "lone_pairs": estimated_lone_pairs, + "valence_electrons": MolToGraph._safe_valence_electrons(atom), } if profile == "full": @@ -1017,7 +1024,6 @@ def _gather_atom_properties( props["lp_bond_order_sum"] = round( MolToGraph._bond_order_sum_for_lone_pairs(atom), 3 ) - props["valence_electrons"] = MolToGraph._safe_valence_electrons(atom) props["estimated_lone_pairs"] = estimated_lone_pairs props["available_lone_pairs"] = available_lone_pairs @@ -1081,17 +1087,29 @@ def _gather_bond_properties( except Exception: kekule_bond_type = bond_type + sigma_order, pi_order = MolToGraph._split_sigma_pi_order(kekule_order) + return { "order": order, "bond_type": bond_type, "aromatic": aromatic, "kekule_order": kekule_order, + "sigma_order": sigma_order, + "pi_order": pi_order, "kekule_bond_type": kekule_bond_type, "ez_isomer": ez, "conjugated": conjugated, "in_ring": in_ring, } + @staticmethod + def _split_sigma_pi_order(kekule_order: float) -> tuple[float, float]: + """Split a Kekule bond order into sigma and pi contributions.""" + order = max(0.0, float(kekule_order)) + if order <= 0: + return 0.0, 0.0 + return 1.0, max(0.0, order - 1.0) + # ------------------------------------------------------------------ # Stereochemistry helpers # ------------------------------------------------------------------ @@ -1225,8 +1243,10 @@ def _create_light_weight_graph( Node attributes: ``element``, ``aromatic``, ``hcount``, ``charge``, ``radical``, ``neighbors``, ``atom_map``, ``oxidation_state``, - ``available_lp``, ``lone_pairs``. 
Edge attributes: ``order``, - ``bond_type``, ``aromatic``, ``kekule_order``, ``kekule_bond_type``. + ``available_lp``, ``lone_pairs``, ``valence_electrons``. + Edge attributes: ``order``, ``bond_type``, ``aromatic``, + ``kekule_order``, ``sigma_order``, ``pi_order``, + ``kekule_bond_type``. :param mol: RDKit molecule. :type mol: Chem.Mol @@ -1276,6 +1296,7 @@ def _create_light_weight_graph( ), available_lp=available_lone_pairs > 0, lone_pairs=estimated_lone_pairs, + valence_electrons=cls._safe_valence_electrons(atom), ) for bond in mol.GetBonds(): diff --git a/synkit/Rule/Apply/rule_matcher.py b/synkit/Rule/Apply/rule_matcher.py index b587fd9..5fac36f 100644 --- a/synkit/Rule/Apply/rule_matcher.py +++ b/synkit/Rule/Apply/rule_matcher.py @@ -55,7 +55,11 @@ class RuleMatcher: """ def __init__( - self, rsmi: str, rule: Union[str, nx.Graph], explicit_h: bool = True + self, + rsmi: str, + rule: Union[str, nx.Graph], + explicit_h: bool = True, + electron_diagnostics: bool = False, ) -> None: """Initialize the matcher by standardizing the RSMI, building graphs, checking balance, and computing the match. @@ -74,6 +78,8 @@ def __init__( rule = rsmi_to_its(rule, core=True) self.rule = rule self.explicit_h = explicit_h + self.electron_diagnostics = electron_diagnostics + self._diagnostics: list[dict] = [] self.balanced = BalanceReactionCheck(n_jobs=1).rsmi_balance_check(self.rsmi) # Compute and store the match result @@ -103,9 +109,14 @@ def _match_valid(self) -> Optional[Tuple[str, nx.Graph]]: None. 
:rtype: Optional[tuple[str, nx.Graph]] """ - reactor = SynReactor(substrate=self.r_graph, template=self.rule) + reactor = SynReactor( + substrate=self.r_graph, + template=self.rule, + electron_diagnostics=self.electron_diagnostics, + ) for smarts in reactor.smarts_list: if self.std.fit(smarts) == self.rsmi: + self._diagnostics = reactor.diagnostics return smarts, self.rule return None @@ -120,12 +131,17 @@ def _match_reverse(self) -> Optional[Tuple[str, nx.Graph]]: :rtype: Optional[tuple[str, nx.Graph]] """ # Product‑side fragments - reactor = SynReactor(substrate=self.r_graph, template=self.rule) + reactor = SynReactor( + substrate=self.r_graph, + template=self.rule, + electron_diagnostics=self.electron_diagnostics, + ) for smarts in reactor.smarts_list: std_r = self.std.fit(smarts) if self.all_in( self.rsmi.split(">>")[1].split("."), std_r.split(">>")[1].split(".") ): + self._diagnostics = reactor.diagnostics return smarts, self.rule # Reactant‑side with inverted template @@ -134,12 +150,14 @@ def _match_reverse(self) -> Optional[Tuple[str, nx.Graph]]: template=self.rule, invert=True, explicit_h=self.explicit_h, + electron_diagnostics=self.electron_diagnostics, ) for smarts in reactor.smarts_list: std_r = self.std.fit(smarts) if self.all_in( self.rsmi.split(">>")[0].split("."), std_r.split(">>")[0].split(".") ): + self._diagnostics = reactor.diagnostics return smarts, self.rule return None @@ -157,6 +175,11 @@ def all_in(a: List[str], b: List[str]) -> bool: """ return set(a).issubset(b) + @property + def diagnostics(self) -> list[dict]: + """Electron diagnostics from the reactor that produced the match.""" + return list(self._diagnostics) + def help(self) -> None: """Print internal state and candidate SMARTS patterns for debugging. 
diff --git a/synkit/Rule/syn_rule.py b/synkit/Rule/syn_rule.py index 733b628..168f764 100644 --- a/synkit/Rule/syn_rule.py +++ b/synkit/Rule/syn_rule.py @@ -27,8 +27,14 @@ from synkit.Graph.syn_graph import SynGraph from synkit.Graph.canon_graph import GraphCanonicaliser from synkit.Graph.ITS.its_decompose import its_decompose +from synkit.Graph.ITS.its_reverter import ITSReverter from synkit.Graph.Hyrogen._misc import normalize_h_pair_graph -from synkit.IO.chem_converter import rsmi_to_its, gml_to_its +from synkit.IO.chem_converter import ( + ITSFormat, + detect_its_format, + rsmi_to_its, + gml_to_its, +) __all__ = ["SynRule"] @@ -76,14 +82,16 @@ def from_smart( *, canon: bool = True, implicit_h: bool = True, + format: ITSFormat = "typesGH", ) -> "SynRule": """Instantiate from a SMARTS string.""" return cls( - rsmi_to_its(smart), + rsmi_to_its(smart, format=format), name=name, canonicaliser=canonicaliser, canon=canon, implicit_h=implicit_h, + format=format, ) @classmethod @@ -116,6 +124,7 @@ def __init__( *, canon: bool = True, implicit_h: bool = True, + format: Optional[ITSFormat] = None, ) -> None: self._name = name self._canon_enabled = canon @@ -124,38 +133,37 @@ def __init__( # Fragment decomposition rc_graph = rc.copy() + self._format = format or detect_its_format(rc_graph) if self._implicit_h: rc_graph = normalize_h_pair_graph(rc_graph) - left_graph, right_graph = its_decompose(rc_graph) + left_graph, right_graph = self._decompose(rc_graph, self._format) # Optional H-stripping - if self._implicit_h: + if self._implicit_h and self._format == "typesGH": self._strip_explicit_h(rc_graph, left_graph, right_graph) - # Update typesGH tuples with new hcount - for node, att in rc_graph.nodes(data=True): - # unpack the old tuples - t0, t1 = att["typesGH"] - - # build new versions with the updated hcount at position 2 - new_t0 = ( - t0[0], - t0[1], - left_graph.nodes[node]["hcount"] + t0[2], - t0[3], - t0[4], - ) - new_t1 = ( - t1[0], - t1[1], - 
right_graph.nodes[node]["hcount"] + t1[2], - t1[3], - t1[4], - ) - - # reassign the attribute to a fresh tuple-of-tuples - att["typesGH"] = (new_t0, new_t1) - left_graph, right_graph = its_decompose(rc_graph) + # Update typesGH tuples with new hcount. + for node, att in rc_graph.nodes(data=True): + t0, t1 = att["typesGH"] + new_t0 = ( + t0[0], + t0[1], + left_graph.nodes[node]["hcount"] + t0[2], + t0[3], + t0[4], + ) + new_t1 = ( + t1[0], + t1[1], + right_graph.nodes[node]["hcount"] + t1[2], + t1[3], + t1[4], + ) + att["typesGH"] = (new_t0, new_t1) + left_graph, right_graph = self._decompose(rc_graph, self._format) + elif self._implicit_h and self._format == "tuple": + self._strip_explicit_h_tuple(rc_graph, left_graph, right_graph) + left_graph, right_graph = self._decompose(rc_graph, self._format) # ---------- wrap graphs ---------------------------------------- # self.rc = SynGraph(rc_graph, self._canonicaliser, canon=canon) self.left = SynGraph(left_graph, self._canonicaliser, canon=canon) @@ -168,6 +176,14 @@ def __init__( # ================================================================== # # Private utilities # # ================================================================== # + @staticmethod + def _decompose(rc: nx.Graph, format: ITSFormat) -> tuple[nx.Graph, nx.Graph]: + """Return left/right fragments for either supported ITS representation.""" + if format == "tuple": + reverter = ITSReverter(rc) + return reverter.to_reactant_graph(), reverter.to_product_graph() + return its_decompose(rc) + @staticmethod def _strip_explicit_h( rc: nx.Graph, @@ -234,6 +250,97 @@ def _fully_removable(h: str) -> bool: g.nodes[nbr]["hcount"] += 1 g.remove_node(h) + @staticmethod + def _strip_explicit_h_tuple( + rc: nx.Graph, + left: nx.Graph, + right: nx.Graph, + ) -> None: + """Tuple-style equivalent of legacy explicit-H stripping.""" + + def _removable_on(graph: nx.Graph, h: int) -> bool: + if not graph.has_node(h): + return False + nbrs = list(graph.neighbors(h)) + if 
not nbrs: + return False + return not all(graph.nodes[n].get("element") == "H" for n in nbrs) + + def _fully_removable(h: int) -> bool: + return _removable_on(left, h) and _removable_on(right, h) + + for graph in (left, right): + for _, data in graph.nodes(data=True): + if data.get("element") != "H": + data.setdefault("h_pairs", []) + data.setdefault("h_pairs_left", []) + data.setdefault("h_pairs_right", []) + data.setdefault("h_pair_atom_maps", {}) + + for _, data in rc.nodes(data=True): + element = data.get("element") + is_h = ( + isinstance(element, tuple) + and len(element) == 2 + and all(value == "H" for value in element) + ) + if not is_h: + data.setdefault("h_pairs", []) + data.setdefault("h_pairs_left", []) + data.setdefault("h_pairs_right", []) + data.setdefault("h_pair_atom_maps", {}) + + removable = sorted( + node + for node, attrs in left.nodes(data=True) + if attrs.get("element") == "H" + and right.has_node(node) + and _fully_removable(node) + ) + + for pair_id, h in enumerate(removable, start=1): + atom_map = left.nodes[h].get("atom_map", h) + for side, graph in (("left", left), ("right", right)): + for nbr in list(graph.neighbors(h)): + if graph.nodes[nbr].get("element") != "H": + graph.nodes[nbr]["hcount"] += 1 + graph.nodes[nbr].setdefault("h_pairs", []).append(pair_id) + graph.nodes[nbr].setdefault(f"h_pairs_{side}", []).append( + pair_id + ) + graph.nodes[nbr].setdefault("h_pair_atom_maps", {})[ + pair_id + ] = atom_map + graph.remove_node(h) + if rc.has_node(h): + rc.remove_node(h) + + for node, attrs in rc.nodes(data=True): + if node not in left or node not in right: + continue + if attrs.get("element") == ("H", "H"): + continue + left_h = left.nodes[node].get("hcount", 0) + right_h = right.nodes[node].get("hcount", 0) + attrs["hcount"] = (left_h, right_h) + attrs["h_pairs"] = sorted( + set(left.nodes[node].get("h_pairs", [])) + | set(right.nodes[node].get("h_pairs", [])) + ) + attrs["h_pairs_left"] = sorted(left.nodes[node].get("h_pairs_left", 
[])) + attrs["h_pairs_right"] = sorted(right.nodes[node].get("h_pairs_right", [])) + attrs["h_pair_atom_maps"] = { + **left.nodes[node].get("h_pair_atom_maps", {}), + **right.nodes[node].get("h_pair_atom_maps", {}), + } + typesgh = attrs.get("typesGH") + if typesgh and len(typesgh) == 2: + react_attr, prod_attr = typesgh + attrs["typesGH"] = ( + tuple(list(react_attr[:2]) + [left_h] + list(react_attr[3:])), + tuple(list(prod_attr[:2]) + [right_h] + list(prod_attr[3:])), + ) + # ================================================================== # # Dunder methods # # ================================================================== # diff --git a/synkit/Synthesis/Reactor/imba_engine.py b/synkit/Synthesis/Reactor/imba_engine.py index 3a64dd3..d98e29d 100644 --- a/synkit/Synthesis/Reactor/imba_engine.py +++ b/synkit/Synthesis/Reactor/imba_engine.py @@ -47,6 +47,7 @@ def __init__( partial: bool = False, embed_threshold: float = None, embed_pre_filter: bool = False, + electron_diagnostics: bool = False, ) -> None: # Assign parameters self.substrate = substrate @@ -60,8 +61,10 @@ def __init__( self.partial = partial self.embed_threshold = embed_threshold self.embed_pre_filter = embed_pre_filter + self.electron_diagnostics = electron_diagnostics # Internal state self._results: List[str] = [] + self._diagnostics = [] # Auto-run fit on init self.fit() @@ -111,8 +114,10 @@ def fit(self) -> "ImbaEngine": canonicaliser=self.canonicaliser, embed_threshold=self.embed_threshold, embed_pre_filter=self.embed_pre_filter, + electron_diagnostics=self.electron_diagnostics, ) raw_smarts: List[str] = reactor.smarts_list + self._diagnostics = reactor.diagnostics # Add radical wildcards if requested if self.add_wildcard: @@ -145,6 +150,11 @@ def smarts_list(self) -> List[str]: """ return self._results.copy() + @property + def diagnostics(self) -> list[dict]: + """Electron diagnostics from the last underlying reactor run.""" + return list(self._diagnostics) + def __len__(self) -> int: """ 
Number of product SMARTS results. diff --git a/synkit/Synthesis/Reactor/partial_engine.py b/synkit/Synthesis/Reactor/partial_engine.py index d1330e0..b839bd1 100644 --- a/synkit/Synthesis/Reactor/partial_engine.py +++ b/synkit/Synthesis/Reactor/partial_engine.py @@ -18,7 +18,12 @@ class PartialEngine: :type template: str """ - def __init__(self, smi: str, template: str) -> None: + def __init__( + self, + smi: str, + template: str, + electron_diagnostics: bool = False, + ) -> None: """Initialize the PartialEngine. - Removes explicit hydrogens from the given template SMARTS. @@ -39,6 +44,8 @@ def __init__(self, smi: str, template: str) -> None: # Build host graph from the provided SMILES or rsmi self.host = smiles_to_graph(smi) + self.electron_diagnostics = electron_diagnostics + self._diagnostics = [] def fit(self, invert: bool = False) -> list[str]: """Apply the template in one direction to generate radical‐wildcarded @@ -63,8 +70,15 @@ def fit(self, invert: bool = False) -> list[str]: implicit_temp=True, explicit_h=False, invert=invert, + electron_diagnostics=self.electron_diagnostics, ) # Generate SMARTS, then inject radical wildcards smarts_list = reactor.smarts_list + self._diagnostics = reactor.diagnostics wildcarded = [RadicalWildcardAdder().transform(rxn) for rxn in smarts_list] return wildcarded + + @property + def diagnostics(self) -> list[dict]: + """Electron diagnostics from the last reactor run.""" + return list(self._diagnostics) diff --git a/synkit/Synthesis/Reactor/rbl_engine.py b/synkit/Synthesis/Reactor/rbl_engine.py index 4517d31..90b70d5 100644 --- a/synkit/Synthesis/Reactor/rbl_engine.py +++ b/synkit/Synthesis/Reactor/rbl_engine.py @@ -302,6 +302,7 @@ def __init__( max_mappings_per_pair: int = 1, implicit_temp: bool = True, explicit_h: bool = False, + electron_diagnostics: bool = False, embed_threshold: int = 10_000, reactor_cls: type = SynReactor, wildcard_adder_cls: type = RadicalWildcardAdder, @@ -338,6 +339,7 @@ def __init__( # Reactor 
behaviour flags self.implicit_temp: bool = bool(implicit_temp) self.explicit_h: bool = bool(explicit_h) + self.electron_diagnostics: bool = bool(electron_diagnostics) self.embed_threshold: int = int(embed_threshold) # Dependencies (DI) @@ -369,6 +371,11 @@ def __init__( self._backward_its: List[ITSLike] = [] self._fused_its: List[ITSLike] = [] self._fused_rsmis: List[str] = [] + self._diagnostics: Dict[str, List[Dict[str, Any]]] = { + "forward": [], + "backward": [], + "quick_check": [], + } # Result / termination bookkeeping self._last_stop_mode: str = "not_run" @@ -441,6 +448,7 @@ def _reset_run_state(self) -> None: self._backward_its = [] self._fused_its = [] self._fused_rsmis = [] + self._diagnostics = {"forward": [], "backward": [], "quick_check": []} self._last_stop_mode = "not_run" self._last_stop_reason = "not_run" self._last_stop_metadata = {} @@ -561,8 +569,14 @@ def result(self) -> Dict[str, Any]: "n_forward_its": len(self._forward_its), "n_backward_its": len(self._backward_its), "n_fused_its": len(self._fused_its), + "diagnostics": self.diagnostics, } + @property + def diagnostics(self) -> Dict[str, List[Dict[str, Any]]]: + """Electron diagnostics grouped by reactor stage.""" + return {stage: list(reports) for stage, reports in self._diagnostics.items()} + # ------------------------------------------------------------------ # Template preparation # ------------------------------------------------------------------ @@ -713,7 +727,10 @@ def _run_reaction( automorphism=False, invert=invert, embed_threshold=self.embed_threshold, + electron_diagnostics=self.electron_diagnostics, ) + stage = "backward" if invert else "forward" + self._diagnostics[stage].extend(getattr(reactor, "diagnostics", []) or []) out: List[ITSLike] = [] its_list: Sequence[ITSLike] = getattr(reactor, "its", []) or [] @@ -934,6 +951,10 @@ def _quick_check( automorphism=False, invert=False, embed_threshold=self.embed_threshold, + electron_diagnostics=self.electron_diagnostics, + ) + 
self._diagnostics["quick_check"].extend( + getattr(reactor, "diagnostics", []) or [] ) sols: Sequence[str] = getattr(reactor, "smarts", []) or [] diff --git a/synkit/Synthesis/Reactor/rule_filter.py b/synkit/Synthesis/Reactor/rule_filter.py index 6e174ad..03a3d80 100644 --- a/synkit/Synthesis/Reactor/rule_filter.py +++ b/synkit/Synthesis/Reactor/rule_filter.py @@ -3,8 +3,10 @@ from synkit.Graph.Matcher.turbo_iso import TurboISO from synkit.Graph.Matcher.sing import SING from synkit.Graph.ITS import its_decompose +from synkit.Graph.ITS.its_reverter import ITSReverter from synkit.Graph.Matcher.subgraph_matcher import SubgraphMatch from synkit.Graph.Hyrogen._misc import h_to_explicit +from synkit.IO.chem_converter import detect_its_format class RuleFilter: @@ -76,7 +78,7 @@ def __init__( # Decompose patterns via ITS self._patterns = [ - its_decompose(r)[1] if self._invert else its_decompose(r)[0] + self._decompose_rule(r)[1] if self._invert else self._decompose_rule(r)[0] for r in self._rules ] @@ -99,6 +101,14 @@ def __init__( self._matches = [self._match(p) for p in self._patterns] self._new_rules = [r for r, m in zip(self._rules, self._matches) if m] + @staticmethod + def _decompose_rule(rule: nx.Graph) -> tuple[nx.Graph, nx.Graph]: + """Return left/right rule fragments for either ITS representation.""" + if detect_its_format(rule) == "tuple": + reverter = ITSReverter(rule) + return reverter.to_reactant_graph(), reverter.to_product_graph() + return its_decompose(rule) + def _match(self, pattern: nx.Graph) -> bool: """Test whether the given pattern occurs as a subgraph in the host. 
diff --git a/synkit/Synthesis/Reactor/syn_reactor.py b/synkit/Synthesis/Reactor/syn_reactor.py index ef228cd..4c49028 100644 --- a/synkit/Synthesis/Reactor/syn_reactor.py +++ b/synkit/Synthesis/Reactor/syn_reactor.py @@ -6,9 +6,18 @@ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union import networkx as nx +from rdkit import Chem +from networkx.algorithms.isomorphism import ( + GraphMatcher, + categorical_edge_match, + categorical_node_match, +) from synkit.IO.chem_converter import ( + ITSFormat, + _get_preserved_hydrogen_maps, + detect_its_format, smiles_to_graph, rsmi_to_its, graph_to_smi, @@ -20,18 +29,29 @@ from synkit.Graph.syn_graph import SynGraph from synkit.Graph.canon_graph import GraphCanonicaliser from synkit.Graph.ITS.its_decompose import its_decompose +from synkit.Graph.ITS.its_reverter import ITSReverter from synkit.Graph.ITS.its_construction import ITSConstruction from synkit.Graph.Matcher.automorphism import ( Automorphism, ) from synkit.Graph.Matcher.dedup_matches import deduplicate_matches_with_anchor from synkit.Graph.Matcher.auto_est import AutoEst +from synkit.Graph.Matcher.graph_cluster import GraphCluster from synkit.Graph.Matcher.partial_matcher import PartialMatcher from synkit.Graph.Matcher.subgraph_matcher import SubgraphSearchEngine +from synkit.Graph.Matcher.subgraph_matcher import resolve_template_match_attrs +from synkit.Graph.Feature.wl_hash import WLHash +from synkit.Graph.Mech.electron_accounting import ( + graph_to_sanitized_kekule_mol, + refresh_electron_fields, +) +from synkit.IO.graph_to_mol import GraphToMol +from synkit.IO.mol_to_graph import MolToGraph from synkit.Graph.Hyrogen._misc import ( h_to_implicit, h_to_explicit, has_XH, + implicit_hydrogen, ) from synkit.Graph import ( remove_wildcard_nodes, @@ -53,6 +73,18 @@ log = setup_logging(task_type="synreactor") +ITS_STRUCTURAL_NODE_ATTRS = [ + "element", + "aromatic", + "hcount", + "charge", + "radical", + "lone_pairs", + "valence_electrons", + "present", 
+] +ITS_STRUCTURAL_EDGE_ATTRS = ["order", "kekule_order", "sigma_order", "pi_order"] + # ────────────────────────────────────────────────────────────────────────────── # SynReactor core @@ -88,6 +120,12 @@ class SynReactor: :param partial: If True, use a partial matching fallback. Defaults to False. :type partial: bool + :param template_format: ITS representation used when ``template`` is a + reaction string. Defaults to ``"typesGH"`` for compatibility. + :type template_format: ITSFormat + :param electron_diagnostics: If True, expose per-result electron-accounting + diagnostics without changing generated products. + :type electron_diagnostics: bool :ivar _graph: Cached SynGraph for the substrate. :vartype _graph: Optional[SynGraph] :ivar _rule: Cached SynRule for the template. @@ -111,6 +149,8 @@ class SynReactor: implicit_temp: bool = False strategy: Strategy | str = Strategy.ALL partial: bool = False + template_format: ITSFormat = "typesGH" + electron_diagnostics: bool = False embed_threshold: Optional[int] = None embed_pre_filter: bool = False automorphism: bool = True @@ -121,6 +161,7 @@ class SynReactor: _mappings: List[MappingDict] | None = field(init=False, default=None, repr=False) _its: List[nx.Graph] | None = field(init=False, default=None, repr=False) _smarts: List[str] | None = field(init=False, default=None, repr=False) + _host_for_matching: nx.Graph | None = field(init=False, default=None, repr=False) _flag_pattern_has_explicit_H: bool = field(init=False, default=False, repr=False) def __post_init__(self) -> None: @@ -149,6 +190,8 @@ def from_smiles( implicit_temp: bool = False, automorphism: bool = False, strategy: Strategy | str = Strategy.ALL, + template_format: ITSFormat = "typesGH", + electron_diagnostics: bool = False, ) -> "SynReactor": """ Alternate constructor: build a SynReactor directly from SMILES. @@ -168,6 +211,12 @@ def from_smiles( :type implicit_temp: bool :param strategy: Matching strategy: ALL, 'comp', or 'bt'. Defaults to ALL. 
:type strategy: Strategy or str + :param template_format: ITS representation used when ``template`` is a + reaction string. Defaults to ``"typesGH"``. + :type template_format: ITSFormat + :param electron_diagnostics: If True, expose per-result electron + diagnostics without changing products. + :type electron_diagnostics: bool :returns: A new `SynReactor` instance. :rtype: SynReactor """ @@ -180,6 +229,8 @@ def from_smiles( implicit_temp=implicit_temp, strategy=strategy, automorphism=automorphism, + template_format=template_format, + electron_diagnostics=electron_diagnostics, ) # ------------------------------------------------------------------ @@ -224,16 +275,20 @@ def mappings(self) -> List[MappingDict]: if has_wildcard_node(pattern_graph): pattern_graph = remove_wildcard_nodes(pattern_graph) + pattern_graph = self._with_aromatic_n_pi_roles(pattern_graph) + matching_host = self._with_aromatic_n_pi_roles(self._matching_host_graph()) + node_attrs, edge_attrs = resolve_template_match_attrs(pattern_graph) + # --- Choose matcher ------------------------------------------------ if self.partial: max_results = ( self.embed_threshold / 100 if self.embed_threshold else None ) matcher = PartialMatcher( - host=self.graph.raw, + host=matching_host, pattern=pattern_graph, - node_attrs=["element", "charge"], - edge_attrs=["order"], + node_attrs=node_attrs, + edge_attrs=edge_attrs, strategy=Strategy.from_string(self.strategy), threshold=self.embed_threshold, pre_filter=self.embed_pre_filter, @@ -243,10 +298,10 @@ def mappings(self) -> List[MappingDict]: raw_maps = matcher.get_mappings() else: raw_maps = SubgraphSearchEngine.find_subgraph_mappings( - host=self.graph.raw, + host=matching_host, pattern=pattern_graph, - node_attrs=["element", "charge"], - edge_attrs=["order"], + node_attrs=node_attrs, + edge_attrs=edge_attrs, strategy=Strategy.from_string(self.strategy), threshold=self.embed_threshold, pre_filter=self.embed_pre_filter, @@ -254,11 +309,36 @@ def mappings(self) -> 
List[MappingDict]: # --- Automorphism pruning ---------------------------------------- if self.automorphism and raw_maps: - auto = Automorphism(pattern_graph) + automorphism_pattern = self._automorphism_pattern_graph(pattern_graph) + auto = Automorphism( + automorphism_pattern, + node_attr_keys=self._automorphism_node_attrs( + automorphism_pattern, + node_attrs, + ), + edge_attr_keys=edge_attrs, + ) + host_auto = Automorphism( + self._matching_host_graph(), + node_attr_keys=node_attrs, + edge_attr_keys=edge_attrs, + ) self._mappings = deduplicate_matches_with_anchor( raw_maps, pattern_orbits=auto.orbits, pattern_anchor=auto.anchor_component, + host_orbits=host_auto.orbits, + host_anchor=host_auto.anchor_component, + ) + self._mappings = self._deduplicate_equivalent_free_components( + self._mappings, + automorphism_pattern, + auto.anchor_component, + self._automorphism_node_attrs( + automorphism_pattern, + node_attrs, + ), + edge_attrs, ) log.debug( "Automorphism pruning: %d → %d unique mapping(s)", @@ -282,6 +362,20 @@ def mappings(self) -> List[MappingDict]: log.info("%d mapping(s) discovered", len(self._mappings)) return self._mappings + @staticmethod + def _with_aromatic_n_pi_roles(graph: nx.Graph) -> nx.Graph: + """Label aromatic nitrogens by incident aromatic pi-bond count.""" + decorated = graph.copy() + for node, attrs in decorated.nodes(data=True): + if attrs.get("element") != "N" or not attrs.get("aromatic", False): + continue + attrs["aromatic_n_pi_count"] = sum( + 1 + for _, _, edge in decorated.edges(node, data=True) + if edge.get("order") == 1.5 and edge.get("pi_order") == 1.0 + ) + return decorated + @property def its_list(self) -> List[nx.Graph]: """Build ITS graphs for each subgraph mapping. 
@@ -291,7 +385,7 @@ def its_list(self) -> List[nx.Graph]: """ if self._its is None: # Build ITS for each mapping ------------------------------- - host_raw = self.graph.raw + host_raw = self._matching_host_graph() rc_raw = self.rule.rc.raw self._its = [] for m in self.mappings: @@ -309,6 +403,7 @@ def its_list(self) -> List[nx.Graph]: if self.explicit_h: self._its = [self._explicit_h(g) for g in self._its] + self._its = self._deduplicate_structural_its(self._its) log.debug("Built %d ITS graph(s)", len(self._its)) return self._its @@ -324,8 +419,67 @@ def smarts_list(self) -> List[str]: self._smarts = [value for value in self._smarts if value] if self.invert: self._smarts = [reverse_reaction(rsmi) for rsmi in self._smarts] + self._smarts = list(dict.fromkeys(self._smarts)) return self._smarts + @property + def diagnostics(self) -> List[Dict[str, Any]]: + """Return optional electron-accounting diagnostics for built ITS graphs.""" + if not self.electron_diagnostics: + return [] + + reports: List[Dict[str, Any]] = [] + for index, its in enumerate(self.its_list): + if its.graph.get("electron_aware_rewrite", False): + mismatches = {} + for node, attrs in its.nodes(data=True): + charge_mismatch = attrs.get("charge_mismatch") + mismatch = ( + charge_mismatch[1] + if isinstance(charge_mismatch, tuple) + and len(charge_mismatch) == 2 + else charge_mismatch + ) + if mismatch: + template_charge = attrs.get("template_charge") + recomputed_charge = attrs.get("recomputed_charge") + mismatches[node] = { + "charge": ( + template_charge[1] + if isinstance(template_charge, tuple) + and len(template_charge) == 2 + else template_charge + ), + "recomputed_charge": ( + recomputed_charge[1] + if isinstance(recomputed_charge, tuple) + and len(recomputed_charge) == 2 + else recomputed_charge + ), + } + else: + product = self._product_graph_for_diagnostics(its) + refreshed = refresh_electron_fields(product) + mismatches = { + node: { + "charge": attrs.get("charge"), + "recomputed_charge": 
attrs.get("recomputed_charge"), + } + for node, attrs in refreshed.nodes(data=True) + if attrs.get("charge_mismatch") + } + reports.append( + { + "index": index, + "electron_aware_rewrite": bool( + its.graph.get("electron_aware_rewrite", False) + ), + "mismatch_count": len(mismatches), + "mismatches": mismatches, + } + ) + return reports + # Backward‑compat aliases (original attribute names) ---------------- smarts = property(lambda self: self.smarts_list) its = property(lambda self: self.its_list) @@ -392,36 +546,99 @@ def _wrap_template(self, tpl: Union[str, nx.Graph, SynRule]) -> SynRule: elif isinstance(tpl, nx.Graph): graph = tpl elif isinstance(tpl, str): - graph = rsmi_to_its(tpl) + graph = rsmi_to_its(tpl, format=self.template_format) else: # pragma: no cover raise TypeError(f"Unsupported template type: {type(tpl)}") # graph = normalize_h_pair_graph(graph) + format = detect_its_format(graph) + # Invert if asked ----------------------------------------------------- if self.invert: if self.implicit_temp: - graph = self._invert_template(graph, balance_its=True) + graph = self._invert_template( + graph, + balance_its=True, + format=format, + ) return SynRule( graph, canonicaliser=self.canonicaliser or GraphCanonicaliser(), + format=format, ) else: - graph = self._invert_template(graph, balance_its=False) + graph = self._invert_template( + graph, + balance_its=False, + format=format, + ) return SynRule( - graph, canonicaliser=self.canonicaliser or GraphCanonicaliser() + graph, + canonicaliser=self.canonicaliser or GraphCanonicaliser(), + format=format, ) else: if self.implicit_temp: return SynRule( graph, canonicaliser=self.canonicaliser or GraphCanonicaliser(), + format=format, ) return SynRule( - graph, canonicaliser=self.canonicaliser or GraphCanonicaliser() + graph, + canonicaliser=self.canonicaliser or GraphCanonicaliser(), + format=format, ) + def _matching_host_graph(self) -> nx.Graph: + """Return the host graph normalized to the active rule 
representation.""" + if self._host_for_matching is None: + host = self.graph.raw + if getattr(self.rule, "_format", None) == "tuple": + host = self._implicit_heavy_hydrogens(host) + self._host_for_matching = host + return self._host_for_matching + + @staticmethod + def _implicit_heavy_hydrogens(graph: nx.Graph) -> nx.Graph: + """Convert ordinary heavy-atom-bound explicit H nodes into hcount.""" + normalized = graph.copy() + removable = [] + for node, attrs in normalized.nodes(data=True): + if attrs.get("element") != "H": + continue + neighbors = list(normalized.neighbors(node)) + heavy_neighbors = [ + nbr for nbr in neighbors if normalized.nodes[nbr].get("element") != "H" + ] + if heavy_neighbors and len(heavy_neighbors) == len(neighbors): + removable.append((node, heavy_neighbors)) + + for h, heavy_neighbors in removable: + if not normalized.has_node(h): + continue + for heavy in heavy_neighbors: + normalized.nodes[heavy]["hcount"] = ( + normalized.nodes[heavy].get("hcount", 0) + 1 + ) + normalized.remove_node(h) + return normalized + @staticmethod - def _invert_template(tpl: nx.Graph, balance_its: bool = True) -> nx.Graph: + def _invert_template( + tpl: nx.Graph, + balance_its: bool = True, + format: ITSFormat | None = None, + ) -> nx.Graph: + resolved_format = format or detect_its_format(tpl) + if resolved_format == "tuple": + reverter = ITSReverter(tpl) + l, r = reverter.to_reactant_graph(), reverter.to_product_graph() + return ITSConstruction().construct( + r, + l, + balance_its=balance_its, + ) l, r = its_decompose(tpl) return ITSConstruction().ITSGraph(r, l, balance_its=balance_its) @@ -455,8 +672,9 @@ def _node_glue( # host_p[0] = '*' host_n[key] = (new_r, new_p) - if "h_pairs" in pat_n: - host_n["h_pairs"] = pat_n["h_pairs"] + for key in ("h_pairs", "h_pairs_left", "h_pairs_right", "h_pair_atom_maps"): + if key in pat_n: + host_n[key] = pat_n[key] @staticmethod def _get_explicit_map( @@ -493,6 +711,7 @@ def _glue_graph( ) -> List[nx.Graph]: list_its: 
List[nx.Graph] = [] host_g = deepcopy(host) + electron_aware = SynReactor._is_electron_aware_template(rc) def _default_tg(a: Dict[str, Any]) -> Tuple[Tuple[Any, ...], Tuple[Any, ...]]: tpl = ( @@ -506,6 +725,8 @@ def _default_tg(a: Dict[str, Any]) -> Tuple[Tuple[Any, ...], Tuple[Any, ...]]: for _, data in host_g.nodes(data=True): data.setdefault("typesGH", _default_tg(data)) + if electron_aware: + SynReactor._ensure_host_atom_maps(host_g) if pattern_has_explicit_H: mappings, host_g = SynReactor._get_explicit_map( @@ -516,6 +737,8 @@ def _default_tg(a: Dict[str, Any]) -> Tuple[Tuple[Any, ...], Tuple[Any, ...]]: embed_threshold, embed_pre_filter, ) + if electron_aware: + SynReactor._ensure_host_atom_maps(host_g) else: mappings = [mapping] @@ -525,11 +748,21 @@ def _default_tg(a: Dict[str, Any]) -> Tuple[Tuple[Any, ...], Tuple[Any, ...]]: its = deepcopy(host_g) # This should only work for implict cases if len(m.keys()) < rc.number_of_nodes(): - its, m = add_wildcard_subgraph_for_unmapped(its, rc, m) + its, m = add_wildcard_subgraph_for_unmapped( + its, + rc, + m, + tuple_mode=electron_aware, + ) for _, _, data in its.edges(data=True): o = data.get("order", 1.0) data["order"] = (o, o) + if electron_aware: + sigma = data.get("sigma_order", 1.0 if o else 0.0) + pi = data.get("pi_order", max(0.0, float(o) - 1.0)) + data["sigma_order"] = (sigma, sigma) + data["pi_order"] = (pi, pi) data.setdefault("standard_order", 0.0) for _, data in rc.nodes(data=True): @@ -539,6 +772,11 @@ def _default_tg(a: Dict[str, Any]) -> Tuple[Tuple[Any, ...], Tuple[Any, ...]]: for rc_n, host_n in m.items(): if its.has_node(host_n): SynReactor._node_glue(its.nodes[host_n], rc.nodes[rc_n]) + if electron_aware: + SynReactor._pair_electron_aware_node_attrs( + its.nodes[host_n], + rc.nodes[rc_n], + ) # merge edges (additive order) --------------------------- for u, v, rc_attr in rc.edges(data=True): @@ -553,17 +791,423 @@ def _default_tg(a: Dict[str, Any]) -> Tuple[Tuple[Any, ...], Tuple[Any, ...]]: 
if rc_order[0] == 0: # additive only on product side ho = host_attr["order"] host_attr["order"] = (ho[0], round(ho[1] + rc_order[1])) + if electron_aware: + host_sigma = host_attr.get("sigma_order", (0.0, 0.0)) + host_pi = host_attr.get("pi_order", (0.0, 0.0)) + rc_sigma = rc_attr.get("sigma_order", (0.0, 0.0)) + rc_pi = rc_attr.get("pi_order", (0.0, 0.0)) + host_attr["sigma_order"] = ( + host_sigma[0], + host_sigma[1] + rc_sigma[1], + ) + host_attr["pi_order"] = ( + host_pi[0], + host_pi[1] + rc_pi[1], + ) host_attr["standard_order"] += rc_attr.get( "standard_order", 0.0 ) else: host_attr.update(rc_attr) + if electron_aware: + SynReactor._refresh_product_electron_fields(its) + its.graph["electron_aware_rewrite"] = electron_aware list_its.append(its) return list_its + @staticmethod + def _is_electron_aware_template(rc: nx.Graph) -> bool: + """Return whether an RC carries sigma/pi rewrite state.""" + return any( + "sigma_order" in data and "pi_order" in data + for _, _, data in rc.edges(data=True) + ) + + @staticmethod + def _automorphism_node_attrs( + pattern: nx.Graph, + node_attrs: List[str], + ) -> List[str]: + """Keep pruning at least as role-aware as the stored template data.""" + attrs = list(node_attrs) + for attr in ("aromatic", "neighbors", "_rewrite_role"): + if attr not in attrs and any( + attr in data for _, data in pattern.nodes(data=True) + ): + attrs.append(attr) + return attrs + + def _automorphism_pattern_graph(self, pattern: nx.Graph) -> nx.Graph: + """Decorate tuple patterns with product-side rewrite roles for pruning.""" + if getattr(self.rule, "_format", None) != "tuple": + return pattern + + decorated = pattern.copy() + rc = self.rule.rc.raw + for node, attrs in decorated.nodes(data=True): + rc_attrs = rc.nodes.get(node) + if not rc_attrs: + continue + types = rc_attrs.get("typesGH") + if isinstance(types, tuple) and len(types) == 2: + attrs["_rewrite_role"] = self._chemical_rewrite_role(types[1]) + return decorated + + @staticmethod + def 
_chemical_rewrite_role(role: Any) -> Any: + """Drop provenance-only atom-map identity from tuple rewrite roles.""" + if isinstance(role, tuple) and len(role) >= 9: + chemical_role = role[:-1] + if chemical_role[0] == "H": + return chemical_role[:-1] + ((),) + return chemical_role + return role + + @staticmethod + def _prepare_its_for_structural_cluster(its: nx.Graph) -> nx.Graph: + """Attach one combined edge signature for exact ITS clustering.""" + prepared = deepcopy(its) + if prepared.graph.get("electron_aware_rewrite", False): + SynReactor._refresh_product_electron_fields(prepared) + aromatic_nodes = { + node + for u, v, attrs in prepared.edges(data=True) + if attrs.get("order") == (1.5, 1.5) + for node in (u, v) + } + for node in aromatic_nodes: + template_charge = prepared.nodes[node].get("template_charge") + if isinstance(template_charge, tuple) and len(template_charge) == 2: + prepared.nodes[node]["charge"] = template_charge + for _, _, attrs in prepared.edges(data=True): + edge_values = [] + aromatic_unchanged = attrs.get("order") == (1.5, 1.5) + for name in ITS_STRUCTURAL_EDGE_ATTRS: + value = attrs.get(name) + if aromatic_unchanged and name in { + "kekule_order", + "sigma_order", + "pi_order", + }: + value = "aromatic_phase" + edge_values.append(value) + attrs["_its_edge_sig"] = tuple(edge_values) + return prepared + + @staticmethod + def _deduplicate_structural_its(its_graphs: List[nx.Graph]) -> List[nx.Graph]: + """Keep one representative for each structurally unique ITS graph.""" + if len(its_graphs) < 2: + return its_graphs + + hasher = WLHash( + node=ITS_STRUCTURAL_NODE_ATTRS, + edge="_its_edge_sig", + ) + buckets: Dict[str, List[Tuple[int, nx.Graph]]] = defaultdict(list) + for index, its in enumerate(its_graphs): + prepared = SynReactor._prepare_its_for_structural_cluster(its) + signature = hasher.weisfeiler_lehman_graph_hash(prepared) + buckets[signature].append((index, prepared)) + + cluster = GraphCluster( + 
node_label_names=ITS_STRUCTURAL_NODE_ATTRS, + node_label_default=["*", False, 0, 0, 0, 0, 0, ()], + edge_attribute="_its_edge_sig", + ) + representative_indices: List[int] = [] + for bucket in buckets.values(): + if len(bucket) == 1: + representative_indices.append(bucket[0][0]) + continue + prepared = [prepared for _, prepared in bucket] + classes, _ = cluster.iterative_cluster(prepared) + for cls in classes: + representative_indices.append(bucket[min(cls)][0]) + + representative_indices.sort() + return [its_graphs[index] for index in representative_indices] + + def _deduplicate_equivalent_free_components( + self, + mappings: List[MappingDict], + pattern: nx.Graph, + anchor: Optional[frozenset[NodeId]], + node_attrs: List[str], + edge_attrs: List[str], + ) -> List[MappingDict]: + """Collapse swaps of equivalent disconnected non-anchor tuple components.""" + if getattr(self.rule, "_format", None) != "tuple" or len(mappings) < 2: + return mappings + + anchor = anchor or frozenset() + free_components = [ + frozenset(component) + for component in nx.connected_components(pattern) + if not set(component) & set(anchor) + ] + if len(free_components) < 2: + return mappings + + component_groups: List[List[frozenset[NodeId]]] = [] + for component in free_components: + for group in component_groups: + if self._components_are_equivalent( + pattern, + component, + group[0], + node_attrs, + edge_attrs, + ): + group.append(component) + break + else: + component_groups.append([component]) + + swappable_groups = [group for group in component_groups if len(group) > 1] + if not swappable_groups: + return mappings + + swappable_nodes = set().union( + *[set().union(*group) for group in swappable_groups] + ) + seen = set() + unique: List[MappingDict] = [] + for mapping in mappings: + fixed = tuple( + sorted( + (node, host) + for node, host in mapping.items() + if node not in swappable_nodes + ) + ) + component_bags = tuple( + tuple( + sorted( + tuple(sorted(mapping[node] for node in 
component)) + for component in group + ) + ) + for group in swappable_groups + ) + signature = (fixed, component_bags) + if signature in seen: + continue + seen.add(signature) + unique.append(mapping) + return unique + + @staticmethod + def _components_are_equivalent( + pattern: nx.Graph, + left: frozenset[NodeId], + right: frozenset[NodeId], + node_attrs: List[str], + edge_attrs: List[str], + ) -> bool: + """Return whether two disconnected pattern components have one role shape.""" + left_graph = pattern.subgraph(left) + right_graph = pattern.subgraph(right) + node_defaults = [0 if attr == "charge" else "*" for attr in node_attrs] + edge_defaults = [1.0 for _ in edge_attrs] + matcher = GraphMatcher( + left_graph, + right_graph, + node_match=categorical_node_match(node_attrs, node_defaults), + edge_match=categorical_edge_match(edge_attrs, edge_defaults), + ) + return matcher.is_isomorphic() + + @staticmethod + def _pair_electron_aware_node_attrs( + host_n: Dict[str, Any], + rc_n: Dict[str, Any], + ) -> None: + """Store direct paired node attrs after legacy-compatible node glue.""" + _, product_types = host_n["typesGH"] + rc_present = rc_n.get("present") + reactant_is_absent = ( + isinstance(rc_present, tuple) and len(rc_present) == 2 and not rc_present[0] + ) + legacy_product_values = { + "element": product_types[0], + "aromatic": product_types[1], + "hcount": product_types[2], + "neighbors": product_types[4], + } + + for key, product_value in legacy_product_values.items(): + left_value = host_n.get(key) + product_is_absent = ( + isinstance(rc_present, tuple) + and len(rc_present) == 2 + and not rc_present[1] + ) + rc_value = rc_n.get(key) + if ( + reactant_is_absent + and isinstance(rc_value, tuple) + and len(rc_value) == 2 + ): + product_value = rc_value[1] + if key == "element" and product_value == "*" and not product_is_absent: + product_value = left_value + host_n[key] = (left_value, product_value) + + for key in ("radical", "lone_pairs", "valence_electrons"): 
+ rc_value = rc_n.get(key) + if isinstance(rc_value, tuple) and len(rc_value) == 2: + left_value = host_n.get(key) + if left_value is None: + left_value = rc_value[0] + host_n[key] = (left_value, rc_value[1]) + + host_n["template_charge"] = (host_n.get("charge"), product_types[3]) + + # Electron-authoritative RCs derive charge at the product boundary. + # Keep the reactant-side value temporarily so mutation does not copy + # the RC's product charge label. + host_n["charge"] = (host_n.get("charge"), host_n.get("charge")) + + if "atom_map" in host_n: + host_n["atom_map"] = (host_n["atom_map"], host_n["atom_map"]) + if isinstance(rc_present, tuple) and len(rc_present) == 2: + host_n["present"] = (bool(host_n.get("present", True)), rc_present[1]) + else: + host_n["present"] = (True, True) + + @staticmethod + def _ensure_host_atom_maps(host: nx.Graph) -> None: + """Assign stable fresh atom maps to unmapped host atoms.""" + for node, attrs in host.nodes(data=True): + if attrs.get("atom_map") in (None, 0): + attrs["atom_map"] = node + + @staticmethod + def _refresh_product_electron_fields( + its: nx.Graph, + ) -> None: + """Refresh product-side electron fields from the scalar product graph.""" + product = SynReactor._prepared_electron_product_graph(its) + refreshed = refresh_electron_fields(product) + for node, attrs in refreshed.nodes(data=True): + current_charge = its.nodes[node].get("charge") + left_charge = ( + current_charge[0] + if isinstance(current_charge, tuple) and len(current_charge) == 2 + else current_charge + ) + product_charge = SynReactor._electron_product_charge(its, node, attrs) + if product_charge is not None: + its.nodes[node]["charge"] = (left_charge, product_charge) + + template_charge = its.nodes[node].get("template_charge") + if isinstance(template_charge, tuple) and len(template_charge) == 2: + attrs["charge_mismatch"] = template_charge[1] != attrs.get( + "recomputed_charge" + ) + + for key in ("bond_order_sum", "recomputed_charge", 
"charge_mismatch"): + if key in attrs: + current = its.nodes[node].get(key) + left_value = ( + current[0] + if isinstance(current, tuple) and len(current) == 2 + else current + ) + its.nodes[node][key] = (left_value, attrs[key]) + for u, v, attrs in refreshed.edges(data=True): + for key in ("kekule_order", "sigma_order", "pi_order"): + if key not in attrs: + continue + current = its.edges[u, v].get(key) + left_value = ( + current[0] + if isinstance(current, tuple) and len(current) == 2 + else current + ) + its.edges[u, v][key] = (left_value, attrs[key]) + + @staticmethod + def _prepared_electron_product_graph(its: nx.Graph) -> nx.Graph: + """Build the scalar product graph used for electron recomputation.""" + product = ITSReverter(its).to_product_graph() + preserved_hydrogens = _get_preserved_hydrogen_maps(its, "tuple") + product = implicit_hydrogen(product, set(preserved_hydrogens)) + return SynReactor._reperceive_product_kekule_phase(product, its) + + @staticmethod + def _electron_product_charge( + its: nx.Graph, + node: Any, + product_attrs: Mapping[str, Any], + ) -> Any: + """Choose the product charge used for electron-aware serialization. + + Non-aromatic tuple products are electron-authoritative and use the + recomputed formal charge. Aromatic tuple products are still an open + representation boundary: if the template explicitly carries a product + charge, preserve it instead of inventing cationic aromatic carbons from + an incomplete Kekule phase. 
+ """ + if node in its: + template_charge = its.nodes[node].get("template_charge") + aromatic = product_attrs.get("aromatic", its.nodes[node].get("aromatic")) + if ( + aromatic is True + and isinstance(template_charge, tuple) + and len(template_charge) == 2 + ): + return template_charge[1] + return product_attrs.get("recomputed_charge") + + @staticmethod + def _reperceive_product_kekule_phase(product: nx.Graph, its: nx.Graph) -> nx.Graph: + """Refresh aromatic sigma/pi phase from full product presentation bonds.""" + if not any(data.get("order") == 1.5 for _, _, data in product.edges(data=True)): + return product + + probe = product.copy() + for node, attrs in probe.nodes(data=True): + template_charge = its.nodes[node].get("template_charge") + if isinstance(template_charge, tuple) and len(template_charge) == 2: + attrs["charge"] = template_charge[1] + + try: + mol = GraphToMol(edge_attributes={"order": "order"}).graph_to_mol( + probe, + sanitize=True, + use_h_count=True, + ) + reperceived = MolToGraph(attr_profile="minimal").transform( + mol, + use_index_as_atom_map=True, + ) + except Exception: + return product + + refreshed = product.copy() + for u, v in refreshed.edges(): + if not reperceived.has_edge(u, v): + continue + for key in ("kekule_order", "sigma_order", "pi_order"): + if key in reperceived[u][v]: + refreshed[u][v][key] = reperceived[u][v][key] + return refreshed + + @staticmethod + def _product_graph_for_diagnostics(its: nx.Graph) -> nx.Graph: + """Return the product graph matching the rewrite representation.""" + if its.graph.get("electron_aware_rewrite", False): + return ITSReverter(its).to_product_graph() + return its_decompose(its)[1] + # --------------------- explicit‑H handling ------------------------- @staticmethod def _explicit_h(rc: nx.Graph) -> nx.Graph: + if bool(rc.graph.get("electron_aware_rewrite", False)): + return SynReactor._explicit_h_tuple(rc) + next_id = max((n for n in rc.nodes if isinstance(n, int)), default=-1) + 1 orig_delta: 
Dict[int, int] = {} pair_to_nodes: Dict[int, List[int]] = defaultdict(list) @@ -625,14 +1269,161 @@ def _explicit_h(rc: nx.Graph) -> nx.Graph: ) return rc + @staticmethod + def _explicit_h_tuple(rc: nx.Graph) -> nx.Graph: + """Materialize only hydrogens that were explicit in the template.""" + next_id = max((n for n in rc.nodes if isinstance(n, int)), default=-1) + 1 + pair_left: Dict[int, int] = {} + pair_right: Dict[int, int] = {} + for n, data in rc.nodes(data=True): + for pair_id in data.get("h_pairs_left", []): + pair_left[pair_id] = n + for pair_id in data.get("h_pairs_right", []): + pair_right[pair_id] = n + + explicit_pairs = sorted(set(pair_left) & set(pair_right)) + used_maps = { + value + for _, data in rc.nodes(data=True) + for atom_map in [data.get("atom_map")] + for value in ( + atom_map + if isinstance(atom_map, tuple) + else (() if atom_map in (None, 0) else (atom_map,)) + ) + } + for pair_id in explicit_pairs: + src = pair_left[pair_id] + dst = pair_right[pair_id] + h = next_id + next_id += 1 + preferred_map = rc.nodes[src].get("h_pair_atom_maps", {}).get( + pair_id + ) or rc.nodes[dst].get("h_pair_atom_maps", {}).get(pair_id) + atom_map = preferred_map if preferred_map not in used_maps else h + while atom_map in used_maps: + atom_map += 1 + used_maps.add(atom_map) + rc.add_node( + h, + element=("H", "H"), + aromatic=(False, False), + charge=(0, 0), + atom_map=(atom_map, atom_map), + hcount=(0, 0), + radical=(0, 0), + lone_pairs=(0, 0), + valence_electrons=(1, 1), + neighbors=([], []), + present=(True, True), + typesGH=(("H", False, 0, 0, []), ("H", False, 0, 0, [])), + ) + if src == dst: + rc.add_edge( + src, + h, + order=(1.0, 1.0), + kekule_order=(1.0, 1.0), + sigma_order=(1.0, 1.0), + pi_order=(0.0, 0.0), + standard_order=0.0, + ) + continue + rc.add_edge( + src, + h, + order=(1.0, 0.0), + kekule_order=(1.0, 0.0), + sigma_order=(1.0, 0.0), + pi_order=(0.0, 0.0), + standard_order=1.0, + ) + rc.add_edge( + h, + dst, + order=(0.0, 1.0), + 
kekule_order=(0.0, 1.0), + sigma_order=(0.0, 1.0), + pi_order=(0.0, 0.0), + standard_order=-1.0, + ) + + for pair_id in explicit_pairs: + src = pair_left[pair_id] + dst = pair_right[pair_id] + if src == dst: + h0, h1 = rc.nodes[src]["hcount"] + rc.nodes[src]["hcount"] = (h0 - 1, h1 - 1) + continue + src_h0, src_h1 = rc.nodes[src]["hcount"] + dst_h0, dst_h1 = rc.nodes[dst]["hcount"] + rc.nodes[src]["hcount"] = (src_h0 - 1, src_h1) + rc.nodes[dst]["hcount"] = (dst_h0, dst_h1 - 1) + + for n in set(pair_left.values()) | set(pair_right.values()): + if "typesGH" in rc.nodes[n]: + t0, t1 = rc.nodes[n]["typesGH"] + rc.nodes[n]["typesGH"] = ( + t0[:2] + (rc.nodes[n]["hcount"][0],) + t0[3:], + t1[:2] + (rc.nodes[n]["hcount"][1],) + t1[3:], + ) + + SynReactor._ensure_tuple_atom_maps(rc) + return rc + + @staticmethod + def _ensure_tuple_atom_maps(graph: nx.Graph) -> None: + """Assign stable paired atom maps to tuple nodes lacking visible maps.""" + for node, attrs in graph.nodes(data=True): + atom_map = attrs.get("atom_map") + if atom_map in (None, 0) or atom_map == (0, 0): + attrs["atom_map"] = (node, node) + # --------------------- SMARTS serialisation ----------------------- @staticmethod def _to_smarts(its: nx.Graph) -> str: - left, right = its_decompose(its) + electron_aware = bool(its.graph.get("electron_aware_rewrite", False)) + if electron_aware: + reverter = ITSReverter(its) + left = reverter.to_reactant_graph() + right = reverter.to_product_graph() + preserved_hydrogens = _get_preserved_hydrogen_maps(its, "tuple") + else: + left, right = its_decompose(its) + preserved_hydrogens = [] left = remove_wildcard_nodes(left) right = remove_wildcard_nodes(right) - r_smi = graph_to_smi(left) - p_smi = graph_to_smi(right) + r_smi = graph_to_smi(left, preserve_atom_maps=preserved_hydrogens) + if electron_aware: + product_candidates = [ + right, + SynReactor._prepared_electron_product_graph(its), + ] + p_smi = None + for product in product_candidates: + product = 
refresh_electron_fields(product) + for node, attrs in product.nodes(data=True): + product_charge = SynReactor._electron_product_charge( + its, + node, + attrs, + ) + if product_charge is not None: + attrs["charge"] = product_charge + if any( + attrs.get("order") == 1.5 + for _, _, attrs in product.edges(data=True) + ): + p_smi = graph_to_smi(product) + if p_smi is not None: + break + try: + p_smi = Chem.MolToSmiles(graph_to_sanitized_kekule_mol(product)) + break + except Exception: + p_smi = None + else: + p_smi = graph_to_smi(right) if r_smi is None or p_smi is None: return None return f"{r_smi}>>{p_smi}" diff --git a/synkit/Vis/__init__.py b/synkit/Vis/__init__.py index aa5119a..9bae5ca 100644 --- a/synkit/Vis/__init__.py +++ b/synkit/Vis/__init__.py @@ -1,5 +1,50 @@ from .graph_visualizer import GraphVisualizer from .rule_vis import RuleVis from .rxn_vis import RXNVis +from .visual_model import ( + VisualEdge, + VisualGraph, + VisualKind, + VisualNode, + detect_visual_kind, + iter_changed_edges, + iter_changed_nodes, + summarize_visual_graph, + to_visual_graph, +) +from .visual_drawer import draw_graph +from .molecule_drawer import draw_molecule_graph +from .reaction_drawer import ( + ReactionHighlights, + draw_reaction_graph, + draw_reaction_graphs, + find_reaction_highlights, +) +from .its_drawer import draw_its_from_rsmi, draw_its_graph, draw_its_only +from .mtg_drawer import draw_mtg_graph, draw_mtg_steps -__all__ = ["GraphVisualizer", "RuleVis", "RXNVis"] +__all__ = [ + "GraphVisualizer", + "RuleVis", + "RXNVis", + "VisualEdge", + "VisualGraph", + "VisualKind", + "VisualNode", + "detect_visual_kind", + "iter_changed_edges", + "iter_changed_nodes", + "summarize_visual_graph", + "to_visual_graph", + "draw_graph", + "draw_molecule_graph", + "ReactionHighlights", + "draw_reaction_graph", + "draw_reaction_graphs", + "find_reaction_highlights", + "draw_its_from_rsmi", + "draw_its_graph", + "draw_its_only", + "draw_mtg_graph", + "draw_mtg_steps", +] diff --git 
a/synkit/Vis/graph_visualizer.py b/synkit/Vis/graph_visualizer.py index 4030548..6c6bed6 100644 --- a/synkit/Vis/graph_visualizer.py +++ b/synkit/Vis/graph_visualizer.py @@ -232,7 +232,7 @@ def plot_as_mol( for n, d in g.nodes(data=True): charge = d.get("charge", 0) cstr = "" if charge == 0 else f"{charge:+}" - lbl = f"{d.get(symbol_key,'')}{cstr}" + lbl = f"{d.get(symbol_key, '')}{cstr}" if show_atom_map: lbl += f" ({d.get(aam_key)})" labels[n] = lbl diff --git a/synkit/Vis/its_drawer.py b/synkit/Vis/its_drawer.py new file mode 100644 index 0000000..bc10b9c --- /dev/null +++ b/synkit/Vis/its_drawer.py @@ -0,0 +1,615 @@ +from __future__ import annotations + +"""ITS visualization. + +The default ITS view is a single molecule-like transition graph. Reactant / +product molecular projections remain available through ``projection=True`` for +debugging and comparison. +""" + +from typing import Any, Optional, Tuple + +import matplotlib.patheffects as pe +import matplotlib.pyplot as plt +import networkx as nx + +from synkit.Graph.ITS.its_decompose import its_decompose +from synkit.Graph.ITS.its_reverter import ITSReverter +from synkit.IO.chem_converter import rsmi_to_its +from synkit.Vis.molecule_drawer import ( + _draw_aromatic_circles, + _draw_bond_lines, + _edge_is_aromatic, + _element_colors, + _element_label, + _index_offset_vec, + _layout_positions, + _luminance, + _set_padded_limits, +) +from synkit.Vis.reaction_drawer import draw_reaction_graphs, find_reaction_highlights +from synkit.Vis.visual_drawer import draw_graph + + +def draw_its_graph( + its: nx.Graph, + *, + title: Optional[str] = None, + mode: str = "sigma_pi", + show_atom_map: bool = True, + label_mode: str = "hetero", + aromatic_style: str = "circle", + include_delta_panel: bool = True, + projection: bool = False, + show_edge_labels: bool = False, + edge_label_mode: str = "kekule", + show_electron_labels: bool = False, + electron_label_mode: str = "charge", +) -> tuple[plt.Figure, list[plt.Axes]]: + 
"""Draw an ITS graph. + + By default this draws only the ITS as a molecule-like graph. Changed bonds + are colored and compactly labeled from ``kekule_order``. Optional node + electron labels can show one of charge, lone-pair, radical, or all changes. + Set ``projection=True`` to draw reactant/product molecular projections plus + a diagnostic ITS panel. + + :param its: ITS graph in tuple or legacy representation. + :type its: nx.Graph + :param title: Optional figure title. + :type title: Optional[str] + :param mode: Diagnostic label mode for the projection-mode delta panel. + :type mode: str + :param show_atom_map: Show atom-map labels. + :type show_atom_map: bool + :param label_mode: Atom label mode. + :type label_mode: str + :param aromatic_style: Aromatic style for molecular panels. + :type aromatic_style: str + :param include_delta_panel: In projection mode, include a diagnostic ITS + graph panel. + :type include_delta_panel: bool + :param projection: If ``True``, draw reactant/product molecular projection + panels plus an ITS delta panel. If ``False``, draw only the ITS graph. + :type projection: bool + :param show_edge_labels: If ``True``, show labels for unchanged edges too. + Changed edge labels are shown by default unless ``edge_label_mode`` is + ``"none"``. + :type show_edge_labels: bool + :param edge_label_mode: ``"kekule"``, ``"sigma_pi"``, or ``"none"``. + :type edge_label_mode: str + :param show_electron_labels: Show changed atom electron annotations. + :type show_electron_labels: bool + :param electron_label_mode: ``"charge"``, ``"lone_pair"``, ``"radical"``, + or ``"all"``. + :type electron_label_mode: str + :returns: ``(fig, axes)``. 
+ :rtype: tuple[plt.Figure, list[plt.Axes]] + """ + + if not projection: + fig, ax = plt.subplots(figsize=(7.0, 5.0), facecolor="white") + draw_its_only( + its, + ax=ax, + title=title or "ITS", + show_atom_map=show_atom_map, + label_mode=label_mode, + aromatic_style=aromatic_style, + show_edge_labels=show_edge_labels, + edge_label_mode=edge_label_mode, + show_electron_labels=show_electron_labels, + electron_label_mode=electron_label_mode, + ) + fig.tight_layout() + return fig, [ax] + + reactant, product = _its_to_side_graphs(its) + if not include_delta_panel: + return draw_reaction_graphs( + reactant, + product, + title=title or "ITS projections", + show_atom_map=show_atom_map, + highlight_reaction_center=True, + label_mode=label_mode, + aromatic_style=aromatic_style, + ) + + n_reaction_axes = ( + nx.number_connected_components(reactant) + + nx.number_connected_components(product) + + 1 + ) + fig = plt.figure( + figsize=(max(10.0, 3.1 * (n_reaction_axes + 1)), 3.7), + facecolor="white", + ) + grid = fig.add_gridspec( + 1, + n_reaction_axes + 1, + width_ratios=[1.0] * n_reaction_axes + [1.35], + ) + axes = [fig.add_subplot(grid[0, index]) for index in range(n_reaction_axes + 1)] + + # Draw panels directly here so the diagnostic ITS delta can share one + # figure with the molecular projections. 
+ from synkit.Vis.reaction_drawer import _components, _draw_arrow, _draw_part + + highlights = find_reaction_highlights(reactant, product) + panel = 0 + for index, part in enumerate(_components(reactant)): + _draw_part( + part, + axes[panel], + title="Reactant" if index == 0 else "+", + highlights=highlights, + side="reactant", + show_atom_map=show_atom_map, + label_mode=label_mode, + aromatic_style=aromatic_style, + ) + panel += 1 + _draw_arrow(axes[panel]) + panel += 1 + for index, part in enumerate(_components(product)): + _draw_part( + part, + axes[panel], + title="Product" if index == 0 else "+", + highlights=highlights, + side="product", + show_atom_map=show_atom_map, + label_mode=label_mode, + aromatic_style=aromatic_style, + ) + panel += 1 + draw_graph( + its, + ax=axes[-1], + mode=mode, + title="ITS delta", + show_atom_map=show_atom_map, + layout="kamada_kawai", + ) + if title: + fig.suptitle(title, fontsize=12, fontweight="bold", y=0.98) + fig.tight_layout() + return fig, axes + + +def draw_its_from_rsmi( + rsmi: str, + *, + format: str = "tuple", + core: bool = False, + title: Optional[str] = None, + mode: str = "sigma_pi", + show_atom_map: bool = True, + label_mode: str = "hetero", + aromatic_style: str = "circle", + include_delta_panel: bool = True, + projection: bool = False, + show_edge_labels: bool = False, + edge_label_mode: str = "kekule", + show_electron_labels: bool = False, + electron_label_mode: str = "charge", +) -> tuple[plt.Figure, list[plt.Axes]]: + """Build an ITS from RSMI and draw it.""" + + its = rsmi_to_its(rsmi, core=core, format=format) + return draw_its_graph( + its, + title=title or "ITS from RSMI", + mode=mode, + show_atom_map=show_atom_map, + label_mode=label_mode, + aromatic_style=aromatic_style, + include_delta_panel=include_delta_panel, + projection=projection, + show_edge_labels=show_edge_labels, + edge_label_mode=edge_label_mode, + show_electron_labels=show_electron_labels, + electron_label_mode=electron_label_mode, + ) + + 
+def draw_its_only( # noqa: C901 + its: nx.Graph, + *, + ax: Optional[plt.Axes] = None, + title: Optional[str] = None, + show_atom_map: bool = True, + label_mode: str = "hetero", + aromatic_style: str = "circle", + show_edge_labels: bool = False, + edge_label_mode: str = "kekule", + show_electron_labels: bool = False, + electron_label_mode: str = "charge", +) -> plt.Axes: + """Draw a molecule-like ITS transition graph on one axes.""" + + edge_label_mode = edge_label_mode.lower() + if edge_label_mode not in {"none", "kekule", "sigma_pi"}: + raise ValueError("edge_label_mode must be one of: none, kekule, sigma_pi") + electron_label_mode = electron_label_mode.lower() + if electron_label_mode not in {"charge", "lone_pair", "radical", "all"}: + raise ValueError( + "electron_label_mode must be one of: charge, lone_pair, radical, all" + ) + + display = _its_display_graph(its) + fig = None + if ax is None: + fig, ax = plt.subplots(figsize=(7.0, 5.0), facecolor="white") + else: + fig = ax.figure + ax.clear() + ax.set_facecolor("white") + ax.set_axis_off() + ax.set_aspect("equal") + + nodes = list(display.nodes()) + pos = _layout_positions(display, nodes, use_h_count=False) + avg_len = _avg_edge_length(pos, display) + bond_offset = avg_len * 0.09 + atom_map_offset = avg_len * 0.18 + n_nodes = max(1, len(nodes)) + node_size = max(210, min(560, 5200 // n_nodes)) + bond_width = max(1.5, min(2.8, 26 / n_nodes)) + element_font_size = max(7, min(12, 100 // n_nodes)) + atom_map_font_size = max(7, element_font_size) + + for u, v, attrs in display.edges(data=True): + p1, p2 = pos[u], pos[v] + state = attrs.get("its_state", "unchanged") + order = attrs.get("display_order", 1.0) + aromatic = bool(attrs.get("display_aromatic", False)) + color = _state_color(state) + if state in {"formed", "broken"}: + ax.plot( + [p1[0], p2[0]], + [p1[1], p2[1]], + color=color, + linewidth=bond_width * 3.6, + alpha=0.18, + solid_capstyle="round", + zorder=1, + ) + _draw_bond_lines( + ax, + p1, + p2, + 
order=max(1, int(round(order))), + aromatic=aromatic, + aromatic_style=aromatic_style, + offset=bond_offset, + lw=bond_width if state == "unchanged" else bond_width * 1.25, + color=color, + ) + if state in {"formed", "broken"}: + line_style = (0, (3, 3)) + ax.plot( + [p1[0], p2[0]], + [p1[1], p2[1]], + color=color, + linewidth=bond_width * 1.6, + linestyle=line_style, + alpha=0.95, + solid_capstyle="round", + zorder=3, + ) + edge_label = attrs.get(f"its_label_{edge_label_mode}", "") + if ( + edge_label_mode != "none" + and (show_edge_labels or state != "unchanged") + and edge_label + ): + ax.text( + (p1[0] + p2[0]) / 2, + (p1[1] + p2[1]) / 2, + edge_label, + fontsize=7, + ha="center", + va="center", + color="#111827", + bbox={ + "boxstyle": "round,pad=0.12", + "fc": "white", + "ec": "none", + "alpha": 0.9, + }, + zorder=9, + ) + + if aromatic_style == "circle": + _draw_aromatic_circles(ax, display, pos, scale=0.52) + + node_colors = [] + node_borders = [] + for node in nodes: + fill, border = _element_colors(str(display.nodes[node].get("element", "C"))) + if display.nodes[node].get("its_changed", False): + border = "#f97316" + node_colors.append(fill) + node_borders.append(border) + + node_artist = nx.draw_networkx_nodes( + display, + pos, + nodelist=nodes, + node_color=node_colors, + edgecolors=node_borders, + linewidths=[ + ( + max(2.2, node_size**0.5 * 0.1) + if display.nodes[node].get("its_changed", False) + else max(1.0, node_size**0.5 * 0.065) + ) + for node in nodes + ], + node_size=node_size, + ax=ax, + ) + node_artist.set_zorder(4) + + for node in nodes: + attrs = display.nodes[node] + text = _element_label(attrs, label_mode=label_mode) + if text: + x, y = pos[node] + fill, _ = _element_colors(str(attrs.get("element", "C"))) + ax.text( + x, + y, + text, + ha="center", + va="center", + fontsize=element_font_size, + fontweight="bold", + color="white" if _luminance(fill) < 0.5 else "#1f2937", + zorder=10, + ) + if show_atom_map: + atom_map = 
attrs.get("atom_map", node) + if atom_map in (None, 0): + atom_map = node + x, y = pos[node] + dx, dy = _index_offset_vec(node, display, pos, base=atom_map_offset) + ax.text( + x + dx, + y + dy, + str(atom_map), + ha="center", + va="center", + fontsize=atom_map_font_size, + fontweight="bold", + color="#111827", + path_effects=[pe.withStroke(linewidth=2.5, foreground="white")], + zorder=11, + ) + if show_electron_labels: + electron_label = attrs.get(f"its_electron_label_{electron_label_mode}", "") + if electron_label: + x, y = pos[node] + _, dy = _index_offset_vec( + node, display, pos, base=atom_map_offset * 2.35 + ) + ax.text( + x, + y - abs(dy), + electron_label, + ha="center", + va="center", + fontsize=max(7, element_font_size - 1), + color="#374151", + bbox={ + "boxstyle": "round,pad=0.16", + "fc": "white", + "ec": "#cbd5e1", + "alpha": 0.92, + }, + zorder=12, + ) + + if title: + ax.set_title(title, fontsize=12, fontweight="bold", pad=8) + _set_padded_limits(ax, pos, avg_len) + if fig is not None: + fig.tight_layout() + return ax + + +def _its_to_side_graphs(its: nx.Graph) -> Tuple[nx.Graph, nx.Graph]: + if _has_direct_tuple_attrs(its): + reverter = ITSReverter(its) + return ( + reverter.to_reactant_graph(recompute_neighbors=True), + reverter.to_product_graph(recompute_neighbors=True), + ) + return its_decompose(its) + + +def _its_display_graph(its: nx.Graph) -> nx.Graph: + reactant, product = _its_to_side_graphs(its) + display = nx.compose(reactant, product) + for node in display.nodes: + display.nodes[node]["its_changed"] = False + electron_labels = _electron_node_labels( + reactant.nodes[node] if node in reactant else {}, + product.nodes[node] if node in product else {}, + ) + for key, label in electron_labels.items(): + display.nodes[node][f"its_electron_label_{key}"] = label + for key in ("element", "charge", "hcount", "radical", "lone_pairs"): + r_value = reactant.nodes[node].get(key) if node in reactant else None + p_value = product.nodes[node].get(key) 
if node in product else None + if r_value != p_value: + display.nodes[node]["its_changed"] = True + break + + for u, v in display.edges(): + r_data = reactant.get_edge_data(u, v) + p_data = product.get_edge_data(u, v) + r_order = _edge_order_value(r_data) + p_order = _edge_order_value(p_data) + state = _edge_state(r_order, p_order) + display.edges[u, v]["its_state"] = state + display.edges[u, v]["display_order"] = max(r_order, p_order, 1.0) + display.edges[u, v]["display_aromatic"] = _is_display_aromatic(r_data, p_data) + display.edges[u, v]["order"] = display.edges[u, v]["display_order"] + display.edges[u, v][ + "its_label_kekule" + ] = f"{_fmt_order(r_order)}→{_fmt_order(p_order)}" + display.edges[u, v]["its_label_sigma_pi"] = _sigma_pi_label(r_data, p_data) + if state != "unchanged": + display.nodes[u]["its_changed"] = True + display.nodes[v]["its_changed"] = True + return display + + +def _edge_order_value(attrs: Optional[dict[str, Any]]) -> float: + if not attrs: + return 0.0 + value = attrs.get("kekule_order", attrs.get("order", 1.0)) + try: + return float(value) + except (TypeError, ValueError): + return 0.0 + + +def _edge_state(before: float, after: float) -> str: + if abs(before - after) < 1e-9: + return "unchanged" + if before == 0 and after > 0: + return "formed" + if before > 0 and after == 0: + return "broken" + return "order_changed" + + +def _is_display_aromatic( + reactant_attrs: Optional[dict[str, Any]], + product_attrs: Optional[dict[str, Any]], +) -> bool: + return any( + attrs is not None and _edge_is_aromatic(attrs) + for attrs in (reactant_attrs, product_attrs) + ) + + +def _state_color(state: str) -> str: + return { + "formed": "#15803d", + "broken": "#b91c1c", + "order_changed": "#ca8a04", + "unchanged": "#374151", + }.get(state, "#374151") + + +def _fmt_order(order: float) -> str: + if order == 0: + return "∅" + if float(order).is_integer(): + return str(int(order)) + return f"{order:g}" + + +def _sigma_pi_label( + reactant_attrs: 
Optional[dict[str, Any]], + product_attrs: Optional[dict[str, Any]], +) -> str: + r_sigma = _specific_order_value(reactant_attrs, "sigma_order") + p_sigma = _specific_order_value(product_attrs, "sigma_order") + r_pi = _specific_order_value(reactant_attrs, "pi_order") + p_pi = _specific_order_value(product_attrs, "pi_order") + parts = [] + if abs(r_sigma - p_sigma) > 1e-9: + parts.append(f"σ{_fmt_order(r_sigma)}→{_fmt_order(p_sigma)}") + if abs(r_pi - p_pi) > 1e-9: + parts.append(f"π{_fmt_order(r_pi)}→{_fmt_order(p_pi)}") + return " ".join(parts) + + +def _specific_order_value(attrs: Optional[dict[str, Any]], key: str) -> float: + if not attrs: + return 0.0 + value = attrs.get(key, 0.0) + if value is None: + return 0.0 + try: + return float(value) + except (TypeError, ValueError): + return 0.0 + + +def _electron_node_labels( + reactant_attrs: dict[str, Any], + product_attrs: dict[str, Any], +) -> dict[str, str]: + labels: dict[str, str] = {} + all_parts = [] + for key, mode, label in ( + ("charge", "charge", "q"), + ("lone_pairs", "lone_pair", "λ"), + ("radical", "radical", "rad"), + ): + before = reactant_attrs.get(key, 0) + after = product_attrs.get(key, 0) + if before != after: + formatter = _fmt_signed if key == "charge" else _fmt_count + text = f"{label}{formatter(before)}→{formatter(after)}" + labels[mode] = text + all_parts.append(text) + labels["all"] = " ".join(all_parts) + return labels + + +def _fmt_signed(value: Any) -> str: + try: + number = int(value) + except (TypeError, ValueError): + return str(value) + if number > 0: + return f"+{number}" + if number < 0: + return str(number) + return "0" + + +def _fmt_count(value: Any) -> str: + try: + number = int(value) + except (TypeError, ValueError): + return str(value) + return str(number) + + +def _avg_edge_length(pos: dict[Any, tuple[float, float]], graph: nx.Graph) -> float: + if graph.number_of_edges() == 0: + return 1.0 + lengths = [ + ((pos[v][0] - pos[u][0]) ** 2 + (pos[v][1] - pos[u][1]) ** 2) ** 0.5 
+ for u, v in graph.edges() + ] + return sum(lengths) / len(lengths) + + +def _has_direct_tuple_attrs(its: nx.Graph) -> bool: + node_keys = ("element", "hcount", "charge", "radical", "lone_pairs", "present") + edge_keys = ("kekule_order", "sigma_order", "pi_order") + for _, attrs in its.nodes(data=True): + if any(_is_plain_pair(attrs.get(key)) for key in node_keys): + return True + for _, _, attrs in its.edges(data=True): + if any(_is_plain_pair(attrs.get(key)) for key in edge_keys): + return True + return False + + +def _is_plain_pair(value: object) -> bool: + return ( + isinstance(value, tuple) + and len(value) == 2 + and not any(isinstance(item, (tuple, list, set, dict)) for item in value) + ) diff --git a/synkit/Vis/molecule_drawer.py b/synkit/Vis/molecule_drawer.py new file mode 100644 index 0000000..5245f51 --- /dev/null +++ b/synkit/Vis/molecule_drawer.py @@ -0,0 +1,565 @@ +from __future__ import annotations + +"""Chemistry-oriented molecular graph drawing. + +This module draws scalar molecular ``nx.Graph`` objects as molecule-like +figures. It is adapted from the copied ``vis_synedu`` renderer, but uses +SynKit's own graph-to-mol conversion and avoids relying on broken copied +relative imports. 
+""" + +import math +from typing import Any, Dict, Mapping, Optional, Set, Tuple + +import matplotlib.patches as mpatches +import matplotlib.patheffects as pe +import matplotlib.pyplot as plt +import networkx as nx +from rdkit import Chem +from rdkit.Chem import AllChem, Draw + +from synkit.IO.graph_to_mol import GraphToMol + +ELEMENT_PALETTE: Dict[str, Tuple[str, str]] = { + "C": ("#5f6368", "#3d4145"), + "H": ("#f8fafc", "#94a3b8"), + "O": ("#e8524a", "#b83830"), + "N": ("#5b8dd9", "#3a65b0"), + "S": ("#e8a838", "#b87909"), + "P": ("#e878c8", "#b84898"), + "F": ("#5bc8af", "#2a9178"), + "Cl": ("#3dbe6c", "#1e8a46"), + "Br": ("#a0522d", "#6b3118"), + "I": ("#8c54c8", "#5e2fa0"), + "B": ("#d6a77a", "#9a6a44"), + "Si": ("#f0c8a0", "#b88860"), +} + +DEFAULT_FILL = "#a0a0a0" +DEFAULT_BORDER = "#606060" + + +def draw_molecule_graph( # noqa: C901 + graph: nx.Graph, + *, + ax: Optional[plt.Axes] = None, + title: Optional[str] = None, + label_mode: str = "hetero", + show_atom_map: bool = False, + show_bond_order: bool = False, + aromatic_style: str = "circle", + include_rdkit_panel: bool = False, + use_h_count: bool = False, + node_size: Optional[int] = None, + bond_width: Optional[float] = None, + figsize: Tuple[float, float] = (6.0, 5.0), + highlight_nodes: Optional[Set[Any]] = None, + highlight_edges: Optional[Set[Tuple[Any, Any]]] = None, + highlight_color: str = "#f97316", + custom_node_colors: Optional[Mapping[Any, str]] = None, +) -> plt.Axes | tuple[plt.Figure, tuple[plt.Axes, plt.Axes]]: + """Draw a scalar molecular graph using RDKit coordinates when possible. + + :param graph: Molecular NetworkX graph with scalar ``element`` and + ``order`` attributes. + :type graph: nx.Graph + :param ax: Optional Matplotlib axes. + :type ax: Optional[plt.Axes] + :param title: Optional title. + :type title: Optional[str] + :param label_mode: ``"all"``, ``"hetero"``, or ``"none"``. + :type label_mode: str + :param show_atom_map: Show atom-map numbers near atoms. 
+ :type show_atom_map: bool + :param show_bond_order: Show numeric bond order labels. + :type show_bond_order: bool + :param aromatic_style: ``"circle"`` or ``"dashed"``. + :type aromatic_style: str + :param include_rdkit_panel: Also show RDKit's own rendering side-by-side. + :type include_rdkit_panel: bool + :param use_h_count: Pass graph ``hcount`` to ``GraphToMol`` for layout. + :type use_h_count: bool + :returns: Axes, or ``(fig, (rdkit_ax, graph_ax))`` when + ``include_rdkit_panel=True``. + :rtype: Union[plt.Axes, Tuple[plt.Figure, Tuple[plt.Axes, plt.Axes]]] + """ + + label_mode = label_mode.lower() + aromatic_style = aromatic_style.lower() + if label_mode not in {"all", "hetero", "none"}: + raise ValueError("label_mode must be one of: all, hetero, none") + if aromatic_style not in {"circle", "dashed"}: + raise ValueError("aromatic_style must be one of: circle, dashed") + + graph_view = graph.copy() + nodes = list(graph_view.nodes()) + n_nodes = max(1, len(nodes)) + + if include_rdkit_panel: + fig, (ax_rdkit, ax_graph) = plt.subplots( + 1, 2, figsize=(figsize[0] * 2, figsize[1]), facecolor="white" + ) + elif ax is None: + fig, ax_graph = plt.subplots(figsize=figsize, facecolor="white") + ax_rdkit = None + else: + fig = ax.figure + ax_graph = ax + ax_rdkit = None + + ax_graph.clear() + ax_graph.set_facecolor("white") + ax_graph.set_axis_off() + ax_graph.set_aspect("equal") + + pos = _layout_positions(graph_view, nodes, use_h_count=use_h_count) + avg_len = _avg_edge_length(pos, graph_view) + bond_offset = avg_len * 0.09 + atom_map_offset = avg_len * 0.18 + scaled_node_size = ( + node_size if node_size is not None else max(180, min(560, 4600 // n_nodes)) + ) + scaled_bond_width = ( + bond_width if bond_width is not None else max(1.3, min(2.6, 24 / n_nodes)) + ) + element_font_size = max(7, min(12, 100 // n_nodes)) + atom_map_font_size = max(7, element_font_size) + + normalized_highlight_edges = _normalize_edge_set(highlight_edges) + + _draw_highlights( + 
ax_graph, + graph_view, + pos, + highlight_nodes=highlight_nodes, + highlight_edges=normalized_highlight_edges, + node_size=scaled_node_size, + bond_width=scaled_bond_width, + color=highlight_color, + ) + + for u, v, attrs in graph_view.edges(data=True): + p1, p2 = pos[u], pos[v] + aromatic = _edge_is_aromatic(attrs) + order = _edge_order(attrs, aromatic=aromatic) + _draw_bond_lines( + ax_graph, + p1, + p2, + order=order, + aromatic=aromatic, + aromatic_style=aromatic_style, + offset=bond_offset, + lw=scaled_bond_width, + color="#262a2f", + ) + if show_bond_order and not aromatic: + _draw_bond_order_label(ax_graph, p1, p2, order) + + if aromatic_style == "circle": + _draw_aromatic_circles(ax_graph, graph_view, pos, scale=0.52) + + node_fills = [] + node_borders = [] + for node in nodes: + element = str(graph_view.nodes[node].get("element", "C")) + fill, border = _element_colors(element) + if custom_node_colors and node in custom_node_colors: + fill = custom_node_colors[node] + border = fill + node_fills.append(fill) + node_borders.append(border) + + node_artist = nx.draw_networkx_nodes( + graph_view, + pos, + nodelist=nodes, + node_color=node_fills, + edgecolors=node_borders, + linewidths=max(1.0, scaled_node_size**0.5 * 0.065), + node_size=scaled_node_size, + ax=ax_graph, + ) + node_artist.set_zorder(3) + + for node in nodes: + attrs = graph_view.nodes[node] + text = _element_label(attrs, label_mode=label_mode) + if not text: + continue + x, y = pos[node] + fill, _ = _element_colors(str(attrs.get("element", "C"))) + ax_graph.text( + x, + y, + text, + ha="center", + va="center", + fontsize=element_font_size, + fontweight="bold", + color="white" if _luminance(fill) < 0.5 else "#1f2937", + zorder=8, + ) + + if show_atom_map: + for node in nodes: + atom_map = graph_view.nodes[node].get("atom_map", node) + if atom_map in (None, 0): + atom_map = node + x, y = pos[node] + dx, dy = _index_offset_vec(node, graph_view, pos, base=atom_map_offset) + ax_graph.text( + x + dx, + 
def _graph_to_mol(graph: nx.Graph, *, sanitize: bool, use_h_count: bool) -> Chem.Mol:
    """Convert ``graph`` to an RDKit molecule, relaxing sanitization on failure.

    The first attempt honours ``sanitize``. If it raises and sanitization was
    requested, the conversion is retried once with ``sanitize=False``. When
    sanitization was already off there is nothing left to relax, so the
    original exception propagates instead of repeating the identical call.

    :param graph: Molecular graph to convert.
    :type graph: nx.Graph
    :param sanitize: Whether the first attempt asks the converter to sanitize.
    :type sanitize: bool
    :param use_h_count: Forwarded unchanged to ``GraphToMol.graph_to_mol``.
    :type use_h_count: bool
    :returns: The molecule returned by ``GraphToMol.graph_to_mol``.
    :rtype: Chem.Mol
    :raises Exception: Whatever the converter raises when no relaxed retry is
        available, or when the relaxed retry fails as well.
    """
    # NOTE(review): the two dicts look like node- and edge-attribute name
    # mappings for GraphToMol -- confirm against its constructor.
    converter = GraphToMol(
        {
            "element": "element",
            "charge": "charge",
            "atom_map": "atom_map",
            "radical": "radical",
        },
        {"order": "order"},
    )
    try:
        return converter.graph_to_mol(graph, sanitize=sanitize, use_h_count=use_h_count)
    except Exception:
        if not sanitize:
            # An unsanitized retry would be the same call again; keep the
            # first traceback rather than masking it with a duplicate.
            raise
        return converter.graph_to_mol(graph, sanitize=False, use_h_count=use_h_count)
ELEMENT_PALETTE.get(element, (DEFAULT_FILL, DEFAULT_BORDER)) + + +def _element_label(attrs: Mapping[str, Any], *, label_mode: str) -> str: + element = str(attrs.get("element", "C")) + if label_mode == "none": + return "" + if label_mode == "hetero" and element == "C": + charge = int(attrs.get("charge", 0) or 0) + radical = int(attrs.get("radical", 0) or 0) + return "C" if charge or radical else "" + charge_suffix = _charge_suffix(attrs.get("charge", 0)) + radical_suffix = "." * int(attrs.get("radical", 0) or 0) + return f"{element}{charge_suffix}{radical_suffix}" + + +def _charge_suffix(charge: Any) -> str: + try: + value = int(charge) + except (TypeError, ValueError): + return "" + if value == 0: + return "" + sign = "+" if value > 0 else "-" + mag = abs(value) + return sign if mag == 1 else f"{sign}{mag}" + + +def _edge_order(attrs: Mapping[str, Any], *, aromatic: bool) -> int: + if aromatic: + return 1 + try: + order = abs(float(attrs.get("kekule_order", attrs.get("order", 1.0)))) + except (TypeError, ValueError): + order = 1.0 + return max(1, min(3, int(round(order)))) + + +def _edge_is_aromatic(attrs: Mapping[str, Any]) -> bool: + if bool(attrs.get("aromatic", False)): + return True + try: + return float(attrs.get("order", 0.0)) == 1.5 + except (TypeError, ValueError): + return False + + +def _draw_bond_lines( + ax: plt.Axes, + p1: Tuple[float, float], + p2: Tuple[float, float], + *, + order: int, + aromatic: bool, + aromatic_style: str, + offset: float, + lw: float, + color: str, +) -> None: + kwargs = { + "color": color, + "linewidth": lw, + "solid_capstyle": "round", + "solid_joinstyle": "round", + "zorder": 2, + } + if aromatic and aromatic_style == "dashed": + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], linestyle="--", **kwargs) + return + if aromatic or order <= 1: + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], **kwargs) + return + dx, dy = _perp_offset(p1, p2, offset) + if order == 2: + ax.plot([p1[0] + dx, p2[0] + dx], [p1[1] + dy, p2[1] + dy], **kwargs) + 
def _draw_aromatic_circles(
    ax: plt.Axes,
    graph: nx.Graph,
    pos: Mapping[Any, Tuple[float, float]],
    *,
    scale: float,
) -> None:
    """Draw an unfilled circle inside every fully aromatic ring.

    Rings come from ``nx.cycle_basis``. A ring is marked only when it has at
    least five members and every member node carries a truthy ``aromatic``
    attribute. The circle is centred on the mean node position and its radius
    is the mean centre-to-node distance multiplied by ``scale``.

    :param ax: Axes receiving the circle patches.
    :type ax: plt.Axes
    :param graph: Graph whose node ``aromatic`` flags are inspected.
    :type graph: nx.Graph
    :param pos: Node positions keyed by node.
    :type pos: Mapping[Any, Tuple[float, float]]
    :param scale: Factor applied to the mean ring radius.
    :type scale: float
    """

    def _ring_is_aromatic(ring: list) -> bool:
        # Size gate first so small rings never touch the node attributes.
        return len(ring) >= 5 and all(
            bool(graph.nodes[atom].get("aromatic", False)) for atom in ring
        )

    for ring in filter(_ring_is_aromatic, nx.cycle_basis(graph)):
        points = [pos[atom] for atom in ring]
        count = len(points)
        center_x = sum(point[0] for point in points) / count
        center_y = sum(point[1] for point in points) / count
        mean_radius = (
            sum(math.hypot(point[0] - center_x, point[1] - center_y) for point in points)
            / count
        )
        ring_marker = mpatches.Circle(
            (center_x, center_y),
            mean_radius * scale,
            fill=False,
            linewidth=1.15,
            color="#333333",
            zorder=1,
        )
        ax.add_patch(ring_marker)
p2[0]) / 2, + (p1[1] + p2[1]) / 2, + str(order), + fontsize=7, + ha="center", + va="center", + color="#111827", + bbox={"boxstyle": "round,pad=0.12", "fc": "white", "ec": "none", "alpha": 0.9}, + zorder=8, + ) + + +def _draw_rdkit_panel( + ax: plt.Axes, + graph: nx.Graph, + nodes: list[Any], + *, + use_h_count: bool, +) -> None: + ax.clear() + ax.set_axis_off() + try: + mol = _graph_to_mol( + _ordered_graph(graph, nodes), sanitize=True, use_h_count=use_h_count + ) + _ensure_2d(mol) + options = Draw.MolDrawOptions() + options.addAtomIndices = True + image = Draw.MolToImage(mol, size=(500, 500), kekulize=False, options=options) + ax.imshow(image) + ax.set_title("RDKit", fontsize=12, fontweight="bold", pad=8) + except Exception as exc: + ax.text(0.5, 0.5, f"RDKit render failed\n{exc}", ha="center", va="center") + + +def _perp_offset( + p1: Tuple[float, float], + p2: Tuple[float, float], + offset: float, +) -> Tuple[float, float]: + dx, dy = p2[0] - p1[0], p2[1] - p1[1] + length = math.hypot(dx, dy) + if length == 0: + return 0.0, 0.0 + return -dy / length * offset, dx / length * offset + + +def _index_offset_vec( + node: Any, + graph: nx.Graph, + pos: Mapping[Any, Tuple[float, float]], + *, + base: float, +) -> Tuple[float, float]: + x, y = pos[node] + neighbors = list(graph.neighbors(node)) + if not neighbors: + return 0.0, base + cx = sum(pos[nbr][0] for nbr in neighbors) / len(neighbors) + cy = sum(pos[nbr][1] for nbr in neighbors) / len(neighbors) + dx, dy = x - cx, y - cy + length = math.hypot(dx, dy) + if length == 0: + return 0.0, base + return dx / length * base, dy / length * base + + +def _avg_edge_length( + pos: Mapping[Any, Tuple[float, float]], + graph: nx.Graph, +) -> float: + if graph.number_of_edges() == 0: + return 1.0 + lengths = [ + math.hypot(pos[v][0] - pos[u][0], pos[v][1] - pos[u][1]) + for u, v in graph.edges() + ] + return sum(lengths) / len(lengths) + + +def _set_padded_limits( + ax: plt.Axes, + pos: Mapping[Any, Tuple[float, float]], + 
avg_len: float, +) -> None: + if not pos: + return + xs = [point[0] for point in pos.values()] + ys = [point[1] for point in pos.values()] + x_span = max(xs) - min(xs) + y_span = max(ys) - min(ys) + pad = max(avg_len * 0.45, x_span * 0.08, y_span * 0.08, 0.2) + ax.set_xlim(min(xs) - pad, max(xs) + pad) + ax.set_ylim(min(ys) - pad, max(ys) + pad) + + +def _normalize_edge_set(edges: Optional[Set[Tuple[Any, Any]]]) -> Set[Tuple[Any, Any]]: + if not edges: + return set() + return {_edge_key(u, v) for u, v in edges} + + +def _edge_key(u: Any, v: Any) -> Tuple[Any, Any]: + return (u, v) if str(u) <= str(v) else (v, u) + + +def _luminance(hex_color: str) -> float: + color = hex_color.lstrip("#") + red, green, blue = (int(color[i : i + 2], 16) / 255.0 for i in (0, 2, 4)) # noqa + return 0.2126 * red + 0.7152 * green + 0.0722 * blue diff --git a/synkit/Vis/mtg_drawer.py b/synkit/Vis/mtg_drawer.py new file mode 100644 index 0000000..4c3dd89 --- /dev/null +++ b/synkit/Vis/mtg_drawer.py @@ -0,0 +1,769 @@ +from __future__ import annotations + +"""MTG visualization helpers. + +The compact MTG view is a timeline diagnostic. Step panels reuse the molecule- +like ITS renderer so each reconstructed ITS step is inspected with the same +visual language as normal Lewis State Graph / ITS drawings. 
+""" + +from typing import Any, Iterable, Mapping, Optional + +import matplotlib.lines as mlines +import matplotlib.pyplot as plt +import networkx as nx +from mpl_toolkits.mplot3d import Axes3D # noqa: F401 + +from synkit.Vis.its_drawer import draw_its_only + +ELEMENT_COLORS = { + "H": "#ffffff", + "C": "#f8fafc", + "N": "#bfdbfe", + "O": "#fecaca", + "F": "#bbf7d0", + "Cl": "#bbf7d0", + "Br": "#fed7aa", + "I": "#ddd6fe", + "S": "#fde68a", + "P": "#fecdd3", + "B": "#e7e5e4", + "Si": "#e9d5ff", +} + +EDGE_STYLES = { + "unchanged": ("#94a3b8", "solid", 1.7), + "formed": ("#15803d", "solid", 3.1), + "broken": ("#b91c1c", "solid", 3.1), + "transient": ("#ec4899", "dashed", 3.0), +} + + +def draw_mtg_graph( + mtg: Any, + *, + ax: Optional[plt.Axes] = None, + title: Optional[str] = None, + mode: str = "timeline", + layout: str = "kamada_kawai", + show_atom_map: bool = True, + show_edge_labels: bool = True, + show_node_badges: bool = False, + hydrogen_mode: str = "changed", + changed_only: bool = False, + compress: bool = True, + show_step_axis: bool = False, + dimension: str = "2d", + seed: int = 7, +) -> tuple[plt.Figure, plt.Axes]: + """Draw a compact MTG timeline graph. + + ``mtg`` may be a :class:`synkit.Graph.MTG.mtg.MTG` instance or a raw + compact MTG ``networkx.Graph`` from ``MTG.get_mtg()``. + + :param mtg: MTG object or compact MTG graph. + :type mtg: Any + :param ax: Optional Matplotlib axes. + :type ax: Optional[plt.Axes] + :param title: Optional title. + :type title: Optional[str] + :param mode: Label mode. ``"timeline"`` is the recommended MTG view; + ``"sigma_pi"`` gives a shorter Lewis-state bond diagnostic when + sigma/pi timelines are present. + :type mode: str + :param layout: NetworkX layout name: ``"kamada_kawai"``, ``"spring"``, + ``"circular"``, or ``"shell"``. + :type layout: str + :param hydrogen_mode: Hydrogen display policy. 
def draw_mtg_steps(
    mtg: Any,
    *,
    steps: Optional[Iterable[int]] = None,
    include_composed: bool = False,
    title: Optional[str] = None,
    max_columns: int = 3,
    show_atom_map: bool = True,
    label_mode: str = "hetero",
    edge_label_mode: str = "kekule",
    show_edge_labels: bool = False,
    show_electron_labels: bool = False,
    electron_label_mode: str = "charge",
) -> tuple[plt.Figure, list[plt.Axes]]:
    """Render the reconstructed ITS steps of an MTG as a grid of panels.

    Each selected step is drawn with ``draw_its_only`` under the title
    ``"Step <n>"`` (one-based). Panels fill the grid row by row and unused
    grid cells are switched off.

    :param mtg: MTG object exposing ``get_its_steps``.
    :type mtg: Any
    :param steps: Optional zero-based step indices to draw; all steps when
        omitted.
    :type steps: Optional[Iterable[int]]
    :param include_composed: Append a final ``"Composed"`` panel built from
        ``mtg.get_compose_its()``.
    :type include_composed: bool
    :param title: Optional figure title.
    :type title: Optional[str]
    :param max_columns: Maximum subplot columns (values below 1 act as 1).
    :type max_columns: int
    :param show_atom_map: Forwarded unchanged to ``draw_its_only``.
    :type show_atom_map: bool
    :param label_mode: Forwarded unchanged to ``draw_its_only``.
    :type label_mode: str
    :param edge_label_mode: Forwarded unchanged to ``draw_its_only``.
    :type edge_label_mode: str
    :param show_edge_labels: Forwarded unchanged to ``draw_its_only``.
    :type show_edge_labels: bool
    :param show_electron_labels: Forwarded unchanged to ``draw_its_only``.
    :type show_electron_labels: bool
    :param electron_label_mode: Forwarded unchanged to ``draw_its_only``.
    :type electron_label_mode: str
    :returns: ``(figure, axes)`` where ``axes`` holds only the used panels.
    :rtype: tuple[plt.Figure, list[plt.Axes]]
    :raises TypeError: If ``mtg`` lacks ``get_its_steps``, or lacks
        ``get_compose_its`` while ``include_composed`` is set.
    :raises IndexError: If a requested step index is out of range.
    :raises ValueError: If nothing is selected for drawing.
    """
    if not hasattr(mtg, "get_its_steps"):
        raise TypeError("draw_mtg_steps expects an MTG object with get_its_steps().")

    its_steps = list(mtg.get_its_steps())
    step_count = len(its_steps)
    chosen = list(steps) if steps is not None else list(range(step_count))

    # Validate every index before any panel is assembled.
    for index in chosen:
        if index < 0 or index >= step_count:
            raise IndexError(f"MTG step index out of range: {index}")

    panels = []
    for index in chosen:
        panels.append((f"Step {index + 1}", its_steps[index]))
    if include_composed:
        if not hasattr(mtg, "get_compose_its"):
            raise TypeError(
                "include_composed requires an MTG object with get_compose_its()."
            )
        panels.append(("Composed", mtg.get_compose_its()))

    panel_count = len(panels)
    if panel_count == 0:
        raise ValueError("No MTG steps selected for drawing.")

    ncols = min(max(1, max_columns), panel_count)
    nrows = (panel_count + ncols - 1) // ncols  # ceiling division
    fig, axes_grid = plt.subplots(
        nrows,
        ncols,
        figsize=(4.8 * ncols, 4.2 * nrows),
        squeeze=False,
        facecolor="white",
    )
    axes = [panel_ax for grid_row in axes_grid for panel_ax in grid_row]
    if title:
        fig.suptitle(title, fontsize=13, fontweight="bold")

    # Options shared by every panel, passed straight through to the renderer.
    its_options = {
        "show_atom_map": show_atom_map,
        "label_mode": label_mode,
        "edge_label_mode": edge_label_mode,
        "show_edge_labels": show_edge_labels,
        "show_electron_labels": show_electron_labels,
        "electron_label_mode": electron_label_mode,
    }
    for panel_ax, (panel_title, its) in zip(axes, panels):
        draw_its_only(its, ax=panel_ax, title=panel_title, **its_options)

    for spare_ax in axes[panel_count:]:
        spare_ax.set_axis_off()

    fig.tight_layout()
    return fig, axes[:panel_count]
def _mtg_display_graph(
    graph: nx.Graph,
    *,
    mode: str,
    show_atom_map: bool,
    show_node_badges: bool,
    hydrogen_mode: str,
    changed_only: bool,
    compress: bool,
) -> nx.Graph:
    """Build the filtered, style-annotated graph that the MTG renderers draw.

    Every edge of ``graph`` is classified with ``_edge_visual``; nodes and
    edges are then copied into a fresh graph subject to the hydrogen and
    ``changed_only`` filters.

    Node attributes written: ``label``, ``badges``, ``element``, ``changed``
    and ``fill``. Edge attributes written: everything returned by
    ``_edge_visual`` plus ``raw`` (a copy of the source edge attributes).
    ``display.graph["steps"]`` holds the result of ``_infer_state_count``.

    :param graph: Compact MTG graph.
    :type graph: nx.Graph
    :param mode: Label mode forwarded to ``_edge_visual``.
    :type mode: str
    :param show_atom_map: Forwarded to ``_node_label``.
    :type show_atom_map: bool
    :param show_node_badges: If True, compute node badges with
        ``_node_badges``; otherwise nodes get an empty badge tuple.
    :type show_node_badges: bool
    :param hydrogen_mode: ``"changed"``, ``"all"`` or ``"none"``.
    :type hydrogen_mode: str
    :param changed_only: Drop unchanged edges and nodes that touch no changed
        edge.
    :type changed_only: bool
    :param compress: Forwarded to ``_edge_visual``.
    :type compress: bool
    :returns: New graph ready for drawing.
    :rtype: nx.Graph
    :raises ValueError: If ``hydrogen_mode`` is not a supported value.
    """
    if hydrogen_mode not in {"changed", "all", "none"}:
        raise ValueError("hydrogen_mode must be one of: changed, all, none")

    # Classify every edge once; the result is reused for node filtering and
    # again when the edges are copied.
    edge_info = {
        _edge_key(u, v): _edge_visual(attrs, mode=mode, compress=compress)
        for u, v, attrs in graph.edges(data=True)
    }
    # Endpoints of every edge whose state is anything other than "unchanged".
    changed_incident = {
        node
        for (u, v), info in edge_info.items()
        if info["state"] != "unchanged"
        for node in (u, v)
    }

    display = nx.Graph()
    for node, attrs in graph.nodes(data=True):
        element = str(_first_present(attrs.get("element")) or "")
        atom_map = _first_present(attrs.get("atom_map"))
        if element == "H":
            if hydrogen_mode == "none":
                continue
            # NOTE(review): in "changed" mode a hydrogen without an atom map is
            # hidden even when it sits on a changed edge -- confirm intended.
            if hydrogen_mode == "changed" and atom_map in (None, 0):
                continue
            if hydrogen_mode == "changed" and node not in changed_incident:
                continue
        if changed_only and node not in changed_incident:
            continue

        label = _node_label(node, attrs, show_atom_map=show_atom_map)
        badges = _node_badges(attrs) if show_node_badges else []
        # NOTE(review): ``changed`` is also set by non-empty badges, which are
        # only computed when show_node_badges is True, so a node whose only
        # change is a node-level attribute is flagged only with badges enabled.
        display.add_node(
            node,
            label=label,
            badges=tuple(badges),
            element=element,
            changed=bool(badges) or node in changed_incident,
            fill=ELEMENT_COLORS.get(element, "#f3f4f6"),
        )

    for u, v, attrs in graph.edges(data=True):
        key = _edge_key(u, v)
        info = edge_info[key]
        if changed_only and info["state"] == "unchanged":
            continue
        # Skip edges that lost an endpoint to the node filters above.
        if u not in display or v not in display:
            continue
        display.add_edge(u, v, **info, raw=dict(attrs))

    display.graph["steps"] = _infer_state_count(graph)
    return display
+ seed: int, +) -> tuple[plt.Figure, plt.Axes]: + if ax is None: + fig = plt.figure(figsize=_figure_size(graph), facecolor="white") + ax = ( + fig.add_subplot(111, projection="3d") + if dimension == "3d" + else fig.add_subplot(111) + ) + else: + fig = ax.figure + + pos = _layout(graph, layout=layout, dimension=dimension, seed=seed) + ax.clear() + ax.set_axis_off() + if dimension == "2d": + ax.set_aspect("equal") + ax.set_title(title, fontsize=13, fontweight="bold", pad=12) + + if dimension == "3d": + _draw_mtg_display_3d( + graph, + pos, + ax=ax, + show_edge_labels=show_edge_labels, + ) + _draw_legend(ax) + fig.tight_layout() + return fig, ax + + for state in ("unchanged", "formed", "broken", "transient"): + edges = [ + (u, v) + for u, v, attrs in graph.edges(data=True) + if attrs.get("state") == state + ] + if not edges: + continue + color, style, width = EDGE_STYLES[state] + nx.draw_networkx_edges( + graph, + pos, + ax=ax, + edgelist=edges, + edge_color=color, + style=style, + width=width, + alpha=0.88 if state != "unchanged" else 0.38, + ) + + nodes = list(graph.nodes(data=True)) + if nodes: + nx.draw_networkx_nodes( + graph, + pos, + ax=ax, + node_color=[attrs["fill"] for _, attrs in nodes], + edgecolors=[ + "#f97316" if attrs.get("changed") else "#475569" for _, attrs in nodes + ], + linewidths=[2.6 if attrs.get("changed") else 1.2 for _, attrs in nodes], + node_size=[ + 760 if attrs.get("element") != "H" else 500 for _, attrs in nodes + ], + ) + nx.draw_networkx_labels( + graph, + pos, + labels={ + node: _stack_node_label(attrs) for node, attrs in graph.nodes(data=True) + }, + ax=ax, + font_size=8, + font_weight="bold", + font_color="#111827", + ) + + if show_edge_labels: + edge_labels = { + (u, v): attrs["label"] + for u, v, attrs in graph.edges(data=True) + if attrs.get("label") + } + if edge_labels: + nx.draw_networkx_edge_labels( + graph, + pos, + edge_labels=edge_labels, + ax=ax, + font_size=7, + rotate=False, + font_color="#111827", + bbox={ + 
"boxstyle": "round,pad=0.18", + "fc": "white", + "ec": "#cbd5e1", + "alpha": 0.94, + }, + ) + + _draw_legend(ax) + if show_step_axis: + _draw_step_axis(ax, graph.graph.get("steps", 0)) + _pad_limits(ax, pos) + fig.tight_layout() + return fig, ax + + +def _draw_mtg_display_3d( + graph: nx.Graph, + pos: Mapping[Any, Any], + *, + ax: plt.Axes, + show_edge_labels: bool, +) -> None: + for state in ("unchanged", "formed", "broken", "transient"): + color, style, width = EDGE_STYLES[state] + alpha = 0.88 if state != "unchanged" else 0.28 + for u, v, attrs in graph.edges(data=True): + if attrs.get("state") != state: + continue + p0 = pos[u] + p1 = pos[v] + ax.plot( + [p0[0], p1[0]], + [p0[1], p1[1]], + [p0[2], p1[2]], + color=color, + linestyle=style, + linewidth=width, + alpha=alpha, + ) + if show_edge_labels and attrs.get("label"): + mid = ((p0[0] + p1[0]) / 2, (p0[1] + p1[1]) / 2, (p0[2] + p1[2]) / 2) + ax.text( + *mid, + attrs["label"], + fontsize=7, + color="#111827", + ha="center", + va="center", + ) + + for node, attrs in graph.nodes(data=True): + x, y, z = pos[node] + edge_color = "#f97316" if attrs.get("changed") else "#475569" + size = 430 if attrs.get("element") != "H" else 320 + ax.scatter( + [x], + [y], + [z], + s=size, + c=[attrs["fill"]], + edgecolors=[edge_color], + linewidths=1.5, + depthshade=True, + ) + ax.text( + x, + y, + z + 0.12, + _stack_node_label(attrs), + fontsize=8.5, + fontweight="bold", + color="#111827", + ha="center", + va="center", + bbox={ + "boxstyle": "round,pad=0.08", + "fc": "white", + "ec": "none", + "alpha": 0.78, + }, + ) + + +def _edge_visual( + attrs: Mapping[str, Any], + *, + mode: str, + compress: bool, +) -> dict[str, Any]: + preferred = _preferred_timeline(attrs, mode=mode) + state = _timeline_state(preferred) + label = _timeline_label( + attrs, + preferred, + mode=mode, + state=state, + compress=compress, + ) + color, style, width = EDGE_STYLES[state] + return { + "history": tuple(preferred), + "state": state, + "label": 
label, + "color": color, + "style": style, + "width": width, + } + + +def _preferred_timeline(attrs: Mapping[str, Any], *, mode: str) -> tuple[Any, ...]: + if mode == "sigma_pi": + sigma = _coerce_timeline(attrs.get("sigma_order")) + pi = _coerce_timeline(attrs.get("pi_order")) + if _changes(sigma) or _changes(pi): + return tuple( + None if s is None and p is None else _none_order(s) + _none_order(p) + for s, p in zip(_pad(sigma, pi), _pad(pi, sigma)) + ) + for key in ("kekule_order", "order", "sigma_order", "pi_order"): + timeline = _coerce_timeline(attrs.get(key)) + if timeline: + return timeline + return () + + +def _timeline_label( + attrs: Mapping[str, Any], + preferred: tuple[Any, ...], + *, + mode: str, + state: str, + compress: bool, +) -> str: + if state == "unchanged": + return "" + timeline = _compressed_timeline(preferred) if compress else preferred + if mode == "sigma_pi": + parts = [] + for key, prefix in (("sigma_order", "σ"), ("pi_order", "π")): + part_timeline = _coerce_timeline(attrs.get(key)) + if part_timeline and _changes(_known_timeline(part_timeline)): + part_timeline = ( + _compressed_timeline(part_timeline) if compress else part_timeline + ) + parts.append(f"{prefix}:{_format_timeline(part_timeline)}") + if parts: + return " ".join(parts) + return _format_timeline(timeline) + + +def _coerce_timeline(value: Any) -> tuple[Any, ...]: + if not isinstance(value, tuple): + return () + if value and all(_is_step_pair(item) for item in value): + history = [] + for idx, pair in enumerate(value): + left, right = pair + if idx == 0: + history.append(_clean_order(left)) + history.append(_clean_order(right)) + return tuple(history) + if value and not any(isinstance(item, (tuple, list, dict, set)) for item in value): + return value + return () + + +def _is_step_pair(value: Any) -> bool: + return isinstance(value, tuple) and len(value) == 2 + + +def _clean_order(value: Any) -> Any: + if isinstance(value, set): + return None + return value + + +def 
_timeline_state(timeline: tuple[Any, ...]) -> str: + known = _known_timeline(timeline) + numeric = [_none_order(value) for value in known] + if not numeric or len(set(numeric)) == 1: + return "unchanged" + if numeric[0] == numeric[-1]: + return "transient" + if numeric[0] == 0 and numeric[-1] > 0: + return "formed" + if numeric[0] > 0 and numeric[-1] == 0: + return "broken" + return "transient" + + +def _node_label( + node: Any, + attrs: Mapping[str, Any], + *, + show_atom_map: bool, +) -> str: + element = _first_present(attrs.get("element")) or str(node) + atom_map = _first_present(attrs.get("atom_map")) + if show_atom_map and atom_map not in (None, 0): + return f"{element}:{atom_map}" + if show_atom_map: + return f"{element}:{node}" + return str(element) + + +def _node_badges(attrs: Mapping[str, Any]) -> list[str]: + badges = [] + for key, label in ( + ("charge", "q"), + ("hcount", "H"), + ("lone_pairs", "lp"), + ("radical", "rad"), + ): + timeline = _coerce_node_timeline(attrs.get(key)) + if timeline and _changes(timeline): + badges.append(f"{label}:{_format_timeline(timeline)}") + return badges[:2] + + +def _coerce_node_timeline(value: Any) -> tuple[Any, ...]: + if ( + isinstance(value, tuple) + and value + and all(_is_step_pair(item) for item in value) + ): + return _coerce_timeline(value) + if isinstance(value, tuple) and len(value) >= 3: + return value + if isinstance(value, tuple) and len(value) == 2: + return value + return () + + +def _stack_node_label(attrs: Mapping[str, Any]) -> str: + label = str(attrs.get("label", "")) + badges = attrs.get("badges") or () + return f"{label}\n{' '.join(badges)}" if badges else label + + +def _format_timeline(timeline: tuple[Any, ...]) -> str: + return "→".join(_format_order(value) for value in timeline) + + +def _compressed_timeline(timeline: tuple[Any, ...]) -> tuple[Any, ...]: + if len(timeline) <= 2: + return timeline + return (timeline[0], timeline[-1]) + + +def _trim_timeline(timeline: tuple[Any, ...]) -> 
tuple[Any, ...]: + if len(timeline) <= 2: + return timeline + start = 0 + end = len(timeline) + while start + 1 < end and timeline[start] == timeline[start + 1]: + start += 1 + while end - 2 >= start and timeline[end - 1] == timeline[end - 2]: + end -= 1 + return timeline[start:end] + + +def _format_order(value: Any) -> str: + if value is None: + return "∅" + if isinstance(value, float) and value.is_integer(): + return str(int(value)) + return str(value) + + +def _none_order(value: Any) -> float: + return 0.0 if value is None else float(value) + + +def _changes(timeline: tuple[Any, ...]) -> bool: + return bool(timeline) and len(set(timeline)) > 1 + + +def _known_timeline(timeline: tuple[Any, ...]) -> tuple[Any, ...]: + start = 0 + end = len(timeline) + while start < end and timeline[start] is None: + start += 1 + while end > start and timeline[end - 1] is None: + end -= 1 + return timeline[start:end] + + +def _pad(first: tuple[Any, ...], second: tuple[Any, ...]) -> tuple[Any, ...]: + if len(first) >= len(second): + return first + return first + (None,) * (len(second) - len(first)) + + +def _first_present(value: Any) -> Any: + if isinstance(value, tuple): + for item in value: + if isinstance(item, tuple): + for side in item: + if side not in (None, set()): + return side + elif item is not None: + return item + return None + return value + + +def _edge_key(u: Any, v: Any) -> tuple[Any, Any]: + return (u, v) if str(u) <= str(v) else (v, u) + + +def _infer_state_count(graph: nx.Graph) -> int: + max_len = 0 + for _, _, attrs in graph.edges(data=True): + for key in ("kekule_order", "order", "sigma_order", "pi_order"): + max_len = max(max_len, len(_coerce_timeline(attrs.get(key)))) + return max_len + + +def _layout( + graph: nx.Graph, + *, + layout: str, + dimension: str, + seed: int, +) -> dict[Any, Any]: + if graph.number_of_nodes() == 0: + return {} + if dimension == "3d": + if layout not in {"spring", "kamada_kawai"}: + raise ValueError("3D MTG layout supports: 
spring, kamada_kawai") + return nx.spring_layout(graph, seed=seed, k=1.15, iterations=160, dim=3) + if layout == "spring": + return nx.spring_layout(graph, seed=seed, k=1.15, iterations=120) + if layout == "kamada_kawai": + return nx.kamada_kawai_layout(graph) + if layout == "circular": + return nx.circular_layout(graph) + if layout == "shell": + return nx.shell_layout(graph) + raise ValueError("layout must be one of: spring, kamada_kawai, circular, shell") + + +def _figure_size(graph: nx.Graph) -> tuple[float, float]: + n_nodes = max(1, graph.number_of_nodes()) + return min(14.0, max(7.0, n_nodes * 0.78)), min(10.0, max(5.2, n_nodes * 0.55)) + + +def _draw_legend(ax: plt.Axes) -> None: + handles = [ + mlines.Line2D( + [], [], color=color, linestyle=style, linewidth=width, label=label + ) + for label, (color, style, width) in ( + ("formed", EDGE_STYLES["formed"]), + ("broken", EDGE_STYLES["broken"]), + ("transient", EDGE_STYLES["transient"]), + ) + ] + ax.legend( + handles=handles, + loc="upper right", + bbox_to_anchor=(1.0, 1.0), + frameon=False, + fontsize=8, + ncol=1, + ) + + +def _draw_step_axis(ax: plt.Axes, states: int) -> None: + if states <= 1: + return + text = "states " + " → ".join(f"S{i}" for i in range(states)) + ax.text( + 0.5, + -0.045, + text, + transform=ax.transAxes, + ha="center", + va="top", + fontsize=8, + color="#475569", + ) + + +def _pad_limits(ax: plt.Axes, pos: Mapping[Any, Any]) -> None: + if not pos: + return + xs = [point[0] for point in pos.values()] + ys = [point[1] for point in pos.values()] + x_span = max(xs) - min(xs) + y_span = max(ys) - min(ys) + pad = max(x_span, y_span, 1.0) * 0.25 + ax.set_xlim(min(xs) - pad, max(xs) + pad) + ax.set_ylim(min(ys) - pad, max(ys) + pad) diff --git a/synkit/Vis/reaction_drawer.py b/synkit/Vis/reaction_drawer.py new file mode 100644 index 0000000..7b61324 --- /dev/null +++ b/synkit/Vis/reaction_drawer.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +"""Reaction visualization built from 
@dataclass(frozen=True)
class ReactionHighlights:
    """Atom-map based reaction-center highlights.

    Immutable result of comparing the mapped bonds of a reactant graph with
    those of a product graph. Every bond is a ``frozenset`` of its endpoint
    atom-map numbers, so it does not depend on node ids or endpoint order.
    """

    # Atom-map numbers of all endpoints of formed, broken or order-changed bonds.
    changed_atoms: frozenset[int]
    # Bonds found on the product side only.
    formed_bonds: frozenset[frozenset[int]]
    # Bonds found on the reactant side only.
    broken_bonds: frozenset[frozenset[int]]
    # Bonds found on both sides whose orders differ.
    order_changed_bonds: frozenset[frozenset[int]]
+ :rtype: tuple[plt.Figure, list[plt.Axes]] + """ + + reactant, product = rsmi_to_graph( + rsmi, + drop_non_aam=False, + sanitize=sanitize, + use_index_as_atom_map=True, + ) + if reactant is None or product is None: + raise ValueError(f"Could not convert RSMI to graphs: {rsmi!r}") + return draw_reaction_graphs( + reactant, + product, + title=title or rsmi, + show_atom_map=show_atom_map, + highlight_reaction_center=highlight_reaction_center, + label_mode=label_mode, + aromatic_style=aromatic_style, + figsize_per_mol=figsize_per_mol, + ) + + +def draw_reaction_graphs( + reactant: nx.Graph, + product: nx.Graph, + *, + title: Optional[str] = None, + show_atom_map: bool = True, + highlight_reaction_center: bool = True, + label_mode: str = "hetero", + aromatic_style: str = "circle", + figsize_per_mol: Tuple[float, float] = (3.2, 2.8), +) -> tuple[plt.Figure, list[plt.Axes]]: + """Draw reactant and product graphs as molecule panels.""" + + highlights = ( + find_reaction_highlights(reactant, product) + if highlight_reaction_center + else ReactionHighlights(frozenset(), frozenset(), frozenset(), frozenset()) + ) + reactant_parts = _components(reactant) + product_parts = _components(product) + n_panels = len(reactant_parts) + len(product_parts) + 1 + fig_width = max(6.0, figsize_per_mol[0] * n_panels) + fig_height = figsize_per_mol[1] + (0.45 if title else 0.0) + fig, axes_arr = plt.subplots( + 1, + n_panels, + figsize=(fig_width, fig_height), + facecolor="white", + gridspec_kw={"width_ratios": _width_ratios(reactant_parts, product_parts)}, + ) + axes = list(axes_arr if isinstance(axes_arr, Iterable) else [axes_arr]) + + if title: + fig.suptitle(title, fontsize=12, fontweight="bold", y=0.98) + + panel_index = 0 + for index, part in enumerate(reactant_parts): + _draw_part( + part, + axes[panel_index], + title="Reactant" if index == 0 else "+", + highlights=highlights, + side="reactant", + show_atom_map=show_atom_map, + label_mode=label_mode, + aromatic_style=aromatic_style, + 
) + panel_index += 1 + + _draw_arrow(axes[panel_index]) + panel_index += 1 + + for index, part in enumerate(product_parts): + _draw_part( + part, + axes[panel_index], + title="Product" if index == 0 else "+", + highlights=highlights, + side="product", + show_atom_map=show_atom_map, + label_mode=label_mode, + aromatic_style=aromatic_style, + ) + panel_index += 1 + + fig.tight_layout() + return fig, axes + + +def find_reaction_highlights( + reactant: nx.Graph, + product: nx.Graph, +) -> ReactionHighlights: + """Find atom-map based changed atoms and bonds between two side graphs.""" + + reactant_bonds = _mapped_bond_orders(reactant) + product_bonds = _mapped_bond_orders(product) + formed: set[FrozenSet[int]] = set() + broken: set[FrozenSet[int]] = set() + order_changed: set[FrozenSet[int]] = set() + changed_atoms: set[int] = set() + + for pair in set(reactant_bonds) | set(product_bonds): + r_order = reactant_bonds.get(pair) + p_order = product_bonds.get(pair) + if r_order is None: + formed.add(pair) + changed_atoms.update(pair) + elif p_order is None: + broken.add(pair) + changed_atoms.update(pair) + elif abs(r_order - p_order) > 1e-6: + order_changed.add(pair) + changed_atoms.update(pair) + + return ReactionHighlights( + changed_atoms=frozenset(changed_atoms), + formed_bonds=frozenset(formed), + broken_bonds=frozenset(broken), + order_changed_bonds=frozenset(order_changed), + ) + + +def _components(graph: nx.Graph) -> list[nx.Graph]: + return [ + graph.subgraph(nodes).copy() for nodes in nx.connected_components(graph) + ] or [graph.copy()] + + +def _width_ratios(reactants: list[nx.Graph], products: list[nx.Graph]) -> list[float]: + ratios = [max(1.0, part.number_of_nodes() / 5.0) for part in reactants] + ratios.append(0.45) + ratios.extend(max(1.0, part.number_of_nodes() / 5.0) for part in products) + return ratios + + +def _draw_part( + graph: nx.Graph, + ax: plt.Axes, + *, + title: str, + highlights: ReactionHighlights, + side: str, + show_atom_map: bool, + 
label_mode: str, + aromatic_style: str, +) -> None: + edge_maps = ( + highlights.broken_bonds | highlights.order_changed_bonds + if side == "reactant" + else highlights.formed_bonds | highlights.order_changed_bonds + ) + highlight_nodes = _nodes_for_atom_maps(graph, highlights.changed_atoms) + highlight_edges = _edges_for_atom_map_pairs(graph, edge_maps) + draw_molecule_graph( + graph, + ax=ax, + title=title, + label_mode=label_mode, + show_atom_map=show_atom_map, + aromatic_style=aromatic_style, + highlight_nodes=highlight_nodes, + highlight_edges=highlight_edges, + highlight_color="#f97316", + ) + + +def _draw_arrow(ax: plt.Axes) -> None: + ax.clear() + ax.set_axis_off() + ax.annotate( + "", + xy=(0.92, 0.5), + xytext=(0.08, 0.5), + xycoords="axes fraction", + arrowprops={"arrowstyle": "->", "lw": 2.2, "color": "#374151"}, + ) + + +def _mapped_bond_orders(graph: nx.Graph) -> Dict[FrozenSet[int], float]: + bonds: Dict[FrozenSet[int], float] = {} + for u, v, attrs in graph.edges(data=True): + a = _atom_map(graph.nodes[u], fallback=u) + b = _atom_map(graph.nodes[v], fallback=v) + if not a or not b: + continue + bonds[frozenset({a, b})] = float( + attrs.get("kekule_order", attrs.get("order", 1.0)) + ) + return bonds + + +def _nodes_for_atom_maps( + graph: nx.Graph, atom_maps: Set[int] | frozenset[int] +) -> Set[Any]: + return { + node + for node, attrs in graph.nodes(data=True) + if _atom_map(attrs, fallback=node) in atom_maps + } + + +def _edges_for_atom_map_pairs( + graph: nx.Graph, + pairs: Set[FrozenSet[int]] | frozenset[FrozenSet[int]], +) -> Set[Tuple[Any, Any]]: + out: set[Tuple[Any, Any]] = set() + for u, v in graph.edges(): + pair = frozenset( + { + _atom_map(graph.nodes[u], fallback=u), + _atom_map(graph.nodes[v], fallback=v), + } + ) + if pair in pairs: + out.add((u, v)) + return out + + +def _atom_map(attrs: Dict[str, Any], *, fallback: Any) -> int: + value = attrs.get("atom_map", 0) + if value in (None, 0): + value = fallback + try: + return int(value) + 
except (TypeError, ValueError): + return 0 diff --git a/synkit/Vis/vis_synedu/Vis/__init__.py b/synkit/Vis/vis_synedu/Vis/__init__.py new file mode 100644 index 0000000..9271b5c --- /dev/null +++ b/synkit/Vis/vis_synedu/Vis/__init__.py @@ -0,0 +1,7 @@ +from .dpo import DPODecomp, visualize_dpo_rule, dpo_decompose_atom_conserving + +__all__ = [ + "DPODecomp", + "visualize_dpo_rule", + "dpo_decompose_atom_conserving", +] diff --git a/synkit/Vis/vis_synedu/Vis/dpo.py b/synkit/Vis/vis_synedu/Vis/dpo.py new file mode 100644 index 0000000..7002607 --- /dev/null +++ b/synkit/Vis/vis_synedu/Vis/dpo.py @@ -0,0 +1,862 @@ +from __future__ import annotations + +import networkx as nx +import matplotlib.pyplot as plt +import matplotlib.patheffects as pe +from matplotlib.lines import Line2D +from matplotlib.patches import Patch +from dataclasses import dataclass +from typing import Dict, Tuple, Optional, Any + +from rdkit import Chem +from rdkit.Chem import AllChem + +from ..conversion import graph_to_mol +from ..its_vis import visualize_its as _visualize_its + +# ── CPK element palette (fill, border) — matches its_vis / vis ──────────── +_ELEMENT_PALETTE: Dict[str, Tuple[str, str]] = { + "C": ("#636363", "#3d3d3d"), + "O": ("#E8524A", "#b83830"), + "N": ("#5B8DD9", "#3a65b0"), + "S": ("#E8A838", "#c07a10"), + "Cl": ("#3DBE6C", "#1e8a46"), + "F": ("#5BC8AF", "#2a9178"), + "Br": ("#A0522D", "#6b3118"), + "I": ("#8C54C8", "#5e2fa0"), + "P": ("#E878C8", "#b84898"), + "H": ("#C8C8C8", "#909090"), + "Na": ("#AB5CF2", "#7b34c8"), + "Mg": ("#8AFF00", "#58b000"), + "Si": ("#F0C8A0", "#b88860"), +} +_DEFAULT_FILL = "#A0A0A0" +_DEFAULT_BORDER = "#606060" + + +def _fill(el: str) -> str: + return _ELEMENT_PALETTE.get(el, (_DEFAULT_FILL, _DEFAULT_BORDER))[0] + + +def _border(el: str) -> str: + return _ELEMENT_PALETTE.get(el, (_DEFAULT_FILL, _DEFAULT_BORDER))[1] + + +def _luminance(hex_color: str) -> float: + h = hex_color.lstrip("#") + r, g, b = (int(h[i : i + 2], 16) / 255.0 for i in (0, 2, 
4)) # noqa + return 0.2126 * r + 0.7152 * g + 0.0722 * b + + +def _pos_from_mol( + mol: Chem.Mol, G: nx.Graph +) -> Optional[Dict[Any, Tuple[float, float]]]: + """Return {node_id: (x, y)} from RDKit 2D conformer matched via atom_map attribute.""" + if mol.GetNumConformers() == 0: + AllChem.Compute2DCoords(mol) + conf = mol.GetConformer(0) + mol_pos: Dict[int, Tuple[float, float]] = { + a.GetAtomMapNum(): ( + conf.GetAtomPosition(a.GetIdx()).x, + conf.GetAtomPosition(a.GetIdx()).y, + ) + for a in mol.GetAtoms() + if a.GetAtomMapNum() > 0 + } + out: Dict[Any, Tuple[float, float]] = {} + for n in G.nodes(): + am = G.nodes[n].get("atom_map", n) + if am in mol_pos: + out[n] = mol_pos[am] + elif n in mol_pos: + out[n] = mol_pos[n] + return out if len(out) == G.number_of_nodes() else None + + +# ============================================================ +# ITS visualizer +# ============================================================ +def visualize_its( # noqa: C901 + its: nx.Graph, + *, + mol: Optional[Chem.Mol] = None, + ax=None, + title: str | None = None, + pos: dict | None = None, + layout: str = "kamada_kawai", # "spring" | "kamada_kawai" | "circular" + node_size: int = 900, + font_size: int = 10, + edge_width: float = 2.8, + show_edge_labels: bool = True, + show_unchanged_edge_labels: bool = False, + show_node_labels: bool = True, + show_atom_map: bool = False, + show_legends: bool = False, + rc_ring_color: str = "#FFD700", +): + """ + Visualize an ITS graph with CPK node colors and colored edge types. + + Pass *mol* (an RDKit molecule with 2D coords and atom-map numbers) to use + chemically correct 2D layout instead of the graph-theoretic fallback. 
+ + Edges (its[u][v]['order'] == (br, bp)): + - br > bp : broken (red) + - br < bp : formed (green) + - br = bp : unchanged (grey dashed) + """ + created_fig = False + if ax is None: + fig, ax = plt.subplots(figsize=(6, 4), facecolor="white") + created_fig = True + + ax.set_facecolor("white") + ax.set_axis_off() + if title: + ax.set_title( + title, fontsize=font_size + 1, fontweight="bold", pad=6, color="#1a1a1a" + ) + + # ── layout ─────────────────────────────────────────────────────────── + if pos is None: + if mol is not None: + pos = _pos_from_mol(mol, its) + if pos is None: + if layout == "spring": + pos = nx.spring_layout(its, seed=0, k=0.9) + elif layout == "circular": + pos = nx.circular_layout(its) + else: + pos = nx.kamada_kawai_layout(its) + + broken, formed, unchanged = [], [], [] + lbl_b, lbl_f, lbl_u = {}, {}, {} + rc_nodes = set() + + for u, v, d in its.edges(data=True): + br, bp = d.get("order", (0.0, 0.0)) + if br > bp: + broken.append((u, v)) + lbl_b[(u, v)] = f"({br:g},{bp:g})" + rc_nodes.update([u, v]) + elif br < bp: + formed.append((u, v)) + lbl_f[(u, v)] = f"({br:g},{bp:g})" + rc_nodes.update([u, v]) + else: + unchanged.append((u, v)) + lbl_u[(u, v)] = f"({br:g},{bp:g})" + + nodelist = list(its.nodes()) + node_colors, node_borders, label_colors = [], [], [] + elems_present = [] + + for n in nodelist: + elem = its.nodes[n].get("element", "?") + if elem not in elems_present: + elems_present.append(elem) + fc = _fill(elem) + bc = _border(elem) + node_colors.append(fc) + node_borders.append(bc) + label_colors.append("white" if _luminance(fc) < 0.50 else "#1a1a1a") + + # ── RC glow ────────────────────────────────────────────────────────── + rc_list = [n for n in nodelist if n in rc_nodes] + if rc_list: + nc = nx.draw_networkx_nodes( + its, + pos, + ax=ax, + nodelist=rc_list, + node_size=int(node_size * 1.9), + node_color=rc_ring_color, + edgecolors="none", + linewidths=0, + alpha=0.20, + ) + nc.set_zorder(1) + + # ── edges 
──────────────────────────────────────────────────────────── + if unchanged: + nx.draw_networkx_edges( + its, + pos, + ax=ax, + edgelist=unchanged, + width=max(1.1, edge_width * 0.55), + edge_color="#888888", + alpha=0.45, + style="--", + arrows=False, + ) + if broken: + nx.draw_networkx_edges( + its, + pos, + ax=ax, + edgelist=broken, + width=edge_width * 1.25, + edge_color="#D62728", + alpha=0.95, + arrows=False, + ) + if formed: + nx.draw_networkx_edges( + its, + pos, + ax=ax, + edgelist=formed, + width=edge_width * 1.25, + edge_color="#2CA02C", + alpha=0.95, + arrows=False, + ) + + # ── nodes ──────────────────────────────────────────────────────────── + nc = nx.draw_networkx_nodes( + its, + pos, + nodelist=nodelist, + ax=ax, + node_size=node_size, + node_color=node_colors, + edgecolors=node_borders, + linewidths=max(1.2, node_size**0.5 * 0.055), + ) + nc.set_zorder(3) + + if rc_list: + nc = nx.draw_networkx_nodes( + its, + pos, + nodelist=rc_list, + ax=ax, + node_size=int(node_size * 1.32), + node_color="none", + edgecolors=rc_ring_color, + linewidths=max(2.0, node_size**0.5 * 0.10), + alpha=0.9, + ) + nc.set_zorder(4) + + # ── node labels ────────────────────────────────────────────────────── + if show_node_labels: + for i, n in enumerate(nodelist): + el = its.nodes[n].get("element", "?") + am = its.nodes[n].get("atom_map", 0) + lbl = f"{el}:{am}" if (show_atom_map and am) else el + x, y = pos[n] + ax.text( + x, + y, + lbl, + ha="center", + va="center", + fontsize=font_size, + fontweight="bold", + color=label_colors[i], + zorder=9, + ) + + # ── edge labels ────────────────────────────────────────────────────── + if show_edge_labels: + for lbl_dict, color in ( + (lbl_b, "#D62728"), + (lbl_f, "#2CA02C"), + ): + if lbl_dict: + nx.draw_networkx_edge_labels( + its, + pos, + ax=ax, + edge_labels=lbl_dict, + font_size=font_size - 1, + font_color=color, + bbox=dict( + boxstyle="round,pad=0.15", fc="white", ec="none", alpha=0.85 + ), + ) + if 
show_unchanged_edge_labels and lbl_u: + nx.draw_networkx_edge_labels( + its, + pos, + ax=ax, + edge_labels=lbl_u, + font_size=font_size - 3, + font_color="#888888", + ) + + if show_legends: + edge_legend = [ + Line2D([0], [0], color="#D62728", lw=3, label="broken (br>bp)"), + Line2D([0], [0], color="#2CA02C", lw=3, label="formed (br Tuple[int, int]: + return (u, v) if u < v else (v, u) + + +def _edge_orders(G: nx.Graph) -> Dict[Tuple[int, int], float]: + out: Dict[Tuple[int, int], float] = {} + for u, v, d in G.edges(data=True): + out[_ekey(u, v)] = float(d.get("order", 1.0)) + return out + + +def dpo_decompose_atom_conserving( + L: nx.Graph, + R: nx.Graph, + *, + order_tol: float = 1e-9, +) -> DPODecomp: + """ + Atom-conserving DPO-like decomposition: + - K_nodes: nodes present on both sides (by node id) + - K_edges: edges present on both sides with same 'order' + - L_only_edges: edges deleted or order-changed (treated as delete) + - R_only_edges: edges created or order-changed (treated as add) + """ + K_nodes = set(L.nodes()) & set(R.nodes()) + L_orders = _edge_orders(L) + R_orders = _edge_orders(R) + + common = set(L_orders) & set(R_orders) + K_edges = {e for e in common if abs(L_orders[e] - R_orders[e]) <= order_tol} + + L_only = set(L_orders) - K_edges + R_only = set(R_orders) - K_edges + + return DPODecomp( + K_nodes=K_nodes, + K_edges=K_edges, + L_only_edges=L_only, + R_only_edges=R_only, + L_orders=L_orders, + R_orders=R_orders, + ) + + +def build_its_from_LR( + L: nx.Graph, R: nx.Graph, *, dec: Optional[DPODecomp] = None +) -> nx.Graph: + """Build ITS with edge attribute order=(br,bp) from L and R.""" + if dec is None: + dec = dpo_decompose_atom_conserving(L, R) + + its = nx.Graph() + + for n in set(L.nodes()) | set(R.nodes()): + if n in L.nodes: + its.add_node(n, **L.nodes[n]) + else: + its.add_node(n, **R.nodes[n]) + + all_edges = set(dec.L_orders) | set(dec.R_orders) + for u, v in all_edges: + br = dec.L_orders.get((u, v), 0.0) + bp = 
dec.R_orders.get((u, v), 0.0) + its.add_edge(u, v, order=(br, bp)) + + return its + + +def _layout_pos(G: nx.Graph, layout: str, seed: int = 0) -> Dict[Any, Any]: + if layout == "spring": + return nx.spring_layout(G, seed=seed, k=0.9) + if layout == "circular": + return nx.circular_layout(G) + if layout == "kamada_kawai": + return nx.kamada_kawai_layout(G) + raise ValueError("layout must be one of: 'spring', 'kamada_kawai', 'circular'") + + +def _graph_to_layout_pos(G: nx.Graph) -> Optional[Dict[Any, Tuple[float, float]]]: + """Use graph_to_mol + RDKit 2D coordinates, mapped back to graph node ids.""" + nodelist = list(G.nodes()) + ordered = nx.Graph() + for node in nodelist: + ordered.add_node(node, **G.nodes[node]) + for u, v, data in G.edges(data=True): + ordered.add_edge(u, v, **data) + + try: + mol = graph_to_mol(ordered, sanitize=True) + except Exception: + try: + mol = graph_to_mol(ordered, sanitize=False) + except Exception: + return None + + if mol.GetNumConformers() == 0: + AllChem.Compute2DCoords(mol) + conf = mol.GetConformer(0) + return { + node: ( + conf.GetAtomPosition(atom_idx).x, + conf.GetAtomPosition(atom_idx).y, + ) + for atom_idx, node in enumerate(nodelist) + } + + +def _set_shared_limits(axes, pos: Dict[Any, Tuple[float, float]]) -> None: + if not pos: + return + xs = [p[0] for p in pos.values()] + ys = [p[1] for p in pos.values()] + x_span = max(xs) - min(xs) + y_span = max(ys) - min(ys) + pad = max(x_span * 0.12, y_span * 0.12, 0.45) + for ax in axes: + ax.set_xlim(min(xs) - pad, max(xs) + pad) + ax.set_ylim(min(ys) - pad, max(ys) + pad) + ax.set_aspect("equal") + + +def _edge_order_label(order: float) -> str: + return str(int(order)) if float(order).is_integer() else f"{order:g}" + + +# ============================================================ +# DPO visualizer +# ============================================================ +def visualize_dpo_rule( # noqa: C901 + L: nx.Graph, + R: nx.Graph, + *, + mol: Optional[Chem.Mol] = None, + 
use_its: bool = False, + ax=None, + title: str | None = None, + layout: str = "kamada_kawai", + pos: dict | None = None, + seed: int = 0, + node_size: int = 900, + font_size: int = 10, + edge_width: float = 2.8, + show_edge_labels: bool = True, + show_node_labels: bool = True, + show_atom_map: bool = False, + show_legends: bool = True, + show_unchanged_edge_labels: bool = False, + order_tol: float = 1e-9, +): + """ + Visualize a DPO rule as L | K (or ITS) | R with a shared layout. + + The default layout is chemistry-aware: the union graph is converted with + ``graph_to_mol`` and laid out by RDKit. The ``mol`` argument is kept for + older notebooks but is no longer required. + + - use_its=False: middle panel is K (context); changed edges are dashed. + - use_its=True: middle panel shows the full ITS with (br,bp) edge labels. + """ + created_fig = False + if ax is None: + fig, axes = plt.subplots(1, 3, figsize=(13, 4), facecolor="white") + fig.subplots_adjust(wspace=0.05) + created_fig = True + else: + axes = ax + + dec = dpo_decompose_atom_conserving(L, R, order_tol=order_tol) + + # build K (context) graph: preserved nodes+edges only + K = nx.Graph() + for n in dec.K_nodes: + K.add_node(n, **(L.nodes[n] if n in L.nodes else R.nodes[n])) + for u, v in dec.K_edges: + if L.has_edge(u, v): + K.add_edge(u, v, **L.get_edge_data(u, v)) + else: + K.add_edge(u, v, **R.get_edge_data(u, v)) + + its = build_its_from_LR(L, R, dec=dec) if use_its else None + + # ── shared layout ───────────────────────────────────────────────────── + if pos is None: + union = nx.compose(L, R) + if mol is not None: + pos = _pos_from_mol(mol, union) + if pos is None: + pos = _graph_to_layout_pos(union) + if pos is None: + pos = _layout_pos(union, layout=layout, seed=seed) + + rc_nodes = set() + for e in dec.L_only_edges | dec.R_only_edges: + rc_nodes.update(e) + + def _draw_panel( + G: nx.Graph, + axp, + *, + panel_title: str, + panel_subtitle: str = "", + dashed_edges: Optional[set] = None, + 
dashed_color: str = "#D62728", + dashed_label: str = "", + ): + axp.set_facecolor("white") + axp.set_axis_off() + axp.set_title( + panel_title, + fontsize=font_size + 1, + fontweight="bold", + pad=6, + color="#1a1a1a", + ) + if panel_subtitle: + axp.text( + 0.5, + 0.98, + panel_subtitle, + transform=axp.transAxes, + ha="center", + va="top", + fontsize=max(7, font_size - 2), + color="#555555", + ) + + nodes = list(G.nodes()) + node_colors = [_fill(G.nodes[n].get("element", "?")) for n in nodes] + node_borders = [_border(G.nodes[n].get("element", "?")) for n in nodes] + label_colors = [ + ( + "white" + if _luminance(_fill(G.nodes[n].get("element", "?"))) < 0.50 + else "#1a1a1a" + ) + for n in nodes + ] + + _dashed = dashed_edges or set() + solid, dashed = [], [] + for u, v in G.edges(): + if _ekey(u, v) in _dashed: + dashed.append((u, v)) + else: + solid.append((u, v)) + + if solid: + nx.draw_networkx_edges( + G, + pos, + ax=axp, + edgelist=solid, + width=max(1.1, edge_width * 0.65), + edge_color="#2a2a2a", + alpha=0.65, + arrows=False, + ) + if dashed: + nx.draw_networkx_edges( + G, + pos, + ax=axp, + edgelist=dashed, + width=edge_width * 3.2, + edge_color=dashed_color, + alpha=0.18, + arrows=False, + ) + nx.draw_networkx_edges( + G, + pos, + ax=axp, + edgelist=dashed, + width=edge_width * 1.35, + edge_color=dashed_color, + style="dashed", + alpha=0.95, + arrows=False, + ) + + if rc_nodes: + rc_in_panel = [n for n in nodes if n in rc_nodes] + if rc_in_panel: + nc = nx.draw_networkx_nodes( + G, + pos, + ax=axp, + nodelist=rc_in_panel, + node_size=int(node_size * 1.9), + node_color="#FFD700", + edgecolors="none", + linewidths=0, + alpha=0.16, + ) + nc.set_zorder(1) + + lw = [ + ( + max(2.0, node_size**0.5 * 0.10) + if n in rc_nodes + else max(1.2, node_size**0.5 * 0.055) + ) + for n in nodes + ] + border_colors = [ + "#FFD700" if n in rc_nodes else node_borders[i] for i, n in enumerate(nodes) + ] + + nc = nx.draw_networkx_nodes( + G, + pos, + ax=axp, + nodelist=nodes, 
+ node_size=node_size, + node_color=node_colors, + edgecolors=border_colors, + linewidths=lw, + ) + nc.set_zorder(3) + + if show_node_labels: + for i, n in enumerate(nodes): + el = G.nodes[n].get("element", "?") + am = G.nodes[n].get("atom_map", n) + lbl = f"{el}:{am}" if show_atom_map else el + x, y = pos[n] + axp.text( + x, + y, + lbl, + ha="center", + va="center", + fontsize=font_size, + fontweight="bold", + color=label_colors[i], + zorder=9, + path_effects=[pe.withStroke(linewidth=1.4, foreground="none")], + ) + + if show_edge_labels: + elabs = {} + for u, v, d in G.edges(data=True): + o = float(d.get("order", 1.0)) + elabs[(u, v)] = _edge_order_label(o) + if elabs: + nx.draw_networkx_edge_labels( + G, + pos, + ax=axp, + edge_labels=elabs, + font_size=font_size - 1, + bbox=dict( + boxstyle="round,pad=0.1", fc="white", ec="none", alpha=0.85 + ), + ) + + if dashed_label and dashed: + axp.text( + 0.5, + 0.04, + dashed_label, + transform=axp.transAxes, + ha="center", + va="bottom", + fontsize=max(7, font_size - 2), + color=dashed_color, + fontweight="bold", + bbox=dict( + boxstyle="round,pad=0.25", + fc="white", + ec=dashed_color, + alpha=0.90, + linewidth=1.0, + ), + ) + + if title and created_fig: + fig.suptitle(title, fontsize=font_size + 2, fontweight="bold", color="#1a1a1a") + + _draw_panel( + L, + axes[0], + panel_title="L reactant pattern", + panel_subtitle=f"{L.number_of_nodes()} atoms · {L.number_of_edges()} bonds", + dashed_edges=dec.L_only_edges, + dashed_color="#D62728", + dashed_label=( + f"delete {len(dec.L_only_edges)} bond(s)" if dec.L_only_edges else "" + ), + ) + + if use_its: + _visualize_its( + its, + ax=axes[1], + title="ITS bond-change view", + pos=pos, + layout=layout, + node_size=node_size, + font_size=font_size, + edge_width=edge_width, + show_edge_labels=show_edge_labels, + show_unchanged_edge_labels=show_unchanged_edge_labels, + show_node_labels=show_node_labels, + show_atom_map=show_atom_map, + show_legend=False, + ) + else: + 
_draw_panel( + K, + axes[1], + panel_title="K preserved context", + panel_subtitle=f"{K.number_of_nodes()} atoms · {K.number_of_edges()} preserved bonds", + ) + + _draw_panel( + R, + axes[2], + panel_title="R product pattern", + panel_subtitle=f"{R.number_of_nodes()} atoms · {R.number_of_edges()} bonds", + dashed_edges=dec.R_only_edges, + dashed_color="#2CA02C", + dashed_label=f"add {len(dec.R_only_edges)} bond(s)" if dec.R_only_edges else "", + ) + + if created_fig: + axes[0].annotate( + "", + xy=(1.03, 0.50), + xytext=(0.97, 0.50), + xycoords="axes fraction", + arrowprops=dict(arrowstyle="-|>", color="#6b7280", lw=1.4), + annotation_clip=False, + ) + axes[1].annotate( + "", + xy=(1.03, 0.50), + xytext=(0.97, 0.50), + xycoords="axes fraction", + arrowprops=dict(arrowstyle="-|>", color="#6b7280", lw=1.4), + annotation_clip=False, + ) + + _set_shared_limits(axes, pos) + + # legends (optional) + if show_legends: + if use_its: + edge_handles = [ + Line2D([0], [0], color="#D62728", lw=3, label="broken (br>bp)"), + Line2D([0], [0], color="#2CA02C", lw=3, label="formed (br Chem.Mol: + """Ensure the molecule has 2D coords (in-place).""" + if m is None: + return m + if m.GetNumConformers() == 0: + # RDKit tends to do better with this than Compute2DCoords alone for some cases + AllChem.Compute2DCoords(m) + return m + + +def _bond_signature(m: Chem.Mol) -> Dict[Tuple[int, int], Tuple[int, int]]: + """ + Return mapping: (map_i, map_j) -> (bondTypeInt, isAromaticInt) + Only for bonds where both atoms have atom-map numbers. 
+ """ + out: Dict[Tuple[int, int], Tuple[int, int]] = {} + for b in m.GetBonds(): + a1, a2 = b.GetBeginAtom(), b.GetEndAtom() + m1, m2 = a1.GetAtomMapNum(), a2.GetAtomMapNum() + if m1 <= 0 or m2 <= 0: + continue + key = (m1, m2) if m1 < m2 else (m2, m1) + # bond type as an int-ish bucket + aromatic flag + bt = int(b.GetBondTypeAsDouble() * 10) # e.g., single=10, double=20 + ar = 1 if b.GetIsAromatic() else 0 + out[key] = (bt, ar) + return out + + +def _mapnum_to_atomidx(m: Chem.Mol) -> Dict[int, int]: + out: Dict[int, int] = {} + for a in m.GetAtoms(): + mn = a.GetAtomMapNum() + if mn > 0: + out[mn] = a.GetIdx() + return out + + +def _find_changed_bonds_by_atommap( # noqa: C901 + rxn: rdChemReactions.ChemicalReaction, +) -> Optional[RxnHighlights]: + """ + Detect changed bonds (formed/broken/changed order) using atom-map numbers. + If the reaction has no atom maps, return None. + """ + reactants = [m for m in rxn.GetReactants()] + products = [m for m in rxn.GetProducts()] + + # If we have essentially no mapping info, bail + total_mapped = 0 + for m in reactants + products: + total_mapped += sum(1 for a in m.GetAtoms() if a.GetAtomMapNum() > 0) + if total_mapped == 0: + return None + + # Build global bond signatures for each side + r_sig: Dict[Tuple[int, int], Tuple[int, int]] = {} + p_sig: Dict[Tuple[int, int], Tuple[int, int]] = {} + for m in reactants: + r_sig.update(_bond_signature(m)) + for m in products: + p_sig.update(_bond_signature(m)) + + changed_pairs = set(r_sig.keys()) | set(p_sig.keys()) + changed_pairs = {k for k in changed_pairs if r_sig.get(k) != p_sig.get(k)} + + # Precompute mapnum->atomidx per molecule, and mapnum->(mol_i, atom_i) + r_mn_loc: Dict[int, Tuple[int, int]] = {} + p_mn_loc: Dict[int, Tuple[int, int]] = {} + r_mn2aidx: List[Dict[int, int]] = [] + p_mn2aidx: List[Dict[int, int]] = [] + + for i, m in enumerate(reactants): + d = _mapnum_to_atomidx(m) + r_mn2aidx.append(d) + for mn, aidx in d.items(): + r_mn_loc[mn] = (i, aidx) + + for i, 
m in enumerate(products): + d = _mapnum_to_atomidx(m) + p_mn2aidx.append(d) + for mn, aidx in d.items(): + p_mn_loc[mn] = (i, aidx) + + # Collect atom + bond highlights per molecule index + r_atoms: Dict[int, List[int]] = {i: [] for i in range(len(reactants))} + r_bonds: Dict[int, List[int]] = {i: [] for i in range(len(reactants))} + p_atoms: Dict[int, List[int]] = {i: [] for i in range(len(products))} + p_bonds: Dict[int, List[int]] = {i: [] for i in range(len(products))} + + def _add_atom(side_atoms: Dict[int, List[int]], mol_i: int, atom_i: int) -> None: + if atom_i not in side_atoms[mol_i]: + side_atoms[mol_i].append(atom_i) + + def _add_bond( + side_bonds: Dict[int, List[int]], m: Chem.Mol, mol_i: int, a: int, b: int + ) -> None: + bond = m.GetBondBetweenAtoms(a, b) + if bond is None: + return + bi = bond.GetIdx() + if bi not in side_bonds[mol_i]: + side_bonds[mol_i].append(bi) + + # For each changed mapped pair, mark the corresponding bond (if present) + endpoint atoms + for mn1, mn2 in changed_pairs: + # reactants + if mn1 in r_mn_loc and mn2 in r_mn_loc: + mi1, ai1 = r_mn_loc[mn1] + mi2, ai2 = r_mn_loc[mn2] + if mi1 == mi2: + m = reactants[mi1] + _add_atom(r_atoms, mi1, ai1) + _add_atom(r_atoms, mi1, ai2) + _add_bond(r_bonds, m, mi1, ai1, ai2) + + # products + if mn1 in p_mn_loc and mn2 in p_mn_loc: + mi1, ai1 = p_mn_loc[mn1] + mi2, ai2 = p_mn_loc[mn2] + if mi1 == mi2: + m = products[mi1] + _add_atom(p_atoms, mi1, ai1) + _add_atom(p_atoms, mi1, ai2) + _add_bond(p_bonds, m, mi1, ai1, ai2) + + return RxnHighlights( + r_atoms=r_atoms, r_bonds=r_bonds, p_atoms=p_atoms, p_bonds=p_bonds + ) + + +def _reaction_molecules(rxn: rdChemReactions.ChemicalReaction) -> List[Chem.Mol]: + return list(rxn.GetReactants()) + list(rxn.GetAgents()) + list(rxn.GetProducts()) + + +def _auto_canvas_size( + rxn: rdChemReactions.ChemicalReaction, + size: Optional[Tuple[int, int]], + legend: Optional[str], +) -> Tuple[int, int]: + """Choose a compact canvas so small reactions do not 
become tiny.""" + if size is not None: + return size + + mols = _reaction_molecules(rxn) + n_components = max(1, len(mols)) + n_atoms = sum(m.GetNumAtoms() for m in mols if m is not None) + width = int(max(520, min(1500, 180 + 150 * n_components + 34 * n_atoms))) + height = 290 if legend else 250 + return width, height + + +def _fallback_sub_img_size( + canvas_size: Tuple[int, int], rxn: rdChemReactions.ChemicalReaction +) -> Tuple[int, int]: + n_panels = max(1, len(_reaction_molecules(rxn)) + 1) + sub_width = int(max(220, min(340, canvas_size[0] / n_panels))) + return sub_width, canvas_size[1] + + +def _add_svg_title(svg_text: str, title: str, canvas_size: Tuple[int, int]) -> str: + """Inject a centered SVG title after RDKit drawing finishes.""" + safe_title = html.escape(title) + title_svg = ( + f'{safe_title}' + ) + return svg_text.replace("", f"{title_svg}") + + +def _add_pil_title(image: Any, title: str, canvas_size: Tuple[int, int]) -> Any: + """Overlay a centered title on a PIL reaction image.""" + from PIL import ImageDraw, ImageFont + + image = image.convert("RGBA") + draw = ImageDraw.Draw(image) + try: + font = ImageFont.truetype( + "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", 20 + ) + except Exception: + font = ImageFont.load_default() + bbox = draw.textbbox((0, 0), title, font=font) + text_width = bbox[2] - bbox[0] + x = max(8, (canvas_size[0] - text_width) / 2) + draw.text((x, 8), title, fill="#1a1a1a", font=font) + return image + + +def visualize_reaction( # noqa: C901 + rsmi: str, + *, + size: Optional[Tuple[int, int]] = None, + sub_img_size: Tuple[int, int] = ( + 450, + 300, + ), # kept for compatibility; used in fallback + svg: bool = True, + kekulize: bool = False, + show_atom_maps: bool = False, + highlight_changes: bool = True, + legend: Optional[str] = None, + fixed_bond_length: Optional[float] = None, + padding: float = 0.06, +) -> Union[str, Any]: # Any covers PIL.Image.Image when Cairo is available + """ + More visual RDKit 
reaction rendering. + + Improvements vs Draw.ReactionToImage: + - Uses rdMolDraw2D for cleaner SVG/Cairo output and better control. + - Optional highlighting of changed bonds using atom-map numbers. + - Optional atom-map labels overlay (useful for debugging / talktorials). + - Title/legend support. + + Notes + ----- + - `highlight_changes=True` works best when rsmi contains atom-maps like [C:1]. + - For PNG/PIL output, your RDKit must be built with Cairo support. + + Parameters + ---------- + rsmi : str + Reaction SMILES / SMARTS (e.g. '[CH3:1][Br:2]>>[CH3:1][OH:2]'). + size : (w, h), optional + Canvas size in pixels. If omitted, a compact size is inferred from + the number of reaction components and atoms. + svg : bool + If True return SVG string; else return PIL image (Cairo). + kekulize : bool + If True kekulize molecules before drawing (sometimes nicer for aromatic). + show_atom_maps : bool + If True, draw atom-map numbers as labels. + highlight_changes : bool + If True, detect and highlight changed bonds (requires atom maps). + legend : str | None + Optional title at the top. + fixed_bond_length : float, optional + Affects perceived scale / whitespace. If omitted, a readable default + is chosen for the inferred canvas. + padding : float + Relative padding around the drawing. 
+ + Returns + ------- + str (SVG) or PIL.Image.Image + """ + rxn = rdChemReactions.ReactionFromSmarts(rsmi, useSmiles=True) + if rxn is None: + raise ValueError("Invalid reaction SMILES/SMARTS") + + rxn.Initialize() + canvas_size = _auto_canvas_size(rxn, size, legend) + bond_length = fixed_bond_length if fixed_bond_length is not None else 34.0 + + # Ensure 2D coords + for m in list(rxn.GetReactants()) + list(rxn.GetProducts()) + list(rxn.GetAgents()): + if m is not None: + _ensure_2d(m) + + # Optional kekulization (copy to be safe) + if kekulize: + + def _kek(m: Chem.Mol) -> Chem.Mol: + m2 = Chem.Mol(m) + try: + Chem.Kekulize(m2, clearAromaticFlags=True) + except Exception: + pass + return m2 + + rxn2 = rdChemReactions.ChemicalReaction() + for m in rxn.GetReactants(): + rxn2.AddReactantTemplate(_kek(m)) + for m in rxn.GetAgents(): + rxn2.AddAgentTemplate(_kek(m)) + for m in rxn.GetProducts(): + rxn2.AddProductTemplate(_kek(m)) + rxn2.Initialize() + rxn = rxn2 + + # Highlights + hl = _find_changed_bonds_by_atommap(rxn) if highlight_changes else None + # rdMolDraw2D expects a single highlight dict; for reactions, DrawReaction accepts + # per-mol highlights in newer RDKit builds. We'll attempt that; otherwise fallback. + # (Fallback still gives improved aesthetics via Draw.ReactionToImage.) 
+ try: + if svg: + drawer = rdMolDraw2D.MolDraw2DSVG(canvas_size[0], canvas_size[1]) + else: + drawer = rdMolDraw2D.MolDraw2DCairo(canvas_size[0], canvas_size[1]) + + opts = drawer.drawOptions() + opts.fixedBondLength = bond_length + opts.padding = padding + opts.continuousHighlight = True + opts.highlightBondWidthMultiplier = 18 + opts.useBWAtomPalette() # crisp, publication-ish defaults + + if show_atom_maps: + # draw atom-map numbers + opts.atomLabels = {} # type: ignore[attr-defined] + for m in ( + list(rxn.GetReactants()) + + list(rxn.GetAgents()) + + list(rxn.GetProducts()) + ): + for a in m.GetAtoms(): + mn = a.GetAtomMapNum() + if mn > 0: + opts.atomLabels[(m, a.GetIdx())] = str( + mn + ) # may be ignored in some builds + + # Per-molecule highlights (reactants/products only; agents usually ignored) + if hl is not None: + # Build per-template highlight specs + # Newer RDKit supports passing these directly to DrawReaction. + drawer.DrawReaction( + rxn, + highlightByReactant=False, + highlightReactantAtoms=hl.r_atoms, + highlightReactantBonds=hl.r_bonds, + highlightProductAtoms=hl.p_atoms, + highlightProductBonds=hl.p_bonds, + ) + else: + drawer.DrawReaction(rxn, highlightByReactant=False) + + drawer.FinishDrawing() + if svg: + svg_text = drawer.GetDrawingText() + return _add_svg_title(svg_text, legend, canvas_size) if legend else svg_text + else: + # Cairo returns PNG bytes + from PIL import Image + import io + + png = drawer.GetDrawingText() + image = Image.open(io.BytesIO(png)) + return _add_pil_title(image, legend, canvas_size) if legend else image + + except Exception: + # Safe fallback (still decent) if DrawReaction signature differs in your RDKit build + from rdkit.Chem import Draw as _Draw + + fallback = _Draw.ReactionToImage( + rxn, + subImgSize=( + _fallback_sub_img_size(canvas_size, rxn) + if size is None + else sub_img_size + ), + useSVG=svg, + ) + if legend and svg: + return _add_svg_title(fallback, legend, canvas_size) + if legend: + return 
_add_pil_title(fallback, legend, canvas_size) + return fallback diff --git a/synkit/Vis/vis_synedu/vis.py b/synkit/Vis/vis_synedu/vis.py new file mode 100644 index 0000000..07dfdde --- /dev/null +++ b/synkit/Vis/vis_synedu/vis.py @@ -0,0 +1,501 @@ +from __future__ import annotations + +from typing import Dict, List, Optional, Set, Tuple +import math + +import matplotlib.pyplot as plt +import matplotlib.patches as mpatches +import matplotlib.patheffects as pe +import networkx as nx +from rdkit import Chem +from rdkit.Chem import AllChem, Draw + +from .conversion import graph_to_mol + +# ── CPK-inspired palette (fill, border) ─────────────────────────────────── +_ELEMENT_PALETTE: Dict[str, Tuple[str, str]] = { + "C": ("#636363", "#3d3d3d"), + "O": ("#E8524A", "#b83830"), + "N": ("#5B8DD9", "#3a65b0"), + "S": ("#E8A838", "#c07a10"), + "Cl": ("#3DBE6C", "#1e8a46"), + "F": ("#5BC8AF", "#2a9178"), + "Br": ("#A0522D", "#6b3118"), + "I": ("#8C54C8", "#5e2fa0"), + "P": ("#E878C8", "#b84898"), + "H": ("#C8C8C8", "#909090"), + "Na": ("#AB5CF2", "#7b34c8"), + "Mg": ("#8AFF00", "#58b000"), + "Si": ("#F0C8A0", "#b88860"), +} +_DEFAULT_FILL = "#A0A0A0" +_DEFAULT_BORDER = "#606060" + + +def _fill(el: str) -> str: + return _ELEMENT_PALETTE.get(el, (_DEFAULT_FILL, _DEFAULT_BORDER))[0] + + +def _border(el: str) -> str: + return _ELEMENT_PALETTE.get(el, (_DEFAULT_FILL, _DEFAULT_BORDER))[1] + + +def _luminance(hex_color: str) -> float: + h = hex_color.lstrip("#") + r, g, b = (int(h[i : i + 2], 16) / 255.0 for i in (0, 2, 4)) # noqa + return 0.2126 * r + 0.7152 * g + 0.0722 * b + + +def _ensure_2d(mol: Chem.Mol) -> None: + if mol.GetNumConformers() == 0: + AllChem.Compute2DCoords(mol) + + +def _avg_edge_length(pos: Dict, G: nx.Graph) -> float: + if G.number_of_edges() == 0: + return 1.0 + lengths = [ + math.hypot(pos[int(v)][0] - pos[int(u)][0], pos[int(v)][1] - pos[int(u)][1]) + for u, v in G.edges() + ] + return sum(lengths) / len(lengths) + + +def _perp_offset(p1, p2, offset): + dx, 
dy = p2[0] - p1[0], p2[1] - p1[1] + L = math.hypot(dx, dy) + if L == 0: + return 0.0, 0.0 + return -dy / L * offset, dx / L * offset + + +def _index_offset_vec(n, G, pos, *, base): + x, y = pos[n] + nbrs = [int(m) for m in G.neighbors(n)] + if not nbrs: + return 0.0, base + cx = sum(pos[m][0] for m in nbrs) / len(nbrs) + cy = sum(pos[m][1] for m in nbrs) / len(nbrs) + dx, dy = x - cx, y - cy + L = math.hypot(dx, dy) + if L == 0: + return 0.0, base + return dx / L * base, dy / L * base + + +def _draw_bond_lines( + ax, p1, p2, *, order, aromatic, aromatic_style, offset, lw, color="k" +): + kw = dict( + color=color, linewidth=lw, solid_capstyle="round", solid_joinstyle="round" + ) + if aromatic and aromatic_style == "dashed": + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], linestyle="--", **kw) + return + if aromatic: + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], **kw) + return + if order <= 1: + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], **kw) + return + dx, dy = _perp_offset(p1, p2, offset) + if order == 2: + ax.plot([p1[0] + dx, p2[0] + dx], [p1[1] + dy, p2[1] + dy], **kw) + ax.plot([p1[0] - dx, p2[0] - dx], [p1[1] - dy, p2[1] - dy], **kw) + elif order == 3: + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], **{**kw, "linewidth": lw * 0.9}) + ax.plot( + [p1[0] + dx, p2[0] + dx], + [p1[1] + dy, p2[1] + dy], + **{**kw, "linewidth": lw * 0.9}, + ) + ax.plot( + [p1[0] - dx, p2[0] - dx], + [p1[1] - dy, p2[1] - dy], + **{**kw, "linewidth": lw * 0.9}, + ) + else: + ax.plot([p1[0], p2[0]], [p1[1], p2[1]], **kw) + + +def _draw_aromatic_circles(ax, G, pos, scale): + for cyc in nx.cycle_basis(G): + if len(cyc) < 5: + continue + if not all(bool(G.nodes[int(n)].get("aromatic", False)) for n in cyc): + continue + ok = all( + bool( + G.edges[int(cyc[i]), int(cyc[(i + 1) % len(cyc)])].get( + "aromatic", False + ) + ) + for i in range(len(cyc)) + ) + if not ok: + continue + xs = [pos[int(n)][0] for n in cyc] + ys = [pos[int(n)][1] for n in cyc] + cx, cy = sum(xs) / len(xs), sum(ys) / len(ys) + rs = 
[math.hypot(x - cx, y - cy) for x, y in zip(xs, ys)] + r = (sum(rs) / len(rs)) * scale + ax.add_patch( + mpatches.Circle( + (cx, cy), r, fill=False, linewidth=1.2, color="#333333", zorder=1 + ) + ) + + +def _set_padded_limits(ax, pos: Dict[int, Tuple[float, float]], avg_len: float) -> None: + """Pad plot limits so node markers and index labels are not clipped.""" + if not pos: + return + + xs = [p[0] for p in pos.values()] + ys = [p[1] for p in pos.values()] + x_span = max(xs) - min(xs) + y_span = max(ys) - min(ys) + pad = max(avg_len * 0.45, x_span * 0.08, y_span * 0.08, 0.20) + + ax.set_xlim(min(xs) - pad, max(xs) + pad) + ax.set_ylim(min(ys) - pad, max(ys) + pad) + + +def _ordered_graph_for_layout( + G: nx.Graph, nodelist: List[int] +) -> Tuple[nx.Graph, Dict[int, int]]: + """Create an insertion-ordered graph and mapping node id -> RDKit atom idx.""" + ordered = nx.Graph() + for node in nodelist: + ordered.add_node(node, **G.nodes[node]) + for u, v, data in G.edges(data=True): + ordered.add_edge(int(u), int(v), **data) + return ordered, {node: idx for idx, node in enumerate(nodelist)} + + +def _graph_to_layout_mol(G: nx.Graph) -> Chem.Mol: + try: + return graph_to_mol(G, sanitize=True) + except Exception: + return graph_to_mol(G, sanitize=False) + + +def _layout_from_graph_mol( + G: nx.Graph, + nodelist: List[int], +) -> Dict[int, Tuple[float, float]]: + """ + Compute RDKit 2D coordinates for graph nodes. + + The molecule is reconstructed from the graph itself so callers do not need + to pass a parallel RDKit Mol object. Coordinates are mapped back to the + original graph node ids. 
+ """ + try: + ordered, node_to_atom = _ordered_graph_for_layout(G, nodelist) + layout_mol = _graph_to_layout_mol(ordered) + _ensure_2d(layout_mol) + conf = layout_mol.GetConformer(0) + pos = {} + for node in nodelist: + p = conf.GetAtomPosition(node_to_atom[node]) + pos[node] = (p.x, p.y) + return pos + except Exception: + return { + int(k): (float(v[0]), float(v[1])) + for k, v in nx.kamada_kawai_layout(G).items() + } + + +def draw_molecular_graph( # noqa: C901 + G: nx.Graph, + *, + ax: Optional[plt.Axes] = None, + title: Optional[str] = None, + include_mol: bool = False, + label_mode: str = "hetero", # "all" | "hetero" | "none" + show_indices: bool = False, + indices_for_carbons: bool = True, + show_bond_labels: bool = False, + aromatic_style: str = "circle", # "circle" | "dashed" + # --- sizing (auto-scaled to graph; override if needed) --- + node_size: Optional[int] = None, + bond_lw: Optional[float] = None, + figsize: Tuple[float, float] = (6, 5), + # --- highlighting --- + highlight_nodes: Optional[Set[int]] = None, + highlight_edges: Optional[Set[Tuple[int, int]]] = None, + highlight_color: str = "#FF7F0E", + highlight_alpha: float = 0.85, + # --- custom node colors (overrides element palette) --- + custom_node_colors: Optional[Dict[int, str]] = None, + # --- typography --- + element_fontsize: Optional[int] = None, + index_fontsize: Optional[int] = None, + title_fontsize: int = 11, +) -> plt.Axes: + """ + Visualize a labeled molecular NetworkX graph with CPK-style node coloring, + element borders, proper bond styles, and optional MCS/WL highlighting. 
+ """ + aromatic_style = aromatic_style.lower() + label_mode = label_mode.lower() + + hl_edges_norm: Set[Tuple[int, int]] = set() + if highlight_edges: + for u, v in highlight_edges: + hl_edges_norm.add((min(int(u), int(v)), max(int(u), int(v)))) + + # ── figure / axis setup ────────────────────────────────────────────── + created_fig = False + if include_mol: + fig, (ax_mol, ax_g) = plt.subplots( + 1, 2, figsize=(figsize[0] * 2, figsize[1]), facecolor="white" + ) + created_fig = True + elif ax is None: + fig, ax_g = plt.subplots(figsize=figsize, facecolor="white") + created_fig = True + else: + ax_g = ax + fig = ax_g.figure + + ax_g.set_facecolor("white") + + # ── stable node order ──────────────────────────────────────────────── + nodelist = sorted(int(n) for n in G.nodes()) + n_nodes = len(nodelist) + + # ── auto-scale sizes ───────────────────────────────────────────────── + _ns = ( + node_size + if node_size is not None + else max(180, min(500, 4000 // max(n_nodes, 1))) + ) + _lw = bond_lw if bond_lw is not None else max(1.2, min(2.2, 18 / max(n_nodes, 1))) + _efs = ( + element_fontsize + if element_fontsize is not None + else max(7, min(11, 90 // max(n_nodes, 1))) + ) + _ifs = index_fontsize if index_fontsize is not None else _efs + 1 + + # ── positions: graph -> RDKit 2D layout; fallback to NetworkX layout ── + pos = _layout_from_graph_mol(G, nodelist) + + avg_len = _avg_edge_length(pos, G) + bond_offset = avg_len * 0.09 + idx_offset = avg_len * 0.16 + + # ── node styling ───────────────────────────────────────────────────── + node_colors: List[str] = [] + node_borders: List[str] = [] + element_labels: Dict[int, str] = {} + label_colors: Dict[int, str] = {} + + for n in nodelist: + data = G.nodes[n] + el = str(data.get("element", "C")) + + if custom_node_colors and n in custom_node_colors: + fill = custom_node_colors[n] + bord = fill # same color, will look fine + else: + fill = _fill(el) + bord = _border(el) + + node_colors.append(fill) + 
node_borders.append(bord) + + if label_mode == "none": + txt = "" + elif label_mode == "hetero" and el == "C": + txt = "" + else: + txt = el + element_labels[n] = txt + label_colors[n] = "white" if _luminance(fill) < 0.50 else "#1a1a1a" + + # ── draw highlight glow (under nodes) ──────────────────────────────── + if highlight_nodes: + hl = [int(n) for n in highlight_nodes if int(n) in G] + if hl: + nc = nx.draw_networkx_nodes( + G, + pos, + nodelist=hl, + node_size=int(_ns * 2.2), + node_color=highlight_color, + edgecolors="none", + linewidths=0, + ax=ax_g, + alpha=0.25, + ) + nc.set_zorder(1) + + # ── draw edges ─────────────────────────────────────────────────────── + for u, v, data in G.edges(data=True): + u, v = int(u), int(v) + p1, p2 = pos[u], pos[v] + aromatic = bool(data.get("aromatic", False)) + try: + order = 1 if aromatic else int(round(abs(float(data.get("order", 1.0))))) + except Exception: + order = 1 + + # highlight glow under edge + if hl_edges_norm and (min(u, v), max(u, v)) in hl_edges_norm: + ax_g.plot( + [p1[0], p2[0]], + [p1[1], p2[1]], + color=highlight_color, + linewidth=_lw * 4.5, + alpha=0.3, + solid_capstyle="round", + zorder=1, + ) + ax_g.plot( + [p1[0], p2[0]], + [p1[1], p2[1]], + color=highlight_color, + linewidth=_lw * 2.2, + alpha=highlight_alpha, + solid_capstyle="round", + zorder=2, + ) + + _draw_bond_lines( + ax_g, + p1, + p2, + order=order, + aromatic=aromatic, + aromatic_style=aromatic_style, + offset=bond_offset, + lw=_lw, + color="#2a2a2a", + ) + + if show_bond_labels and not aromatic: + mx, my = (p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2 + ax_g.text( + mx, + my, + str(order), + fontsize=7, + ha="center", + va="center", + color="#333333", + bbox=dict(boxstyle="round,pad=0.12", fc="white", ec="none", alpha=0.9), + zorder=8, + ) + + if aromatic_style == "circle": + _draw_aromatic_circles(ax_g, G, pos, scale=0.52) + + # ── draw nodes ─────────────────────────────────────────────────────── + nc = nx.draw_networkx_nodes( + G, + pos, + 
nodelist=nodelist, + node_size=_ns, + node_color=node_colors, + edgecolors=node_borders, + linewidths=max(1.0, _ns**0.5 * 0.065), + ax=ax_g, + ) + nc.set_zorder(3) + + # highlight ring (on top of node) + if highlight_nodes: + hl = [int(n) for n in highlight_nodes if int(n) in G] + if hl: + nc = nx.draw_networkx_nodes( + G, + pos, + nodelist=hl, + node_size=int(_ns * 1.35), + node_color="none", + edgecolors=highlight_color, + linewidths=max(2.0, _ns**0.5 * 0.12), + ax=ax_g, + alpha=highlight_alpha, + ) + nc.set_zorder(4) + + # ── element labels ─────────────────────────────────────────────────── + for n in nodelist: + txt = element_labels.get(n, "") + if not txt: + continue + x, y = pos[n] + ax_g.text( + x, + y, + txt, + ha="center", + va="center", + fontsize=_efs, + fontweight="bold", + color=label_colors[n], + zorder=9, + ) + + # ── index labels ───────────────────────────────────────────────────── + if show_indices: + for n in nodelist: + el = str(G.nodes[n].get("element", "C")) + if label_mode == "hetero" and el == "C" and not indices_for_carbons: + continue + x, y = pos[n] + dx, dy = _index_offset_vec(n, G, pos, base=idx_offset) + ax_g.text( + x + dx, + y + dy, + str(n), + fontsize=_ifs, + ha="center", + va="center", + color="#222222", + fontweight="bold", + path_effects=[pe.withStroke(linewidth=2.5, foreground="white")], + zorder=10, + ) + + # ── title ──────────────────────────────────────────────────────────── + if title: + ax_g.set_title( + title, fontsize=title_fontsize, fontweight="bold", pad=6, color="#1a1a1a" + ) + + _set_padded_limits(ax_g, pos, avg_len) + ax_g.set_axis_off() + ax_g.set_aspect("equal") + + # ── optional RDKit panel ────────────────────────────────────────────── + if include_mol: + ax_mol.set_axis_off() + try: + ordered, _ = _ordered_graph_for_layout(G, nodelist) + display_mol = _graph_to_layout_mol(ordered) + except Exception: + display_mol = None + if display_mol is not None: + _ensure_2d(display_mol) + try: + dopt = 
Draw.MolDrawOptions() + dopt.addAtomIndices = bool(show_indices) + img = Draw.MolToImage( + display_mol, size=(500, 500), kekulize=False, options=dopt + ) + except Exception: + img = Draw.MolToImage(display_mol, size=(500, 500), kekulize=False) + ax_mol.imshow(img) + if created_fig: + fig.tight_layout() + return fig, (ax_mol, ax_g) + + if created_fig: + fig.tight_layout() + return ax_g diff --git a/synkit/Vis/visual_drawer.py b/synkit/Vis/visual_drawer.py new file mode 100644 index 0000000..dcd8ad9 --- /dev/null +++ b/synkit/Vis/visual_drawer.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +"""Matplotlib drawing helpers for representation-aware SynKit visuals.""" + +from typing import Any, Mapping + +import matplotlib.pyplot as plt +import networkx as nx + +from synkit.Vis.visual_model import VisualGraph, to_visual_graph + +ELEMENT_COLORS = { + "H": "#ffffff", + "C": "#f8fafc", + "N": "#bfdbfe", + "O": "#fecaca", + "F": "#bbf7d0", + "Cl": "#bbf7d0", + "Br": "#fed7aa", + "I": "#ddd6fe", + "S": "#fde68a", + "P": "#fecdd3", + "B": "#e7e5e4", + "Si": "#e9d5ff", +} + + +def draw_graph( + graph: nx.Graph | VisualGraph, + *, + ax: plt.Axes | None = None, + mode: str = "compact", + title: str | None = None, + show_atom_map: bool = True, + layout: str = "spring", + pos: Mapping[Any, tuple[float, float]] | None = None, + seed: int = 7, + node_size: int = 980, + font_size: int = 9, + edge_label_font_size: int = 8, + show_edge_labels: bool = True, + show_node_badges: bool = True, +) -> tuple[plt.Figure, plt.Axes]: + """Draw a molecule, ITS, or MTG graph using the visual adapter. + + :param graph: Raw NetworkX graph or already adapted ``VisualGraph``. + :type graph: Union[nx.Graph, VisualGraph] + :param ax: Optional Matplotlib axes. + :type ax: Optional[plt.Axes] + :param mode: Adapter label mode, e.g. ``compact``, ``electron``, + ``sigma_pi``, or ``timeline``. + :type mode: str + :param title: Optional title. Defaults to the detected visual kind. 
+ :type title: Optional[str] + :param show_atom_map: Include atom maps in labels when adapting raw graphs. + :type show_atom_map: bool + :param layout: Layout name: ``spring``, ``kamada_kawai``, ``circular``, or + ``shell``. + :type layout: str + :param pos: Optional fixed positions. + :type pos: Optional[Mapping[Any, Tuple[float, float]]] + :returns: ``(figure, axes)``. + :rtype: Tuple[plt.Figure, plt.Axes] + """ + + visual = ( + graph + if isinstance(graph, VisualGraph) + else to_visual_graph( + graph, + mode=mode, # type: ignore[arg-type] + show_atom_map=show_atom_map, + title=title or "", + ) + ) + nx_graph = _to_nx_graph(visual) + + if ax is None: + fig, ax = plt.subplots(figsize=_figure_size(nx_graph)) + else: + fig = ax.figure + + if pos is None: + pos = _layout(nx_graph, layout=layout, seed=seed) + + ax.clear() + ax.set_axis_off() + ax.set_aspect("equal") + ax.set_title(title or visual.title or visual.kind, fontsize=12, fontweight="bold") + + edges = list(nx_graph.edges(data=True)) + nodes = list(nx_graph.nodes(data=True)) + + if edges: + nx.draw_networkx_edges( + nx_graph, + pos, + ax=ax, + edge_color=[data["visual_color"] for _, _, data in edges], + width=[data["visual_width"] for _, _, data in edges], + alpha=0.88, + ) + + node_collection = nx.draw_networkx_nodes( + nx_graph, + pos, + ax=ax, + node_color=[data["fill"] for _, data in nodes], + edgecolors=[data["border"] for _, data in nodes], + linewidths=[2.4 if data["changed"] else 1.2 for _, data in nodes], + node_size=node_size, + ) + node_collection.set_zorder(3) + + labels = { + node: _node_label(data, show_node_badges=show_node_badges) + for node, data in nx_graph.nodes(data=True) + } + nx.draw_networkx_labels( + nx_graph, + pos, + labels=labels, + ax=ax, + font_size=font_size, + font_color="#111827", + ) + + if show_edge_labels: + edge_labels = { + (u, v): data["label"] + for u, v, data in nx_graph.edges(data=True) + if data.get("label") + } + if edge_labels: + nx.draw_networkx_edge_labels( + 
nx_graph, + pos, + edge_labels=edge_labels, + ax=ax, + font_size=edge_label_font_size, + font_color="#111827", + bbox={ + "boxstyle": "round,pad=0.18", + "fc": "white", + "ec": "#d1d5db", + "alpha": 0.92, + }, + ) + + _pad_limits(ax, pos) + return fig, ax + + +def _to_nx_graph(visual: VisualGraph) -> nx.Graph: + graph = nx.Graph() + for node in visual.nodes: + graph.add_node( + node.node_id, + label=node.label, + badges=node.badges, + changed=node.changed, + fill=ELEMENT_COLORS.get(node.element or "", "#f3f4f6"), + border="#dc2626" if node.changed else "#374151", + ) + for edge in visual.edges: + graph.add_edge( + edge.source, + edge.target, + label=edge.label, + state=edge.state, + visual_color=edge.color, + visual_width=edge.width, + ) + return graph + + +def _node_label(data: Mapping[str, Any], *, show_node_badges: bool) -> str: + label = str(data.get("label", "")) + badges = data.get("badges") or () + if show_node_badges and badges: + return f"{label}\n{' '.join(badges)}" + return label + + +def _layout(graph: nx.Graph, *, layout: str, seed: int) -> dict[Any, Any]: + if graph.number_of_nodes() == 0: + return {} + if layout == "spring": + return nx.spring_layout(graph, seed=seed, k=1.1) + if layout == "kamada_kawai": + return nx.kamada_kawai_layout(graph) + if layout == "circular": + return nx.circular_layout(graph) + if layout == "shell": + return nx.shell_layout(graph) + raise ValueError("layout must be one of: spring, kamada_kawai, circular, shell") + + +def _figure_size(graph: nx.Graph) -> tuple[float, float]: + n_nodes = max(1, graph.number_of_nodes()) + width = min(12.0, max(4.8, 1.25 * n_nodes)) + height = min(8.0, max(3.6, 0.85 * n_nodes)) + return width, height + + +def _pad_limits(ax: plt.Axes, pos: Mapping[Any, Any]) -> None: + if not pos: + return + xs = [point[0] for point in pos.values()] + ys = [point[1] for point in pos.values()] + x_span = max(xs) - min(xs) + y_span = max(ys) - min(ys) + pad = max(x_span, y_span, 1.0) * 0.18 + 
ax.set_xlim(min(xs) - pad, max(xs) + pad) + ax.set_ylim(min(ys) - pad, max(ys) + pad) diff --git a/synkit/Vis/visual_model.py b/synkit/Vis/visual_model.py new file mode 100644 index 0000000..f0d1ae4 --- /dev/null +++ b/synkit/Vis/visual_model.py @@ -0,0 +1,520 @@ +from __future__ import annotations + +"""Representation-aware visual adapters for SynKit graphs. + +This module is intentionally lightweight: it detects the graph representation +and converts raw NetworkX attributes into stable labels/colors that drawing +backends can consume. The adapters never mutate the input graph. +""" + +from dataclasses import dataclass, field +from typing import Any, Dict, Hashable, Iterable, Literal, Mapping + +import networkx as nx + +VisualKind = Literal[ + "molecule", + "legacy_its", + "tuple_its", + "compact_mtg", + "mechanism_dag", + "unknown", +] + + +@dataclass(frozen=True) +class VisualNode: + """Drawing-ready node information.""" + + node_id: Hashable + label: str + element: str | None = None + atom_map: int | None = None + badges: tuple[str, ...] = () + changed: bool = False + raw: Mapping[str, Any] = field(default_factory=dict) + + +@dataclass(frozen=True) +class VisualEdge: + """Drawing-ready edge information.""" + + source: Hashable + target: Hashable + label: str = "" + state: str = "unchanged" + color: str = "#2f3437" + width: float = 2.0 + raw: Mapping[str, Any] = field(default_factory=dict) + + +@dataclass(frozen=True) +class VisualGraph: + """A compact, immutable view of graph content for renderers.""" + + kind: VisualKind + nodes: tuple[VisualNode, ...] + edges: tuple[VisualEdge, ...] 
+ title: str = "" + metadata: Mapping[str, Any] = field(default_factory=dict) + + +BOND_SYMBOLS = { + None: "∅", + 0: "∅", + 0.0: "∅", + 1: "—", + 1.0: "—", + 1.5: ":", + 2: "=", + 2.0: "=", + 3: "≡", + 3.0: "≡", +} + +EDGE_COLORS = { + "unchanged": "#6b7280", + "formed": "#15803d", + "broken": "#b91c1c", + "order_changed": "#ca8a04", + "transient": "#7c3aed", + "unknown": "#2f3437", +} + +NODE_TIMELINE_ATTRS = ( + "aromatic", + "hcount", + "charge", + "radical", + "lone_pairs", + "present", +) +EDGE_TIMELINE_ATTRS = ("order", "kekule_order", "sigma_order", "pi_order") +ELECTRON_NODE_ATTRS = ("charge", "hcount", "lone_pairs", "radical") + + +def detect_visual_kind(graph: nx.Graph) -> VisualKind: + """Return the visualization representation kind for ``graph``. + + :param graph: NetworkX graph to inspect. + :type graph: nx.Graph + :returns: Detected graph kind. + :rtype: VisualKind + """ + + if not isinstance(graph, nx.Graph): + return "unknown" + + if _looks_like_mechanism_dag(graph): + return "mechanism_dag" + if _looks_like_compact_mtg(graph): + return "compact_mtg" + if _looks_like_tuple_its(graph): + return "tuple_its" + if _looks_like_legacy_its(graph): + return "legacy_its" + if _looks_like_molecule(graph): + return "molecule" + return "unknown" + + +def to_visual_graph( + graph: nx.Graph, + *, + kind: VisualKind | None = None, + mode: Literal["compact", "electron", "sigma_pi", "timeline"] = "compact", + show_atom_map: bool = True, + title: str = "", +) -> VisualGraph: + """Adapt a SynKit graph to drawing-ready labels. + + :param graph: NetworkX graph to adapt. + :type graph: nx.Graph + :param kind: Optional explicit representation kind. + :type kind: Optional[VisualKind] + :param mode: Label density. ``sigma_pi`` and ``timeline`` are mostly useful + for tuple ITS and compact MTG. + :type mode: str + :param show_atom_map: Include atom-map numbers in node labels when present. 
+ :type show_atom_map: bool + :param title: Optional title carried to renderer metadata. + :type title: str + :returns: Immutable visual graph model. + :rtype: VisualGraph + """ + + detected = kind or detect_visual_kind(graph) + nodes = tuple( + _adapt_node(node_id, attrs, detected, mode=mode, show_atom_map=show_atom_map) + for node_id, attrs in graph.nodes(data=True) + ) + edges = tuple( + _adapt_edge(u, v, attrs, detected, mode=mode) + for u, v, attrs in graph.edges(data=True) + ) + return VisualGraph( + kind=detected, + nodes=nodes, + edges=edges, + title=title, + metadata={ + "mode": mode, + "node_count": graph.number_of_nodes(), + "edge_count": graph.number_of_edges(), + }, + ) + + +def summarize_visual_graph(visual: VisualGraph) -> Dict[str, Any]: + """Return a notebook-friendly summary of a visual graph.""" + + return { + "kind": visual.kind, + "title": visual.title, + "metadata": dict(visual.metadata), + "nodes": [ + { + "id": node.node_id, + "label": node.label, + "badges": list(node.badges), + "changed": node.changed, + } + for node in visual.nodes + ], + "edges": [ + { + "source": edge.source, + "target": edge.target, + "label": edge.label, + "state": edge.state, + "color": edge.color, + } + for edge in visual.edges + ], + } + + +def _looks_like_molecule(graph: nx.Graph) -> bool: + return bool(graph.nodes) and all( + "element" in attrs and not _is_pair(attrs.get("element")) + for _, attrs in graph.nodes(data=True) + ) + + +def _looks_like_legacy_its(graph: nx.Graph) -> bool: + if not graph.edges: + return False + has_pair_order = any( + _is_pair(attrs.get("order")) for _, _, attrs in graph.edges(data=True) + ) + has_tuple_electron_edges = any( + key in attrs + for _, _, attrs in graph.edges(data=True) + for key in ("sigma_order", "pi_order", "kekule_order") + ) + return has_pair_order and not has_tuple_electron_edges + + +def _looks_like_tuple_its(graph: nx.Graph) -> bool: + if not graph.nodes and not graph.edges: + return False + node_pair = any( + 
_is_pair(attrs.get(key)) + for _, attrs in graph.nodes(data=True) + for key in ("element", "hcount", "charge", "lone_pairs", "radical", "present") + ) + edge_pair = any( + _is_pair(attrs.get(key)) + for _, _, attrs in graph.edges(data=True) + for key in ("sigma_order", "pi_order", "kekule_order") + ) + return node_pair or edge_pair + + +def _looks_like_compact_mtg(graph: nx.Graph) -> bool: + graph_steps = graph.graph.get("steps") + if isinstance(graph_steps, int) and graph_steps >= 2: + return True + has_steps_attr = any( + "steps" in attrs for _, attrs in graph.nodes(data=True) + ) or any("steps" in attrs for _, _, attrs in graph.edges(data=True)) + if not has_steps_attr: + return False + has_timeline = any( + _is_timeline(attrs.get(key)) + for _, attrs in graph.nodes(data=True) + for key in NODE_TIMELINE_ATTRS + ) or any( + _is_timeline(attrs.get(key)) + for _, _, attrs in graph.edges(data=True) + for key in EDGE_TIMELINE_ATTRS + ) + return has_timeline + + +def _looks_like_mechanism_dag(graph: nx.Graph) -> bool: + if not graph.is_directed(): + return False + graph_kind = str(graph.graph.get("kind", "")).lower() + if graph_kind in {"mechanism_dag", "mechanism", "reaction_dag"}: + return True + return any( + attrs.get("kind") in {"reaction", "step", "species"} + for _, attrs in graph.nodes(data=True) + ) + + +def _adapt_node( + node_id: Hashable, + attrs: Mapping[str, Any], + kind: VisualKind, + *, + mode: str, + show_atom_map: bool, +) -> VisualNode: + element = _first_present(attrs.get("element")) + atom_map = _first_present(attrs.get("atom_map")) + label = str(element or node_id) + if show_atom_map and atom_map not in (None, 0): + label = f"{label}:{atom_map}" + elif show_atom_map and atom_map == 0: + label = f"{label}:{node_id}" + + badges = _node_badges(attrs, kind, mode) + changed = bool(badges) or any( + _is_changed_pair(attrs.get(key)) for key in ELECTRON_NODE_ATTRS + ) + return VisualNode( + node_id=node_id, + label=label, + element=str(element) if 
element is not None else None, + atom_map=int(atom_map) if isinstance(atom_map, int) else None, + badges=tuple(badges), + changed=changed, + raw=dict(attrs), + ) + + +def _adapt_edge( + u: Hashable, + v: Hashable, + attrs: Mapping[str, Any], + kind: VisualKind, + *, + mode: str, +) -> VisualEdge: + if kind == "compact_mtg": + label = _mtg_edge_label(attrs, mode) + state = _timeline_edge_state(attrs) + elif kind == "tuple_its": + label = _tuple_its_edge_label(attrs, mode) + state = _pair_edge_state(_preferred_edge_pair(attrs)) + elif kind == "legacy_its": + pair = attrs.get("order", (None, None)) + label = _pair_label(pair) + state = _pair_edge_state(pair) + else: + order = attrs.get("order") + label = _bond_symbol(order) + state = "unchanged" + + return VisualEdge( + source=u, + target=v, + label=label, + state=state, + color=EDGE_COLORS.get(state, EDGE_COLORS["unknown"]), + width=( + 3.0 if state in {"formed", "broken", "order_changed", "transient"} else 2.0 + ), + raw=dict(attrs), + ) + + +def _node_badges(attrs: Mapping[str, Any], kind: VisualKind, mode: str) -> list[str]: + badges: list[str] = [] + if kind in {"tuple_its", "compact_mtg"}: + for key in ELECTRON_NODE_ATTRS: + value = attrs.get(key) + if kind == "compact_mtg" and _is_timeline(value): + if _timeline_changes(value) or mode in {"electron", "timeline"}: + badges.append(f"{_short_node_key(key)}:{_format_timeline(value)}") + elif _is_pair(value): + if value[0] != value[1] or mode in {"electron", "timeline"}: + badges.append(f"{_short_node_key(key)}:{_format_pair(value)}") + elif mode in {"electron", "timeline"} and value not in (None, 0, False): + badges.append(f"{_short_node_key(key)}:{value}") + elif kind == "molecule": + for key in ("charge", "radical", "lone_pairs"): + value = attrs.get(key) + if value not in (None, 0, False): + badges.append(f"{_short_node_key(key)}:{value}") + return badges + + +def _tuple_its_edge_label(attrs: Mapping[str, Any], mode: str) -> str: + if mode == "sigma_pi": + sigma 
= attrs.get("sigma_order") + pi = attrs.get("pi_order") + return f"σ{_format_pair(sigma)} π{_format_pair(pi)}" + pair = attrs.get("kekule_order", attrs.get("order")) + return _pair_label(pair) + + +def _mtg_edge_label(attrs: Mapping[str, Any], mode: str) -> str: + if mode == "sigma_pi": + parts = [] + for key in ("sigma_order", "pi_order"): + value = attrs.get(key) + if _is_timeline(value) and (_timeline_changes(value) or mode == "timeline"): + parts.append(f"{_short_edge_key(key)}:{_format_timeline(value)}") + return " ".join(parts) or _format_timeline( + attrs.get("kekule_order", attrs.get("order")) + ) + if mode == "timeline": + parts = [] + for key in EDGE_TIMELINE_ATTRS: + value = attrs.get(key) + if _is_timeline(value): + parts.append(f"{_short_edge_key(key)}:{_format_timeline(value)}") + return " ".join(parts) + value = attrs.get("kekule_order", attrs.get("order")) + return _format_timeline(value) if _is_timeline(value) else _bond_symbol(value) + + +def _preferred_edge_pair(attrs: Mapping[str, Any]) -> Any: + return attrs.get("kekule_order", attrs.get("order")) + + +def _pair_edge_state(pair: Any) -> str: + if not _is_pair(pair): + return "unknown" + before, after = pair + before = _none_order(before) + after = _none_order(after) + if before == after: + return "unchanged" + if before == 0 and after > 0: + return "formed" + if before > 0 and after == 0: + return "broken" + return "order_changed" + + +def _timeline_edge_state(attrs: Mapping[str, Any]) -> str: + timeline = attrs.get("kekule_order", attrs.get("order")) + if not _is_timeline(timeline): + return "unknown" + numeric = [_none_order(v) for v in timeline if _is_order_value(v)] + if not numeric or len(set(numeric)) == 1: + return "unchanged" + if numeric[0] == numeric[-1] and len(set(numeric)) > 1: + return "transient" + if numeric[0] == 0 and numeric[-1] > 0: + return "formed" + if numeric[0] > 0 and numeric[-1] == 0: + return "broken" + return "order_changed" + + +def _pair_label(pair: Any) -> str: + 
if not _is_pair(pair): + return _bond_symbol(pair) + return f"{_bond_symbol(pair[0])}>{_bond_symbol(pair[1])}" + + +def _format_pair(pair: Any) -> str: + if not _is_pair(pair): + return str(pair) + return f"{_format_value(pair[0])}>{_format_value(pair[1])}" + + +def _format_timeline(value: Any) -> str: + if not _is_timeline(value): + return str(value) + return "-".join(_format_value(item) for item in value) + + +def _format_value(value: Any) -> str: + if value is None: + return "∅" + if isinstance(value, float) and value.is_integer(): + return str(int(value)) + return str(value) + + +def _bond_symbol(order: Any) -> str: + return BOND_SYMBOLS.get(order, _format_value(order)) + + +def _short_node_key(key: str) -> str: + return { + "charge": "q", + "hcount": "H", + "lone_pairs": "lp", + "radical": "rad", + }.get(key, key) + + +def _short_edge_key(key: str) -> str: + return { + "order": "ord", + "kekule_order": "kek", + "sigma_order": "σ", + "pi_order": "π", + }.get(key, key) + + +def _is_pair(value: Any) -> bool: + return ( + isinstance(value, tuple) + and len(value) == 2 + and not any(isinstance(item, (set, list, tuple, dict)) for item in value) + ) + + +def _is_timeline(value: Any) -> bool: + return ( + isinstance(value, tuple) + and len(value) >= 3 + and not any(isinstance(item, (set, list, tuple, dict)) for item in value) + ) + + +def _is_changed_pair(value: Any) -> bool: + return _is_pair(value) and value[0] != value[1] + + +def _timeline_changes(value: Any) -> bool: + return _is_timeline(value) and len(set(value)) > 1 + + +def _is_order_value(value: Any) -> bool: + return value is None or isinstance(value, (int, float)) + + +def _none_order(value: Any) -> float: + return 0.0 if value is None else float(value) + + +def _first_present(value: Any) -> Any: + if _is_pair(value): + return value[0] if value[0] is not None else value[1] + if _is_timeline(value): + for item in value: + if item is not None: + return item + return None + return value + + +def 
iter_changed_edges(visual: VisualGraph) -> Iterable[VisualEdge]: + """Yield edges whose visual state is not unchanged.""" + + return (edge for edge in visual.edges if edge.state != "unchanged") + + +def iter_changed_nodes(visual: VisualGraph) -> Iterable[VisualNode]: + """Yield nodes with at least one visual badge/change marker.""" + + return (node for node in visual.nodes if node.changed) diff --git a/test_data_query.ipynb b/test_data_query.ipynb deleted file mode 100644 index a70c2bc..0000000 --- a/test_data_query.ipynb +++ /dev/null @@ -1,2302 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "6abb3307", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/homes/biertank/minh/miniconda3/envs/synkit/lib/python3.11/site-packages/rxnmapper/batched_mapper.py:4: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", - " import pkg_resources\n", - "/homes/biertank/minh/miniconda3/envs/synkit/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from synkit.CRN.Query.kegg_extract import KEGGExtractor\n", - "\n", - "extractor = KEGGExtractor()\n", - "\n", - "pathway_data = extractor.build_pathway_json(\n", - " \"hsa00010\",\n", - " with_compounds=True,\n", - " with_atom_maps=True,\n", - " save_as='Data/KEGG_Query/hsa00010_raw.json',\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "0fda9a18", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'pathway_id': 'hsa00010',\n", - " 'modules': ['M00001', 'M00002', 'M00003', 'M00307'],\n", - " 'by_module': {'M00001': {'module_id': 'M00001',\n", - " 'reactions': [{'id': 'R00200',\n", - " 'reaction': 'C00002 + C00022 <=> C00008 + C00074',\n", - " 'rule': '[CH3:1][C:2](=[O:3])[C:8](=[O:9])[OH:10].[P:4](=[O:5])([OH:6])([OH:7])[O:33][P:30]([O:29][P:26]([O:25][CH2:24][C@H:23]1[O:22][C@@H:21]([n:20]2[c:16]3[n:15][cH:14][n:13][c:12]([NH2:11])[c:17]3[n:18][cH:19]2)[C@H:36]([OH:37])[C@@H:34]1[OH:35])(=[O:27])[OH:28])(=[O:31])[OH:32]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[NH2:11][c:12]1[n:13][cH:14][n:15][c:16]2[c:17]1[n:18][cH:19][n:20]2[C@@H:21]1[O:22][C@H:23]([CH2:24][O:25][P:26](=[O:27])([OH:28])[O:29][P:30](=[O:31])([OH:32])[OH:33])[C@@H:34]([OH:35])[C@H:36]1[OH:37]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00631 <=> C00074 + C00001',\n", - " 'rule': '[CH2:1]([C@@H:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10])[OH:11]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[OH2:11]',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R00756',\n", - " 'reaction': 'C00002 + 
C00085 <=> C00008 + C00354',\n", - " 'rule': '[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[O:23][P:40](=[O:41])([OH:42])[OH:43])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[P:29]([OH:30])([OH:31])[O:32][CH2:33][C@H:34]1[O:35][C:36]([OH:37])([CH2:38][OH:39])[C@@H:44]([OH:45])[C@@H:46]1[OH:47]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[OH:23])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[P:29]([OH:30])([OH:31])[O:32][CH2:33][C@H:34]1[O:35][C:36]([OH:37])([CH2:38][O:39][P:40](=[O:41])([OH:42])[OH:43])[C@@H:44]([OH:45])[C@@H:46]1[OH:47]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00118 <=> C00111',\n", - " 'rule': '[OH:1][C@@H:2]([CH:3]=[O:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10]>>[O:1]=[C:2]([CH2:3][OH:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 <=> C00236 + C00004 + C00080',\n", - " 'rule': 
'[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1.[O:45]=[CH:46][C@H:52]([OH:53])[CH2:54][O:55][P:56](=[O:57])([OH:58])[OH:59].[OH:47][P:48](=[O:49])([OH:50])[OH:51]>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[O:45]=[C:46]([O:47][P:48](=[O:49])([OH:50])[OH:51])[C@H:52]([OH:53])[CH2:54][O:55][P:56](=[O:57])([OH:58])[OH:59].[H+:60]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 <=> C00236 + C00005 + C00080',\n", - " 'rule': 
'[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]3[OH:41])[C@@H:42]([OH:43])[C@H:44]2[OH:45])[cH:46][cH:47][cH:48]1.[O:49]=[CH:50][C@H:56]([OH:57])[CH2:58][O:59][P:60](=[O:61])([OH:62])[OH:63].[OH:51][P:52](=[O:53])([OH:54])[OH:55]>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]3[OH:41])[C@@H:42]([OH:43])[C@H:44]2[OH:45])[CH:46]=[CH:47][CH2:48]1.[O:49]=[C:50]([O:51][P:52](=[O:53])([OH:54])[OH:55])[C@H:56]([OH:57])[CH2:58][O:59][P:60](=[O:61])([OH:62])[OH:63].[H+:64]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01068',\n", - " 'reaction': 'C00354 <=> C00111 + C00118',\n", - " 'rule': '[OH:1][C:2]1([CH2:3][O:4][P:7]([OH:6])(=[O:8])[OH:9])[C@@H:5]([OH:10])[C@H:15]([OH:16])[C@@H:13]([CH2:12][O:11][P:17](=[O:18])([OH:19])[OH:20])[O:14]1>>[O:1]=[C:2]([CH2:3][OH:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10].[O:11]=[CH:12][C@H:13]([OH:14])[CH2:15][O:16][P:17](=[O:18])([OH:19])[OH:20]',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O>>O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00002 + C00197 <=> C00008 + C00236',\n", - " 'rule': 
'[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[O:23][P:31](=[O:32])([OH:33])[OH:34])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[C:29]([OH:30])[C@H:35]([OH:36])[CH2:37][O:38][P:39](=[O:40])([OH:41])[OH:42]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[OH:23])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[C:29]([O:30][P:31](=[O:32])([OH:33])[OH:34])[C@H:35]([OH:36])[CH2:37][O:38][P:39](=[O:40])([OH:41])[OH:42]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00631 <=> C00197',\n", - " 'rule': '[O:1]=[C:2]([OH:3])[C@H:4]([O:5][P:8](=[O:9])([OH:10])[OH:11])[CH2:6][OH:7]>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11]',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01786',\n", - " 'reaction': 'C00002 + C00267 <=> C00008 + C00668',\n", - " 'rule': '[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[O:23][P:29](=[O:28])([OH:30])[OH:31])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[OH:32][CH2:33][C@H:34]1[O:35][C@H:36]([OH:37])[C@H:38]([OH:39])[C@@H:40]([OH:41])[C@@H:42]1[OH:43]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[OH:23])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[P:29]([OH:30])([OH:31])[O:32][CH2:33][C@H:34]1[O:35][C@H:36]([OH:37])[C@H:38]([OH:39])[C@@H:40]([OH:41])[C@@H:42]1[OH:43]',\n", - " 'smiles': 
'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R02189',\n", - " 'reaction': 'C00404 + C00267 <=> C00404 + C00668',\n", - " 'rule': '[O:1]=[P:2]([OH:3])([OH:19])[O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[OH:29].[OH:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]>>[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16].[O:17]=[P:18]([OH:19])([OH:20])[O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[OH:29]',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OP(=O)(O)OP(=O)(O)O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R05805',\n", - " 'reaction': 'C00008 + C00085 <=> C00020 + C00354',\n", - " 'rule': '[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:36](=[O:37])([OH:38])[OH:39])[C@@H:20]([OH:21])[C@H:22]1[OH:23].[O:24]=[P:25]([OH:26])([OH:27])[O:28][CH2:29][C@H:30]1[O:31][C:32]([OH:33])([CH2:34][OH:35])[C@@H:40]([OH:41])[C@@H:42]1[OH:43]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[OH:19])[C@@H:20]([OH:21])[C@H:22]1[OH:23].[O:24]=[P:25]([OH:26])([OH:27])[O:28][CH2:29][C@H:30]1[O:31][C:32]([OH:33])([CH2:34][O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]([OH:41])[C@@H:42]1[OH:43]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R07159',\n", - " 'reaction': 'C00118 + C00001 + 2 C00139 
<=> C00197 + 2 C00080 + 2 C00138',\n", - " 'rule': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11]>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+]'},\n", - " {'id': 'R09085',\n", - " 'reaction': 'C00267 + C00008 <=> C00668 + C00020',\n", - " 'rule': '[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:25](=[O:24])([OH:26])[OH:27])[C@@H:20]([OH:21])[C@H:22]1[OH:23].[OH:28][CH2:29][C@H:30]1[O:31][C@H:32]([OH:33])[C@H:34]([OH:35])[C@@H:36]([OH:37])[C@@H:38]1[OH:39]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[OH:19])[C@@H:20]([OH:21])[C@H:22]1[OH:23].[O:24]=[P:25]([OH:26])([OH:27])[O:28][CH2:29][C@H:30]1[O:31][C@H:32]([OH:33])[C@H:34]([OH:35])[C@@H:36]([OH:37])[C@@H:38]1[OH:39]',\n", - " 'smiles': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O>>O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'R13199',\n", - " 'reaction': 'C00668 <=> C00085',\n", - " 'rule': '[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]>>[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C:9]([OH:10])([CH2:11][OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 
'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00020',\n", - " 'name': 'AMP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00085',\n", - " 'name': 'D-Fructose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': None},\n", - " {'id': 'C00139', 'name': 'Oxidized ferredoxin', 'smiles': None},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 
'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00267',\n", - " 'name': 'alpha-D-Glucose',\n", - " 'smiles': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00354',\n", - " 'name': 'D-Fructose 1,6-bisphosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00404',\n", - " 'name': 'Polyphosphate',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C00668',\n", - " 'name': 'alpha-D-Glucose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'}],\n", - " 'missing': {'missing_compounds': [{'id': 'C00138',\n", - " 'name': 'Reduced ferredoxin',\n", - " 'reactions': ['R07159']},\n", - " {'id': 'C00139', 'name': 'Oxidized ferredoxin', 'reactions': ['R07159']}],\n", - " 'missing_compound_ids': ['C00138', 'C00139'],\n", - " 'reactions_involving_missing': ['R07159']}},\n", - " 'M00002': {'module_id': 'M00002',\n", - " 'reactions': [{'id': 'R00200',\n", - " 'reaction': 'C00002 + C00022 <=> C00008 + C00074',\n", - " 'rule': '[CH3:1][C:2](=[O:3])[C:8](=[O:9])[OH:10].[P:4](=[O:5])([OH:6])([OH:7])[O:33][P:30]([O:29][P:26]([O:25][CH2:24][C@H:23]1[O:22][C@@H:21]([n:20]2[c:16]3[n:15][cH:14][n:13][c:12]([NH2:11])[c:17]3[n:18][cH:19]2)[C@H:36]([OH:37])[C@@H:34]1[OH:35])(=[O:27])[OH:28])(=[O:31])[OH:32]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[NH2:11][c:12]1[n:13][cH:14][n:15][c:16]2[c:17]1[n:18][cH:19][n:20]2[C@@H:21]1[O:22][C@H:23]([CH2:24][O:25][P:26](=[O:27])([OH:28])[O:29][P:30](=[O:31])([OH:32])[OH:33])[C@@H:34]([OH:35])[C@H:36]1[OH:37]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00631 <=> C00074 + C00001',\n", - " 'rule': 
'[CH2:1]([C@@H:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10])[OH:11]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[OH2:11]',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00118 <=> C00111',\n", - " 'rule': '[OH:1][C@@H:2]([CH:3]=[O:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10]>>[O:1]=[C:2]([CH2:3][OH:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 <=> C00236 + C00004 + C00080',\n", - " 'rule': '[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1.[O:45]=[CH:46][C@H:52]([OH:53])[CH2:54][O:55][P:56](=[O:57])([OH:58])[OH:59].[OH:47][P:48](=[O:49])([OH:50])[OH:51]>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[O:45]=[C:46]([O:47][P:48](=[O:49])([OH:50])[OH:51])[C@H:52]([OH:53])[CH2:54][O:55][P:56](=[O:57])([OH:58])[OH:59].[H+:60]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 <=> C00236 + C00005 + C00080',\n", - " 'rule': 
'[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]3[OH:41])[C@@H:42]([OH:43])[C@H:44]2[OH:45])[cH:46][cH:47][cH:48]1.[O:49]=[CH:50][C@H:56]([OH:57])[CH2:58][O:59][P:60](=[O:61])([OH:62])[OH:63].[OH:51][P:52](=[O:53])([OH:54])[OH:55]>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]3[OH:41])[C@@H:42]([OH:43])[C@H:44]2[OH:45])[CH:46]=[CH:47][CH2:48]1.[O:49]=[C:50]([O:51][P:52](=[O:53])([OH:54])[OH:55])[C@H:56]([OH:57])[CH2:58][O:59][P:60](=[O:61])([OH:62])[OH:63].[H+:64]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00002 + C00197 <=> C00008 + C00236',\n", - " 'rule': 
'[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[O:23][P:31](=[O:32])([OH:33])[OH:34])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[C:29]([OH:30])[C@H:35]([OH:36])[CH2:37][O:38][P:39](=[O:40])([OH:41])[OH:42]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[OH:23])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[C:29]([O:30][P:31](=[O:32])([OH:33])[OH:34])[C@H:35]([OH:36])[CH2:37][O:38][P:39](=[O:40])([OH:41])[OH:42]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00631 <=> C00197',\n", - " 'rule': '[O:1]=[C:2]([OH:3])[C@H:4]([O:5][P:8](=[O:9])([OH:10])[OH:11])[CH2:6][OH:7]>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11]',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R07159',\n", - " 'reaction': 'C00118 + C00001 + 2 C00139 <=> C00197 + 2 C00080 + 2 C00138',\n", - " 'rule': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11]>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+]'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 
'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': None},\n", - " {'id': 'C00139', 'name': 'Oxidized ferredoxin', 'smiles': None},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [{'id': 'C00138',\n", - " 'name': 'Reduced ferredoxin',\n", - " 'reactions': ['R07159']},\n", - " {'id': 'C00139', 'name': 'Oxidized ferredoxin', 'reactions': ['R07159']}],\n", - " 'missing_compound_ids': ['C00138', 'C00139'],\n", 
- " 'reactions_involving_missing': ['R07159']}},\n", - " 'M00003': {'module_id': 'M00003',\n", - " 'reactions': [{'id': 'R00341',\n", - " 'reaction': 'C00002 + C00036 <=> C00008 + C00074 + C00011',\n", - " 'rule': '[P:4](=[O:5])([OH:6])([OH:7])[O:33][P:30]([O:29][P:26]([O:25][CH2:24][C@H:23]1[O:22][C@@H:21]([n:20]2[c:16]3[n:15][cH:14][n:13][c:12]([NH2:11])[c:17]3[n:18][cH:19]2)[C@H:36]([OH:37])[C@@H:34]1[OH:35])(=[O:27])[OH:28])(=[O:31])[OH:32].[CH2:1]([C:2](=[O:3])[C:8](=[O:9])[OH:10])[C:39](=[O:38])[OH:40]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[NH2:11][c:12]1[n:13][cH:14][n:15][c:16]2[c:17]1[n:18][cH:19][n:20]2[C@@H:21]1[O:22][C@H:23]([CH2:24][O:25][P:26](=[O:27])([OH:28])[O:29][P:30](=[O:31])([OH:32])[OH:33])[C@@H:34]([OH:35])[C@H:36]1[OH:37].[O:38]=[C:39]=[O:40]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00431',\n", - " 'reaction': 'C00044 + C00036 <=> C00035 + C00074 + C00011',\n", - " 'rule': '[P:4](=[O:5])([OH:6])([OH:7])[O:31][P:28]([O:27][P:24]([O:23][CH2:22][C@H:21]1[O:20][C@@H:19]([n:18]2[c:14]3[n:13][c:12]([NH2:11])[nH:38][c:36](=[O:37])[c:15]3[n:16][cH:17]2)[C@H:34]([OH:35])[C@@H:32]1[OH:33])(=[O:25])[OH:26])(=[O:29])[OH:30].[CH2:1]([C:2](=[O:3])[C:8](=[O:9])[OH:10])[C:40](=[O:39])[OH:41]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[NH2:11][c:12]1[n:13][c:14]2[c:15]([n:16][cH:17][n:18]2[C@@H:19]2[O:20][C@H:21]([CH2:22][O:23][P:24](=[O:25])([OH:26])[O:27][P:28](=[O:29])([OH:30])[OH:31])[C@@H:32]([OH:33])[C@H:34]2[OH:35])[c:36](=[O:37])[nH:38]1.[O:39]=[C:40]=[O:41]',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.O=C(O)CC(=O)C(=O)O>>Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 
'R00658',\n", - " 'reaction': 'C00631 <=> C00074 + C00001',\n", - " 'rule': '[CH2:1]([C@@H:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10])[OH:11]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[OH2:11]',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R00726',\n", - " 'reaction': 'C00081 + C00036 <=> C00104 + C00074 + C00011',\n", - " 'rule': '[CH2:1]([C:2](=[O:3])[C:8](=[O:9])[OH:10])[C:12](=[O:11])[OH:13].[P:4](=[O:5])([OH:6])([OH:7])[O:36][P:33]([O:32][P:29]([O:28][CH2:27][C@H:26]1[O:25][C@@H:24]([n:23]2[c:19]3[n:18][cH:17][nH:16][c:15](=[O:14])[c:20]3[n:21][cH:22]2)[C@H:39]([OH:40])[C@@H:37]1[OH:38])(=[O:30])[OH:31])(=[O:34])[OH:35]>>[CH2:1]=[C:2]([O:3][P:4](=[O:5])([OH:6])[OH:7])[C:8](=[O:9])[OH:10].[O:11]=[C:12]=[O:13].[O:14]=[c:15]1[nH:16][cH:17][n:18][c:19]2[c:20]1[n:21][cH:22][n:23]2[C@@H:24]1[O:25][C@H:26]([CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[C@@H:37]([OH:38])[C@H:39]1[OH:40]',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00762',\n", - " 'reaction': 'C00354 + C00001 <=> C00085 + C00009',\n", - " 'rule': '[OH2:3].[O:1]=[P:2]([OH:4])([OH:5])[O:17][CH2:16][C:14]1([OH:15])[O:13][C@H:12]([CH2:11][O:10][P:7](=[O:6])([OH:8])[OH:9])[C@@H:20]([OH:21])[C@@H:18]1[OH:19]>>[O:1]=[P:2]([OH:3])([OH:4])[OH:5].[O:6]=[P:7]([OH:8])([OH:9])[O:10][CH2:11][C@H:12]1[O:13][C:14]([OH:15])([CH2:16][OH:17])[C@@H:18]([OH:19])[C@@H:20]1[OH:21]',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O.O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O.O=P(O)(O)O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00118 <=> C00111',\n", - " 'rule': 
'[OH:1][C@@H:2]([CH:3]=[O:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10]>>[O:1]=[C:2]([CH2:3][OH:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 <=> C00236 + C00004 + C00080',\n", - " 'rule': '[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1.[O:45]=[CH:46][C@H:52]([OH:53])[CH2:54][O:55][P:56](=[O:57])([OH:58])[OH:59].[OH:47][P:48](=[O:49])([OH:50])[OH:51]>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[O:45]=[C:46]([O:47][P:48](=[O:49])([OH:50])[OH:51])[C@H:52]([OH:53])[CH2:54][O:55][P:56](=[O:57])([OH:58])[OH:59].[H+:60]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 <=> C00236 + C00005 + C00080',\n", - " 'rule': 
'[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]3[OH:41])[C@@H:42]([OH:43])[C@H:44]2[OH:45])[cH:46][cH:47][cH:48]1.[O:49]=[CH:50][C@H:56]([OH:57])[CH2:58][O:59][P:60](=[O:61])([OH:62])[OH:63].[OH:51][P:52](=[O:53])([OH:54])[OH:55]>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([O:35][P:36](=[O:37])([OH:38])[OH:39])[C@@H:40]3[OH:41])[C@@H:42]([OH:43])[C@H:44]2[OH:45])[CH:46]=[CH:47][CH2:48]1.[O:49]=[C:50]([O:51][P:52](=[O:53])([OH:54])[OH:55])[C@H:56]([OH:57])[CH2:58][O:59][P:60](=[O:61])([OH:62])[OH:63].[H+:64]',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01068',\n", - " 'reaction': 'C00354 <=> C00111 + C00118',\n", - " 'rule': '[OH:1][C:2]1([CH2:3][O:4][P:7]([OH:6])(=[O:8])[OH:9])[C@@H:5]([OH:10])[C@H:15]([OH:16])[C@@H:13]([CH2:12][O:11][P:17](=[O:18])([OH:19])[OH:20])[O:14]1>>[O:1]=[C:2]([CH2:3][OH:4])[CH2:5][O:6][P:7](=[O:8])([OH:9])[OH:10].[O:11]=[CH:12][C@H:13]([OH:14])[CH2:15][O:16][P:17](=[O:18])([OH:19])[OH:20]',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O>>O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00002 + C00197 <=> C00008 + C00236',\n", - " 'rule': 
'[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[O:23][P:31](=[O:32])([OH:33])[OH:34])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[C:29]([OH:30])[C@H:35]([OH:36])[CH2:37][O:38][P:39](=[O:40])([OH:41])[OH:42]>>[NH2:1][c:2]1[n:3][cH:4][n:5][c:6]2[c:7]1[n:8][cH:9][n:10]2[C@@H:11]1[O:12][C@H:13]([CH2:14][O:15][P:16](=[O:17])([OH:18])[O:19][P:20](=[O:21])([OH:22])[OH:23])[C@@H:24]([OH:25])[C@H:26]1[OH:27].[O:28]=[C:29]([O:30][P:31](=[O:32])([OH:33])[OH:34])[C@H:35]([OH:36])[CH2:37][O:38][P:39](=[O:40])([OH:41])[OH:42]',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00631 <=> C00197',\n", - " 'rule': '[O:1]=[C:2]([OH:3])[C@H:4]([O:5][P:8](=[O:9])([OH:10])[OH:11])[CH2:6][OH:7]>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11]',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 
'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00011', 'name': 'CO2', 'smiles': 'O=C=O'},\n", - " {'id': 'C00035',\n", - " 'name': 'GDP',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1'},\n", - " {'id': 'C00036', 'name': 'Oxaloacetate', 'smiles': 'O=C(O)CC(=O)C(=O)O'},\n", - " {'id': 'C00044',\n", - " 'name': 'GTP',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00081',\n", - " 'name': 'ITP',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00085',\n", - " 'name': 'D-Fructose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00104',\n", - " 'name': 'IDP',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00354',\n", - " 'name': 'D-Fructose 1,6-bisphosphate',\n", - " 'smiles': 
'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}},\n", - " 'M00307': {'module_id': 'M00307',\n", - " 'reactions': [{'id': 'R00014',\n", - " 'reaction': 'C00022 + C00068 <=> C05125 + C00011',\n", - " 'rule': '[C:9]([CH3:10])(=[O:11])[C:31](=[O:30])[OH:32].[CH3:1][c:2]1[n:3][cH:4][c:5]([CH2:6][n+:7]2[cH:8][s:12][c:13]([CH2:14][CH2:15][O:16][P:17](=[O:18])([OH:19])[O:20][P:21](=[O:22])([OH:23])[OH:24])[c:25]2[CH3:26])[c:27]([NH2:28])[n:29]1>>[CH3:1][c:2]1[n:3][cH:4][c:5]([CH2:6][n+:7]2[c:8]([CH:9]([CH3:10])[OH:11])[s:12][c:13]([CH2:14][CH2:15][O:16][P:17](=[O:18])([OH:19])[O:20][P:21](=[O:22])([OH:23])[OH:24])[c:25]2[CH3:26])[c:27]([NH2:28])[n:29]1.[O:30]=[C:31]=[O:32]',\n", - " 'smiles': 'CC(=O)C(=O)O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1>>Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.O=C=O'},\n", - " {'id': 'R00209',\n", - " 'reaction': 'C00022 + C00010 + C00003 <=> C00024 + C00011 + C00004 + C00080',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[C:97]([OH:96])=[O:98].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[c:55]1[cH:56][n+:57]([C@@H:58]2[O:59][C@H:60]([CH2:61][O:62][P:63](=[O:64])([OH:65])[O:66][P:67](=[O:68])([OH:69])[O:70][CH2:71][C@H:72]3[O:73][C@@H:74]([n:75]4[cH:76][n:77][c:78]5[c:79]([NH2:80])[n:81][cH:82][n:83][c:84]45)[C@H:85]([OH:86])[C@@H:87]3[OH:88])[C@@H:89]([OH:90])[C@H:91]2[OH:92])[cH:93][cH:94][cH:95]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[C:55]1=[CH:56][N:57]([C@@H:58]2[O:59][C@H:60]([CH2:61][O:62][P:63](=[O:64])([OH:65])[O:66][P:67](=[O:68])([OH:69])[O:70][CH2:71][C@H:72]3[O:73][C@@H:74]([n:75]4[cH:76][n:77][c:78]5[c:79]([NH2:80])[n:81][cH:82][n:83][c:84]45)[C@H:85]([OH:86])[C@@H:87]3[OH:88])[C@@H:89]([OH:90])[C@H:91]2[OH:92])[CH:93]=[CH:94][CH2:95]1.[O:96]=[C:97]=[O:98].[H+:99]',\n", - " 'smiles': 
'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R00210',\n", - " 'reaction': 'C00022 + C00010 + C00006 <=> C00024 + C00011 + C00005 + C00080',\n", - " 'rule': '[CH3:1][C:2](=[O:3])[C:101]([OH:100])=[O:102].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[c:55]1[cH:56][n+:57]([C@@H:58]2[O:59][C@H:60]([CH2:61][O:62][P:63](=[O:64])([OH:65])[O:66][P:67](=[O:68])([OH:69])[O:70][CH2:71][C@H:72]3[O:73][C@@H:74]([n:75]4[cH:76][n:77][c:78]5[c:79]([NH2:80])[n:81][cH:82][n:83][c:84]45)[C@H:85]([O:86][P:87](=[O:88])([OH:89])[OH:90])[C@@H:91]3[OH:92])[C@@H:93]([OH:94])[C@H:95]2[OH:96])[cH:97][cH:98][cH:99]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[C:55]1=[CH:56][N:57]([C@@H:58]2[O:59][C@H:60]([CH2:61][O:62][P:63](=[O:64])([OH:65])[O:66][P:67](=[O:68])([OH:69])[O:70][CH2:71][C@H:72]3[O:73][C@@H:74]([n:75]4[cH:76][n:77][c:78]5[c:79]([NH2:
80])[n:81][cH:82][n:83][c:84]45)[C@H:85]([O:86][P:87](=[O:88])([OH:89])[OH:90])[C@@H:91]3[OH:92])[C@@H:93]([OH:94])[C@H:95]2[OH:96])[CH:97]=[CH:98][CH2:99]1.[O:100]=[C:101]=[O:102].[H+:103]',\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01196',\n", - " 'reaction': '2 C00138 + C00024 + C00011 + 2 C00080 <=> 2 C00139 + C00022 + C00010',\n", - " 'rule': '[CH3:1][C:2](=[O:3])[S:54][CH2:53][CH2:52][NH:51][C:49]([CH2:48][CH2:47][NH:46][C:44]([C@@H:42]([C:8]([CH3:7])([CH3:9])[CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]1[O:22][C@@H:23]([n:24]2[cH:25][n:26][c:27]3[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]23)[C@H:34]([OH:35])[C@@H:36]1[O:37][P:38](=[O:39])([OH:40])[OH:41])[OH:43])=[O:45])=[O:50].[C:4](=[O:5])=[O:6].[H+].[H+]>>[CH3:1][C:2](=[O:3])[C:4](=[O:5])[OH:6].[CH3:7][C:8]([CH3:9])([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]1[O:22][C@@H:23]([n:24]2[cH:25][n:26][c:27]3[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]23)[C@H:34]([OH:35])[C@@H:36]1[O:37][P:38](=[O:39])([OH:40])[OH:41])[C@@H:42]([OH:43])[C:44](=[O:45])[NH:46][CH2:47][CH2:48][C:49](=[O:50])[NH:51][CH2:52][CH2:53][SH:54]',\n", - " 'smiles': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.[H+].[H+]>>CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS'},\n", - " {'id': 'R02569',\n", - " 
'reaction': 'C00024 + C15973 <=> C00010 + C16255',\n", - " 'rule': '[*:1][NH:2][C:3](=[O:4])[CH2:5][CH2:6][CH2:7][CH2:8][C@@H:9]([SH:10])[CH2:11][CH2:12][SH:13].[C:14]([CH3:15])(=[O:16])[S:64][CH2:63][CH2:62][NH:61][C:59]([CH2:58][CH2:57][NH:56][C:54]([C@@H:52]([C:18]([CH3:17])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51])[OH:53])=[O:55])=[O:60]>>[*:1][NH:2][C:3](=[O:4])[CH2:5][CH2:6][CH2:7][CH2:8][C@@H:9]([SH:10])[CH2:11][CH2:12][S:13][C:14]([CH3:15])=[O:16].[CH3:17][C:18]([CH3:19])([CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51])[C@@H:52]([OH:53])[C:54](=[O:55])[NH:56][CH2:57][CH2:58][C:59](=[O:60])[NH:61][CH2:62][CH2:63][SH:64]',\n", - " 'smiles': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.*NC(=O)CCCC[C@@H](S)CCS>>CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.*NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'R03270',\n", - " 'reaction': 'C05125 + C15972 <=> C16255 + C00068',\n", - " 'rule': 
'[*:1][NH:2][C:3](=[O:4])[CH2:5][CH2:6][CH2:7][CH2:8][C@H:9]1[S:10][S:13][CH2:12][CH2:11]1.[CH:14]([CH3:15])([OH:16])[c:24]1[n+:23]([CH2:22][c:21]2[cH:20][n:19][c:18]([CH3:17])[n:42][c:40]2[NH2:41])[c:38]([CH3:39])[c:26]([CH2:27][CH2:28][O:29][P:30](=[O:31])([OH:32])[O:33][P:34](=[O:35])([OH:36])[OH:37])[s:25]1>>[*:1][NH:2][C:3](=[O:4])[CH2:5][CH2:6][CH2:7][CH2:8][C@@H:9]([SH:10])[CH2:11][CH2:12][S:13][C:14]([CH3:15])=[O:16].[CH3:17][c:18]1[n:19][cH:20][c:21]([CH2:22][n+:23]2[cH:24][s:25][c:26]([CH2:27][CH2:28][O:29][P:30](=[O:31])([OH:32])[O:33][P:34](=[O:35])([OH:36])[OH:37])[c:38]2[CH3:39])[c:40]([NH2:41])[n:42]1',\n", - " 'smiles': 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.*NC(=O)CCCC[C@@H]1CCSS1>>*NC(=O)CCCC[C@@H](S)CCSC(C)=O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'R07618',\n", - " 'reaction': 'C15973 + C00003 <=> C15972 + C00004 + C00080',\n", - " 'rule': '[*:1][NH:2][C:3](=[O:4])[CH2:5][CH2:6][CH2:7][CH2:8][C@H:9]([CH2:10][CH2:11][SH:12])[SH:13].[NH2:14][C:15](=[O:16])[c:17]1[cH:18][n+:19]([C@@H:20]2[O:21][C@H:22]([CH2:23][O:24][P:25](=[O:26])([OH:27])[O:28][P:29](=[O:30])([OH:31])[O:32][CH2:33][C@H:34]3[O:35][C@@H:36]([n:37]4[cH:38][n:39][c:40]5[c:41]([NH2:42])[n:43][cH:44][n:45][c:46]45)[C@H:47]([OH:48])[C@@H:49]3[OH:50])[C@@H:51]([OH:52])[C@H:53]2[OH:54])[cH:55][cH:56][cH:57]1>>[*:1][NH:2][C:3](=[O:4])[CH2:5][CH2:6][CH2:7][CH2:8][C@@H:9]1[CH2:10][CH2:11][S:12][S:13]1.[NH2:14][C:15](=[O:16])[C:17]1=[CH:18][N:19]([C@@H:20]2[O:21][C@H:22]([CH2:23][O:24][P:25](=[O:26])([OH:27])[O:28][P:29](=[O:30])([OH:31])[O:32][CH2:33][C@H:34]3[O:35][C@@H:36]([n:37]4[cH:38][n:39][c:40]5[c:41]([NH2:42])[n:43][cH:44][n:45][c:46]45)[C@H:47]([OH:48])[C@@H:49]3[OH:50])[C@@H:51]([OH:52])[C@H:53]2[OH:54])[CH:55]=[CH:56][CH2:57]1.[H+:58]',\n", - " 'smiles': 
'*NC(=O)CCCC[C@@H](S)CCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>*NC(=O)CCCC[C@@H]1CCSS1.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R10866',\n", - " 'reaction': 'C00022 + C00010 + C02869 <=> C00024 + C00011 + C02745',\n", - " 'rule': '[CH3:1][C:2](=[O:3])[C:53](=[O:52])[OH:54].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51]>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[O:52]=[C:53]=[O:54]',\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O'}],\n", - " 'molecules': [{'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 
'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00010',\n", - " 'name': 'CoA',\n", - " 'smiles': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS'},\n", - " {'id': 'C00011', 'name': 'CO2', 'smiles': 'O=C=O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00024',\n", - " 'name': 'Acetyl-CoA',\n", - " 'smiles': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O'},\n", - " {'id': 'C00068',\n", - " 'name': 'Thiamin diphosphate',\n", - " 'smiles': 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': None},\n", - " {'id': 'C00139', 'name': 'Oxidized ferredoxin', 'smiles': None},\n", - " {'id': 'C02745', 'name': 'Reduced flavodoxin', 'smiles': None},\n", - " {'id': 'C02869', 'name': 'Oxidized flavodoxin', 'smiles': None},\n", - " {'id': 'C05125',\n", - " 'name': '2-(alpha-Hydroxyethyl)thiamine diphosphate',\n", - " 'smiles': 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': '*NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': '*NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': '*NC(=O)CCCC[C@@H](S)CCSC(C)=O'}],\n", - " 'missing': {'missing_compounds': [{'id': 'C00138',\n", - " 'name': 'Reduced ferredoxin',\n", - 
" 'reactions': ['R01196']},\n", - " {'id': 'C00139', 'name': 'Oxidized ferredoxin', 'reactions': ['R01196']},\n", - " {'id': 'C02745', 'name': 'Reduced flavodoxin', 'reactions': ['R10866']},\n", - " {'id': 'C02869', 'name': 'Oxidized flavodoxin', 'reactions': ['R10866']}],\n", - " 'missing_compound_ids': ['C00138', 'C00139', 'C02745', 'C02869'],\n", - " 'reactions_involving_missing': ['R01196', 'R10866']}}},\n", - " 'missing': {'missing_compound_ids': ['C00138', 'C00139', 'C02745', 'C02869'],\n", - " 'reactions_involving_missing': ['R01196', 'R07159', 'R10866']}}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pathway_data" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a7e0f3fe", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'pathway_id': 'hsa00010',\n", - " 'modules': ['M00001', 'M00002', 'M00003', 'M00307'],\n", - " 'by_module': {'M00001': {'module_id': 'M00001',\n", - " 'reactions': [{'id': 'R00200',\n", - " 'reaction': 'C00008 + C00074 => C00002 + C00022',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00631 => C00074 + C00001',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R00756',\n", - " 'reaction': 'C00002 + C00085 => C00008 + C00354',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00111 => C00118',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O>>O=C[C@H](O)COP(=O)(O)O'},\n", 
- " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 => C00236 + C00004 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 => C00236 + C00005 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01068',\n", - " 'reaction': 'C00354 => C00111 + C00118',\n", - " 'rule': None,\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O>>O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00008 + C00236 => C00002 + C00197',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00197 => C00631',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O>>O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'R01786',\n", - " 'reaction': 'C00002 + C00267 => C00008 + C00668',\n", - " 'rule': None,\n", - " 'smiles': 
'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R02189',\n", - " 'reaction': 'C00404 + C00267 => C99999 + C00668',\n", - " 'rule': '[O:1]=[P:2]([OH:3])([OH:4])[O:20][P:18](=[O:17])([OH:19])[O:21][P:22](=[O:23])([OH:24])[OH:25].[OH:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]>>[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16].[O:17]=[P:18]([OH:19])([OH:20])[O:21][P:22](=[O:23])([OH:24])[OH:25]',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OP(=O)(O)O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R05805',\n", - " 'reaction': 'C00008 + C00085 => C00020 + C00354',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R07159',\n", - " 'reaction': 'C00118 + C00001 + 2 C00139 => C00197 + 2 C00080 + 2 C00138',\n", - " 'rule': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O.S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+].S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1'},\n", - " {'id': 'R09085',\n", - " 'reaction': 'C00267 + C00008 => C00668 + C00020',\n", - " 'rule': None,\n", - " 'smiles': 
'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O>>O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'R13199',\n", - " 'reaction': 'C00668 => C00085',\n", - " 'rule': None,\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00020',\n", - " 'name': 'AMP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': 
'[H+]'},\n", - " {'id': 'C00085',\n", - " 'name': 'D-Fructose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00267',\n", - " 'name': 'alpha-D-Glucose',\n", - " 'smiles': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00354',\n", - " 'name': 'D-Fructose 1,6-bisphosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00404',\n", - " 'name': 'Polyphosphate',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C00668',\n", - " 'name': 'alpha-D-Glucose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C02745',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 
'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}},\n", - " 'M00002': {'module_id': 'M00002',\n", - " 'reactions': [{'id': 'R00200',\n", - " 'reaction': 'C00008 + C00074 => C00002 + C00022',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00631 => C00074 + C00001',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00111 => C00118',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O>>O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 => C00236 + C00004 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 => C00236 + C00005 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " 
{'id': 'R01512',\n", - " 'reaction': 'C00008 + C00236 => C00002 + C00197',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00197 => C00631',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O>>O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'R07159',\n", - " 'reaction': 'C00118 + C00001 + 2 C00139 => C00197 + 2 C00080 + 2 C00138',\n", - " 'rule': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O.S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+].S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 
'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C02745',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] 
S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}},\n", - " 'M00003': {'module_id': 'M00003',\n", - " 'reactions': [{'id': 'R00341',\n", - " 'reaction': 'C00002 + C00036 => C00008 + C00074 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00431',\n", - " 'reaction': 'C00044 + C00036 => C00035 + C00074 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.O=C(O)CC(=O)C(=O)O>>Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00074 + C00001 => C00631',\n", - " 'rule': None,\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O.O>>O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'R00726',\n", - " 'reaction': 'C00081 + C00036 => C00104 + C00074 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00762',\n", - " 'reaction': 'C00354 + C00001 => C00085 + C00009',\n", - " 'rule': None,\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O.O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O.O=P(O)(O)O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00118 => C00111',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00236 + C00004 + 
C00080 => C00118 + C00009 + C00003',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]>>O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00236 + C00005 + C00080 => C00118 + C00009 + C00006',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]>>O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'R01068',\n", - " 'reaction': 'C00111 + C00118 => C00354',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O>>O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00002 + C00197 => C00008 + C00236',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00631 => C00197',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 
'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00011', 'name': 'CO2', 'smiles': 'O=C=O'},\n", - " {'id': 'C00035',\n", - " 'name': 'GDP',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1'},\n", - " {'id': 'C00036', 'name': 'Oxaloacetate', 'smiles': 'O=C(O)CC(=O)C(=O)O'},\n", - " {'id': 'C00044',\n", - " 'name': 'GTP',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00081',\n", - " 'name': 'ITP',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00085',\n", - " 'name': 'D-Fructose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00104',\n", - " 'name': 'IDP',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 
'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00354',\n", - " 'name': 'D-Fructose 1,6-bisphosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C02745',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}},\n", - " 'M00307': {'module_id': 'M00307',\n", - " 'reactions': [{'id': 'R00014',\n", - " 'reaction': 'C00022 + C00068 => C05125 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'CC(=O)C(=O)O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1>>Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.O=C=O'},\n", - " {'id': 
'R00209',\n", - " 'reaction': 'C00022 + C00010 + C00003 => C00024 + C00011 + C00004 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R00210',\n", - " 'reaction': 'C00022 + C00010 + C00006 => C00024 + C00011 + C00005 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01196',\n", - " 'reaction': '2 C00139 + C00022 + C00010 => 2 C00138 + C00024 + C00011 + 2 C00080',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[C:53]([OH:52])=[O:54].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[S:57]1[Fe+:58][S:59][Fe+:60]1.[S:61]1[Fe+:62][S:63][Fe+:64]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[O:52]=[C:53]=[O:54].[H+:55].[H+:56].[S:57]1[Fe:58][S:59][Fe+:60]1.[S:61]1[Fe:62][S:63][Fe+:64]1',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1.CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS>>S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1.CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.[H+].[H+]'},\n", - " {'id': 'R02569',\n", - " 'reaction': 'C00010 + C16255 => C00024 + C15973',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[S:4][CH2:62][CH2:61][C@@H:59]([CH2:58][CH2:57][CH2:56][CH2:55][C:53]([NH2:52])=[O:54])[SH:60].[CH2:5]([CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51])[SH:63]>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[CH2:55][CH2:56][CH2:57][CH2:58][C@@H:59]([SH:60])[CH2:61][CH2:62][SH:63]',\n", - " 'smiles': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)CCCC[C@@H](S)CCSC(C)=O>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'R03270',\n", - " 'reaction': 'C05125 + C15972 => C16255 + C00068',\n", - " 'rule': '[CH3:1][CH:2]([OH:3])[c:23]1[n+:22]([CH2:21][c:20]2[cH:19][n:18][c:17]([CH3:16])[n:41][c:39]2[NH2:40])[c:37]([CH3:38])[c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[s:24]1.[S:4]1[CH2:5][CH2:6][C@@H:7]([CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15])[S:8]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][C@H:7]([SH:8])[CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15].[CH3:16][c:17]1[n:18][cH:19][c:20]([CH2:21][n+:22]2[cH:23][s:24][c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[c:37]2[CH3:38])[c:39]([NH2:40])[n:41]1',\n", - " 'smiles': 
'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.NC(=O)CCCC[C@@H]1CCSS1>>NC(=O)CCCC[C@@H](S)CCSC(C)=O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'R07618',\n", - " 'reaction': 'C15973 + C00003 => C15972 + C00004 + C00080',\n", - " 'rule': '[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@H:52]([CH2:53][CH2:54][SH:55])[SH:56].[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@@H:52]1[CH2:53][CH2:54][S:55][S:56]1.[H+:57]',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>NC(=O)CCCC[C@@H]1CCSS1.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R10866',\n", - " 'reaction': 'C00022 + C00010 + C02869 => C00024 + C00011 + C02745',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[C:84]([OH:83])=[O:85].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[n:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[n:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]-1[n:82]2>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[N:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[nH:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]1[NH:82]2.[O:83]=[C:84]=[O:85]',\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'}],\n", - " 'molecules': [{'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 
'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00010',\n", - " 'name': 'CoA',\n", - " 'smiles': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS'},\n", - " {'id': 'C00011', 'name': 'CO2', 'smiles': 'O=C=O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00024',\n", - " 'name': 'Acetyl-CoA',\n", - " 'smiles': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O'},\n", - " {'id': 'C00068',\n", - " 'name': 'Thiamin diphosphate',\n", - " 'smiles': 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C02745',\n", - " 'name': 'Reduced flavodoxin',\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': 'Oxidized flavodoxin',\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C05125',\n", - " 'name': '2-(alpha-Hydroxyethyl)thiamine diphosphate',\n", - " 'smiles': 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 
'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}}},\n", - " 'missing': {'missing_compound_ids': [], 'reactions_involving_missing': []}}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import json\n", - "\n", - "with open('Data/KEGG/hsa00010_fixed_new.json', 'r') as file:\n", - " pathway_data = json.load(file)\n", - "\n", - "pathway_data" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "6175331d", - "metadata": {}, - "outputs": [], - "source": [ - "from synkit.CRN.Query.kegg_impute import KEGGImputer\n", - "\n", - "fixes = [\n", - " {\n", - " \"id\": \"R02189\",\n", - " \"reaction\": \"C00404 + C00267 => C99999 + C00668\",\n", - " },\n", - " {\n", - " \"id\": \"C99999\",\n", - " \"name\": \"Polyphosphate fragment\",\n", - " \"smiles\": \"O=P(O)(O)OP(=O)(O)O\",\n", - " },\n", - " {\n", - " \"id\": \"C00138\",\n", - " \"name\": \"Reduced ferredoxin\",\n", - " \"smiles\": \"S1[Fe]S[Fe+]1\"\n", - " },\n", - " {\n", - " \"id\": \"C00139\",\n", - " \"name\": \"Oxidized ferredoxin\",\n", - " \"smiles\": \"S1[Fe+]S[Fe+]1\"\n", - " },\n", - " {\n", - " \"id\": \"C02745\",\n", - " \"smiles\": \"CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))\"\n", - " },\n", - " {\n", - " \"id\": \"C02869\",\n", - " \"smiles\": \"CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))\"\n", - " },\n", - " {\n", - " \"id\": \"C15972\",\n", - " \"name\": \"Enzyme 
N6-(lipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H]1CCSS\",\n", - " },\n", - " {\n", - " \"id\": \"C15973\",\n", - " \"name\": \"Enzyme N6-(dihydrolipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCS\",\n", - " },\n", - " {\n", - " \"id\": \"C16255\",\n", - " \"name\": \"[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCSC(C)=O\",\n", - " },\n", - " {\n", - " \"id\": \"C15972\",\n", - " \"name\": \"Enzyme N6-(lipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H]1CCSS1\"\n", - " },\n", - " {\n", - " \"id\": \"C15973\",\n", - " \"name\": \"Enzyme N6-(dihydrolipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCS\"\n", - " },\n", - " {\n", - " \"id\": \"C16255\",\n", - " \"name\": \"[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCSC(C)=O\"\n", - " }\n", - "]\n", - "\n", - "imputer = KEGGImputer()\n", - "imputed_pathway = imputer.impute_pathway(\n", - " pathway_data,\n", - " fixes=fixes,\n", - " save_as='Data/KEGG/hsa00010_fixed_new.json',\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "1f0e9981", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'pathway_id': 'hsa00010',\n", - " 'modules': ['M00001', 'M00002', 'M00003', 'M00307'],\n", - " 'by_module': {'M00001': {'module_id': 'M00001',\n", - " 'reactions': [{'id': 'R00200',\n", - " 'reaction': 'C00008 + C00074 => C00002 + C00022',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00631 => C00074 + C00001',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R00756',\n", - " 'reaction': 'C00002 + C00085 => C00008 + 
C00354',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00111 => C00118',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O>>O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 => C00236 + C00004 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 => C00236 + C00005 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01068',\n", - " 'reaction': 'C00354 => C00111 + C00118',\n", - " 'rule': None,\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O>>O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00008 + C00236 => C00002 + C00197',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00197 => C00631',\n", - " 
'rule': None,\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O>>O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'R01786',\n", - " 'reaction': 'C00002 + C00267 => C00008 + C00668',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R02189',\n", - " 'reaction': 'C00404 + C00267 => C99999 + C00668',\n", - " 'rule': '[O:1]=[P:2]([OH:3])([OH:4])[O:20][P:18](=[O:17])([OH:19])[O:21][P:22](=[O:23])([OH:24])[OH:25].[OH:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]>>[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16].[O:17]=[P:18]([OH:19])([OH:20])[O:21][P:22](=[O:23])([OH:24])[OH:25]',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OP(=O)(O)O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R05805',\n", - " 'reaction': 'C00008 + C00085 => C00020 + C00354',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R07159',\n", - " 'reaction': 'C00118 + C00001 + 2 C00139 => C00197 + 2 C00080 + 2 C00138',\n", - " 'rule': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1',\n", - " 'smiles': 
'O=C[C@H](O)COP(=O)(O)O.O.S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+].S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1'},\n", - " {'id': 'R09085',\n", - " 'reaction': 'C00267 + C00008 => C00668 + C00020',\n", - " 'rule': None,\n", - " 'smiles': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O>>O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'R13199',\n", - " 'reaction': 'C00668 => C00085',\n", - " 'rule': None,\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00020',\n", - " 'name': 'AMP',\n", - " 'smiles': 
'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00085',\n", - " 'name': 'D-Fructose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00267',\n", - " 'name': 'alpha-D-Glucose',\n", - " 'smiles': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00354',\n", - " 'name': 'D-Fructose 1,6-bisphosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00404',\n", - " 'name': 'Polyphosphate',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C00668',\n", - " 'name': 'alpha-D-Glucose 6-phosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C02745',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': None,\n", - " 'smiles': 
'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}},\n", - " 'M00002': {'module_id': 'M00002',\n", - " 'reactions': [{'id': 'R00200',\n", - " 'reaction': 'C00008 + C00074 => C00002 + C00022',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00631 => C00074 + C00001',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00111 => C00118',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O>>O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00118 + C00009 + C00003 => C00236 + C00004 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00118 + C00009 + C00006 => C00236 + C00005 + C00080',\n", - " 'rule': None,\n", - " 
'smiles': 'O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00008 + C00236 => C00002 + C00197',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00197 => C00631',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O>>O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'R07159',\n", - " 'reaction': 'C00118 + C00001 + 2 C00139 => C00197 + 2 C00080 + 2 C00138',\n", - " 'rule': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O.O.S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+].S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " 
{'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C02745',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 
'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}},\n", - " 'M00003': {'module_id': 'M00003',\n", - " 'reactions': [{'id': 'R00341',\n", - " 'reaction': 'C00002 + C00036 => C00008 + C00074 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00431',\n", - " 'reaction': 'C00044 + C00036 => C00035 + C00074 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.O=C(O)CC(=O)C(=O)O>>Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00658',\n", - " 'reaction': 'C00074 + C00001 => C00631',\n", - " 'rule': None,\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O.O>>O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'R00726',\n", - " 'reaction': 'C00081 + C00036 => C00104 + C00074 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O'},\n", - " {'id': 'R00762',\n", - " 'reaction': 'C00354 + C00001 => C00085 + C00009',\n", - " 'rule': None,\n", - " 'smiles': 
'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O.O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O.O=P(O)(O)O'},\n", - " {'id': 'R01015',\n", - " 'reaction': 'C00118 => C00111',\n", - " 'rule': None,\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'R01061',\n", - " 'reaction': 'C00236 + C00004 + C00080 => C00118 + C00009 + C00003',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]>>O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'R01063',\n", - " 'reaction': 'C00236 + C00005 + C00080 => C00118 + C00009 + C00006',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]>>O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'R01068',\n", - " 'reaction': 'C00111 + C00118 => C00354',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O>>O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'R01512',\n", - " 'reaction': 'C00002 + C00197 => C00008 + C00236',\n", - " 'rule': None,\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'R01518',\n", - " 'reaction': 'C00631 => C00197',\n", - " 'rule': None,\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O'}],\n", - " 'molecules': [{'id': 'C00001', 'name': 'H2O', 'smiles': 'O'},\n", - " {'id': 
'C00002',\n", - " 'name': 'ATP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00008',\n", - " 'name': 'ADP',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00009', 'name': 'Orthophosphate', 'smiles': 'O=P(O)(O)O'},\n", - " {'id': 'C00011', 'name': 'CO2', 'smiles': 'O=C=O'},\n", - " {'id': 'C00035',\n", - " 'name': 'GDP',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1'},\n", - " {'id': 'C00036', 'name': 'Oxaloacetate', 'smiles': 'O=C(O)CC(=O)C(=O)O'},\n", - " {'id': 'C00044',\n", - " 'name': 'GTP',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1'},\n", - " {'id': 'C00074',\n", - " 'name': 'Phosphoenolpyruvate',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00081',\n", - " 'name': 'ITP',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00085',\n", - " 'name': 'D-Fructose 6-phosphate',\n", - " 'smiles': 
'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00104',\n", - " 'name': 'IDP',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O'},\n", - " {'id': 'C00111',\n", - " 'name': 'Glycerone phosphate',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O'},\n", - " {'id': 'C00118',\n", - " 'name': 'D-Glyceraldehyde 3-phosphate',\n", - " 'smiles': 'O=C[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00197',\n", - " 'name': '3-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00236',\n", - " 'name': '3-Phospho-D-glyceroyl phosphate',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O'},\n", - " {'id': 'C00354',\n", - " 'name': 'D-Fructose 1,6-bisphosphate',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O'},\n", - " {'id': 'C00631',\n", - " 'name': '2-Phospho-D-glycerate',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C02745',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': None,\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - 
" 'reactions_involving_missing': []}},\n", - " 'M00307': {'module_id': 'M00307',\n", - " 'reactions': [{'id': 'R00014',\n", - " 'reaction': 'C00022 + C00068 => C05125 + C00011',\n", - " 'rule': None,\n", - " 'smiles': 'CC(=O)C(=O)O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1>>Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.O=C=O'},\n", - " {'id': 'R00209',\n", - " 'reaction': 'C00022 + C00010 + C00003 => C00024 + C00011 + C00004 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R00210',\n", - " 'reaction': 'C00022 + C00010 + C00006 => C00024 + C00011 + C00005 + C00080',\n", - " 'rule': None,\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R01196',\n", - " 'reaction': '2 C00139 + C00022 + C00010 => 2 C00138 + C00024 + C00011 + 2 C00080',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[C:53]([OH:52])=[O:54].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[S:57]1[Fe+:58][S:59][Fe+:60]1.[S:61]1[Fe+:62][S:63][Fe+:64]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[O:52]=[C:53]=[O:54].[H+:55].[H+:56].[S:57]1[Fe:58][S:59][Fe+:60]1.[S:61]1[Fe:62][S:63][Fe+:64]1',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1.CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS>>S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1.CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.[H+].[H+]'},\n", - " {'id': 'R02569',\n", - " 'reaction': 'C00010 + C16255 => C00024 + C15973',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[S:4][CH2:62][CH2:61][C@@H:59]([CH2:58][CH2:57][CH2:56][CH2:55][C:53]([NH2:52])=[O:54])[SH:60].[CH2:5]([CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51])[SH:63]>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[CH2:55][CH2:56][CH2:57][CH2:58][C@@H:59]([SH:60])[CH2:61][CH2:62][SH:63]',\n", - " 'smiles': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)CCCC[C@@H](S)CCSC(C)=O>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'R03270',\n", - " 'reaction': 'C05125 + C15972 => C16255 + C00068',\n", - " 'rule': '[CH3:1][CH:2]([OH:3])[c:23]1[n+:22]([CH2:21][c:20]2[cH:19][n:18][c:17]([CH3:16])[n:41][c:39]2[NH2:40])[c:37]([CH3:38])[c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[s:24]1.[S:4]1[CH2:5][CH2:6][C@@H:7]([CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15])[S:8]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][C@H:7]([SH:8])[CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15].[CH3:16][c:17]1[n:18][cH:19][c:20]([CH2:21][n+:22]2[cH:23][s:24][c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[c:37]2[CH3:38])[c:39]([NH2:40])[n:41]1',\n", - " 'smiles': 
'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.NC(=O)CCCC[C@@H]1CCSS1>>NC(=O)CCCC[C@@H](S)CCSC(C)=O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'R07618',\n", - " 'reaction': 'C15973 + C00003 => C15972 + C00004 + C00080',\n", - " 'rule': '[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@H:52]([CH2:53][CH2:54][SH:55])[SH:56].[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@@H:52]1[CH2:53][CH2:54][S:55][S:56]1.[H+:57]',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>NC(=O)CCCC[C@@H]1CCSS1.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]'},\n", - " {'id': 'R10866',\n", - " 'reaction': 'C00022 + C00010 + C02869 => C00024 + C00011 + C02745',\n", - " 'rule': 
'[CH3:1][C:2](=[O:3])[C:84]([OH:83])=[O:85].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[n:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[n:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]-1[n:82]2>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[N:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[nH:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]1[NH:82]2.[O:83]=[C:84]=[O:85]',\n", - " 'smiles': 'CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'}],\n", - " 'molecules': [{'id': 'C00003',\n", - " 'name': 'NAD+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00004',\n", - " 'name': 'NADH',\n", - " 'smiles': 
'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00005',\n", - " 'name': 'NADPH',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1'},\n", - " {'id': 'C00006',\n", - " 'name': 'NADP+',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1'},\n", - " {'id': 'C00010',\n", - " 'name': 'CoA',\n", - " 'smiles': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS'},\n", - " {'id': 'C00011', 'name': 'CO2', 'smiles': 'O=C=O'},\n", - " {'id': 'C00022', 'name': 'Pyruvate', 'smiles': 'CC(=O)C(=O)O'},\n", - " {'id': 'C00024',\n", - " 'name': 'Acetyl-CoA',\n", - " 'smiles': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O'},\n", - " {'id': 'C00068',\n", - " 'name': 'Thiamin diphosphate',\n", - " 'smiles': 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'C00080', 'name': 'H+', 'smiles': '[H+]'},\n", - " {'id': 'C00138', 'name': 'Reduced ferredoxin', 'smiles': 'S1[Fe]S[Fe+]1'},\n", - " {'id': 'C00139',\n", - " 'name': 'Oxidized ferredoxin',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1'},\n", - " {'id': 'C02745',\n", - " 'name': 'Reduced flavodoxin',\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'},\n", - " {'id': 'C02869',\n", - " 'name': 'Oxidized flavodoxin',\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))'},\n", - " {'id': 'C05125',\n", - " 'name': '2-(alpha-Hydroxyethyl)thiamine diphosphate',\n", - " 'smiles': 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1'},\n", - " {'id': 'C15972',\n", - " 'name': 'Enzyme N6-(lipoyl)lysine',\n", - " 'smiles': 
'NC(=O)CCCC[C@@H]1CCSS1'},\n", - " {'id': 'C15973',\n", - " 'name': 'Enzyme N6-(dihydrolipoyl)lysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS'},\n", - " {'id': 'C16255',\n", - " 'name': '[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O'},\n", - " {'id': 'C99999',\n", - " 'name': 'Polyphosphate fragment',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O'}],\n", - " 'missing': {'missing_compounds': [],\n", - " 'missing_compound_ids': [],\n", - " 'reactions_involving_missing': []}}},\n", - " 'missing': {'missing_compound_ids': [], 'reactions_involving_missing': []}}" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "imputed_pathway" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "731cabeb", - "metadata": {}, - "outputs": [], - "source": [ - "imputed_pathway = pathway_data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "9547de51", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AbstractReactionNetwork(molecule_pool=['Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'C=C(OP(=O)(O)O)C(=O)O', 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'CC(=O)C(=O)O', 'O=C(O)[C@@H](CO)OP(=O)(O)O', 'O', 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O', 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O', 'O=C(CO)COP(=O)(O)O', 'O=C[C@H](O)COP(=O)(O)O', 'O=P(O)(O)O', 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1', 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O', 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1', '[H+]', 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1', 
'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1', 'O=C(O)[C@H](O)COP(=O)(O)O', 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O', 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O', 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O', 'O=P(O)(O)OP(=O)(O)O', 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O', 'S1[Fe+]S[Fe+]1', 'S1[Fe]S[Fe+]1', 'O=C(O)CC(=O)C(=O)O', 'O=C=O', 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1', 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1', 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1', 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1', 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS', 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O', 'NC(=O)CCCC[C@@H](S)CCSC(C)=O', 'NC(=O)CCCC[C@@H](S)CCS', 'NC(=O)CCCC[C@@H]1CCSS1', 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))', 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'], reactions=['A+B>>C+D', 'E>>B+F', 'C+G>>A+H', 'I>>J', 'J+K+L>>M+N+O', 'J+K+P>>M+Q+O', 'H>>I+J', 'A+M>>C+R', 'R>>E', 'C+S>>A+T', 'U+S>>V+T', 'A+G>>W+H', 'J+F+X+X>>R+O+O+Y+Y', 'S+A>>T+W', 'T>>G', 'C+Z>>A+B+AA', 'AB+Z>>AC+B+AA', 'B+F>>E', 'AD+Z>>AE+B+AA', 'H+F>>G+K', 'J>>I', 'M+N+O>>J+K+L', 'M+Q+O>>J+K+P', 'I+J>>H', 'C+R>>A+M', 'E>>R', 'D+AF>>AG+AA', 'D+AH+L>>AI+AA+N+O', 'D+AH+P>>AI+AA+Q+O', 'X+X+D+AH>>Y+Y+AI+AA+O+O', 'AH+AJ>>AI+AK', 'AG+AL>>AJ+AF', 'AK+L>>AL+N+O', 'D+AH+AM>>AI+AA+AN'], templates={'M00001:R02189': 
'[O:1]=[P:2]([OH:3])([OH:4])[O:20][P:18](=[O:17])([OH:19])[O:21][P:22](=[O:23])([OH:24])[OH:25].[OH:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]>>[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16].[O:17]=[P:18]([OH:19])([OH:20])[O:21][P:22](=[O:23])([OH:24])[OH:25]', 'M00001:R07159': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1', 'M00002:R07159': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1', 'M00307:R01196': '[CH3:1][C:2](=[O:3])[C:53]([OH:52])=[O:54].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[S:57]1[Fe+:58][S:59][Fe+:60]1.[S:61]1[Fe+:62][S:63][Fe+:64]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[O:52]=[C:53]=[O:54].[H+:55].[H+:56].[S:57]1[Fe:58][S:59][Fe+:60]1.[S:61]1[Fe:62][S:63][Fe+:64]1', 'M00307:R02569': 
'[CH3:1][C:2](=[O:3])[S:4][CH2:62][CH2:61][C@@H:59]([CH2:58][CH2:57][CH2:56][CH2:55][C:53]([NH2:52])=[O:54])[SH:60].[CH2:5]([CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51])[SH:63]>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[CH2:55][CH2:56][CH2:57][CH2:58][C@@H:59]([SH:60])[CH2:61][CH2:62][SH:63]', 'M00307:R03270': '[CH3:1][CH:2]([OH:3])[c:23]1[n+:22]([CH2:21][c:20]2[cH:19][n:18][c:17]([CH3:16])[n:41][c:39]2[NH2:40])[c:37]([CH3:38])[c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[s:24]1.[S:4]1[CH2:5][CH2:6][C@@H:7]([CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15])[S:8]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][C@H:7]([SH:8])[CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15].[CH3:16][c:17]1[n:18][cH:19][c:20]([CH2:21][n+:22]2[cH:23][s:24][c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[c:37]2[CH3:38])[c:39]([NH2:40])[n:41]1', 'M00307:R07618': 
'[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@H:52]([CH2:53][CH2:54][SH:55])[SH:56].[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@@H:52]1[CH2:53][CH2:54][S:55][S:56]1.[H+:57]', 'M00307:R10866': '[CH3:1][C:2](=[O:3])[C:84]([OH:83])=[O:85].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[n:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[n:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]-1[n:82]2>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[N:60]([CH2:61][CH:62]([OH:63])
[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[nH:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]1[NH:82]2.[O:83]=[C:84]=[O:85]'}, label_to_molecule={'A': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'B': 'C=C(OP(=O)(O)O)C(=O)O', 'C': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'D': 'CC(=O)C(=O)O', 'E': 'O=C(O)[C@@H](CO)OP(=O)(O)O', 'F': 'O', 'G': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O', 'H': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O', 'I': 'O=C(CO)COP(=O)(O)O', 'J': 'O=C[C@H](O)COP(=O)(O)O', 'K': 'O=P(O)(O)O', 'L': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1', 'M': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O', 'N': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1', 'O': '[H+]', 'P': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1', 'Q': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1', 'R': 'O=C(O)[C@H](O)COP(=O)(O)O', 'S': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O', 'T': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O', 'U': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O', 'V': 'O=P(O)(O)OP(=O)(O)O', 'W': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O', 'X': 'S1[Fe+]S[Fe+]1', 'Y': 'S1[Fe]S[Fe+]1', 'Z': 'O=C(O)CC(=O)C(=O)O', 'AA': 'O=C=O', 'AB': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1', 'AC': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1', 'AD': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'AE': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O', 'AF': 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1', 'AG': 
'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1', 'AH': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS', 'AI': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O', 'AJ': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O', 'AK': 'NC(=O)CCCC[C@@H](S)CCS', 'AL': 'NC(=O)CCCC[C@@H]1CCSS1', 'AM': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))', 'AN': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'})" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from synkit.CRN.Construct.abstract import AbstractReactionExtractor\n", - "\n", - "abs_network = AbstractReactionExtractor().build(\n", - " data=imputed_pathway,\n", - " drop_missing_smiles_reactions=True,\n", - " deduplicate=True,\n", - " order=\"appearance\",\n", - " reactant_join=\"+\",\n", - " product_join=\"+\",\n", - " prefix_module_in_reaction_id=True,\n", - " reaction_id_keys=[\"id\"],\n", - " reaction_smiles_keys=[\"smiles\"],\n", - " template_keys=[\"rule\"],\n", - " save_as=None\n", - ")\n", - "abs_network" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "cc6565fc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'molecule_pool': ['Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'C=C(OP(=O)(O)O)C(=O)O',\n", - " 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'CC(=O)C(=O)O',\n", - " 'O=C(O)[C@@H](CO)OP(=O)(O)O',\n", - " 'O',\n", - " 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O',\n", - " 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O',\n", - " 'O=C(CO)COP(=O)(O)O',\n", - " 'O=C[C@H](O)COP(=O)(O)O',\n", - " 'O=P(O)(O)O',\n", - " 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1',\n", - " 
'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O',\n", - " 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1',\n", - " '[H+]',\n", - " 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1',\n", - " 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1',\n", - " 'O=C(O)[C@H](O)COP(=O)(O)O',\n", - " 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O',\n", - " 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O',\n", - " 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O',\n", - " 'O=P(O)(O)OP(=O)(O)O',\n", - " 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'S1[Fe+]S[Fe+]1',\n", - " 'S1[Fe]S[Fe+]1',\n", - " 'O=C(O)CC(=O)C(=O)O',\n", - " 'O=C=O',\n", - " 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1',\n", - " 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1',\n", - " 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1',\n", - " 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1',\n", - " 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS',\n", - " 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O',\n", - " 'NC(=O)CCCC[C@@H](S)CCSC(C)=O',\n", - " 'NC(=O)CCCC[C@@H](S)CCS',\n", - " 'NC(=O)CCCC[C@@H]1CCSS1',\n", - " 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))',\n", - " 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'],\n", - " 'reactions': ['A+B>>C+D',\n", - " 'E>>B+F',\n", - " 'C+G>>A+H',\n", - " 'I>>J',\n", - " 'J+K+L>>M+N+O',\n", - " 
'J+K+P>>M+Q+O',\n", - " 'H>>I+J',\n", - " 'A+M>>C+R',\n", - " 'R>>E',\n", - " 'C+S>>A+T',\n", - " 'U+S>>V+T',\n", - " 'A+G>>W+H',\n", - " 'J+F+X+X>>R+O+O+Y+Y',\n", - " 'S+A>>T+W',\n", - " 'T>>G',\n", - " 'C+Z>>A+B+AA',\n", - " 'AB+Z>>AC+B+AA',\n", - " 'B+F>>E',\n", - " 'AD+Z>>AE+B+AA',\n", - " 'H+F>>G+K',\n", - " 'J>>I',\n", - " 'M+N+O>>J+K+L',\n", - " 'M+Q+O>>J+K+P',\n", - " 'I+J>>H',\n", - " 'C+R>>A+M',\n", - " 'E>>R',\n", - " 'D+AF>>AG+AA',\n", - " 'D+AH+L>>AI+AA+N+O',\n", - " 'D+AH+P>>AI+AA+Q+O',\n", - " 'X+X+D+AH>>Y+Y+AI+AA+O+O',\n", - " 'AH+AJ>>AI+AK',\n", - " 'AG+AL>>AJ+AF',\n", - " 'AK+L>>AL+N+O',\n", - " 'D+AH+AM>>AI+AA+AN'],\n", - " 'templates': {'M00001:R02189': '[O:1]=[P:2]([OH:3])([OH:4])[O:20][P:18](=[O:17])([OH:19])[O:21][P:22](=[O:23])([OH:24])[OH:25].[OH:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16]>>[O:1]=[P:2]([OH:3])([OH:4])[O:5][CH2:6][C@H:7]1[O:8][C@H:9]([OH:10])[C@H:11]([OH:12])[C@@H:13]([OH:14])[C@@H:15]1[OH:16].[O:17]=[P:18]([OH:19])([OH:20])[O:21][P:22](=[O:23])([OH:24])[OH:25]',\n", - " 'M00001:R07159': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1',\n", - " 'M00002:R07159': '[OH2:3].[O:1]=[CH:2][C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[S:14]1[Fe+:15][S:16][Fe+:17]1.[S:18]1[Fe+:19][S:20][Fe+:21]1>>[O:1]=[C:2]([OH:3])[C@H:4]([OH:5])[CH2:6][O:7][P:8](=[O:9])([OH:10])[OH:11].[H+:12].[H+:13].[S:14]1[Fe:15][S:16][Fe+:17]1.[S:18]1[Fe:19][S:20][Fe+:21]1',\n", - " 'M00307:R01196': 
'[CH3:1][C:2](=[O:3])[C:53]([OH:52])=[O:54].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[S:57]1[Fe+:58][S:59][Fe+:60]1.[S:61]1[Fe+:62][S:63][Fe+:64]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[O:52]=[C:53]=[O:54].[H+:55].[H+:56].[S:57]1[Fe:58][S:59][Fe+:60]1.[S:61]1[Fe:62][S:63][Fe+:64]1',\n", - " 'M00307:R02569': '[CH3:1][C:2](=[O:3])[S:4][CH2:62][CH2:61][C@@H:59]([CH2:58][CH2:57][CH2:56][CH2:55][C:53]([NH2:52])=[O:54])[SH:60].[CH2:5]([CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51])[SH:63]>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[NH2:52][C:53](=[O:54])[CH2:55][CH2:56][CH2:57][CH2:58][C@@H:59]([SH:60])[CH2:61][CH2:62][SH:63]',\n", - " 'M00307:R03270': 
'[CH3:1][CH:2]([OH:3])[c:23]1[n+:22]([CH2:21][c:20]2[cH:19][n:18][c:17]([CH3:16])[n:41][c:39]2[NH2:40])[c:37]([CH3:38])[c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[s:24]1.[S:4]1[CH2:5][CH2:6][C@@H:7]([CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15])[S:8]1>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][C@H:7]([SH:8])[CH2:9][CH2:10][CH2:11][CH2:12][C:13]([NH2:14])=[O:15].[CH3:16][c:17]1[n:18][cH:19][c:20]([CH2:21][n+:22]2[cH:23][s:24][c:25]([CH2:26][CH2:27][O:28][P:29](=[O:30])([OH:31])[O:32][P:33](=[O:34])([OH:35])[OH:36])[c:37]2[CH3:38])[c:39]([NH2:40])[n:41]1',\n", - " 'M00307:R07618': '[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@H:52]([CH2:53][CH2:54][SH:55])[SH:56].[NH2:1][C:2](=[O:3])[c:4]1[cH:5][n+:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[cH:42][cH:43][cH:44]1>>[NH2:1][C:2](=[O:3])[C:4]1=[CH:5][N:6]([C@@H:7]2[O:8][C@H:9]([CH2:10][O:11][P:12](=[O:13])([OH:14])[O:15][P:16](=[O:17])([OH:18])[O:19][CH2:20][C@H:21]3[O:22][C@@H:23]([n:24]4[cH:25][n:26][c:27]5[c:28]([NH2:29])[n:30][cH:31][n:32][c:33]45)[C@H:34]([OH:35])[C@@H:36]3[OH:37])[C@@H:38]([OH:39])[C@H:40]2[OH:41])[CH:42]=[CH:43][CH2:44]1.[NH2:45][C:46](=[O:47])[CH2:48][CH2:49][CH2:50][CH2:51][C@@H:52]1[CH2:53][CH2:54][S:55][S:56]1.[H+:57]',\n", - " 'M00307:R10866': 
'[CH3:1][C:2](=[O:3])[C:84]([OH:83])=[O:85].[SH:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[n:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[n:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]-1[n:82]2>>[CH3:1][C:2](=[O:3])[S:4][CH2:5][CH2:6][NH:7][C:8](=[O:9])[CH2:10][CH2:11][NH:12][C:13](=[O:14])[C@H:15]([OH:16])[C:17]([CH3:18])([CH3:19])[CH2:20][O:21][P:22](=[O:23])([OH:24])[O:25][P:26](=[O:27])([OH:28])[O:29][CH2:30][C@H:31]1[O:32][C@@H:33]([n:34]2[cH:35][n:36][c:37]3[c:38]([NH2:39])[n:40][cH:41][n:42][c:43]23)[C@H:44]([OH:45])[C@@H:46]1[O:47][P:48](=[O:49])([OH:50])[OH:51].[CH3:52][c:53]1[cH:54][c:55]2[c:56]([cH:57][c:58]1[CH3:59])[N:60]([CH2:61][CH:62]([OH:63])[CH:64]([OH:65])[CH:66]([OH:67])[CH2:68][O:69][P:70](=[O:71])([O-:72])[O-:73])[c:74]1[nH:75][c:76](=[O:77])[nH:78][c:79](=[O:80])[c:81]1[NH:82]2.[O:83]=[C:84]=[O:85]'},\n", - " 'label_to_molecule': {'A': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'B': 'C=C(OP(=O)(O)O)C(=O)O',\n", - " 'C': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'D': 'CC(=O)C(=O)O',\n", - " 'E': 'O=C(O)[C@@H](CO)OP(=O)(O)O',\n", - " 'F': 'O',\n", - " 'G': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O',\n", - " 'H': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O',\n", - " 'I': 'O=C(CO)COP(=O)(O)O',\n", - " 'J': 'O=C[C@H](O)COP(=O)(O)O',\n", - " 'K': 'O=P(O)(O)O',\n", - " 'L': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1',\n", - " 'M': 
'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O',\n", - " 'N': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1',\n", - " 'O': '[H+]',\n", - " 'P': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1',\n", - " 'Q': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1',\n", - " 'R': 'O=C(O)[C@H](O)COP(=O)(O)O',\n", - " 'S': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O',\n", - " 'T': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O',\n", - " 'U': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O',\n", - " 'V': 'O=P(O)(O)OP(=O)(O)O',\n", - " 'W': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'X': 'S1[Fe+]S[Fe+]1',\n", - " 'Y': 'S1[Fe]S[Fe+]1',\n", - " 'Z': 'O=C(O)CC(=O)C(=O)O',\n", - " 'AA': 'O=C=O',\n", - " 'AB': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1',\n", - " 'AC': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1',\n", - " 'AD': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'AE': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'AF': 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1',\n", - " 'AG': 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1',\n", - " 'AH': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS',\n", - " 'AI': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O',\n", - " 'AJ': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O',\n", - " 'AK': 'NC(=O)CCCC[C@@H](S)CCS',\n", - " 'AL': 'NC(=O)CCCC[C@@H]1CCSS1',\n", - " 'AM': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))',\n", - " 'AN': 
'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))'}}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abs_network_dict = abs_network.to_dict()\n", - "abs_network_dict" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "1bdbbc3d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "40" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(abs_network_dict['molecule_pool'])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "d5cdb557", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['A+B>>C+D',\n", - " 'E>>B+F',\n", - " 'C+G>>A+H',\n", - " 'I>>J',\n", - " 'J+K+L>>M+N+O',\n", - " 'J+K+P>>M+Q+O',\n", - " 'H>>I+J',\n", - " 'A+M>>C+R',\n", - " 'R>>E',\n", - " 'C+S>>A+T',\n", - " 'U+S>>V+T',\n", - " 'A+G>>W+H',\n", - " 'J+F+X+X>>R+O+O+Y+Y',\n", - " 'S+A>>T+W',\n", - " 'T>>G',\n", - " 'C+Z>>A+B+AA',\n", - " 'AB+Z>>AC+B+AA',\n", - " 'B+F>>E',\n", - " 'AD+Z>>AE+B+AA',\n", - " 'H+F>>G+K',\n", - " 'J>>I',\n", - " 'M+N+O>>J+K+L',\n", - " 'M+Q+O>>J+K+P',\n", - " 'I+J>>H',\n", - " 'C+R>>A+M',\n", - " 'E>>R',\n", - " 'D+AF>>AG+AA',\n", - " 'D+AH+L>>AI+AA+N+O',\n", - " 'D+AH+P>>AI+AA+Q+O',\n", - " 'X+X+D+AH>>Y+Y+AI+AA+O+O',\n", - " 'AH+AJ>>AI+AK',\n", - " 'AG+AL>>AJ+AF',\n", - " 'AK+L>>AL+N+O',\n", - " 'D+AH+AM>>AI+AA+AN']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abs_network_dict['reactions']" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "526599d4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "34" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(abs_network_dict['reactions'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a33b5678", - "metadata": 
{}, - "outputs": [], - "source": [ - "map = {\"r1\": \"A+B>>C+D\",\n", - " \"r2\": \"E>>D+F\",\n", - " \"r3\": \"A+G>>C+H\",\n", - " \"r4\": \"I>>J\",\n", - " \"r5\": \"I+K+L>>M+N+O\",\n", - " \"r27\": \"I+K+P>>M+Q+O\",\n", - " \"r7\": \"H>>J+I\",\n", - " \"r8\": \"A+R>>C+M\",\n", - " \"r9\": \"E>>R\",\n", - " \"r10\": \"A+S>>C+T\",\n", - " \"r11\": \"U+S>>V+T\",\n", - " \"r12\": \"C+G>>W+H\",\n", - " \"r13\": \"I+F+X+X>>R+O+O+Y+Y\",\n", - " \"r14\": \"S+C>>T+W\",\n", - " \"r15\": \"T>>G\",\n", - " \"r16\": \"A+Z>>C+D+AA\",\n", - " \"r17\": \"AB+Z>>AC+D+AA\",\n", - " \"r18\": \"AD+Z>>AE+D+AA\",\n", - " \"r19\": \"H+F>>G+K\",\n", - " \"r20\": \"B+AF>>AG+AA\",\n", - " \"r21\": \"B+AH+L>>AI+AA+N+O\",\n", - " \"r6\": \"B+AH+P>>AI+AA+Q+O\",/\n", - " \"r22\": \"X+X+B+AH>>Y+Y+AI+AA+O+O\",\n", - " \"r23\": \"AI+AJ>>AH+AK\",\n", - " \"r24\": \"AG+AL>>AK+AF\",\n", - " \"r25\": \"AJ+L>>AL+N+O\",\n", - " \"r26\": \"B+AH+AM>>AI+AA+AN\"\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "bd06a176", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O\n", - "O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O\n", - "O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O\n", - "O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - 
"O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O>>O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O\n", - "O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O\n", - "O=P(O)(O)OP(=O)(O)OP(=O)(O)O.OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OP(=O)(O)O.O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O.O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O\n", - "O=C[C@H](O)COP(=O)(O)O.O.S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+].S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1\n", - "OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O>>O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O.Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O\n", - "O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O\n", - 
"O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O\n", - "O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O\n", - "O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O\n", - "O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O\n", - "O=C[C@H](O)COP(=O)(O)O.O.S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1>>O=C(O)[C@H](O)COP(=O)(O)O.[H+].[H+].S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O\n", - "Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.O=C(O)CC(=O)C(=O)O>>Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1.C=C(OP(=O)(O)O)C(=O)O.O=C=O\n", - "O=C(O)[C@@H](CO)OP(=O)(O)O>>C=C(OP(=O)(O)O)C(=O)O.O\n", - "O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)CC(=O)C(=O)O>>O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.C=C(OP(=O)(O)O)C(=O)O.O=C=O\n", - "O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O.O>>O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O.O=P(O)(O)O\n", - 
"O=C[C@H](O)COP(=O)(O)O>>O=C(CO)COP(=O)(O)O\n", - "O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "O=C[C@H](O)COP(=O)(O)O.O=P(O)(O)O.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O>>O=C(CO)COP(=O)(O)O.O=C[C@H](O)COP(=O)(O)O\n", - "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(O)[C@H](O)COP(=O)(O)O>>Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O.O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O\n", - "O=C(O)[C@@H](CO)OP(=O)(O)O>>O=C(O)[C@H](O)COP(=O)(O)O\n", - "CC(=O)C(=O)O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1>>Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.O=C=O\n", - "CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - 
"CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "S1[Fe+]S[Fe+]1.S1[Fe+]S[Fe+]1.CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS>>S1[Fe]S[Fe+]1.S1[Fe]S[Fe+]1.CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.[H+].[H+]\n", - "CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.NC(=O)CCCC[C@@H](S)CCS>>CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.NC(=O)CCCC[C@@H](S)CCSC(C)=O\n", - "Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1.NC(=O)CCCC[C@@H]1CCSS1>>NC(=O)CCCC[C@@H](S)CCSC(C)=O.Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1\n", - "NC(=O)CCCC[C@@H](S)CCS.NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1>>NC(=O)CCCC[C@@H]1CCSS1.NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1.[H+]\n", - "CC(=O)C(=O)O.CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS.CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))>>CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O.O=C=O.CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))\n" - ] - } 
- ], - "source": [ - "reactions = [\n", - " rxn[\"smiles\"]\n", - " for module in pathway_data[\"by_module\"].values()\n", - " for rxn in module.get(\"reactions\", [])\n", - " if \"reaction\" in rxn\n", - "]\n", - "\n", - "for r in reactions:\n", - " print(r)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a80f4435", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " A | C00008 | ADP | Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O\n", - " B | C00074 | Phosphoenolpyruvate | C=C(OP(=O)(O)O)C(=O)O\n", - " C | C00002 | ATP | Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O\n", - " D | C00022 | Pyruvate | CC(=O)C(=O)O\n", - " E | C00631 | 2-Phospho-D-glycerate | O=C(O)[C@@H](CO)OP(=O)(O)O\n", - " F | C00001 | H2O | O\n", - " G | C00085 | D-Fructose 6-phosphate | O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O\n", - " H | C00354 | D-Fructose 1,6-bisphosphate | O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O\n", - " I | C00111 | Glycerone phosphate | O=C(CO)COP(=O)(O)O\n", - " J | C00118 | D-Glyceraldehyde 3-phosphate | O=C[C@H](O)COP(=O)(O)O\n", - " K | C00009 | Orthophosphate | O=P(O)(O)O\n", - " L | C00003 | NAD+ | NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1\n", - " M | C00236 | 3-Phospho-D-glyceroyl phosphate | O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O\n", - " N | C00004 | NADH | NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1\n", - " O | C00080 | H+ | [H+]\n", - " P | C00006 | NADP+ | NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1\n", - " Q | C00005 | NADPH | NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1\n", - " R | C00197 | 3-Phospho-D-glycerate | 
O=C(O)[C@H](O)COP(=O)(O)O\n", - " S | C00267 | alpha-D-Glucose | OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O\n", - " T | C00668 | alpha-D-Glucose 6-phosphate | O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O\n", - " U | C00404 | Polyphosphate | O=P(O)(O)OP(=O)(O)OP(=O)(O)O\n", - " V | C99999 | Polyphosphate fragment | O=P(O)(O)OP(=O)(O)O\n", - " W | C00020 | AMP | Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O\n", - " X | C00139 | Oxidized ferredoxin | S1[Fe+]S[Fe+]1\n", - " Y | C00138 | Reduced ferredoxin | S1[Fe]S[Fe+]1\n", - " Z | C00036 | Oxaloacetate | O=C(O)CC(=O)C(=O)O\n", - " AA | C00011 | CO2 | O=C=O\n", - " AB | C00044 | GTP | Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1\n", - " AC | C00035 | GDP | Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1\n", - " AD | C00081 | ITP | O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O\n", - " AE | C00104 | IDP | O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O\n", - " AF | C00068 | Thiamin diphosphate | Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1\n", - " AG | C05125 | 2-(alpha-Hydroxyethyl)thiamine diphosphate | Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1\n", - " AH | C00010 | CoA | CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS\n", - " AI | C00024 | Acetyl-CoA | CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O\n", - " AJ | C16255 | [Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine | NC(=O)CCCC[C@@H](S)CCSC(C)=O\n", - " AK | C15973 | Enzyme N6-(dihydrolipoyl)lysine | NC(=O)CCCC[C@@H](S)CCS\n", - " AL | C15972 | Enzyme N6-(lipoyl)lysine | NC(=O)CCCC[C@@H]1CCSS1\n", - " AM | C02869 | Oxidized flavodoxin | CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))\n", - " AN | C02745 | Reduced 
flavodoxin | CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))\n", - "\n", - "Saved mapping to: Data/Study/CRN/case_glycolysis/label_to_name_smiles.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[16:04:47] WARNING: not removing hydrogen atom without neighbors\n", - "[16:04:47] WARNING: not removing hydrogen atom without neighbors\n", - "[16:04:47] WARNING: not removing hydrogen atom without neighbors\n", - "[16:04:47] WARNING: not removing hydrogen atom without neighbors\n", - "[16:04:47] WARNING: not removing hydrogen atom without neighbors\n" - ] - } - ], - "source": [ - "from __future__ import annotations\n", - "\n", - "import json\n", - "from collections import defaultdict\n", - "from pathlib import Path\n", - "\n", - "try:\n", - " from rdkit import Chem\n", - "except ImportError:\n", - " Chem = None\n", - "\n", - "\n", - "def canonicalize_smiles(smiles: str) -> str:\n", - " \"\"\"\n", - " Canonicalize a SMILES string with RDKit.\n", - " Falls back to the raw SMILES if RDKit is unavailable or parsing fails.\n", - " \"\"\"\n", - " smiles = smiles.strip()\n", - " if Chem is None:\n", - " return smiles\n", - "\n", - " mol = Chem.MolFromSmiles(smiles)\n", - " if mol is None:\n", - " return smiles\n", - "\n", - " return Chem.MolToSmiles(mol, canonical=True)\n", - "\n", - "\n", - "def collect_full_molecule_index(full_data: dict) -> dict[str, dict]:\n", - " \"\"\"\n", - " Build an index:\n", - " canonical_smiles -> {\n", - " \"ids\": set(...),\n", - " \"names\": set(...),\n", - " \"raw_smiles\": set(...)\n", - " }\n", - " \"\"\"\n", - " index = defaultdict(lambda: {\"ids\": set(), \"names\": set(), \"raw_smiles\": set()})\n", - "\n", - " for module_data in full_data.get(\"by_module\", {}).values():\n", - " for mol in module_data.get(\"molecules\", []):\n", - " smiles = mol.get(\"smiles\")\n", - " if not smiles:\n", - " continue\n", - "\n", - " key = canonicalize_smiles(smiles)\n", - " 
index[key][\"raw_smiles\"].add(smiles)\n", - "\n", - " mol_id = mol.get(\"id\")\n", - " if mol_id:\n", - " index[key][\"ids\"].add(mol_id)\n", - "\n", - " mol_name = mol.get(\"name\")\n", - " if mol_name:\n", - " index[key][\"names\"].add(mol_name)\n", - "\n", - " return index\n", - "\n", - "\n", - "def map_abstract_labels_to_full_molecules(abstract_data: dict, full_data: dict) -> dict[str, dict]:\n", - " \"\"\"\n", - " Map abstract labels A, B, C, ... to the corresponding full molecule info.\n", - " \"\"\"\n", - " index = collect_full_molecule_index(full_data)\n", - "\n", - " example = abstract_data[\"examples\"][0]\n", - " label_to_molecule = example[\"label_to_molecule\"]\n", - "\n", - " mapped = {}\n", - "\n", - " for label, smiles in label_to_molecule.items():\n", - " key = canonicalize_smiles(smiles)\n", - " hit = index.get(key)\n", - "\n", - " mapped[label] = {\n", - " \"label\": label,\n", - " \"smiles\": smiles,\n", - " \"ids\": sorted(hit[\"ids\"]) if hit else [],\n", - " \"names\": sorted(hit[\"names\"]) if hit else [],\n", - " \"found\": hit is not None,\n", - " }\n", - "\n", - " return mapped\n", - "\n", - "\n", - "\n", - "# Change these to your actual file names\n", - "abstract_path = Path(\"Data/Study/CRN/case_glycolysis/hsa00010_abstract.json\")\n", - "full_path = Path(\"Data/Study/CRN/case_glycolysis/hsa00010_imputed.json\")\n", - "\n", - "abstract_data = json.loads(abstract_path.read_text(encoding=\"utf-8\"))\n", - "full_data = json.loads(full_path.read_text(encoding=\"utf-8\"))\n", - "\n", - "mapping = map_abstract_labels_to_full_molecules(abstract_data, full_data)\n", - "\n", - "# Pretty print all mappings\n", - "for label in sorted(mapping, key=lambda x: (len(x), x)):\n", - " item = mapping[label]\n", - " names = \"; \".join(item[\"names\"]) if item[\"names\"] else \"UNKNOWN\"\n", - " ids = \", \".join(item[\"ids\"]) if item[\"ids\"] else \"UNKNOWN\"\n", - " print(f\"{label:>3} | {ids:20} | {names:60} | {item['smiles']}\")\n", - "\n", - "# Save 
as JSON\n", - "out_path = Path(\"Data/Study/CRN/case_glycolysis/label_to_name_smiles.json\")\n", - "out_path.write_text(json.dumps(mapping, indent=2, ensure_ascii=False), encoding=\"utf-8\")\n", - "\n", - "print(\"\\nSaved mapping to:\", out_path)\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "f42abbb9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'A': {'label': 'A',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'ids': ['C00008'],\n", - " 'names': ['ADP'],\n", - " 'found': True},\n", - " 'B': {'label': 'B',\n", - " 'smiles': 'C=C(OP(=O)(O)O)C(=O)O',\n", - " 'ids': ['C00074'],\n", - " 'names': ['Phosphoenolpyruvate'],\n", - " 'found': True},\n", - " 'C': {'label': 'C',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'ids': ['C00002'],\n", - " 'names': ['ATP'],\n", - " 'found': True},\n", - " 'D': {'label': 'D',\n", - " 'smiles': 'CC(=O)C(=O)O',\n", - " 'ids': ['C00022'],\n", - " 'names': ['Pyruvate'],\n", - " 'found': True},\n", - " 'E': {'label': 'E',\n", - " 'smiles': 'O=C(O)[C@@H](CO)OP(=O)(O)O',\n", - " 'ids': ['C00631'],\n", - " 'names': ['2-Phospho-D-glycerate'],\n", - " 'found': True},\n", - " 'F': {'label': 'F',\n", - " 'smiles': 'O',\n", - " 'ids': ['C00001'],\n", - " 'names': ['H2O'],\n", - " 'found': True},\n", - " 'G': {'label': 'G',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(CO)[C@@H](O)[C@@H]1O',\n", - " 'ids': ['C00085'],\n", - " 'names': ['D-Fructose 6-phosphate'],\n", - " 'found': True},\n", - " 'H': {'label': 'H',\n", - " 'smiles': 'O=P(O)(O)OC[C@H]1OC(O)(COP(=O)(O)O)[C@@H](O)[C@@H]1O',\n", - " 'ids': ['C00354'],\n", - " 'names': ['D-Fructose 1,6-bisphosphate'],\n", - " 'found': True},\n", - " 'I': {'label': 'I',\n", - " 'smiles': 'O=C(CO)COP(=O)(O)O',\n", - " 'ids': ['C00111'],\n", - " 'names': ['Glycerone phosphate'],\n", - " 'found': True},\n", - " 'J': {'label': 'J',\n", - " 
'smiles': 'O=C[C@H](O)COP(=O)(O)O',\n", - " 'ids': ['C00118'],\n", - " 'names': ['D-Glyceraldehyde 3-phosphate'],\n", - " 'found': True},\n", - " 'K': {'label': 'K',\n", - " 'smiles': 'O=P(O)(O)O',\n", - " 'ids': ['C00009'],\n", - " 'names': ['Orthophosphate'],\n", - " 'found': True},\n", - " 'L': {'label': 'L',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1',\n", - " 'ids': ['C00003'],\n", - " 'names': ['NAD+'],\n", - " 'found': True},\n", - " 'M': {'label': 'M',\n", - " 'smiles': 'O=C(OP(=O)(O)O)[C@H](O)COP(=O)(O)O',\n", - " 'ids': ['C00236'],\n", - " 'names': ['3-Phospho-D-glyceroyl phosphate'],\n", - " 'found': True},\n", - " 'N': {'label': 'N',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1',\n", - " 'ids': ['C00004'],\n", - " 'names': ['NADH'],\n", - " 'found': True},\n", - " 'O': {'label': 'O',\n", - " 'smiles': '[H+]',\n", - " 'ids': ['C00080'],\n", - " 'names': ['H+'],\n", - " 'found': True},\n", - " 'P': {'label': 'P',\n", - " 'smiles': 'NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)c1',\n", - " 'ids': ['C00006'],\n", - " 'names': ['NADP+'],\n", - " 'found': True},\n", - " 'Q': {'label': 'Q',\n", - " 'smiles': 'NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)(O)O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1',\n", - " 'ids': ['C00005'],\n", - " 'names': ['NADPH'],\n", - " 'found': True},\n", - " 'R': {'label': 'R',\n", - " 'smiles': 'O=C(O)[C@H](O)COP(=O)(O)O',\n", - " 'ids': ['C00197'],\n", - " 'names': ['3-Phospho-D-glycerate'],\n", - " 'found': True},\n", - " 'S': {'label': 'S',\n", - " 'smiles': 'OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O',\n", - " 'ids': ['C00267'],\n", - " 'names': ['alpha-D-Glucose'],\n", - " 'found': True},\n", - " 'T': {'label': 'T',\n", - " 
'smiles': 'O=P(O)(O)OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O',\n", - " 'ids': ['C00668'],\n", - " 'names': ['alpha-D-Glucose 6-phosphate'],\n", - " 'found': True},\n", - " 'U': {'label': 'U',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)OP(=O)(O)O',\n", - " 'ids': ['C00404'],\n", - " 'names': ['Polyphosphate'],\n", - " 'found': True},\n", - " 'V': {'label': 'V',\n", - " 'smiles': 'O=P(O)(O)OP(=O)(O)O',\n", - " 'ids': ['C99999'],\n", - " 'names': ['Polyphosphate fragment'],\n", - " 'found': True},\n", - " 'W': {'label': 'W',\n", - " 'smiles': 'Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'ids': ['C00020'],\n", - " 'names': ['AMP'],\n", - " 'found': True},\n", - " 'X': {'label': 'X',\n", - " 'smiles': 'S1[Fe+]S[Fe+]1',\n", - " 'ids': ['C00139'],\n", - " 'names': ['Oxidized ferredoxin'],\n", - " 'found': True},\n", - " 'Y': {'label': 'Y',\n", - " 'smiles': 'S1[Fe]S[Fe+]1',\n", - " 'ids': ['C00138'],\n", - " 'names': ['Reduced ferredoxin'],\n", - " 'found': True},\n", - " 'Z': {'label': 'Z',\n", - " 'smiles': 'O=C(O)CC(=O)C(=O)O',\n", - " 'ids': ['C00036'],\n", - " 'names': ['Oxaloacetate'],\n", - " 'found': True},\n", - " 'AA': {'label': 'AA',\n", - " 'smiles': 'O=C=O',\n", - " 'ids': ['C00011'],\n", - " 'names': ['CO2'],\n", - " 'found': True},\n", - " 'AB': {'label': 'AB',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1',\n", - " 'ids': ['C00044'],\n", - " 'names': ['GTP'],\n", - " 'found': True},\n", - " 'AC': {'label': 'AC',\n", - " 'smiles': 'Nc1nc2c(ncn2[C@@H]2O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]2O)c(=O)[nH]1',\n", - " 'ids': ['C00035'],\n", - " 'names': ['GDP'],\n", - " 'found': True},\n", - " 'AD': {'label': 'AD',\n", - " 'smiles': 'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'ids': ['C00081'],\n", - " 'names': ['ITP'],\n", - " 'found': True},\n", - " 'AE': {'label': 'AE',\n", - " 'smiles': 
'O=c1[nH]cnc2c1ncn2[C@@H]1O[C@H](COP(=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O',\n", - " 'ids': ['C00104'],\n", - " 'names': ['IDP'],\n", - " 'found': True},\n", - " 'AF': {'label': 'AF',\n", - " 'smiles': 'Cc1ncc(C[n+]2csc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1',\n", - " 'ids': ['C00068'],\n", - " 'names': ['Thiamin diphosphate'],\n", - " 'found': True},\n", - " 'AG': {'label': 'AG',\n", - " 'smiles': 'Cc1ncc(C[n+]2c(C(C)O)sc(CCOP(=O)(O)OP(=O)(O)O)c2C)c(N)n1',\n", - " 'ids': ['C05125'],\n", - " 'names': ['2-(alpha-Hydroxyethyl)thiamine diphosphate'],\n", - " 'found': True},\n", - " 'AH': {'label': 'AH',\n", - " 'smiles': 'CC(C)(COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O)[C@@H](O)C(=O)NCCC(=O)NCCS',\n", - " 'ids': ['C00010'],\n", - " 'names': ['CoA'],\n", - " 'found': True},\n", - " 'AI': {'label': 'AI',\n", - " 'smiles': 'CC(=O)SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)(O)OP(=O)(O)OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)(O)O',\n", - " 'ids': ['C00024'],\n", - " 'names': ['Acetyl-CoA'],\n", - " 'found': True},\n", - " 'AJ': {'label': 'AJ',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCSC(C)=O',\n", - " 'ids': ['C16255'],\n", - " 'names': ['[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine'],\n", - " 'found': True},\n", - " 'AK': {'label': 'AK',\n", - " 'smiles': 'NC(=O)CCCC[C@@H](S)CCS',\n", - " 'ids': ['C15973'],\n", - " 'names': ['Enzyme N6-(dihydrolipoyl)lysine'],\n", - " 'found': True},\n", - " 'AL': {'label': 'AL',\n", - " 'smiles': 'NC(=O)CCCC[C@@H]1CCSS1',\n", - " 'ids': ['C15972'],\n", - " 'names': ['Enzyme N6-(lipoyl)lysine'],\n", - " 'found': True},\n", - " 'AM': {'label': 'AM',\n", - " 'smiles': 'CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))',\n", - " 'ids': ['C02869'],\n", - " 'names': ['Oxidized flavodoxin'],\n", - " 'found': True},\n", - " 'AN': {'label': 'AN',\n", - " 'smiles': 'CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))',\n", - " 
'ids': ['C02745'],\n", - " 'names': ['Reduced flavodoxin'],\n", - " 'found': True}}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "synkit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/test_query.ipynb b/test_query.ipynb deleted file mode 100644 index 63f6065..0000000 --- a/test_query.ipynb +++ /dev/null @@ -1,324 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c0d495aa", - "metadata": {}, - "source": [ - "# Example: hsa00010 pathway" - ] - }, - { - "cell_type": "markdown", - "id": "7744e38a", - "metadata": {}, - "source": [ - "### Extract reactions and compounds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2d66d127", - "metadata": {}, - "outputs": [], - "source": [ - "from synkit.CRN.Query.kegg_extract import KEGGExtractor\n", - "\n", - "pathway_data = KEGGExtractor().build_pathway_json(\n", - " \"hsa00010\",\n", - " with_compounds=True,\n", - " with_atom_maps=True,\n", - " save_as=\"Data/KEGG/hsa00010_raw.json\",\n", - ")\n", - "pathway_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ce774e5", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "with open('Data/Study/CRN/hsa00010_raw.json') as f:\n", - " pathway_data = json.load(f)" - ] - }, - { - "cell_type": "markdown", - "id": "4d9cd763", - "metadata": {}, - "source": [ - "### Impute reactions and compounds, regenerate atom maps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f7361d5", - "metadata": {}, - "outputs": [], - "source": [ 
- "from synkit.CRN.Query.kegg_impute import KEGGImputer\n", - "\n", - "fixes = [\n", - " {\n", - " \"id\": \"R02189\",\n", - " \"reaction\": \"C00404 + C00267 => C99999 + C00668\",\n", - " },\n", - " {\n", - " \"id\": \"C99999\",\n", - " \"name\": \"Polyphosphate fragment\",\n", - " \"smiles\": \"O=P(O)(O)OP(=O)(O)O\",\n", - " },\n", - " {\n", - " \"id\": \"C00138\",\n", - " \"name\": \"Reduced ferredoxin\",\n", - " \"smiles\": \"S1[Fe]S[Fe+]1\"\n", - " },\n", - " {\n", - " \"id\": \"C00139\",\n", - " \"name\": \"Oxidized ferredoxin\",\n", - " \"smiles\": \"S1[Fe+]S[Fe+]1\"\n", - " },\n", - " {\n", - " \"id\": \"C02745\",\n", - " \"smiles\": \"CC2(C=C1(NC3(C(=O)NC(=O)NC(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)=3)))\"\n", - " },\n", - " {\n", - " \"id\": \"C02869\",\n", - " \"smiles\": \"CC2(C=C1(N=C3(C(=O)NC(=O)N=C(N(CC(O)C(O)C(O)COP([O-])(=O)[O-])C1=CC(C)=2)3)))\"\n", - " },\n", - " {\n", - " \"id\": \"C15972\",\n", - " \"name\": \"Enzyme N6-(lipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H]1CCSS\",\n", - " },\n", - " {\n", - " \"id\": \"C15973\",\n", - " \"name\": \"Enzyme N6-(dihydrolipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCS\",\n", - " },\n", - " {\n", - " \"id\": \"C16255\",\n", - " \"name\": \"[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCSC(C)=O\",\n", - " },\n", - " {\n", - " \"id\": \"C15972\",\n", - " \"name\": \"Enzyme N6-(lipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H]1CCSS1\"\n", - " },\n", - " {\n", - " \"id\": \"C15973\",\n", - " \"name\": \"Enzyme N6-(dihydrolipoyl)lysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCS\"\n", - " },\n", - " {\n", - " \"id\": \"C16255\",\n", - " \"name\": \"[Dihydrolipoyllysine-residue acetyltransferase] S-acetyldihydrolipoyllysine\",\n", - " \"smiles\": \"NC(=O)CCCC[C@@H](S)CCSC(C)=O\"\n", - " }\n", - "]\n", - "\n", - "imputer = KEGGImputer()\n", - "imputed_pathway = imputer.impute_pathway(\n", - " 
pathway_data,\n", - " fixes=fixes,\n", - " save_as='Data/KEGG/hsa00010_imputed.json',\n", - ")\n", - "imputed_pathway" - ] - }, - { - "cell_type": "markdown", - "id": "d5fd04f0", - "metadata": {}, - "source": [ - "### Make abstract RN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01f0fbbe", - "metadata": {}, - "outputs": [], - "source": [ - "from synkit.CRN.Construct.abstract import AbstractReactionExtractor\n", - "\n", - "abtract_pathway = AbstractReactionExtractor().build(\n", - " data=imputed_pathway,\n", - " drop_missing_smiles_reactions=True,\n", - " deduplicate=True,\n", - " order=\"appearance\",\n", - " reactant_join=\"+\",\n", - " product_join=\"+\",\n", - " reaction_id_keys=[\"id\"],\n", - " reaction_smiles_keys=[\"smiles\"],\n", - " template_keys=[\"rule\"],\n", - " save_as=\"Data/KEGG/hsa00010_abstract.json\",\n", - ")\n", - "abtract_pathway" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d3a080c4", - "metadata": {}, - "outputs": [], - "source": [ - "abtract_pathway.reactions" - ] - }, - { - "cell_type": "markdown", - "id": "1de18e9c", - "metadata": {}, - "source": [ - "# Example: M00001 module" - ] - }, - { - "cell_type": "markdown", - "id": "6d130e59", - "metadata": {}, - "source": [ - "### Extract reactions and compounds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "46e5e1a3", - "metadata": {}, - "outputs": [], - "source": [ - "from synkit.CRN.Query.kegg_extract import KEGGExtractor\n", - "\n", - "module_data = KEGGExtractor().build_module_json(\n", - " \"M00001\",\n", - " with_compounds=True,\n", - " with_atom_maps=True,\n", - " save_as=\"Data/KEGG/M00001_raw.json\",\n", - ")\n", - "module_data" - ] - }, - { - "cell_type": "markdown", - "id": "bbd6f451", - "metadata": {}, - "source": [ - "### Impute reactions and compounds, regenerate atom maps" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a20de3b0", - "metadata": {}, - "outputs": [], - "source": 
[ - "from synkit.CRN.Query.kegg_impute import KEGGImputer\n", - "\n", - "fixes = [\n", - " {\n", - " \"id\": \"R02189\",\n", - " \"reaction\": \"C00404 + C00267 => C99999 + C00668\",\n", - " },\n", - " {\n", - " \"id\": \"C99999\",\n", - " \"name\": \"Polyphosphate fragment\",\n", - " \"smiles\": \"O=P(O)(O)OP(=O)(O)O\",\n", - " },\n", - " {\n", - " \"id\": \"C00138\",\n", - " \"name\": \"Reduced ferredoxin\",\n", - " \"smiles\": \"S1[Fe]S[Fe+]1\"\n", - " },\n", - " {\n", - " \"id\": \"C00139\",\n", - " \"name\": \"Oxidized ferredoxin\",\n", - " \"smiles\": \"S1[Fe+]S[Fe+]1\"\n", - " },\n", - "]\n", - "\n", - "imputer = KEGGImputer()\n", - "imputed_module = imputer.impute_module(\n", - " module_data,\n", - " fixes=fixes,\n", - " save_as='Data/KEGG/M00001_imputed.json',\n", - ")\n", - "imputed_module" - ] - }, - { - "cell_type": "markdown", - "id": "8e4680e7", - "metadata": {}, - "source": [ - "### Make abstract RN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35a3801f", - "metadata": {}, - "outputs": [], - "source": [ - "from synkit.CRN.Construct.abstract import AbstractReactionExtractor\n", - "\n", - "abtract_module = AbstractReactionExtractor().build(\n", - " data=imputed_module,\n", - " drop_missing_smiles_reactions=True,\n", - " deduplicate=True,\n", - " order=\"appearance\",\n", - " reactant_join=\"+\",\n", - " product_join=\"+\",\n", - " reaction_id_keys=[\"id\"],\n", - " reaction_smiles_keys=[\"smiles\"],\n", - " template_keys=[\"rule\"],\n", - " save_as=\"Data/KEGG/M00001_abstract.json\",\n", - ")\n", - "abtract_module" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e17956c2", - "metadata": {}, - "outputs": [], - "source": [ - "abtract_module.reactions" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "synkit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}