From e2494291b24f42b6993220e14d2a35e5e67ffbbb Mon Sep 17 00:00:00 2001 From: Timo Aho Date: Thu, 7 May 2026 07:37:58 +0300 Subject: [PATCH] Merge template publish features into canonical platform-tooling action Merged features from template/publish-to-confluence into platform-tooling to establish platform-tooling as the canonical action going forward. ## Frontmatter write-back (confluence_url + page_id) After a successful publish, the source .md file gains confluence_url and page_id in its YAML frontmatter. Eliminates hunting for the Confluence page after a push; enables downstream CI steps and Jira links to reference the exact page without a title search. ## Code block -> Confluence macro Fenced code blocks are converted to ac:structured-macro code blocks with syntax highlighting (30+ language aliases) and line numbers. Previously published as unstyled
preformatted text.

## Direct REST PUT for page updates
Replaces conf.update_page() with a manually constructed PUT that includes
the space key Confluence Cloud requires. Eliminates sporadic 400 ApiValueError
failures that left runs partially complete.

## Ghost page cleanup
On title conflict during create, the script searches across draft/trashed/
archived statuses, deletes the blocking ghost page, and retries. Previously
required manual intervention in Confluence UI to clear the conflict.

## Space-wide folder title fallback
get_or_create_folder_page now falls back to a space-wide title search if the
folder page is not found among the parent's children. Prevents duplicate folder
pages being created on partial re-runs or across workflow instances.

## Multi-format image upload
Image attachments now support .svg, .gif, .webp, .jpg/.jpeg, .bmp, .ico in
addition to .png, each uploaded with its correct MIME type. SVG diagrams
(e.g. draw.io exports) and the other non-PNG formats previously failed
silently and appeared broken in Confluence.

## Redundant index.md deduplication
foo/index.md is skipped when foo.md exists at the same level, preventing
duplicate Confluence pages (e.g. 'Adr' and 'Adr - Overview') for the same section.

## Selective publish (files= parameter)
publish_docs() accepts an optional files list. Enables a future workflow
pattern where only changed .md files are published per push, reducing
Confluence API calls and workflow runtime significantly.

## Mermaid integration (Option B - per-file pipeline)
Mermaid rendering via mmdc is now fully encapsulated in publish_single_file.
PNG attachments from render_mermaid_diagrams are converted to ac:image macros
via a dedicated _replace_mermaid_img_tags helper before the generic image
handler runs. Blocks that mmdc cannot render fall through to the CloudScript
macro fallback.
---
 .../actions/publish-to-confluence/publish.py  | 877 +++++++++++++-----
 1 file changed, 652 insertions(+), 225 deletions(-)

diff --git a/.github/actions/publish-to-confluence/publish.py b/.github/actions/publish-to-confluence/publish.py
index 4894871..a95cc8f 100644
--- a/.github/actions/publish-to-confluence/publish.py
+++ b/.github/actions/publish-to-confluence/publish.py
@@ -4,33 +4,135 @@
 Preserves folder structure as page hierarchy.
 
 Supports:
-  - Mermaid diagrams: converted to Confluence CloudScript macro (cloudscript-mermaid)
-  - Local images: uploaded as Confluence attachments
-  - Tables, fenced code blocks
+  - Mermaid diagrams: rendered to PNG via mmdc, uploaded as attachments;
+    falls back to Confluence CloudScript macro if mmdc is unavailable
+  - Local images: uploaded as Confluence attachments (png/jpg/gif/svg/webp/bmp/ico)
+  - Tables, fenced code blocks with syntax highlighting
+  - YAML frontmatter: stripped before publish, confluence_url/page_id written back
 """
 
+import html
+import logging
 import os
 import re
 import subprocess
 import sys
 import tempfile
 from pathlib import Path
+from urllib.parse import unquote
 
 import markdown
 from atlassian import Confluence
 
+log = logging.getLogger(__name__)
 
 # ---------------------------------------------------------------------------
-# Mermaid macro (Confluence CloudScript app)
+# Language map: fenced-code identifier → Confluence code macro language
 # ---------------------------------------------------------------------------
 
+LANGUAGE_MAP = {
+    "python": "python",
+    "py": "python",
+    "javascript": "javascript",
+    "js": "javascript",
+    "typescript": "typescript",
+    "ts": "typescript",
+    "java": "java",
+    "bash": "bash",
+    "sh": "bash",
+    "shell": "bash",
+    "zsh": "bash",
+    "sql": "sql",
+    "json": "javascript",
+    "xml": "xml",
+    "html": "html",
+    "css": "css",
+    "yaml": "yaml",
+    "yml": "yaml",
+    "ruby": "ruby",
+    "rb": "ruby",
+    "go": "go",
+    "rust": "rust",
+    "c": "c",
+    "cpp": "cpp",
+    "c++": "cpp",
+    "csharp": "csharp",
+    "cs": "csharp",
+    "php": "php",
+    "scala": "scala",
+    "terraform": "text",
+    "tf": "text",
+    "hcl": "text",
+    "dockerfile": "bash",
+    "groovy": "groovy",
+    "powershell": "powershell",
+    "ps1": "powershell",
+    "r": "r",
+    "perl": "perl",
+    "swift": "swift",
+    "kotlin": "kotlin",
+}
+
+# ---------------------------------------------------------------------------
+# Frontmatter helpers
+# ---------------------------------------------------------------------------
+
+_FRONTMATTER_RE = re.compile(r"^---\r?\n(.*?)\r?\n---\r?\n", re.DOTALL)
+
+
+def _fm_parse(text):
+    """Return (meta_dict, body) stripping YAML frontmatter from *text*.
+
+    If no frontmatter is present returns ({}, text) unchanged.
+    """
+    m = _FRONTMATTER_RE.match(text)
+    if not m:
+        return {}, text
+    meta = {}
+    for line in m.group(1).splitlines():
+        if ":" in line:
+            key, _, value = line.partition(":")
+            meta[key.strip()] = value.strip().strip('"')
+    return meta, text[m.end() :]
+
+
+def _fm_dump(meta, body):
+    """Return markdown text with *meta* serialised as YAML frontmatter."""
+    if not meta:
+        return body
+    lines = ["---"]
+    for key, value in meta.items():
+        if any(c in str(value) for c in (":", "#", "[", "]", "{", "}")):
+            lines.append(f'{key}: "{value}"')
+        else:
+            lines.append(f"{key}: {value}")
+    lines.append("---")
+    lines.append("")
+    return "\n".join(lines) + body
+
+
+def _fm_update_file(path, updates):
+    """Merge *updates* into the frontmatter of *path*, preserving existing keys."""
+    text = path.read_text(encoding="utf-8")
+    meta, body = _fm_parse(text)
+    meta.update(updates)
+    path.write_text(_fm_dump(meta, body), encoding="utf-8")
+
+
+# ---------------------------------------------------------------------------
+# Mermaid macro (Confluence CloudScript app fallback)
+# ---------------------------------------------------------------------------
+
+
 def convert_mermaid_blocks(md_content):
     """
     Replace ```mermaid ... ``` fenced blocks with Confluence cloudscript-mermaid macro HTML
     before the standard markdown conversion runs.
     Requires the 'CloudScript.io Mermaid' app to be installed in Confluence.
+    Used as a fallback when mmdc rendering is not available.
     """
     pattern = re.compile(r"```mermaid\s*\n(.*?)```", re.DOTALL)
+
     def replace(match):
         diagram = match.group(1).strip()
         return (
@@ -40,53 +142,63 @@ def replace(match):
             ""
             ""
         )
+
     return pattern.sub(replace, md_content)
 
 
 # ---------------------------------------------------------------------------
-# Helpers
+# Code block conversion
 # ---------------------------------------------------------------------------
 
-def prefixed(title, prefix):
-    """Prepend prefix to a page title, or return the title unchanged if prefix is empty."""
-    return f"{prefix}{title}" if prefix else title
 
+def _replace_code_block(match):
+    """Replace a single 
 block with a Confluence code macro."""
+    lang_attr = match.group(1) or ""
+    code_body = match.group(2)
 
-def folder_page_title(docs_dir, folder_path):
-    """
-    Derive a space-unique Confluence page title for a folder.
+    lang = ""
+    lang_match = re.search(r'language-([^\s"\']+)', lang_attr)
+    if lang_match:
+        lang = lang_match.group(1).lower()
 
-    Confluence titles must be unique across the whole space, so we use the full
-    relative path rather than just the leaf name.  Each path part has its numeric
-    prefix stripped and is title-cased, then parts are joined with ' / '.
+    confluence_lang = LANGUAGE_MAP.get(lang, lang) if lang else "none"
+    plain_code = html.unescape(code_body)
 
-    Examples:
-        docs/adr          -> "Adr"
-        docs/poc/adr      -> "Poc / Adr"
-        docs/00-intro     -> "Intro"
-        docs/poc/00-intro -> "Poc / Intro"
-    """
-    rel = folder_path.relative_to(docs_dir)
-    parts = []
-    for part in rel.parts:
-        if part and part[0].isdigit() and "-" in part:
-            part = part.split("-", 1)[1]
-        parts.append(part.replace("-", " ").title())
-    return " / ".join(parts)
+    return (
+        ''
+        f'{confluence_lang}'
+        'true'
+        ""
+        f""
+        ""
+        ""
+    )
+
+
+def convert_code_blocks_for_confluence(html_content):
+    """Convert 
 blocks into Confluence ac:structured-macro code blocks."""
+    pattern = re.compile(
+        r"
]*))?>(.*?)
", + re.DOTALL, + ) + return pattern.sub(_replace_code_block, html_content) # --------------------------------------------------------------------------- # Mermaid rendering # --------------------------------------------------------------------------- + def render_mermaid_diagrams(md_content, tmp_dir): """ Find all ```mermaid ... ``` blocks in the markdown, render each to a PNG file in tmp_dir using mmdc, and return: - - modified md_content with blocks replaced by ![mermaid-N](path/to/N.png) + - modified md_content with blocks replaced by ![mermaid-N](abs/path/N.png) - list of (attachment_name, png_path) tuples for later upload + + Falls back to leaving the block unchanged if mmdc is not available. """ - pattern = re.compile(r'```mermaid\s*\n(.*?)\n```', re.DOTALL) + pattern = re.compile(r"```mermaid\s*\n(.*?)\n```", re.DOTALL) attachments = [] counter = [0] @@ -99,7 +211,6 @@ def replace_block(m): puppeteer_cfg = Path(tmp_dir) / "puppeteer-config.json" mmd_file.write_text(diagram_src, encoding="utf-8") - # Write puppeteer config once (idempotent) if not puppeteer_cfg.exists(): puppeteer_cfg.write_text('{"args": ["--no-sandbox"]}', encoding="utf-8") @@ -107,27 +218,29 @@ def replace_block(m): subprocess.run( [ "mmdc", - "-i", str(mmd_file), - "-o", str(png_file), - "--backgroundColor", "white", - "--puppeteerConfigFile", str(puppeteer_cfg), + "-i", + str(mmd_file), + "-o", + str(png_file), + "--backgroundColor", + "white", + "--puppeteerConfigFile", + str(puppeteer_cfg), ], check=True, capture_output=True, ) attachment_name = f"mermaid-{idx}.png" attachments.append((attachment_name, png_file)) - # Replace fenced block with a local image reference that will be - # picked up by the image-upload step below. 
return f"![{attachment_name}]({png_file})" except subprocess.CalledProcessError as e: stderr = e.stderr.decode() if e.stderr else "" print(f" Warning: mmdc failed for diagram {idx}: {stderr.strip()}") - # Fall back: keep the block as a fenced code block so at least - # the source is visible. return m.group(0) except FileNotFoundError: - print(" Warning: mmdc not found — Mermaid diagrams will not be rendered.") + print( + " Warning: mmdc not found — Mermaid diagrams will not be rendered as PNG." + ) return m.group(0) modified = pattern.sub(replace_block, md_content) @@ -138,87 +251,226 @@ def replace_block(m): # Image handling # --------------------------------------------------------------------------- -def collect_local_images(md_content, md_file_path): - """ - Find all local image references in markdown (![alt](path)) and return a - list of (alt, abs_path, original_ref) tuples. Remote URLs are skipped. - """ - pattern = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)') - images = [] - for m in pattern.finditer(md_content): - alt, ref = m.group(1), m.group(2) - if ref.startswith("http://") or ref.startswith("https://"): - continue - abs_path = (md_file_path.parent / ref).resolve() - if abs_path.exists(): - images.append((alt, abs_path, ref)) - else: - print(f" Warning: image not found, skipping: {ref}") - return images +_IMG_SRC_PATTERN = re.compile( + r']*\s+)?src="([^"]+)"([^>]*)>', + re.IGNORECASE, +) + +def _replace_local_images_with_attachment_macros(html_content, md_file_dir): + """Replace local image refs with Confluence attachment macros. -def upload_attachments(conf, page_id, attachments): + Returns (modified_html, list_of_(filename, absolute_path)) for upload. + Remote URLs and data URIs are left unchanged. """ - Upload a list of (attachment_name, file_path) to a Confluence page. - Returns a set of successfully uploaded attachment names. 
+ to_upload = [] + + def replace_one(match): + src = match.group(2).strip() + if src.startswith(("http://", "https://", "data:", "//")): + return match.group(0) + path_part = unquote(src.lstrip("./")) + if not path_part: + return match.group(0) + local_path = (md_file_dir / path_part).resolve() + if not local_path.is_file(): + return match.group(0) + filename = local_path.name + to_upload.append((filename, local_path)) + return f'' + + return _IMG_SRC_PATTERN.sub(replace_one, html_content), to_upload + + +def _replace_mermaid_img_tags(html_content, mermaid_attachments): + """Replace tags produced by render_mermaid_diagrams + with Confluence macros. + + render_mermaid_diagrams replaces mermaid blocks with ![name](abs_path). + After markdown conversion those become tags which + cannot be resolved relative to md_file.parent. We handle them explicitly + here using the known (name, abs_path) pairs before the generic image handler runs. """ - uploaded = set() - for name, path in attachments: + for name, abs_path in mermaid_attachments: + escaped_name = html.escape(name) + macro = f'' + # match the img tag with this exact src + html_content = re.sub( + r']*src="' + re.escape(str(abs_path)) + r'"[^>]*/?>', + macro, + html_content, + ) + return html_content + + +def _content_type_for_filename(filename): + ext = Path(filename).suffix.lower() + return { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".svg": "image/svg+xml", + ".webp": "image/webp", + ".bmp": "image/bmp", + ".ico": "image/x-icon", + }.get(ext, "application/octet-stream") + + +def _upload_attachments(conf, page_id, files_to_upload): + """Upload local image files as Confluence page attachments.""" + page_id = str(page_id) + for filename, file_path in files_to_upload: + file_path = Path(file_path) + if not file_path.is_file(): + continue try: - conf.attach_file( - str(path), - name=name, + content = file_path.read_bytes() + conf.attach_content( + content, + 
name=filename, + content_type=_content_type_for_filename(filename), page_id=page_id, - content_type="image/png", ) - uploaded.add(name) - print(f" ✓ Uploaded attachment: {name}") + print(f" ✓ Uploaded attachment: {filename}") except Exception as e: - print(f" Warning: could not upload {name}: {e}") - return uploaded + print(f" Warning: could not upload {filename}: {e}") -def replace_images_with_ac_macros(html_content, image_map): - """ - Replace tags whose src maps to an uploaded Confluence - attachment with Confluence storage-format macros. +# --------------------------------------------------------------------------- +# Confluence client + page operations +# --------------------------------------------------------------------------- + - image_map: dict of {original_ref_or_abs_path_str: attachment_name} +def init_confluence(confluence_url, confluence_user, confluence_token): + """Initialize and return a Confluence client.""" + return Confluence( + url=confluence_url, + username=confluence_user, + password=confluence_token, + cloud=True, + ) + + +def _update_confluence_page(conf, page_id, title, body_html, space_key, parent_id): + """Update a Confluence page via direct REST PUT. + + The atlassian-python-api update_page can trigger 400 ApiValueError on + Confluence Cloud when space is missing from the payload. We build the + PUT payload explicitly to avoid this. 
""" - def replace_img(m): - src = m.group(1) - alt = m.group(2) if m.group(2) else "" - # Try exact match first, then basename - name = image_map.get(src) or image_map.get(Path(src).name) - if name: - return ( - f'' - f'' - f'' + try: + hist = conf.history(page_id) + if hasattr(hist, "json"): + hist = hist.json() + version_num = hist.get("lastUpdated", {}).get("number", 1) + except Exception: + try: + page = conf.get_page_by_id(page_id, expand="version") + version_num = page.get("version", {}).get("number", 1) + except Exception: + version_num = 1 + + data = { + "id": page_id, + "type": "page", + "title": title, + "space": {"key": space_key}, + "version": {"number": version_num + 1, "minorEdit": False}, + "body": {"storage": {"value": body_html, "representation": "storage"}}, + } + if parent_id and str(parent_id) != str(page_id): + data["ancestors"] = [{"type": "page", "id": parent_id}] + + conf.put(f"rest/api/content/{page_id}", data=data, params={"status": "current"}) + + +def _find_page_any_status(conf, space_key, title): + """Search for a page by title across all statuses (current, draft, trashed, archived). + + The standard get_page_by_title only returns 'current' pages. Confluence + still enforces title uniqueness across drafts and trashed pages, so this + is needed to find a conflicting ghost page before retrying a create. + """ + for status in ("current", "draft", "trashed", "archived"): + try: + response = conf.get( + "rest/api/content", + params={ + "type": "page", + "spaceKey": space_key, + "title": title, + "status": status, + "limit": 1, + }, ) - return m.group(0) + results = response.get("results", []) if isinstance(response, dict) else [] + if results: + page = results[0] + page.setdefault("status", status) + return page + except Exception: + continue + return None - # Match both ... and ... 
- html_content = re.sub( - r']*/?>', - replace_img, - html_content, - ) - html_content = re.sub( - r']*/?>', - lambda m: replace_img(type('M', (), { - 'group': lambda self, n: m.group(2) if n == 1 else m.group(1), - '__call__': lambda self: None, - })()) if m.group(2) else m.group(0), - html_content, + +def get_or_create_root_page(conf, space_key, root_page_title): + """Return the root page id, creating the page if it doesn't exist.""" + print(f"Setting up root page: {root_page_title}") + root_page = conf.get_page_by_title(space=space_key, title=root_page_title) + if root_page: + print(f"✓ Found root page: {root_page_title}") + return root_page["id"] + print(f"Creating root page: {root_page_title}") + root_page = conf.create_page( + space=space_key, + title=root_page_title, + body="

This is the root documentation page. Content is auto-generated from GitHub.

", ) - return html_content + if not root_page: + raise RuntimeError(f"Failed to create root page: {root_page_title}") + print(f"✓ Created root page: {root_page_title}") + return root_page["id"] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def prefixed(title, prefix): + """Prepend prefix to a page title, or return the title unchanged if prefix is empty.""" + return f"{prefix}{title}" if prefix else title + + +def folder_page_title(docs_dir, folder_path): + """ + Derive a space-unique Confluence page title for a folder. + + Confluence titles must be unique across the whole space, so we use the full + relative path rather than just the leaf name. Each path part has its numeric + prefix stripped and is title-cased, then parts are joined with ' / '. + + Examples: + docs/adr -> "Adr" + docs/poc/adr -> "Poc / Adr" + docs/00-intro -> "Intro" + docs/poc/00-intro -> "Poc / Intro" + """ + rel = folder_path.relative_to(docs_dir) + parts = [] + for part in rel.parts: + if part and part[0].isdigit() and "-" in part: + part = part.split("-", 1)[1] + parts.append(part.replace("-", " ").title()) + return " / ".join(parts) # --------------------------------------------------------------------------- # Confluence page hierarchy # --------------------------------------------------------------------------- + def get_or_create_folder_page( conf, space_key, @@ -228,7 +480,12 @@ def get_or_create_folder_page( docs_dir, confluence_prefix="", ): - """Get or create a page for a folder""" + """Get or create a Confluence page for a folder. + + Checks parent's children first, then falls back to a space-wide title + search. On title conflict caused by a ghost draft/trashed page, removes + the ghost and retries creation. 
+ """ folder_key = str(folder_path) if folder_key in folder_pages: return folder_pages[folder_key] @@ -248,16 +505,47 @@ def get_or_create_folder_page( except Exception as e: print(f" Warning: Could not check children: {e}") + # Also search the whole space by title + existing = conf.get_page_by_title(space=space_key, title=title) + if existing: + print(f" ✓ Found folder page: {title} (id: {existing['id']})") + folder_pages[folder_key] = existing["id"] + return existing["id"] + # Create folder page if not found print(f" Creating folder page: {title} (under parent: {parent_id})") - folder_page = conf.create_page( - space=space_key, - title=title, - body=f"

This section contains documentation for {title}.

", - parent_id=parent_id, - ) - folder_pages[folder_key] = folder_page["id"] - return folder_page["id"] + try: + folder_page = conf.create_page( + space=space_key, + title=title, + body=f"

This section contains documentation for {title}.

", + parent_id=parent_id, + ) + folder_pages[folder_key] = folder_page["id"] + return folder_page["id"] + except Exception as exc: + if "already exists" not in str(exc).lower() and "title" not in str(exc).lower(): + raise + # Ghost page (draft/trashed) is blocking creation — find and clean it up + fallback = _find_page_any_status(conf, space_key, title) + if fallback: + fallback_status = fallback.get("status", "current") + fallback_id = fallback["id"] + if fallback_status in ("draft", "trashed"): + try: + conf.remove_page(fallback_id) + except Exception: + pass + folder_page = conf.create_page( + space=space_key, + title=title, + body=f"

This section contains documentation for {title}.

", + parent_id=parent_id, + ) + fallback_id = folder_page["id"] + folder_pages[folder_key] = fallback_id + return fallback_id + raise def get_nested_parent_id( @@ -269,7 +557,7 @@ def get_nested_parent_id( folder_pages, confluence_prefix="", ): - """Get parent page ID for nested folder structure""" + """Get parent page ID for nested folder structure.""" if rel_path.parent == Path("."): return root_page_id @@ -292,9 +580,234 @@ def get_nested_parent_id( # --------------------------------------------------------------------------- -# Main publisher +# Title derivation +# --------------------------------------------------------------------------- + + +def _title_for_md(md_file, rel_path, md_content): + """Derive a Confluence page title from a markdown file and its content.""" + if md_content.startswith("# "): + title = md_content.split("\n")[0].strip("# ") + else: + title = md_file.stem.replace("-", " ").title() + + if md_file.name == "index.md" and rel_path.parent != Path("."): + folder_name = rel_path.parent.name + if folder_name and folder_name[0].isdigit() and "-" in folder_name: + folder_name = folder_name.split("-", 1)[1] + folder_display = folder_name.replace("-", " ").title() + if title.lower() in ["index", "readme"]: + title = f"{folder_display} - Overview" + + return title + + +# --------------------------------------------------------------------------- +# Single file publish +# --------------------------------------------------------------------------- + + +def publish_single_file( + conf, + space_key, + md_file, + docs_dir, + root_page_id, + folder_pages, + confluence_prefix="", + tmp_dir=None, +): + """Publish (create or update) a single markdown file to Confluence. 
+ + Handles the full pipeline: + - Mermaid rendering to PNG (via mmdc) with CloudScript macro fallback + - Frontmatter stripping and write-back of confluence_url/page_id + - Code block conversion to Confluence macros + - Local image upload as attachments + - Ghost page cleanup on title conflicts + - Direct REST PUT for page updates (avoids atlassian-python-api 400 bug) + """ + md_file = Path(md_file).resolve() + docs_dir = Path(docs_dir).resolve() + + try: + rel_path = md_file.relative_to(docs_dir) + except ValueError: + rel_path = Path(md_file.name) + + md_content = md_file.read_text(encoding="utf-8") + + # Strip frontmatter before conversion so it doesn't appear in page content + existing_meta, md_content = _fm_parse(md_content) + + # Render mermaid blocks to PNG; falls back to leaving block unchanged if mmdc unavailable + if tmp_dir: + md_content, mermaid_attachments = render_mermaid_diagrams(md_content, tmp_dir) + else: + mermaid_attachments = [] + + # Apply CloudScript macro fallback for any mermaid blocks that were not rendered + # (render_mermaid_diagrams leaves un-renderable blocks intact as fenced code) + md_content = convert_mermaid_blocks(md_content) + + title = prefixed(_title_for_md(md_file, rel_path, md_content), confluence_prefix) + + # Convert Markdown → HTML + html_content = markdown.markdown(md_content, extensions=["tables", "fenced_code"]) + + # Convert
 blocks to Confluence code macros
+    html_content = convert_code_blocks_for_confluence(html_content)
+
+    # Replace mermaid PNG  tags (abs paths) with ac:image macros before
+    # the generic local-image handler runs (which resolves paths relative to md_file.parent)
+    if mermaid_attachments:
+        html_content = _replace_mermaid_img_tags(html_content, mermaid_attachments)
+
+    # Replace remaining local image references with Confluence attachment macros
+    html_content, local_files_to_upload = _replace_local_images_with_attachment_macros(
+        html_content, md_file.parent
+    )
+
+    if not html_content or not html_content.strip():
+        html_content = "

" + + parent_id = get_nested_parent_id( + conf, + space_key, + rel_path, + docs_dir, + root_page_id, + folder_pages, + confluence_prefix, + ) + + existing = conf.get_page_by_title(space=space_key, title=title) + + if existing: + page_id = existing["id"] + _update_confluence_page( + conf, page_id, title, html_content, space_key, parent_id + ) + print(f" ✓ Updated: {title}") + else: + try: + created = conf.create_page( + space=space_key, + title=title, + body=html_content, + parent_id=parent_id, + ) + page_id = ( + created.get("id") + if isinstance(created, dict) + else getattr(created, "id", None) + ) + print(f" ✓ Created: {title}") + except Exception as exc: + if ( + "already exists" not in str(exc).lower() + and "title" not in str(exc).lower() + ): + raise + # Ghost page (draft/trashed) is blocking creation + fallback = _find_page_any_status(conf, space_key, title) + if fallback: + fallback_status = fallback.get("status", "current") + fallback_id = fallback["id"] + if fallback_status in ("draft", "trashed"): + try: + conf.remove_page(fallback_id) + except Exception: + pass + created = conf.create_page( + space=space_key, + title=title, + body=html_content, + parent_id=parent_id, + ) + page_id = ( + created.get("id") + if isinstance(created, dict) + else getattr(created, "id", None) + ) + print(f" ✓ Created (after ghost cleanup): {title}") + else: + page_id = fallback_id + _update_confluence_page( + conf, page_id, title, html_content, space_key, parent_id + ) + print(f" ✓ Updated (fallback): {title}") + else: + raise + + # Upload all attachments: local images + mermaid PNGs + all_files_to_upload = local_files_to_upload + [ + (name, path) for name, path in mermaid_attachments + ] + if all_files_to_upload and page_id: + _upload_attachments(conf, page_id, all_files_to_upload) + + # Write confluence_url and page_id back into the file's frontmatter + if page_id: + try: + page_info = conf.get_page_by_id(page_id, expand="") + webui = ( + page_info.get("_links", 
{}).get("webui", "") + if isinstance(page_info, dict) + else "" + ) + base_url = conf.url.rstrip("/") + target_url = ( + f"{base_url}/wiki{webui}" + if webui and not webui.startswith("http") + else webui + ) + if target_url: + existing_meta["confluence_url"] = target_url + existing_meta["page_id"] = str(page_id) + _fm_update_file(md_file, existing_meta) + except Exception: + pass + + # Register this page in the folder cache if a same-named sibling directory exists, + # so child pages nest under it rather than a separate placeholder page. + if page_id: + sibling_dir = md_file.parent / md_file.stem + if sibling_dir.is_dir(): + folder_pages[str(sibling_dir)] = page_id + + +# --------------------------------------------------------------------------- +# Sorting / filtering helpers # --------------------------------------------------------------------------- + +def _depth_then_index_first(p): + """Sort key: shallowest paths first, index.md before siblings.""" + return (len(p.parts), p.name != "index.md", str(p)) + + +def _remove_redundant_index_files(md_files): + """Remove index.md files that duplicate a sibling parent .md file. + + When foo/index.md exists alongside foo.md, the index is redundant. 
+ """ + resolved_paths = {p.resolve() for p in md_files} + return [ + p + for p in md_files + if not ( + p.name == "index.md" + and (p.parent.parent / f"{p.parent.name}.md").resolve() in resolved_paths + ) + ] + + +# --------------------------------------------------------------------------- +# Main publish entry point +# --------------------------------------------------------------------------- + + def publish_docs( confluence_url, confluence_user, @@ -303,145 +816,59 @@ def publish_docs( docs_path, root_page_title, confluence_prefix="", + files=None, ): - """Publish all markdown files to Confluence""" - - conf = Confluence( - url=confluence_url, - username=confluence_user, - password=confluence_token, - cloud=True, - ) + """Publish all markdown files under docs_path to Confluence. - # Get or create root page + If *files* is given (a list of paths), only those files are published. + """ + conf = init_confluence(confluence_url, confluence_user, confluence_token) root_page_title_prefixed = prefixed(root_page_title, confluence_prefix) - print(f"Setting up root page: {root_page_title_prefixed}") - root_page = conf.get_page_by_title(space=space_key, title=root_page_title_prefixed) - if root_page: - print(f"✓ Found root page: {root_page_title_prefixed}") - root_page_id = root_page["id"] - else: - print(f"Creating root page: {root_page_title_prefixed}") - root_page = conf.create_page( - space=space_key, - title=root_page_title_prefixed, - body="

This is the root documentation page. Content is auto-generated from GitHub.

", - ) - if not root_page: - raise RuntimeError(f"Failed to create root page: {root_page_title_prefixed}") - print(f"✓ Created root page: {root_page_title_prefixed}") - root_page_id = root_page["id"] + root_page_id = get_or_create_root_page(conf, space_key, root_page_title_prefixed) folder_pages = {} docs_dir = Path(docs_path) - md_files = list(docs_dir.rglob("*.md")) - def sort_key(p): - is_index = p.name == "index.md" - depth = len(p.parts) - return (not is_index, depth, str(p)) + if files: + md_files = [Path(f) for f in files] + else: + md_files = list(docs_dir.rglob("*.md")) + + def sort_key(p): + is_index = p.name == "index.md" + depth = len(p.parts) + return (not is_index, depth, str(p)) + + md_files.sort(key=sort_key) + md_files = _remove_redundant_index_files(md_files) - md_files.sort(key=sort_key) print(f"\nPublishing {len(md_files)} files...\n") with tempfile.TemporaryDirectory() as tmp_dir: for md_file in md_files: if "template" in md_file.name.lower(): - print(f"Skipping template: {md_file.relative_to(docs_dir)}") + print( + f"Skipping template: {md_file.relative_to(docs_dir) if md_file.is_relative_to(docs_dir) else md_file.name}" + ) continue - rel_path = md_file.relative_to(docs_dir) - print(f"\nPublishing {rel_path}...") - - with open(md_file, "r", encoding="utf-8") as f: - md_content = f.read() - - # --- Mermaid: render diagrams to PNG, replace blocks with img refs --- - md_content, mermaid_attachments = render_mermaid_diagrams(md_content, tmp_dir) - - # --- Collect local images referenced in markdown --- - local_images = collect_local_images(md_content, md_file) - all_attachments = mermaid_attachments + [ - (Path(ref).name, abs_path) for _, abs_path, ref in local_images - ] - - # --- Extract title --- - if md_content.startswith("# "): - title = md_content.split("\n")[0].strip("# ") - else: - title = md_file.stem.replace("-", " ").title() - - if md_file.name == "index.md" and rel_path.parent != Path("."): - folder_name = rel_path.parent.name - if 
folder_name and folder_name[0].isdigit() and "-" in folder_name: - folder_name = folder_name.split("-", 1)[1] - folder_display = folder_name.replace("-", " ").title() - if title.lower() in ["index", "readme"]: - title = f"{folder_display} - Overview" - - title = prefixed(title, confluence_prefix) - - # --- Convert markdown to HTML --- - md_content_processed = convert_mermaid_blocks(md_content) - html_content = markdown.markdown( - md_content_processed, extensions=["tables", "fenced_code"] + rel = ( + md_file.relative_to(docs_dir) + if md_file.is_relative_to(docs_dir) + else md_file ) - - # --- Determine parent page --- - parent_id = get_nested_parent_id( + print(f"\nPublishing {rel}...") + publish_single_file( conf, space_key, - rel_path, + md_file, docs_dir, root_page_id, folder_pages, confluence_prefix, + tmp_dir=tmp_dir, ) - # --- Create or update page first (need page_id for attachments) --- - existing = conf.get_page_by_title(space=space_key, title=title) - if existing: - page_id = existing["id"] - else: - new_page = conf.create_page( - space=space_key, - title=title, - body="

Placeholder — content being uploaded.

", - parent_id=parent_id, - ) - page_id = new_page["id"] - print(f" ✓ Created page: {title}") - - # --- Upload attachments (mermaid PNGs + local images) --- - if all_attachments: - uploaded = upload_attachments(conf, page_id, all_attachments) - else: - uploaded = set() - - # --- Build image_map for src→attachment_name replacement --- - # Keys: original markdown ref strings and absolute path strings - image_map = {} - for name, path in mermaid_attachments: - image_map[str(path)] = name - for _, abs_path, ref in local_images: - att_name = Path(ref).name - if att_name in uploaded: - image_map[ref] = att_name - image_map[str(abs_path)] = att_name - - # --- Replace tags with ac:image macros --- - if image_map: - html_content = replace_images_with_ac_macros(html_content, image_map) - - # --- Final update with real content --- - conf.update_page( - page_id=page_id, - title=title, - body=html_content, - parent_id=parent_id, - ) - print(f" ✓ Updated: {title}") - print("\n✓ All pages published successfully!")