diff --git a/.github/actions/publish-to-confluence/publish.py b/.github/actions/publish-to-confluence/publish.py index 4894871..a95cc8f 100644 --- a/.github/actions/publish-to-confluence/publish.py +++ b/.github/actions/publish-to-confluence/publish.py @@ -4,33 +4,135 @@ Preserves folder structure as page hierarchy. Supports: - - Mermaid diagrams: converted to Confluence CloudScript macro (cloudscript-mermaid) - - Local images: uploaded as Confluence attachments - - Tables, fenced code blocks + - Mermaid diagrams: rendered to PNG via mmdc, uploaded as attachments; + falls back to Confluence CloudScript macro if mmdc is unavailable + - Local images: uploaded as Confluence attachments (png/jpg/gif/svg/webp/bmp/ico) + - Tables, fenced code blocks with syntax highlighting + - YAML frontmatter: stripped before publish, confluence_url/page_id written back """ +import html +import logging import os import re import subprocess import sys import tempfile from pathlib import Path +from urllib.parse import unquote import markdown from atlassian import Confluence +log = logging.getLogger(__name__) # --------------------------------------------------------------------------- -# Mermaid macro (Confluence CloudScript app) +# Language map: fenced-code identifier → Confluence code macro language # --------------------------------------------------------------------------- +LANGUAGE_MAP = { + "python": "python", + "py": "python", + "javascript": "javascript", + "js": "javascript", + "typescript": "typescript", + "ts": "typescript", + "java": "java", + "bash": "bash", + "sh": "bash", + "shell": "bash", + "zsh": "bash", + "sql": "sql", + "json": "javascript", + "xml": "xml", + "html": "html", + "css": "css", + "yaml": "yaml", + "yml": "yaml", + "ruby": "ruby", + "rb": "ruby", + "go": "go", + "rust": "rust", + "c": "c", + "cpp": "cpp", + "c++": "cpp", + "csharp": "csharp", + "cs": "csharp", + "php": "php", + "scala": "scala", + "terraform": "text", + "tf": "text", + "hcl": "text", + "dockerfile": "bash", + "groovy": "groovy", + "powershell": "powershell", + "ps1": "powershell", + "r": "r", + "perl": "perl", + "swift": "swift", + "kotlin": "kotlin", +} + +# --------------------------------------------------------------------------- +# Frontmatter helpers +# --------------------------------------------------------------------------- + +_FRONTMATTER_RE = re.compile(r"^---\r?\n(.*?)\r?\n---\r?\n", re.DOTALL) + + +def _fm_parse(text): + """Return (meta_dict, body) stripping YAML frontmatter from *text*. + + If no frontmatter is present returns ({}, text) unchanged. + """ + m = _FRONTMATTER_RE.match(text) + if not m: + return {}, text + meta = {} + for line in m.group(1).splitlines(): + if ":" in line: + key, _, value = line.partition(":") + meta[key.strip()] = value.strip().strip('"') + return meta, text[m.end() :] + + +def _fm_dump(meta, body): + """Return markdown text with *meta* serialised as YAML frontmatter.""" + if not meta: + return body + lines = ["---"] + for key, value in meta.items(): + if any(c in str(value) for c in (":", "#", "[", "]", "{", "}")): + lines.append(f'{key}: "{value}"') + else: + lines.append(f"{key}: {value}") + lines.append("---") + lines.append("") + return "\n".join(lines) + body + + +def _fm_update_file(path, updates): + """Merge *updates* into the frontmatter of *path*, preserving existing keys.""" + text = path.read_text(encoding="utf-8") + meta, body = _fm_parse(text) + meta.update(updates) + path.write_text(_fm_dump(meta, body), encoding="utf-8") + + +# --------------------------------------------------------------------------- +# Mermaid macro (Confluence CloudScript app fallback) +# --------------------------------------------------------------------------- + + def convert_mermaid_blocks(md_content): """ Replace ```mermaid ... ``` fenced blocks with Confluence cloudscript-mermaid macro HTML before the standard markdown conversion runs. Requires the 'CloudScript.io Mermaid' app to be installed in Confluence. + Used as a fallback when mmdc rendering is not available. """ pattern = re.compile(r"```mermaid\s*\n(.*?)```", re.DOTALL) + def replace(match): diagram = match.group(1).strip() return ( @@ -40,53 +142,63 @@ def replace(match): "" "" ) + return pattern.sub(replace, md_content) # --------------------------------------------------------------------------- -# Helpers +# Code block conversion # --------------------------------------------------------------------------- -def prefixed(title, prefix): - """Prepend prefix to a page title, or return the title unchanged if prefix is empty.""" - return f"{prefix}{title}" if prefix else title +def _replace_code_block(match): + """Replace a single
 block with a Confluence code macro."""
+    lang_attr = match.group(1) or ""
+    code_body = match.group(2)
 
-def folder_page_title(docs_dir, folder_path):
-    """
-    Derive a space-unique Confluence page title for a folder.
+    lang = ""
+    lang_match = re.search(r'language-([^\s"\']+)', lang_attr)
+    if lang_match:
+        lang = lang_match.group(1).lower()
 
-    Confluence titles must be unique across the whole space, so we use the full
-    relative path rather than just the leaf name.  Each path part has its numeric
-    prefix stripped and is title-cased, then parts are joined with ' / '.
+    confluence_lang = LANGUAGE_MAP.get(lang, lang) if lang else "none"
+    plain_code = html.unescape(code_body)
 
-    Examples:
-        docs/adr          -> "Adr"
-        docs/poc/adr      -> "Poc / Adr"
-        docs/00-intro     -> "Intro"
-        docs/poc/00-intro -> "Poc / Intro"
-    """
-    rel = folder_path.relative_to(docs_dir)
-    parts = []
-    for part in rel.parts:
-        if part and part[0].isdigit() and "-" in part:
-            part = part.split("-", 1)[1]
-        parts.append(part.replace("-", " ").title())
-    return " / ".join(parts)
+    return (
+        ''
+        f'{confluence_lang}'
+        'true'
+        ""
+        f""
+        ""
+        ""
+    )
+
+
+def convert_code_blocks_for_confluence(html_content):
+    """Convert 
 blocks into Confluence ac:structured-macro code blocks."""
+    pattern = re.compile(
+        r"
]*))?>(.*?)
", + re.DOTALL, + ) + return pattern.sub(_replace_code_block, html_content) # --------------------------------------------------------------------------- # Mermaid rendering # --------------------------------------------------------------------------- + def render_mermaid_diagrams(md_content, tmp_dir): """ Find all ```mermaid ... ``` blocks in the markdown, render each to a PNG file in tmp_dir using mmdc, and return: - - modified md_content with blocks replaced by ![mermaid-N](path/to/N.png) + - modified md_content with blocks replaced by ![mermaid-N](abs/path/N.png) - list of (attachment_name, png_path) tuples for later upload + + Falls back to leaving the block unchanged if mmdc is not available. """ - pattern = re.compile(r'```mermaid\s*\n(.*?)\n```', re.DOTALL) + pattern = re.compile(r"```mermaid\s*\n(.*?)\n```", re.DOTALL) attachments = [] counter = [0] @@ -99,7 +211,6 @@ def replace_block(m): puppeteer_cfg = Path(tmp_dir) / "puppeteer-config.json" mmd_file.write_text(diagram_src, encoding="utf-8") - # Write puppeteer config once (idempotent) if not puppeteer_cfg.exists(): puppeteer_cfg.write_text('{"args": ["--no-sandbox"]}', encoding="utf-8") @@ -107,27 +218,29 @@ def replace_block(m): subprocess.run( [ "mmdc", - "-i", str(mmd_file), - "-o", str(png_file), - "--backgroundColor", "white", - "--puppeteerConfigFile", str(puppeteer_cfg), + "-i", + str(mmd_file), + "-o", + str(png_file), + "--backgroundColor", + "white", + "--puppeteerConfigFile", + str(puppeteer_cfg), ], check=True, capture_output=True, ) attachment_name = f"mermaid-{idx}.png" attachments.append((attachment_name, png_file)) - # Replace fenced block with a local image reference that will be - # picked up by the image-upload step below. return f"![{attachment_name}]({png_file})" except subprocess.CalledProcessError as e: stderr = e.stderr.decode() if e.stderr else "" print(f" Warning: mmdc failed for diagram {idx}: {stderr.strip()}") - # Fall back: keep the block as a fenced code block so at least - # the source is visible. return m.group(0) except FileNotFoundError: - print(" Warning: mmdc not found — Mermaid diagrams will not be rendered.") + print( + " Warning: mmdc not found — Mermaid diagrams will not be rendered as PNG." + ) return m.group(0) modified = pattern.sub(replace_block, md_content) @@ -138,87 +251,226 @@ def replace_block(m): # Image handling # --------------------------------------------------------------------------- -def collect_local_images(md_content, md_file_path): - """ - Find all local image references in markdown (![alt](path)) and return a - list of (alt, abs_path, original_ref) tuples. Remote URLs are skipped. - """ - pattern = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)') - images = [] - for m in pattern.finditer(md_content): - alt, ref = m.group(1), m.group(2) - if ref.startswith("http://") or ref.startswith("https://"): - continue - abs_path = (md_file_path.parent / ref).resolve() - if abs_path.exists(): - images.append((alt, abs_path, ref)) - else: - print(f" Warning: image not found, skipping: {ref}") - return images +_IMG_SRC_PATTERN = re.compile( + r']*\s+)?src="([^"]+)"([^>]*)>', + re.IGNORECASE, +) + +def _replace_local_images_with_attachment_macros(html_content, md_file_dir): + """Replace local image refs with Confluence attachment macros. -def upload_attachments(conf, page_id, attachments): + Returns (modified_html, list_of_(filename, absolute_path)) for upload. + Remote URLs and data URIs are left unchanged. """ - Upload a list of (attachment_name, file_path) to a Confluence page. - Returns a set of successfully uploaded attachment names. + to_upload = [] + + def replace_one(match): + src = match.group(2).strip() + if src.startswith(("http://", "https://", "data:", "//")): + return match.group(0) + path_part = unquote(src.lstrip("./")) + if not path_part: + return match.group(0) + local_path = (md_file_dir / path_part).resolve() + if not local_path.is_file(): + return match.group(0) + filename = local_path.name + to_upload.append((filename, local_path)) + return f'' + + return _IMG_SRC_PATTERN.sub(replace_one, html_content), to_upload + + +def _replace_mermaid_img_tags(html_content, mermaid_attachments): + """Replace tags produced by render_mermaid_diagrams + with Confluence macros. + + render_mermaid_diagrams replaces mermaid blocks with ![name](abs_path). + After markdown conversion those become tags which + cannot be resolved relative to md_file.parent. We handle them explicitly + here using the known (name, abs_path) pairs before the generic image handler runs. """ - uploaded = set() - for name, path in attachments: + for name, abs_path in mermaid_attachments: + escaped_name = html.escape(name) + macro = f'' + # match the img tag with this exact src + html_content = re.sub( + r']*src="' + re.escape(str(abs_path)) + r'"[^>]*/?>', + macro, + html_content, + ) + return html_content + + +def _content_type_for_filename(filename): + ext = Path(filename).suffix.lower() + return { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".svg": "image/svg+xml", + ".webp": "image/webp", + ".bmp": "image/bmp", + ".ico": "image/x-icon", + }.get(ext, "application/octet-stream") + + +def _upload_attachments(conf, page_id, files_to_upload): + """Upload local image files as Confluence page attachments.""" + page_id = str(page_id) + for filename, file_path in files_to_upload: + file_path = Path(file_path) + if not file_path.is_file(): + continue try: - conf.attach_file( - str(path), - name=name, + content = file_path.read_bytes() + conf.attach_content( + content, + name=filename, + content_type=_content_type_for_filename(filename), page_id=page_id, - content_type="image/png", ) - uploaded.add(name) - print(f" ✓ Uploaded attachment: {name}") + print(f" ✓ Uploaded attachment: {filename}") except Exception as e: - print(f" Warning: could not upload {name}: {e}") - return uploaded + print(f" Warning: could not upload {filename}: {e}") -def replace_images_with_ac_macros(html_content, image_map): - """ - Replace tags whose src maps to an uploaded Confluence - attachment with Confluence storage-format macros. +# --------------------------------------------------------------------------- +# Confluence client + page operations +# --------------------------------------------------------------------------- + - image_map: dict of {original_ref_or_abs_path_str: attachment_name} +def init_confluence(confluence_url, confluence_user, confluence_token): + """Initialize and return a Confluence client.""" + return Confluence( + url=confluence_url, + username=confluence_user, + password=confluence_token, + cloud=True, + ) + + +def _update_confluence_page(conf, page_id, title, body_html, space_key, parent_id): + """Update a Confluence page via direct REST PUT. + + The atlassian-python-api update_page can trigger 400 ApiValueError on + Confluence Cloud when space is missing from the payload. We build the + PUT payload explicitly to avoid this. """ - def replace_img(m): - src = m.group(1) - alt = m.group(2) if m.group(2) else "" - # Try exact match first, then basename - name = image_map.get(src) or image_map.get(Path(src).name) - if name: - return ( - f'' - f'' - f'' + try: + hist = conf.history(page_id) + if hasattr(hist, "json"): + hist = hist.json() + version_num = hist.get("lastUpdated", {}).get("number", 1) + except Exception: + try: + page = conf.get_page_by_id(page_id, expand="version") + version_num = page.get("version", {}).get("number", 1) + except Exception: + version_num = 1 + + data = { + "id": page_id, + "type": "page", + "title": title, + "space": {"key": space_key}, + "version": {"number": version_num + 1, "minorEdit": False}, + "body": {"storage": {"value": body_html, "representation": "storage"}}, + } + if parent_id and str(parent_id) != str(page_id): + data["ancestors"] = [{"type": "page", "id": parent_id}] + + conf.put(f"rest/api/content/{page_id}", data=data, params={"status": "current"}) + + +def _find_page_any_status(conf, space_key, title): + """Search for a page by title across all statuses (current, draft, trashed, archived). + + The standard get_page_by_title only returns 'current' pages. Confluence + still enforces title uniqueness across drafts and trashed pages, so this + is needed to find a conflicting ghost page before retrying a create. + """ + for status in ("current", "draft", "trashed", "archived"): + try: + response = conf.get( + "rest/api/content", + params={ + "type": "page", + "spaceKey": space_key, + "title": title, + "status": status, + "limit": 1, + }, ) - return m.group(0) + results = response.get("results", []) if isinstance(response, dict) else [] + if results: + page = results[0] + page.setdefault("status", status) + return page + except Exception: + continue + return None - # Match both ... and ... - html_content = re.sub( - r']*/?>', - replace_img, - html_content, - ) - html_content = re.sub( - r']*/?>', - lambda m: replace_img(type('M', (), { - 'group': lambda self, n: m.group(2) if n == 1 else m.group(1), - '__call__': lambda self: None, - })()) if m.group(2) else m.group(0), - html_content, + +def get_or_create_root_page(conf, space_key, root_page_title): + """Return the root page id, creating the page if it doesn't exist.""" + print(f"Setting up root page: {root_page_title}") + root_page = conf.get_page_by_title(space=space_key, title=root_page_title) + if root_page: + print(f"✓ Found root page: {root_page_title}") + return root_page["id"] + print(f"Creating root page: {root_page_title}") + root_page = conf.create_page( + space=space_key, + title=root_page_title, + body="

This is the root documentation page. Content is auto-generated from GitHub.

", ) - return html_content + if not root_page: + raise RuntimeError(f"Failed to create root page: {root_page_title}") + print(f"✓ Created root page: {root_page_title}") + return root_page["id"] + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def prefixed(title, prefix): + """Prepend prefix to a page title, or return the title unchanged if prefix is empty.""" + return f"{prefix}{title}" if prefix else title + + +def folder_page_title(docs_dir, folder_path): + """ + Derive a space-unique Confluence page title for a folder. + + Confluence titles must be unique across the whole space, so we use the full + relative path rather than just the leaf name. Each path part has its numeric + prefix stripped and is title-cased, then parts are joined with ' / '. + + Examples: + docs/adr -> "Adr" + docs/poc/adr -> "Poc / Adr" + docs/00-intro -> "Intro" + docs/poc/00-intro -> "Poc / Intro" + """ + rel = folder_path.relative_to(docs_dir) + parts = [] + for part in rel.parts: + if part and part[0].isdigit() and "-" in part: + part = part.split("-", 1)[1] + parts.append(part.replace("-", " ").title()) + return " / ".join(parts) # --------------------------------------------------------------------------- # Confluence page hierarchy # --------------------------------------------------------------------------- + def get_or_create_folder_page( conf, space_key, @@ -228,7 +480,12 @@ def get_or_create_folder_page( docs_dir, confluence_prefix="", ): - """Get or create a page for a folder""" + """Get or create a Confluence page for a folder. + + Checks parent's children first, then falls back to a space-wide title + search. On title conflict caused by a ghost draft/trashed page, removes + the ghost and retries creation. + """ folder_key = str(folder_path) if folder_key in folder_pages: return folder_pages[folder_key] @@ -248,16 +505,47 @@ def get_or_create_folder_page( except Exception as e: print(f" Warning: Could not check children: {e}") + # Also search the whole space by title + existing = conf.get_page_by_title(space=space_key, title=title) + if existing: + print(f" ✓ Found folder page: {title} (id: {existing['id']})") + folder_pages[folder_key] = existing["id"] + return existing["id"] + # Create folder page if not found print(f" Creating folder page: {title} (under parent: {parent_id})") - folder_page = conf.create_page( - space=space_key, - title=title, - body=f"

This section contains documentation for {title}.

", - parent_id=parent_id, - ) - folder_pages[folder_key] = folder_page["id"] - return folder_page["id"] + try: + folder_page = conf.create_page( + space=space_key, + title=title, + body=f"

This section contains documentation for {title}.

", + parent_id=parent_id, + ) + folder_pages[folder_key] = folder_page["id"] + return folder_page["id"] + except Exception as exc: + if "already exists" not in str(exc).lower() and "title" not in str(exc).lower(): + raise + # Ghost page (draft/trashed) is blocking creation — find and clean it up + fallback = _find_page_any_status(conf, space_key, title) + if fallback: + fallback_status = fallback.get("status", "current") + fallback_id = fallback["id"] + if fallback_status in ("draft", "trashed"): + try: + conf.remove_page(fallback_id) + except Exception: + pass + folder_page = conf.create_page( + space=space_key, + title=title, + body=f"

This section contains documentation for {title}.

", + parent_id=parent_id, + ) + fallback_id = folder_page["id"] + folder_pages[folder_key] = fallback_id + return fallback_id + raise def get_nested_parent_id( @@ -269,7 +557,7 @@ def get_nested_parent_id( folder_pages, confluence_prefix="", ): - """Get parent page ID for nested folder structure""" + """Get parent page ID for nested folder structure.""" if rel_path.parent == Path("."): return root_page_id @@ -292,9 +580,234 @@ def get_nested_parent_id( # --------------------------------------------------------------------------- -# Main publisher +# Title derivation +# --------------------------------------------------------------------------- + + +def _title_for_md(md_file, rel_path, md_content): + """Derive a Confluence page title from a markdown file and its content.""" + if md_content.startswith("# "): + title = md_content.split("\n")[0].strip("# ") + else: + title = md_file.stem.replace("-", " ").title() + + if md_file.name == "index.md" and rel_path.parent != Path("."): + folder_name = rel_path.parent.name + if folder_name and folder_name[0].isdigit() and "-" in folder_name: + folder_name = folder_name.split("-", 1)[1] + folder_display = folder_name.replace("-", " ").title() + if title.lower() in ["index", "readme"]: + title = f"{folder_display} - Overview" + + return title + + +# --------------------------------------------------------------------------- +# Single file publish +# --------------------------------------------------------------------------- + + +def publish_single_file( + conf, + space_key, + md_file, + docs_dir, + root_page_id, + folder_pages, + confluence_prefix="", + tmp_dir=None, +): + """Publish (create or update) a single markdown file to Confluence. + + Handles the full pipeline: + - Mermaid rendering to PNG (via mmdc) with CloudScript macro fallback + - Frontmatter stripping and write-back of confluence_url/page_id + - Code block conversion to Confluence macros + - Local image upload as attachments + - Ghost page cleanup on title conflicts + - Direct REST PUT for page updates (avoids atlassian-python-api 400 bug) + """ + md_file = Path(md_file).resolve() + docs_dir = Path(docs_dir).resolve() + + try: + rel_path = md_file.relative_to(docs_dir) + except ValueError: + rel_path = Path(md_file.name) + + md_content = md_file.read_text(encoding="utf-8") + + # Strip frontmatter before conversion so it doesn't appear in page content + existing_meta, md_content = _fm_parse(md_content) + + # Render mermaid blocks to PNG; falls back to leaving block unchanged if mmdc unavailable + if tmp_dir: + md_content, mermaid_attachments = render_mermaid_diagrams(md_content, tmp_dir) + else: + mermaid_attachments = [] + + # Apply CloudScript macro fallback for any mermaid blocks that were not rendered + # (render_mermaid_diagrams leaves un-renderable blocks intact as fenced code) + md_content = convert_mermaid_blocks(md_content) + + title = prefixed(_title_for_md(md_file, rel_path, md_content), confluence_prefix) + + # Convert Markdown → HTML + html_content = markdown.markdown(md_content, extensions=["tables", "fenced_code"]) + + # Convert
 blocks to Confluence code macros
+    html_content = convert_code_blocks_for_confluence(html_content)
+
+    # Replace mermaid PNG  tags (abs paths) with ac:image macros before
+    # the generic local-image handler runs (which resolves paths relative to md_file.parent)
+    if mermaid_attachments:
+        html_content = _replace_mermaid_img_tags(html_content, mermaid_attachments)
+
+    # Replace remaining local image references with Confluence attachment macros
+    html_content, local_files_to_upload = _replace_local_images_with_attachment_macros(
+        html_content, md_file.parent
+    )
+
+    if not html_content or not html_content.strip():
+        html_content = "

" + + parent_id = get_nested_parent_id( + conf, + space_key, + rel_path, + docs_dir, + root_page_id, + folder_pages, + confluence_prefix, + ) + + existing = conf.get_page_by_title(space=space_key, title=title) + + if existing: + page_id = existing["id"] + _update_confluence_page( + conf, page_id, title, html_content, space_key, parent_id + ) + print(f" ✓ Updated: {title}") + else: + try: + created = conf.create_page( + space=space_key, + title=title, + body=html_content, + parent_id=parent_id, + ) + page_id = ( + created.get("id") + if isinstance(created, dict) + else getattr(created, "id", None) + ) + print(f" ✓ Created: {title}") + except Exception as exc: + if ( + "already exists" not in str(exc).lower() + and "title" not in str(exc).lower() + ): + raise + # Ghost page (draft/trashed) is blocking creation + fallback = _find_page_any_status(conf, space_key, title) + if fallback: + fallback_status = fallback.get("status", "current") + fallback_id = fallback["id"] + if fallback_status in ("draft", "trashed"): + try: + conf.remove_page(fallback_id) + except Exception: + pass + created = conf.create_page( + space=space_key, + title=title, + body=html_content, + parent_id=parent_id, + ) + page_id = ( + created.get("id") + if isinstance(created, dict) + else getattr(created, "id", None) + ) + print(f" ✓ Created (after ghost cleanup): {title}") + else: + page_id = fallback_id + _update_confluence_page( + conf, page_id, title, html_content, space_key, parent_id + ) + print(f" ✓ Updated (fallback): {title}") + else: + raise + + # Upload all attachments: local images + mermaid PNGs + all_files_to_upload = local_files_to_upload + [ + (name, path) for name, path in mermaid_attachments + ] + if all_files_to_upload and page_id: + _upload_attachments(conf, page_id, all_files_to_upload) + + # Write confluence_url and page_id back into the file's frontmatter + if page_id: + try: + page_info = conf.get_page_by_id(page_id, expand="") + webui = ( + page_info.get("_links", {}).get("webui", "") + if isinstance(page_info, dict) + else "" + ) + base_url = conf.url.rstrip("/") + target_url = ( + f"{base_url}/wiki{webui}" + if webui and not webui.startswith("http") + else webui + ) + if target_url: + existing_meta["confluence_url"] = target_url + existing_meta["page_id"] = str(page_id) + _fm_update_file(md_file, existing_meta) + except Exception: + pass + + # Register this page in the folder cache if a same-named sibling directory exists, + # so child pages nest under it rather than a separate placeholder page. + if page_id: + sibling_dir = md_file.parent / md_file.stem + if sibling_dir.is_dir(): + folder_pages[str(sibling_dir)] = page_id + + +# --------------------------------------------------------------------------- +# Sorting / filtering helpers # --------------------------------------------------------------------------- + +def _depth_then_index_first(p): + """Sort key: shallowest paths first, index.md before siblings.""" + return (len(p.parts), p.name != "index.md", str(p)) + + +def _remove_redundant_index_files(md_files): + """Remove index.md files that duplicate a sibling parent .md file. + + When foo/index.md exists alongside foo.md, the index is redundant. + """ + resolved_paths = {p.resolve() for p in md_files} + return [ + p + for p in md_files + if not ( + p.name == "index.md" + and (p.parent.parent / f"{p.parent.name}.md").resolve() in resolved_paths + ) + ] + + +# --------------------------------------------------------------------------- +# Main publish entry point +# --------------------------------------------------------------------------- + + def publish_docs( confluence_url, confluence_user, @@ -303,145 +816,59 @@ def publish_docs( docs_path, root_page_title, confluence_prefix="", + files=None, ): - """Publish all markdown files to Confluence""" - - conf = Confluence( - url=confluence_url, - username=confluence_user, - password=confluence_token, - cloud=True, - ) + """Publish all markdown files under docs_path to Confluence. - # Get or create root page + If *files* is given (a list of paths), only those files are published. + """ + conf = init_confluence(confluence_url, confluence_user, confluence_token) root_page_title_prefixed = prefixed(root_page_title, confluence_prefix) - print(f"Setting up root page: {root_page_title_prefixed}") - root_page = conf.get_page_by_title(space=space_key, title=root_page_title_prefixed) - if root_page: - print(f"✓ Found root page: {root_page_title_prefixed}") - root_page_id = root_page["id"] - else: - print(f"Creating root page: {root_page_title_prefixed}") - root_page = conf.create_page( - space=space_key, - title=root_page_title_prefixed, - body="

This is the root documentation page. Content is auto-generated from GitHub.

", - ) - if not root_page: - raise RuntimeError(f"Failed to create root page: {root_page_title_prefixed}") - print(f"✓ Created root page: {root_page_title_prefixed}") - root_page_id = root_page["id"] + root_page_id = get_or_create_root_page(conf, space_key, root_page_title_prefixed) folder_pages = {} docs_dir = Path(docs_path) - md_files = list(docs_dir.rglob("*.md")) - def sort_key(p): - is_index = p.name == "index.md" - depth = len(p.parts) - return (not is_index, depth, str(p)) + if files: + md_files = [Path(f) for f in files] + else: + md_files = list(docs_dir.rglob("*.md")) + + def sort_key(p): + is_index = p.name == "index.md" + depth = len(p.parts) + return (not is_index, depth, str(p)) + + md_files.sort(key=sort_key) + md_files = _remove_redundant_index_files(md_files) - md_files.sort(key=sort_key) print(f"\nPublishing {len(md_files)} files...\n") with tempfile.TemporaryDirectory() as tmp_dir: for md_file in md_files: if "template" in md_file.name.lower(): - print(f"Skipping template: {md_file.relative_to(docs_dir)}") + print( + f"Skipping template: {md_file.relative_to(docs_dir) if md_file.is_relative_to(docs_dir) else md_file.name}" + ) continue - rel_path = md_file.relative_to(docs_dir) - print(f"\nPublishing {rel_path}...") - - with open(md_file, "r", encoding="utf-8") as f: - md_content = f.read() - - # --- Mermaid: render diagrams to PNG, replace blocks with img refs --- - md_content, mermaid_attachments = render_mermaid_diagrams(md_content, tmp_dir) - - # --- Collect local images referenced in markdown --- - local_images = collect_local_images(md_content, md_file) - all_attachments = mermaid_attachments + [ - (Path(ref).name, abs_path) for _, abs_path, ref in local_images - ] - - # --- Extract title --- - if md_content.startswith("# "): - title = md_content.split("\n")[0].strip("# ") - else: - title = md_file.stem.replace("-", " ").title() - - if md_file.name == "index.md" and rel_path.parent != Path("."): - folder_name = rel_path.parent.name - if folder_name and folder_name[0].isdigit() and "-" in folder_name: - folder_name = folder_name.split("-", 1)[1] - folder_display = folder_name.replace("-", " ").title() - if title.lower() in ["index", "readme"]: - title = f"{folder_display} - Overview" - - title = prefixed(title, confluence_prefix) - - # --- Convert markdown to HTML --- - md_content_processed = convert_mermaid_blocks(md_content) - html_content = markdown.markdown( - md_content_processed, extensions=["tables", "fenced_code"] + rel = ( + md_file.relative_to(docs_dir) + if md_file.is_relative_to(docs_dir) + else md_file ) - - # --- Determine parent page --- - parent_id = get_nested_parent_id( + print(f"\nPublishing {rel}...") + publish_single_file( conf, space_key, - rel_path, + md_file, docs_dir, root_page_id, folder_pages, confluence_prefix, + tmp_dir=tmp_dir, ) - # --- Create or update page first (need page_id for attachments) --- - existing = conf.get_page_by_title(space=space_key, title=title) - if existing: - page_id = existing["id"] - else: - new_page = conf.create_page( - space=space_key, - title=title, - body="

Placeholder — content being uploaded.

", - parent_id=parent_id, - ) - page_id = new_page["id"] - print(f" ✓ Created page: {title}") - - # --- Upload attachments (mermaid PNGs + local images) --- - if all_attachments: - uploaded = upload_attachments(conf, page_id, all_attachments) - else: - uploaded = set() - - # --- Build image_map for src→attachment_name replacement --- - # Keys: original markdown ref strings and absolute path strings - image_map = {} - for name, path in mermaid_attachments: - image_map[str(path)] = name - for _, abs_path, ref in local_images: - att_name = Path(ref).name - if att_name in uploaded: - image_map[ref] = att_name - image_map[str(abs_path)] = att_name - - # --- Replace tags with ac:image macros --- - if image_map: - html_content = replace_images_with_ac_macros(html_content, image_map) - - # --- Final update with real content --- - conf.update_page( - page_id=page_id, - title=title, - body=html_content, - parent_id=parent_id, - ) - print(f" ✓ Updated: {title}") - print("\n✓ All pages published successfully!")