diff --git a/.github/actions/publish-to-confluence/publish.py b/.github/actions/publish-to-confluence/publish.py index 4894871..a95cc8f 100644 --- a/.github/actions/publish-to-confluence/publish.py +++ b/.github/actions/publish-to-confluence/publish.py @@ -4,33 +4,135 @@ Preserves folder structure as page hierarchy. Supports: - - Mermaid diagrams: converted to Confluence CloudScript macro (cloudscript-mermaid) - - Local images: uploaded as Confluence attachments - - Tables, fenced code blocks + - Mermaid diagrams: rendered to PNG via mmdc, uploaded as attachments; + falls back to Confluence CloudScript macro if mmdc is unavailable + - Local images: uploaded as Confluence attachments (png/jpg/gif/svg/webp/bmp/ico) + - Tables, fenced code blocks with syntax highlighting + - YAML frontmatter: stripped before publish, confluence_url/page_id written back """ +import html +import logging import os import re import subprocess import sys import tempfile from pathlib import Path +from urllib.parse import unquote import markdown from atlassian import Confluence +log = logging.getLogger(__name__) # --------------------------------------------------------------------------- -# Mermaid macro (Confluence CloudScript app) +# Language map: fenced-code identifier → Confluence code macro language # --------------------------------------------------------------------------- +LANGUAGE_MAP = { + "python": "python", + "py": "python", + "javascript": "javascript", + "js": "javascript", + "typescript": "typescript", + "ts": "typescript", + "java": "java", + "bash": "bash", + "sh": "bash", + "shell": "bash", + "zsh": "bash", + "sql": "sql", + "json": "javascript", + "xml": "xml", + "html": "html", + "css": "css", + "yaml": "yaml", + "yml": "yaml", + "ruby": "ruby", + "rb": "ruby", + "go": "go", + "rust": "rust", + "c": "c", + "cpp": "cpp", + "c++": "cpp", + "csharp": "csharp", + "cs": "csharp", + "php": "php", + "scala": "scala", + "terraform": "text", + "tf": "text", + "hcl": "text", + 
"dockerfile": "bash", + "groovy": "groovy", + "powershell": "powershell", + "ps1": "powershell", + "r": "r", + "perl": "perl", + "swift": "swift", + "kotlin": "kotlin", +} + +# --------------------------------------------------------------------------- +# Frontmatter helpers +# --------------------------------------------------------------------------- + +_FRONTMATTER_RE = re.compile(r"^---\r?\n(.*?)\r?\n---\r?\n", re.DOTALL) + + +def _fm_parse(text): + """Return (meta_dict, body) stripping YAML frontmatter from *text*. + + If no frontmatter is present returns ({}, text) unchanged. + """ + m = _FRONTMATTER_RE.match(text) + if not m: + return {}, text + meta = {} + for line in m.group(1).splitlines(): + if ":" in line: + key, _, value = line.partition(":") + meta[key.strip()] = value.strip().strip('"') + return meta, text[m.end() :] + + +def _fm_dump(meta, body): + """Return markdown text with *meta* serialised as YAML frontmatter.""" + if not meta: + return body + lines = ["---"] + for key, value in meta.items(): + if any(c in str(value) for c in (":", "#", "[", "]", "{", "}")): + lines.append(f'{key}: "{value}"') + else: + lines.append(f"{key}: {value}") + lines.append("---") + lines.append("") + return "\n".join(lines) + body + + +def _fm_update_file(path, updates): + """Merge *updates* into the frontmatter of *path*, preserving existing keys.""" + text = path.read_text(encoding="utf-8") + meta, body = _fm_parse(text) + meta.update(updates) + path.write_text(_fm_dump(meta, body), encoding="utf-8") + + +# --------------------------------------------------------------------------- +# Mermaid macro (Confluence CloudScript app fallback) +# --------------------------------------------------------------------------- + + def convert_mermaid_blocks(md_content): """ Replace ```mermaid ... ``` fenced blocks with Confluence cloudscript-mermaid macro HTML before the standard markdown conversion runs. Requires the 'CloudScript.io Mermaid' app to be installed in Confluence. 
+ Used as a fallback when mmdc rendering is not available. """ pattern = re.compile(r"```mermaid\s*\n(.*?)```", re.DOTALL) + def replace(match): diagram = match.group(1).strip() return ( @@ -40,53 +142,63 @@ def replace(match): "" "" ) + return pattern.sub(replace, md_content) # --------------------------------------------------------------------------- -# Helpers +# Code block conversion # --------------------------------------------------------------------------- -def prefixed(title, prefix): - """Prepend prefix to a page title, or return the title unchanged if prefix is empty.""" - return f"{prefix}{title}" if prefix else title +def _replace_code_block(match): + """Replace a single
block with a Confluence code macro."""
+ lang_attr = match.group(1) or ""
+ code_body = match.group(2)
-def folder_page_title(docs_dir, folder_path):
- """
- Derive a space-unique Confluence page title for a folder.
+ lang = ""
+ lang_match = re.search(r'language-([^\s"\']+)', lang_attr)
+ if lang_match:
+ lang = lang_match.group(1).lower()
- Confluence titles must be unique across the whole space, so we use the full
- relative path rather than just the leaf name. Each path part has its numeric
- prefix stripped and is title-cased, then parts are joined with ' / '.
+ confluence_lang = LANGUAGE_MAP.get(lang, lang) if lang else "none"
+ plain_code = html.unescape(code_body)
- Examples:
- docs/adr -> "Adr"
- docs/poc/adr -> "Poc / Adr"
- docs/00-intro -> "Intro"
- docs/poc/00-intro -> "Poc / Intro"
- """
- rel = folder_path.relative_to(docs_dir)
- parts = []
- for part in rel.parts:
- if part and part[0].isdigit() and "-" in part:
- part = part.split("-", 1)[1]
- parts.append(part.replace("-", " ").title())
- return " / ".join(parts)
+ return (
+ ''
+ f'{confluence_lang} '
+ 'true '
+ ""
+ f""
+ " "
+ " "
+ )
+
+
+def convert_code_blocks_for_confluence(html_content):
+ """Convert blocks into Confluence ac:structured-macro code blocks."""
+ pattern = re.compile(
+ r"]*))?>(.*?)
",
+ re.DOTALL,
+ )
+ return pattern.sub(_replace_code_block, html_content)
# ---------------------------------------------------------------------------
# Mermaid rendering
# ---------------------------------------------------------------------------
+
def render_mermaid_diagrams(md_content, tmp_dir):
"""
Find all ```mermaid ... ``` blocks in the markdown, render each to a PNG
file in tmp_dir using mmdc, and return:
- - modified md_content with blocks replaced by 
+ - modified md_content with blocks replaced by 
- list of (attachment_name, png_path) tuples for later upload
+
+ Falls back to leaving the block unchanged if mmdc is not available.
"""
- pattern = re.compile(r'```mermaid\s*\n(.*?)\n```', re.DOTALL)
+ pattern = re.compile(r"```mermaid\s*\n(.*?)\n```", re.DOTALL)
attachments = []
counter = [0]
@@ -99,7 +211,6 @@ def replace_block(m):
puppeteer_cfg = Path(tmp_dir) / "puppeteer-config.json"
mmd_file.write_text(diagram_src, encoding="utf-8")
- # Write puppeteer config once (idempotent)
if not puppeteer_cfg.exists():
puppeteer_cfg.write_text('{"args": ["--no-sandbox"]}', encoding="utf-8")
@@ -107,27 +218,29 @@ def replace_block(m):
subprocess.run(
[
"mmdc",
- "-i", str(mmd_file),
- "-o", str(png_file),
- "--backgroundColor", "white",
- "--puppeteerConfigFile", str(puppeteer_cfg),
+ "-i",
+ str(mmd_file),
+ "-o",
+ str(png_file),
+ "--backgroundColor",
+ "white",
+ "--puppeteerConfigFile",
+ str(puppeteer_cfg),
],
check=True,
capture_output=True,
)
attachment_name = f"mermaid-{idx}.png"
attachments.append((attachment_name, png_file))
- # Replace fenced block with a local image reference that will be
- # picked up by the image-upload step below.
return f""
except subprocess.CalledProcessError as e:
stderr = e.stderr.decode() if e.stderr else ""
print(f" Warning: mmdc failed for diagram {idx}: {stderr.strip()}")
- # Fall back: keep the block as a fenced code block so at least
- # the source is visible.
return m.group(0)
except FileNotFoundError:
- print(" Warning: mmdc not found — Mermaid diagrams will not be rendered.")
+ print(
+ " Warning: mmdc not found — Mermaid diagrams will not be rendered as PNG."
+ )
return m.group(0)
modified = pattern.sub(replace_block, md_content)
@@ -138,87 +251,226 @@ def replace_block(m):
# Image handling
# ---------------------------------------------------------------------------
-def collect_local_images(md_content, md_file_path):
- """
- Find all local image references in markdown () and return a
- list of (alt, abs_path, original_ref) tuples. Remote URLs are skipped.
- """
- pattern = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
- images = []
- for m in pattern.finditer(md_content):
- alt, ref = m.group(1), m.group(2)
- if ref.startswith("http://") or ref.startswith("https://"):
- continue
- abs_path = (md_file_path.parent / ref).resolve()
- if abs_path.exists():
- images.append((alt, abs_path, ref))
- else:
- print(f" Warning: image not found, skipping: {ref}")
- return images
+_IMG_SRC_PATTERN = re.compile(
+ r'
]*\s+)?src="([^"]+)"([^>]*)>',
+ re.IGNORECASE,
+)
+
+def _replace_local_images_with_attachment_macros(html_content, md_file_dir):
+ """Replace local image refs with Confluence attachment macros.
-def upload_attachments(conf, page_id, attachments):
+ Returns (modified_html, list_of_(filename, absolute_path)) for upload.
+ Remote URLs and data URIs are left unchanged.
"""
- Upload a list of (attachment_name, file_path) to a Confluence page.
- Returns a set of successfully uploaded attachment names.
+ to_upload = []
+
+ def replace_one(match):
+ src = match.group(2).strip()
+ if src.startswith(("http://", "https://", "data:", "//")):
+ return match.group(0)
+ path_part = unquote(src.lstrip("./"))
+ if not path_part:
+ return match.group(0)
+ local_path = (md_file_dir / path_part).resolve()
+ if not local_path.is_file():
+ return match.group(0)
+ filename = local_path.name
+ to_upload.append((filename, local_path))
+ return f' '
+
+ return _IMG_SRC_PATTERN.sub(replace_one, html_content), to_upload
+
+
+def _replace_mermaid_img_tags(html_content, mermaid_attachments):
+ """Replace
tags produced by render_mermaid_diagrams
+ with Confluence macros.
+
+ render_mermaid_diagrams replaces mermaid blocks with .
+ After markdown conversion those become
tags which
+ cannot be resolved relative to md_file.parent. We handle them explicitly
+ here using the known (name, abs_path) pairs before the generic image handler runs.
"""
- uploaded = set()
- for name, path in attachments:
+ for name, abs_path in mermaid_attachments:
+ escaped_name = html.escape(name)
+ macro = f' '
+ # match the img tag with this exact src
+ html_content = re.sub(
+ r'
]*src="' + re.escape(str(abs_path)) + r'"[^>]*/?>',
+ macro,
+ html_content,
+ )
+ return html_content
+
+
+def _content_type_for_filename(filename):
+    """Return the MIME type to declare when uploading *filename*.
+
+    The lookup is keyed on the lower-cased file suffix. A suffix that is not
+    in the table (or a name with no suffix) yields "application/octet-stream".
+    """
+    mime_by_suffix = {
+        ".png": "image/png",
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".gif": "image/gif",
+        ".svg": "image/svg+xml",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+        ".ico": "image/x-icon",
+    }
+    suffix = Path(filename).suffix.lower()
+    if suffix in mime_by_suffix:
+        return mime_by_suffix[suffix]
+    return "application/octet-stream"
+
+
+def _upload_attachments(conf, page_id, files_to_upload):
+ """Upload local image files as Confluence page attachments."""
+ page_id = str(page_id)
+ for filename, file_path in files_to_upload:
+ file_path = Path(file_path)
+ if not file_path.is_file():
+ continue
try:
- conf.attach_file(
- str(path),
- name=name,
+ content = file_path.read_bytes()
+ conf.attach_content(
+ content,
+ name=filename,
+ content_type=_content_type_for_filename(filename),
page_id=page_id,
- content_type="image/png",
)
- uploaded.add(name)
- print(f" ✓ Uploaded attachment: {name}")
+ print(f" ✓ Uploaded attachment: {filename}")
except Exception as e:
- print(f" Warning: could not upload {name}: {e}")
- return uploaded
+ print(f" Warning: could not upload {filename}: {e}")
-def replace_images_with_ac_macros(html_content, image_map):
- """
- Replace
tags whose src maps to an uploaded Confluence
- attachment with Confluence storage-format macros.
+# ---------------------------------------------------------------------------
+# Confluence client + page operations
+# ---------------------------------------------------------------------------
+
- image_map: dict of {original_ref_or_abs_path_str: attachment_name}
+def init_confluence(confluence_url, confluence_user, confluence_token):
+    """Build the Confluence API client used by the publisher.
+
+    The token is handed to the client as its ``password`` argument and the
+    client is constructed with ``cloud=True``.
+    """
+    client_kwargs = {
+        "url": confluence_url,
+        "username": confluence_user,
+        "password": confluence_token,
+        "cloud": True,
+    }
+    return Confluence(**client_kwargs)
+
+
+def _update_confluence_page(conf, page_id, title, body_html, space_key, parent_id):
+ """Update a Confluence page via direct REST PUT.
+
+ The atlassian-python-api update_page can trigger 400 ApiValueError on
+ Confluence Cloud when space is missing from the payload. We build the
+ PUT payload explicitly to avoid this.
"""
- def replace_img(m):
- src = m.group(1)
- alt = m.group(2) if m.group(2) else ""
- # Try exact match first, then basename
- name = image_map.get(src) or image_map.get(Path(src).name)
- if name:
- return (
- f''
- f' '
- f' '
+ try:
+ hist = conf.history(page_id)
+ if hasattr(hist, "json"):
+ hist = hist.json()
+ version_num = hist.get("lastUpdated", {}).get("number", 1)
+ except Exception:
+ try:
+ page = conf.get_page_by_id(page_id, expand="version")
+ version_num = page.get("version", {}).get("number", 1)
+ except Exception:
+ version_num = 1
+
+ data = {
+ "id": page_id,
+ "type": "page",
+ "title": title,
+ "space": {"key": space_key},
+ "version": {"number": version_num + 1, "minorEdit": False},
+ "body": {"storage": {"value": body_html, "representation": "storage"}},
+ }
+ if parent_id and str(parent_id) != str(page_id):
+ data["ancestors"] = [{"type": "page", "id": parent_id}]
+
+ conf.put(f"rest/api/content/{page_id}", data=data, params={"status": "current"})
+
+
+def _find_page_any_status(conf, space_key, title):
+ """Search for a page by title across all statuses (current, draft, trashed, archived).
+
+ The standard get_page_by_title only returns 'current' pages. Confluence
+ still enforces title uniqueness across drafts and trashed pages, so this
+ is needed to find a conflicting ghost page before retrying a create.
+ """
+ for status in ("current", "draft", "trashed", "archived"):
+ try:
+ response = conf.get(
+ "rest/api/content",
+ params={
+ "type": "page",
+ "spaceKey": space_key,
+ "title": title,
+ "status": status,
+ "limit": 1,
+ },
)
- return m.group(0)
+ results = response.get("results", []) if isinstance(response, dict) else []
+ if results:
+ page = results[0]
+ page.setdefault("status", status)
+ return page
+ except Exception:
+ continue
+ return None
- # Match both
and
- html_content = re.sub(
- r'
]*/?>',
- replace_img,
- html_content,
- )
- html_content = re.sub(
- r'
]*/?>',
- lambda m: replace_img(type('M', (), {
- 'group': lambda self, n: m.group(2) if n == 1 else m.group(1),
- '__call__': lambda self: None,
- })()) if m.group(2) else m.group(0),
- html_content,
+
+def get_or_create_root_page(conf, space_key, root_page_title):
+ """Return the root page id, creating the page if it doesn't exist."""
+ print(f"Setting up root page: {root_page_title}")
+ root_page = conf.get_page_by_title(space=space_key, title=root_page_title)
+ if root_page:
+ print(f"✓ Found root page: {root_page_title}")
+ return root_page["id"]
+ print(f"Creating root page: {root_page_title}")
+ root_page = conf.create_page(
+ space=space_key,
+ title=root_page_title,
+ body="This is the root documentation page. Content is auto-generated from GitHub.
",
)
- return html_content
+ if not root_page:
+ raise RuntimeError(f"Failed to create root page: {root_page_title}")
+ print(f"✓ Created root page: {root_page_title}")
+ return root_page["id"]
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def prefixed(title, prefix):
+    """Return *title* with *prefix* put in front; a falsy prefix leaves it unchanged."""
+    if not prefix:
+        return title
+    return f"{prefix}{title}"
+
+
+def folder_page_title(docs_dir, folder_path):
+    """
+    Build the Confluence page title that represents a documentation folder.
+
+    The title is made from every component of *folder_path* relative to
+    *docs_dir* (not only the last one), because page titles have to be unique
+    within a Confluence space. A component that starts with a digit and
+    contains a hyphen loses everything up to and including its first hyphen;
+    remaining hyphens become spaces and the result is title-cased. Components
+    are joined with ' / '.
+
+    Examples:
+        docs/adr           -> "Adr"
+        docs/poc/adr       -> "Poc / Adr"
+        docs/00-intro      -> "Intro"
+        docs/poc/00-intro  -> "Poc / Intro"
+    """
+
+    def display_name(segment):
+        # "00-intro" -> "intro": drop the ordering prefix before prettifying.
+        if segment and segment[0].isdigit() and "-" in segment:
+            segment = segment.split("-", 1)[1]
+        return segment.replace("-", " ").title()
+
+    relative = folder_path.relative_to(docs_dir)
+    return " / ".join(display_name(segment) for segment in relative.parts)
# ---------------------------------------------------------------------------
# Confluence page hierarchy
# ---------------------------------------------------------------------------
+
def get_or_create_folder_page(
conf,
space_key,
@@ -228,7 +480,12 @@ def get_or_create_folder_page(
docs_dir,
confluence_prefix="",
):
- """Get or create a page for a folder"""
+ """Get or create a Confluence page for a folder.
+
+ Checks parent's children first, then falls back to a space-wide title
+ search. On title conflict caused by a ghost draft/trashed page, removes
+ the ghost and retries creation.
+ """
folder_key = str(folder_path)
if folder_key in folder_pages:
return folder_pages[folder_key]
@@ -248,16 +505,47 @@ def get_or_create_folder_page(
except Exception as e:
print(f" Warning: Could not check children: {e}")
+ # Also search the whole space by title
+ existing = conf.get_page_by_title(space=space_key, title=title)
+ if existing:
+ print(f" ✓ Found folder page: {title} (id: {existing['id']})")
+ folder_pages[folder_key] = existing["id"]
+ return existing["id"]
+
# Create folder page if not found
print(f" Creating folder page: {title} (under parent: {parent_id})")
- folder_page = conf.create_page(
- space=space_key,
- title=title,
- body=f"This section contains documentation for {title}.
",
- parent_id=parent_id,
- )
- folder_pages[folder_key] = folder_page["id"]
- return folder_page["id"]
+ try:
+ folder_page = conf.create_page(
+ space=space_key,
+ title=title,
+ body=f"This section contains documentation for {title}.
",
+ parent_id=parent_id,
+ )
+ folder_pages[folder_key] = folder_page["id"]
+ return folder_page["id"]
+ except Exception as exc:
+ if "already exists" not in str(exc).lower() and "title" not in str(exc).lower():
+ raise
+ # Ghost page (draft/trashed) is blocking creation — find and clean it up
+ fallback = _find_page_any_status(conf, space_key, title)
+ if fallback:
+ fallback_status = fallback.get("status", "current")
+ fallback_id = fallback["id"]
+ if fallback_status in ("draft", "trashed"):
+ try:
+ conf.remove_page(fallback_id)
+ except Exception:
+ pass
+ folder_page = conf.create_page(
+ space=space_key,
+ title=title,
+ body=f"This section contains documentation for {title}.
",
+ parent_id=parent_id,
+ )
+ fallback_id = folder_page["id"]
+ folder_pages[folder_key] = fallback_id
+ return fallback_id
+ raise
def get_nested_parent_id(
@@ -269,7 +557,7 @@ def get_nested_parent_id(
folder_pages,
confluence_prefix="",
):
- """Get parent page ID for nested folder structure"""
+ """Get parent page ID for nested folder structure."""
if rel_path.parent == Path("."):
return root_page_id
@@ -292,9 +580,234 @@ def get_nested_parent_id(
# ---------------------------------------------------------------------------
-# Main publisher
+# Title derivation
+# ---------------------------------------------------------------------------
+
+
+def _title_for_md(md_file, rel_path, md_content):
+    """Derive a Confluence page title from a markdown file and its content.
+
+    The title is the text of a leading ``# `` heading when the content starts
+    with one (blank lines before it are ignored). Otherwise it is the file
+    stem with hyphens turned into spaces, title-cased. For a nested
+    ``index.md`` whose title comes out as "index" or "readme" (any case), the
+    title becomes "<Folder> - Overview", where the folder name has its
+    numeric prefix removed and is title-cased.
+
+    Args:
+        md_file: Path of the markdown file; only ``stem`` and ``name`` are read.
+        rel_path: Path of the file relative to the docs root.
+        md_content: Markdown text of the file.
+
+    Returns:
+        The page title as a string, without any configured prefix applied.
+    """
+    # Content that had frontmatter removed usually begins with a blank line,
+    # so skip leading newlines before looking for the heading.
+    body = md_content.lstrip("\r\n")
+    if body.startswith("# "):
+        # splitlines() also drops the "\r" of CRLF files, which split("\n")
+        # would leave at the end of the title.
+        title = body.splitlines()[0].strip("# ")
+    else:
+        title = md_file.stem.replace("-", " ").title()
+
+    if md_file.name == "index.md" and rel_path.parent != Path("."):
+        folder_name = rel_path.parent.name
+        if folder_name and folder_name[0].isdigit() and "-" in folder_name:
+            folder_name = folder_name.split("-", 1)[1]
+        folder_display = folder_name.replace("-", " ").title()
+        if title.lower() in ["index", "readme"]:
+            title = f"{folder_display} - Overview"
+
+    return title
+
+
+# ---------------------------------------------------------------------------
+# Single file publish
+# ---------------------------------------------------------------------------
+
+
+def publish_single_file(
+ conf,
+ space_key,
+ md_file,
+ docs_dir,
+ root_page_id,
+ folder_pages,
+ confluence_prefix="",
+ tmp_dir=None,
+):
+ """Publish (create or update) a single markdown file to Confluence.
+
+ Handles the full pipeline:
+ - Mermaid rendering to PNG (via mmdc) with CloudScript macro fallback
+ - Frontmatter stripping and write-back of confluence_url/page_id
+ - Code block conversion to Confluence macros
+ - Local image upload as attachments
+ - Ghost page cleanup on title conflicts
+ - Direct REST PUT for page updates (avoids atlassian-python-api 400 bug)
+ """
+ md_file = Path(md_file).resolve()
+ docs_dir = Path(docs_dir).resolve()
+
+ try:
+ rel_path = md_file.relative_to(docs_dir)
+ except ValueError:
+ rel_path = Path(md_file.name)
+
+ md_content = md_file.read_text(encoding="utf-8")
+
+ # Strip frontmatter before conversion so it doesn't appear in page content
+ existing_meta, md_content = _fm_parse(md_content)
+
+ # Render mermaid blocks to PNG; falls back to leaving block unchanged if mmdc unavailable
+ if tmp_dir:
+ md_content, mermaid_attachments = render_mermaid_diagrams(md_content, tmp_dir)
+ else:
+ mermaid_attachments = []
+
+ # Apply CloudScript macro fallback for any mermaid blocks that were not rendered
+ # (render_mermaid_diagrams leaves un-renderable blocks intact as fenced code)
+ md_content = convert_mermaid_blocks(md_content)
+
+ title = prefixed(_title_for_md(md_file, rel_path, md_content), confluence_prefix)
+
+ # Convert Markdown → HTML
+ html_content = markdown.markdown(md_content, extensions=["tables", "fenced_code"])
+
+ # Convert blocks to Confluence code macros
+ html_content = convert_code_blocks_for_confluence(html_content)
+
+ # Replace mermaid PNG <img> tags (abs paths) with ac:image macros before
+ # the generic local-image handler runs (which resolves paths relative to md_file.parent)
+ if mermaid_attachments:
+ html_content = _replace_mermaid_img_tags(html_content, mermaid_attachments)
+
+ # Replace remaining local image references with Confluence attachment macros
+ html_content, local_files_to_upload = _replace_local_images_with_attachment_macros(
+ html_content, md_file.parent
+ )
+
+ if not html_content or not html_content.strip():
+ html_content = ""
+
+ parent_id = get_nested_parent_id(
+ conf,
+ space_key,
+ rel_path,
+ docs_dir,
+ root_page_id,
+ folder_pages,
+ confluence_prefix,
+ )
+
+ existing = conf.get_page_by_title(space=space_key, title=title)
+
+ if existing:
+ page_id = existing["id"]
+ _update_confluence_page(
+ conf, page_id, title, html_content, space_key, parent_id
+ )
+ print(f" ✓ Updated: {title}")
+ else:
+ try:
+ created = conf.create_page(
+ space=space_key,
+ title=title,
+ body=html_content,
+ parent_id=parent_id,
+ )
+ page_id = (
+ created.get("id")
+ if isinstance(created, dict)
+ else getattr(created, "id", None)
+ )
+ print(f" ✓ Created: {title}")
+ except Exception as exc:
+ if (
+ "already exists" not in str(exc).lower()
+ and "title" not in str(exc).lower()
+ ):
+ raise
+ # Ghost page (draft/trashed) is blocking creation
+ fallback = _find_page_any_status(conf, space_key, title)
+ if fallback:
+ fallback_status = fallback.get("status", "current")
+ fallback_id = fallback["id"]
+ if fallback_status in ("draft", "trashed"):
+ try:
+ conf.remove_page(fallback_id)
+ except Exception:
+ pass
+ created = conf.create_page(
+ space=space_key,
+ title=title,
+ body=html_content,
+ parent_id=parent_id,
+ )
+ page_id = (
+ created.get("id")
+ if isinstance(created, dict)
+ else getattr(created, "id", None)
+ )
+ print(f" ✓ Created (after ghost cleanup): {title}")
+ else:
+ page_id = fallback_id
+ _update_confluence_page(
+ conf, page_id, title, html_content, space_key, parent_id
+ )
+ print(f" ✓ Updated (fallback): {title}")
+ else:
+ raise
+
+ # Upload all attachments: local images + mermaid PNGs
+ all_files_to_upload = local_files_to_upload + [
+ (name, path) for name, path in mermaid_attachments
+ ]
+ if all_files_to_upload and page_id:
+ _upload_attachments(conf, page_id, all_files_to_upload)
+
+ # Write confluence_url and page_id back into the file's frontmatter
+ if page_id:
+ try:
+ page_info = conf.get_page_by_id(page_id, expand="")
+ webui = (
+ page_info.get("_links", {}).get("webui", "")
+ if isinstance(page_info, dict)
+ else ""
+ )
+ base_url = conf.url.rstrip("/")
+ target_url = (
+ f"{base_url}/wiki{webui}"
+ if webui and not webui.startswith("http")
+ else webui
+ )
+ if target_url:
+ existing_meta["confluence_url"] = target_url
+ existing_meta["page_id"] = str(page_id)
+ _fm_update_file(md_file, existing_meta)
+ except Exception:
+ pass
+
+ # Register this page in the folder cache if a same-named sibling directory exists,
+ # so child pages nest under it rather than a separate placeholder page.
+ if page_id:
+ sibling_dir = md_file.parent / md_file.stem
+ if sibling_dir.is_dir():
+ folder_pages[str(sibling_dir)] = page_id
+
+
+# ---------------------------------------------------------------------------
+# Sorting / filtering helpers
# ---------------------------------------------------------------------------
+
+def _depth_then_index_first(p):
+    """Sort key that orders paths by depth, then index.md first, then by path text."""
+    depth = len(p.parts)
+    # False sorts before True, so an index.md precedes its siblings.
+    is_not_index = p.name != "index.md"
+    return depth, is_not_index, str(p)
+
+
+def _remove_redundant_index_files(md_files):
+    """Drop every index.md whose folder has a same-named sibling .md file.
+
+    For foo/index.md the sibling is foo.md, located next to the foo folder.
+    The index is dropped only when that sibling is itself present in
+    *md_files*. Paths are compared after resolve(); surviving entries are
+    returned in their input order.
+    """
+    known = {candidate.resolve() for candidate in md_files}
+    kept = []
+    for candidate in md_files:
+        if candidate.name == "index.md":
+            folder = candidate.parent
+            sibling = folder.parent / f"{folder.name}.md"
+            if sibling.resolve() in known:
+                continue
+        kept.append(candidate)
+    return kept
+
+
+# ---------------------------------------------------------------------------
+# Main publish entry point
+# ---------------------------------------------------------------------------
+
+
def publish_docs(
confluence_url,
confluence_user,
@@ -303,145 +816,59 @@ def publish_docs(
docs_path,
root_page_title,
confluence_prefix="",
+ files=None,
):
- """Publish all markdown files to Confluence"""
-
- conf = Confluence(
- url=confluence_url,
- username=confluence_user,
- password=confluence_token,
- cloud=True,
- )
+ """Publish all markdown files under docs_path to Confluence.
- # Get or create root page
+ If *files* is given (a list of paths), only those files are published.
+ """
+ conf = init_confluence(confluence_url, confluence_user, confluence_token)
root_page_title_prefixed = prefixed(root_page_title, confluence_prefix)
- print(f"Setting up root page: {root_page_title_prefixed}")
- root_page = conf.get_page_by_title(space=space_key, title=root_page_title_prefixed)
- if root_page:
- print(f"✓ Found root page: {root_page_title_prefixed}")
- root_page_id = root_page["id"]
- else:
- print(f"Creating root page: {root_page_title_prefixed}")
- root_page = conf.create_page(
- space=space_key,
- title=root_page_title_prefixed,
- body="This is the root documentation page. Content is auto-generated from GitHub.
",
- )
- if not root_page:
- raise RuntimeError(f"Failed to create root page: {root_page_title_prefixed}")
- print(f"✓ Created root page: {root_page_title_prefixed}")
- root_page_id = root_page["id"]
+ root_page_id = get_or_create_root_page(conf, space_key, root_page_title_prefixed)
folder_pages = {}
docs_dir = Path(docs_path)
- md_files = list(docs_dir.rglob("*.md"))
- def sort_key(p):
- is_index = p.name == "index.md"
- depth = len(p.parts)
- return (not is_index, depth, str(p))
+ if files:
+ md_files = [Path(f) for f in files]
+ else:
+ md_files = list(docs_dir.rglob("*.md"))
+
+ def sort_key(p):
+ is_index = p.name == "index.md"
+ depth = len(p.parts)
+ return (not is_index, depth, str(p))
+
+ md_files.sort(key=sort_key)
+ md_files = _remove_redundant_index_files(md_files)
- md_files.sort(key=sort_key)
print(f"\nPublishing {len(md_files)} files...\n")
with tempfile.TemporaryDirectory() as tmp_dir:
for md_file in md_files:
if "template" in md_file.name.lower():
- print(f"Skipping template: {md_file.relative_to(docs_dir)}")
+ print(
+ f"Skipping template: {md_file.relative_to(docs_dir) if md_file.is_relative_to(docs_dir) else md_file.name}"
+ )
continue
- rel_path = md_file.relative_to(docs_dir)
- print(f"\nPublishing {rel_path}...")
-
- with open(md_file, "r", encoding="utf-8") as f:
- md_content = f.read()
-
- # --- Mermaid: render diagrams to PNG, replace blocks with img refs ---
- md_content, mermaid_attachments = render_mermaid_diagrams(md_content, tmp_dir)
-
- # --- Collect local images referenced in markdown ---
- local_images = collect_local_images(md_content, md_file)
- all_attachments = mermaid_attachments + [
- (Path(ref).name, abs_path) for _, abs_path, ref in local_images
- ]
-
- # --- Extract title ---
- if md_content.startswith("# "):
- title = md_content.split("\n")[0].strip("# ")
- else:
- title = md_file.stem.replace("-", " ").title()
-
- if md_file.name == "index.md" and rel_path.parent != Path("."):
- folder_name = rel_path.parent.name
- if folder_name and folder_name[0].isdigit() and "-" in folder_name:
- folder_name = folder_name.split("-", 1)[1]
- folder_display = folder_name.replace("-", " ").title()
- if title.lower() in ["index", "readme"]:
- title = f"{folder_display} - Overview"
-
- title = prefixed(title, confluence_prefix)
-
- # --- Convert markdown to HTML ---
- md_content_processed = convert_mermaid_blocks(md_content)
- html_content = markdown.markdown(
- md_content_processed, extensions=["tables", "fenced_code"]
+ rel = (
+ md_file.relative_to(docs_dir)
+ if md_file.is_relative_to(docs_dir)
+ else md_file
)
-
- # --- Determine parent page ---
- parent_id = get_nested_parent_id(
+ print(f"\nPublishing {rel}...")
+ publish_single_file(
conf,
space_key,
- rel_path,
+ md_file,
docs_dir,
root_page_id,
folder_pages,
confluence_prefix,
+ tmp_dir=tmp_dir,
)
- # --- Create or update page first (need page_id for attachments) ---
- existing = conf.get_page_by_title(space=space_key, title=title)
- if existing:
- page_id = existing["id"]
- else:
- new_page = conf.create_page(
- space=space_key,
- title=title,
- body="Placeholder — content being uploaded.
",
- parent_id=parent_id,
- )
- page_id = new_page["id"]
- print(f" ✓ Created page: {title}")
-
- # --- Upload attachments (mermaid PNGs + local images) ---
- if all_attachments:
- uploaded = upload_attachments(conf, page_id, all_attachments)
- else:
- uploaded = set()
-
- # --- Build image_map for src→attachment_name replacement ---
- # Keys: original markdown ref strings and absolute path strings
- image_map = {}
- for name, path in mermaid_attachments:
- image_map[str(path)] = name
- for _, abs_path, ref in local_images:
- att_name = Path(ref).name
- if att_name in uploaded:
- image_map[ref] = att_name
- image_map[str(abs_path)] = att_name
-
- # --- Replace
tags with ac:image macros ---
- if image_map:
- html_content = replace_images_with_ac_macros(html_content, image_map)
-
- # --- Final update with real content ---
- conf.update_page(
- page_id=page_id,
- title=title,
- body=html_content,
- parent_id=parent_id,
- )
- print(f" ✓ Updated: {title}")
-
print("\n✓ All pages published successfully!")