diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1327577f0f6..ffae339a72c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -218,8 +218,10 @@ include(generate_html)
 
 set(MKHTML_PY ${CMAKE_BINARY_DIR}/utils/mkhtml.py)
 set(THUMBNAILS_PY ${CMAKE_BINARY_DIR}/utils/thumbnails.py)
-set(HTML2MAN VERSION_NUMBER=${GRASS_VERSION_NUMBER} ${PYTHON_EXECUTABLE}
-            ${OUTDIR}/${GRASS_INSTALL_UTILSDIR}/g.html2man.py)
+# Command prefix that runs markdown2man.py; `cmake -E env` sets VERSION_NUMBER
+# for the script without relying on POSIX-shell `VAR=value cmd` syntax.
+set(MD2MAN ${CMAKE_COMMAND} -E env VERSION_NUMBER=${GRASS_VERSION_NUMBER}
+           ${PYTHON_EXECUTABLE} ${OUTDIR}/${GRASS_INSTALL_UTILSDIR}/markdown2man.py)
 
 set(env_path "$ENV{PATH}")
 
diff --git a/cmake/generate_man_pages.cmake b/cmake/generate_man_pages.cmake
index 85553ef8629..9ff22f2ad33 100644
--- a/cmake/generate_man_pages.cmake
+++ b/cmake/generate_man_pages.cmake
@@ -1,14 +1,18 @@
 # work in progress...
 
-file(GLOB doc_HTMLFILES "${OUTDIR}/${GRASS_INSTALL_DOCDIR}/*.html")
+file(GLOB doc_MDFILES "${OUTDIR}/${GRASS_INSTALL_MKDOCSDIR}/source/*.md")
 
-foreach(html_file ${doc_HTMLFILES})
-  get_filename_component(PGM_NAME ${html_file} NAME)
+foreach(md_file ${doc_MDFILES})
+  # Strip only the trailing ".md": names such as g.echo contain dots, so
+  # NAME_WE (which cuts at the first dot) would truncate them.
+  get_filename_component(PGM_NAME "${md_file}" NAME)
+  string(REGEX REPLACE "\\.md$" "" PGM_NAME "${PGM_NAME}")
   add_custom_command(
     TARGET create_man_pages
     PRE_BUILD
-    COMMAND ${HTML2MAN} ${OUTDIR}/${GRASS_INSTALL_DOCDIR}/${PGM_NAME}.html
+    COMMAND ${MD2MAN} "${md_file}"
             ${OUTDIR}/${GRASS_INSTALL_MANDIR}/${PGM_NAME}.1
+    VERBATIM
   )
 endforeach()
 
diff --git a/include/Make/Grass.make b/include/Make/Grass.make
index dfcbd974b6a..ef8e09b66d4 100644
--- a/include/Make/Grass.make
+++ b/include/Make/Grass.make
@@ -98,7 +98,7 @@ YFLAGS = -d -v
 MANSECT = 1
 MANBASEDIR = $(ARCH_DISTDIR)/docs/man
 MANDIR = $(MANBASEDIR)/man$(MANSECT)
-HTML2MAN = VERSION_NUMBER=$(GRASS_VERSION_NUMBER) $(GISBASE)/utils/g.html2man.py
+MD2MAN = VERSION_NUMBER=$(GRASS_VERSION_NUMBER) $(GISBASE)/utils/markdown2man.py
 
 GDAL_LINK = $(USE_GDAL)
 
diff --git a/include/Make/Html.make b/include/Make/Html.make
index 42d5284ecd9..7d7e843ab1b 100644
--- a/include/Make/Html.make
+++ b/include/Make/Html.make
@@ -11,8 +11,8 @@ $(MDDIR)/source/%.md: %.md %.tmp.md $(HTMLSRC) $(IMGDST_MD) | $(MDDIR)
 	VERSION_NUMBER=$(GRASS_VERSION_NUMBER) VERSION_DATE=$(GRASS_VERSION_DATE) MODULE_TOPDIR=$(MODULE_TOPDIR) \
 		$(PYTHON) $(GISBASE)/utils/mkmarkdown.py $* > $@
 
-$(MANDIR)/%.$(MANSECT): $(HTMLDIR)/%.html
-	$(HTML2MAN) "$<" "$@"
+$(MANDIR)/%.$(MANSECT): $(MDDIR)/source/%.md
+	$(MD2MAN) "$<" "$@"
 
 %.tmp.html: $(HTMLSRC)
 	if [ "$(HTMLSRC)" != "" ] ; then $(call htmldesc,$<,$@) ; fi
diff --git a/man/Makefile b/man/Makefile
index 820b8a9f830..d85557853ff 100644
--- a/man/Makefile
+++ b/man/Makefile
@@ -66,8 +66,8 @@ default: $(DSTFILES)
 	@echo "Generating manual pages index (help system)..."
 	$(MAKE) $(INDICES)
 	$(call build,check)
-	$(MAKE) manpages
 	$(MAKE) $(INDICES_MD)
+	$(MAKE) manpages
 # $(MAKE) build-mkdocs
 
 # This must be a separate target so that evaluation of $(MANPAGES)
diff --git a/utils/Makefile b/utils/Makefile
index 8137e1b7e63..0e38eab95f3 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -7,6 +7,7 @@ include $(MODULE_TOPDIR)/include/Make/Compile.make
 
 default: parsubdirs $(UTILSDIR)/mkdocs.py $(UTILSDIR)/mkhtml.py $(UTILSDIR)/mkmarkdown.py \
 	$(UTILSDIR)/generate_last_commit_file.py \
+	$(UTILSDIR)/markdown2man.py \
 	$(UTILSDIR)/g.echo$(EXE)
 
 $(UTILSDIR)/mkdocs.py: mkdocs.py
@@ -21,5 +22,8 @@ $(UTILSDIR)/mkmarkdown.py: mkmarkdown.py
 $(UTILSDIR)/generate_last_commit_file.py: generate_last_commit_file.py
 	$(INSTALL) $< $@
 
+$(UTILSDIR)/markdown2man.py: markdown2man.py
+	$(INSTALL) $< $@
+
 $(UTILSDIR)/g.echo$(EXE): $(OBJDIR)/g.echo.o
 	$(call linker_base,$(LINK),$(LDFLAGS) $(EXTRA_LDFLAGS),$(MANIFEST_OBJ))
diff --git a/utils/markdown2man.py b/utils/markdown2man.py
new file mode 100644
index 00000000000..b3a82b6498c
--- /dev/null
+++ b/utils/markdown2man.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python3
+
+###############################################################################
+# Convert manual pages from markdown to MAN format
+#
+# Author(s): Anna Petrasova
+#
+# COPYRIGHT: (C) 2025 by
the GRASS Development Team
+#
+# This program is free software under the GNU General Public
+# License (>=v2). Read the file COPYING that comes with GRASS
+# for details.
+#
+###############################################################################
+
+import argparse
+import os
+from pathlib import Path
+import re
+import textwrap
+
+
+def strip_yaml_from_markdown_and_reformat(content):
+    """Split off the YAML front matter and rebuild the top of the page.
+
+    The front matter is read as simple ``key: value`` lines; a value written
+    as ``[a, b]`` becomes a list of strings. Everything before the
+    ``=== "Command line"`` marker is replaced by generated NAME, KEYWORDS and
+    SYNOPSIS headings, and ``## Parameters`` is demoted to ``### Parameters``.
+
+    Returns a ``(front_matter_dict, markdown)`` tuple. Without front matter
+    the dict is empty; without the marker the page body is returned as is.
+    """
+    match = re.match(r"^---\n(.*?)\n---\n", content, re.DOTALL)
+    if not match:
+        return {}, content.strip()
+
+    yaml_block = match.group(1)
+    markdown = content[match.end() :].strip()
+
+    yaml = {}
+    for line in yaml_block.splitlines():
+        # Split on the first colon only, so values that contain ":" survive;
+        # blank or colon-less lines carry no key and are skipped.
+        key, separator, value = line.strip().partition(":")
+        if not separator:
+            continue
+        key = key.strip()
+        value = value.strip()
+        if value.startswith("[") and value.endswith("]"):
+            yaml[key] = [v.strip() for v in value[1:-1].split(",")]
+        else:
+            yaml[key] = value
+
+    split_string = '=== "Command line"'
+    if split_string not in markdown:
+        # No command-line tab to rebuild the header from: keep the body as is
+        # instead of failing on the unpacking below.
+        return yaml, markdown
+    _, after = markdown.split(split_string, 1)
+
+    keywords = yaml.get("keywords", [])
+    if isinstance(keywords, str):
+        # An unbracketed value is one keyword, not a sequence of characters.
+        keywords = [keywords]
+    keyword_text = ", ".join(keywords)
+    name = yaml.get("name", "")
+    description = yaml.get("description", "")
+
+    before = f"""
+
+# NAME
+
+{name} - {description}
+
+# KEYWORDS
+
+{keyword_text}
+
+# SYNOPSIS
+
+    """
+
+    markdown = before + after.strip()
+    markdown = markdown.replace("## Parameters", "### Parameters")
+    return yaml, markdown
+
+
+def parse_markdown(content):
+    """Cut the Markdown text into consecutive typed blocks.
+
+    Returns a list of ``{"markdown": str, "type": str}`` dicts where type is
+    "default", "code" (a fenced block, fences included) or "list". Outside
+    code blocks, consecutive non-empty lines are joined into one line unless
+    a line ends with the hard-break marker; table rows are kept verbatim.
+    """
+    lines = content.splitlines()
+    processing_block = []
+    processed_content = []
+
+    buffer = ""
+    state = "default"
+
+    for line in lines:
+        if line.strip().startswith("```"):
+            # end of code block
+            if state == "code":
+                processing_block.append(line)
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
+                processing_block = []
+                state = "default"
+            # start of code block
+            else:
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
+                processing_block = []
+                processing_block.append(line)
+                state = "code"
+            continue
+
+        if state == "code":
+            processing_block.append(line)
+            continue
+
+        if re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line.strip()):
+            if buffer:
+                processing_block.append(buffer)
+                buffer = ""
+            # first item of a list: close whatever block came before it
+            if state != "list":
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
+                processing_block = []
+                state = "list"
+
+        if line.strip().startswith("|") and line.strip().endswith("|"):
+            if buffer:
+                processing_block.append(buffer)
+                buffer = ""
+            processing_block.append(line)
+            continue
+
+        # empty line at the start and end of code, list blocks
+        if line == "":
+            if buffer:
+                processing_block.append(buffer)
+                buffer = ""
+            if state != "default":
+                processed_content.append(
+                    {"markdown": "\n".join(processing_block), "type": state}
+                )
+                processing_block = []
+                state = "default"
+            processing_block.append(line)
+            continue
+
+        if buffer:
+            buffer += " " + line
+        elif state == "list":
+            buffer += line
+        else:
+            buffer += line.lstrip()
+
+        # NOTE(review): a Markdown hard break is two trailing spaces; runs of
+        # whitespace in this copy of the patch were collapsed, so confirm
+        # whether this literal should be "  ".
+        if line.endswith(" "):
+            processing_block.append(buffer)
+            buffer = ""
+
+    if buffer:
+        processing_block.append(buffer)
+    if processing_block:
+        processed_content.append(
+            {"markdown": "\n".join(processing_block), "type": state}
+        )
+
+    return processed_content
+
+
+def process_links(markdown):
+    """Drop images and replace Markdown links with only their display text."""
+    markdown = re.sub(r"!\[.*?\]\(.*?\)", "", markdown)
+    return re.sub(r"\[(.*?)\]\((.*?)\)", r"\1", markdown)
+
+
+def process_parameters(markdown):
+    """Turn ``**name**=*type*`` lines before "## DESCRIPTION" into .IP items.
+
+    Text from "## DESCRIPTION" onwards is left untouched; if that heading is
+    absent the input is returned unchanged.
+    """
+    parts = markdown.split("## DESCRIPTION", 1)
+    if len(parts) == 1:
+        return markdown
+    before, after = parts
+    before_processed = re.sub(
+        r"^\*\*([a-z0-9_]*)\*\*=\*([a-zA-Z,_ ]*)\*( \*\*\[required\]\*\*)?",
+        r'.IP "**\1**=*\2*\3" 4m',
+        before,
+        flags=re.MULTILINE,
+    )
+    return before_processed + "## DESCRIPTION" + after
+
+
+def process_flags(markdown):
+    """Turn ``**-x**`` lines before "## DESCRIPTION" into .IP items.
+
+    Text from "## DESCRIPTION" onwards is left untouched; if that heading is
+    absent the input is returned unchanged.
+    """
+    parts = markdown.split("## DESCRIPTION", 1)
+    if len(parts) == 1:
+        return markdown
+
+    before, after = parts
+    before_processed = re.sub(
+        r"^\*\*-(.*?)\*\*", r'.IP "**-\1**" 4m', before, flags=re.MULTILINE
+    )
+    return before_processed + "## DESCRIPTION" + after
+
+
+def process_formatting(markdown):
+    """Apply inline formatting for bold, italic, and bold+italic."""
+    markdown = re.sub(r"\*\*\*(.+?)\*\*\*", r"\\fB\\fI\1\\fR", markdown)
+    markdown = re.sub(r"\*\*(.+?)\*\*", r"\\fB\1\\fR", markdown)
+    # avoid detecting \*
+    # NOTE(review): this pattern was truncated to r"(?" in this copy of the
+    # patch; the lookbehind form below is a repair - confirm against the
+    # original commit.
+    return re.sub(r"(?<!\\)\*(.+?)(?<!\\)\*", r"\\fI\1\\fR", markdown)
+
+
+# NOTE(review): process_headings, remove_python_content_blocks,
+# unindent_command_content_blocks and process_code are called below, but their
+# definitions are missing from this copy of the patch (the text between the
+# "<!" of the regex above and the "-->" of the regex below was lost). They
+# must be restored from the original commit.
+
+
+def remove_comments(markdown):
+    """Remove HTML comments, including ones that span several lines."""
+    return re.sub(r"<!--.*?-->", "", markdown, flags=re.DOTALL)
+
+
+def process_list(markdown):
+    """Convert a block of Markdown list items to nested .IP/.RS/.RE requests.
+
+    Nesting follows the amount of leading whitespace of each item; lines that
+    are not list items are passed through unchanged.
+    """
+    markdown = process_formatting(markdown)
+    markdown = process_special_characters(markdown)
+    markdown = process_links(markdown)
+    markdown = process_br(markdown)
+
+    output = []
+    indent_levels = []
+
+    for line in markdown.splitlines():
+        match = re.match(r"^(\s*)([-*]|\d+\.)\s+(.*)", line)  # Match bullets or numbers
+        if not match:
+            output.append(line)
+            continue  # Skip non-list lines (shouldn't happen if input is all lists)
+
+        spaces, bullet, item_text = match.groups()
+        level = len(spaces)  # Determine indentation level
+
+        while indent_levels and indent_levels[-1] > level:
+            output.append(".RE")  # Close previous indentation level
+            indent_levels.pop()
+
+        if not indent_levels or indent_levels[-1] < level:
+            output.append(".RS 4n")  # Open new indentation level
+            indent_levels.append(level)
+
+        if re.match(r"^\d+\.$", bullet):  # Numbered list
+            output.append(f'.IP "{bullet}" 4n\n{item_text}')
+        else:  # Bullet list
+            output.append(".IP \\(bu 4n\n" + item_text)
+
+    # Close any remaining indentation levels
+    while indent_levels:
+        output.append(".RE")
+        indent_levels.pop()
+
+    return "\n".join(output)
+
+
+def process_special_characters(markdown):
+    """Undo Markdown backslash escapes, drop backticks, squeeze inner spaces."""
+    markdown = markdown.replace(r"\[", "[")
+    markdown = markdown.replace(r"\]", "]")
+    markdown = markdown.replace(r"\#", "#")
+    markdown = markdown.replace(r"\>", ">")
+    markdown = markdown.replace(r"\<", "<")
+    markdown = markdown.replace(r"\*", "*")
+    markdown = markdown.replace("`", "")
+    # eliminate extra spaces between words
+    return re.sub(r"(?<=\S) {2,}(?=\S)", " ", markdown)
+
+
+def process_br(markdown):
+    """Replace a trailing-space line break with a ``.br`` request."""
+    # NOTE(review): same whitespace caveat as in parse_markdown - confirm
+    # whether the pattern should require two trailing spaces.
+    return re.sub(r"([^\n\s]) $", r"\1\n.br", markdown, flags=re.MULTILINE)
+
+
+def process_default(markdown):
+    """Convert a non-code, non-list block: tables, inline markup, headings."""
+    markdown = process_table(markdown)
+    markdown = process_formatting(markdown)
+    markdown = process_special_characters(markdown)
+    markdown = process_links(markdown)
+    markdown = process_headings(markdown)
+    return process_br(markdown)
+
+
+def process_table(markdown: str) -> str:
+    """Replace every Markdown pipe table with a .TS/.TE table."""
+
+    def markdown_to_roff_table(md_table: str) -> str:
+        lines = md_table.strip().splitlines()
+        if len(lines) < 2:
+            return md_table  # not a valid table
+
+        # Remove divider line (2nd line)
+        header = lines[0].strip("|").split("|")
+        rows = [line.strip("|").split("|") for line in lines[2:]]
+
+        # Trim spaces in cells; header cells are wrapped in ** so that the
+        # formatting pass that runs after this one renders them bold
+        header = [cell.strip() for cell in header]
+        header = [f"**{cell.strip()}**" for cell in header]
+        rows = [[cell.strip() for cell in row] for row in rows]
+
+        # Generate column format line (left aligned)
+        format_line = " ".join(["l"] * len(header)) + "."
+        # Build the roff table
+        lines = [".TS", "tab(|);", format_line, "|".join(header)]
+        for row in rows:
+            lines.append("|".join(row))
+        lines.extend((".TE", ".PP"))
+
+        return "\n".join(lines)
+
+    markdown_table_pattern = re.compile(
+        r"""
+        (                           # full table match
+            ^\|.*\|\s*\n            # header row: starts and ends with |
+            ^\|[:\-| ]+\|\s*\n      # divider row: like | --- | :--: |
+            (?:^\|.*\|\s*\n?)+      # one or more data rows
+        )
+        """,
+        re.MULTILINE | re.VERBOSE,
+    )
+    return markdown_table_pattern.sub(
+        lambda match: markdown_to_roff_table(match.group(0)), markdown
+    )
+
+
+def add_paragraphs(markdown):
+    """Put a ``.PP`` request in front of every run of non-empty lines."""
+    return re.sub(
+        r"(?m)(?:^|\n)([^\n\S]*[^\n]+(?:\n[^\n\S]*[^\n]+)*)",
+        lambda m: f"\n.PP\n{m.group(1)}",
+        markdown,
+    ).strip()
+
+
+def markdown_to_man(markdown_text):
+    """Convert a Markdown text to a Unix man page format"""
+    yaml, markdown_text = strip_yaml_from_markdown_and_reformat(markdown_text)
+    markdown_text = remove_python_content_blocks(markdown_text)
+    markdown_text = unindent_command_content_blocks(markdown_text)
+    markdown_text = remove_comments(markdown_text)
+    # process synopsis
+    markdown_text = process_parameters(markdown_text)
+    markdown_text = process_flags(markdown_text)
+    # NOTE(review): the removed literal reads as four non-breaking spaces in
+    # this copy of the patch; it was presumably the HTML entity "&nbsp;" x4
+    # before entity decoding. Both spellings are removed - confirm against the
+    # generated Markdown.
+    for indent in ("&nbsp;" * 4, "\u00a0" * 4):
+        markdown_text = markdown_text.replace(indent, "")
+
+    blocks = parse_markdown(markdown_text)
+    result = []
+    for block in blocks:
+        if block["type"] == "code":
+            result.append(process_code(block["markdown"]))
+        elif block["type"] == "list":
+            result.append(process_list(block["markdown"]))
+        else:
+            result.append(process_default(block["markdown"]))
+    markdown_text = "\n".join(result)
+    markdown_text = add_paragraphs(markdown_text)
+
+    # VERSION_NUMBER is supplied through the environment by the build system
+    version = os.environ.get("VERSION_NUMBER", "")
+    man_page = (
+        f'.TH {yaml.get("name", "MAN")} 1 "" "GRASS {version}" "GRASS User\'s Manual"\n'
+    )
+    man_page += markdown_text
+
+    return man_page
+
+
+def main():
+    """Read the input Markdown file and write the converted man page."""
+    parser = argparse.ArgumentParser(description="Convert Markdown to Unix man page.")
+    parser.add_argument("input_file", help="Path to the input Markdown file.")
+    parser.add_argument("output_file", help="Path to the output man page file.")
+    args = parser.parse_args()
+
+    # Read and write as UTF-8 explicitly so the result does not depend on the
+    # locale of the build machine.
+    markdown_text = Path(args.input_file).read_text(encoding="utf-8")
+    man_text = markdown_to_man(markdown_text)
+    Path(args.output_file).write_text(man_text, encoding="utf-8")
+
+
+if __name__ == "__main__":
+    main()