ruby8008/md_to_html.py at main · Ruby-xin/ruby8008 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
"""
Markdown-to-HTML conversion tool.

Converts a rewritten Markdown article into an HTML page with illustrations.
"""

import os
import sys
import re
from pathlib import Path
from datetime import datetime

# Put this script's directory at the front of sys.path so that the imports of
# the `modules` package below are looked up there first.
sys.path.insert(0, os.path.dirname(__file__))

from modules.layout_engine import WeChatLayoutEngine
from modules.image_match import ImageMatcher


def convert_markdown_to_html(
    md_file: str,
    author: str = "ruby鑫燕",
    generate_images: bool = True,
    output_dir: str = None
):
    """Convert a Markdown article into an HTML file, optionally with images.

    The first level-1 heading (``# ...``) is used as the title and the level-2
    headings (``## ...``) are used as section names for image generation. The
    Markdown text is rendered with ``WeChatLayoutEngine`` and written to
    ``<stem>_final.html``, where ``<stem>`` is the input file name without its
    last suffix.

    Args:
        md_file: Path of the Markdown file to convert.
        author: Author name passed to the layout engine.
        generate_images: When true, ask ``ImageMatcher`` for a cover image and
            up to three inline images.
        output_dir: Directory for the HTML file. Created if missing. Defaults
            to the directory that contains ``md_file``.

    Returns:
        The path of the written HTML file as a string, or ``None`` when
        ``md_file`` does not exist.
    """
    print("\n" + "="*70)
    print("📄 Markdown → HTML 转换")
    print("="*70 + "\n")

    md_path = Path(md_file)
    if not md_path.exists():
        print(f"❌ 文件不存在: {md_file}")
        return None

    md_content = md_path.read_text(encoding='utf-8')

    # Title: first level-1 heading, with a fixed fallback when there is none.
    title_match = re.search(r'^#\s+(.+)$', md_content, re.MULTILINE)
    title = title_match.group(1) if title_match else "无标题"

    # Section names: level-2 headings only. The pattern is anchored to the
    # start of the line so that "### ..." headings and "## " appearing in the
    # middle of a line are not picked up.
    sections = re.findall(r'^##\s+(.+)$', md_content, re.MULTILINE)

    print(f"📝 文章标题: {title}")
    print(f"📊 章节数量: {len(sections)}")
    print()

    cover_image = None
    inline_images = []

    if generate_images:
        print("🎨 生成配图...")
        image_matcher = ImageMatcher()

        print("   • 封面图...")
        cover_image = image_matcher.generate_cover(title, "", "简约")
        if cover_image:
            print(f"     ✅ {cover_image}")

        # At most three inline images, and never more than there are sections.
        image_count = min(3, len(sections))
        print(f"   • 生成 {image_count} 张配图...")
        # Fall back to an empty list so the loop and the summary below still
        # work if the matcher returns None.
        inline_images = image_matcher.generate_inline_images(
            sections, count=image_count, style="简约"
        ) or []

        print()

    print("🔄 转换为 HTML...")
    layout_engine = WeChatLayoutEngine()

    html_content = layout_engine.render(
        title=title,
        author=author,
        content=md_content,
        cover_image=cover_image,
        inline_images=inline_images
    )

    out_dir = Path(output_dir) if output_dir else md_path.parent
    out_dir.mkdir(parents=True, exist_ok=True)

    # Build the name from the stem rather than str.replace('.md', ...): the
    # replace left names without a lowercase ".md" unchanged, so the HTML
    # could be written over the source file.
    html_file = out_dir / f"{md_path.stem}_final.html"

    # Rewrite relative image sources so they point into this script's
    # directory. Absolute paths and URLs are left untouched.
    base_dir = str(Path(__file__).parent.absolute())
    for img in [cover_image, *inline_images]:
        if not img:
            continue
        is_remote = '://' in img or img.startswith('data:')
        if img.startswith('/') or os.path.isabs(img) or is_remote:
            continue
        html_content = html_content.replace(
            f'src="{img}"',
            f'src="{base_dir}/{img}"'
        )

    html_file.write_text(html_content, encoding='utf-8')

    print("✅ 转换完成！\n")
    print("="*70)
    print(f"📁 输出文件: {html_file}")
    print(f"📸 封面: {cover_image if cover_image else '无'}")
    print(f"🖼️  配图: {len(inline_images)} 张")
    print("="*70 + "\n")

    return str(html_file)


def main():
    """Command-line entry point: parse the arguments and run the conversion."""
    import argparse

    cli = argparse.ArgumentParser(description='Markdown 转 HTML')
    cli.add_argument('md_file', help='Markdown 文件路径')
    cli.add_argument('-a', '--author', default='ruby鑫燕', help='作者名称')
    cli.add_argument('--no-images', action='store_true', help='不生成配图')
    cli.add_argument('-o', '--output', help='输出目录')
    opts = cli.parse_args()

    result = convert_markdown_to_html(
        md_file=opts.md_file,
        author=opts.author,
        generate_images=not opts.no_images,
        output_dir=opts.output,
    )

    # No follow-up hints when the conversion did not produce a file.
    if not result:
        return

    next_steps = (
        "💡 下一步:",
        f"   1. 预览: open {result}",
        f"   2. 审稿: python editor_review.py {result}",
        "   3. 如有问题，运行自我学习: python self_learning.py output/article_*_review.md",
        "",
    )
    for line in next_steps:
        print(line)

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()