From 16a22bba5aa6188ca67a7ec3ae6ee1f20e03fbf3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 12:59:11 +0000 Subject: [PATCH 1/5] Initial plan From 5c375d2bf8c3cf7c8d449ef74deb4ed9adb7d538 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 13:08:38 +0000 Subject: [PATCH 2/5] Add Obsidian output format with --format flag and tests --- README.md | 71 +++++++-- medium2md/cli.py | 38 ++++- medium2md/pipeline.py | 52 ++++++- pyproject.toml | 2 +- tests/__init__.py | 0 tests/test_pipeline.py | 322 +++++++++++++++++++++++++++++++++++++++++ uv.lock | 48 ++++-- 7 files changed, 491 insertions(+), 42 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_pipeline.py diff --git a/README.md b/README.md index 6bb5329..aea1aca 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ [![Python Versions](https://img.shields.io/pypi/pyversions/medium2md-cli.svg)](https://pypi.org/project/medium2md-cli/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -> Convert a Medium export ZIP into clean Markdown with localized images, optimized for Hugo and compatible with Obsidian knowledge bases. +> Convert a Medium export ZIP into clean Markdown with localized images, optimized for Hugo and Obsidian. -**medium2md** is a CLI tool that transforms Medium's HTML export into properly structured Markdown with localized assets. Today, output is optimized for [Hugo](https://gohugo.io/) page bundles and is also readable in [Obsidian](https://obsidian.md/) vaults; planned roadmap work adds stronger Obsidian-specific formatting conventions. +**medium2md** is a CLI tool that transforms Medium's HTML export into properly structured Markdown with localized assets. Output can be generated as [Hugo](https://gohugo.io/) page bundles (default) or as flat [Obsidian](https://obsidian.md/) vault notes, selectable with the `--format` flag. --- @@ -42,7 +42,7 @@ Medium allows you to export your account data as a ZIP archive, but the raw expo | Canonical URL | Preserves the original Medium URL | | Conversion reports | Summarizes what was converted and what was skipped | | Incremental re-runs | *(planned)* Re-run only changed posts | -| Obsidian compatibility | Current output is Obsidian-readable; dedicated Obsidian formatting profile is planned | +| Obsidian compatibility | Flat `.md` notes with Obsidian-style front matter (`title`, `source`); assets in a shared `assets/` folder | This tool is designed to be **deterministic**, **reproducible**, and **CI-friendly**. @@ -59,7 +59,8 @@ Generate correctly formatted Markdown files from Medium posts, with images local - Convert Medium export ZIP (posts under `posts/` in the export) - Extract title and canonical URL; generate slug - Convert HTML to Markdown -- Create Hugo page bundles with `index.md` and optional `images/` +- **Hugo format** (default): Hugo page bundles with `index.md` and optional `images/` +- **Obsidian format**: flat `.md` notes with Obsidian-style front matter (`title`, `source`); images in shared `assets//` - Image localization: download remote images into the bundle; copy local images when present in the export - Basic slug collision handling (`slug-2`, `slug-3`, …) - Terminal progress and summary; per-post image count; prompt to create missing output dir @@ -73,14 +74,12 @@ Generate correctly formatted Markdown files from Medium posts, with images local - Verification command - Theme-specific front matter mapping - Conversion report (e.g. JSON/file) -- Obsidian-friendly output profile (e.g., front matter + file layout conventions for vault workflows) ### Known limitations (current) -- Front matter currently includes `title`, `slug`, `draft`, and optional `medium.canonical`; date/tags are not extracted yet. +- Front matter currently includes `title`, `slug`, `draft`, and optional `medium.canonical` (Hugo) or `title` and `source` (Obsidian); date/tags are not extracted yet. - Embedded content is not converted to Hugo shortcodes yet. - Incremental conversion/state tracking is not implemented yet. -- Output structure is Hugo-first (`content/posts//index.md`); a dedicated Obsidian output mode is not implemented yet. --- @@ -117,9 +116,24 @@ uv run medium2md input/medium-export.zip --out ../blog/content/posts > **Note:** The `input/` directory is tracked by git (via `.gitkeep`) so it exists after a fresh clone, but its contents are ignored — your ZIP files will never be accidentally committed. -### Front Matter Example +### Choosing an output format -Each converted post produces an `index.md` with Hugo-compatible YAML front matter. Current output: +Use `--format` (or `-f`) to select the output format: + +- **`hugo`** (default): each post becomes a Hugo page bundle at `//index.md` with images at `//images/`. +- **`obsidian`**: each post becomes a flat note at `/.md` with images at `/assets//`. Obsidian-style front matter (`title`, `source`) is used instead of Hugo keys. + +```bash +# Hugo format (default) +uv run medium2md input/medium-export.zip --out content/posts + +# Obsidian format +uv run medium2md input/medium-export.zip --out my-vault/posts --format obsidian +``` + +### Front Matter Examples + +**Hugo format** (`--format hugo`, default): ```yaml --- @@ -131,13 +145,24 @@ medium: --- ``` -Additional keys (e.g. `date`, `lastmod`, `tags`) are planned. +**Obsidian format** (`--format obsidian`): + +```yaml +--- +title: "My Post Title" +source: "https://medium.com/@you/post-slug" +--- +``` + +Additional keys (e.g. `date`, `lastmod`, `tags`) are planned for both formats. --- ## Output Structure -Each Medium post becomes a Hugo page bundle. Image links in the Markdown point into the bundle’s `images/` folder (remote images are downloaded; local images from the export are copied): +### Hugo format (default) + +Each Medium post becomes a Hugo page bundle. Image links in the Markdown point into the bundle's `images/` folder (remote images are downloaded; local images from the export are copied): ``` content/posts/ @@ -149,6 +174,22 @@ content/posts/ └── … ``` +### Obsidian format (`--format obsidian`) + +Each Medium post becomes a flat Markdown note. Images are placed in a shared `assets/` folder: + +``` +my-vault/posts/ +├── my-post-slug.md +├── another-post.md +└── assets/ + ├── my-post-slug/ + │ ├── 1.png + │ └── 2.jpg + └── another-post/ + └── 1.png +``` + --- ## Project Structure @@ -184,13 +225,13 @@ ZIP → extract → find posts → parse HTML → localize images (copy/download | Milestone | Focus | Status | |---|---|---| | 1 — Core conversion | ZIP ingestion, post discovery, HTML→Markdown conversion, Hugo bundle writing, local/remote image localization, slug collision handling | ✅ Implemented | -| 2 — Content fidelity + verification | Better metadata extraction (`date`, tags), machine-readable conversion report, `verify` command, clearer failure reporting, Obsidian formatting compatibility review | 📋 Planned | -| 3 — Incremental + extensibility | Incremental state tracking, embed conversion, output-profile mapping (Hugo/Obsidian), optional Pandoc backend, internal link rewriting | 📋 Planned | +| 2 — Content fidelity + verification | Better metadata extraction (`date`, tags), machine-readable conversion report, `verify` command, clearer failure reporting, Obsidian output format (`--format obsidian`) | ✅ Implemented (Obsidian format); 📋 Planned (date/tags, verification) | +| 3 — Incremental + extensibility | Incremental state tracking, embed conversion, optional Pandoc backend, internal link rewriting | 📋 Planned | ### Roadmap status snapshot (code-verified) -- The repository has implemented the core `convert` flow end-to-end. -- Milestone 2 is the highest-impact next step for knowledge-base quality (`date`/tags extraction, verification/reporting, Obsidian compatibility conventions). +- The repository has implemented the core `convert` flow end-to-end for both Hugo and Obsidian output formats. +- Milestone 2 next steps: `date`/tags extraction and a `verify` command are the highest-impact remaining items. - Milestone 3 remains optional/polish after fidelity and verification are stable. --- diff --git a/medium2md/cli.py b/medium2md/cli.py index 4fcc4bb..67b83a6 100644 --- a/medium2md/cli.py +++ b/medium2md/cli.py @@ -4,20 +4,29 @@ import typer from medium2md.pipeline import ( + OutputFormat, find_post_html_files, get_title_canonical, convert_html_file, slug_from_post, write_bundle, + write_note, ) -app = typer.Typer(help="Convert a Medium export ZIP into Hugo page bundles.") +app = typer.Typer(help="Convert a Medium export ZIP into Hugo page bundles or Obsidian notes.") @app.command() def convert( export_zip: Path = typer.Argument(..., exists=True, file_okay=True, dir_okay=False), out: Path = typer.Option(Path("content/posts"), "--out", "-o"), + fmt: OutputFormat = typer.Option( + OutputFormat.hugo, + "--format", + "-f", + help="Output format: 'hugo' (default) produces Hugo page bundles; 'obsidian' produces flat Markdown notes.", + show_default=True, + ), ): out = out.resolve() if not out.exists(): @@ -34,6 +43,7 @@ def convert( typer.echo(f"Export: {export_zip}") typer.echo(f"Out: {out}") + typer.echo(f"Format: {fmt.value}") with tempfile.TemporaryDirectory(prefix="medium2md_") as td: tmp_dir = Path(td) @@ -72,13 +82,29 @@ def convert( suffix = 2 if slug == base_slug else int(slug.split("-")[-1]) + 1 slug = f"{base_slug}-{suffix}" used_slugs.add(slug) - bundle_dir = out / slug - bundle_dir.mkdir(parents=True, exist_ok=True) - title, canonical, body_md, num_images = convert_html_file(html_path, tmp_dir, bundle_dir) - write_bundle(out, slug, title, canonical, body_md) + + if fmt == OutputFormat.hugo: + bundle_dir = out / slug + bundle_dir.mkdir(parents=True, exist_ok=True) + title, canonical, body_md, num_images = convert_html_file(html_path, tmp_dir, bundle_dir) + write_bundle(out, slug, title, canonical, body_md) + output_path = out / slug / "index.md" + else: + # Obsidian: flat note + shared assets folder + assets_dir = out / "assets" / slug + title, canonical, body_md, num_images = convert_html_file( + html_path, + tmp_dir, + out, + images_dir=assets_dir, + src_prefix=f"assets/{slug}/", + ) + write_note(out, slug, title, canonical, body_md) + output_path = out / f"{slug}.md" + written += 1 img_info = f" ({num_images} image(s))" if num_images else "" - typer.echo(f" [{i}/{len(post_files)}] {slug} → {out / slug / 'index.md'}{img_info}") + typer.echo(f" [{i}/{len(post_files)}] {slug} → {output_path}{img_info}") except Exception as e: errors += 1 typer.echo( diff --git a/medium2md/pipeline.py b/medium2md/pipeline.py index 45671eb..3f3a8ae 100644 --- a/medium2md/pipeline.py +++ b/medium2md/pipeline.py @@ -2,6 +2,7 @@ import shutil import time +from enum import Enum from pathlib import Path from urllib.parse import urlparse @@ -10,6 +11,13 @@ from markdownify import markdownify as md from slugify import slugify + +class OutputFormat(str, Enum): + """Supported output formats for converted Markdown files.""" + + hugo = "hugo" + obsidian = "obsidian" + try: import httpx except ImportError: @@ -107,10 +115,10 @@ def _localize_images( article_soup: BeautifulSoup, html_path: Path, tmp_dir: Path, - bundle_dir: Path, + images_dir: Path, + src_prefix: str, ) -> int: - """In-place: resolve each img src to a local file or download, copy into bundle/images/, set src to images/. Returns count of images localized.""" - images_dir = bundle_dir / "images" + """In-place: resolve each img src to a local file or download, copy into images_dir, set src to src_prefix. Returns count of images localized.""" images_dir.mkdir(parents=True, exist_ok=True) imgs = article_soup.find_all("img", src=True) localized = 0 @@ -131,7 +139,7 @@ def _localize_images( dest_name = f"{i}{ext}" dest = images_dir / dest_name shutil.copy2(resolved, dest) - img["src"] = f"images/{dest_name}" + img["src"] = f"{src_prefix}{dest_name}" localized += 1 continue # Remote URL: download (with User-Agent and retry so CDNs don't block) @@ -154,7 +162,7 @@ def _localize_images( dest_name = f"{i}{ext}" dest = images_dir / dest_name dest.write_bytes(r.content) - img["src"] = f"images/{dest_name}" + img["src"] = f"{src_prefix}{dest_name}" localized += 1 last_error = None break @@ -172,8 +180,14 @@ def convert_html_file( html_path: Path, tmp_dir: Path, bundle_dir: Path, + *, + images_dir: Path | None = None, + src_prefix: str = "images/", ) -> tuple[str, str | None, str, int]: - """Parse one post HTML file, localize images into bundle_dir/images/, return (title, canonical_url, markdown_body, num_images_localized).""" + """Parse one post HTML file, localize images, return (title, canonical_url, markdown_body, num_images_localized). + + Images are saved into *images_dir* (defaults to bundle_dir/images) and referenced with *src_prefix* in the Markdown. + """ raw = html_path.read_text(encoding="utf-8", errors="replace") soup = BeautifulSoup(raw, "lxml") title = _extract_title(soup) @@ -183,8 +197,9 @@ def convert_html_file( body_md = "" localized = 0 else: + resolved_images_dir = images_dir if images_dir is not None else bundle_dir / "images" article_soup = BeautifulSoup(article_html, "lxml") - localized = _localize_images(article_soup, html_path, tmp_dir, bundle_dir) + localized = _localize_images(article_soup, html_path, tmp_dir, resolved_images_dir, src_prefix) body_md = md( str(article_soup), heading_style="ATX", @@ -211,7 +226,7 @@ def write_bundle(out_root: Path, slug: str, title: str, canonical: str | None, b """Write one Hugo page bundle: out_root//index.md. Returns path to index.md.""" bundle_dir = out_root / slug bundle_dir.mkdir(parents=True, exist_ok=True) - front = { + front: dict = { "title": title, "draft": True, "slug": slug, @@ -227,3 +242,24 @@ def write_bundle(out_root: Path, slug: str, title: str, canonical: str | None, b if body_md and not body_md.endswith("\n"): f.write("\n") return index_md + + +def write_note(out_root: Path, slug: str, title: str, canonical: str | None, body_md: str) -> Path: + """Write one Obsidian note: out_root/.md. Returns path to the note. + + Front matter uses Obsidian conventions: ``title`` and ``source`` (canonical URL). + Images are expected to reside in ``out_root/assets//`` and are referenced + as ``assets//`` in the Markdown body. + """ + front: dict = {"title": title} + if canonical: + front["source"] = canonical + note_path = out_root / f"{slug}.md" + with note_path.open("w", encoding="utf-8") as f: + f.write("---\n") + f.write(yaml.dump(front, default_flow_style=False, allow_unicode=True, sort_keys=False)) + f.write("---\n\n") + f.write(body_md) + if body_md and not body_md.endswith("\n"): + f.write("\n") + return note_path diff --git a/pyproject.toml b/pyproject.toml index 14bf99b..18b9d47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ Repository = "https://github.com/edgarbc/medium2md" Documentation = "https://github.com/edgarbc/medium2md#readme" [project.optional-dependencies] -dev = ["twine>=6.0"] +dev = ["pytest>=8.0", "twine>=6.0"] [tool.hatch.build.targets.wheel] packages = ["medium2md"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..bbd4d30 --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,322 @@ +"""Tests for medium2md pipeline: Hugo and Obsidian output formats.""" + +import textwrap +import zipfile +from pathlib import Path + +import pytest +import yaml + +from medium2md.pipeline import ( + OutputFormat, + convert_html_file, + find_post_html_files, + slug_from_post, + write_bundle, + write_note, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +MINIMAL_HTML = textwrap.dedent("""\ + + + + Hello World + + + +
+

Hello World

+

This is the post body.

+
+ + +""") + +MINIMAL_HTML_NO_CANONICAL = textwrap.dedent("""\ + + + No Canonical Post +

No Canonical Post

Body.

+ +""") + + +def _make_post_html(tmp_path: Path, content: str = MINIMAL_HTML, name: str = "post.html") -> Path: + """Write HTML to tmp_path/posts/ and return the file path.""" + posts_dir = tmp_path / "posts" + posts_dir.mkdir(parents=True, exist_ok=True) + html_file = posts_dir / name + html_file.write_text(content, encoding="utf-8") + return html_file + + +def _read_front_matter(md_path: Path) -> dict: + """Parse YAML front matter from a Markdown file.""" + text = md_path.read_text(encoding="utf-8") + assert text.startswith("---\n"), f"No front matter in {md_path}" + end = text.index("---\n", 4) + return yaml.safe_load(text[4:end]) + + +# --------------------------------------------------------------------------- +# slug_from_post +# --------------------------------------------------------------------------- + + +def test_slug_from_canonical(): + slug = slug_from_post("Hello World", "https://medium.com/@user/hello-world-abc123") + assert slug == "hello-world-abc123" + + +def test_slug_from_title_fallback(): + slug = slug_from_post("Hello World", None) + assert slug == "hello-world" + + +def test_slug_untitled_fallback(): + slug = slug_from_post("", None) + assert slug == "untitled" + + +# --------------------------------------------------------------------------- +# write_bundle (Hugo format) +# --------------------------------------------------------------------------- + + +def test_write_bundle_creates_index_md(tmp_path): + out = tmp_path / "posts" + out.mkdir() + result = write_bundle(out, "my-post", "My Post", "https://medium.com/@u/my-post", "Body text.") + assert result == out / "my-post" / "index.md" + assert result.exists() + + +def test_write_bundle_front_matter(tmp_path): + out = tmp_path / "posts" + out.mkdir() + write_bundle(out, "my-post", "My Post", "https://medium.com/@u/my-post", "Body.") + fm = _read_front_matter(out / "my-post" / "index.md") + assert fm["title"] == "My Post" + assert fm["slug"] == "my-post" + assert fm["draft"] is True + assert fm["medium"]["canonical"] == "https://medium.com/@u/my-post" + + +def test_write_bundle_no_canonical(tmp_path): + out = tmp_path / "posts" + out.mkdir() + write_bundle(out, "my-post", "My Post", None, "Body.") + fm = _read_front_matter(out / "my-post" / "index.md") + assert "medium" not in fm + + +def test_write_bundle_body_content(tmp_path): + out = tmp_path / "posts" + out.mkdir() + write_bundle(out, "slug", "Title", None, "Some **bold** text.") + content = (out / "slug" / "index.md").read_text(encoding="utf-8") + assert "Some **bold** text." in content + + +# --------------------------------------------------------------------------- +# write_note (Obsidian format) +# --------------------------------------------------------------------------- + + +def test_write_note_creates_flat_md(tmp_path): + result = write_note(tmp_path, "my-note", "My Note", "https://medium.com/@u/my-note", "Body.") + assert result == tmp_path / "my-note.md" + assert result.exists() + + +def test_write_note_front_matter(tmp_path): + write_note(tmp_path, "my-note", "My Note", "https://medium.com/@u/my-note", "Body.") + fm = _read_front_matter(tmp_path / "my-note.md") + assert fm["title"] == "My Note" + assert fm["source"] == "https://medium.com/@u/my-note" + # Obsidian notes should NOT include Hugo-specific keys + assert "slug" not in fm + assert "draft" not in fm + assert "medium" not in fm + + +def test_write_note_no_canonical(tmp_path): + write_note(tmp_path, "my-note", "My Note", None, "Body.") + fm = _read_front_matter(tmp_path / "my-note.md") + assert "source" not in fm + + +def test_write_note_body_content(tmp_path): + write_note(tmp_path, "slug", "Title", None, "Some **bold** text.") + content = (tmp_path / "slug.md").read_text(encoding="utf-8") + assert "Some **bold** text." in content + + +# --------------------------------------------------------------------------- +# convert_html_file — Hugo layout +# --------------------------------------------------------------------------- + + +def test_convert_html_file_hugo_returns_title_and_body(tmp_path): + html_file = _make_post_html(tmp_path) + bundle_dir = tmp_path / "out" / "hello-world-abc123" + bundle_dir.mkdir(parents=True) + title, canonical, body_md, num_images = convert_html_file(html_file, tmp_path, bundle_dir) + assert title == "Hello World" + assert canonical == "https://medium.com/@user/hello-world-abc123" + assert "Hello World" in body_md + assert "post body" in body_md + assert num_images == 0 + + +def test_convert_html_file_hugo_images_dir(tmp_path): + """Hugo: images should land in bundle_dir/images/ by default.""" + html_file = _make_post_html(tmp_path) + bundle_dir = tmp_path / "out" / "slug" + bundle_dir.mkdir(parents=True) + convert_html_file(html_file, tmp_path, bundle_dir) + # No remote images in the test HTML; images dir may or may not be created + # (it is only created when there are actual tags to localise). + # The important thing is that the function doesn't crash. + + +# --------------------------------------------------------------------------- +# convert_html_file — Obsidian layout +# --------------------------------------------------------------------------- + + +def test_convert_html_file_obsidian_custom_images_dir(tmp_path): + """Obsidian: pass explicit images_dir and src_prefix.""" + html_file = _make_post_html(tmp_path) + assets_dir = tmp_path / "vault" / "assets" / "hello-world" + title, canonical, body_md, num_images = convert_html_file( + html_file, + tmp_path, + tmp_path / "vault", + images_dir=assets_dir, + src_prefix="assets/hello-world/", + ) + assert title == "Hello World" + assert "Hello World" in body_md + assert num_images == 0 + + +# --------------------------------------------------------------------------- +# find_post_html_files +# --------------------------------------------------------------------------- + + +def test_find_post_html_files_with_posts_dir(tmp_path): + (tmp_path / "posts").mkdir() + (tmp_path / "posts" / "a.html").write_text("") + (tmp_path / "posts" / "b.html").write_text("") + (tmp_path / "blocks").mkdir() + (tmp_path / "blocks" / "x.html").write_text("", encoding="utf-8") # should be ignored + result = find_post_html_files(tmp_path) + names = {p.name for p in result} + assert "a.html" in names + assert "b.html" in names + + +def test_find_post_html_files_excludes_non_post_dirs(tmp_path): + for d in ("blocks", "bookmarks", "claps"): + (tmp_path / d).mkdir() + (tmp_path / d / "x.html").write_text("") + (tmp_path / "README.html").write_text("") + (tmp_path / "my-post.html").write_text("") + result = find_post_html_files(tmp_path) + names = [p.name for p in result] + assert "my-post.html" in names + assert "x.html" not in names + assert "README.html" not in names + + +# --------------------------------------------------------------------------- +# OutputFormat enum +# --------------------------------------------------------------------------- + + +def test_output_format_values(): + assert OutputFormat.hugo == "hugo" + assert OutputFormat.obsidian == "obsidian" + + +def test_output_format_from_string(): + assert OutputFormat("hugo") is OutputFormat.hugo + assert OutputFormat("obsidian") is OutputFormat.obsidian + + +# --------------------------------------------------------------------------- +# End-to-end: ZIP → Hugo bundle +# --------------------------------------------------------------------------- + + +def _make_zip(tmp_path: Path, posts: dict[str, str]) -> Path: + """Create a minimal Medium-style export ZIP from a dict of {filename: html_content}.""" + zip_path = tmp_path / "export.zip" + with zipfile.ZipFile(zip_path, "w") as zf: + for name, content in posts.items(): + zf.writestr(f"posts/{name}", content) + return zip_path + + +def test_e2e_hugo(tmp_path): + """End-to-end test: ZIP → Hugo bundle via CLI runner.""" + from typer.testing import CliRunner + from medium2md.cli import app + + zip_path = _make_zip(tmp_path, {"hello-world.html": MINIMAL_HTML}) + out_dir = tmp_path / "out" + out_dir.mkdir() + + runner = CliRunner() + result = runner.invoke(app, [str(zip_path), "--out", str(out_dir), "--format", "hugo"]) + assert result.exit_code == 0, result.output + index_md = out_dir / "hello-world-abc123" / "index.md" + assert index_md.exists() + fm = _read_front_matter(index_md) + assert fm["title"] == "Hello World" + assert fm["slug"] == "hello-world-abc123" + assert fm["draft"] is True + + +def test_e2e_obsidian(tmp_path): + """End-to-end test: ZIP → Obsidian note via CLI runner.""" + from typer.testing import CliRunner + from medium2md.cli import app + + zip_path = _make_zip(tmp_path, {"hello-world.html": MINIMAL_HTML}) + out_dir = tmp_path / "vault" + out_dir.mkdir() + + runner = CliRunner() + result = runner.invoke(app, [str(zip_path), "--out", str(out_dir), "--format", "obsidian"]) + assert result.exit_code == 0, result.output + note_md = out_dir / "hello-world-abc123.md" + assert note_md.exists() + fm = _read_front_matter(note_md) + assert fm["title"] == "Hello World" + assert fm["source"] == "https://medium.com/@user/hello-world-abc123" + assert "slug" not in fm + assert "draft" not in fm + + +def test_e2e_obsidian_no_bundle_dir(tmp_path): + """Obsidian output must NOT create a / subdirectory for the note itself.""" + from typer.testing import CliRunner + from medium2md.cli import app + + zip_path = _make_zip(tmp_path, {"hello-world.html": MINIMAL_HTML}) + out_dir = tmp_path / "vault" + out_dir.mkdir() + + runner = CliRunner() + runner.invoke(app, [str(zip_path), "--out", str(out_dir), "--format", "obsidian"]) + # A slug directory should only exist if there were images (assets//) + # The note must be a flat file, not a bundle directory. + assert (out_dir / "hello-world-abc123.md").is_file() + assert not (out_dir / "hello-world-abc123" / "index.md").exists() diff --git a/uv.lock b/uv.lock index 3386080..677ee27 100644 --- a/uv.lock +++ b/uv.lock @@ -56,20 +56,14 @@ sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8 wheels = [ { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, - { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, - { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, - { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, - { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, - { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, - { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, @@ -149,33 +143,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/9e/6b4397a3e3d15123de3b1806ef342522393d50736c13b20ec4c9ea6693a6/cryptography-46.0.5-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", size = 4275637, upload-time = "2026-02-10T19:17:10.53Z" }, { url = "https://files.pythonhosted.org/packages/63/e7/471ab61099a3920b0c77852ea3f0ea611c9702f651600397ac567848b897/cryptography-46.0.5-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", size = 4424742, upload-time = "2026-02-10T19:17:12.388Z" }, { url = "https://files.pythonhosted.org/packages/37/53/a18500f270342d66bf7e4d9f091114e31e5ee9e7375a5aba2e85a91e0044/cryptography-46.0.5-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", size = 4277528, upload-time = "2026-02-10T19:17:13.853Z" }, - { url = "https://files.pythonhosted.org/packages/22/29/c2e812ebc38c57b40e7c583895e73c8c5adb4d1e4a0cc4c5a4fdab2b1acc/cryptography-46.0.5-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", size = 4947993, upload-time = "2026-02-10T19:17:15.618Z" }, { url = "https://files.pythonhosted.org/packages/6b/e7/237155ae19a9023de7e30ec64e5d99a9431a567407ac21170a046d22a5a3/cryptography-46.0.5-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", size = 4456855, upload-time = "2026-02-10T19:17:17.221Z" }, { url = "https://files.pythonhosted.org/packages/2d/87/fc628a7ad85b81206738abbd213b07702bcbdada1dd43f72236ef3cffbb5/cryptography-46.0.5-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", size = 3984635, upload-time = "2026-02-10T19:17:18.792Z" }, { url = "https://files.pythonhosted.org/packages/84/29/65b55622bde135aedf4565dc509d99b560ee4095e56989e815f8fd2aa910/cryptography-46.0.5-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", size = 4277038, upload-time = "2026-02-10T19:17:20.256Z" }, - { url = "https://files.pythonhosted.org/packages/bc/36/45e76c68d7311432741faf1fbf7fac8a196a0a735ca21f504c75d37e2558/cryptography-46.0.5-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", size = 4912181, upload-time = "2026-02-10T19:17:21.825Z" }, { url = "https://files.pythonhosted.org/packages/6d/1a/c1ba8fead184d6e3d5afcf03d569acac5ad063f3ac9fb7258af158f7e378/cryptography-46.0.5-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", size = 4456482, upload-time = "2026-02-10T19:17:25.133Z" }, { url = "https://files.pythonhosted.org/packages/f9/e5/3fb22e37f66827ced3b902cf895e6a6bc1d095b5b26be26bd13c441fdf19/cryptography-46.0.5-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", size = 4405497, upload-time = "2026-02-10T19:17:26.66Z" }, { url = "https://files.pythonhosted.org/packages/1a/df/9d58bb32b1121a8a2f27383fabae4d63080c7ca60b9b5c88be742be04ee7/cryptography-46.0.5-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", size = 4667819, upload-time = "2026-02-10T19:17:28.569Z" }, { url = "https://files.pythonhosted.org/packages/67/c8/581a6702e14f0898a0848105cbefd20c058099e2c2d22ef4e476dfec75d7/cryptography-46.0.5-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", size = 4265728, upload-time = "2026-02-10T19:17:35.569Z" }, { url = "https://files.pythonhosted.org/packages/dd/4a/ba1a65ce8fc65435e5a849558379896c957870dd64fecea97b1ad5f46a37/cryptography-46.0.5-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87", size = 4408287, upload-time = "2026-02-10T19:17:36.938Z" }, { url = "https://files.pythonhosted.org/packages/f8/67/8ffdbf7b65ed1ac224d1c2df3943553766914a8ca718747ee3871da6107e/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", size = 4270291, upload-time = "2026-02-10T19:17:38.748Z" }, - { url = "https://files.pythonhosted.org/packages/f8/e5/f52377ee93bc2f2bba55a41a886fd208c15276ffbd2569f2ddc89d50e2c5/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", size = 4927539, upload-time = "2026-02-10T19:17:40.241Z" }, { url = "https://files.pythonhosted.org/packages/3b/02/cfe39181b02419bbbbcf3abdd16c1c5c8541f03ca8bda240debc467d5a12/cryptography-46.0.5-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", size = 4442199, upload-time = "2026-02-10T19:17:41.789Z" }, { url = "https://files.pythonhosted.org/packages/c0/96/2fcaeb4873e536cf71421a388a6c11b5bc846e986b2b069c79363dc1648e/cryptography-46.0.5-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", size = 3960131, upload-time = "2026-02-10T19:17:43.379Z" }, { url = "https://files.pythonhosted.org/packages/d8/d2/b27631f401ddd644e94c5cf33c9a4069f72011821cf3dc7309546b0642a0/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", size = 4270072, upload-time = "2026-02-10T19:17:45.481Z" }, - { url = "https://files.pythonhosted.org/packages/f4/a7/60d32b0370dae0b4ebe55ffa10e8599a2a59935b5ece1b9f06edb73abdeb/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", size = 4892170, upload-time = "2026-02-10T19:17:46.997Z" }, { url = "https://files.pythonhosted.org/packages/d2/b9/cf73ddf8ef1164330eb0b199a589103c363afa0cf794218c24d524a58eab/cryptography-46.0.5-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", size = 4441741, upload-time = "2026-02-10T19:17:48.661Z" }, { url = "https://files.pythonhosted.org/packages/5f/eb/eee00b28c84c726fe8fa0158c65afe312d9c3b78d9d01daf700f1f6e37ff/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", size = 4396728, upload-time = "2026-02-10T19:17:50.058Z" }, { url = "https://files.pythonhosted.org/packages/65/f4/6bc1a9ed5aef7145045114b75b77c2a8261b4d38717bd8dea111a63c3442/cryptography-46.0.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", size = 4652001, upload-time = "2026-02-10T19:17:51.54Z" }, { url = "https://files.pythonhosted.org/packages/0f/04/c85bdeab78c8bc77b701bf0d9bdcf514c044e18a46dcff330df5448631b0/cryptography-46.0.5-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", size = 4275349, upload-time = "2026-02-10T19:17:58.419Z" }, { url = "https://files.pythonhosted.org/packages/5c/32/9b87132a2f91ee7f5223b091dc963055503e9b442c98fc0b8a5ca765fab0/cryptography-46.0.5-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", size = 4420667, upload-time = "2026-02-10T19:18:00.619Z" }, { url = "https://files.pythonhosted.org/packages/a1/a6/a7cb7010bec4b7c5692ca6f024150371b295ee1c108bdc1c400e4c44562b/cryptography-46.0.5-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", size = 4276980, upload-time = "2026-02-10T19:18:02.379Z" }, - { url = "https://files.pythonhosted.org/packages/8e/7c/c4f45e0eeff9b91e3f12dbd0e165fcf2a38847288fcfd889deea99fb7b6d/cryptography-46.0.5-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", size = 4939143, upload-time = "2026-02-10T19:18:03.964Z" }, { url = "https://files.pythonhosted.org/packages/37/19/e1b8f964a834eddb44fa1b9a9976f4e414cbb7aa62809b6760c8803d22d1/cryptography-46.0.5-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", size = 4453674, upload-time = "2026-02-10T19:18:05.588Z" }, { url = "https://files.pythonhosted.org/packages/db/ed/db15d3956f65264ca204625597c410d420e26530c4e2943e05a0d2f24d51/cryptography-46.0.5-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", size = 3978801, upload-time = "2026-02-10T19:18:07.167Z" }, { url = "https://files.pythonhosted.org/packages/41/e2/df40a31d82df0a70a0daf69791f91dbb70e47644c58581d654879b382d11/cryptography-46.0.5-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", size = 4276755, upload-time = "2026-02-10T19:18:09.813Z" }, - { url = "https://files.pythonhosted.org/packages/33/45/726809d1176959f4a896b86907b98ff4391a8aa29c0aaaf9450a8a10630e/cryptography-46.0.5-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", size = 4901539, upload-time = "2026-02-10T19:18:11.263Z" }, { url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", size = 4452794, upload-time = "2026-02-10T19:18:12.914Z" }, { url = "https://files.pythonhosted.org/packages/02/ef/ffeb542d3683d24194a38f66ca17c0a4b8bf10631feef44a7ef64e631b1a/cryptography-46.0.5-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", size = 4404160, upload-time = "2026-02-10T19:18:14.375Z" }, { url = "https://files.pythonhosted.org/packages/96/93/682d2b43c1d5f1406ed048f377c0fc9fc8f7b0447a478d5c65ab3d3a66eb/cryptography-46.0.5-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", size = 4667123, upload-time = "2026-02-10T19:18:15.886Z" }, @@ -248,6 +236,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + [[package]] name = "jaraco-classes" version = "3.4.0" @@ -421,6 +418,7 @@ dependencies = [ [package.optional-dependencies] dev = [ + { name = "pytest" }, { name = "twine" }, ] @@ -430,6 +428,7 @@ requires-dist = [ { name = "httpx", specifier = ">=0.28.1" }, { name = "lxml", specifier = ">=6.0.2" }, { name = "markdownify", specifier = ">=1.2.2" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, { name = "python-dateutil", specifier = ">=2.9.0.post0" }, { name = "python-slugify", specifier = ">=8.0.4" }, { name = "pyyaml", specifier = ">=6.0.3" }, @@ -491,6 +490,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, ] +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + [[package]] name = "pycparser" version = "3.0" @@ -509,6 +517,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" From 9cee92d52e72e47eaacd8228b3fe37a6e13b67cd Mon Sep 17 00:00:00 2001 From: Edgar Bermudez Date: Thu, 28 May 2026 07:25:16 -0600 Subject: [PATCH 3/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- medium2md/pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/medium2md/pipeline.py b/medium2md/pipeline.py index 3f3a8ae..baf2047 100644 --- a/medium2md/pipeline.py +++ b/medium2md/pipeline.py @@ -119,8 +119,10 @@ def _localize_images( src_prefix: str, ) -> int: """In-place: resolve each img src to a local file or download, copy into images_dir, set src to src_prefix. Returns count of images localized.""" - images_dir.mkdir(parents=True, exist_ok=True) imgs = article_soup.find_all("img", src=True) + if not imgs: + return 0 + images_dir.mkdir(parents=True, exist_ok=True) localized = 0 for i, img in enumerate(imgs, 1): src = img["src"].strip() From 08b18e84be754b0cd24f57d1777d067c9bd8f403 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 13:29:14 +0000 Subject: [PATCH 4/5] Add UTF-8 encoding to targeted test writes --- tests/test_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index bbd4d30..031e492 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -212,8 +212,8 @@ def test_convert_html_file_obsidian_custom_images_dir(tmp_path): def test_find_post_html_files_with_posts_dir(tmp_path): (tmp_path / "posts").mkdir() - (tmp_path / "posts" / "a.html").write_text("") - (tmp_path / "posts" / "b.html").write_text("") + (tmp_path / "posts" / "a.html").write_text("", encoding="utf-8") + (tmp_path / "posts" / "b.html").write_text("", encoding="utf-8") (tmp_path / "blocks").mkdir() (tmp_path / "blocks" / "x.html").write_text("", encoding="utf-8") # should be ignored result = find_post_html_files(tmp_path) From 1890f389c89a55f987b3c7fa391fed286d2bac72 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 30 May 2026 00:07:10 +0000 Subject: [PATCH 5/5] Fix test file encodings --- tests/test_pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 031e492..e46274b 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -225,9 +225,9 @@ def test_find_post_html_files_with_posts_dir(tmp_path): def test_find_post_html_files_excludes_non_post_dirs(tmp_path): for d in ("blocks", "bookmarks", "claps"): (tmp_path / d).mkdir() - (tmp_path / d / "x.html").write_text("") - (tmp_path / "README.html").write_text("") - (tmp_path / "my-post.html").write_text("") + (tmp_path / d / "x.html").write_text("", encoding="utf-8") + (tmp_path / "README.html").write_text("", encoding="utf-8") + (tmp_path / "my-post.html").write_text("", encoding="utf-8") result = find_post_html_files(tmp_path) names = [p.name for p in result] assert "my-post.html" in names