from __future__ import annotations

"""将 .docx 文档转换为 Markdown。

实现思路：
- 使用 mammoth 将 docx 转为 HTML（对 Word 样式有较好兼容）
- 再使用 markdownify 将 HTML 转为 Markdown
- 可选导出文档内图片到 assets 目录，并在 Markdown 中引用相对路径
"""

import argparse
import os
from pathlib import Path


def main() -> int:
    """Command-line entry point: convert a .docx file to Markdown.

    Positional arguments are the input ``docx_path`` and the output
    ``md_path``. When ``--assets-dir`` is given, embedded images are written
    to that directory and referenced from the Markdown by a path relative to
    the output file; otherwise images are emitted with an empty ``src``.

    Returns:
        0 on success.

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist or is not a file.
    """
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument("docx_path")
    parser.add_argument("md_path")
    parser.add_argument("--assets-dir", default=None)
    args = parser.parse_args()

    docx_path = Path(args.docx_path).expanduser().resolve()
    md_path = Path(args.md_path).expanduser().resolve()
    assets_dir = Path(args.assets_dir).expanduser().resolve() if args.assets_dir else None

    if not docx_path.exists() or not docx_path.is_file():
        raise FileNotFoundError(str(docx_path))

    # Imported after input validation so a bad path is reported before any
    # missing-dependency error.
    import mammoth
    from markdownify import markdownify as md

    image_index = 0

    def _convert_image(image):
        """Write one embedded image to the assets directory and return its ``src``."""
        nonlocal image_index
        if assets_dir is None:
            # Not exporting images: return an empty src so the image bytes
            # are never inlined into the Markdown.
            return {"src": ""}
        assets_dir.mkdir(parents=True, exist_ok=True)
        ext = _guess_image_extension(image.content_type)
        image_index += 1
        target = assets_dir / f"image_{image_index:03d}{ext}"
        # mammoth image objects expose their bytes through ``open()``, which
        # returns a file-like object; they have no ``read()`` method.
        with image.open() as image_bytes:
            target.write_bytes(image_bytes.read())
        try:
            rel = os.path.relpath(target, md_path.parent)
        except ValueError:
            # On Windows no relative path exists between different drives;
            # fall back to the absolute location.
            rel = str(target)
        # Markdown links use forward slashes regardless of platform.
        return {"src": rel.replace("\\", "/")}

    # mammoth's documented input is a binary file object; opening it here
    # also guarantees the handle is closed once conversion finishes.
    with docx_path.open("rb") as docx_file:
        result = mammoth.convert_to_html(
            docx_file,
            convert_image=mammoth.images.img_element(_convert_image),
        )

    # Surface conversion warnings instead of silently discarding them.
    for message in result.messages:
        print(f"{message.type}: {message.message}", file=sys.stderr)

    markdown = md(result.value, heading_style="ATX", bullets="-")

    md_path.parent.mkdir(parents=True, exist_ok=True)
    md_path.write_text(markdown, encoding="utf-8")
    return 0


def _guess_image_extension(content_type: str) -> str:
    """根据图片的 MIME 类型推断文件扩展名。"""
    mapping = {
        "image/png": ".png",
        "image/jpeg": ".jpg",
        "image/jpg": ".jpg",
        "image/gif": ".gif",
        "image/bmp": ".bmp",
        "image/tiff": ".tiff",
        "image/webp": ".webp",
        "image/svg+xml": ".svg",
    }
    return mapping.get(content_type, "")


if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the
    # process exit status.
    exit_code = main()
    raise SystemExit(exit_code)