#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""将Markdown转换为Word文档"""

from docx import Document
from docx.shared import Pt, Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
import re

def create_word_doc():
    """Create a new document whose 'Normal' style uses 12 pt '宋体'.

    Returns:
        The freshly created ``Document`` with the adjusted default style.
    """
    document = Document()

    # Configure the default ('Normal') style font.
    normal_style = document.styles['Normal']
    default_font = normal_style.font
    default_font.name = '宋体'
    default_font.size = Pt(12)
    # Also write the typeface into the w:eastAsia attribute of rFonts
    # (presumably so CJK text picks up the same font -- confirm against the
    # python-docx documentation).
    normal_style._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')

    return document

def add_heading(doc, text, level):
    """Append a heading to ``doc`` and switch its runs to the '黑体' typeface.

    Args:
        doc: Document the heading is appended to.
        text: Heading text.
        level: Heading level forwarded to ``doc.add_heading``.

    Returns:
        The heading paragraph returned by ``doc.add_heading``.
    """
    typeface = '黑体'
    heading_paragraph = doc.add_heading(text, level=level)
    for heading_run in heading_paragraph.runs:
        heading_run.font.name = typeface
        # Mirror the typeface into the w:eastAsia attribute of the run's rFonts.
        heading_run._element.rPr.rFonts.set(qn('w:eastAsia'), typeface)
    return heading_paragraph

def add_paragraph(doc, text, bold=False):
    """Append a paragraph holding a single '宋体' run with ``text``.

    Args:
        doc: Document the paragraph is appended to.
        text: Text placed in the paragraph's only run.
        bold: When truthy, the run is marked bold.

    Returns:
        The newly added paragraph.
    """
    typeface = '宋体'
    paragraph = doc.add_paragraph()
    text_run = paragraph.add_run(text)
    text_run.font.name = typeface
    # Mirror the typeface into the w:eastAsia attribute of the run's rFonts.
    text_run._element.rPr.rFonts.set(qn('w:eastAsia'), typeface)
    if bold:
        text_run.bold = True
    return paragraph

# Inline bold markers (**text**): the text is kept, the markers are dropped.
_BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
# One cell of a table delimiter row, e.g. "---", ":--", "--:" or ":-:".
_DELIMITER_CELL_RE = re.compile(r':?-+:?')
# Ordered-list prefix such as "1. ".
_ORDERED_ITEM_RE = re.compile(r'^\d+\. ')
# Heading prefixes and the Word heading level each one maps to.
_HEADING_PREFIXES = (('# ', 1), ('## ', 2), ('### ', 3), ('#### ', 4))


def _strip_bold(text):
    """Return ``text`` with ``**bold**`` markers removed (content kept)."""
    return _BOLD_RE.sub(r'\1', text)


def _split_table_row(line):
    """Split a Markdown table line into stripped cell strings.

    The leading and trailing pipes are both optional, so a row written
    without a closing ``|`` keeps its last cell.
    """
    row = line.strip()
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return [cell.strip() for cell in row.split('|')]


def _is_delimiter_row(cells):
    """Return True when every cell consists only of dashes and optional colons."""
    return bool(cells) and all(
        _DELIMITER_CELL_RE.fullmatch(cell) for cell in cells
    )


def _add_songti_run(paragraph, text):
    """Append a run with ``text`` in the '宋体' typeface and return it."""
    run = paragraph.add_run(text)
    run.font.name = '宋体'
    run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
    return run


def parse_markdown_to_word(md_file, docx_file):
    """Convert a Markdown file into a Word document.

    Recognised line types: ``#`` to ``####`` headings, pipe tables, ``> ``
    block quotes, ``- `` / ``* `` bullet items, ``N. `` numbered items and
    ``---`` rules (emitted as an empty paragraph). Anything else becomes a
    plain paragraph. ``**bold**`` markers are removed from all text; the
    content is kept but not rendered bold.

    Args:
        md_file: Path of the UTF-8 encoded Markdown file to read.
        docx_file: Path the resulting ``.docx`` file is saved to.
    """
    doc = create_word_doc()

    with open(md_file, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    pending_rows = []        # rows of the table currently being collected
    delimiter_seen = False   # whether this table's delimiter row was consumed

    for line in lines:
        if line.startswith('|'):
            cells = _split_table_row(line)
            # Only the row right after the header (or a leading one) is the
            # delimiter; later rows are data even if they contain dashes.
            if (not delimiter_seen and len(pending_rows) <= 1
                    and _is_delimiter_row(cells)):
                delimiter_seen = True
            else:
                pending_rows.append([_strip_bold(cell) for cell in cells])
            continue

        # Any non-table line (blank or not) ends the current table. Flushing
        # here keeps the table ahead of the content that follows it.
        if pending_rows:
            add_table(doc, pending_rows)
            pending_rows = []
        delimiter_seen = False

        if not line.strip():
            continue

        heading = next(
            (item for item in _HEADING_PREFIXES if line.startswith(item[0])),
            None,
        )
        if heading is not None:
            prefix, level = heading
            add_heading(doc, _strip_bold(line[len(prefix):]), level)
        elif line.startswith('> '):
            # Block quote: italic text with a left indent.
            quote = doc.add_paragraph()
            _add_songti_run(quote, _strip_bold(line[2:])).italic = True
            quote.paragraph_format.left_indent = Inches(0.5)
        elif line.startswith(('- ', '* ')):
            item = doc.add_paragraph(style='List Bullet')
            _add_songti_run(item, _strip_bold(line[2:]))
        elif _ORDERED_ITEM_RE.match(line):
            item = doc.add_paragraph(style='List Number')
            _add_songti_run(item, _strip_bold(_ORDERED_ITEM_RE.sub('', line)))
        elif line.startswith('---'):
            # Horizontal rule: represented by an empty paragraph.
            doc.add_paragraph()
        else:
            add_paragraph(doc, _strip_bold(line))

    # A table that runs to the end of the file is still pending here.
    if pending_rows:
        add_table(doc, pending_rows)

    doc.save(docx_file)
    print(f"已生成: {docx_file}")

def add_table(doc, table_data):
    """Append a 'Table Grid' table filled from ``table_data`` to ``doc``.

    Args:
        doc: Document the table is appended to.
        table_data: Sequence of rows, each a sequence of cell strings. Rows
            may differ in length; the table is as wide as the longest row
            and cells missing from shorter rows are left untouched.

    Nothing is added when ``table_data`` is empty or none of its rows
    contains a cell.
    """
    if not table_data:
        return

    num_cols = max(len(row) for row in table_data)
    if num_cols == 0:
        # Every row is empty; skip rather than create a zero-column table.
        return

    table = doc.add_table(rows=len(table_data), cols=num_cols)
    table.style = 'Table Grid'

    for table_row, row_data in zip(table.rows, table_data):
        # Fetch the row's cells once instead of once per cell; python-docx
        # recomputes the cell sequence on each ``cells`` access (see the
        # python-docx table API documentation).
        row_cells = table_row.cells
        # len(row_data) <= num_cols, so zip never drops supplied cell text.
        for cell, cell_text in zip(row_cells, row_data):
            cell.text = cell_text
            # Apply the table font to the text that was just written.
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    run.font.name = '宋体'
                    run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋体')
                    run.font.size = Pt(10)

if __name__ == '__main__':
    import os
    import sys

    # Usage: script.py [input.md [output.docx]]
    # Without arguments the original hard-coded course file is converted, so
    # existing invocations behave exactly as before.
    cli_args = sys.argv[1:]
    if cli_args:
        md_file = cli_args[0]
    else:
        md_file = '/root/.openclaw/workspace/study/品瑄/阅读理解专项训练课程.md'

    # The output path defaults to the input path with its extension replaced
    # by ".docx"; for the default input this is the original output path.
    if len(cli_args) > 1:
        docx_file = cli_args[1]
    else:
        docx_file = os.path.splitext(md_file)[0] + '.docx'

    parse_markdown_to_word(md_file, docx_file)
