#!/usr/bin/env python3
"""
自然拼读教程 Markdown → Word 打印版转换工具
生成适合孩子学习的打印版文档
"""

import os
import re
from docx import Document
from docx.shared import Pt, Inches, RGBColor, Cm
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.ns import qn
from docx.oxml import OxmlElement

# Configuration
FONT_SIZE_TITLE = Pt(22)      # title font size
FONT_SIZE_H2 = Pt(18)         # level-2 heading
FONT_SIZE_H3 = Pt(16)         # level-3 heading
FONT_SIZE_BODY = Pt(14)       # body text font size
FONT_SIZE_TABLE = Pt(12)      # table font size
LINE_SPACING = 1.5            # line spacing

# Color configuration (for color printing)
COLOR_TITLE = RGBColor(0, 102, 204)      # blue title
COLOR_H2 = RGBColor(0, 153, 76)          # green level-2 heading
COLOR_H3 = RGBColor(204, 102, 0)         # orange level-3 heading
COLOR_EMPHASIS = RGBColor(255, 0, 0)     # red emphasis


def set_chinese_font(run, font_name='微软雅黑'):
    """Apply ``font_name`` to a run, including its East Asian font attribute.

    The name is first assigned through ``run.font.name`` and then written to
    the ``w:eastAsia`` attribute of the run's ``rFonts`` element.
    """
    run.font.name = font_name
    run_fonts = run._element.rPr.rFonts
    run_fonts.set(qn('w:eastAsia'), font_name)


def add_title(doc, text):
    """Append a centered, bold, title-colored paragraph and return it.

    The paragraph gets 12 pt of spacing after it.
    """
    paragraph = doc.add_paragraph()
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    paragraph.paragraph_format.space_after = Pt(12)

    title_run = paragraph.add_run(text)
    font = title_run.font
    font.size = FONT_SIZE_TITLE
    font.bold = True
    font.color.rgb = COLOR_TITLE
    set_chinese_font(title_run)

    return paragraph


def add_heading(doc, text, level=2):
    """Append a bold section heading paragraph and return it.

    ``level == 2`` uses the level-2 size and color; every other value is
    styled as a level-3 heading. The paragraph gets 12 pt of spacing before
    and 6 pt after.
    """
    if level == 2:
        size, color = FONT_SIZE_H2, COLOR_H2
    else:
        size, color = FONT_SIZE_H3, COLOR_H3

    paragraph = doc.add_paragraph()
    heading_run = paragraph.add_run(text)
    heading_run.font.size = size
    heading_run.font.color.rgb = color
    heading_run.font.bold = True
    set_chinese_font(heading_run)

    spacing = paragraph.paragraph_format
    spacing.space_before = Pt(12)
    spacing.space_after = Pt(6)
    return paragraph


def add_paragraph(doc, text):
    """Append a body paragraph, rendering ``**bold**`` spans as bold runs.

    Args:
        doc: The python-docx document to append to.
        text: One line of Markdown text. Only ``**...**`` emphasis is
            interpreted; any other markup is emitted literally.

    Returns:
        The newly created paragraph.
    """
    p = doc.add_paragraph()
    p.paragraph_format.line_spacing = LINE_SPACING

    # With a single capturing group, re.split() returns plain text at even
    # indices and the captured bold text (without the asterisks) at odd
    # indices. Classifying by position rather than by startswith/endswith
    # keeps unmatched markers such as a lone "**" visible as literal text.
    segments = re.split(r'\*\*([^*]+)\*\*', text)
    for index, segment in enumerate(segments):
        if not segment:
            # Splitting leaves empty strings around adjacent or edge matches;
            # they would only produce empty runs.
            continue
        run = p.add_run(segment)
        if index % 2 == 1:
            run.font.bold = True
        run.font.size = FONT_SIZE_BODY
        set_chinese_font(run)

    return p


def add_table(doc, headers, rows):
    """Append a centered grid table followed by an empty paragraph.

    Args:
        doc: The python-docx document to append to.
        headers: List of header cell strings (rendered bold).
        rows: List of body rows, each a list of cell strings.

    Returns:
        The newly created table.

    The table is as wide as the widest of ``headers`` and ``rows`` so that a
    body row with more cells than the header does not raise ``IndexError``;
    cells missing from shorter rows are simply left empty.
    """
    column_count = max([len(headers)] + [len(row) for row in rows])
    table = doc.add_table(rows=len(rows) + 1, cols=column_count)
    table.style = 'Table Grid'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    # Row 0 is the header; body rows follow in order.
    contents = [(headers, True)] + [(row, False) for row in rows]
    for table_row, (values, is_header) in zip(table.rows, contents):
        for cell, value in zip(table_row.cells, values):
            cell.text = value
            for paragraph in cell.paragraphs:
                for run in paragraph.runs:
                    if is_header:
                        run.font.bold = True
                    run.font.size = FONT_SIZE_TABLE
                    set_chinese_font(run)

    doc.add_paragraph()  # blank line after the table
    return table


def add_practice_area(doc, instruction, lines=3):
    """Append a bold instruction followed by blank ruled lines to write on.

    ``lines`` paragraphs are added after the instruction, each holding a row
    of 50 underscores in light gray.
    """
    prompt = doc.add_paragraph().add_run(instruction)
    prompt.font.size = FONT_SIZE_BODY
    prompt.font.bold = True
    set_chinese_font(prompt)

    ruled_line = '_' * 50
    for _ in range(lines):
        paragraph = doc.add_paragraph()
        paragraph.paragraph_format.line_spacing = LINE_SPACING
        rule = paragraph.add_run(ruled_line)
        rule.font.size = FONT_SIZE_BODY
        rule.font.color.rgb = RGBColor(200, 200, 200)  # light gray rule


def add_code_block(doc, text):
    """Append a code block as a single indented paragraph.

    Args:
        doc: The python-docx document to append to.
        text: The code block content; surrounding whitespace is stripped and
            each line becomes its own run ending in a newline.

    The paragraph is indented 0.3 inch and uses Consolas for Latin text and
    the default Chinese font for East Asian text. No background shading is
    applied.
    """
    p = doc.add_paragraph()
    p.paragraph_format.left_indent = Inches(0.3)
    p.paragraph_format.line_spacing = LINE_SPACING

    for line in text.strip().split('\n'):
        run = p.add_run(line + '\n')
        run.font.size = FONT_SIZE_BODY
        # set_chinese_font() also assigns run.font.name, so it must run
        # before the monospace font is chosen; in the opposite order the
        # Consolas setting was silently overwritten.
        set_chinese_font(run)
        run.font.name = 'Consolas'


def parse_markdown_table(lines):
    """Parse the lines of a Markdown pipe table.

    Args:
        lines: Iterable of raw text lines belonging to one table.

    Returns:
        A ``(headers, rows)`` tuple. ``headers`` is the list of cell strings
        from the first content row (empty if there is none) and ``rows`` is a
        list of cell-string lists for every following content row.

    Lines that do not start with ``|`` are ignored. A separator row is one in
    which every cell is three or more dashes with optional alignment colons
    (``---``, ``:---``, ``---:``, ``:---:``); such rows are skipped. The
    trailing pipe is optional.
    """
    headers = []
    rows = []

    for raw_line in lines:
        line = raw_line.strip()
        if not line.startswith('|'):
            continue

        # Drop the leading pipe and, when present, exactly one trailing pipe
        # so that intentionally empty edge cells are preserved.
        inner = line[1:]
        if inner.endswith('|'):
            inner = inner[:-1]
        cells = [cell.strip() for cell in inner.split('|')]

        if all(re.fullmatch(r':?-{3,}:?', cell) for cell in cells):
            continue

        if not headers:
            headers = cells
        else:
            rows.append(cells)

    return headers, rows


def convert_md_to_docx(md_path, output_path):
    """Convert one Markdown file into a Word document.

    Args:
        md_path: Path of the Markdown source file (read as UTF-8; a leading
            byte-order mark is tolerated).
        output_path: Path of the ``.docx`` file to write.

    The document ends with a page break, then is saved and a confirmation
    line is printed.
    """
    doc = Document()
    _set_page_margins(doc)

    _render_markdown(doc, _read_markdown(md_path))

    # Trailing page break
    doc.add_page_break()

    doc.save(output_path)
    print(f"✅ 已生成: {output_path}")


def _set_page_margins(doc):
    """Set 2 cm top/bottom and 2.5 cm left/right margins on every section."""
    for section in doc.sections:
        section.top_margin = Cm(2)
        section.bottom_margin = Cm(2)
        section.left_margin = Cm(2.5)
        section.right_margin = Cm(2.5)


def _read_markdown(md_path):
    """Return the text of ``md_path``, dropping a UTF-8 byte-order mark if present."""
    # 'utf-8-sig' decodes plain UTF-8 as well; without it a BOM would stick
    # to the first line and hide a leading '# ' title from the renderer.
    with open(md_path, 'r', encoding='utf-8-sig') as f:
        return f.read()


def _list_lesson_files(input_dir):
    """Return the sorted names of the ``.md`` files directly inside ``input_dir``."""
    # NOTE(review): plain lexicographic order - assumes lesson numbers in the
    # file names are zero-padded; confirm against the real lesson folder.
    return sorted(
        name for name in os.listdir(input_dir)
        if name.endswith('.md') and os.path.isfile(os.path.join(input_dir, name))
    )


def _lesson_toc_label(md_file):
    """Build the table-of-contents line for a lesson file name."""
    stem = os.path.splitext(md_file)[0]
    lesson_num, dash, lesson_name = stem.partition('-')
    if not dash:
        # No "<number>-<name>" structure to split; show the bare stem rather
        # than repeating the file name in both slots.
        return stem
    return f"第{lesson_num}课：{lesson_name}"


def _add_list_item(doc, text, style):
    """Append one list paragraph in the given built-in list style."""
    p = doc.add_paragraph(style=style)
    run = p.add_run(text)
    run.font.size = FONT_SIZE_BODY
    set_chinese_font(run)
    p.paragraph_format.line_spacing = LINE_SPACING


def _render_markdown(doc, content):
    """Append the Markdown text ``content`` to ``doc``, line by line.

    Recognized constructs: ``#``/``##``/``###`` headings, pipe tables, fenced
    code blocks, ``-``/``*`` bullets, ``N.`` ordered items; everything else
    becomes a body paragraph. Blank lines are skipped.
    """
    lines = content.split('\n')
    i = 0

    while i < len(lines):
        line = lines[i].strip()

        if not line:
            i += 1
            continue

        if line.startswith('# '):
            add_title(doc, line[2:])

        elif line.startswith('## '):
            add_heading(doc, line[3:], level=2)

        elif line.startswith('### '):
            add_heading(doc, line[4:], level=3)

        elif line.startswith('|'):
            # Collect the consecutive table lines.
            table_lines = []
            while i < len(lines) and lines[i].strip().startswith('|'):
                table_lines.append(lines[i])
                i += 1
            i -= 1  # step back; the shared increment below moves past the table

            headers, rows = parse_markdown_table(table_lines)
            if headers and rows:
                add_table(doc, headers, rows)

        elif line.startswith('```'):
            # Gather everything up to the closing fence (or end of input);
            # the shared increment below then skips the closing fence itself.
            code_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith('```'):
                code_lines.append(lines[i])
                i += 1
            add_code_block(doc, '\n'.join(code_lines))

        elif line.startswith(('- ', '* ')):
            _add_list_item(doc, line[2:], 'List Bullet')

        else:
            # Require whitespace (or end of line) after "N." so that text
            # such as "3.14 ..." is not mistaken for an ordered-list item.
            ordered = re.match(r'^\d+\.(?:\s+|$)', line)
            if ordered:
                _add_list_item(doc, line[ordered.end():], 'List Number')
            else:
                add_paragraph(doc, line)

        i += 1


def convert_all_lessons(input_dir, output_dir):
    """Convert every ``.md`` lesson in ``input_dir`` to its own Word file.

    Args:
        input_dir: Directory containing the Markdown lessons.
        output_dir: Directory for the generated ``.docx`` files; created if
            it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    md_files = _list_lesson_files(input_dir)

    for md_file in md_files:
        md_path = os.path.join(input_dir, md_file)
        # splitext() only removes the final extension; str.replace() would
        # also rewrite any ".md" occurring earlier in the file name.
        stem = os.path.splitext(md_file)[0]
        output_path = os.path.join(output_dir, f"{stem}_打印版.docx")

        print(f"📝 转换: {md_file}")
        convert_md_to_docx(md_path, output_path)

    print(f"\n🎉 完成！共转换 {len(md_files)} 个课程")
    print(f"📁 输出目录: {output_dir}")


def merge_all_lessons(input_dir, output_path):
    """Merge every ``.md`` lesson in ``input_dir`` into one Word document.

    Args:
        input_dir: Directory containing the Markdown lessons.
        output_path: Path of the merged ``.docx`` file; its parent directory
            is created if it does not exist.

    The document consists of a cover, a lesson list, and then each lesson
    rendered with the same rules as ``convert_md_to_docx``, each followed by
    a page break.
    """
    doc = Document()
    _set_page_margins(doc)

    # Cover
    title = doc.add_paragraph()
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = title.add_run('自然拼读教程')
    run.font.size = Pt(36)
    run.font.bold = True
    run.font.color.rgb = COLOR_TITLE
    set_chinese_font(run)

    subtitle = doc.add_paragraph()
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = subtitle.add_run('Phonics Course for Kids')
    run.font.size = Pt(24)
    run.font.color.rgb = RGBColor(128, 128, 128)

    doc.add_paragraph()
    doc.add_paragraph()

    # Lesson list heading
    toc = doc.add_paragraph()
    run = toc.add_run('📚 课程目录')
    run.font.size = FONT_SIZE_H2
    run.font.bold = True
    run.font.color.rgb = COLOR_H2
    set_chinese_font(run)

    md_files = _list_lesson_files(input_dir)

    for md_file in md_files:
        p = doc.add_paragraph()
        run = p.add_run(_lesson_toc_label(md_file))
        run.font.size = FONT_SIZE_BODY
        set_chinese_font(run)

    doc.add_page_break()

    # Lesson bodies
    for md_file in md_files:
        md_path = os.path.join(input_dir, md_file)
        print(f"📝 添加: {md_file}")

        _render_markdown(doc, _read_markdown(md_path))

        # Page break after every lesson
        doc.add_page_break()

    # Unlike convert_all_lessons(), nothing else creates the target folder,
    # so saving would fail on a first run in merge mode.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    doc.save(output_path)
    print(f"\n🎉 完成！合并文档: {output_path}")


if __name__ == '__main__':
    import sys

    input_dir = '/root/.openclaw/workspace/study/共享资源/自然拼读教程'
    output_dir = '/root/.openclaw/workspace/study/共享资源/自然拼读教程_打印版'

    # The first command-line argument selects the mode: "merge" builds one
    # combined document, anything else (or nothing) converts lesson by lesson.
    merge_requested = sys.argv[1:2] == ['merge']

    if not merge_requested:
        # One document per lesson
        convert_all_lessons(input_dir, output_dir)
    else:
        # Single merged document
        output_path = os.path.join(output_dir, '自然拼读教程_完整版.docx')
        merge_all_lessons(input_dir, output_path)