import os
import sys
import argparse
import time
import re
from pathlib import Path
from types import SimpleNamespace
from urllib.parse import urlparse
from configparser import ConfigParser

# Probe every third-party module this script imports below, collecting the
# names that fail to import so they can all be reported in a single message
# instead of stopping at the first ImportError.
# The entries are import (module) names, as passed to __import__.
missing = []
for lib in [
    "chardet",
    "bs4",
    "jinja2",
    "html2text",
    "pdf2docx",
    "docx",
    "pptx",
    "markitdown",
]:
    try:
        __import__(lib)
    except ImportError:
        missing.append(lib)

# Abort with exit status 1 when anything is missing.
if missing:
    print(f"Error: Missing libraries:\n{', '.join(missing)}")
    # Blocks until ENTER is pressed; presumably so the message stays readable
    # when the script is started from a window that closes on exit -- TODO confirm.
    input("\nPress ENTER to terminate>>\n")
    sys.exit(1)

import chardet
from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader
import html2text

from pdf2docx import Converter
from docx import Document
from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
from markitdown import MarkItDown

from tkai_lib import read_ai_config
from tkai_lib import query_openai4, openai_response_to_json
from tkai_lib import query_openai5, extract_openai5_text
from tkai_lib import query_google2, google_response_to_json
from tkai_lib import query_deepl, extract_deepl_text

import openai


#=========================================================
# Initialization
#=========================================================

# Module-level argument parser handle read by usage(); while it is None,
# usage() prints nothing. No assignment other than this one is visible in
# this part of the file.
parser = None


def initialize():
    """Load the env-style config files and build the command-line parser.

    Returns:
        tuple: ``(cfg, parser)`` where ``cfg`` is a ``SimpleNamespace`` holding
        the two config file paths and ``parser`` is the fully populated
        ``argparse.ArgumentParser`` (not yet parsed).

    Many option defaults are read from environment variables at the time this
    function runs, after both ``read_ai_config`` calls have been made.
    """
    cfg = SimpleNamespace(
        config_path="translate5.env",
        common_config_path="ai.env",
    )

    # API keys and shared settings still come from the .env / ai.env files.
    read_ai_config(cfg.config_path, read_account_inf=False)
    read_ai_config(cfg.common_config_path)

    env = os.getenv
    cli = argparse.ArgumentParser(description="Translate/Revise .docx/.pptx/.pdf/.html/.md")
    add = cli.add_argument

    # ---- input / output ----
    add("--mode", default="je", help="Translation mode (je, ej, jj, etc.)")
    add(
        "--infile", "-i",
        default="translate_test.docx",
        help="Input file (.docx/.pptx/.pdf/.html/.md/.txt)",
    )
    add(
        "--output_html_path",
        default=None,
        help="Path for the output HTML file (comparison report, optional)",
    )
    add(
        "--html_template_path",
        default="template_translate.html",
        help="Path to the HTML template file for comparison report",
    )

    # ---- backend selection ----
    add(
        "--api",
        default="openai5",
        choices=["openai5", "openai", "google", "gemini", "deepl"],
        help="Translation API to use",
    )
    add("--model", default=None, help="Override API model")
    add("--endpoint", default=env("endpoint"), help="API endpoint")

    # ---- backend parameters (defaults taken from the environment) ----
    add(
        "--max_tokens",
        type=int,
        default=int(env("max_tokens", "2000")),
        help="Maximum number of tokens / output length",
    )
    add("--openai_api_key", default=env("OPENAI_API_KEY"), help="OpenAI API key")
    add("--openai_model", default=env("openai_model", "gpt-4o"), help="OpenAI model to use")
    add(
        "--temperature",
        type=float,
        default=float(env("temperature", "0.3")),
        help="Sampling temperature",
    )
    add("--openai_model5", default=env("openai_model5", "gpt-5.2"), help="OpenAI GPT-5 model")
    add(
        "--reasoning_effort",
        default=env("reasoning_effort", "low"),
        help="Reasoning effort level for GPT-5",
    )
    add("--google_api_key", default=env("GOOGLE_API_KEY"), help="Google API key")
    add(
        "--google_model",
        default=env("google_model", "gemini-2.5-flash"),
        help="Google/Gemini model to use",
    )
    add("--deepl_api_key", default=env("DEEPL_API_KEY"), help="DeepL API key")

    add(
        "--force_server_charcode",
        default=env("force_server_charcode", "utf-8"),
        help="force_server_charcode",
    )
    add(
        "--tsleep_rpm",
        type=float,
        default=0.5,
        help="Sleep time to avoid rpm (seconds)",
    )

    # ---- processing behaviour ----
    add("--use_md", action="store_true", help="Convert to markdown and process")
    add(
        "--limit_to_multibyte_str",
        action="store_true",
        help="When set, only translate text containing multibyte chars (e.g. Japanese)",
    )
    add(
        "--process_unit",
        default="paragraph",
        choices=["paragraph", "run"],
        help="Unit of processing for docx/pptx/html",
    )
    add(
        "--min_translate_length",
        type=int,
        default=int(env("min_translate_length", "5")),
        help="Minimum length of text to translate",
    )
    add(
        "--allowed_translation_length_ratio",
        type=float,
        default=float(env("allowed_translation_length_ratio", "5.0")),
        help="Max allowed ratio (translated_len / original_len)",
    )

    # Only the prompt body can be overridden from the CLI (the role is merged
    # into the prompt).
    add(
        "--translate_prompt",
        type=str,
        default="",
        help="Override translation prompt (full instructions incl. 'あなたは〜').",
    )
    add(
        "--reformat_prompt",
        type=str,
        default="",
        help="Override reformat prompt (full instructions).",
    )

    return cfg, cli


def _read_prompt_section(cp, section):
    """Return the merged ``role`` + ``prompt`` text of *section*, or None if the section is absent."""
    from configparser import InterpolationError

    if not cp.has_section(section):
        return None

    def _option(name):
        # Prompts are free text and may contain a literal '%' (e.g. "30%"),
        # which ConfigParser's default interpolation rejects. Values that
        # interpolate cleanly are returned exactly as before; only the ones
        # that fail are re-read verbatim.
        try:
            return cp.get(section, name, fallback="")
        except InterpolationError:
            return cp.get(section, name, fallback="", raw=True)

    role_val = _option("role").strip()
    prompt_val = _option("prompt").lstrip("\n")

    if role_val and prompt_val:
        # Backward compatibility: a separate role is prepended to the prompt.
        return (role_val + "\n" + prompt_val).strip()
    if prompt_val:
        return prompt_val.strip()
    if role_val:
        return role_val
    return None


def load_prompt_config_from_ini(args, cfg):
    """Resolve the translate/reformat prompts and store them on *cfg*.

    ``translate5.ini`` is searched for in this order, and the first existing
    file wins:
      1. the current working directory
      2. the directory of the input file (``args.infile``)
      3. the directory of this script

    The prompt is taken from the ``[translate]`` and ``[reformat]`` sections.
    Role and prompt are treated as a single prompt:
      * new style: ``prompt`` holds the complete instructions
      * compatibility: when both ``role`` and ``prompt`` exist they are joined
        as ``role + newline + prompt``
      * when only ``role`` exists it is used as the prompt

    Priority:
      * ``args.translate_prompt`` / ``args.reformat_prompt`` (CLI) when non-empty
      * otherwise the merged prompt from the ini file
      * otherwise the built-in default

    Args:
        args: parsed arguments; ``infile``, ``translate_prompt`` and
            ``reformat_prompt`` are read.
        cfg: namespace that receives ``translate_prompt``, ``reformat_prompt``,
            ``translate_role`` and ``reformat_role``.

    Returns:
        The same *cfg* object.
    """
    ini_name = "translate5.ini"

    # 1. working directory
    candidate_paths = [os.path.join(os.getcwd(), ini_name)]

    # 2. input file directory
    if getattr(args, "infile", None):
        infile_dir = os.path.dirname(os.path.abspath(args.infile))
        candidate_paths.append(os.path.join(infile_dir, ini_name))

    # 3. script directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    candidate_paths.append(os.path.join(script_dir, ini_name))

    ini_path = next((p for p in candidate_paths if os.path.isfile(p)), None)

    translate_prompt_ini = None
    reformat_prompt_ini = None

    if ini_path:
        print(f"  Using prompt config file: {ini_path}")
        cp = ConfigParser()
        # "utf-8-sig" reads plain UTF-8 as well as files saved with a BOM;
        # with plain "utf-8" a BOM ends up in front of the first "[section]"
        # header and parsing fails.
        cp.read(ini_path, encoding="utf-8-sig")
        translate_prompt_ini = _read_prompt_section(cp, "translate")
        reformat_prompt_ini = _read_prompt_section(cp, "reformat")
    else:
        print("  translate5.ini not found. Using built-in default prompts.")

    # ---- built-in defaults (role and prompt in one text) ----

    if not translate_prompt_ini:
        translate_prompt_ini = (
            "あなたは専門的な英語を正確かつプロフェッショナルに翻訳・校正するアシスタントです。\n"
            "*#翻訳してほしいテキスト*以降のテキストは学会プレゼンテーション等の一部です。\n"
            "以下の条件を守り、自然な米国英語に翻訳してください。\n"
            "条件1：元の意味を変えない。テキストに書かれていない解釈・内容は追加しない。\n"
            "条件2：文字数を大きく増やさない（目安 ±30%）。\n"
            "条件3：動詞を含まない短いフレーズはスライド見出しとして扱い、完全な文にしない。\n"
            "条件4：数式・記号・固有名詞は可能な限り保持する。\n"
            "{{additional_prompt}}\n\n"
            "#翻訳してほしいテキスト\n{{text}}"
        )

    if not reformat_prompt_ini:
        reformat_prompt_ini = (
            "あなたは専門的な英語文書を、内容を変えずに読みやすく整形するアシスタントです。\n"
            "以下の*＃テキスト*以降は PDF やスライドから抽出したテキストであり、"
            "改行位置や段落順が乱れたり、文が途中で分断されている可能性があります。\n"
            "元の意味・情報を一切削除・要約・追加せず、文や段落のつながりが自然になるように"
            "並び替えと改行のみを調整してください。\n"
            "箇条書きや見出しらしき部分は、その構造を保ってください。\n"
            "出力はプレーンテキストのみとし、余計な説明文は付けないでください。\n\n"
            "＃テキスト\n{{ text }}"
        )

    # ---- apply priority: CLI > ini (merged) > default ----
    cfg.translate_prompt = args.translate_prompt if args.translate_prompt else translate_prompt_ini
    cfg.reformat_prompt = args.reformat_prompt if args.reformat_prompt else reformat_prompt_ini

    # The role is kept empty internally; the prompt carries all instructions.
    cfg.translate_role = ""
    cfg.reformat_role = ""

    return cfg


def update_variables(cfg, p):
    """Parse the command line with *p* and copy the resulting settings onto *cfg*.

    Args:
        cfg: namespace to populate.
        p: the ``argparse.ArgumentParser`` built by ``initialize()``.

    Returns:
        *cfg*, after ``load_prompt_config_from_ini`` has added the prompts.

    Exits through ``p.error()`` (status 2) when ``--mode`` is shorter than two
    characters; the first character selects the source language and the second
    the target language.
    """
    args = p.parse_args()

    # Both mode characters are indexed below; reject a short value up front
    # with a usage error instead of failing with an IndexError.
    if len(args.mode or "") < 2:
        p.error(
            f"--mode must have at least two characters (e.g. je, ej, jj); got {args.mode!r}"
        )

    # --model overrides the model of whichever API is selected
    # (no model is applied for deepl).
    if args.model is not None:
        if args.api == "openai5":
            args.openai_model5 = args.model
        elif args.api == "openai":
            args.openai_model = args.model
        elif args.api in ("google", "gemini"):
            args.google_model = args.model

    # Language direction from the mode: "j" means Japanese, anything else English.
    source_is_japanese = args.mode[0] == "j"
    args.source_lang = "JA" if source_is_japanese else "EN"
    args.target_lang = "JA" if args.mode[1] == "j" else "EN"

    # The multibyte filter always follows the source language: forced on for a
    # Japanese source and forced off otherwise, whatever was given on the CLI.
    args.limit_to_multibyte_str = source_is_japanese

    # Copy the basic settings onto cfg.
    cfg.infile = args.infile
    cfg.api = args.api
    cfg.openai_model = args.openai_model
    cfg.openai_model5 = args.openai_model5
    cfg.google_model = args.google_model
    cfg.deepl_api_key = args.deepl_api_key
    cfg.endpoint = args.endpoint
    cfg.max_tokens = args.max_tokens
    cfg.temperature = args.temperature
    cfg.reasoning_effort = args.reasoning_effort
    cfg.tsleep_rpm = args.tsleep_rpm
    cfg.use_md = args.use_md
    cfg.limit_to_multibyte_str = args.limit_to_multibyte_str
    cfg.process_unit = args.process_unit
    cfg.min_translate_length = args.min_translate_length
    cfg.allowed_translation_length_ratio = args.allowed_translation_length_ratio
    cfg.mode = args.mode
    cfg.source_lang = args.source_lang
    cfg.target_lang = args.target_lang
    cfg.html_template_path = args.html_template_path

    cfg.openai_api_key = args.openai_api_key
    cfg.google_api_key = args.google_api_key

    # Settle the prompts from ini + defaults + CLI override (role stays empty).
    cfg = load_prompt_config_from_ini(args, cfg)

    return cfg


def usage():
    """Print the usage line of the module-level ``parser``; do nothing while it is unset."""
    if not parser:
        return
    parser.print_usage()

# ------------------------------------------------------
# Utilities
# ------------------------------------------------------
def process_template(template: str, context: dict) -> str:
    r"""Expand escape sequences and fill ``{{ key }}`` placeholders.

    The two-character sequences ``\t``, ``\n`` and ``\r`` in *template* are
    turned into the real control characters first. Each ``{{ key }}`` is then
    replaced by ``str(context[key])``; a placeholder whose key is not in
    *context* is written back as ``{{ key }}``.
    """
    for escaped, actual in ((r"\t", "\t"), (r"\n", "\n"), (r"\r", "\r")):
        template = template.replace(escaped, actual)

    def _fill(found):
        name = found.group(1).strip()
        if name in context:
            return str(context[name])
        return "{{ " + name + " }}"

    return re.sub(r"\{\{\s*(.*?)\s*\}\}", _fill, template)


def save(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing file."""
    Path(path).write_text(text, encoding="utf-8")


def read_file(path):
    """Return the UTF-8 text content of *path*.

    Prints an error and exits with status 1 when *path* does not exist.
    """
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as source:
            return source.read()

    print(f"\nError in read_file(): File [{path}] does not exist\n")
    sys.exit(1)


def replace_path(path, ext):
    """Return *path* with its extension removed and *ext* appended in its place."""
    stem, _old_ext = os.path.splitext(path)
    return stem + ext


def check_multibyte_str(text, limit_to_multibyte_str):
    """Tell whether *text* passes the multibyte filter.

    With the filter off (*limit_to_multibyte_str* falsy) every text passes.
    With it on, the text passes only if it contains at least one character in
    the range U+0800..U+FFFF.
    """
    if limit_to_multibyte_str:
        return re.search(r"[\u0800-\uFFFF]", text) is not None
    return True


def html_to_markdown(html_file_path):
    """Read the UTF-8 HTML file at *html_file_path* and return it converted by html2text."""
    html_source = Path(html_file_path).read_text(encoding="utf-8")
    return html2text.html2text(html_source)


def convert_to_md(infile):
    """Convert *infile* to Markdown text.

    Files whose extension is ``.html`` (case-insensitive) go through
    ``html_to_markdown``; everything else is handed to MarkItDown.

    The decision is made on the file extension rather than on ".html"
    appearing anywhere in the path, so a path such as
    ``docs.html_export/report.pdf`` is no longer mistaken for an HTML file.
    """
    extension = os.path.splitext(infile)[1].lower()
    if extension == ".html":
        return html_to_markdown(infile)

    print(f"Convert {infile} to markdown")
    md = MarkItDown()
    result = md.convert(infile)
    return result.text_content


def n_leading_chars(s, c="#"):
    """Count how many characters at the start of *s* are stripped by ``s.lstrip(c)``."""
    remainder = s.lstrip(c)
    return len(s) - len(remainder)


def pdf_to_docx(pdf_file, docx_file):
    """Convert *pdf_file* to a Word document written to *docx_file* using pdf2docx.

    The converter is closed even when the conversion raises, so the PDF handle
    is not left open. The success message is printed only after a successful
    conversion; exceptions from pdf2docx propagate to the caller.
    """
    cv = Converter(pdf_file)
    try:
        cv.convert(docx_file, start=0, end=None)
    finally:
        cv.close()
    print(f"Converted '{pdf_file}' to '{docx_file}' successfully.")


def get_filetype(path):
    """Map the extension of *path* (case-insensitive) to a file-type tag.

    Returns one of ``"pdf"``, ``"docx"``, ``"pptx"``, ``"html"``, ``"txt"``,
    ``"md"``, or ``None`` for any other extension.
    """
    kinds = {
        ".pdf": "pdf",
        ".docx": "docx",
        ".pptx": "pptx",
        ".html": "html",
        ".htm": "html",
        ".txt": "txt",
        ".text": "txt",
        ".md": "md",
    }
    _stem, suffix = os.path.splitext(path)
    return kinds.get(suffix.lower())


def to_translate(text, min_translate_length, limit_to_multibyte_str):
    """Decide whether *text* is worth sending to the translation API.

    Args:
        text: candidate text; ``None`` is allowed.
        min_translate_length: minimum length of the stripped text.
        limit_to_multibyte_str: when true, only text containing a multibyte
            character (as defined by ``check_multibyte_str``) is accepted.

    Returns:
        bool: ``False`` for ``None``, blank or too-short text. Otherwise, with
        the multibyte limit on, whether the text contains a multibyte
        character; with the limit off, whether it contains a multibyte
        character or an ASCII letter.

    Previously the "no letters at all" guard could never take effect: with the
    limit off, ``check_multibyte_str`` reports ``True`` unconditionally, so
    digit- or punctuation-only strings were still sent for translation. The
    real character test is now forced by passing ``True``.
    NOTE(review): with the limit off, text made only of letters below U+0800
    other than a-z/A-Z (e.g. Greek or Cyrillic) is now skipped -- confirm this
    suits the intended language pairs.
    """
    if text is None:
        return False

    stripped = text.strip()
    if not stripped or len(stripped) < min_translate_length:
        return False

    has_multibyte = check_multibyte_str(stripped, True)
    if limit_to_multibyte_str:
        return has_multibyte

    return has_multibyte or bool(re.search("[a-zA-Z]", stripped))


# ------------------------------------------------------
# Translation API wrappers
# ------------------------------------------------------
def revise_with_openai4(text, openai_model, prompt, temperature, max_tokens, cfg):
    """Fill *prompt* with *text* and send it through ``query_openai4``.

    Returns the stripped content of the first choice, or ``""`` when the query
    yields a falsy or empty-dict response. Token usage is printed, and the call
    sleeps for ``cfg.tsleep_rpm`` seconds (when positive) before returning.
    """
    filled_prompt = process_template(prompt, {"text": text, "additional_prompt": ""})

    request_options = {
        "role": "",  # The role is merged into the prompt; put it there if needed.
        "temperature": temperature,
        "max_tokens": max_tokens,
        "openai_api_key": cfg.openai_api_key,
    }
    response = query_openai4(filled_prompt, openai_model, **request_options)

    nothing_returned = (not response) or (response == {})
    if nothing_returned:
        return ""

    token_usage = response.usage
    print(f"  prompt_tokens     : {token_usage.prompt_tokens}")
    print(f"  completion_tokens: {token_usage.completion_tokens}")

    pause = cfg.tsleep_rpm
    if pause > 0:
        time.sleep(pause)

    first_choice = response.choices[0]
    return first_choice.message.content.strip()


def revise_with_openai5(text, openai_model5, prompt, effort, max_output_tokens, cfg):
    """Fill *prompt* with *text* and send it through ``query_openai5``.

    Returns whatever ``extract_openai5_text`` yields for the response, or
    ``""`` when the query returns a falsy or empty-dict response. The extracted
    text is printed, and the call sleeps for ``cfg.tsleep_rpm`` seconds (when
    positive) before returning.
    """
    filled_prompt = process_template(prompt, {"text": text, "additional_prompt": ""})

    request_options = {
        "role": "",  # merged into the prompt
        "effort": effort,
        "max_output_tokens": max_output_tokens,
        "openai_api_key": cfg.openai_api_key,
    }
    response = query_openai5(filled_prompt, openai_model5, **request_options)

    nothing_returned = (not response) or (response == {})
    if nothing_returned:
        return ""

    extracted = extract_openai5_text(response)
    print(f"  text: {extracted}")

    pause = cfg.tsleep_rpm
    if pause > 0:
        time.sleep(pause)

    return extracted


def revise_with_google(text, google_model, prompt, temperature, max_tokens, cfg):
    """Fill *prompt* with *text* and send it to a Google/Gemini model via ``query_google2``.

    Args:
        text: text to translate or revise.
        google_model: model name passed to ``query_google2``.
        prompt: template containing ``{{text}}`` / ``{{additional_prompt}}``.
        temperature: sampling temperature.
        max_tokens: value used for ``max_output_tokens``.
        cfg: settings; ``google_api_key`` and ``tsleep_rpm`` are read, optional
            ``top_p`` / ``top_k`` override the defaults 0.8 / 40.

    Returns:
        The stripped response text, or ``""`` when reading ``response.text``
        raises ``ValueError``.

    Raises:
        RuntimeError: no API key is available from *cfg* or the
            ``GOOGLE_API_KEY`` environment variable.

    The key resolved here is now forwarded to ``query_google2``. Before, the
    call always passed ``google_api_key=None``, so a key supplied on the
    command line was resolved and then ignored.
    """
    if cfg.google_api_key is None:
        cfg.google_api_key = os.getenv("GOOGLE_API_KEY")
    if not cfg.google_api_key:
        print(
            "\nError in tkai_lib.query_google(): Can not get GOOGLE_API_KEY"
            "\n    define the environment variable GOOGLE_API_KEY\n"
        )
        raise RuntimeError("GOOGLE_API_KEY is not set")

    prompt_text = process_template(
        prompt,
        {
            "text": text,
            "additional_prompt": "翻訳結果のみを出力してください。前置きや後付けのメッセージは不要です。",
        },
    )

    generation_config = {
        "temperature": temperature,
        "top_p": getattr(cfg, "top_p", 0.8),
        "top_k": getattr(cfg, "top_k", 40),
        "max_output_tokens": max_tokens,
        "candidate_count": 1,
    }

    messages = [
        {
            "role": "user",
            "parts": [prompt_text],
        }
    ]

    blocked_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    safety_settings = [
        {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
        for category in blocked_categories
    ]

    response = query_google2(
        messages,
        google_model=google_model,
        generation_config=generation_config,
        safety_settings=safety_settings,
        google_api_key=cfg.google_api_key,
    )

    if cfg.tsleep_rpm > 0:
        time.sleep(cfg.tsleep_rpm)

    try:
        return response.text.strip()
    except ValueError as e:
        print("\n--- [WARNING] Google API returned invalid response (safety filter etc.).")
        print("---           Skipping this text. Error:", e)
        print(f"---           Original: '{text[:80]}...'")
        return ""


def translate_with_deepl(text, deepl_api_key, endpoint, source_lang="JA", target_lang="EN", cfg = None):
    """Translate *text* with DeepL through ``query_deepl``.

    Args:
        text: text to translate.
        deepl_api_key: API key forwarded to ``query_deepl``.
        endpoint: endpoint forwarded to ``query_deepl``.
        source_lang: source language code.
        target_lang: target language code.
        cfg: optional settings; only ``tsleep_rpm`` is read.

    Returns:
        The text extracted by ``extract_deepl_text`` on HTTP 200; otherwise the
        error is printed and the untranslated *text* is returned.

    *cfg* defaults to ``None``, which used to fail on ``cfg.tsleep_rpm``; a
    missing cfg (or missing attribute) now simply means "no pause".
    """
    response = query_deepl(
        text,
        source_lang,
        target_lang,
        deepl_api_key=deepl_api_key,
        endpoint=endpoint,
    )

    pause = getattr(cfg, "tsleep_rpm", 0) or 0
    if pause > 0:
        time.sleep(pause)

    if response.status_code == 200:
        return extract_deepl_text(response)

    print(f"Error: {response.status_code}, {response.text}")
    return text


def translate(text, api, prompt, cfg):
    """Route *text* to the backend selected by *api* and return its result.

    Recognised values are ``"openai5"``, ``"openai"``, ``"google"`` /
    ``"gemini"`` and ``"deepl"``. Any other value prints an error and exits
    with status 1. *prompt* is not used by the DeepL backend.
    """
    def _google():
        return revise_with_google(
            text, cfg.google_model, prompt, cfg.temperature, cfg.max_tokens, cfg
        )

    # Each entry is a thunk so that cfg is only read for the chosen backend.
    backends = {
        "openai5": lambda: revise_with_openai5(
            text, cfg.openai_model5, prompt, cfg.reasoning_effort, cfg.max_tokens, cfg
        ),
        "openai": lambda: revise_with_openai4(
            text, cfg.openai_model, prompt, cfg.temperature, cfg.max_tokens, cfg
        ),
        "google": _google,
        "gemini": _google,
        "deepl": lambda: translate_with_deepl(
            text, cfg.deepl_api_key, cfg.endpoint, cfg.source_lang, cfg.target_lang, cfg
        ),
    }

    backend = backends.get(api)
    if backend is None:
        print(f"\nError in translate(): Invalid API [{api}]\n")
        sys.exit(1)
    return backend()


# ------------------------------------------------------
# Translation result checks
# ------------------------------------------------------
def check_translation(text, translated, allowed_translation_length_ratio):
    """Sanity-check a translation before it replaces the original text.

    Args:
        text: the original text.
        translated: the text returned by the translation backend.
        allowed_translation_length_ratio: maximum accepted
            ``len(translated) / len(text)``.

    Returns:
        bool: ``False`` when the translation
          * is ``None``, empty or whitespace only,
          * starts with ``'''`` or three backticks,
          * has more leading ``#`` characters than the original,
          * has exactly one more leading ``*`` than the original, or
          * is longer than the allowed ratio of the original length;
        ``True`` otherwise.

    Blank translations are rejected because the backend wrappers in this file
    return ``""`` when a request produces no usable response; accepting that
    would overwrite the original text with nothing.
    """
    if translated is None or not translated.strip():
        return False
    if translated.startswith(("'''", "```")):
        return False

    if n_leading_chars(translated, "#") > n_leading_chars(text, "#"):
        return False

    if n_leading_chars(translated, "*") == n_leading_chars(text, "*") + 1:
        return False

    if len(text) > 0 and len(translated) > len(text) * allowed_translation_length_ratio:
        return False

    return True


# ------------------------------------------------------
# Per-format processing
# ------------------------------------------------------
def translate_html(text, api, api_model, prompt, cfg):
    """Translate the visible text nodes of an HTML document.

    Args:
        text: HTML source.
        api: backend name passed to ``translate``.
        api_model: unused here; kept for a signature parallel to the other
            ``translate_*`` functions.
        prompt: prompt template passed to ``translate``.
        cfg: settings; ``min_translate_length``, ``limit_to_multibyte_str`` and
            ``allowed_translation_length_ratio`` are read.

    Returns:
        tuple: ``(html, data)`` where ``html`` is the modified document as a
        string and ``data`` is a list of ``{"original", "translated"}`` dicts
        for every accepted replacement.

    Comments, doctype/declaration nodes, processing instructions, CDATA and the
    contents of ``<script>`` / ``<style>`` are skipped. They are all returned
    by ``find_all(string=True)``, but translating them would rewrite code, and
    replacing a comment node with a plain string turns it into visible text.
    """
    # bs4 is already a dependency of this file; these names are only needed here.
    from bs4 import CData, Comment, Declaration, Doctype, ProcessingInstruction

    non_content_nodes = (CData, Comment, Declaration, Doctype, ProcessingInstruction)
    non_content_parents = ("script", "style")

    data = []
    print()
    print(">>> Analyzing html...")
    soup = BeautifulSoup(text, "html.parser")

    print("Translating content...")
    for element in soup.find_all(string=True):
        if isinstance(element, non_content_nodes):
            continue
        parent = element.parent
        if parent is not None and parent.name in non_content_parents:
            continue

        # Work on a plain str so the report data does not hold on to tree nodes.
        original_text = str(element)
        if not to_translate(original_text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
            continue

        try:
            revised_text = translate(original_text, api, prompt, cfg)
        except Exception as e:
            # Best effort: leave this node untouched and carry on with the rest.
            print(f"  *** Translation failed for this element. Skipping. Error: {e}")
            continue

        print(f"[Original] {original_text}")
        print(f"  -> [Revised] {revised_text}")
        if check_translation(original_text, revised_text, cfg.allowed_translation_length_ratio):
            element.replace_with(revised_text)
            data.append({"original": original_text, "translated": revised_text})
        else:
            print("  *** This translation is rejected")

    return str(soup), data


def translate_pptx(ppt, api, api_model, prompt, cfg):
    """Translate the text of a python-pptx presentation in place.

    Slides are walked shape by shape (text frames, tables, groups, charts) and
    then their notes. Slide masters and layouts are processed only when
    ``cfg.translate_master`` is truthy.

    Args:
        ppt: the presentation object.
        api: backend name passed to ``translate``.
        api_model: unused here; kept for a signature parallel to the other
            ``translate_*`` functions.
        prompt: prompt template passed to ``translate``.
        cfg: settings; ``process_unit``, ``min_translate_length``,
            ``limit_to_multibyte_str``, ``allowed_translation_length_ratio``
            and the optional ``translate_master`` are read.

    Returns:
        tuple: ``(ppt, data)`` where ``data`` lists ``{"original",
        "translated"}`` dicts for every accepted replacement.

    ``cfg.translate_master`` used to be overwritten with ``False`` on entry,
    which made the master/layout branch unreachable. A value set by the caller
    is now honoured; when unset it still defaults to ``False`` and is stored
    back on *cfg* as before.
    """
    translate_master = bool(getattr(cfg, "translate_master", False))
    cfg.translate_master = translate_master

    data = []
    print()
    print(">>> Translating PPTX (shapes, groups, tables, notes, charts)...")

    def translate_unit(unit, label):
        """Translate one object with a read/write ``.text`` (paragraph or run)."""
        original_text = unit.text
        if not to_translate(original_text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
            return
        try:
            revised_text = translate(original_text, api, prompt, cfg)
        except Exception as e:
            # Best effort: on failure the original text is kept.
            print(f"  *** Translation failed for this {label}. Skipping. Error: {e}")
            return

        print(f"[Original] {original_text}")
        print(f"  -> [Revised] {revised_text}")
        if check_translation(original_text, revised_text, cfg.allowed_translation_length_ratio):
            unit.text = revised_text
            data.append({"original": original_text, "translated": revised_text})
        else:
            print("  *** This translation is rejected")

    def translate_paragraph(paragraph):
        """Translate a paragraph as a whole or run by run, per cfg.process_unit."""
        if cfg.process_unit == "paragraph":
            translate_unit(paragraph, "paragraph")
        else:
            for run in paragraph.runs:
                translate_unit(run, "run")

    def translate_text_frame(text_frame):
        """Translate every paragraph of a text frame."""
        for paragraph in text_frame.paragraphs:
            translate_paragraph(paragraph)

    def translate_axis_title(chart, axis_name):
        """Translate the title of the named chart axis, if the chart has one."""
        try:
            axis = getattr(chart, axis_name, None)
        except ValueError:
            # python-pptx documents that category_axis / value_axis raise
            # ValueError for charts without that axis (e.g. pie charts);
            # hasattr() does not swallow that, so it is handled here.
            return
        if axis is None:
            return
        if getattr(axis, "has_title", False) and axis.axis_title.has_text_frame:
            translate_text_frame(axis.axis_title.text_frame)

    def translate_chart(chart):
        """Translate chart title, axis titles and data-label text frames."""
        if chart.has_title and chart.chart_title.has_text_frame:
            translate_text_frame(chart.chart_title.text_frame)

        translate_axis_title(chart, "category_axis")
        translate_axis_title(chart, "value_axis")

        for series in chart.series:
            for pt in getattr(series, "points", []):
                lbl = getattr(pt, "data_label", None)
                if lbl is not None and getattr(lbl, "has_text_frame", False):
                    translate_text_frame(lbl.text_frame)

    def translate_shape(shape):
        """Translate whatever text a shape carries, recursing into groups."""
        if shape.has_text_frame:
            translate_text_frame(shape.text_frame)

        # Read defensively, like has_chart below: presumably not every shape
        # class defines has_table -- TODO confirm against python-pptx.
        if getattr(shape, "has_table", False):
            for row in shape.table.rows:
                for cell in row.cells:
                    translate_text_frame(cell.text_frame)

        if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
            for shp in shape.shapes:
                translate_shape(shp)

        if getattr(shape, "has_chart", False):
            translate_chart(shape.chart)

    for slide_idx, slide in enumerate(ppt.slides, start=1):
        print(f"\n--- Slide {slide_idx} ---")
        for shape in slide.shapes:
            translate_shape(shape)

        if slide.has_notes_slide:
            notes_slide = slide.notes_slide
            ntf = getattr(notes_slide, "notes_text_frame", None)
            if ntf is not None:
                print("  Translating notes...")
                translate_text_frame(ntf)

    # Static text on masters and layouts is translated only on request.
    if translate_master:
        print("\n>>> Translating slide masters and layouts (static text)...")
        for master in ppt.slide_masters:
            for shape in master.shapes:
                translate_shape(shape)
            for layout in master.slide_layouts:
                for shape in layout.shapes:
                    translate_shape(shape)
    else:
        print("\n>>> Skipping translation of slide masters and layouts (static text).")

    return ppt, data


def translate_docx(doc, api, api_model, prompt, cfg):
    """Translate the body paragraphs and table cells of a python-docx document in place.

    Args:
        doc: the document object.
        api: backend name passed to ``translate``.
        api_model: unused here; kept for a signature parallel to the other
            ``translate_*`` functions.
        prompt: prompt template passed to ``translate``.
        cfg: settings; ``process_unit``, ``min_translate_length``,
            ``limit_to_multibyte_str`` and ``allowed_translation_length_ratio``
            are read.

    Returns:
        tuple: ``(doc, data)`` where ``data`` lists ``{"original",
        "translated"}`` dicts for every accepted replacement.

    Only ``doc.paragraphs`` and ``doc.tables`` are visited. The four
    copy-pasted translate/check/apply stanzas are now one helper, and a table
    cell backed by the same underlying element is processed once.
    """
    data = []
    by_paragraph = cfg.process_unit == "paragraph"

    def translate_unit(unit, label):
        """Translate one object with a read/write ``.text`` (paragraph or run)."""
        original_text = unit.text
        if not to_translate(original_text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
            return
        try:
            revised_text = translate(original_text, api, prompt, cfg)
        except Exception as e:
            # Best effort: on failure the original text is kept.
            print(f"  *** Translation failed for this {label}. Skipping. Error: {e}")
            return

        print(f"[Original] {original_text}")
        print(f"  -> [Revised] {revised_text}")
        if check_translation(original_text, revised_text, cfg.allowed_translation_length_ratio):
            unit.text = revised_text
            data.append({"original": original_text, "translated": revised_text})
        else:
            print("  *** This translation is rejected")

    def translate_paragraph(paragraph, paragraph_label, run_label):
        """Translate a paragraph as a whole or run by run, per cfg.process_unit."""
        if by_paragraph:
            translate_unit(paragraph, paragraph_label)
        else:
            for run in paragraph.runs:
                translate_unit(run, run_label)

    print()
    print(">>> Processing paragraphs/runs...")
    for paragraph in doc.paragraphs:
        translate_paragraph(paragraph, "paragraph", "run")

    print(">>> Processing tables...")
    # python-docx reports a merged cell once per grid position it covers, each
    # time backed by the same XML element; remember the elements already
    # handled so such a cell is not sent for translation repeatedly.
    # NOTE(review): relies on the private ``_tc`` attribute -- confirm it
    # exists in the installed python-docx version (falls back to no
    # de-duplication when it does not).
    seen_cells = set()
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_element = getattr(cell, "_tc", None)
                if cell_element is not None:
                    if cell_element in seen_cells:
                        continue
                    seen_cells.add(cell_element)
                for paragraph in cell.paragraphs:
                    translate_paragraph(
                        paragraph,
                        "table cell (paragraph)",
                        "table cell (run)",
                    )
    return doc, data


def translate_text(text, api, api_model, prompt, cfg):
    """Translate a plain-text / Markdown document as a single block.

    Args:
        text: the whole document text.
        api: backend name passed to ``translate``.
        api_model: unused here; kept for a signature parallel to the other
            ``translate_*`` functions.
        prompt: prompt template passed to ``translate``.
        cfg: settings; ``min_translate_length`` and ``limit_to_multibyte_str``
            are read.

    Returns:
        tuple: ``(result, data)``. ``result`` is the translated text, or the
        unchanged input when the text is not eligible, the backend raises, or
        the backend returns no text. ``data`` holds one ``{"original",
        "translated"}`` dict when a translation was attempted, else is empty.

    A blank result is treated like a failure because the backend wrappers in
    this file return ``""`` when a request yields no usable response; passing
    that through would replace the whole document with an empty string.
    """
    data = []
    print(">>> Translating plain text...")

    if not to_translate(text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
        return text, data

    original_text = text
    try:
        revised_text = translate(text, api, prompt, cfg)
    except Exception as e:
        print(f"  *** Translation failed for the entire text block. Skipping. Error: {e}")
        revised_text = original_text  # keep the original text on failure

    if not revised_text or not revised_text.strip():
        print("  *** Translation returned no text. Keeping the original text.")
        revised_text = original_text

    print(f"[Original] {original_text}")
    print(f"  -> [Revised] {revised_text}")
    data.append({"original": original_text, "translated": revised_text})
    return revised_text, data


# ------------------------------------------------------
# Main execution
# ------------------------------------------------------
def execute(cfg):
    """Translate ``cfg.infile``, save the revised file, and write a comparison report.

    The file type returned by ``get_filetype`` selects the processing route:

    * ``md`` / ``txt``: the file is read and translated as one text block.
    * ``cfg.use_md`` set (any other type): the file is converted to markdown,
      the markdown is saved, PDFs are additionally reformatted through the
      API, and the resulting text is translated.
    * otherwise: ``docx`` / ``pptx`` / ``html`` are translated through their
      dedicated ``translate_*`` functions; ``pdf`` is first converted to docx.

    Exceptions raised while translating or saving are printed rather than
    propagated. Afterwards a comparison HTML report is rendered from
    ``cfg.html_template_path`` when that template can be located.

    Args:
        cfg: Configuration namespace. Reads at least ``infile``, ``use_md``,
            ``api``, ``openai_model``, ``translate_prompt``,
            ``reformat_prompt`` and ``html_template_path``. Note that
            ``html_template_path`` is overwritten in place when the template
            is found in the current or script directory.

    Returns:
        None.

    Raises:
        SystemExit: If the extension is invalid or unsupported, or if the
            input file does not exist.
    """
    filetype = get_filetype(cfg.infile)

    if filetype is None:
        print(f"\nError in execute(): Invalid extension in [{cfg.infile}]\n")
        sys.exit(1)

    # ------------------ Output path selection ------------------
    # NOTE: This duplicates logic on the GUI side; here we decide the paths
    # used when running from the command line.
    revised_suffix_by_type = {
        "md": "_revised.md",
        "pdf": "_revised.docx",
        "docx": "_revised.docx",
        "html": "_revised.html",
        "pptx": "_revised.pptx",
        "txt": "_revised.txt",
    }
    mdfile = None  # Only set (and only used) in use_md mode.
    if cfg.use_md and filetype != "md":
        mdfile = replace_path(cfg.infile, ".md")
        outfile = replace_path(cfg.infile, "_revised.md")
    elif filetype in revised_suffix_by_type:
        outfile = replace_path(cfg.infile, revised_suffix_by_type[filetype])
    else:
        print(f"\nError in execute(): Unsupported extension in [{cfg.infile}]\n")
        sys.exit(1)

    output_comparison_html_path = replace_path(cfg.infile, "_compare.html")
    output_reformat_md = replace_path(cfg.infile, "_reformat.md")
    output_docx = replace_path(cfg.infile, ".docx")

    print("=== Translate and revise file ===")
    print(f"  Input file              : {cfg.infile}")
    # ... (printing of the remaining settings is omitted) ...
    print(f"  Output file             : {outfile}")

    if not os.path.isfile(cfg.infile):
        print(f"Error: File [{cfg.infile}] does not exist")
        usage()
        sys.exit(1)

    data = []
    # Holds the translated document object or text once translation succeeds.
    translated_document = None

    # ------------------ Translation (wrapped in try) ------------------
    try:
        # ---- md/txt ----
        if filetype in ("md", "txt"):
            print(f"Read [{cfg.infile}]")
            text = read_file(cfg.infile)
            # Only translate when the file could actually be read.
            if text is not None:
                text, data = translate_text(text, cfg.api, cfg.openai_model, cfg.translate_prompt, cfg)
            translated_document = text
            if text is None:
                print(f"Error for filetype = {filetype}: Could not get text")

        # ---- use_md mode ----
        elif cfg.use_md:
            print(f"Read [{cfg.infile}] and convert to markdown")
            text = convert_to_md(cfg.infile)
            if text is None:
                # Conversion failed: there is nothing to save or translate.
                print("Error for md: Could not get text")
            else:
                print(f"=== Saving markdown to {mdfile} ===")
                save(mdfile, text)

                if filetype == "pdf":
                    print(f"Reformatting [{mdfile}] by API...")
                    # Reformat first, then translate the reformatted text.
                    text2 = translate(text, cfg.api, cfg.reformat_prompt, cfg)
                    if not text2:
                        # Fall back to the unreformatted markdown.
                        print("Error: Could not get reformatted text")
                    else:
                        print(f"Reformatted MD file is saved to [{output_reformat_md}]")
                        save(output_reformat_md, text2)
                        text = text2

                text, data = translate_text(text, cfg.api, cfg.openai_model, cfg.translate_prompt, cfg)
                translated_document = text
                if text is None:
                    print("Error in getting translated text: Could not get text")

        # ---- Normal mode (docx, pptx, html, pdf -> docx) ----
        else:
            if filetype == "pdf":
                print(f"Converting [{cfg.infile}] to [{output_docx}]")
                pdf_to_docx(cfg.infile, output_docx)
                print(f"Read [{output_docx}]")
                doc = Document(output_docx)
                translated_document, data = translate_docx(doc, cfg.api, cfg.openai_model, cfg.translate_prompt, cfg)

            elif filetype == "pptx":
                ppt = Presentation(cfg.infile)
                translated_document, data = translate_pptx(ppt, cfg.api, cfg.openai_model, cfg.translate_prompt, cfg)

            elif filetype == "docx":
                print(f"Read [{cfg.infile}]")
                doc = Document(cfg.infile)
                translated_document, data = translate_docx(doc, cfg.api, cfg.openai_model, cfg.translate_prompt, cfg)

            elif filetype == "html":
                print(f"Read [{cfg.infile}]")
                html = read_file(cfg.infile)
                translated_document, data = translate_html(html, cfg.api, cfg.openai_model, cfg.translate_prompt, cfg)

    except Exception as e:
        # Report the error and deliberately fall through to the save step
        # instead of terminating. The message is kept short because the stack
        # trace is displayed on the GUI side.
        # NOTE(review): translated_document is only assigned after a
        # translate_* call returns, so an exception raised inside one leaves
        # nothing to save below.
        print(f"\n[ERROR] Translation process interrupted by an exception: {e}")

    # ------------------ Save the result (runs regardless of errors) ------------------
    print()
    if translated_document is not None:
        try:
            if filetype in ("md", "txt") or cfg.use_md:
                # md, txt, or use_md mode: the result is plain text.
                print(f"=== Saving revised text to {outfile} ===")
                save(outfile, translated_document)
            elif filetype == "html":
                print(f"=== Saving revised html to {outfile} ===")
                save(outfile, translated_document)
            elif filetype in ("docx", "pdf", "pptx"):
                # docx, pdf (converted to docx), pptx: the result is a
                # document object that knows how to save itself.
                save_doc_type = "docx" if filetype in ("docx", "pdf") else "pptx"
                print(f"=== Saving revised {save_doc_type} to {outfile} ===")
                translated_document.save(outfile)
        except Exception as save_e:
            print(f"[ERROR] Failed to save the partially translated file to {outfile}: {save_e}")

    # ------------------------------------------------------
    # Comparison HTML generation
    # ------------------------------------------------------
    print()
    if not data and translated_document is None:
        # Nothing was translated, so there is nothing to report.
        print("--- [WARNING] No translation data or document object available. Skipping HTML report.")
        return

    print(f"Creating comparison HTML file from template [{cfg.html_template_path}]")

    # The template context is a snapshot of the configuration plus the
    # translation pairs; it is taken before the template path is resolved.
    context = {**vars(cfg), "data": data}

    # If the template is not found at the configured path, look for a file of
    # the same name in the current directory, then in the script directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    for search_dir in (os.getcwd(), script_dir):
        if os.path.isfile(cfg.html_template_path):
            break
        cand = os.path.join(search_dir, os.path.basename(cfg.html_template_path))
        if os.path.isfile(cand):
            cfg.html_template_path = cand

    print(f"  Template path: [{cfg.html_template_path}]")

    if os.path.isfile(cfg.html_template_path):
        try:
            template_dir = os.path.dirname(cfg.html_template_path)
            template_file = os.path.basename(cfg.html_template_path)

            env = Environment(loader=FileSystemLoader(template_dir))
            template = env.get_template(template_file)

            rendered_html = template.render(context)

            print()
            print(f"Save translation data to [{output_comparison_html_path}]")
            with open(output_comparison_html_path, "w", encoding="utf-8") as f:
                f.write(rendered_html)
            print("  Finished.\n")
        except Exception as html_e:
            print(f"[ERROR] Failed to generate comparison HTML report: {html_e}")
    else:
        print()
        print(f"--- [WARNING] Template file '{cfg.html_template_path}' not found.")
        print("---           Looked in current and script directories.")
        print("---           Skipping generation of comparison HTML report.")
    print()


def main():
    """Build the configuration and run the translation.

    Stores the argument parser returned by ``initialize`` in the module-level
    ``parser`` variable, passes the initial configuration through
    ``update_variables``, and hands the result to ``execute``.
    """
    global parser
    initial_cfg, parser = initialize()
    execute(update_variables(initial_cfg, parser))


if __name__ == "__main__":
    # Script entry point: run the translation, then print a blank line
    # followed by the usage text.
    main()
    print()
    usage()
    # Block until the user presses ENTER before the script exits.
    input("\nPress ENTER to terminate>>\n")