import os
import sys
import time
import re
from pathlib import Path
import chardet
import re
from dotenv import load_dotenv
from types import SimpleNamespace

from urllib.parse import urlparse
import html2text
from bs4 import BeautifulSoup

from pdf2docx import Converter
from docx import Document
from pptx import Presentation
from markitdown import MarkItDown

from jinja2 import Environment, FileSystemLoader

import requests
import openai
from openai import OpenAI


# ------------------------------------------------------
# 初期設定
# ------------------------------------------------------
# Role and prompt used when text extracted from a PDF is reformatted by an LLM.
# Both strings are sent to the model as-is; the {{ text }} tag in the prompt is
# filled in by process_template() inside the revise_with_* functions.
reformat_role = "あなたは専門的な英語を正確かつ筋の通った文章に校正するアシスタントです。"
reformat_prompt = '''以下の*＃テキスト*以下の文章はPDFファイルからテキストを抜き出したものですが、以下の作業をお願いします
・論文題目、著者名、所属名、雑誌名、DOIをとりだし、[TITLE], [AUTHORS], [AFFILIATIONS], [JOURNAL], [DOI]に続けて出力してください
・本文の文章が崩れているので、もとの文章を復元してください 
＃テキスト
{{ text }}
'''

# Placeholders for the Gemini SDK modules; read_api_inf() imports them and
# rebinds these globals only when the "gemini" API is selected.
genai = None
gexceptions = None


def initialize():
    """Build the run configuration from built-in defaults and the .env files.

    Built-in defaults are assigned first.  The file named by ``config_path``
    is then loaded with ``load_dotenv``, followed by the account file it
    points to, and a subset of the settings is re-read from the environment.

    Returns:
        SimpleNamespace: the populated configuration object.
    """
    cfg = SimpleNamespace(
        config_path="translate.env",
        # Built-in defaults; several of them are re-read from the environment below.
        html_template_path='template_translate.html',
        account_inf_path="accounts.env",
        api='openai',  # 'deepl'
        openai_model="gpt-4o",
        mode=None,
        infile='translate_test.docx',
        output_html_path=None,
        process_unit='paragraph',  # or 'run'
        use_md=False,
        limit_to_multibyte_str=False,
        min_translate_length=5,
        allowed_translation_length_ratio=5.0,
        # OpenAI API sampling parameters
        temperature=0.3,
        max_tokens=2000,
    )

    # Load the main settings file first: it may name the account file.
    load_dotenv(dotenv_path=cfg.config_path)
    cfg.account_inf_path = os.getenv("account_inf_path", "accounts.env")
    load_dotenv(dotenv_path=cfg.account_inf_path)

    env = os.getenv
    cfg.html_template_path = env("html_template_path", "template_translate.html")
    cfg.role_content = env("role_content")
    cfg.prompt = env("prompt_template")

    cfg.min_translate_length = int(env("min_translate_length", "5"))
    cfg.allowed_translation_length_ratio = float(env("allowed_translation_length_ratio", "5.0"))

    cfg.openai_model = env("openai_model", "gpt-4o")
    cfg.temperature = float(env("temperature", "0.3"))
    cfg.max_tokens = int(env("max_tokens", "2000"))

    cfg.endpoint = env("endpoint")

    return cfg


# ------------------------------------------------------
# 基本関数
# ------------------------------------------------------
def usage():
    """Print the command-line synopsis, framed by blank lines, to stdout."""
    synopsis = (
        "",
        f"Usage: python {sys.argv[0]} api mode infile process_unit",
        "   api: [openai|gemini|deepl]",
        "   mode: [je|ee]",
        "   process_unit: 'paragraph' (recommended), 'run' (to keep character formats), or 'md'",
        "",
    )
    for line in synopsis:
        print(line)

def getarg(i, defval = None):
    """Return command-line argument ``i``, or ``defval`` when it was not given."""
    argv = sys.argv
    return argv[i] if len(argv) > i else defval

def process_template(template: str, context: dict) -> str:
    r"""Expand escape sequences and ``{{ key }}`` tags in a template string.

    The two-character sequences ``\t``, ``\n`` and ``\r`` written literally in
    the template are turned into the real control characters first; the tags
    are substituted afterwards, so substituted values are inserted verbatim.

    Args:
        template (str): Text containing literal escape sequences and tags.
        context (dict): Values for the tags, keyed by tag name.

    Returns:
        str: The expanded text.  Tags whose key is missing from ``context``
        are kept, normalized to the form ``{{ key }}``.
    """
    for literal, control in ((r'\t', '\t'), (r'\n', '\n'), (r'\r', '\r')):
        template = template.replace(literal, control)

    def _substitute(found):
        name = found.group(1).strip()
        if name in context:
            return str(context[name])
        # Unknown tag: leave it in place for the reader to notice.
        return f'{{{{ {name} }}}}'

    return re.sub(r'\{\{\s*(.*?)\s*\}\}', _substitute, template)

# ------------------------------------------------------
# 起動時引数設定
# ------------------------------------------------------
def update_variables(cfg):
    """Apply the command-line arguments on top of the loaded configuration.

    Positional arguments are ``api mode infile process_unit``; any that are
    missing keep the value already in ``cfg`` (``mode`` defaults to ``"je"``).
    The two characters of ``mode`` select the source and target language:
    ``'j'`` means Japanese, anything else English.

    Args:
        cfg: configuration namespace produced by initialize().

    Returns:
        The same ``cfg`` object with ``api``, ``mode``, ``source_lang``,
        ``target_lang``, ``limit_to_multibyte_str``, ``infile`` and
        ``process_unit`` updated.

    Raises:
        SystemExit: if ``mode`` is shorter than two characters.
    """
    cfg.api = getarg(1, cfg.api)
    cfg.mode = getarg(2, "je")

    if len(cfg.mode) < 2:
        print()
        print(f"Error: Invalid mode [{cfg.mode}]")
        usage()
        sys.exit(1)

    source_is_japanese = cfg.mode[0] == 'j'
    cfg.source_lang = "JA" if source_is_japanese else "EN"
    # Stored on cfg (not in a local) so that to_translate() actually sees it:
    # with a Japanese source only text containing multibyte characters is sent.
    cfg.limit_to_multibyte_str = source_is_japanese
    cfg.target_lang = "JA" if cfg.mode[1] == 'j' else "EN"

    cfg.infile = getarg(3, cfg.infile)
    cfg.process_unit = getarg(4, cfg.process_unit)

    return cfg


# ------------------------------------------------------
# Key, アカウント情報等読み込み
# ------------------------------------------------------
def _llm_prompts_defined(cfg):
    """Return True when cfg has both role_content and prompt; report the first one missing."""
    if not cfg.role_content:
        print("ERROR: role_contentを定義してください")
        return False

    if not cfg.prompt:
        print("ERROR: prompt_templateを定義してください")
        return False

    return True


def read_api_inf(cfg):
    """Read the credentials and settings required by the selected API.

    Values come from environment variables (populated earlier from the .env
    files).  For "gemini" the Google SDK modules are imported here and bound
    to the module-level ``genai`` / ``gexceptions`` names.

    Args:
        cfg: configuration namespace; ``cfg.api`` selects the backend and the
            API-specific attributes are stored back onto it.

    Returns:
        bool: True when everything needed is available, False otherwise
        (an error message has been printed in that case).
    """
    global genai, gexceptions

    if cfg.api == 'openai':
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            print("ERROR: OpenAI APIキーが見つかりません。環境変数 'OPENAI_API_KEY' を設定してください。")
            return False

        return _llm_prompts_defined(cfg)

    if cfg.api == "gemini":
        # Imported lazily so the SDK is only needed when Gemini is selected.
        import google.generativeai as _genai
        from google.api_core import exceptions as _gexceptions
        genai = _genai
        gexceptions = _gexceptions

        cfg.GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
        cfg.gemini_model = os.getenv("gemini_model")
        cfg.tsleep_rpm = float(os.getenv("tsleep_rpm", 0))

        if not cfg.GOOGLE_API_KEY:
            print("ERROR: Gemini APIキーが見つかりません。環境変数 'GOOGLE_API_KEY' を設定してください。")
            return False

        if not cfg.gemini_model:
            print("ERROR: Gemini modelが見つかりません。環境変数 'gemini_model' を設定してください。")
            return False

        return _llm_prompts_defined(cfg)

    if cfg.api == 'deepl':
        cfg.deepl_api_key = os.getenv("DEEPL_API_KEY")
        if not cfg.deepl_api_key:
            print("ERROR: DeepL APIキーが見つかりません。環境変数 'DEEPL_API_KEY' を設定してください。")
            return False

        if not cfg.endpoint:
            print("ERROR: endpointが見つかりません。環境変数 'endpoint' を設定してください。")
            return False

        return True

    print()
    print(f"Error: Invalid API [{cfg.api}]")
    print()
    usage()
    return False

# ------------------------------------------------------
# 関数
# ------------------------------------------------------

def save(path, text):
    """Write ``text`` to ``path`` as UTF-8, replacing any existing content."""
    with open(path, mode="w", encoding="utf-8") as out:
        out.write(text)

def read_file(path):
    """Return the whole content of a UTF-8 text file.

    Args:
        path: path of the file to read.

    Returns:
        str: the decoded file content.

    Raises:
        SystemExit: (status 1) if the file does not exist or cannot be
            opened/read; an error message naming the path is printed first.
    """
    if not os.path.exists(path):
        print(f"\nError in read_file(): File [{path}] does not exist\n")
        sys.exit(1)

    try:
        # open() signals failure by raising, never by returning a falsy value.
        with open(path, "r", encoding="utf-8") as fp:
            return fp.read()
    except OSError as e:
        print(f"\nError in read_file(): Failed to read [{path}]: {e}\n")
        sys.exit(1)

def replace_path(path, ext):
    """Return ``path`` with its final extension replaced by ``ext``.

    ``ext`` is appended as given, so it may also be a suffix such as
    ``"_revised.md"``.
    """
    stem, _old_ext = os.path.splitext(path)
    return stem + ext

def check_multibyte_str(text, limit_to_multibyte_str):
    """Tell whether ``text`` passes the multibyte filter.

    When ``limit_to_multibyte_str`` is false the filter is disabled and the
    result is always True.  Otherwise the result is True only if ``text``
    contains at least one character in the range U+0800..U+FFFF.
    """
    if limit_to_multibyte_str:
        return re.search(r'[\u0800-\uFFFF]', text) is not None
    return True

def html_to_markdown(html_file_path):
    """Read a UTF-8 HTML file and return it converted to Markdown by html2text."""
    with open(html_file_path, mode='r', encoding='utf-8') as src:
        markup = src.read()

    markdown = html2text.html2text(markup)
    return markdown

def convert_to_md(infile):
    """Convert an input file to Markdown text.

    ``.html`` files go through html_to_markdown(); ``.jpg`` images are
    described by an OpenAI model via MarkItDown; everything else is handed to
    MarkItDown's default conversion.

    Args:
        infile: path of the file to convert.

    Returns:
        str: the Markdown text.
    """
    # Decide on the real extension: a substring test would misfire on paths
    # that merely contain ".jpg" or ".html" somewhere (e.g. a directory name).
    ext = os.path.splitext(infile)[1].lower()

    if ext == ".html":
        return html_to_markdown(infile)

    if ext == ".jpg":
        print()
        print(f"Extract markdown from image file {infile}")
        client = OpenAI()
        # NOTE(review): these mlm_* keyword names depend on the installed
        # markitdown version (other releases may spell them llm_*) -- confirm.
        md = MarkItDown(mlm_client=client, mlm_model="gpt-4o-mini")
        result = md.convert(infile, mlm_prompt="画像について説明してください。")
    else:
        print()
        print(f"Convert {infile} to markdown")
        md = MarkItDown()
        result = md.convert(infile)

    return result.text_content

def n_leading_chars(s, c = '#'):
    """Count the characters at the start of ``s`` that ``str.lstrip(c)`` removes."""
    remainder = s.lstrip(c)
    return len(s) - len(remainder)

def pdf_to_docx(pdf_file, docx_file):
    """Convert a PDF file to a .docx file with pdf2docx.

    Args:
        pdf_file: path of the source PDF.
        docx_file: path of the .docx file to write.
    """
    cv = Converter(pdf_file)
    try:
        # start/end select the page range; end=None means through the last page.
        cv.convert(docx_file, start=0, end=None)
    finally:
        # Release the converter even when the conversion raises.
        cv.close()
    print(f"Converted '{pdf_file}' to '{docx_file}' successfully.")

def get_filetype(path):
    """Map the (case-insensitive) extension of ``path`` to a file-type name.

    Returns:
        One of "pdf", "docx", "pptx", "html", "txt", "md", or None when the
        extension is not recognized.
    """
    by_extension = {
        ".pdf": "pdf",
        ".docx": "docx",
        ".pptx": "pptx",
        ".html": "html",
        ".htm": "html",
        ".txt": "txt",
        ".text": "txt",
        ".md": "md",
    }
    suffix = os.path.splitext(path)[1].lower()
    return by_extension.get(suffix)

def to_translate(text, min_translate_length, limit_to_multibyte_str):
    """Decide whether a piece of text should be sent for translation.

    Args:
        text: candidate text; may be None.
        min_translate_length: minimum length (after stripping whitespace)
            for the text to be considered.
        limit_to_multibyte_str: when true, only text containing a multibyte
            character (see check_multibyte_str) qualifies.

    Returns:
        bool: True if the text should be translated.
    """
    # Test for None before touching the value; strip() on None would raise.
    if text is None:
        return False

    stripped = text.strip()
    if stripped == '':
        return False

    if len(stripped) < min_translate_length:
        return False

    # The multibyte filter alone decides from here: text that fails it is
    # rejected whether or not it contains ASCII letters.
    return check_multibyte_str(stripped, limit_to_multibyte_str)

def revise_with_openai(text, openai_model, role_content, prompt, temperature, max_tokens):
    """Send ``text`` to the OpenAI chat-completions API and return the reply.

    Args:
        text: text to translate/revise; substituted for ``{{ text }}``.
        openai_model: model name passed to the API.
        role_content: system message.
        prompt: user-message template processed by process_template().
        temperature: sampling temperature.
        max_tokens: completion token limit.

    Returns:
        str | None: the stripped reply text, or None when the reply carries
        no text content.
    """
    prompt = process_template(prompt, { "text": text, "additional_prompt": "" })

    response = openai.chat.completions.create(
        model = openai_model,
        messages=[
            {"role": "system", "content": role_content},
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # Named token_usage so the module-level usage() function is not shadowed.
    token_usage = response.usage
    if token_usage is not None:
        print(f"  prompt_tokens    : {token_usage.prompt_tokens}")
        print(f"  completion_tokens: {token_usage.completion_tokens}")

    content = response.choices[0].message.content
    if content is None:
        # Callers treat None as "no translation obtained".
        print("Error in revise_with_openai(): response contained no text content")
        return None

    return content.strip()

def revise_with_gemini(text, gemini_model, role_content, prompt, temperature, max_tokens, cfg=None):
    """Send ``text`` to the Gemini API and return the reply.

    Args:
        text: text to translate/revise; substituted for ``{{ text }}``.
        gemini_model: model name passed to ``genai.GenerativeModel``.
        role_content: instruction sent as the first user turn.
        prompt: template for the final user turn (see process_template()).
        temperature: sampling temperature.
        max_tokens: output token limit.
        cfg: optional configuration namespace; ``top_p``, ``top_k`` and
            ``tsleep_rpm`` are read from it when present.

    Returns:
        str | None: the stripped reply text, or None on any API error,
        blocked prompt, or reply without usable text.
    """
    prompt = process_template(prompt,
       {
           "text": text,
           "additional_prompt":
               "翻訳結果のみを出力してください。前置きや後付けのメッセージは不要です。"
       })

    generation_config = {
        "temperature": temperature,           # lower is more deterministic, higher more random
        "top_p": getattr(cfg, "top_p", 0.8),  # cumulative-probability cutoff for sampling
        "top_k": getattr(cfg, "top_k", 40),   # number of top candidates sampled from
        "max_output_tokens": max_tokens,
        "candidate_count": 1,
        }

    messages = [
            {
                'role': 'user',
                'parts': [role_content]
            },
            {
                'role': 'model',
                'parts': ["承知いたしました。何かお手伝いできることはありますか？"]
            },
            {
                'role': 'user',
                'parts': [prompt]
            }
        ]

    model = genai.GenerativeModel(gemini_model)
    try:
        response = model.generate_content(
            messages,
            generation_config = generation_config,
            safety_settings=[
                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            ],
                )
        if not response or not hasattr(response, "parts"):
            print("Error in revise_with_gemini(): 応答がありませんでした。")
            return None
    except gexceptions.ResourceExhausted as e:
        print(f"Error in revise_with_gemini(): Gemini APIエラーが発生しました: {e}")
        if e.code == 429:
            print("   出力クォータに達しました。時間をおいてから再度お試しください。")
        else:
            print(f"   エラー詳細 (コード: {e.code}): {e.message}")
        # Either way there is no response object to continue with.
        return None
    except Exception as e:
        print(f"Error in revise_with_gemini(): 予期せぬエラーが発生しました: {e}")
        return None

    if response.prompt_feedback:
        if response.prompt_feedback.block_reason:
            print(f"Error in revise_with_gemini(): プロンプトがブロックされました。理由: {response.prompt_feedback.block_reason}")
            if response.prompt_feedback.safety_ratings:
                print("安全性評価:")
                for rating in response.prompt_feedback.safety_ratings:
                    print(f"- カテゴリ: {rating.category}, 評価: {rating.probability}")
            return None

    # Optional pause between calls (seconds) to stay under a request-rate limit.
    tsleep_rpm = getattr(cfg, "tsleep_rpm", 0)
    if tsleep_rpm > 0:
        time.sleep(tsleep_rpm)

    try:
        return response.text.strip()
    except ValueError as e:
        # Accessing .text fails when the reply holds no usable text part.
        print(f"Error in revise_with_gemini(): could not read response text: {e}")
        return None

def translate_with_deepl(text, deepl_api_key, endpoint, source_lang = 'JA', target_lang = 'EN', timeout = 60):
    """Translate ``text`` with the DeepL HTTP API.

    Args:
        text: text to translate.
        deepl_api_key: DeepL authentication key.
        endpoint: URL of the translate endpoint.
        source_lang: source language code.
        target_lang: target language code.
        timeout: seconds to wait for the server before giving up.

    Returns:
        str: the translated text, or the original ``text`` unchanged when the
        request fails or the reply cannot be parsed.
    """
    headers = {"Authorization": f"DeepL-Auth-Key {deepl_api_key}"}
    params = {
        "text": text,
        "source_lang": source_lang,
        "target_lang": target_lang
    }

    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.post(endpoint, headers = headers, data = params, timeout = timeout)
    except requests.RequestException as e:
        print(f"Error: request to DeepL failed: {e}")
        return text  # on error, fall back to the original text

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return text  # on error, fall back to the original text

    try:
        return response.json()["translations"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f"Error: unexpected DeepL response: {e}")
        return text

def translate(text, api, openai_model, role_content, prompt, cfg):
    """Dispatch ``text`` to the backend named by ``api``.

    Returns whatever the selected backend function returns.  For an unknown
    ``api`` an error message is printed and the program exits.
    """
    if api == 'openai':
        return revise_with_openai(text, openai_model, role_content, prompt, cfg.temperature, cfg.max_tokens)

    if api == 'gemini':
        return revise_with_gemini(text, cfg.gemini_model, role_content, prompt, cfg.temperature, cfg.max_tokens, cfg)

    if api == 'deepl':
        return translate_with_deepl(text, cfg.deepl_api_key, cfg.endpoint, cfg.source_lang, cfg.target_lang)

    # No backend matched.
    print()
    print(f"Error in translate(): Invalid API [{api}]")
    print()
    exit()

# The OpenAI API sometimes turns English into Japanese or adds explanations,
# so translations matching any of these cases are rejected:
#  - the translation starts with ``` (and, to be safe, ''')
#  - the translation has extra leading # or * compared with the source
#  - the translation is much longer than the source
def check_translation(text, translated, allowed_translation_length_ratio):
    """Return True if ``translated`` is acceptable as a translation of ``text``.

    Rejected: None; text starting with ''' or ```; more leading '#' than the
    source; exactly one more leading '*' than the source; length greater than
    ``len(text) * allowed_translation_length_ratio``.
    """
    if translated is None or translated.startswith(("'''", "```")):
        return False

    extra_hashes = n_leading_chars(translated, '#') - n_leading_chars(text, '#')
    if extra_hashes > 0:
        return False

    extra_stars = n_leading_chars(translated, '*') - n_leading_chars(text, '*')
    if extra_stars == 1:
        return False

    too_long = len(translated) > len(text) * allowed_translation_length_ratio
    return not too_long

def translate_html(text, api, api_model, role_content, prompt, cfg):
    """Translate the text nodes of an HTML document in place.

    Args:
        text: HTML source.
        api, api_model, role_content, prompt: passed through to translate().
        cfg: configuration namespace (filter and length-ratio settings).

    Returns:
        tuple[str, list[dict]]: the HTML with accepted translations
        substituted, and the list of ``{"original", "translated"}`` pairs.
    """
    data = []
    print()
    print(">>> Analyzing html...")
    soup = BeautifulSoup(text, 'html.parser')

    print("Translating content...")
    for element in soup.find_all(string=True):  # every text node
        # Script and stylesheet bodies are code, not prose; leave them alone.
        parent = element.parent
        if parent is not None and parent.name in ("script", "style"):
            continue

        # Work on a plain str copy so the recorded original does not depend
        # on the node once it has been replaced in the tree.
        original_text = str(element)
        if not to_translate(original_text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
            continue

        revised_text = translate(original_text, api, api_model, role_content, prompt, cfg)
        print(f"[Original] {original_text}")
        print(f"  -> [Revised] {revised_text}")
        if check_translation(original_text, revised_text, cfg.allowed_translation_length_ratio):
            element.replace_with(revised_text)
            data.append({ "original": original_text, "translated": revised_text})
        else:
            print(f"  *** This translation is rejected")

    return str(soup), data

def translate_pptx(ppt, api, api_model, role_content, prompt, cfg):
    """Translate the text of every text frame in a presentation, in place.

    With ``cfg.process_unit == 'paragraph'`` each paragraph is translated as
    a whole; otherwise each run is translated separately.

    Returns:
        tuple: ``(ppt, data)`` where ``data`` lists the accepted
        ``{"original", "translated"}`` pairs.
    """
    data = []
    print()
    print(">>> Translating...")
    for slide in ppt.slides:
        for shape in slide.shapes:
            if not shape.has_text_frame:
                continue

            for paragraph in shape.text_frame.paragraphs:
                # A unit is anything with a readable/writable .text.
                if cfg.process_unit == 'paragraph':
                    units = [paragraph]
                else:
                    units = paragraph.runs

                for unit in units:
                    source = unit.text
                    if not to_translate(source, cfg.min_translate_length, cfg.limit_to_multibyte_str):
                        continue

                    revised = translate(source, api, api_model, role_content, prompt, cfg)
                    print(f"[Original] {source}")
                    print(f"  -> [Revised] {revised}")
                    if check_translation(source, revised, cfg.allowed_translation_length_ratio):
                        unit.text = revised
                        data.append({ "original": source, "translated": revised})
                    else:
                        print(f"  *** This translation is rejected")
    return ppt, data

def _translate_docx_paragraphs(paragraphs, api, api_model, role_content, prompt, cfg, data):
    """Translate each paragraph (or each run) in place and append accepted pairs to data."""
    for paragraph in paragraphs:
        # A unit is anything with a readable/writable .text.
        units = [paragraph] if cfg.process_unit == 'paragraph' else paragraph.runs
        for unit in units:
            original_text = unit.text
            if not to_translate(original_text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
                continue

            revised_text = translate(original_text, api, api_model, role_content, prompt, cfg)
            print(f"[Original] {original_text}")
            print(f"  -> [Revised] {revised_text}")
            if check_translation(original_text, revised_text, cfg.allowed_translation_length_ratio):
                unit.text = revised_text
                data.append({ "original": original_text, "translated": revised_text})
            else:
                print(f"  *** This translation is rejected")


def translate_docx(doc, api, api_model, role_content, prompt, cfg):
    """Translate the body paragraphs and table cells of a Word document in place.

    With ``cfg.process_unit == 'paragraph'`` each paragraph is translated as
    a whole; otherwise each run is translated separately.

    Args:
        doc: document object exposing ``paragraphs`` and ``tables``.
        api, api_model, role_content, prompt: passed through to translate().
        cfg: configuration namespace (process unit, filters, length ratio).

    Returns:
        tuple: ``(doc, data)`` where ``data`` lists the accepted
        ``{"original", "translated"}`` pairs.
    """
    data = []
    print()
    print(">>> Processing paragraphs/runs...")
    _translate_docx_paragraphs(doc.paragraphs, api, api_model, role_content, prompt, cfg, data)

    print(">>> Processing tables...")
    # A merged cell is reported once per grid position it covers; remember the
    # underlying cell element so each cell is translated only once.
    # NOTE(review): relies on python-docx exposing that element as cell._tc.
    seen_cells = {}
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_element = getattr(cell, "_tc", cell)
                if id(cell_element) in seen_cells:
                    continue
                # Keeping the reference alive keeps its id() unique.
                seen_cells[id(cell_element)] = cell_element
                _translate_docx_paragraphs(cell.paragraphs, api, api_model, role_content, prompt, cfg, data)
    return doc, data

def translate_text(text, api, api_model, role_content, prompt, cfg):
    """Translate a whole text in a single request.

    Args:
        text: the text to translate.
        api, api_model, role_content, prompt: passed through to translate().
        cfg: configuration namespace (filter settings).

    Returns:
        tuple: ``(result, data)``.  ``result`` is the translation, the
        original ``text`` unchanged when it does not qualify for translation,
        or None when the backend returned nothing.  ``data`` holds the
        ``{"original", "translated"}`` pair when a translation was attempted.
    """
    data = []
    # Default to the input so text that is filtered out passes through
    # untouched instead of leaving the result undefined.
    revised_text = text
    print(">>> Translating...")
    if to_translate(text, cfg.min_translate_length, cfg.limit_to_multibyte_str):
        original_text = text
        revised_text = translate(text, api, api_model, role_content, prompt, cfg)
        print(f"[Original] {original_text}")
        print(f"  -> [Revised] {revised_text}")
        data.append({ "original": original_text, "translated": revised_text})

    return revised_text, data

def execute(cfg):
    """Translate ``cfg.infile`` and write the result plus a comparison page.

    The input type is taken from the file extension.  ``.md``/``.txt`` files
    are translated as plain text.  With ``cfg.process_unit == 'md'`` any other
    input is first converted to Markdown (PDF text is additionally reformatted
    by the LLM) and then translated as text.  Otherwise pdf/docx/pptx/html
    inputs are translated in their own format.  Finally the original/translated
    pairs are rendered through the Jinja2 template ``cfg.html_template_path``
    (looked up in the current directory) into ``<infile>_compare.html``.

    Args:
        cfg: configuration namespace; ``cfg.use_md`` is set here.

    Raises:
        SystemExit: on an unsupported extension, a missing input file, or a
            failed conversion/translation step.
    """
    filetype = get_filetype(cfg.infile)
    cfg.use_md = 1 if cfg.process_unit == 'md' else 0

    # Work out the output path.  mdfile is only used on the convert-to-markdown path.
    mdfile = None
    if filetype in ('md', 'txt'):
        outfile = replace_path(cfg.infile, f"_revised.{filetype}")
    elif cfg.use_md:
        mdfile = replace_path(cfg.infile, ".md")
        outfile = replace_path(mdfile, "_revised.md")
    elif filetype in ('pdf', 'docx'):
        # PDFs are converted to .docx first, so the result is a .docx as well.
        outfile = replace_path(cfg.infile, "_revised.docx")
    elif filetype == 'html':
        outfile = replace_path(cfg.infile, "_revised.html")
    elif filetype == 'pptx':
        outfile = replace_path(cfg.infile, "_revised.pptx")
    else:
        print(f"\nError in execute(): Invalid extension in [{cfg.infile}]\n")
        sys.exit(1)

    output_comparison_html_path = replace_path(cfg.infile, '_compare.html')
    output_reformat_md = replace_path(cfg.infile, "_reformat.md")
    output_docx = replace_path(cfg.infile, ".docx")

    print("=== Translate and revise .docx/.pptx/.pdf/.html/.md file ===")
    print(f"  Input file            : {cfg.infile}")
    print(f"    file type           : {filetype}")
    print(f"  API                   : {cfg.api}")
    if cfg.api == "openai":
        print(f"  openai_model          : {cfg.openai_model}")
    elif cfg.api == "gemini":
        print(f"  gemini_model          : {cfg.gemini_model}")
    print(f"  Translation mode      : {cfg.mode}")
    print(f"  limit_to_multibyte_str: {cfg.limit_to_multibyte_str}")
    print(f"  min_translate_length  : {cfg.min_translate_length}")
    print(f"  allowed_translation_length_ratio: {cfg.allowed_translation_length_ratio}")
    print(f"  Output file           : {outfile}")
    print(f"  Template HTML file    : {cfg.html_template_path}")
    print(f"  Output compare file   : {output_comparison_html_path}")
    print(f"  Use markdown : {cfg.use_md}")
    if cfg.use_md:
        if mdfile is not None:
            print(f"  Markdown file: {mdfile}")
            if filetype == 'pdf':
                print(f"  Reformat markdown file: {output_reformat_md}")
    else:
        if filetype == 'docx' or filetype == 'pdf':
            print(f"  Process unit : {cfg.process_unit}")
            if filetype == 'pdf':
                print(f"  Converted docx file: {output_docx}")
    if cfg.api == "openai" or cfg.api == "gemini" or cfg.infile.endswith('.pdf'):
        print(f"  role_content : {cfg.role_content}")
        print(f"  prompt       : {cfg.prompt}")

    print()
    if not os.path.isfile(cfg.infile):
        print(f"Error: File [{cfg.infile}] does not exist")
        usage()
        sys.exit(1)

    if filetype == 'md' or filetype == 'txt':
        print(f"Read [{cfg.infile}]")
        text = read_file(cfg.infile)
        text, data = translate_text(text, cfg.api, cfg.openai_model, cfg.role_content, cfg.prompt, cfg)
        if text is None:
            print(f"Error for filetype = {filetype}: Could not get text")
            sys.exit(1)
        print(f"=== Saving revised text to {outfile} ===")
        save(outfile, text)
        # NOTE(review): plain-text input stops here, so no comparison page is
        # written for it -- confirm this is intended.
        sys.exit(0)
    elif cfg.use_md:
        print(f"Read [{cfg.infile}] and convert to markdown")
        text = convert_to_md(cfg.infile)
        if text is None:
            print(f"Error for md: Could not get text")
            sys.exit(1)
        print(f"=== Saving markdown to  {mdfile} ===")
        save(mdfile, text)

        # Text extracted from a PDF is first tidied up by the LLM.
        if filetype == 'pdf':
            print(f"Reformatting [{mdfile}] by {cfg.api}...")
            if cfg.api == 'openai':
                text = revise_with_openai(text, cfg.openai_model, reformat_role, reformat_prompt, cfg.temperature, cfg.max_tokens)
            elif cfg.api == "gemini":
                text = revise_with_gemini(text, cfg.gemini_model, reformat_role, reformat_prompt, cfg.temperature, cfg.max_tokens, cfg)
            else:
                print(f"Error: API [{cfg.api}] is unable to reformat PDF text")
                sys.exit(1)

            if text is None:
                print(f"Error: Could not get reformatted text")
                sys.exit(1)
            print(f"Reformatted MD file is saved to [{output_reformat_md}]")
            save(output_reformat_md, text)

        text, data = translate_text(text, cfg.api, cfg.openai_model, cfg.role_content, cfg.prompt, cfg)
        if text is None:
            print(f"Error in getting translated text: Could not get text")
            sys.exit(1)
        print(f"=== Saving revised text to {outfile} ===")
        save(outfile, text)

    if not cfg.use_md:
        if filetype == 'pdf':
            print(f"Converting [{cfg.infile}] to [{output_docx}]")
            pdf_to_docx(cfg.infile, output_docx)
            print(f"Read [{output_docx}]")
            doc = Document(output_docx)
            doc, data = translate_docx(doc, cfg.api, cfg.openai_model, cfg.role_content, cfg.prompt, cfg)
            print(f"=== Saving revised text to {outfile} ===")
            doc.save(outfile)
        elif filetype == 'pptx':
            ppt = Presentation(cfg.infile)
            ppt, data = translate_pptx(ppt, cfg.api, cfg.openai_model, cfg.role_content, cfg.prompt, cfg)
            print(f"=== Saving revised text to {outfile} ===")
            ppt.save(outfile)
        elif filetype == 'docx':
            print(f"Read [{cfg.infile}]")
            doc = Document(cfg.infile)
            doc, data = translate_docx(doc, cfg.api, cfg.openai_model, cfg.role_content, cfg.prompt, cfg)
            print(f"=== Saving revised text to {outfile} ===")
            doc.save(outfile)
        elif filetype == 'html':
            print(f"Read [{cfg.infile}]")
            html = read_file(cfg.infile)
            html, data = translate_html(html, cfg.api, cfg.openai_model, cfg.role_content, cfg.prompt, cfg)
            print(f"=== Saving revised text to {outfile} ===")
            save(outfile, html)

    # Render the original/translated pairs through the comparison template.
    context = cfg.__dict__.copy()
    context["data"] = data

    template_dir = os.getcwd()
    env = Environment(loader=FileSystemLoader(template_dir))
    template = env.get_template(cfg.html_template_path)
    rendered_html = template.render(context)

    print()
    print(f"Save translation data to [{output_comparison_html_path}]")
    with open(output_comparison_html_path, 'w', encoding='utf-8') as file:
        file.write(rendered_html)

    print()


def main():
    """Build the configuration, apply CLI arguments, check API settings, then run."""
    cfg = update_variables(initialize())

    api_ready = read_api_inf(cfg)
    if not api_ready:
        exit()

    execute(cfg)

# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
    # NOTE(review): usage() is printed after every run that returns from
    # main() -- presumably as a reminder of the arguments; confirm intended.
    usage()
