import os
import sys
from pathlib import Path
import chardet
from dotenv import load_dotenv
import re
import requests
from urllib.parse import urlparse
from bs4 import BeautifulSoup
import codecs  # BOM付き保存用ライブラリ


# ------------------------------------------------------
# Initial settings
# ------------------------------------------------------
# dotenv file read first (DEEPL_API_KEY / endpoint), then the tool's own config.
account_inf_path = "d:/MyWebs/Database/accounts.env"
config_path = "translate.env"

# NOTE(review): not read anywhere in the visible code.
limit_to_multibyte_str = False

# Default language pair; argv[2] / argv[3] override these.
source_lang, target_lang = "JA", "EN"

# Default page to translate; argv[1] overrides it.
url_to_translate = "http://d2mate.mdxes.iir.isct.ac.jp/D2MatE/excel/vba_setup.html"


# ------------------------------------------------------
# Load configuration files
# ------------------------------------------------------
# Account file: supplies the DeepL API key and the API endpoint URL.
load_dotenv(dotenv_path = account_inf_path)
api_key = os.getenv("DEEPL_API_KEY")
if not api_key:
    print("ERROR: DeepL APIキーが見つかりません。環境変数 'DEEPL_API_KEY' を設定してください。")
    sys.exit(1)

endpoint = os.getenv("endpoint")
# Validate the endpoint itself: translate_text() posts to this URL, so a
# missing value must stop the script here rather than fail on every request.
if not endpoint:
    print("ERROR: endpointが見つかりません。環境変数 'endpoint' を設定してください。")
    sys.exit(1)

# Tool configuration: optional overrides with built-in defaults.
load_dotenv(dotenv_path = config_path)
force_server_charcode = os.getenv("force_server_charcode", 'utf-8')
out_html = os.getenv("out_html", '')   # '' means: derive the name from the URL


# ------------------------------------------------------
# Command-line arguments
# ------------------------------------------------------
argv = sys.argv
nargs = len(argv)

# Up to three positional arguments replace, in order, the URL, the source
# language and the target language; anything not supplied keeps its default.
_overrides = argv[1:4]
_defaults = [url_to_translate, source_lang, target_lang]
url_to_translate, source_lang, target_lang = _overrides + _defaults[len(_overrides):]


print()
print(f"Translate [{url_to_translate}] from [{source_lang}] to [{target_lang}]")
print()


# ------------------------------------------------------
# 関数
# ------------------------------------------------------
def usage():
    """Print the command-line usage summary, preceded by a blank line."""
    lines = (
        "",
        "usage: python translate.py url (source_lang target_lang)",
        "  source_lang, target_lang: e.g., JA, EN",
    )
    print("\n".join(lines))


def split_url(url):
    """Break *url* into scheme, host, directory and file-name parts.

    Returns a dict with the keys "protocol", "server", "directory",
    "file_name", "file_body" and "extension". The three file-related values
    are None when the path does not end in a file name; "extension" is also
    None when the file name contains no dot; "directory" is None when the
    path contains no '/' at all.
    """
    parts = urlparse(url)
    segments = parts.path.split('/')

    # An empty last segment (path is empty or ends with '/') means "no file".
    file_name = segments[-1] or None

    file_body = extension = None
    if file_name is not None:
        # Split at the last dot only, so "a.tar.gz" -> ("a.tar", "gz").
        stem, dot, suffix = file_name.rpartition('.')
        if dot:
            file_body, extension = stem, suffix
        else:
            file_body = file_name

    if len(segments) > 1:
        directory = '/'.join(segments[:-1])
    else:
        directory = None

    result = {}
    result["protocol"] = parts.scheme
    result["server"] = parts.netloc
    result["directory"] = directory
    result["file_name"] = file_name
    result["file_body"] = file_body
    result["extension"] = extension
    return result

def get_filename_from_url(url, timeout=10):
    """Resolve *url* to a final URL whose path ends in a file name.

    The URL itself is probed first with a HEAD request (redirects are
    followed). If that does not yield a URL with a file name, common index
    document names are appended to *url* and probed in turn.

    Args:
        url: URL to resolve.
        timeout: Seconds to wait for each HEAD request before giving up on
            that candidate.

    Returns:
        The post-redirect URL of the first successful probe whose path ends
        in a file name, or None when no candidate qualifies.
    """
    print(f"Fetching HTML header from {url}...")
    filename_candidates = ['', 'index.html', 'index.php', 'Default.html', 'Default.htm']
    for f in filename_candidates:
        if f == "":
            probe_url = url
        elif url.endswith('/'):
            probe_url = url + f
        else:
            probe_url = url + '/' + f

        try:
            response = requests.head(probe_url, allow_redirects=True, timeout=timeout)
        except requests.RequestException as e:
            # Best effort: report the failure and move on to the next candidate.
            print(f"  HEAD {probe_url} failed: {e}")
            continue

        # Only a successful (status < 400) response counts as a hit.
        if not response.ok:
            continue

        final_url = response.url
        if split_url(final_url).get('file_name') is not None:
            return final_url

    return None

def get_from_url(url, force_server_charcode = "", timeout=10):
    """Download the HTML document behind *url*.

    The URL is first resolved with get_filename_from_url() so that the
    fetched URL ends in a file name, then retrieved with a GET request.

    Args:
        url: URL of the page to fetch.
        force_server_charcode: Encoding used to decode the body. When it is
            the empty string the encoding chosen by ``requests`` is kept,
            falling back to UTF-8 if there is none.
        timeout: Seconds to wait for the GET request.

    Returns:
        ``(html_text, final_url)`` on success. On failure the first item is
        an error message and the second is the empty string.
    """
    print(f"Fetching HTML from {url}...")
    resolved_url = get_filename_from_url(url)
    if resolved_url is None:
        # Report the resolution failure directly instead of handing None to requests.
        return f"Error: could not resolve a file name for URL [{url}]", ""

    try:
        response = requests.get(resolved_url, allow_redirects=True, timeout=timeout)
    except requests.RequestException as e:
        return f"Error: {e}", ""

    if response.status_code != 200:
        return f"Failed to fetch HTML: {response.status_code}", ""

    # Diagnostics only: the chardet result is printed but not used for decoding.
    print("response.encoding=", response.encoding)
    print(f"Content-Type Header: {response.headers.get('Content-Type')}")
    detected_encoding = chardet.detect(response.content)
    print(f"Detected Encoding: {detected_encoding}")

    if force_server_charcode != "":
        response.encoding = force_server_charcode
    if response.encoding is None:
        response.encoding = 'utf-8'

    # response.url is the final URL after any redirects.
    return response.text, response.url


def is_multibyte_str(text):
    """Return True if *text* contains any character in U+0800..U+FFFF.

    That range includes the Japanese kana and the common CJK ideographs.
    """
    return re.search(r'[\u0800-\uFFFF]', text) is not None

def translate_text(text, api_key, timeout=30):
    """
    Translate *text* with the DeepL API.

    The module-level ``endpoint``, ``source_lang`` and ``target_lang`` select
    the API URL and the language pair.

    Args:
        text: Text to translate.
        api_key: DeepL authentication key.
        timeout: Seconds to wait for the API response.

    Returns:
        The translated text. *text* itself is returned unchanged when it
        contains no character matched by is_multibyte_str(), or when the
        request fails for any reason (the failure is printed).
    """

    # NOTE(review): the module-level limit_to_multibyte_str flag is not
    # consulted; this filter is always applied. Confirm the intended behaviour.
    if not is_multibyte_str(text):
        return text

    headers = {"Authorization": f"DeepL-Auth-Key {api_key}"}
    params = {
        "text": text,
        "source_lang": source_lang,
        "target_lang": target_lang,
    }

    try:
        response = requests.post(endpoint, headers=headers, data=params, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure: keep the original text, as for HTTP errors.
        print(f"Error: {e}")
        return text

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return text  # on error, return the original text

    try:
        return response.json()["translations"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # Body was not JSON or did not have the expected shape.
        print(f"Error: unexpected response from translation API: {e}")
        return text

def translate_html(text, api_key):
    """
    Translate the human-readable text nodes of an HTML document.

    Each non-blank text node is passed to translate_text() and replaced by
    the result. Comments, doctype/declaration nodes, processing instructions,
    CDATA sections and the contents of <script>/<style> are left untouched:
    replacing them with a plain string would turn them into visible page text
    or corrupt embedded code.

    Args:
        text: HTML source.
        api_key: DeepL authentication key, forwarded to translate_text().

    Returns:
        The serialized HTML with translated text nodes.
    """
    # Function-scope import so this block does not depend on the file header.
    from bs4 import CData, Comment, Declaration, Doctype, ProcessingInstruction

    non_content_types = (CData, Comment, Declaration, Doctype, ProcessingInstruction)
    non_content_parents = {'script', 'style'}

    print("Parsing HTML...")
    soup = BeautifulSoup(text, 'html.parser')

    print("Translating content...")
    for element in soup.find_all(string=True):  # every string node in the tree
        if not element.strip():
            continue  # whitespace-only node
        if isinstance(element, non_content_types):
            continue  # markup-level node, not page text
        parent = element.parent
        if parent is not None and parent.name in non_content_parents:
            continue  # JavaScript / CSS source

        try:
            print(f"  Original: {element}")  # debug output
            translated_text = translate_text(element, api_key)
            print(f"  Translated: {translated_text}")
            element.replace_with(translated_text)
        except Exception as e:
            # Best effort: one failing node must not abort the whole page.
            print(f"Error while translating: {e}")

    return str(soup)


def _default_out_html(url_dict):
    """Build the default output file name from a split_url() result."""
    file_name = url_dict['file_name']
    if '_jp.' in file_name:
        # Plain string replacement: a regex replacement of r'_en\.' would
        # leave a literal backslash in the resulting file name.
        return file_name.replace('_jp.', '_en.')
    if url_dict['extension'] is None:
        # No extension in the URL: do not append a bogus ".None".
        return f"{url_dict['file_body']}_en"
    return f"{url_dict['file_body']}_en.{url_dict['extension']}"


def main():
    """Fetch the configured URL, translate it and save the result.

    The output file name is the module-level ``out_html``; when that is
    empty it is derived from the fetched URL ("x_jp.html" -> "x_en.html",
    otherwise "x.html" -> "x_en.html") and stored back into ``out_html``.
    An existing output file is never overwritten.

    Exits the process with status 1 when the page cannot be fetched or the
    final URL has no file name.
    """
    global out_html

    text, final_url = get_from_url(url_to_translate, force_server_charcode = force_server_charcode)
    if final_url == "":
        # get_from_url() signals failure with an empty URL and puts the message in text.
        print(f"Error in main(): {text}")
        sys.exit(1)

    print("final_url:", final_url)
    url_dict = split_url(final_url)

    if url_dict.get('file_name') is None:
        print(f"Error: URL [{final_url}] does not include file name.")
        sys.exit(1)

    if out_html == '':
        out_html = _default_out_html(url_dict)

    print("out_html:", out_html)
    if os.path.exists(out_html):
        print(f"\nError: out_html [{out_html}] exists. Terminate.\n")
        return

    translated_html = translate_html(text, api_key)

    print(f"Save translated HTML to {out_html}")
    with open(out_html, "w", encoding="utf-8") as file:
        file.write(translated_html)


if __name__ == "__main__":
    # Run the translation, then print the usage text. usage() is reached only
    # when main() returns; the paths in main() that terminate the process skip it.
    main()
    usage()
