# このプログラムの実行には以下のライブラリと外部依存が必要です。
# pip install chardet pyttsx3 openai pydub pyperclip
# 
# 外部依存:
# - ffmpeg.exe: 環境PATHに設定するか、pydubに検出させる必要があります。
# - AquesTalkPlayer.exe: Windowsでのみ使用可能。実行パスを指定する必要があります。
# - OpenAI API Key: 環境変数 OPENAI_API_KEY に設定する必要があります。

import os
import sys
import argparse
import traceback

# Verify up front that the required modules can be imported, so the user gets
# one readable report instead of a raw ImportError traceback.
missing = []
for lib in ["chardet", "pyperclip", "tktts"]:
    try:
        __import__(lib)
    except ImportError:
        missing.append(lib)

if missing:
    print(f"Error: Missing libraries:\n{', '.join(missing)}")
    # chardet and pyperclip appear in the pip command in the file header;
    # tktts does not (presumably a local module), so it is reported separately
    # instead of being suggested as a pip package.
    pip_missing = [lib for lib in missing if lib != "tktts"]
    if pip_missing:
        print(f"  install: pip install {' '.join(pip_missing)}")
    if "tktts" in missing:
        print("  tktts: make sure the tktts module (tktts.py) is on the import path")
    input("\nPress ENTER to terminate>>\n")
    sys.exit(1)

import chardet
import pyperclip

# tkTTS is the engine wrapper that main() instantiates. A failure here is only
# reported, not fatal, so that argument parsing (e.g. --help) still works.
# NOTE(review): main() cannot construct the engine if this import failed.
try:
    import tktts
    from tktts import tkTTS
except Exception as e:
    # The message names the module that actually failed to import here.
    print("\nWarning: Import error for tkTTS from tktts")
    print("------------------------------------------------------------------")
    print(f"Error message: {e}")
    print("Traceback:")
    traceback.print_exc()
    print("------------------------------------------------------------------")


# Default values for the command-line options defined in initialize().
DEFAULT_INPUT = "clip"
DEFAULT_ENGINE = "pyttsx3"
DEFAULT_VOICEVOX_ENDPOINT = "http://127.0.0.1:50021"
DEFAULT_AQUESTALK_PATH = "AquesTalkPlayer.exe"
DEFAULT_TEMP_DIR = "tts_temp_wavs"

# Per-engine table mapping a speaker name to the voice name used for it.
# Passed to tkTTS.update_voice_map() / show_voice_map() from main().
VOICE_MAPS = {
    "pyttsx3": {
        "四国めたん": "Zira",
        "ずんだもん": "David",
        "れいむ": "Zira",
        "まりさ": "David",
    },
    "aquestalkplayer": {
        "四国めたん": "れいむ",
        "ずんだもん": "まりさ",
        "れいむ": "れいむ",
        "まりさ": "まりさ",
        "青山龍星": "青山龍星",
    },
    "openai": {
        "四国めたん": "nova",
        "ずんだもん": "shimmer",
        "れいむ": "alloy",
        "まりさ": "fable",
    },
}

# Overwritten with the --pause option in main(); read by terminate().
pause = 0


def terminate():
    """Exit the program, optionally waiting for the user first.

    When the module-level ``pause`` flag is truthy, the user is prompted to
    press ENTER before the process exits.

    Raises:
        SystemExit: always.
    """
    if pause:
        input("\nPress ENTER to terminate>>\n")
    # sys.exit() rather than the bare exit(): the latter is a helper injected
    # by the site module and is not available when Python runs without it.
    sys.exit()

def initialize(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which is what the
            parameterless call did before.

    Returns:
        argparse.Namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser(
        description="統合TTS (pyttsx3, VOICEVOX, AquesTalkPlayer, OpenAI) CLIツール")

    # Engine selection and text interpretation.
    parser.add_argument("--tts", "-t",
                        choices=["pyttsx3", "voicevox", "aquestalkplayer", "atp", "openai"],
                        default=DEFAULT_ENGINE, help="TTSエンジンを選択")
    parser.add_argument("--endpoint", type=str, default=DEFAULT_VOICEVOX_ENDPOINT,
                        help="VOICEVOX Engineのendpoint")
    # NOTE: a --language option for pyttsx3 is currently disabled.
    parser.add_argument("--monologue", "-m", type=int, default=0,
                        help="独話形式 (カンマのない行も読み込む)")
    parser.add_argument("--voices", "-v", type=str, default="",
                        help="voice_map の上書き (key=val;key=val)")
    parser.add_argument("--replace", "-r", type=str, default="",
                        help="文字列置換ルール (key=val;key=val)")

    # Input / output.
    parser.add_argument("--infile", "-i", type=str, default=DEFAULT_INPUT,
                        help="入力元 ('clip' またはファイルパス)")
    parser.add_argument("--outfile", "-o", type=str, default="",
                        help="出力音声ファイル (未指定の場合、リアルタイム再生)")

    # Program control.
    parser.add_argument("--temp_dir", type=str, default=DEFAULT_TEMP_DIR,
                        help="一時ファイルを作成するディレクトリ名 (AquesTalkPlayer/OpenAI使用時)")
    parser.add_argument("--list", action="store_true", help="利用可能な voices を表示して終了")
    parser.add_argument("--map", action="store_true", help="voice map を表示して終了")
    parser.add_argument("--pause", "-p", type=int, default=0, help="終了時に入力待ちする")
    parser.add_argument("--wait_for_clipboard", type=int, default=1,
                        help="Clipboardからテキストを取得する際に入力待ちする")

    # Speech parameters.
    parser.add_argument("--speak_rate", type=int, default=150,
                        help="pyttsx3 の読み上げ速度 (Word Per Minute)")
    parser.add_argument("--fspeak_rate", type=float, default=1.0,
                        help="VOICEVOX の読み上げ速度比 (標準: 1.0)")
    parser.add_argument("--fspeak_pitch", type=float, default=0.0,
                        help="VOICEVOX の声の高さ (標準: 0.0)")

    # AquesTalkPlayer / OpenAI specific options.
    parser.add_argument("--aquestalk_path", type=str, default=DEFAULT_AQUESTALK_PATH,
                        help="AquesTalkPlayer.exe の実行パス (AquesTalkPlayer使用時)")
    parser.add_argument("--tinterval", type=float, default=0.5,
                        help="AquesTalkPlayer/OpenAIの音声ファイル間に挿入する無音区間の長さ（秒、デフォルト 0.5）")
    parser.add_argument("--instruction", type=str, default="",
                        help="OpenAI TTS APIへの追加指示 (OpenAI使用時)")

    return parser.parse_args(argv)

def _print_settings(args):
    """Print the effective command-line settings."""
    print(f"TTS engine  : {args.tts}")
    print(f"is monologue: {args.monologue}")
    print(f"Input       : {args.infile}")
    print(f"Output      : {args.outfile}")
    print(f"pyttsx3  speak_rate: {args.speak_rate}")
    print(f"VOICEVOX speak_rate: {args.fspeak_rate}")
    print(f"VOICEVOX speak_pitch: {args.fspeak_pitch}")
    print(f"VOICEVOX Engine endpoint: {args.endpoint}")
    print(f"wait_for_clipboard: {args.wait_for_clipboard}")


def _voice_map_key_rank(key):
    """Return the display group of a voice-map key: str, then other, then int."""
    if isinstance(key, str):
        return 0
    # bool is excluded so that such keys stay in the middle group, as before.
    if isinstance(key, int) and not isinstance(key, bool):
        return 2
    return 1


def _print_voice_map(voice_map):
    """Print the voice map with str keys first, int keys last."""
    # sorted() is stable, so entries keep their insertion order inside a group.
    ordered = sorted(voice_map.items(), key=lambda item: _voice_map_key_rank(item[0]))
    for key, val in ordered:
        print(f"  (speaker) {key}: (voice) {val}")


def _sorted_speakers(speakers):
    """Return the speakers sorted, tolerating a None entry among strings."""
    speakers = list(speakers)
    try:
        return sorted(speakers)
    except TypeError:
        # None (monologue) cannot be compared with str; list None first and
        # order the remaining entries by their string form.
        return sorted(speakers, key=lambda sp: (sp is not None, str(sp)))


def main():
    """Run the TTS command-line tool.

    Parses the command line, builds the tkTTS engine wrapper, optionally lists
    voices or shows the voice map, then loads the text, resolves the voice map
    and replacement rules, and speaks the dialogue. Early exits go through
    terminate().
    """
    global pause

    print()
    print("\n===== 統合TTS CLIツール speak.py =====")

    args = initialize()
    pause = args.pause
    if args.infile == "":
        args.infile = "clip"

    _print_settings(args)

    # The module-level import of tkTTS only warns on failure; report that here
    # instead of failing with a NameError below.
    if "tkTTS" not in globals():
        print("エラー: tktts から tkTTS を読み込めなかったため、処理を続行できません。")
        terminate()

    # endpoint and aquestalk_path are handed over through args (config).
    tts = tkTTS(tts_name=args.tts, config=args)

    if args.list:
        print()
        tts.list_available_voices()
        terminate()

    if args.map:
        print()
        tts.show_voice_map(args.infile, args.voices, VOICE_MAPS, args.monologue)
        terminate()

    print()
    print(f"[{args.infile}]を解析します:")
    dialogue = tts.load_text(args.infile, args.monologue,
                             wait_for_clipboard=args.wait_for_clipboard)
    if not dialogue:
        print("エラー: 有効なテキストデータが取得できませんでした。")
        if not args.monologue:
            print("  対話形式でない場合は --monologue=1 オプションをつけてください。")
        terminate()

    speakers_in_file = tts.get_speakers_from_dialogue(dialogue)
    print(f"  Speakers in [{args.infile}]")
    for idx, sp in enumerate(speakers_in_file):
        print(f"    {idx:02d}: {sp}")

    current_voice_map = tts.update_voice_map(
        voice_map=VOICE_MAPS, voices=args.voices, speakers=speakers_in_file)

    print()
    print("=== 置換辞書 ===")
    replacements = tts.parse_kv_string(args.replace)
    if replacements:
        for key, val in replacements.items():
            print(f"  {key}: {val}")
    else:
        print("  (なし)")

    print()
    print("Voice map updated:")
    _print_voice_map(current_voice_map)

    print("=== 検出された話者とvoice ===")
    print("Voice map updated;", current_voice_map)
    for sp in _sorted_speakers(speakers_in_file):
        if sp is None or sp == "":
            # Monologue line: fall back to the entry stored under key 0.
            voice = current_voice_map.get(sp, None)
            if voice is None:
                voice = current_voice_map.get(0, None)
            print(f"  (独話): {voice}")
        else:
            name = tts.normalize_speaker(sp, args.tts)
            print(f"  (speaker) {name}: (voice) {current_voice_map.get(name, '未設定')}")

    print()
    print("--- 読み上げ処理開始 ---")
    ret = tts.speak_dialogue(
        config=args, dialogue=dialogue,
        voice_map=current_voice_map, replacements=replacements)
    if ret is None:
        terminate()

    print("--- 処理完了 ---")


# Script entry point: run the tool, then leave through terminate() so the
# optional ENTER prompt (pause flag) also applies after a normal run.
if __name__ == "__main__":
    main()
    terminate()
