import os
import sys
import argparse
from pathlib import Path
import numpy as np
from types import SimpleNamespace
try:
    from dotenv import load_dotenv
except:
    print("\nError to ijmport dotenv")
    input("Install: pip install dotenv\n")
from scipy.io import wavfile
try:
    import pyperclip
except:
    print("\nError to ijmport pyperclip")
    input("Install: pip install pyperclip\n")
try:
    import pyttsx3
except:
    print("\nError to ijmport pyttsx3")
    input("Install: pip install pyttsx3\n")

# Mapping from speaker name to the path of its HTS voice (.htsvoice) file.
# Adjust these paths to match where the Open JTalk voices are installed on
# your system.
SPEAKER_VOICE_PATHS = {
    "mei": "C:/open_jtalk_dic_utf_8-1.11/voice/mei/mei_normal.htsvoice",
    "momo": "C:/open_jtalk_dic_utf_8-1.11/voice/momo/momo_normal.htsvoice",
    "takumi": "C:/open_jtalk_dic_utf_8-1.11/voice/takumi/takumi_normal.htsvoice",
    "kyoko": "C:/open_jtalk_dic_utf_8-1.11/voice/kyoko/kyoko_normal.htsvoice"
}

def initialize():
    """Build and return the command-line argument parser.

    Recognized options:
        --engine / -e:    TTS engine, one of openai, pyttsx3, pyopenjtalk
                          (default: pyttsx3).
        --outfile / -o:   output file path (default: output.mp3).
        --language:       language used with pyttsx3 (default: japanese).
        --voicetype:      voice type used with pyttsx3 (default: female).
        --speed / -s:     reading speed as a float (default: 1.0).
        --speaker / -p:   speaker name for pyopenjtalk (default: None).
        --list-speakers:  flag; list the pyopenjtalk speakers and exit.

    Returns:
        The configured ``argparse.ArgumentParser``; parsing is left to
        the caller.
    """
    cli = argparse.ArgumentParser(description="Clipboard to speech tool")

    # (flags, keyword arguments) pairs, registered in display order.
    option_table = (
        (
            ("--engine", "-e"),
            {
                "choices": ["openai", "pyttsx3", "pyopenjtalk"],
                "default": "pyttsx3",
                "help": "TTSエンジンを選択",
            },
        ),
        (
            ("--outfile", "-o"),
            {"default": "output.mp3", "help": "出力ファイルパス"},
        ),
        (
            ("--language",),
            {"default": "japanese", "help": "pyttsx3 で使用する言語"},
        ),
        (
            ("--voicetype",),
            {"default": "female", "help": "pyttsx3 で使用する音声の種類"},
        ),
        (
            ("--speed", "-s"),
            {
                "type": float,
                "default": 1.0,
                "help": "pyttsx3/pyopenjtalk の読み上げ速度 (例: 1.0が標準)",
            },
        ),
        (
            ("--speaker", "-p"),
            {"default": None, "help": "pyopenjtalk の声質選択用 speaker 名"},
        ),
        (
            ("--list-speakers",),
            {
                "action": "store_true",
                "help": "pyopenjtalk で使用可能な speaker 一覧を表示して終了",
            },
        ),
    )
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    return cli

def get_htsvoice_path(speaker_name):
    """Return the HTS voice file path configured for ``speaker_name``.

    The name is looked up in ``SPEAKER_VOICE_PATHS``. If the speaker is
    unknown, or the configured file does not exist on disk, an error is
    printed and the process exits with status 1.
    """
    # Guard 1: the speaker must be one of the configured names.
    if speaker_name not in SPEAKER_VOICE_PATHS:
        print(f"Error: Unknown speaker '{speaker_name}'. Please choose from: {list(SPEAKER_VOICE_PATHS.keys())}")
        sys.exit(1)

    # Guard 2: the configured voice file must actually be present.
    voice_file = SPEAKER_VOICE_PATHS[speaker_name]
    if not os.path.exists(voice_file):
        print(f"Error: HTS voice file not found at path: {voice_file}")
        sys.exit(1)

    return voice_file


def _voice_search_text(voice):
    """Return the lowercase text of a voice's languages and name for matching."""
    parts = []
    for lang in (getattr(voice, "languages", None) or []):
        # Some pyttsx3 drivers report language codes as bytes; decode them so
        # they can be joined with str values.
        if isinstance(lang, bytes):
            lang = lang.decode("utf-8", errors="ignore")
        parts.append(str(lang).lower())
    name = getattr(voice, "name", None) or ""
    return ":".join(parts) + str(name).lower()


def _select_voice_index(voices, language, voicetype):
    """Pick the index of the voice that best matches language and gender.

    Preference order: a voice matching both ``language`` and ``voicetype``,
    then a voice matching ``language`` only, then index 0. When several voices
    qualify at the same level, the last one in ``voices`` wins.
    """
    wanted_language = (language or "").lower()
    wanted_gender = (voicetype or "").lower()

    language_match = None
    full_match = None
    for idx, voice in enumerate(voices):
        if wanted_language not in _voice_search_text(voice):
            continue
        language_match = idx
        gender = getattr(voice, "gender", None)
        # gender may be None when the driver does not report it.
        if gender is not None and wanted_gender == str(gender).lower():
            full_match = idx

    # Compare against None explicitly: index 0 is a valid match.
    if full_match is not None:
        return full_match
    if language_match is not None:
        return language_match
    return 0


def main(args):
    """Read text from the clipboard and speak it with the selected engine.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``engine``, ``outfile``, ``language``, ``voicetype``, ``speed``,
            ``speaker`` and (optionally) ``list_speakers``.

    Behavior by engine:
        pyttsx3:     speaks the text directly using the best matching voice.
        openai:      loads API settings from env files, streams the
                     synthesized audio to ``outfile`` and plays it.
        pyopenjtalk: synthesizes a waveform, writes it to ``outfile`` with
                     ``scipy.io.wavfile`` and plays it.

    Raises:
        ValueError: if the openai engine is selected and ``OPENAI_API_KEY``
            is not set after loading the env files.
    """
    # List the speakers configured for pyopenjtalk, then exit.
    if args.engine == "pyopenjtalk" and getattr(args, 'list_speakers', False):
        # The module is not used below; presumably imported so that a missing
        # pyopenjtalk installation is reported here.
        import pyopenjtalk  # noqa: F401

        print("Available speakers (configured in script):")
        for s in SPEAKER_VOICE_PATHS:
            print(f"  {s}")
        print("\nNote: Please ensure the corresponding HTS voice files exist at the specified paths.")
        return

    print()
    input("読み上げるテキストをクリップボードにコピーしてください:\n")

    engine = args.engine
    outfile = args.outfile
    speech_path = Path(outfile)
    fplay = False

    # Fetch the text to read from the clipboard.
    text = pyperclip.paste()
    print()
    print("クリップボードの内容:")
    print(text)
    print(f"Engine: {engine}")
    print(f"Language: {args.language}")
    print(f"Voice type: {args.voicetype}")
    print()

    # Nothing to synthesize: report it instead of calling the engines with
    # empty input.
    if not text or not text.strip():
        print("Clipboard is empty; nothing to read.")
        input("\nPress ENTER to terminate>>\n")
        return

    if engine == "pyttsx3":
        tts = pyttsx3.init()
        # pyttsx3 documents 'rate' as an integer (words per minute).
        tts.setProperty('rate', int(round(150 * args.speed)))

        voices = tts.getProperty('voices') or []
        print("Available voices:")
        for v in voices:
            print(f"  {v.languages}  {v.gender}  {v.age} ({v.name})")

        if voices:
            ivoice = _select_voice_index(voices, args.language, args.voicetype)
            v = voices[ivoice]
            print(f"Selected language: #{ivoice}: {v.languages}  {v.gender}  {v.age} ({v.name})")
            tts.setProperty('voice', v.id)
        else:
            print("No voices reported by pyttsx3; using the driver default.")

        tts.say(text)
        tts.runAndWait()
    elif engine == "openai":
        from openai import OpenAI
        cfg = SimpleNamespace(config_path="translate.env")
        # Fall back to the script's own directory when the config file is not
        # in the current working directory.
        if not os.path.isfile(cfg.config_path):
            cfg.config_path = os.path.join(os.path.dirname(__file__), cfg.config_path)
        load_dotenv(dotenv_path=cfg.config_path)
        # The first env file may point at a second one holding account data.
        cfg.account_inf_path = os.getenv("account_inf_path", "accounts.env")
        load_dotenv(dotenv_path=cfg.account_inf_path)
        api_key = os.getenv("OPENAI_API_KEY")
        if api_key is None:
            raise ValueError("Missing OPENAI_API_KEY in environment")
        client = OpenAI(api_key=api_key)
        tts_model = os.getenv("TTS_MODEL", "tts-1")
        tts_voice = os.getenv("TTS_VOICE", "nova")
        import warnings
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        with client.audio.speech.with_streaming_response.create(
            model=tts_model,
            voice=tts_voice,
            input=text
        ) as response:
            response.stream_to_file(str(speech_path))
        print(f"Saved to [{outfile}]")
        fplay = True
    elif engine == "pyopenjtalk":
        import pyopenjtalk

        tts_kwargs = {}
        if args.speed is not None:
            tts_kwargs['speed'] = args.speed

        # When a speaker is given, resolve its htsvoice path and pass it on.
        # NOTE(review): confirm that pyopenjtalk.tts() accepts an `htsvoice`
        # keyword in the installed version; if it does not, this call raises
        # TypeError whenever --speaker is used.
        if args.speaker:
            htsvoice_path = get_htsvoice_path(args.speaker)
            tts_kwargs['htsvoice'] = htsvoice_path

        print(f"pyopenjtalk: speed={args.speed}, speaker={args.speaker}")
        x, sr = pyopenjtalk.tts(text, **tts_kwargs)
        # NOTE(review): this writes WAV data regardless of the outfile
        # extension (the default is "output.mp3") — confirm that is intended.
        wavfile.write(str(speech_path), sr, x.astype(np.int16))
        print(f"Saved to [{outfile}]")
        fplay = True

    else:
        print(f"Unknown engine: {engine}")

    if fplay:
        print(f"Playing [{outfile}]")
        # NOTE(review): winsound plays WAV files; audio saved in another
        # format (e.g. MP3 from the openai engine) may not play — confirm.
        try:
            import winsound
            winsound.PlaySound(str(speech_path), winsound.SND_FILENAME)
        except ImportError:
            print("winsound module not available, skipping playback.")
        except RuntimeError as exc:
            # PlaySound signals playback failure with RuntimeError; the file
            # has already been saved, so report and continue.
            print(f"Playback failed: {exc}")

    input("\nPress ENTER to terminate>>\n")
    
    
if __name__ == "__main__":
    # Script entry point: build the CLI parser, parse sys.argv, and run.
    main(initialize().parse_args())
    
    