import os
import sys
import re

# Check that the required third-party packages are importable before the real
# imports below, so a missing package produces a readable message and exit
# code 1 instead of an ImportError traceback.
missing = []
for lib in ["elevenlabs"]:
    try:
        __import__(lib)
    except ImportError:
        missing.append(lib)

if missing:
    print(f"Error: Missing libraries:\n{', '.join(missing)}")
    print("  install: pip install elevenlabs")
    try:
        # Wait for the user so the message can be read before the process ends
        # (presumably for runs where the console closes on exit).
        input("\nPress ENTER to terminate>>\n")
    except EOFError:
        # stdin is closed or non-interactive (pipe, CI, service): there is
        # nobody to wait for, so fall through to the clean exit below.
        pass
    sys.exit(1)

from elevenlabs import ElevenLabs

from tktts_base import apply_replacements, normalize_speaker, split_dialogue


# ElevenLabs engine settings
TTS_ENGINE_NAME = 'elevenlabs'

# Defaults aimed at a calm, natural-sounding reading
DEFAULT_MODEL_ID = "eleven_multilingual_v2"
DEFAULT_ELEVENLABS_VOICE = "Rachel"  # may not exist on some accounts (the first available voice is used instead)
DEFAULT_VOICE_SETTINGS = {
    "stability": 0.60,
    "similarity_boost": 0.75,
    # "style": 0.0,              # may be unsupported depending on plan/model
    # "use_speaker_boost": True  # same as above
}


def _get_client(api_key=None):
    """
    Build an ElevenLabs client.

    When api_key is None the client is constructed without arguments, in which
    case the SDK falls back to the ELEVENLABS_API_KEY environment variable.
    """
    # Only forward api_key when the caller actually supplied one.
    client_kwargs = {} if api_key is None else {"api_key": api_key}
    return ElevenLabs(**client_kwargs)


def get_available_voices_info(api_key=None):
    """
    Return the available voices as a list of dicts.

    Each entry has the keys "name", "id", "category" and "labels", e.g.
    [{"name": "Rachel", "id": "...", "category": "...", "labels": {...}}, ...]
    """
    response = _get_client(api_key).voices.get_all()

    def _describe(voice):
        # The SDK may return plain objects, so every field is read defensively
        # with getattr and a fallback value.
        return {
            "name": getattr(voice, "name", ""),
            "id": getattr(voice, "voice_id", None) or getattr(voice, "id", None),
            "category": getattr(voice, "category", None),
            "labels": getattr(voice, "labels", None),
        }

    # A missing or None `voices` attribute is treated as "no voices".
    return [_describe(voice) for voice in (getattr(response, "voices", []) or [])]


def get_available_voices(api_key=None):
    """Return the list of available voice names, or False when no voice was found."""
    infos = get_available_voices_info(api_key)
    return [entry["name"] for entry in infos] if infos else False


def list_available_voices(api_key=None):
    """
    Print one line per available voice (name, id, category, labels).

    Returns True when at least one voice was printed, False otherwise.
    """
    print(f"=== 利用可能な {TTS_ENGINE_NAME} voices ===")
    infos = get_available_voices_info(api_key)

    if infos:
        for entry in infos:
            print(f"  Name: {entry['name']}, ID: {entry['id']}, Category: {entry.get('category')}, Labels: {entry.get('labels')}")
        return True

    print("  (no voices found)")
    return False


def resolve_speaker_id(speaker_name, api_key=None, voices_dict=None):
    """
    Resolve a voice name (full name or substring) to a voice id.

    The name is passed through normalize_speaker() and compared
    case-insensitively: all voices are first checked for an exact match, then
    for a substring match; the first hit wins.

    Passing voices_dict (a list as returned by get_available_voices_info)
    avoids fetching the voice list again.

    Returns:
        A tuple (voices_dict, voice_id).

    Raises:
        ValueError: if no voice matches speaker_name.
    """
    if voices_dict is None:
        voices_dict = get_available_voices_info(api_key)

    needle = normalize_speaker(speaker_name).lower()

    # Matching passes in priority order: exact name first, then substring.
    passes = (
        lambda candidate: needle == candidate,
        lambda candidate: needle in candidate,
    )
    for is_match in passes:
        for entry in voices_dict:
            if is_match((entry["name"] or "").lower()):
                return voices_dict, entry["id"]

    raise ValueError(f"❌ Error in tktts_elevenlabs.resolve_speaker_id(): 話者 [{speaker_name}] が見つかりませんでした")


def _pick_default_voice_id(voices_dict):
    """
    DEFAULT_ELEVENLABS_VOICE が存在すればそれ、なければ先頭を返す。
    """
    if not voices_dict:
        return None

    want = DEFAULT_ELEVENLABS_VOICE.lower()
    for v in voices_dict:
        if (v["name"] or "").lower() == want:
            return v["id"]
    return voices_dict[0]["id"]


def _write_audio_to_file(audio, outfile):
    """
    ElevenLabs SDK の戻り値が
      - bytes
      - generator / iterable of bytes
    のどちらでも安全に書き込む
    """
    total = 0
    with open(outfile, "wb") as f:
        if isinstance(audio, (bytes, bytearray)):
            f.write(audio)
            return len(audio)

        for chunk in audio:
            if not chunk:
                continue
            f.write(chunk)
            total += len(chunk)
    return total

def speak(outfile, text, voice, speak_rate=None, speak_pitch=None,
          api_key=None, model_id=DEFAULT_MODEL_ID, voice_settings=None):
    """
    Synthesize `text` with ElevenLabs and save the audio to `outfile`.

    Args:
        outfile: Destination path. The bytes returned by the API are written
            as-is, so the audio format depends on the API output.
        text: Text to synthesize.
        voice: A voice_id, or a voice name (exact or substring match). A string
            of 10 or more characters from [A-Za-z0-9_-] is assumed to be a
            voice_id and is used without consulting the voice list. Any other
            value is resolved by name, falling back to the default voice when
            no name matches.
        speak_rate, speak_pitch: Accepted for compatibility with the VOICEVOX
            variant; ignored (a warning is printed when either is given).
        api_key: ElevenLabs API key; None lets the client use its default.
        model_id: ElevenLabs model id.
        voice_settings: Optional dict merged over DEFAULT_VOICE_SETTINGS.

    Returns:
        `outfile` on success; None when the voice list is needed but cannot be
        obtained, when synthesis or writing fails, or when the audio is empty.
    """
    client = _get_client(api_key)

    # Compatibility arguments (VOICEVOX rate/pitch equivalents) cannot be
    # controlled directly with ElevenLabs.
    if speak_rate is not None or speak_pitch is not None:
        print("  [warn] ElevenLabs では speak_rate / speak_pitch の直接指定は基本未対応のため無視します。")

    # Decide whether `voice` looks like an id or must be resolved by name.
    if isinstance(voice, str) and re.fullmatch(r"[A-Za-z0-9_-]{10,}", voice):
        # Most likely a voice_id: use it directly. The voice list is not needed
        # here, so skip the extra API round trip.
        voice_id = voice
    else:
        voices_dict = get_available_voices_info(api_key)
        if not voices_dict:
            print("❌ ElevenLabs の voice 一覧が取得できませんでした（APIキー/接続/権限を確認してください）")
            return None
        try:
            _, voice_id = resolve_speaker_id(str(voice), api_key=api_key, voices_dict=voices_dict)
        except Exception:
            # Deliberate best-effort: an unknown name falls back to the default voice.
            voice_id = _pick_default_voice_id(voices_dict)

    vs = dict(DEFAULT_VOICE_SETTINGS)
    if voice_settings:
        vs.update(voice_settings)

    try:
        audio = client.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            model_id=model_id,
            voice_settings=vs
        )
        # The SDK may return a lazy iterator, in which case the request (and
        # any API/network error) only happens while the chunks are consumed.
        # Keep the write inside the try so those failures are reported here too.
        total = _write_audio_to_file(audio, outfile)
    except Exception as e:
        print(f"❌ ElevenLabs TTS エラー: {e}")
        return None

    if total <= 0:
        print("❌ Error: 音声データが空でした")
        return None

    if os.path.exists(outfile):
        print(f"    ** 一時ファイル [{outfile}] を保存しました")
        return outfile

    print(f"    ** Error: ファイル [{outfile}] の出力に失敗しました")
    return None


def speak_streaming(text, voice, api_key=None, model_id=DEFAULT_MODEL_ID, voice_settings=None,
                    outfile=None, chunk_size=8192):
    """
    Generate speech via ElevenLabs streaming, yielding audio bytes as they arrive.

    This is a generator: nothing runs (including the error below) until the
    first item is requested.

    Args:
        text: Text to synthesize.
        voice: A voice_id, or a voice name (exact or substring match). A string
            of 10 or more characters from [A-Za-z0-9_-] is assumed to be a
            voice_id and is used without consulting the voice list. Any other
            value is resolved by name, falling back to the default voice when
            no name matches.
        api_key: ElevenLabs API key; None lets the client use its default.
        model_id: ElevenLabs model id.
        voice_settings: Optional dict merged over DEFAULT_VOICE_SETTINGS.
        outfile: When given, every yielded chunk is also written to this file.
        chunk_size: Currently unused; kept for interface compatibility.

    Yields:
        Non-empty audio chunks from the SDK stream.

    Raises:
        RuntimeError: if the voice must be resolved by name and the voice list
            cannot be obtained.
    """
    client = _get_client(api_key)

    # Resolve the voice.
    if isinstance(voice, str) and re.fullmatch(r"[A-Za-z0-9_-]{10,}", voice):
        # Looks like a voice_id: no need to fetch the voice list at all.
        voice_id = voice
    else:
        voices_dict = get_available_voices_info(api_key)
        if not voices_dict:
            raise RuntimeError("ElevenLabs の voice 一覧が取得できませんでした（APIキー/接続/権限を確認してください）")
        try:
            _, voice_id = resolve_speaker_id(str(voice), api_key=api_key, voices_dict=voices_dict)
        except Exception:
            # Deliberate best-effort: an unknown name falls back to the default voice.
            voice_id = _pick_default_voice_id(voices_dict)

    vs = dict(DEFAULT_VOICE_SETTINGS)
    if voice_settings:
        vs.update(voice_settings)

    # Use the SDK's stream(); depending on the environment it returns a
    # generator or another iterator.
    stream_iter = client.text_to_speech.stream(
        text=text,
        voice_id=voice_id,
        model_id=model_id,
        voice_settings=vs
    )

    f = None
    try:
        if outfile:
            f = open(outfile, "wb")

        for chunk in stream_iter:
            if not chunk:
                continue
            if f:
                f.write(chunk)
            yield chunk

    finally:
        # Runs on exhaustion, on error, and when the consumer closes the
        # generator early, so the file handle never leaks.
        if f:
            f.close()


# When target_voices is a string, it forcibly replaces the speaker of every
# utterance (behavior kept from the original code).
def speak_dialogue(dialogue, replacements, target_voices, speakers=None, temp_dir=".",
                   outfile=None, ext="mp3", api_key=None,
                   cfg=None, model_id=DEFAULT_MODEL_ID, voice_settings=None):
    """
    Split each dialogue into utterances and synthesize every utterance into its
    own temporary audio file (mirrors the VOICEVOX variant).

    Args:
        dialogue: Iterable of dialogue items, each passed to split_dialogue().
        replacements: Passed to apply_replacements() for every utterance text.
        target_voices: Passed to split_dialogue(); when it is a string it
            overrides the speaker of every utterance.
        speakers: Speaker mapping passed to split_dialogue(); defaults to a
            fresh empty dict per call.
        temp_dir: Directory for the temporary files `tmp_NNN.<ext>`.
        outfile: Currently unused; kept for interface compatibility.
        ext: File extension of the temporary files. mp3 is recommended for
            ElevenLabs (convert afterwards if wav is needed).
        api_key: ElevenLabs API key; None lets the client use its default.
        cfg: Optional settings object; `monologue`, `fspeak_rate` and
            `fspeak_pitch` are read from it when present.
        model_id: ElevenLabs model id.
        voice_settings: Optional overrides forwarded to speak().

    Returns:
        (True, tmpfiles) when every utterance was synthesized, or
        (False, tmpfiles) with the files produced so far as soon as one
        utterance fails.
    """
    # Avoid the shared-mutable-default pitfall: each call gets its own dict.
    if speakers is None:
        speakers = {}

    print("tktts_elevenlabs.speak_dialogue(): target_voices:", target_voices)

    # Loop-invariant values read from cfg once.
    is_monologue = getattr(cfg, "monologue", False) if cfg else False
    # VOICEVOX compatibility: cfg.fspeak_rate / fspeak_pitch are essentially
    # ignored by ElevenLabs (speak() only warns about them).
    speak_rate = getattr(cfg, "fspeak_rate", None) if cfg else None
    speak_pitch = getattr(cfg, "fspeak_pitch", None) if cfg else None

    tmpfiles = []
    voices_dict = None  # fetched lazily, once, and reused for every utterance
    idx = 1

    for i, _dialogue in enumerate(dialogue):
        print()
        print(f"Dialogue {i:04d}:")

        dialogue_list = split_dialogue(
            _dialogue, target_voices, speakers=speakers,
            default_voice=DEFAULT_ELEVENLABS_VOICE,
            is_monologue=is_monologue
        )

        for speaker, text in dialogue_list:
            tmpfile = os.path.join(temp_dir, f"tmp_{idx:03d}.{ext}")
            text = apply_replacements(text, replacements)

            if isinstance(target_voices, str):
                speaker = target_voices

            if voices_dict is None:
                voices_dict = get_available_voices_info(api_key)

            try:
                voices_dict, target_voice_id = resolve_speaker_id(speaker, api_key=api_key, voices_dict=voices_dict)
            except ValueError as e:
                print(e)
                # Fall back to the default voice when the speaker is not found.
                target_voice_id = _pick_default_voice_id(voices_dict)
                print(f"  [warn] speaker='{speaker}' が見つからないためデフォルト voice_id={target_voice_id} を使用します")

            print(f"  {idx:04d}: voice={speaker} (id={target_voice_id}): ", end="")
            print(text)

            _outfile = speak(
                outfile=tmpfile,
                text=text,
                voice=target_voice_id,
                speak_rate=speak_rate,
                speak_pitch=speak_pitch,
                api_key=api_key,
                model_id=model_id,
                voice_settings=voice_settings
            )

            if _outfile is None:
                return False, tmpfiles

            tmpfiles.append(tmpfile)
            idx += 1

    return True, tmpfiles


def speak_dialogue_streaming(dialogue, replacements, target_voices, speakers=None,
                            api_key=None, cfg=None, model_id=DEFAULT_MODEL_ID, voice_settings=None):
    """
    Streaming variant: synthesize the whole dialogue utterance by utterance and
    yield the audio in order, without splitting it into files.

    Args:
        dialogue: Iterable of dialogue items, each passed to split_dialogue().
        replacements: Passed to apply_replacements() for every utterance text.
        target_voices: Passed to split_dialogue(); when it is a string it
            overrides the speaker of every utterance.
        speakers: Speaker mapping passed to split_dialogue(); defaults to a
            fresh empty dict per call.
        api_key: ElevenLabs API key; None lets the client use its default.
        cfg: Optional settings object; `monologue` is read from it when present.
        model_id: ElevenLabs model id.
        voice_settings: Optional overrides forwarded to speak_streaming().

    Yields:
        Tuples (speaker, text, chunk_bytes).
    """
    # Avoid the shared-mutable-default pitfall: each call gets its own dict.
    if speakers is None:
        speakers = {}

    # Loop-invariant value read from cfg once.
    is_monologue = getattr(cfg, "monologue", False) if cfg else False

    voices_dict = None  # fetched lazily, once, and reused for every utterance

    for _dialogue in dialogue:
        dialogue_list = split_dialogue(
            _dialogue, target_voices, speakers=speakers,
            default_voice=DEFAULT_ELEVENLABS_VOICE,
            is_monologue=is_monologue
        )

        if voices_dict is None:
            voices_dict = get_available_voices_info(api_key)

        for speaker, text in dialogue_list:
            text = apply_replacements(text, replacements)
            if isinstance(target_voices, str):
                speaker = target_voices

            try:
                _, voice_id = resolve_speaker_id(speaker, api_key=api_key, voices_dict=voices_dict)
            except Exception:
                # Deliberate best-effort: an unknown speaker falls back to the default voice.
                voice_id = _pick_default_voice_id(voices_dict)

            for chunk in speak_streaming(
                text=text,
                voice=voice_id,
                api_key=api_key,
                model_id=model_id,
                voice_settings=voice_settings,
                outfile=None
            ):
                yield (speaker, text, chunk)