# tktts_winrt.py (header text left over from a download/copy page; kept as a comment so the file parses)
"""WinRT backend for tktts.

This module provides the WinRT backend for tktts.
It is intended to work as a drop-in style backend alongside tktts_pyttsx3.py.
It uses `Windows.Media.SpeechSynthesis` through the Python `winsdk` package.

Installation (on Windows):
    pip install winsdk

Notes:
    * The WinRT SpeechSynthesizer produces a WAV audio stream.
    * `speak_rate` values greater than 10 are interpreted as pyttsx3-style WPM
      (words per minute; e.g. 150 -> 1.0x). Values of 10 or less are interpreted
      as a WinRT relative rate. In both cases the result is clamped to 0.5-6.0.
    * Narrator "natural voices" may not be exposed to third-party programs,
      depending on the Windows build and the installed voice packages.
      Use `list_available_voices()` to check which voices WinRT can actually see.

See also: :doc:`tktts_winrt_usage`
"""

from __future__ import annotations

import asyncio
import os
import sys
import tempfile
import threading
from typing import Any

from tktts_base import apply_replacements, normalize_speaker, split_dialogue


# Backend identifier interpolated into this module's console messages.
TTS_ENGINE_NAME = "winrt"
# Voice requested when the caller does not name one. It is matched as a
# substring, so a system without it falls through to the other fallbacks.
DEFAULT_WINRT_VOICE = "Nanami"  # fallback logic handles systems without Nanami
# pyttsx3-style words-per-minute value that maps to the WinRT relative rate 1.0.
DEFAULT_PYTTSX3_COMPAT_RATE = 150.0


# -----------------------------------------------------------------------------
# Lazy WinRT imports
# -----------------------------------------------------------------------------

def _import_winrt():
    """Import the WinRT classes used by this backend, on first use.

    The imports are deferred to call time so that merely importing this module
    does not fail on machines where the optional ``winsdk`` package is missing
    or unsupported.

    If the import fails, a diagnostic and an install hint are printed, the
    function waits for the user to press ENTER, and the process exits with
    status 1.

    :returns: The tuple ``(SpeechSynthesizer, Buffer, DataReader, InputStreamOptions)``.
    :raises SystemExit: When the ``winsdk`` classes cannot be imported.
    """
    try:
        from winsdk.windows.media.speechsynthesis import SpeechSynthesizer
        from winsdk.windows.storage.streams import Buffer, DataReader, InputStreamOptions
    except ImportError as import_error:
        for message in (
            "エラー: tktts_winrtに必要なライブラリが不足しているか、サポートされていないプラットフォームです。",
            f"詳細: {import_error}",
            "  インストール方法: pip install winsdk",
        ):
            print(message)
        # Keep the console window open until the user has read the message.
        input("\nENTERを押して終了>>\n")
        sys.exit(1)
    else:
        return SpeechSynthesizer, Buffer, DataReader, InputStreamOptions


def _run_async(coro):
    """Run an awaitable coroutine to completion from synchronous code.

    When no event loop is running in the calling thread, the coroutine is run
    directly with ``asyncio.run()``. When a loop is already running (where
    ``asyncio.run()`` would refuse to start), the coroutine is run with
    ``asyncio.run()`` in a helper thread and the caller blocks until that
    thread finishes.

    :param coro: The coroutine object to execute.
    :returns: The value returned by the coroutine.
    :raises BaseException: Any exception raised by the coroutine is re-raised
        in the calling thread.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if not loop_is_running:
        # Deliberately called outside the ``except`` block above: otherwise an
        # error raised by the coroutine would be chained to the unrelated
        # "no running event loop" RuntimeError and produce a misleading
        # traceback.
        return asyncio.run(coro)

    result_box: dict[str, Any] = {}
    error_box: dict[str, BaseException] = {}

    def runner():
        try:
            result_box["result"] = asyncio.run(coro)
        except BaseException as e:  # noqa: BLE001 - re-raised in caller thread
            error_box["error"] = e

    worker = threading.Thread(target=runner, daemon=True)
    worker.start()
    worker.join()

    if "error" in error_box:
        raise error_box["error"]
    return result_box.get("result")


# -----------------------------------------------------------------------------
# Voice handling
# -----------------------------------------------------------------------------

def _safe_attr(obj: Any, name: str, default: Any = "") -> Any:
    """Read an attribute, returning a default instead of raising.

    Any ``Exception`` raised while reading the attribute (not only
    ``AttributeError``) results in the default being returned.

    :param obj: Object to read the attribute from.
    :param name: Name of the attribute.
    :param default: Value returned when the attribute cannot be read.
        Defaults to the empty string.
    :returns: The attribute value, or ``default`` on failure.
    """
    try:
        value = getattr(obj, name)
    except Exception:
        value = default
    return value


def _voice_gender_text(voice: Any) -> str:
    """Return the gender of a voice object as text.

    The ``gender`` attribute is read defensively. If the value has a ``name``
    attribute (as enum members do), that name is used; otherwise the value
    itself is converted to a string. A missing attribute yields ``""``.

    :param voice: Voice object, expected to be a WinRT ``VoiceInformation``.
    :returns: Text describing the voice's gender.
    """
    gender_value = _safe_attr(voice, "gender", "")
    label = gender_value.name if hasattr(gender_value, "name") else gender_value
    return str(label)


def _voice_to_dict(voice: Any) -> dict[str, Any]:
    """Convert a voice object into a pyttsx3-like dictionary.

    The display name, id, language and description are read defensively and
    converted to strings. ``name`` falls back to the id when the display name
    is empty.

    :param voice: Voice object, expected to be a WinRT ``VoiceInformation``.
    :returns: Dictionary with the keys ``name``, ``id``, ``lang``, ``gender``
        and ``description``.
    """
    display_name, voice_id, language, description = (
        str(_safe_attr(voice, attribute, ""))
        for attribute in ("display_name", "id", "language", "description")
    )

    return dict(
        name=display_name or voice_id,
        id=voice_id,
        lang=language,
        gender=_voice_gender_text(voice),
        description=description,
    )


def _voice_match_text(voice: Any) -> str:
    """Build the lower-cased text that voice searches are matched against.

    The name, id, language, gender and description of the voice are joined
    with newlines and lower-cased, so a caller can do a case-insensitive
    substring test against all of them at once.

    :param voice: Voice object, expected to be a WinRT ``VoiceInformation``.
    :returns: The combined, lower-cased search text.
    """
    info = _voice_to_dict(voice)
    fields = [
        str(info.get(key, ""))
        for key in ("name", "id", "lang", "gender", "description")
    ]
    return "\n".join(fields).lower()


def _select_voice(synthesizer: Any, target_voice: str | None):
    """Pick a WinRT voice by substring match and assign it to the synthesizer.

    The search terms are tried in order: the requested text (if any), then
    ``DEFAULT_WINRT_VOICE``, ``"ja-jp"`` and ``"japan"``. The first installed
    voice whose match text contains the term is assigned to
    ``synthesizer.voice`` and returned. If nothing matches, the synthesizer is
    left untouched and the class-level default voice is returned.

    :param synthesizer: Synthesizer object whose ``voice`` attribute is set.
    :param target_voice: Substring of a voice name, id, language, gender or
        description. ``None`` or blank skips straight to the fallbacks.
    :returns: The selected voice, the default voice when nothing matched, or
        ``None`` when no voices are installed or no default is available.
    """
    SpeechSynthesizer, _, _, _ = _import_winrt()

    installed = list(SpeechSynthesizer.all_voices)
    if not installed:
        return None

    requested = (target_voice or "").strip().lower()
    needles = [requested] if requested else []
    # Prefer a Japanese voice if available.  Otherwise keep the system default.
    needles += [DEFAULT_WINRT_VOICE.lower(), "ja-jp", "japan"]

    for needle in needles:
        for candidate in installed:
            if needle in _voice_match_text(candidate):
                synthesizer.voice = candidate
                return candidate

    return _safe_attr(SpeechSynthesizer, "default_voice", None)


def get_available_voices_info() -> list[dict[str, Any]] | bool:
    """Return details of every voice WinRT reports, as a list of dictionaries.

    Following the convention of the pyttsx3 backend, an error while
    enumerating the voices is reported on stdout and ``False`` is returned
    instead of raising.

    :returns: One dictionary per voice (``name``, ``id``, ``lang``,
        ``gender``, ``description``), or ``False`` on an initialization error.
    :raises SystemExit: Propagated unchanged when ``_import_winrt`` terminates
        the program because the WinRT libraries cannot be imported.
    """
    try:
        synthesizer_cls = _import_winrt()[0]
        infos = []
        for voice in synthesizer_cls.all_voices:
            infos.append(_voice_to_dict(voice))
    except Exception as init_error:
        # SystemExit is not an Exception subclass, so it is never caught here
        # and keeps propagating to the caller.
        print(f"エラー: tktts_winrt.get_available_voices_info()での初期化エラー: {TTS_ENGINE_NAME}: {init_error}")
        return False
    return infos


def get_available_voices() -> list[str] | bool:
    """Return the names of the voices WinRT reports.

    :returns: List of voice names taken from ``get_available_voices_info()``,
        or ``False`` when that call returns ``False`` or an empty list.
    """
    infos = get_available_voices_info()
    return [info["name"] for info in infos] if infos else False


def list_available_voices() -> bool:
    """Print the voices WinRT reports to the console.

    A header line is always printed. Each voice is then listed with its name,
    language, gender and id, followed by its description when it has one.

    :returns: ``True`` when at least one voice was listed, ``False`` when the
        voice information was unavailable or empty.
    """
    print(f"=== 利用可能な {TTS_ENGINE_NAME} voices ===")
    infos = get_available_voices_info()
    if infos:
        for v in infos:
            print(f"  Name: {v['name']}, Lang: {v['lang']}, Gender: {v['gender']}, ID: {v['id']}")
            if v.get("description"):
                print(f"        Description: {v['description']}")
        return True

    return False


# -----------------------------------------------------------------------------
# Speech synthesis
# -----------------------------------------------------------------------------

def _convert_speak_rate(speak_rate: float | int | None) -> float:
    """pyttsx3ライクなWPMをWinRTの相対的な読み上げ速度に変換します。

    概要: `pyttsx3` で一般的に使われるWPM（Words Per Minute）形式の読み上げ速度を、WinRTが要求する相対的な速度値に変換します。
    詳細説明: WinRTの `speaking_rate` は相対値であり、1.0が通常速度、0.5が半速、6.0が6倍速です。
    既存の `tktts` コードは `150` のようなWPM値を渡す傾向があるため、これをWinRT互換の相対値に変換します。
    変換後の値は0.5から6.0の範囲に制限されます。
    :param speak_rate: float | int | None: 読み上げ速度。`None` の場合、デフォルトの `1.0` (通常速度) を返します。
                                          `10.0` より大きい値はWPMとして解釈され、それ以外の値はWinRTの相対速度として扱われます。
    :returns: float: WinRT互換の相対的な読み上げ速度。
    """
    if speak_rate is None:
        return 1.0

    try:
        rate = float(speak_rate)
    except (TypeError, ValueError):
        return 1.0

    if rate > 10.0:
        rate = rate / DEFAULT_PYTTSX3_COMPAT_RATE

    return max(0.5, min(6.0, rate))


async def _stream_to_bytes(stream: Any) -> bytes:
    """Read the whole content of a WinRT speech stream into ``bytes``.

    The stream's reported size is used to allocate a buffer, the stream is
    rewound (best effort) and read asynchronously in one call, and the bytes
    are copied out through a ``DataReader``. The reader is always closed; the
    stream itself is NOT closed here and stays owned by the caller.

    :param stream: Stream object, expected to be a WinRT
        ``SpeechSynthesisStream``.
    :returns: The bytes read from the stream, or ``b""`` when the stream
        reports a size of zero or less.
    """
    _, Buffer, DataReader, InputStreamOptions = _import_winrt()

    size = int(_safe_attr(stream, "size", 0))
    if size <= 0:
        return b""

    # Reset to the beginning before reading.
    try:
        stream.seek(0)
    except Exception:
        pass

    buffer = Buffer(size)
    read_buffer = await stream.read_async(buffer, size, InputStreamOptions.READ_AHEAD)

    # Fall back to the stream size when the buffer does not report a length.
    length = int(_safe_attr(read_buffer, "length", 0)) or size
    reader = DataReader.from_buffer(read_buffer)
    data = bytearray(length)
    try:
        reader.read_bytes(data)
    finally:
        # Release the reader even when the copy fails; closing is best effort.
        try:
            reader.close()
        except Exception:
            pass

    return bytes(data)


async def _synthesize_wav_bytes(text: str, target_voice: str | None, speak_rate: float | int | None) -> bytes:
    """Synthesize text with WinRT and return the resulting audio bytes.

    A ``SpeechSynthesizer`` is created, a voice is selected, the speaking rate
    is applied (best effort), the text is synthesized to a stream and the
    stream is read into memory. The synthesizer and the stream, if one was
    created, are closed on every exit path, including failures during voice
    selection or synthesis.

    :param text: Text to synthesize. ``None`` or empty is passed on as ``""``.
    :param target_voice: Voice search text handed to ``_select_voice``.
    :param speak_rate: Requested rate, converted by ``_convert_speak_rate``.
    :returns: The synthesized audio data.
    """
    SpeechSynthesizer, _, _, _ = _import_winrt()

    text = text or ""
    synthesizer = SpeechSynthesizer()
    stream = None
    try:
        _select_voice(synthesizer, target_voice)

        try:
            synthesizer.options.speaking_rate = _convert_speak_rate(speak_rate)
        except Exception:
            # Older Windows builds may not support SpeakingRate.
            pass

        stream = await synthesizer.synthesize_text_to_stream_async(text)
        data = await _stream_to_bytes(stream)
    finally:
        # Closing is best effort: a failure to close must not mask the result
        # or the original error.
        for obj in (stream, synthesizer):
            if obj is None:
                continue
            try:
                obj.close()
            except Exception:
                pass

    return data


def _write_wav(outfile: str, text: str, target_voice: str | None, speak_rate: float | int | None) -> str | None:
    """Synthesize text and write the audio data to a file.

    The audio is produced by ``_synthesize_wav_bytes``. The output directory
    is created when missing, the data is written in binary mode, and the
    written file is checked to exist with a non-zero size.

    :param outfile: Path of the file to write.
    :param text: Text to synthesize.
    :param target_voice: Voice search text.
    :param speak_rate: Requested speaking rate.
    :returns: ``outfile`` on success; ``None`` when synthesis produced no data
        or the written file is missing or empty.
    """
    wav_bytes = _run_async(_synthesize_wav_bytes(text, target_voice, speak_rate))
    if not wav_bytes:
        print(f"　エラー: {TTS_ENGINE_NAME} の音声生成結果が空です")
        return None

    parent_dir = os.path.dirname(os.path.abspath(outfile))
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(outfile, "wb") as wav_file:
        wav_file.write(wav_bytes)

    written_ok = os.path.exists(outfile) and os.path.getsize(outfile) > 0
    if written_ok:
        return outfile

    print(f"　エラー: ファイル [{outfile}] の出力に失敗しました")
    return None


def _play_wav_file(wavfile: str) -> bool:
    """Play a WAV file through ``winsound`` (Windows only).

    On any platform other than ``win32`` an error message is printed and
    nothing is played.

    :param wavfile: Path of the WAV file to play.
    :returns: ``True`` after ``winsound.PlaySound`` returns, ``False`` when
        not running on Windows.
    """
    if sys.platform == "win32":
        # Imported here because the module only exists on Windows.
        import winsound

        winsound.PlaySound(wavfile, winsound.SND_FILENAME)
        return True

    print(f"エラー: WAV再生はWindows環境でのみ対応しています: {wavfile}")
    return False


def speak(outfile: str | None, text: str, voice: str | None, speak_rate: float | int | None = None) -> bool | str | None:
    """Synthesize one text and either save it to a file or play it.

    The parameters are compatible with ``tktts_pyttsx3.speak()``. With a
    non-empty ``outfile`` the audio is written to that path (a warning is
    printed when the name does not end in ``.wav``). Otherwise the audio is
    written to a temporary ``.wav`` file, played, and the temporary file is
    removed afterwards.

    :param outfile: Output path; ``None`` or empty selects playback.
    :param text: Text to synthesize.
    :param voice: Voice search text; ``DEFAULT_WINRT_VOICE`` is used when it
        is ``None`` or empty.
    :param speak_rate: Requested speaking rate; ``None`` means normal speed.
    :returns: In save mode the output path, or ``None`` on failure. In
        playback mode ``True`` on success and ``False`` on failure.
    """
    target_voice = voice or DEFAULT_WINRT_VOICE

    if outfile:
        out_path = str(outfile)
        if not out_path.lower().endswith(".wav"):
            print("　警告: WinRTバックエンドはWAVデータを出力します。拡張子は .wav を推奨します。")
        return _write_wav(out_path, text, target_voice, speak_rate)

    # Only the name is needed; the file is closed right away so that it can be
    # reopened for writing and later for playback.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
        tmpfile = handle.name

    try:
        if not _write_wav(tmpfile, text, target_voice, speak_rate):
            return False
        return _play_wav_file(tmpfile)
    finally:
        try:
            os.remove(tmpfile)
        except OSError:
            pass


def speak_dialogue(
    dialogue: list[str],
    replacements: dict[str, str],
    target_voices: dict[str, str] | str,
    speakers: dict[str, Any] = {},
    speak_rate: float | int = 150,
    temp_dir: str | None = None,
    outfile: str | None = None,
    ext: str = "wav",
    cfg: Any = None
) -> tuple[bool, list[str] | dict[Any, Any]]:
    """対話ブロックの音声ファイルを生成します。

    概要: 複数の対話ブロックに対して音声合成を行い、一時ファイルとして保存するか、結合して再生します。
    詳細説明: この関数は `tktts_pyttsx3.speak_dialogue()` と同様の動作をします。
    WinRTの音声合成APIが非同期ストリームベースであるため、発話ごとに合成が実行されます。
    `outfile` が指定された場合、各対話ブロックの音声は一時ファイルとして保存され、そのファイルパスのリストが返されます。
    `outfile` が指定されない場合、すべてのテキストが結合され、一度に再生されます。
    WinRTバックエンドはWAV形式のみを出力するため、`ext` パラメータは常に"wav"として扱われます。
    :param dialogue: list[str]: 処理する対話ブロックのリスト。
    :param replacements: dict[str, str]: テキストに適用される置換ルールを定義する辞書。
    :param target_voices: dict[str, str] | str: スピーカー名に対応する音声IDの辞書、またはすべての対話で使用する単一の音声ID文字列。
    :param speakers: dict[str, Any], optional: スピーカーに関する追加情報を含む辞書。デフォルトは空の辞書。
    :param speak_rate: float | int, optional: 読み上げ速度。デフォルトは150 (WPM)。
    :param temp_dir: str | None, optional: 一時ファイルを保存するディレクトリのパス。Noneの場合、システムのデフォルト一時ディレクトリが使用されます。
    :param outfile: str | None, optional: 最終的な出力ファイル名。このパラメータが指定されると、音声は一時ファイルに保存されます。
    :param ext: str, optional: 生成される一時ファイルの拡張子。WinRTはWAVを生成するため、常に"wav"として扱われます。デフォルトは"wav"。
    :param cfg: Any, optional: 設定オブジェクト。`monologue` 属性を持つ場合があります。デフォルトはNone。
    :returns: tuple[bool, list[str] | dict[Any, Any]]:
        - 最初の要素は処理の成否を示すブール値 (`True` または `False`)。
        - 2番目の要素は、保存モード (`outfile` が指定された場合) では一時ファイルのパスのリスト (`list[str]`)、再生モードでは空の辞書 (`dict[Any, Any]`)。
    """
    is_save_mode = bool(outfile)
    temp_dir = temp_dir or tempfile.gettempdir()
    os.makedirs(temp_dir, exist_ok=True)

    # WinRT returns WAV.  Keep downstream honest even if caller passed mp3.
    if ext.lower() != "wav":
        print("　警告: WinRTバックエンドはWAVを出力します。一時ファイル拡張子を wav に変更します。")
        ext = "wav"

    print()
    print("tktts_winrt.speak_dialogue(): ")
    print(f"　出力ファイル: {outfile}")
    print(f"  is_save_mode: {is_save_mode}")
    print("target_voices:", target_voices)

    tmpfiles = []
    text_all = ""
    idx = 1
    is_monologue = bool(getattr(cfg, "monologue", False))

    for i, _dialogue in enumerate(dialogue):
        print()
        print(f"Dialogue {i:04d}:")
        dialogue_list = split_dialogue(
            _dialogue,
            target_voices,
            speakers=speakers,
            default_voice=DEFAULT_WINRT_VOICE,
            is_monologue=is_monologue,
        )

        for speaker, text in dialogue_list:
            text = apply_replacements(text, replacements)
            if type(target_voices) is str:
                speaker = target_voices

            speaker = normalize_speaker(speaker)
            if type(target_voices) is str:
                target_voice = speaker
            else:
                target_voice = target_voices.get(speaker, DEFAULT_WINRT_VOICE)

            print(f"  {idx:04d}: voice={speaker} (id={target_voice}): ", end="")
            print(text)
            print(f"{i:04d}: {speaker}: {target_voice}: {text}")

            if is_save_mode:
                tmpfile = os.path.join(temp_dir, f"tmp_{idx:03d}.{ext}")
                result = _write_wav(tmpfile, text, target_voice, speak_rate)
                if not result:
                    return False, tmpfiles
                tmpfiles.append(tmpfile)
            else:
                text_all += "\n" + text

            idx += 1

    if is_save_mode:
        print(f"\n{TTS_ENGINE_NAME}で音声ファイルを一時ファイルに生成しました。")
        return True, tmpfiles

    print(f"\n{TTS_ENGINE_NAME}で音声ファイルを再生中...")
    ok = speak(None, text_all, DEFAULT_WINRT_VOICE, speak_rate=speak_rate)
    return bool(ok), {}


# -----------------------------------------------------------------------------
# Small standalone test CLI
# -----------------------------------------------------------------------------

if __name__ == "__main__":
    # Small manual test entry point: list the voices, or speak/save one text.
    import argparse

    option_specs = (
        ("--list", dict(type=int, default=0, choices=[0, 1], help="list available voices")),
        ("--voice", dict(type=str, default=DEFAULT_WINRT_VOICE, help="voice name/id/language partial match")),
        ("--text", dict(type=str, default="こんにちは。これは WinRT 音声合成のテストです。", help="text to speak")),
        ("--outfile", dict(type=str, default="", help="output wav file; omit to play")),
        ("--rate", dict(type=float, default=150.0, help="pyttsx3-like WPM or WinRT relative rate")),
    )

    cli = argparse.ArgumentParser(description="WinRT TTS test utility for tktts_winrt.py")
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    if not options.list:
        speak(options.outfile, options.text, options.voice, speak_rate=options.rate)
    else:
        list_available_voices()