import os
import sys
import os
import sys
import json
# Third-party dependencies are imported defensively so that a missing package
# produces an install hint (and waits for the user) instead of a traceback.
try:
    from dotenv import load_dotenv
except ImportError:
    print("\nImport error: dotenv")
    # The PyPI distribution that provides the ``dotenv`` module is named
    # ``python-dotenv``.
    input("Install: pip install python-dotenv\n")
    # Non-zero status: the script could not start.
    sys.exit(1)
try:
    import openai
    from openai import OpenAI
except ImportError:
    print("\nImport error: openai")
    input("Install: pip install openai\n")
    sys.exit(1)


# Default settings.
guess_speakers = 1
audio_path = "generative_ai_programming.wav"
config_path = "translate.env"

# When the config file is not found as given, look for it next to this script.
if not os.path.isfile(config_path):
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, config_path)

print()
# Load environment variables from the config file.
if not os.path.isfile(config_path):
    print(f"Warning: config_path {config_path} is not found")
else:
    print(f"config_path: {config_path}")
load_dotenv(dotenv_path=config_path)

# The account file path is read from the environment after the config file
# has been loaded, defaulting to "accounts.env".
account_inf_path = os.getenv("account_inf_path", "accounts.env")
if not os.path.isfile(account_inf_path):
    print(f"Warning: account_inf_path {account_inf_path} is not found")
else:
    print(f"account_inf_path: {account_inf_path}")
load_dotenv(dotenv_path=account_inf_path)

# API key setup
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("Error: api_key is not found.")
    # Exit with a non-zero status so callers can detect the failure.
    sys.exit(1)

# Never echo the full secret: console output tends to end up in logs,
# screenshots and shared terminals. Show just enough to identify the key.
if len(api_key) > 8:
    _masked_api_key = f"{api_key[:3]}...{api_key[-4:]}"
else:
    _masked_api_key = "*" * len(api_key)
print(f"api_key: {_masked_api_key}")


client = OpenAI(api_key=api_key)

# Model settings, read from the environment with built-in defaults.
whisper_model = os.getenv("whisper_model", "whisper-1")
openai_model = os.getenv("openai_model", "gpt-4o")
temperature = float(os.getenv("temperature", "0.3"))
max_tokens = int(os.getenv("max_tokens", "2000"))

print(f"openai_model : {openai_model}")
print(f"whisper_model: {whisper_model}")


# Optional positional arguments: [audio_path] [guess_speakers]
argv = sys.argv
nargs = len(argv)
if nargs > 1:
    audio_path = argv[1]
if nargs > 2:
    guess_speakers = int(argv[2])

print(f"audio_path: {audio_path}")
print(f"guess_speakers: {guess_speakers}")


# Function-calling schema used for speaker guessing.

# One conversational turn: who spoke and what was said.
_turn_schema = {
    "type": "object",
    "properties": {
        "speaker": {"type": "string"},
        "text": {"type": "string"},
    },
    "required": ["speaker", "text"],
}

# Arguments of the "guess_speakers" function: the list of speaker names and
# the ordered list of turns.
_guess_speakers_parameters = {
    "type": "object",
    "properties": {
        "speakers": {"type": "array", "items": {"type": "string"}},
        "turns": {"type": "array", "items": _turn_schema},
    },
    "required": ["speakers", "turns"],
}

functions = [
    {
        "name": "guess_speakers",
        "description": "会話のテキストから話者を推測し、JSON 構造で返す",
        "parameters": _guess_speakers_parameters,
    }
]

def print_speakers(json_str: str):
    """Print each turn found in *json_str* as ``[speaker]: text``.

    *json_str* is parsed as a JSON object; its ``"turns"`` array (treated as
    empty when absent) is walked in order. A turn without ``"speaker"`` is
    shown as ``Unknown`` and a turn without ``"text"`` as an empty string.
    """
    for entry in json.loads(json_str).get("turns", []):
        print(f"[{entry.get('speaker', 'Unknown')}]: {entry.get('text', '')}")

def save_with_speakers(json_str: str, outfile: str):
    """Write the turns contained in *json_str* to *outfile*, one per line.

    Each element of the ``"turns"`` array (treated as empty when absent) is
    written as

      [SpeakerX]: utterance text

    using UTF-8. A missing ``"speaker"`` becomes ``Unknown`` and a missing
    ``"text"`` becomes an empty string. An existing *outfile* is overwritten.
    """
    parsed = json.loads(json_str)
    with open(outfile, mode="w", encoding="utf-8") as out:
        out.writelines(
            f"[{entry.get('speaker', 'Unknown')}]: {entry.get('text', '')}\n"
            for entry in parsed.get("turns", [])
        )

def transcribe_audio(input_path: str) -> str:
    """Transcribe the audio file at *input_path* and return the text.

    The file is opened in binary mode and passed to the transcription
    endpoint of the module-level ``client``, using the model named by
    ``whisper_model``.
    """
    print(f"音声ファイル '{input_path}' を文字起こし中...")
    with open(input_path, "rb") as stream:
        transcription = client.audio.transcriptions.create(
            file=stream,
            model=whisper_model,
        )
    return transcription.text

def summarize_text(text: str) -> str:
    """Summarize a transcript with the chat model and return the summary.

    The request goes through the module-level ``client`` and uses the
    configured ``openai_model``, ``temperature`` and ``max_tokens``.

    Args:
        text: The transcript to summarize.

    Returns:
        The summary with surrounding whitespace removed, or an empty string
        when the reply carries no text content.
    """
    print("文字起こしテキストを要約中...")
    response = client.chat.completions.create(
        model=openai_model,
        messages=[
            {"role": "system", "content": "以下の文字起こしを要約してください。"},
            {"role": "user", "content": text},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # ``message.content`` is nullable in the Chat Completions API; normalise
    # it so callers always receive a string instead of an AttributeError.
    content = response.choices[0].message.content
    return (content or "").strip()

def save_text_to_file(filename: str, content: str):
    """Write *content* to *filename* as UTF-8 and report the outcome.

    A confirmation is printed on success. I/O failures are printed as an
    error message instead of being raised.
    """
    try:
        with open(filename, mode="w", encoding="utf-8") as out:
            out.write(content)
        print(f"'{filename}' に内容を保存しました。")
    except OSError as err:  # IOError is an alias of OSError in Python 3
        print(f"ファイルの保存中にエラーが発生しました: {err}")

def _guess_and_save_speakers(transcript: str, out_file: str) -> bool:
    """Ask the chat model to split *transcript* into speaker turns and save them.

    The request forces a call to the ``guess_speakers`` function described by
    the module-level ``functions`` schema. On success the turns are printed
    and written to *out_file*.

    Returns:
        True when the turns were saved, False when the reply was unusable
        (no function call, or arguments that are not valid JSON).
    """
    response = client.chat.completions.create(
        # Use the configured chat model, consistent with summarize_text().
        model=openai_model,
        messages=[
            {"role": "system", "content": "以下の会話テキストから、話者を推測してください。"},
            {"role": "user", "content": transcript},
        ],
        functions=functions,
        function_call={"name": "guess_speakers"},
        temperature=0.0,
    )

    function_call = response.choices[0].message.function_call
    if function_call is None:
        print("Warning: speaker guessing returned no function call.")
        return False

    arguments = function_call.arguments
    # The arguments string is model-generated and may be truncated or
    # malformed; validate it before printing or writing anything.
    try:
        json.loads(arguments)
    except json.JSONDecodeError as e:
        print(f"Warning: speaker guessing returned invalid JSON: {e}")
        return False

    print_speakers(arguments)
    save_with_speakers(arguments, out_file)
    print(f"話者推測結果を '{out_file}' に保存しました。")
    return True


def main():
    """Transcribe the configured audio file, optionally label speakers, and summarize.

    Steps:
      1. Transcribe ``audio_path``.
      2. If ``guess_speakers`` is truthy, ask the chat model to attribute the
         text to speakers and save the result as ``<name>_guessed.json`` next
         to the audio file. Otherwise, or when speaker guessing fails, save
         the plain transcript as ``<name>_transcript.txt``.
      3. Summarize the transcript and save it as ``<name>_summary.txt``.
      4. Print transcript and summary, then wait for ENTER.
    """
    output_dir = os.path.dirname(os.path.abspath(audio_path)) or "."
    file_body = os.path.splitext(os.path.basename(audio_path))[0]

    # Run the transcription.
    transcript = transcribe_audio(audio_path)

    speakers_saved = False
    if guess_speakers:
        # NOTE(review): the file keeps its ".json" suffix although
        # save_with_speakers() writes plain "[speaker]: text" lines; the name
        # is left unchanged so existing workflows still find the file.
        out_file = os.path.join(output_dir, f"{file_body}_guessed.json")
        speakers_saved = _guess_and_save_speakers(transcript, out_file)

    if not speakers_saved:
        # Save the raw transcript (e.g. generative_ai_programming_transcript.txt).
        # This also covers a failed speaker guess, so the transcription is
        # never lost.
        transcript_filepath = os.path.join(output_dir, f"{file_body}_transcript.txt")
        save_text_to_file(transcript_filepath, transcript)

    # Run the summarization.
    summary = summarize_text(transcript)
    # Save the summary (e.g. generative_ai_programming_summary.txt).
    summary_filepath = os.path.join(output_dir, f"{file_body}_summary.txt")
    save_text_to_file(summary_filepath, summary)

    print("\n===== 文字起こし結果 =====\n")
    print(transcript)
    print("\n===== 要約結果 =====\n")
    print(summary)

    # Pause before returning; input() blocks until the user presses ENTER.
    input("\nPress ENTER to terminate>>\n")
    

# Call main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
