import os
import sys
import tempfile
# Third-party dependencies. Each import is guarded so that a missing package
# produces an install hint instead of a bare traceback. Only ImportError is
# caught, so Ctrl+C and unrelated failures are not mistaken for a missing
# package. input() waits for Enter before exiting (presumably so the hint
# stays visible when the console window would otherwise close immediately).
try:
    from dotenv import load_dotenv
except ImportError:
    print("\nImport error: dotenv")
    # The "dotenv" module is distributed on PyPI as "python-dotenv".
    input("Install: pip install python-dotenv")
    sys.exit(1)
try:
    from pydub import AudioSegment
    from pydub.utils import make_chunks
except ImportError:
    print("\nImport error: pydub")
    input("Install: pip install pydub")
    sys.exit(1)
try:
    from openai import OpenAI
except ImportError:
    print("\nImport error: openai")
    input("Install: pip install openai")
    sys.exit(1)

# File settings (defaults; may be overridden from the command line).
# input_file = "semiconductor-physics.mp3"  # file to transcribe
# Raw string: this Windows path contains a backslash, and "\G" is not a valid
# escape sequence in a normal string literal.
input_file = r"e:\GMT20250227-035951_Recording_2560x1440.mp3"
output_path = "transcript.txt"

guess_speakers = 1
chunk_length_ms = 5 * 60 * 1000  # length of one chunk (here: 5 minutes)
max_chunks = 1000  # maximum number of chunks to process


# Locate the main config file: try the name as given first (relative to the
# current working directory), then fall back to the directory that contains
# this script.
config_path = "translate.env"
if not os.path.isfile(config_path):
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(script_dir, config_path)

print()
config_found = os.path.isfile(config_path)
print(
    f"config_path: {config_path}"
    if config_found
    else f"Warning: config_path {config_path} is not found"
)
# load_dotenv is called even when the file is missing; only a warning is shown.
load_dotenv(dotenv_path=config_path)

# A second env file holds the account settings. Its location is read from the
# "account_inf_path" environment variable, defaulting to "accounts.env".
account_inf_path = os.getenv("account_inf_path", "accounts.env")
account_inf_found = os.path.isfile(account_inf_path)
print(
    f"account_inf_path: {account_inf_path}"
    if account_inf_found
    else f"Warning: account_inf_path {account_inf_path} is not found"
)
load_dotenv(dotenv_path=account_inf_path)

# The OpenAI API key is mandatory; stop with a non-zero exit status when it
# is missing or empty.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    print("Error: api_key is not found.")
    sys.exit(1)

# Never echo the full secret: console output can end up in logs, screenshots
# or shared terminals. Show only the last four characters so the user can
# still tell which key is active; very short values are hidden entirely.
if len(api_key) > 8:
    masked_api_key = "****" + api_key[-4:]
else:
    masked_api_key = "****"
print(f"api_key: {masked_api_key}")


# Optional positional arguments from sys.argv override the defaults, in order:
#   input_file, output_path, guess_speakers, chunk length in minutes, max_chunks
cli_args = sys.argv[1:]
if len(cli_args) > 0:
    input_file = cli_args[0]
if len(cli_args) > 1:
    output_path = cli_args[1]
try:
    if len(cli_args) > 2:
        guess_speakers = int(cli_args[2])
    if len(cli_args) > 3:
        chunk_minutes = int(cli_args[3])
        # A zero or negative chunk length cannot produce usable chunks, so
        # reject it here instead of failing later while splitting the audio.
        if chunk_minutes <= 0:
            raise ValueError(
                f"chunk length must be a positive number of minutes, got {chunk_minutes}"
            )
        chunk_length_ms = chunk_minutes * 60 * 1000  # minutes -> milliseconds
    if len(cli_args) > 4:
        max_chunks = int(cli_args[4])
except ValueError as err:
    # Report bad numeric arguments as a short message rather than a traceback.
    print(f"Error: invalid command-line argument: {err}")
    print(
        f"Usage: {sys.argv[0]} [input_file] [output_path] [guess_speakers] "
        "[chunk_length_minutes] [max_chunks]"
    )
    sys.exit(1)

# Echo the effective settings.
print(f"input_file: {input_file}")
print(f"output_path: {output_path}")
print(f"guess_speakers: {guess_speakers}")
print(f"chunk_length_ms: {chunk_length_ms} ms")
print(f"max_chunks: {max_chunks}")


client = OpenAI(api_key=api_key)

# Load the audio and split it into chunks of chunk_length_ms milliseconds.
audio = AudioSegment.from_file(input_file, format="mp3")
chunks = make_chunks(audio, chunk_length_ms)

# Transcribe chunk by chunk and collect the text pieces in order.
transcript_parts = []
for i, chunk in enumerate(chunks):
    # Check the limit first so no "processing" line is printed for a chunk
    # that is never actually processed.
    if i >= max_chunks:
        print(f"最大チャンク数 {max_chunks} に達しました。処理を終了します。")
        break
    print(f"--- 処理中: チャンク {i+1}/{len(chunks)} ---")

    # Reserve a unique temp path, then close that handle right away so the
    # export and the upload each open the file on their own.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        # pydub's export() returns the output file handle (per its API docs);
        # close it explicitly so the data is on disk before reopening.
        chunk.export(tmp_path, format="mp3").close()

        # Open the exported file and send it to the API.
        with open(tmp_path, "rb") as f:
            try:
                response = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=f,
                )
                transcript_parts.append(response.text + "\n")
            except Exception as e:
                # Best effort: report the failed chunk (1-based, matching the
                # progress line above) and continue with the next one.
                print(f"エラー（チャンク{i+1}）: {e}")
    finally:
        # Always delete the temp file, even when export or upload fails.
        os.remove(tmp_path)

all_text = "".join(transcript_parts)

# Output: show the transcript on the console, then save it as UTF-8 text.
print("\n==== 文字起こし結果 ====\n", all_text, sep="\n")

with open(output_path, mode="w", encoding="utf-8") as transcript_file:
    transcript_file.write(all_text)
