"""talk.py - give a face picture a voice: your line -> speech -> lip-synced video.

Put this file INSIDE your Wav2Lip folder (next to inference.py), along with:
    narrator.png    your close-up character picture (person-style face!)
and make sure the model file is at checkpoints/wav2lip_gan.pth
(the setup card on the page shows where both come from).

First run only:  pip3 install edge-tts
Then:            python3 talk.py
"""
import asyncio
import subprocess
import sys
from pathlib import Path

# ---- knobs you can tweak ----------------------------------------------------
# What your character says out loud.
LINE = (
    "Welcome to the palace! "
    "Tonight, two royal cats are going for an evening walk. "
    "Let's watch!"
)

# Which text-to-speech voice reads the line. Others to try:
# en-GB-RyanNeural, th-TH-PremwadeeNeural, ...
# Full list:  edge-tts --list-voices
VOICE = "en-US-AriaNeural"

# Your close-up picture (a short video works too).
FACE = "narrator.png"

# File name of the finished talking video.
OUT = "talk.mp4"
# ------------------------------------------------------------------------------

# 0) pre-flight: check that everything step 2 needs is really here BEFORE going
#    online for the voice, so a missing file is reported at once and by name
#    instead of as a crash halfway through.
AUDIO = "voice.mp3"                          # written by step 1, read by step 2
CHECKPOINT = "checkpoints/wav2lip_gan.pth"   # the Wav2Lip model file

if not LINE.strip():
    sys.exit("LINE is empty - write something for your character to say.")

# These paths are relative, so they only resolve when the script is started
# from the folder that holds inference.py.
missing = [p for p in ("inference.py", CHECKPOINT, FACE) if not Path(p).is_file()]
if missing:
    sys.exit("can't find: " + ", ".join(missing) + "\n"
             "run talk.py from inside your Wav2Lip folder "
             "(see the notes at the top of this file).")

# 1) your line -> voice.mp3 (free text-to-speech, no account, no key)
try:
    import edge_tts
except ImportError:
    # Turn the raw import error into the one command that fixes it.
    sys.exit("edge-tts is not installed yet - run:  pip3 install edge-tts")

# Communicate(...).save() is a coroutine; asyncio.run drives it to completion.
asyncio.run(edge_tts.Communicate(LINE, VOICE).save(AUDIO))
print("voice saved ->", AUDIO)

# 2) voice + face -> talking video. Wav2Lip listens to the sound and repaints
#    the mouth in every frame to match it - that's the whole trick.
#    sys.executable = the same Python that is running this script;
#    check=True stops here with an error if inference.py fails.
subprocess.run([sys.executable, "inference.py",
                "--checkpoint_path", CHECKPOINT,
                "--face", FACE,
                "--audio", AUDIO,
                "--outfile", OUT],
               check=True)
print("done ->", OUT)
