import gc
import random
import uuid
from pathlib import Path

import streamlit as st
import whisper

# The UI text is Persian, so a stylesheet is injected that switches the page,
# form controls, buttons, header and main container to right-to-left layout.
RTL_STYLESHEET = """
<style>
    html, body, [class*="css"] {
        direction: rtl;
        text-align: right;
    }
    button, input, optgroup, select, textarea {
        direction: rtl;
    }
    .stButton button {
        direction: rtl;
    }
    header {
        direction: rtl;
    }
    .main {
        direction: rtl;
    }
</style>
"""

# unsafe_allow_html is needed so the <style> tag is passed through as HTML.
st.markdown(RTL_STYLESHEET, unsafe_allow_html=True)

# Page heading (Persian UI string).
st.title("تبدیل گفتار به متن")

@st.cache_resource
def init_whisper(model_name: str = "large-v3-turbo"):
    """Load a Whisper model once and reuse it across Streamlit reruns.

    The function is wrapped in ``st.cache_resource``, which caches the
    returned object per distinct argument value, so each ``model_name`` is
    loaded at most once instead of on every script rerun.

    Args:
        model_name: Checkpoint name passed to ``whisper.load_model``.
            Defaults to ``"large-v3-turbo"``; other names such as ``"tiny"``
            or ``"turbo"`` can be passed to pick a different checkpoint.

    Returns:
        The model object returned by ``whisper.load_model``.
    """
    return whisper.load_model(model_name)

# Obtain the shared Whisper model used for every transcription.
whisper_model = init_whisper()

# Recording widget (Persian label); yields None until the user records audio.
audio = st.audio_input("صدای خود را ضبط کنید")
if audio is not None:
    # Recordings are stored under ./audio. Create the directory on demand so
    # the write below does not fail with FileNotFoundError on a fresh setup.
    audio_dir = Path("audio")
    audio_dir.mkdir(parents=True, exist_ok=True)

    # A UUID-based file name keeps recordings from different reruns or
    # sessions from overwriting each other.
    # NOTE(review): saved recordings are never deleted; add cleanup here if
    # they do not need to be retained.
    sample_path = audio_dir / f"{uuid.uuid4().hex}_test.wav"
    sample_path.write_bytes(audio.getbuffer())

    # Whisper is given the path of the saved file rather than the raw bytes.
    result = whisper_model.transcribe(str(sample_path))

    st.write(result["text"])

    # Explicit collection after each transcription; presumably meant to free
    # memory held by the run that just finished.
    gc.collect()
