"""Voice-assistant demo.

Pipeline: upload audio in a Gradio UI -> transcribe with Whisper on Groq ->
let a tool-use Llama model select and call one of the functions declared in
``utils.mapper`` -> summarize the tool result -> speak it with ElevenLabs.
"""

import json
import os
import platform
import subprocess
import threading
import time  # noqa: F401  (kept: file-level import, may be used elsewhere)
from typing import Iterator

import gradio as gr
from groq import Groq
from elevenlabs import play, stream
from elevenlabs.client import ElevenLabs, AsyncElevenLabs  # noqa: F401

from utils.mapper import TOOLS, AVAILABLE_FUNCTIONS, COMMON_VARS

# SECURITY: API keys were previously hard-coded here and are therefore
# compromised — rotate them. They are now read from the environment.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
elevenlabs_client = ElevenLabs(api_key=os.environ.get("ELEVENLABS_API_KEY"))

# Single model used for both the tool-selection call and the final answer.
CHAT_MODEL = "llama3-groq-70b-8192-tool-use-preview"
GREETING_FILE = "greetings.mp3"


def text_to_speech_file(text: str, play_audio: bool) -> Iterator[bytes]:
    """Convert *text* to speech with ElevenLabs.

    Args:
        text: The text to synthesize.
        play_audio: When True, stream the audio to the speakers immediately.

    Returns:
        The (possibly already-consumed) audio byte iterator from the SDK.
    """
    audio = elevenlabs_client.generate(
        text=text,
        voice="Adam",
        model="eleven_turbo_v2_5",
        stream=True,
        optimize_streaming_latency=3,
    )
    if play_audio:
        print("streaming")
        stream(audio)
    return audio


def create_content(result=None):
    """Build the system prompt for the tool-use model.

    Args:
        result: Unused; kept for backward compatibility with existing callers.

    Returns:
        The system-prompt string, with ``COMMON_VARS`` defaults interpolated.
    """
    content = (
        f"""You are an AI assistant that will suggest and call the functions provided in the tools based on the user's request. You need to analyze the user's request and select the function from the provided tools that best matches the request and provide the results by calling the appropriate function. Expect all parameters from user request, Consider the dates according to the user query for example if user asking some operations for today, it should be understood to get today's date in YYYY-MM-DD format for date parameters. 
If required parameters are not in the user request these default can be used {COMMON_VARS} """
    )
    return content


def _transcribe(audio_file_path, language):
    """Transcribe an audio file with Whisper on Groq and return the text."""
    with open(audio_file_path, "rb") as audio_file:
        response = client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-large-v3",
            language=language,
        )
    return response.text


def _answer_with_tools(result, transcribed_text):
    """Run the two-step tool-use conversation and return the final answer.

    Step 1 asks the model to pick a tool; each requested tool is executed via
    ``AVAILABLE_FUNCTIONS``.  Step 2 feeds the tool output back for a
    natural-language summary.

    Raises:
        Exception: If the model requested no tool for the query.
    """
    messages = [
        {"role": "system", "content": create_content(result)},
        {"role": "user", "content": result},
    ]
    chat_completion = client.chat.completions.create(
        messages=messages,
        tools=TOOLS,
        tool_choice="auto",
        model=CHAT_MODEL,
        temperature=0.5,
        max_tokens=500,
    )
    response_message = chat_completion.choices[0].message
    tool_calls = response_message.tool_calls
    if not tool_calls:
        raise Exception(f"No Tool Found associated with query: {transcribed_text}")
    messages.append(response_message)
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        function_to_call = AVAILABLE_FUNCTIONS[function_name]
        function_args = json.loads(tool_call.function.arguments)
        function_response = function_to_call(**function_args)
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                # NOTE(review): assumes the mapped function returns a string
                # (the API requires string content) — confirm in utils.mapper.
                "content": function_response,
            }
        )
    second_response = client.chat.completions.create(
        model=CHAT_MODEL,
        messages=messages,
    )
    return second_response.choices[0].message.content


def background_task(audio_file_path, language, additional_text):
    """Full pipeline (transcribe -> tools -> answer), speaking synchronously."""
    transcribed_text = _transcribe(audio_file_path, language)
    result = f"Transcription: {transcribed_text}\n\nAdditional Context: {additional_text}\n\n"
    final_response = _answer_with_tools(result, transcribed_text)
    print("final: ", final_response)
    # Speak the answer before returning (blocking).
    text_to_speech_file(final_response, True)
    return final_response


def play_audio():
    """Play the canned greeting clip from disk."""
    with open(GREETING_FILE, "rb") as f:
        bytes_data = f.read()
    play(bytes_data)


def transcribe_audio(audio_file_path, language, additional_text):
    """Gradio handler: greet, run the pipeline, speak the answer in a thread.

    Returns the final answer text, or an error string on any failure (the
    broad except keeps the UI responsive instead of surfacing a traceback).
    """
    try:
        # Play a greeting so the user gets immediate feedback while we work.
        play_audio()
        transcribed_text = _transcribe(audio_file_path, language)
        result = f"Transcription: {transcribed_text}\n\nAdditional Context: {additional_text}\n\n"
        final_response = _answer_with_tools(result, transcribed_text)
        print("final: ", final_response)
        # Speak asynchronously so the text answer returns to the UI at once.
        thread = threading.Thread(
            target=text_to_speech_file, args=(final_response, True)
        )
        thread.start()
        return final_response
    except Exception as e:
        return f"An error occurred: {str(e)}"


def speach_to_text():
    """Build and launch the Gradio transcription interface (blocking)."""
    # Supported language codes (adjust to Groq's actual supported set).
    languages = ["en", "ba", "ms", "is", "no", "id"]
    iface = gr.Interface(
        fn=transcribe_audio,
        inputs=[
            gr.Audio(type="filepath", label="Upload Audio File"),
            gr.Dropdown(choices=languages, label="Select Language", value="en"),
            gr.Textbox(
                label="Additional Text",
                placeholder="Enter any additional context or instructions here...",
            ),
        ],
        outputs="text",
        title="Groq Speech-to-Text Transcription",
        description="Upload an audio file, set parameters, and provide additional text for context in the "
        "transcription process.",
    )
    iface.launch()


if __name__ == "__main__":
    print(f"platform: {platform.platform()}")
    if "Linux" in platform.platform():
        # freedesktop_os_release() raises OSError on non-Linux platforms,
        # so it must only be called inside this branch.
        print(f"platform: {platform.freedesktop_os_release()}")
        # mpv is required by the elevenlabs `stream` helper for playback.
        subprocess.run(["apt", "install", "-y", "mpv"])
    speach_to_text()