import os
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain.prompts import ChatPromptTemplate
import time
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError # Corrected import path

# --- Setup for environment variables and client ---
# 1. Create a .env file in the same directory as this script.
# 2. Add your Hugging Face Access Token to it:
#    HF_TOKEN="hf_YOUR_ACTUAL_HUGGING_FACE_TOKEN"
#    (The 'hf_' prefix is important for Hugging Face tokens)
load_dotenv()

# Initialize llm with your standard Hugging Face token.
# The token is read from the HF_TOKEN environment variable (which load_dotenv()
# populates from a local .env file). Credentials must never be hard-coded in
# source: they leak through version control and make the "token missing"
# branch below unreachable.
try:
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("Warning: HF_TOKEN not found in .env file or environment variables.")
        print("The InferenceClient might work for public models, but private models or higher rate limits may require a token.")
        # If no token is found, try to proceed without it (might work for public models)
        llm = InferenceClient()
    else:
        llm = InferenceClient(token=hf_token)  # Explicitly pass the token to be sure it is used
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    print("Please ensure your .env file has HF_TOKEN set correctly.")
    # Exit with a non-zero status so shells/CI can detect the failure;
    # SystemExit also works when the `site`-provided exit() builtin is absent.
    raise SystemExit(1)

# --- Sample Prompt and API Call ---
def test_llm_timeout(model_name: str = "deepseek-ai/DeepSeek-R1", max_retries: int = 3) -> None:
    """Send one chat-completion request, retrying on HTTP 504 with backoff.

    A fixed sample prompt is sent to ``model_name`` through the module-level
    ``llm`` client. On a 504 Gateway Time-out the call is retried up to
    ``max_retries`` times in total, sleeping 2, 4, 8, ... seconds between
    attempts. Progress and the final response are printed to stdout.

    Args:
        model_name: Model identifier passed to the chat-completion call.
        max_retries: Maximum number of attempts (including the first one).

    Returns:
        None. The function returns after the first successful call, after the
        last 504, or after any non-HTTP exception (which is printed, not raised).

    Raises:
        HfHubHTTPError: Re-raised for any HTTP error other than a 504.
    """
    # Use a simple prompt for testing
    test_prompt = ChatPromptTemplate.from_template(
        "Explain the concept of neural networks in a simple way."
    )
    # format() would return the chat buffer string ("Human: ..."), leaking the
    # role prefix into the message content; take the message content instead.
    rendered_prompt_content = test_prompt.format_messages()[0].content

    messages = [{"role": "user", "content": rendered_prompt_content}]

    print(f"Attempting to call model: {model_name}")
    print(f"Prompt: '{rendered_prompt_content[:50]}...'")

    for attempt in range(max_retries):
        print(f"\n--- Attempt {attempt + 1}/{max_retries} ---")
        try:
            # Make the API call
            result = llm.chat.completions.create(
                model=model_name,
                messages=messages,
                max_tokens=200,  # Keep max_tokens reasonable for testing
                temperature=0.7,
            )
            # If successful, print the result and stop retrying
            print("API call successful!")
            print("Response:", result.choices[0].message.content)
            return

        except HfHubHTTPError as e:
            # The error may carry no response object, and the reason attribute
            # is named differently across HTTP backends, so read defensively
            # rather than risk an AttributeError inside the handler.
            response = getattr(e, "response", None)
            status_code = getattr(response, "status_code", None)

            if status_code == 504:
                print(f"Caught 504 Gateway Time-out on attempt {attempt + 1}.")
                if attempt < max_retries - 1:
                    wait_time = 2 ** (attempt + 1)  # Exponential backoff: 2, 4, 8, ... seconds
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                else:
                    print(f"Max retries ({max_retries}) reached. Still encountering 504.")
                    print("This indicates a persistent issue with the API or model availability.")
                    print(f"Full error: {e}")
                    return
            else:
                # Report and re-raise other HTTP errors
                reason = (
                    getattr(response, "reason", None)
                    or getattr(response, "reason_phrase", None)
                    or ""
                )
                body = getattr(response, "text", None)
                print(f"Caught unexpected HTTP error: {status_code} - {reason}")
                print(f"Full error response: {body if body is not None else e}")
                raise
        except Exception as e:
            # Catch any other unexpected errors; this is a best-effort smoke
            # test, so report and stop instead of crashing.
            print(f"An unexpected error occurred: {e}")
            return

# Run the test only when this file is executed directly as a script;
# the call is skipped when the module is imported.
if __name__ == "__main__":
    test_llm_timeout()