"""Smoke test for a Hugging Face chat-completion call with retry on HTTP 504.

Setup:
  1. Create a .env file in the same directory as this script.
  2. Add your Hugging Face access token to it:
         HF_TOKEN="hf_YOUR_ACTUAL_HUGGING_FACE_TOKEN"

The token is read from the environment at import time. Never hard-code it in
this file: anything committed to source control must be treated as leaked.
"""

import os
import time

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from langchain.prompts import ChatPromptTemplate

# Model under test and number of attempts made before giving up on 504s.
MODEL_NAME = "deepseek-ai/DeepSeek-R1"
MAX_RETRIES = 3

# --- Setup for environment variables and client ---
# Pull variables from a local .env file (if present) into the process env.
load_dotenv()

try:
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("Warning: HF_TOKEN not found in .env file or environment variables.")
        print("The InferenceClient might work for public models, but private models or higher rate limits may require a token.")
        # No token available: fall back to an unauthenticated client.
        llm = InferenceClient()
    else:
        # Pass the token explicitly so it is obvious which credential is used.
        llm = InferenceClient(token=hf_token)
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    print("Please ensure your .env file has HF_TOKEN set correctly.")
    # Exit with a non-zero status so callers/CI can see the failure.
    raise SystemExit(1)


# --- Sample Prompt and API Call ---
def test_llm_timeout():
    """Send one simple chat prompt to MODEL_NAME, retrying on HTTP 504.

    Makes up to MAX_RETRIES attempts. After a 504 response it sleeps
    2 ** (attempt + 1) seconds (2, then 4) before the next attempt, and
    gives up with a diagnostic message once the attempts are exhausted.

    Returns:
        None. The outcome is reported on stdout.

    Raises:
        HfHubHTTPError: for any HTTP error other than 504 (re-raised after
            its status and response body are printed).
    """
    # Use a simple prompt for testing.
    test_prompt = ChatPromptTemplate.from_template(
        "Explain the concept of neural networks in a simple way."
    )
    rendered_prompt_content = test_prompt.format()

    messages = [
        {
            "role": "user",
            "content": rendered_prompt_content,
        }
    ]

    print(f"Attempting to call model: {MODEL_NAME}")
    print(f"Prompt: '{rendered_prompt_content[:50]}...'")

    for attempt in range(MAX_RETRIES):
        print(f"\n--- Attempt {attempt + 1}/{MAX_RETRIES} ---")
        try:
            result = llm.chat.completions.create(
                model=MODEL_NAME,
                messages=messages,
                max_tokens=200,  # Keep max_tokens reasonable for testing
                temperature=0.7,
            )
        except HfHubHTTPError as e:
            if e.response.status_code != 504:
                # Anything other than a gateway timeout is not retried.
                print(f"Caught unexpected HTTP error: {e.response.status_code} - {e.response.reason}")
                print(f"Full error response: {e.response.text}")
                raise

            print(f"Caught 504 Gateway Time-out on attempt {attempt + 1}.")
            if attempt >= MAX_RETRIES - 1:
                print(f"Max retries ({MAX_RETRIES}) reached. Still encountering 504.")
                print("This indicates a persistent issue with the API or model availability.")
                print(f"Full error: {e}")
                return

            wait_time = 2 ** (attempt + 1)  # Exponential backoff: 2, 4 seconds
            print(f"Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
        except Exception as e:
            # Top-level boundary of a diagnostic script: report and stop.
            print(f"An unexpected error occurred: {e}")
            return
        else:
            # Success: print the model's reply and stop retrying.
            print("API call successful!")
            print("Response:", result.choices[0].message.content)
            return


# Run the test
if __name__ == "__main__":
    test_llm_timeout()