# Kunal
# updated score model
# e6f0bb0
import os
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain.prompts import ChatPromptTemplate
import time
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError # Corrected import path
# --- Setup for environment variables and client ---
# 1. Create a .env file in the same directory as this script.
# 2. Add your Hugging Face Access Token to it:
# HF_TOKEN="hf_YOUR_ACTUAL_HUGGING_FACE_TOKEN"
# (The 'hf_' prefix is important for Hugging Face tokens)
load_dotenv()

# Build the module-level inference client `llm` used by the test below.
#
# The access token is read from the HF_TOKEN environment variable, which
# load_dotenv() above populates from a local .env file when one exists.
# SECURITY: never embed the token as a string literal in this file. Any
# credential that was ever committed to source control must be treated as
# leaked and revoked.
try:
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("Warning: HF_TOKEN not found in .env file or environment variables.")
        print("The InferenceClient might work for public models, but private models or higher rate limits may require a token.")
        # No token available: fall back to an anonymous client.
        llm = InferenceClient()
    else:
        # Pass the token explicitly so it is clear which credential is in use.
        llm = InferenceClient(token=hf_token)
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    print("Please ensure your .env file has HF_TOKEN set correctly.")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive use, and a non-zero status
    # lets callers and shells detect the failure.
    raise SystemExit(1)
# --- Sample Prompt and API Call ---
def test_llm_timeout():
    """Send one chat completion request, retrying on HTTP 504 responses.

    A fixed prompt is sent to the configured model through the module-level
    ``llm`` client. A 504 Gateway Time-out is retried with exponential
    backoff up to ``max_retries`` attempts in total; progress and the final
    outcome are reported with ``print``.

    Returns:
        None. The function returns after the first successful call, after
        the last failed 504 attempt, or after any non-HTTP exception.

    Raises:
        HfHubHTTPError: re-raised for any HTTP error whose status code is
            not 504.
    """
    test_prompt = ChatPromptTemplate.from_template(
        "Explain the concept of neural networks in a simple way."
    )
    # Use the rendered message's own content. ChatPromptTemplate.format()
    # returns the whole conversation as a single buffer string, which
    # prefixes the text with the role label ("Human: ..."); that label must
    # not be sent as part of the user message.
    rendered_prompt_content = test_prompt.format_messages()[0].content
    messages = [
        {
            "role": "user",
            "content": rendered_prompt_content,
        }
    ]

    model_name = "deepseek-ai/DeepSeek-R1"
    max_retries = 3

    print(f"Attempting to call model: {model_name}")
    print(f"Prompt: '{rendered_prompt_content[:50]}...'")

    for attempt in range(max_retries):
        print(f"\n--- Attempt {attempt + 1}/{max_retries} ---")
        try:
            result = llm.chat.completions.create(
                model=model_name,
                messages=messages,
                max_tokens=200,  # keep the response short for a smoke test
                temperature=0.7,
            )
            # Success: report the answer and stop retrying.
            print("API call successful!")
            print("Response:", result.choices[0].message.content)
            return
        except HfHubHTTPError as e:
            if e.response.status_code == 504:
                print(f"Caught 504 Gateway Time-out on attempt {attempt + 1}.")
                if attempt < max_retries - 1:
                    # Exponential backoff: 2 s after the first failure,
                    # 4 s after the second.
                    wait_time = 2 ** (attempt + 1)
                    print(f"Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                else:
                    print(f"Max retries ({max_retries}) reached. Still encountering 504.")
                    print("This indicates a persistent issue with the API or model availability.")
                    print(f"Full error: {e}")
                    return
            else:
                # Only 504 is retried; every other HTTP error is reported
                # and propagated to the caller.
                print(f"Caught unexpected HTTP error: {e.response.status_code} - {e.response.reason}")
                print(f"Full error response: {e.response.text}")
                raise
        except Exception as e:
            # Non-HTTP failures are reported and end the test without
            # retrying.
            print(f"An unexpected error occurred: {e}")
            return
# Run the test
# Script entry point: run the timeout/retry check only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    test_llm_timeout()