| import os |
| from dotenv import load_dotenv |
| from huggingface_hub import InferenceClient |
| from langchain.prompts import ChatPromptTemplate |
| import time |
| from huggingface_hub import InferenceClient |
| from huggingface_hub.errors import HfHubHTTPError |
|
|
| |
| |
| |
| |
| |
# Load variables from a local .env file (if present) into the process
# environment so HF_TOKEN can be supplied without exporting it in the shell.
load_dotenv()

# Build the module-level inference client used by the functions below.
# SECURITY: the access token is read from the environment and must never be
# written into this file; any token that has been committed to source control
# should be treated as leaked and revoked.
try:
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        print("Warning: HF_TOKEN not found in .env file or environment variables.")
        print("The InferenceClient might work for public models, but private models or higher rate limits may require a token.")
        # Fall back to an unauthenticated client rather than failing outright.
        llm = InferenceClient()
    else:
        llm = InferenceClient(token=hf_token)
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    print("Please ensure your .env file has HF_TOKEN set correctly.")
    # Exit with a non-zero status so callers/CI can detect the failure;
    # the bare ``exit()`` helper is only injected by ``site`` and reports success.
    raise SystemExit(1) from e
|
|
| |
def test_llm_timeout(model_name="deepseek-ai/DeepSeek-R1", max_retries=3):
    """Send one chat prompt to a hosted model, retrying on 504 Gateway Time-out.

    The call is made through the module-level ``llm`` client. Progress and
    the outcome are reported with ``print``; nothing is returned.

    Args:
        model_name: Model identifier passed to the chat-completions call.
        max_retries: Maximum number of attempts. After a 504 on attempt ``k``
            (1-based) the function sleeps ``2 ** k`` seconds before retrying.

    Raises:
        HfHubHTTPError: Re-raised when the API answers with an HTTP error
            whose status code is not 504.
    """
    test_prompt = ChatPromptTemplate.from_template(
        "Explain the concept of neural networks in a simple way."
    )
    # ``ChatPromptTemplate.format()`` renders the conversation as a transcript
    # string ("Human: ..."), which would leak the role prefix into the message
    # body. Use the content of the rendered message itself instead.
    rendered_prompt_content = test_prompt.format_messages()[0].content

    messages = [{"role": "user", "content": rendered_prompt_content}]

    print(f"Attempting to call model: {model_name}")
    print(f"Prompt: '{rendered_prompt_content[:50]}...'")

    for attempt in range(1, max_retries + 1):
        print(f"\n--- Attempt {attempt}/{max_retries} ---")
        try:
            result = llm.chat.completions.create(
                model=model_name,
                messages=messages,
                max_tokens=200,
                temperature=0.7,
            )
            print("API call successful!")
            print("Response:", result.choices[0].message.content)
            return
        except HfHubHTTPError as e:
            # The error may not carry a response object; read its fields
            # defensively so reporting never masks the original failure.
            response = e.response
            status_code = getattr(response, "status_code", None)

            if status_code != 504:
                # Anything other than a gateway time-out is not retried.
                print(f"Caught unexpected HTTP error: {status_code} - {getattr(response, 'reason', None)}")
                print(f"Full error response: {getattr(response, 'text', None)}")
                raise

            print(f"Caught 504 Gateway Time-out on attempt {attempt}.")
            if attempt >= max_retries:
                print(f"Max retries ({max_retries}) reached. Still encountering 504.")
                print("This indicates a persistent issue with the API or model availability.")
                print(f"Full error: {e}")
                return

            # Exponential backoff: 2s, 4s, 8s, ...
            wait_time = 2 ** attempt
            print(f"Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
        except Exception as e:
            # Deliberate best-effort for a diagnostic script: report and stop
            # instead of crashing with a traceback.
            print(f"An unexpected error occurred: {e}")
            return
|
|
| |
# Script entry point: run the retry check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_llm_timeout()