knighter75 committed on
Commit
d866e5b
·
1 Parent(s): 9ea68b1
app/auth.py CHANGED
@@ -1,34 +1,13 @@
1
  import os
2
  from fastapi import HTTPException, Security
3
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
4
- import sys
5
 
6
  security = HTTPBearer()
7
 
8
- print(f"📁 Looking for .env in: {os.path.abspath('.')}", flush=True)
9
- print(f"📁 .env exists: {os.path.exists('.env')}", flush=True)
10
-
11
- # Загружаем ключи из переменной окружения
12
  API_KEYS_STR = os.getenv("API_KEYS", "")
 
13
 
14
- # Разбираем ключи (поддерживаем разделение запятой)
15
- VALID_KEYS = set()
16
- if API_KEYS_STR:
17
- for key in API_KEYS_STR.split(","):
18
- key = key.strip()
19
- if key:
20
- VALID_KEYS.add(key)
21
-
22
- async def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security)):
23
- """
24
- Проверяет API-ключ, переданный в заголовке Authorization: Bearer <key>.
25
- """
26
- provided_key = credentials.credentials
27
- print(f"🔑 Received key: '{provided_key}'", flush=True)
28
-
29
- if provided_key not in VALID_KEYS:
30
- print(f"❌ Invalid key: '{provided_key}' not in {VALID_KEYS}", flush=True)
31
- raise HTTPException(status_code=403, detail="Invalid or missing API Key")
32
-
33
- print(f"✅ Valid key: {provided_key}", flush=True)
34
- return provided_key
 
1
  import os
2
  from fastapi import HTTPException, Security
3
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
 
4
 
5
  security = HTTPBearer()
6
 
 
 
 
 
7
  API_KEYS_STR = os.getenv("API_KEYS", "")
8
+ VALID_KEYS = set(key.strip() for key in API_KEYS_STR.split(",") if key.strip())
9
 
10
async def verify_api_key(credentials: HTTPAuthorizationCredentials = Security(security)):
    """Validate the bearer token sent in the ``Authorization`` header.

    Args:
        credentials: Parsed bearer credentials injected by FastAPI through
            the module-level ``HTTPBearer`` scheme; the raw key is read from
            its ``credentials`` attribute.

    Returns:
        The presented API key, once it has matched an entry of ``VALID_KEYS``.

    Raises:
        HTTPException: 403 when the presented key matches no configured key.
    """
    import hmac  # local import keeps this block self-contained

    presented = credentials.credentials.encode("utf-8")
    # Compare against every configured key with a constant-time primitive so
    # response timing does not reveal how much of a key was guessed correctly.
    # Encoding to bytes avoids the TypeError compare_digest raises for
    # non-ASCII str input.
    matches = [
        hmac.compare_digest(presented, key.encode("utf-8"))
        for key in VALID_KEYS
    ]
    if not any(matches):
        raise HTTPException(status_code=403, detail="Invalid API Key")
    return credentials.credentials
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/factory.py CHANGED
@@ -1,15 +1,13 @@
1
- from .providers.zhipu import ZhipuFlashProvider
2
  from .providers.hf_openai import HFOpenAIProvider
3
 
4
  class ProviderFactory:
5
  _providers = {
6
- "zhipu-flash": ZhipuFlashProvider,
7
  "arch-router": HFOpenAIProvider,
8
  "phi-3-mini": HFOpenAIProvider,
9
- "gemma-2-2b": HFOpenAIProvider,
10
  "mistral-7b": HFOpenAIProvider,
11
- "llama-3.2-3b": HFOpenAIProvider,
12
- "qwen2.5-3b": HFOpenAIProvider,
13
  }
14
 
15
  _instances = {}
@@ -17,7 +15,7 @@ class ProviderFactory:
17
  @classmethod
18
  def get_provider(cls, model_name: str):
19
  if model_name not in cls._providers:
20
- raise ValueError(f"Unsupported model/provider: {model_name}")
21
 
22
  provider_class = cls._providers[model_name]
23
  cache_key = provider_class.__name__
 
 
1
  from .providers.hf_openai import HFOpenAIProvider
2
 
3
  class ProviderFactory:
4
  _providers = {
 
5
  "arch-router": HFOpenAIProvider,
6
  "phi-3-mini": HFOpenAIProvider,
7
+ "gemma-2b": HFOpenAIProvider,
8
  "mistral-7b": HFOpenAIProvider,
9
+ "llama-3b": HFOpenAIProvider,
10
+ "qwen-3b": HFOpenAIProvider,
11
  }
12
 
13
  _instances = {}
 
15
  @classmethod
16
  def get_provider(cls, model_name: str):
17
  if model_name not in cls._providers:
18
+ raise ValueError(f"Unsupported model: {model_name}")
19
 
20
  provider_class = cls._providers[model_name]
21
  cache_key = provider_class.__name__
app/main.py CHANGED
@@ -1,80 +1,55 @@
1
  import os
2
  import sys
 
3
  from dotenv import load_dotenv
4
 
5
- # Отключаем буферизацию вывода сразу при старте
6
- sys.stdout.reconfigure(line_buffering=True)
7
- print("🚀 Starting application initialization...", flush=True)
8
-
9
  load_dotenv()
10
 
11
- from fastapi import FastAPI, Depends, HTTPException
12
  from .auth import verify_api_key
13
  from .factory import ProviderFactory
14
  from .models import ChatRequest, ChatResponse
15
 
16
  app = FastAPI(title="LLM API Proxy", version="1.0.0")
17
- print("✅ FastAPI app created", flush=True)
18
 
19
  @app.get("/")
20
  async def root():
21
- print("🌐 Root endpoint accessed", flush=True)
22
- return {"message": "LLM API Proxy is running", "version": "1.0.1"}
23
 
24
  @app.get("/v1/models")
25
  async def list_models(api_key: str = Depends(verify_api_key)):
 
26
  return {
27
  "models": [
28
- {"id": "zhipu-flash", "name": "GLM-4.7-Flash (Zhipu)", "provider": "zhipu", "type": "free"},
29
- {"id": "arch-router", "name": "Arch Router 1.5B (HF)", "provider": "huggingface", "type": "free"},
30
- {"id": "phi-3-mini", "name": "Phi-3 Mini 4K (HF)", "provider": "huggingface", "type": "free"},
31
- {"id": "gemma-2-2b", "name": "Gemma 2 2B (HF)", "provider": "huggingface", "type": "free"},
32
- {"id": "mistral-7b", "name": "Mistral 7B (HF)", "provider": "huggingface", "type": "free"},
33
- {"id": "llama-3.2-3b", "name": "Llama 3.2 3B (HF)", "provider": "huggingface", "type": "free"},
34
- {"id": "qwen2.5-3b", "name": "Qwen 2.5 3B (HF)", "provider": "huggingface", "type": "free"},
35
  ]
36
  }
37
 
38
- @app.post("/v1/chat/completions", response_model=ChatResponse)
39
  async def chat_completion(
40
  request: ChatRequest,
41
  api_key: str = Depends(verify_api_key)
42
  ):
43
- """Основной эндпоинт для генерации текста."""
44
- print(f"💬 Chat completion requested with model: {request.model}", flush=True)
45
-
46
  try:
47
- # 1. Получаем провайдера по имени модели из запроса
48
- print(f"🔍 Getting provider for model: {request.model}", flush=True)
49
  provider = ProviderFactory.get_provider(request.model)
50
- print(f"✅ Provider obtained: {type(provider).__name__}", flush=True)
51
-
52
- # 2. Генерируем ответ
53
- print("🔄 Calling provider.generate()...", flush=True)
54
  result = await provider.generate(
55
- messages=[m.dict() for m in request.messages],
56
  max_tokens=request.max_tokens,
57
  temperature=request.temperature,
58
  model=request.model
59
  )
60
- print(f"✅ Generation complete, tokens: {result.get('total_tokens', 0)}", flush=True)
61
 
62
- # 3. Возвращаем в стандартном формате
63
- response = ChatResponse(
64
  id=f"chat-{hash(str(request.messages))}",
65
  choices=[{"message": {"content": result["content"]}}],
66
  usage={"total_tokens": result.get("total_tokens", 0)},
67
  model=request.model
68
  )
69
- print("✅ Response prepared, sending...", flush=True)
70
- return response
71
-
72
  except ValueError as e:
73
- print(f"❌ ValueError: {e}", flush=True)
74
  raise HTTPException(status_code=400, detail=str(e))
75
  except Exception as e:
76
- import traceback
77
- error_trace = traceback.format_exc()
78
- print(f"❌ Exception: {type(e).__name__}: {e}", flush=True)
79
- print(f"❌ Traceback: {error_trace}", flush=True)
80
  raise HTTPException(status_code=502, detail=f"Provider error: {str(e)}")
 
1
  import os
2
  import sys
3
+ from fastapi import FastAPI, Depends, HTTPException
4
  from dotenv import load_dotenv
5
 
 
 
 
 
6
  load_dotenv()
7
 
 
8
  from .auth import verify_api_key
9
  from .factory import ProviderFactory
10
  from .models import ChatRequest, ChatResponse
11
 
12
  app = FastAPI(title="LLM API Proxy", version="1.0.0")
 
13
 
14
  @app.get("/")
15
  async def root():
16
+ return {"message": "LLM API Proxy is running", "version": "1.0.0"}
 
17
 
18
  @app.get("/v1/models")
19
  async def list_models(api_key: str = Depends(verify_api_key)):
20
+ """Возвращает список доступных моделей"""
21
  return {
22
  "models": [
23
+ {"id": "arch-router", "name": "Arch Router 1.5B (HF)", "provider": "huggingface"},
24
+ {"id": "phi-3-mini", "name": "Phi-3 Mini 4K (HF)", "provider": "huggingface"},
25
+ {"id": "gemma-2b", "name": "Gemma 2 2B (HF)", "provider": "huggingface"},
26
+ {"id": "mistral-7b", "name": "Mistral 7B (HF)", "provider": "huggingface"},
27
+ {"id": "llama-3b", "name": "Llama 3.2 3B (HF)", "provider": "huggingface"},
28
+ {"id": "qwen-3b", "name": "Qwen 2.5 3B (HF)", "provider": "huggingface"},
 
29
  ]
30
  }
31
 
32
@app.post("/v1/chat/completions")
async def chat_completion(
    request: ChatRequest,
    api_key: str = Depends(verify_api_key)
):
    """Generate a chat completion with the provider registered for ``request.model``.

    Args:
        request: Validated request body (model name, messages, ``max_tokens``,
            ``temperature``).
        api_key: Key returned by ``verify_api_key``; declared only so the
            endpoint requires authentication.

    Returns:
        A ``ChatResponse`` with the generated text, the token usage reported
        by the provider (0 when absent) and the requested model name.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised (for example an
            unsupported model name); 502 for any other failure while
            resolving or calling the provider.
    """
    import uuid  # local import keeps this block self-contained

    try:
        provider = ProviderFactory.get_provider(request.model)
        result = await provider.generate(
            messages=[{"role": m.role, "content": m.content} for m in request.messages],
            max_tokens=request.max_tokens,
            temperature=request.temperature,
            model=request.model
        )

        return ChatResponse(
            # hash() of a str is salted per process, may be negative and
            # repeats for identical conversations; a random UUID gives every
            # response its own well-formed id.
            id=f"chat-{uuid.uuid4().hex}",
            choices=[{"message": {"content": result["content"]}}],
            usage={"total_tokens": result.get("total_tokens", 0)},
            model=request.model
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=502, detail=f"Provider error: {str(e)}") from e
app/models.py CHANGED
@@ -2,14 +2,14 @@ from pydantic import BaseModel
2
  from typing import List, Optional
3
 
4
  class Message(BaseModel):
5
- role: str # "user", "assistant", "system"
6
  content: str
7
 
8
  class ChatRequest(BaseModel):
9
  model: str
10
  messages: List[Message]
11
- max_tokens: Optional[int] = 1000
12
- temperature: Optional[float] = 0.8
13
 
14
  class ChatResponse(BaseModel):
15
  id: str
 
2
  from typing import List, Optional
3
 
4
  class Message(BaseModel):
5
+ role: str
6
  content: str
7
 
8
  class ChatRequest(BaseModel):
9
  model: str
10
  messages: List[Message]
11
+ max_tokens: Optional[int] = 500
12
+ temperature: Optional[float] = 0.7
13
 
14
  class ChatResponse(BaseModel):
15
  id: str
app/providers/base.py CHANGED
@@ -1,11 +1,7 @@
1
  from abc import ABC, abstractmethod
2
- from typing import List, Dict, Any, AsyncGenerator
3
 
4
  class BaseLLMProvider(ABC):
5
  @abstractmethod
6
  async def generate(self, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
7
- pass
8
-
9
- @abstractmethod
10
- async def generate_stream(self, messages: List[Dict[str, str]], **kwargs) -> AsyncGenerator[str, None]:
11
  pass
 
1
  from abc import ABC, abstractmethod
2
+ from typing import List, Dict, Any
3
 
4
  class BaseLLMProvider(ABC):
5
  @abstractmethod
6
  async def generate(self, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
 
 
 
 
7
  pass
app/providers/hf_openai.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
- from openai import AsyncOpenAI
3
  from typing import List, Dict, Any
4
  from .base import BaseLLMProvider
5
 
@@ -9,58 +9,44 @@ class HFOpenAIProvider(BaseLLMProvider):
9
  SUPPORTED_MODELS = {
10
  "arch-router": "katanemo/Arch-Router-1.5B:hf-inference",
11
  "phi-3-mini": "microsoft/Phi-3-mini-4k-instruct:hf-inference",
12
- "gemma-2-2b": "google/gemma-2-2b-it:hf-inference",
13
  "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.3:hf-inference",
14
- "llama-3.2-3b": "meta-llama/Llama-3.2-3B-Instruct:hf-inference",
15
- "qwen2.5-3b": "Qwen/Qwen2.5-3B-Instruct:hf-inference",
16
  }
17
 
18
  def __init__(self):
19
  self.api_key = os.getenv("HF_API_KEY")
20
  if not self.api_key:
21
- raise ValueError("HF_API_KEY not set in environment variables")
22
 
23
- self.client = AsyncOpenAI(
24
  base_url="https://router.huggingface.co/v1",
25
  api_key=self.api_key
26
  )
27
- print(f"🤗 HF OpenAI Provider initialized", flush=True)
28
 
29
  def _get_model_id(self, model_name: str) -> str:
30
- """Получает полный ID модели с провайдером"""
31
  if model_name in self.SUPPORTED_MODELS:
32
  return self.SUPPORTED_MODELS[model_name]
33
- # По умолчанию
34
  return "katanemo/Arch-Router-1.5B:hf-inference"
35
 
36
  async def generate(self, messages: List[Dict[str, str]], **kwargs):
37
- """Генерация ответа через OpenAI-совместимый API HF"""
38
- model_name = kwargs.get("model", "arch-router")
39
- model_id = self._get_model_id(model_name)
40
 
41
  try:
42
- print(f"🚀 Sending request to HF OpenAI with model {model_id}", flush=True)
43
-
44
- response = await self.client.chat.completions.create(
45
- model=model_id,
46
  messages=messages,
47
  max_tokens=kwargs.get("max_tokens", 500),
48
- temperature=kwargs.get("temperature", 0.7),
49
- top_p=kwargs.get("top_p", 0.95)
50
  )
51
 
52
- print(f"✅ Received response", flush=True)
53
-
54
  return {
55
  "content": response.choices[0].message.content,
56
  "total_tokens": response.usage.total_tokens if response.usage else 0,
57
- "model": model_id
58
  }
59
-
60
  except Exception as e:
61
- print(f"❌ Error in HF OpenAI provider: {e}", flush=True)
62
- raise
63
-
64
- async def generate_stream(self, messages: List[Dict[str, str]], **kwargs):
65
- """Стриминг пока не реализован"""
66
- raise NotImplementedError("Streaming not implemented")
 
1
  import os
2
+ from openai import OpenAI
3
  from typing import List, Dict, Any
4
  from .base import BaseLLMProvider
5
 
 
9
  SUPPORTED_MODELS = {
10
  "arch-router": "katanemo/Arch-Router-1.5B:hf-inference",
11
  "phi-3-mini": "microsoft/Phi-3-mini-4k-instruct:hf-inference",
12
+ "gemma-2b": "google/gemma-2-2b-it:hf-inference",
13
  "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.3:hf-inference",
14
+ "llama-3b": "meta-llama/Llama-3.2-3B-Instruct:hf-inference",
15
+ "qwen-3b": "Qwen/Qwen2.5-3B-Instruct:hf-inference",
16
  }
17
 
18
  def __init__(self):
19
  self.api_key = os.getenv("HF_API_KEY")
20
  if not self.api_key:
21
+ raise ValueError("HF_API_KEY not set")
22
 
23
+ self.client = OpenAI(
24
  base_url="https://router.huggingface.co/v1",
25
  api_key=self.api_key
26
  )
27
+ print(f"🤗 HF OpenAI Provider initialized")
28
 
29
  def _get_model_id(self, model_name: str) -> str:
 
30
  if model_name in self.SUPPORTED_MODELS:
31
  return self.SUPPORTED_MODELS[model_name]
 
32
  return "katanemo/Arch-Router-1.5B:hf-inference"
33
 
34
  async def generate(self, messages: List[Dict[str, str]], **kwargs):
35
+ model = self._get_model_id(kwargs.get("model", "arch-router"))
 
 
36
 
37
  try:
38
+ response = self.client.chat.completions.create(
39
+ model=model,
 
 
40
  messages=messages,
41
  max_tokens=kwargs.get("max_tokens", 500),
42
+ temperature=kwargs.get("temperature", 0.7)
 
43
  )
44
 
 
 
45
  return {
46
  "content": response.choices[0].message.content,
47
  "total_tokens": response.usage.total_tokens if response.usage else 0,
48
+ "model": model
49
  }
 
50
  except Exception as e:
51
+ print(f"❌ Error: {e}")
52
+ raise
 
 
 
 
app/providers/huggingface.py DELETED
@@ -1,218 +0,0 @@
1
- import os
2
- import httpx
3
- import json
4
- from typing import List, Dict, Any, Optional
5
- from .base import BaseLLMProvider
6
- import sys
7
-
8
- class HuggingFaceProvider(BaseLLMProvider):
9
- """Провайдер для Hugging Face Serverless Inference API"""
10
-
11
- # Словарь с информацией о поддерживаемых моделях
12
- SUPPORTED_MODELS = {
13
- "phi-3-mini": {
14
- "model_id": "microsoft/Phi-3-mini-4k-instruct",
15
- "max_tokens": 4096,
16
- "description": "Microsoft Phi-3 Mini (3.8B) - очень быстрая",
17
- "type": "free"
18
- },
19
- "mistral-7b": {
20
- "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
21
- "max_tokens": 8192,
22
- "description": "Mistral 7B Instruct - качественная базовая модель",
23
- "type": "free"
24
- },
25
- "gemma-2-2b": {
26
- "model_id": "google/gemma-2-2b-it",
27
- "max_tokens": 8192,
28
- "description": "Google Gemma 2 2B - быстрая и легкая",
29
- "type": "free"
30
- },
31
- "llama-3.2-1b": {
32
- "model_id": "meta-llama/Llama-3.2-1B-Instruct",
33
- "max_tokens": 131072,
34
- "description": "Meta Llama 3.2 1B - сверхбыстрая",
35
- "type": "free"
36
- },
37
- "llama-3.2-3b": {
38
- "model_id": "meta-llama/Llama-3.2-3B-Instruct",
39
- "max_tokens": 131072,
40
- "description": "Meta Llama 3.2 3B - баланс скорости и качества",
41
- "type": "free"
42
- },
43
- "qwen2.5-3b": {
44
- "model_id": "Qwen/Qwen2.5-3B-Instruct",
45
- "max_tokens": 32768,
46
- "description": "Qwen 2.5 3B - хорошая поддержка русского",
47
- "type": "free"
48
- },
49
- "ru-mistral": {
50
- "model_id": "AlexWortega/ruMistral-7B-Instruct",
51
- "max_tokens": 8192,
52
- "description": "Русскоязычный Mistral 7B",
53
- "type": "free"
54
- }
55
- }
56
-
57
- def __init__(self):
58
- self.api_key = os.getenv("HF_API_KEY")
59
- if not self.api_key:
60
- print("⚠️ HF_API_KEY not set, will use without authentication (rate limits apply)", flush=True)
61
- self.api_key = None
62
- self.base_url = "https://router.huggingface.co/hf/v1"
63
- print(f"🤗 HuggingFaceProvider initialized, API key: {'✅' if self.api_key else '❌'}", flush=True)
64
-
65
- def _get_model_id(self, model_name: str) -> str:
66
- """Получает реальный ID модели из HF по короткому имени"""
67
- if model_name in self.SUPPORTED_MODELS:
68
- return self.SUPPORTED_MODELS[model_name]["model_id"]
69
- # Если передан полный HF ID, используем его
70
- if "/" in model_name:
71
- return model_name
72
- # По умолчанию
73
- return "microsoft/Phi-3-mini-4k-instruct"
74
-
75
- async def generate(self, messages: List[Dict[str, str]], **kwargs):
76
- """Генерация ответа через HF Inference API - РАБОЧАЯ ВЕРСИЯ ДЛЯ GEMMA"""
77
- model_name = kwargs.get("model", "phi-3-mini")
78
- model_id = self._get_model_id(model_name) # Предполагаем, что тут будет "google/gemma-2-2b-it"
79
-
80
- # Берем последнее сообщение пользователя
81
- user_message = ""
82
- for msg in messages:
83
- if msg["role"] == "user":
84
- user_message = msg["content"]
85
- break
86
-
87
- if not user_message:
88
- user_message = "Hello"
89
-
90
- # Gemma 2 instruct требует особого формата промпта для чата [citation:6]
91
- # <bos><start_of_turn>user\n{user_message}<end_of_turn>\n<start_of_turn>model\n
92
- prompt = f"<bos><start_of_turn>user\n{user_message}<end_of_turn>\n<start_of_turn>model\n"
93
-
94
- headers = {}
95
- if self.api_key:
96
- headers["Authorization"] = f"Bearer {self.api_key}"
97
- headers["Content-Type"] = "application/json"
98
-
99
- # ПРАВИЛЬНЫЙ URL для бесплатного Inference API
100
- url = f"https://api-inference.huggingface.co/models/{model_id}"
101
- print(f"🚀 Sending to URL: {url}", flush=True)
102
- print(f"📝 Prompt: {prompt}", flush=True)
103
-
104
- payload = {
105
- "inputs": prompt,
106
- "parameters": {
107
- "max_new_tokens": kwargs.get("max_tokens", 500),
108
- "temperature": kwargs.get("temperature", 0.7),
109
- "top_p": kwargs.get("top_p", 0.95),
110
- "do_sample": True,
111
- "return_full_text": False # Не возвращать промпт в ответе
112
- }
113
- }
114
-
115
- async with httpx.AsyncClient() as client:
116
- try:
117
- resp = await client.post(
118
- url,
119
- json=payload,
120
- headers=headers,
121
- timeout=60.0
122
- )
123
-
124
- print(f"📥 Response status: {resp.status_code}", flush=True)
125
-
126
- if resp.status_code == 200:
127
- data = resp.json()
128
- print(f"📦 Response data: {str(data)[:200]}...", flush=True)
129
-
130
- # Парсим ответ от Gemma (он приходит в виде списка)
131
- if isinstance(data, list) and len(data) > 0:
132
- if "generated_text" in data[0]:
133
- # Ответ модели уже содержит продолжение, нам не нужен промпт
134
- generated_text = data[0]["generated_text"]
135
- return {
136
- "content": generated_text,
137
- "total_tokens": kwargs.get("max_tokens", 500),
138
- "model": model_id
139
- }
140
- return {
141
- "content": "Не удалось распарсить ответ модели.",
142
- "total_tokens": 0,
143
- "model": model_id
144
- }
145
- elif resp.status_code == 503:
146
- return {
147
- "content": "⏳ Модель загружается (холодный старт), попробуйте через несколько секунд...",
148
- "total_tokens": 0,
149
- "model": model_id
150
- }
151
- else:
152
- error_text = resp.text
153
- print(f"❌ Error: {resp.status_code} - {error_text}", flush=True)
154
- return {
155
- "content": f"Error: {resp.status_code}",
156
- "total_tokens": 0,
157
- "model": model_id
158
- }
159
-
160
- except Exception as e:
161
- print(f"❌ Exception: {e}", flush=True)
162
- return {
163
- "content": f"Error: {str(e)}",
164
- "total_tokens": 0,
165
- "model": model_id
166
- }
167
-
168
- def _format_messages(self, messages: List[Dict[str, str]], model_id: str) -> str:
169
- """Форматирует сообщения в промпт для конкретной модели"""
170
- # Простая реализация - берем последнее сообщение пользователя
171
- # В реальном проекте нужно делать под каждый формат модели
172
- last_user_msg = None
173
- system_msg = None
174
-
175
- for msg in messages:
176
- if msg["role"] == "user":
177
- last_user_msg = msg["content"]
178
- elif msg["role"] == "system":
179
- system_msg = msg["content"]
180
-
181
- if not last_user_msg:
182
- last_user_msg = "Hello"
183
-
184
- # Форматируем в зависимости от модели
185
- if "phi" in model_id.lower():
186
- # Phi-3 формат
187
- prompt = f"<|user|>\n{last_user_msg}\n<|assistant|>\n"
188
- elif "llama" in model_id.lower():
189
- # Llama 3 формат
190
- prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{last_user_msg}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
191
- elif "gemma" in model_id.lower():
192
- # Gemma формат
193
- prompt = f"<start_of_turn>user\n{last_user_msg}<end_of_turn>\n<start_of_turn>model\n"
194
- else:
195
- # Универсальный формат
196
- prompt = last_user_msg
197
-
198
- return prompt
199
-
200
- def _extract_response(self, data: Any) -> str:
201
- """Извлекает текст ответа из разных форматов HF"""
202
- try:
203
- if isinstance(data, list) and len(data) > 0:
204
- if isinstance(data[0], dict) and "generated_text" in data[0]:
205
- return data[0]["generated_text"]
206
- elif isinstance(data, dict):
207
- if "generated_text" in data:
208
- return data["generated_text"]
209
-
210
- # Если ничего не нашли, возвращаем как строку
211
- return str(data)
212
- except Exception as e:
213
- print(f"❌ Error extracting response: {e}", flush=True)
214
- return str(data)
215
-
216
- async def generate_stream(self, messages: List[Dict[str, str]], **kwargs):
217
- """Стриминг пока не поддерживается"""
218
- raise NotImplementedError("Streaming not implemented for HuggingFace provider")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/providers/zhipu.py DELETED
@@ -1,52 +0,0 @@
1
- import os
2
- import httpx
3
- from .base import BaseLLMProvider
4
-
5
- class ZhipuFlashProvider(BaseLLMProvider):
6
- def __init__(self):
7
- self.api_key = os.getenv("ZHIPU_API_KEY")
8
- if not self.api_key:
9
- raise ValueError("ZHIPU_API_KEY not set")
10
- self.base_url = "https://open.bigmodel.cn/api/paas/v4/chat/completions"
11
-
12
- def __init__(self):
13
- self.api_key = os.getenv("ZHIPU_API_KEY")
14
- print(f"🔑 ZHIPU_API_KEY loaded: {'Yes' if self.api_key else 'NO!'}", flush=True)
15
- if not self.api_key:
16
- raise ValueError("ZHIPU_API_KEY not set!")
17
- self.base_url = "https://api.z.ai/api/paas/v4/chat/completions"
18
-
19
- async def generate(self, messages, **kwargs):
20
- try:
21
- async with httpx.AsyncClient() as client:
22
- payload = {
23
- "model": "glm-4.7-flash",
24
- "messages": messages,
25
- "max_tokens": kwargs.get("max_tokens", 1000),
26
- "temperature": kwargs.get("temperature", 0.8)
27
- }
28
- headers = {
29
- "Authorization": f"Bearer {self.api_key}",
30
- "Content-Type": "application/json"
31
- }
32
-
33
- print(f"🚀 Sending request to Zhipu: {payload}") # Диагностика
34
- resp = await client.post(self.base_url, json=payload, headers=headers, timeout=30.0)
35
- print(f"📥 Response status: {resp.status_code}") # Диагностика
36
- print(f"📥 Response body: {resp.text}") # Диагностика
37
-
38
- resp.raise_for_status()
39
- data = resp.json()
40
-
41
- return {
42
- "content": data["choices"][0]["message"]["content"],
43
- "total_tokens": data["usage"]["total_tokens"]
44
- }
45
- except Exception as e:
46
- print(f"💥 Error in Zhipu provider: {str(e)}") # Диагностика
47
- print(f"💥 Exception type: {type(e)}") # Диагностика
48
- raise # Пробрасываем дальше
49
-
50
- async def generate_stream(self, messages, **kwargs):
51
- # Для простоты пропускаем, но можно реализовать
52
- raise NotImplementedError("Streaming not yet implemented for Zhipu")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,8 +1,6 @@
1
  fastapi==0.115.0
2
  uvicorn[standard]==0.30.0
3
- httpx==0.27.0
4
  pydantic==2.7.0
5
- python-multipart==0.0.9
6
- httpx==0.27.0
7
  python-dotenv==1.0.0
8
- openai>=1.0.0
 
 
1
  fastapi==0.115.0
2
  uvicorn[standard]==0.30.0
 
3
  pydantic==2.7.0
 
 
4
  python-dotenv==1.0.0
5
+ openai>=1.0.0
6
+ httpx==0.27.0