Upload 16 files
Browse files- agent.py +84 -0
- config.py +9 -0
- requirements.txt +15 -0
- tools/__init__.py +0 -0
- tools/chess_tool.py +22 -0
- tools/codegen_tool.py +24 -0
- tools/excel_tool.py +27 -0
- tools/gaia_tool.py +22 -0
- tools/grocery_tool.py +40 -0
- tools/group_tool.py +32 -0
- tools/string_tool.py +18 -0
- tools/web_search_tool.py +28 -0
- tools/wiki_tool.py +25 -0
- tools/youtube_species_tool.py +71 -0
- tools/youtube_tool.py +61 -0
- tools/youtube_transcription_tool.py +51 -0
agent.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# agent.py
|
| 2 |
+
import argparse
|
| 3 |
+
from langchain_openai.chat_models import ChatOpenAI
|
| 4 |
+
from langchain.agents import initialize_agent
|
| 5 |
+
from langchain.tools import Tool
|
| 6 |
+
from config import config
|
| 7 |
+
|
| 8 |
+
from tools.gaia_tool import GaiaQATool
|
| 9 |
+
from tools.wiki_tool import WikiSearchTool
|
| 10 |
+
from tools.youtube_species_tool import YouTubeSpeciesTool
|
| 11 |
+
from tools.youtube_transcription_tool import YouTubeTranscriptionTool
|
| 12 |
+
from tools.excel_tool import ExcelTool
|
| 13 |
+
from tools.codegen_tool import CodeGenTool
|
| 14 |
+
from tools.web_search_tool import WebSearchTool
|
| 15 |
+
from tools.string_tool import StringTool
|
| 16 |
+
from tools.chess_tool import ChessTool
|
| 17 |
+
from tools.group_tool import GroupTool
|
| 18 |
+
from tools.grocery_tool import GroceryTool
|
| 19 |
+
|
| 20 |
+
def main() -> None:
    """CLI entry point: build the tool set, start a ReAct agent and answer --input.

    Reads the question/command from the --input argument, wires every project
    tool into a zero-shot ReAct agent and prints the agent's final answer.
    """
    parser = argparse.ArgumentParser(description="Многофункциональный QA агент")
    parser.add_argument(
        "--input", required=True,
        help="Вопрос, URL или команда для инструмента"
    )
    args = parser.parse_args()
    user_input = args.input

    # Deterministic LLM (temperature 0) for tool-using reasoning.
    llm = ChatOpenAI(
        model_name=config.OPENAI_MODEL,
        openai_api_key=config.OPENAI_API_KEY,
        temperature=0.0
    )

    # Instantiate each tool once, then adapt them all uniformly instead of
    # repeating the Tool(...) boilerplate eleven times.
    # NOTE(review): wrapping `_run` bypasses BaseTool's callback machinery;
    # passing the BaseTool instances directly would also work.
    tool_impls = [
        GaiaQATool(),
        WikiSearchTool(),
        YouTubeSpeciesTool(),
        YouTubeTranscriptionTool(),
        ExcelTool(),
        CodeGenTool(),
        WebSearchTool(),
        StringTool(),
        ChessTool(),
        GroupTool(),
        GroceryTool(),
    ]
    tools = [
        Tool(name=t.name, func=t._run, description=t.description)
        for t in tool_impls
    ]

    # Zero-shot ReAct agent: picks tools from their descriptions alone.
    agent = initialize_agent(
        tools=tools,
        llm=llm,
        agent="zero-shot-react-description",
        verbose=True,
        max_iterations=30,              # generous budget for multi-step tasks
        early_stopping_method="force",  # or "generate"
        handle_parsing_errors=True      # retry when the LLM output is malformed
    )

    # Run the agent and print the final answer.
    answer = agent.invoke(user_input)
    print("\nОтвет:\n", answer)

if __name__ == "__main__":
    main()
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
|
config.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config.py
|
| 2 |
+
import os

from easydict import EasyDict

# Central runtime configuration.
# SECURITY FIX: the OpenAI API key was previously hardcoded here and committed
# to the repository; it is now read from the OPENAI_API_KEY environment
# variable (the leaked key must be revoked regardless).
config = EasyDict({
    "OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY", ""),
    "OPENAI_MODEL": "gpt-4.1",
    # Whisper checkpoint size used by the transcription tool.
    "WHISPER_MODEL": "base",
})
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain-openai~=0.3.14
|
| 2 |
+
langchain~=0.3.24
|
| 3 |
+
langchain-community~=0.3.22
|
| 4 |
+
pydantic~=2.11.3
|
| 5 |
+
chess~=1.11.2
|
| 6 |
+
pandas~=2.2.3
|
| 7 |
+
opencv-python~=4.11.0.86
|
| 8 |
+
torch~=2.7.0
|
| 9 |
+
torchvision~=0.22.0
|
| 10 |
+
pytube~=15.0.0
|
| 11 |
+
requests~=2.32.3
|
| 12 |
+
beautifulsoup4~=4.13.4
|
| 13 |
+
yt-dlp~=2025.3.31
|
| 14 |
+
openai-whisper~=20240930
|
| 15 |
+
easydict~=1.13
|
tools/__init__.py
ADDED
|
File without changes
|
tools/chess_tool.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/chess_tool.py
|
| 2 |
+
from langchain.tools import BaseTool
|
| 3 |
+
import chess
|
| 4 |
+
import chess.engine
|
| 5 |
+
|
| 6 |
+
class ChessTool(BaseTool):
    """Analyse a chess position given as FEN and return Stockfish's best move.

    Requires a `stockfish` binary on PATH. Returns the move in UCI notation,
    or a tool-level error string so a bad input does not crash the agent loop.
    """
    name: str = "chess_tool"
    description: str = "Возвращает лучший ход в шахматной позиции (FEN) через Stockfish."

    def _run(self, fen: str) -> str:
        # Malformed FEN raises ValueError from python-chess; report it instead
        # of letting the exception abort the whole agent run.
        try:
            board = chess.Board(fen)
        except ValueError as exc:
            return f"chess_tool error: invalid FEN ({exc})"
        try:
            # Context manager guarantees the engine process is terminated.
            with chess.engine.SimpleEngine.popen_uci("stockfish") as engine:
                result = engine.play(board, chess.engine.Limit(depth=20))
        except FileNotFoundError:
            return "chess_tool error: stockfish binary not found on PATH"
        if result.move is None:
            # Engine returns no move for finished positions (mate/stalemate).
            return "chess_tool error: no legal move in this position"
        return result.move.uci()

    async def _arun(self, fen: str) -> str:
        raise NotImplementedError("Async not supported.")
|
tools/codegen_tool.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import BaseTool
|
| 2 |
+
from langchain_openai.llms import OpenAI
|
| 3 |
+
from config import config
|
| 4 |
+
|
| 5 |
+
class CodeGenTool(BaseTool):
    """Generate Python code from a natural-language specification."""
    name: str = "code_gen"
    description: str = "Генерирует Python-код по спецификации."

    def _run(self, spec: str) -> str:
        # BUGFIX: config.OPENAI_MODEL names a chat model ("gpt-4.1"); the
        # legacy `OpenAI` completions wrapper targets the completions endpoint
        # and cannot drive chat-only models, so use ChatOpenAI instead.
        from langchain_openai.chat_models import ChatOpenAI

        llm = ChatOpenAI(
            model_name=config.OPENAI_MODEL,
            openai_api_key=config.OPENAI_API_KEY,
            temperature=0.2
        )
        prompt = f"Write Python code to: {spec}"
        # ChatOpenAI returns a message object; the tool contract is plain text.
        return llm.invoke(prompt).content

    async def _arun(self, spec: str) -> str:
        raise NotImplementedError("Async not supported.")
|
| 23 |
+
|
| 24 |
+
|
tools/excel_tool.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import BaseTool
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
class ExcelTool(BaseTool):
    """Run a simple query against an Excel workbook.

    Command format: 'path.xlsx,sheet=Sheet1,query=mean(columnA)'.
    Supports mean(<column>); any other/missing query dumps the whole sheet.
    """
    name: str = "excel_query"
    description: str = (
        "Загружает .xlsx, выполняет заданный запрос Pandas и возвращает результат."
    )

    def _run(self, command: str) -> str:
        # Split off the file path; everything after the first comma is options.
        # BUGFIX: the old split(',', 1) unpack raised ValueError when the
        # command had no comma at all.
        path, sep, rest = command.partition(',')
        if not sep:
            return f"excel_query error: expected 'path.xlsx,sheet=...,query=...', got: {command}"
        opts = {}
        for item in rest.split(','):
            # BUGFIX: split on the FIRST '=' only, so values containing '='
            # no longer blow up dict() construction.
            key, eq, value = item.partition('=')
            if eq:
                opts[key.strip()] = value.strip()
        try:
            df = pd.read_excel(path.strip(), sheet_name=opts.get('sheet', 'Sheet1'))
        except (OSError, ValueError) as exc:
            return f"excel_query error: {exc}"
        query = opts.get('query', '')
        if query.startswith('mean(') and query.endswith(')'):
            col = query[5:-1]
            # Guard against a typo'd column instead of raising KeyError.
            if col not in df.columns:
                return f"excel_query error: no column named '{col}'"
            return str(df[col].mean())
        # No recognised query: return the sheet as text.
        return df.to_string(index=False)

    async def _arun(self, command: str) -> str:
        raise NotImplementedError("Async not supported.")
|
| 26 |
+
|
| 27 |
+
|
tools/gaia_tool.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain_openai.llms import OpenAI
|
| 2 |
+
from langchain.tools import BaseTool
|
| 3 |
+
from config import config
|
| 4 |
+
|
| 5 |
+
class GaiaQATool(BaseTool):
    """Answer GAIA Level 1 benchmark questions with a single LLM call."""
    name: str = "gaia_level1_qa"
    description: str = "Отвечает на GAIA Level1 вопросы."

    def _run(self, question: str) -> str:
        # BUGFIX: config.OPENAI_MODEL names a chat model ("gpt-4.1"); the
        # legacy `OpenAI` completions wrapper cannot drive chat-only models,
        # so use ChatOpenAI instead.
        from langchain_openai.chat_models import ChatOpenAI

        llm = ChatOpenAI(
            model_name=config.OPENAI_MODEL,
            openai_api_key=config.OPENAI_API_KEY,
            temperature=0
        )
        prompt = f"Answer GAIA Level 1 question: {question}"
        # ChatOpenAI returns a message object; the tool contract is plain text.
        return llm.invoke(prompt).content

    async def _arun(self, question: str) -> str:
        raise NotImplementedError("Async not supported.")
|
tools/grocery_tool.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/grocery_tool.py
|
| 2 |
+
from langchain.tools import BaseTool
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
def is_vegetable(item: str) -> bool:
    """Return True if *item* (case-insensitive) is in the tool's vegetable list."""
    return item.lower() in {
        "broccoli",
        "bell pepper",
        "celery",
        "corn",
        "green beans",
        "lettuce",
        "peanuts",
        "rice",
        "zucchini",
    }
|
| 18 |
+
|
| 19 |
+
class GroceryTool(BaseTool):
    """Filter a grocery list by category.

    Commands carry a 'vegetables:' or 'fruits:' prefix followed by a
    comma-separated list of items; only the vegetable mode is implemented.
    """
    name: str = "grocery_tool"
    description: str = "Выделяет из списка овощи по префиксу 'vegetables:'."

    def _run(self, command: str) -> str:
        cmd = command.strip()
        lowered = cmd.lower()
        if lowered.startswith('vegetables:'):
            payload = cmd[len('vegetables:'):]
            candidates = (part.strip() for part in payload.split(','))
            # Keep only recognised vegetables, sorted alphabetically.
            matches = sorted(name for name in candidates if is_vegetable(name))
            return ','.join(matches)
        if lowered.startswith('fruits:'):
            # Fruit filtering is deliberately unimplemented for now.
            return "Фруктовый режим пока не реализован."
        return f"Неподдерживаемая категория: {command}"

    async def _arun(self, command: str) -> str:
        raise NotImplementedError("Async not supported.")
|
tools/group_tool.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/group_tool.py
|
| 2 |
+
from langchain.tools import BaseTool
|
| 3 |
+
|
| 4 |
+
class GroupTool(BaseTool):
    """Find counter-examples to commutativity in an operation table.

    Accepts a Markdown-style table; returns the comma-separated, sorted set of
    elements that appear in some pair (a, b) with a*b != b*a.
    """
    name: str = "group_tool"
    description: str = "Находит элементы, участвующие в контр-примерах некоммутативности."

    def _run(self, table: str) -> str:
        lines = [ln for ln in table.splitlines() if '|' in ln]

        def cells(ln: str) -> list:
            return [c.strip() for c in ln.split('|')[1:-1]]

        def is_separator(cs: list) -> bool:
            # Markdown alignment rows look like | --- | :-: | and carry no data.
            return bool(cs) and all(c and set(c) <= set('-:') for c in cs)

        # BUGFIX: the old code fed the |---| separator row straight into the
        # matrix, corrupting every comparison.
        rows = [cs for cs in (cells(ln) for ln in lines) if not is_separator(cs)]
        if not rows:
            return ""
        header, data = rows[0], rows[1:]

        # BUGFIX: real Markdown operation tables label each row with the
        # element name (header corner + first column). Detect that layout and
        # strip the labels; plain square tables keep the old behaviour.
        if data and [r[0] for r in data] == header[1:]:
            elems = header[1:]
            mat = [r[1:] for r in data]
        else:
            elems = header
            mat = data

        # Collect every element involved in a non-commuting pair.
        bad = set()
        for i, a in enumerate(elems):
            for j, b in enumerate(elems):
                if mat[i][j] != mat[j][i]:
                    bad.add(a)
                    bad.add(b)
        return ','.join(sorted(bad))

    async def _arun(self, table: str) -> str:
        raise NotImplementedError("Async not supported.")
|
tools/string_tool.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# tools/string_tool.py
|
| 2 |
+
from langchain.tools import BaseTool
|
| 3 |
+
|
| 4 |
+
class StringTool(BaseTool):
    """String operations: reverse the text that follows a 'reverse:' prefix."""
    name: str = "string_tool"
    description: str = "Реверсирует часть строки после 'reverse:'."

    def _run(self, text: str) -> str:
        prefix = 'reverse:'
        if not text.lower().startswith(prefix):
            return f"Неизвестная операция: {text}"
        payload = text[len(prefix):].strip()
        return ''.join(reversed(payload))

    async def _arun(self, text: str) -> str:
        raise NotImplementedError("Async not supported.")
|
tools/web_search_tool.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import BaseTool
|
| 2 |
+
import requests
|
| 3 |
+
from bs4 import BeautifulSoup
|
| 4 |
+
|
| 5 |
+
class WebSearchTool(BaseTool):
    """Naive Google web search: scrape the results page for the top-5 hits.

    Returns "title - url" lines, or a tool-level error string on network
    failure so the agent loop keeps running.
    """
    name: str = "web_search"
    description: str = "Выполняет поиск Google и возвращает заголовки и ссылки."

    def _run(self, query: str) -> str:
        try:
            resp = requests.get(
                'https://www.google.com/search',
                params={'q': query},
                headers={'User-Agent': 'Mozilla/5.0'},
                # BUGFIX: without a timeout a stalled connection hangs the
                # agent forever.
                timeout=10,
            )
            resp.raise_for_status()
        except requests.RequestException as exc:
            return f"web_search error: {exc}"
        soup = BeautifulSoup(resp.text, 'html.parser')
        results = []
        # NOTE(review): 'div.yuRUbf' is Google's current organic-result
        # wrapper; this selector is brittle and breaks when Google changes
        # its markup — confirm periodically.
        for g in soup.select('div.yuRUbf')[:5]:
            a = g.a
            # Guard: a result block without an anchor used to raise.
            if a is not None and a.has_attr('href'):
                results.append(f"{a.text} - {a['href']}")
        return "\n".join(results)

    async def _arun(self, query: str) -> str:
        raise NotImplementedError("Async not supported.")
|
| 27 |
+
|
| 28 |
+
|
tools/wiki_tool.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import BaseTool
|
| 2 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
| 3 |
+
from pydantic import PrivateAttr
|
| 4 |
+
|
| 5 |
+
class WikiSearchTool(BaseTool):
    """Search the English Wikipedia and return a short article summary."""
    name: str = "wiki_search"
    description: str = "Краткая выжимка статьи из English Wikipedia по запросу."
    # Pydantic private attribute: the wrapper is runtime state, not a field.
    _wrapper: WikipediaAPIWrapper = PrivateAttr()

    def __init__(self) -> None:
        super().__init__()
        # One API wrapper per tool instance, pinned to English Wikipedia.
        self._wrapper = WikipediaAPIWrapper(lang="en")

    def _run(self, query: str) -> str:
        # Delegate the whole search/summarise flow to langchain's wrapper.
        return self._wrapper.run(query)

    async def _arun(self, query: str) -> str:
        raise NotImplementedError("Async not supported.")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
tools/youtube_species_tool.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import BaseTool
|
| 2 |
+
import cv2
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
import torch
|
| 6 |
+
import torchvision.transforms as T
|
| 7 |
+
from torchvision.models.detection import fasterrcnn_resnet50_fpn
|
| 8 |
+
import re
|
| 9 |
+
import yt_dlp
|
| 10 |
+
|
| 11 |
+
class YouTubeSpeciesTool(BaseTool):
    """Count the maximum number of birds visible in one frame of a YouTube video.

    Downloads the video with yt-dlp, runs a COCO-pretrained Faster R-CNN on
    every frame and tracks the highest per-frame count of detections labelled
    as COCO class 15 ('bird'). Returns a human-readable summary string.
    """
    name: str = "youtube_species"
    description: str = (
        "Скачивает видео по URL, детектирует птиц с помощью Faster R-CNN "
        "и возвращает максимальное число птиц в кадре."
    )

    def _run(self, text: str) -> str:
        import shutil

        # 1) Extract the first URL from free-form agent text.
        m = re.search(r"(https?://[^\s,]+)", text)
        if not m:
            return "Ошибка: не найден корректный URL."
        url = m.group(1)

        # 2) Download the video with yt-dlp into a throwaway directory.
        temp_dir = tempfile.mkdtemp()
        try:
            ydl_opts = {
                "outtmpl": os.path.join(temp_dir, "%(id)s.%(ext)s"),
                "format": "mp4",
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
            video_path = os.path.join(temp_dir, f"{info['id']}.mp4")

            # 3) Load the pretrained detector once per call.
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = fasterrcnn_resnet50_fpn(pretrained=True).to(device)
            model.eval()
            transform = T.Compose([T.ToTensor()])

            # 4) Scan every frame for bird detections.
            cap = cv2.VideoCapture(video_path)
            max_count = 0
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # BUGFIX: OpenCV decodes frames as BGR while torchvision
                    # detection models expect RGB input.
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    img = transform(rgb).to(device)
                    with torch.no_grad():
                        out = model([img])[0]
                    labels = out["labels"].cpu().numpy()
                    # COCO class id 15 == 'bird'
                    count = int((labels == 15).sum())
                    max_count = max(max_count, count)
            finally:
                # Release the capture even if inference fails mid-video.
                cap.release()
        finally:
            # BUGFIX: os.remove + os.rmdir leaked the temp dir on any
            # exception and failed if yt-dlp wrote extra files.
            shutil.rmtree(temp_dir, ignore_errors=True)
        return f"Максимальное число птиц в кадре: {max_count}"

    async def _arun(self, text: str) -> str:
        raise NotImplementedError("Async not supported.")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
tools/youtube_tool.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# tools/youtube_tool.py
|
| 3 |
+
from langchain.tools import BaseTool
|
| 4 |
+
from pytube import YouTube
|
| 5 |
+
import cv2
|
| 6 |
+
import tempfile
|
| 7 |
+
import os
|
| 8 |
+
import torch
|
| 9 |
+
import torchvision.transforms as T
|
| 10 |
+
from torchvision.models.detection import fasterrcnn_resnet50_fpn
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class YouTubeSpeciesTool(BaseTool):
    """Download a YouTube video and report the max per-frame bird count.

    Uses pytube for the download and a COCO-pretrained Faster R-CNN for
    detection (COCO class 15 = 'bird').

    NOTE(review): this class duplicates the name, tool name and purpose of
    tools/youtube_species_tool.py (which uses yt-dlp); looks like a legacy
    copy — confirm which one is actually wired into the agent.
    NOTE(review): despite the docstring, the loop counts bird *detections*
    per frame, not distinct species.
    """
    name: str = "youtube_species"
    description: str = (
        "Принимает URL видео YouTube, скачивает его, "
        "анализирует кадры и возвращает максимальное число видов птиц, "
        "попавших в кадр одновременно."
    )

    def _run(self, url: str) -> str:
        # Download a progressive mp4 stream with pytube.
        # NOTE(review): `.first()` may be None if no such stream exists —
        # `stream.download` would then raise AttributeError; verify upstream.
        yt = YouTube(url)
        stream = yt.streams.filter(file_extension='mp4', progressive=True).first()
        temp_dir = tempfile.mkdtemp()
        video_path = stream.download(output_path=temp_dir)

        # Load the pretrained detector.
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = fasterrcnn_resnet50_fpn(pretrained=True).to(device)
        model.eval()
        transform = T.Compose([T.ToTensor()])

        cap = cv2.VideoCapture(video_path)
        max_species = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Convert the frame to a tensor and run the detector.
            # NOTE(review): OpenCV frames are BGR; torchvision models expect
            # RGB — a cv2.cvtColor is likely missing here.
            img = transform(frame).to(device)
            with torch.no_grad():
                outputs = model([img])[0]
            # Predicted class labels for this frame.
            labels = outputs['labels'].cpu().numpy()
            # COCO class 15 corresponds to 'bird'.
            count = (labels == 15).sum()
            max_species = max(max_species, int(count))
        cap.release()

        # Clean up the downloaded video and its directory.
        # NOTE(review): skipped entirely if anything above raises.
        os.remove(video_path)
        os.rmdir(temp_dir)
        return f"Максимальное число видов птиц в кадре: {max_species}"

    async def _arun(self, url: str) -> str:
        raise NotImplementedError("Async not supported.")
|
tools/youtube_transcription_tool.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.tools import BaseTool
|
| 2 |
+
import whisper
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
import re
|
| 6 |
+
import yt_dlp
|
| 7 |
+
from config import config
|
| 8 |
+
|
| 9 |
+
class YouTubeTranscriptionTool(BaseTool):
    """Transcribe the audio track of a YouTube video.

    Downloads the m4a audio stream with yt-dlp, transcribes it locally with
    OpenAI Whisper (model size from config) and returns the plain text.
    """
    name: str = "youtube_transcribe"
    description: str = (
        "Скачивает аудио из YouTube по URL, транскрибирует через Whisper и возвращает текст."
    )

    def _run(self, text: str) -> str:
        import shutil

        # 1) Pull the first URL out of free-form agent text.
        m = re.search(r"(https?://[^\s,]+)", text)
        if not m:
            return "Ошибка: не найден корректный URL."
        url = m.group(1)

        # 2) Download the audio into a throwaway directory.
        temp_dir = tempfile.mkdtemp()
        try:
            ydl_opts = {
                "outtmpl": os.path.join(temp_dir, "%(id)s.%(ext)s"),
                "format": "bestaudio[ext=m4a]",
                "quiet": True,
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
            audio_path = os.path.join(temp_dir, f"{info['id']}.m4a")

            # 3) Transcribe with Whisper.
            model = whisper.load_model(config.WHISPER_MODEL)
            result = model.transcribe(audio_path)
        finally:
            # BUGFIX: os.remove + os.rmdir leaked the temp dir whenever the
            # download or transcription raised; remove the whole tree always.
            shutil.rmtree(temp_dir, ignore_errors=True)
        return result["text"].strip()

    async def _arun(self, text: str) -> str:
        raise NotImplementedError("Async not supported.")
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
|