chatbot / app.py
MGGroup's picture
Update app.py
5dc4e7f verified
import gradio as gr
import requests
import os
import json
import fitz # PyMuPDF
from pathlib import Path
# --- Core configuration ---
# Model identifier sent in the "model" field of each chat-completion request.
MODEL_ID: str = "google/gemini-2.0-flash-001"
# API key read from the environment; None when the variable is not set.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

# --- Banner HTML rendered at the top of the page (content kept verbatim) ---
INFO_HTML: str = """<div style="text-align: left; border-left: 4px solid #2196F3; padding-left: 15px; margin-bottom: 20px;">
<h3>MG TaxAI | 跨境财税合规实验室 (Beta)</h3>
<p>本系统依托 <b>MG 核心智库</b> 构建...</p>
</div>"""
# --- Knowledge-base retrieval (RAG) ---
def get_knowledge_context(query):
    """Collect text from local PDFs whose file names match words of *query*.

    The query is split on whitespace and every word longer than one character
    becomes a case-insensitive keyword. Each ``*.pdf`` found recursively under
    the ``Treaties`` and ``InvestmentGuide`` folders (relative to the current
    working directory) whose file name contains at least one keyword
    contributes the text of its first three pages.

    Args:
        query: The user's question.

    Returns:
        The matching excerpts, each prefixed with its file name, joined by
        blank lines and truncated to 6000 characters. An empty string when
        there are no keywords or nothing matches.
    """
    import logging

    max_chars = 6000
    max_pages = 3
    separator = "\n\n"
    base_dirs = ["Treaties", "InvestmentGuide"]

    # Lower-case once here instead of once per file.
    # NOTE(review): a query without whitespace (typical for Chinese text)
    # becomes one keyword equal to the whole sentence, which will rarely
    # appear in a file name -- confirm whether that is intended.
    keywords = [word.lower() for word in query.split() if len(word) > 1]
    if not keywords:
        # Nothing can match, so skip walking the directory trees.
        return ""

    context_chunks = []
    joined_length = 0
    for folder in base_dirs:
        path = Path(folder)
        if not path.exists():
            continue
        # Sorted so the selected excerpts do not depend on filesystem order.
        for pdf_file in sorted(path.rglob("*.pdf")):
            file_name = pdf_file.name.lower()
            if not any(kw in file_name for kw in keywords):
                continue
            try:
                with fitz.open(pdf_file) as doc:
                    # Index pages explicitly rather than slicing the document,
                    # so this does not rely on slice support in
                    # Document.__getitem__.
                    page_total = min(len(doc), max_pages)
                    text = "".join(
                        doc[index].get_text() for index in range(page_total)
                    )
            except Exception:
                # Best effort: an unreadable PDF must not break the chat,
                # but leave a trace instead of failing silently.
                logging.getLogger(__name__).warning(
                    "Skipping unreadable PDF %s", pdf_file, exc_info=True
                )
                continue
            chunk = f"来自文件 [{pdf_file.name}]:\n{text}"
            if context_chunks:
                joined_length += len(separator)
            context_chunks.append(chunk)
            joined_length += len(chunk)
            if joined_length >= max_chars:
                # Later files would be cut off by the truncation below anyway.
                return separator.join(context_chunks)[:max_chars]
    return separator.join(context_chunks)[:max_chars]
# --- OpenRouter chat-completion call ---
def _history_to_messages(history):
    """Convert Gradio chat history into OpenAI-style role/content messages.

    Accepts both the pair format (``[user_msg, assistant_msg]`` per turn) and
    the messages format (``{"role": ..., "content": ...}`` per entry). Entries
    whose content is not a string (for example ``None`` for a missing reply)
    are dropped, because they cannot be sent as text content.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            pairs = [(turn.get("role"), turn.get("content"))]
        else:
            user_msg, assistant_msg = turn
            pairs = [("user", user_msg), ("assistant", assistant_msg)]
        for role, content in pairs:
            if role in ("user", "assistant") and isinstance(content, str):
                converted.append({"role": role, "content": content})
    return converted


def ask_ai(message, history):
    """Answer *message* through the OpenRouter chat-completions endpoint.

    The request consists of a fixed system instruction, the previous turns
    from *history*, and the current message prefixed with any excerpts that
    ``get_knowledge_context`` found for it.

    Args:
        message: The user's current question.
        history: Previous conversation turns as supplied by Gradio, either as
            ``[user, assistant]`` pairs or as role/content dicts.

    Returns:
        The model's reply text, or a user-facing error string when the API
        key is missing, the request fails, or the response cannot be parsed.
        Request and parsing errors are reported as strings rather than raised.
    """
    if not OPENROUTER_API_KEY:
        return "⚠️ 未检测到 API Key,请在 Space 的 Settings -> Secrets 中添加。"

    local_context = get_knowledge_context(message)

    # System instruction: prefer the supplied reference excerpts, avoid naming
    # specific accounting firms, answer directly in Markdown, and cover tax
    # types and treaty effects.
    system_instruction = """
你是一位资深的 MG Consulting 国际税务专家级 AI。
【核心准则】:
1. 专业性:优先引用参考知识库。若背景不足,基于 2025-2026 最新全球财税准则回答。
2. 避险:在讨论行业趋势时,使用“大型咨询机构”或“核心智库”等统称,**严禁提及具体的国际会计师事务所名称**。
3. 风格:直接进入分析,不进行冗长的自我介绍,使用 Markdown 格式(标题、列表、粗体)。
4. 深度:分析需涵盖税种差异(Income Tax, VAT, Withholding Tax)及双边协定(DTA)影响。
"""
    messages = [{"role": "system", "content": system_instruction}]
    messages.extend(_history_to_messages(history))

    current_input = f"【参考知识库】:\n{local_context}\n\n【用户咨询】:\n{message}"
    messages.append({"role": "user", "content": current_input})

    payload = {
        "model": MODEL_ID,
        "messages": messages,
        "temperature": 0.2,
        "top_p": 0.9,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            data=json.dumps(payload),
            timeout=60,
        )
    except requests.exceptions.Timeout as e:
        return f"💥 系统连接超时: {str(e)}"
    except requests.exceptions.RequestException as e:
        # Other network failures are not timeouts; report them as such.
        return f"💥 网络请求失败: {str(e)}"

    if response.status_code != 200:
        return f"❌ 接口响应异常 ({response.status_code})"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 response whose body is not JSON or lacks the expected
        # "choices" structure is a format problem, not a connection timeout.
        return f"❌ 接口返回格式异常: {e!r}"
# --- UI construction ---
# One page: the HTML banner on top, the chat interface (backed by ask_ai) below.
with gr.Blocks(fill_height=True, title="MG TaxAI Lab") as demo:
    gr.HTML(value=INFO_HTML)
    # NOTE(review): the three *_btn label parameters depend on the installed
    # Gradio version accepting them -- confirm against the pinned version
    # before upgrading Gradio.
    gr.ChatInterface(
        ask_ai,
        clear_btn="🗑️ 清空",
        undo_btn="↩️ 撤回",
        retry_btn="🔄 重新生成",
        fill_height=True,
    )
if __name__ == "__main__":
    # Listen on every interface at port 7860 and surface errors in the UI.
    # `share` is deliberately not passed, so Gradio's default applies; the
    # original author's note says share=True is not supported inside HF Spaces.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)