Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import os | |
| import json | |
| import fitz # PyMuPDF | |
| from pathlib import Path | |
# --- Core configuration ---
# OpenRouter API key, read from the environment; None when the variable is unset.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
# Model identifier sent in the "model" field of the chat-completion payload.
MODEL_ID = "google/gemini-2.0-flash-001"
# --- Custom HTML banner rendered above the chat interface (keep unchanged) ---
INFO_HTML = """<div style="text-align: left; border-left: 4px solid #2196F3; padding-left: 15px; margin-bottom: 20px;">
<h3>MG TaxAI | 跨境财税合规实验室 (Beta)</h3>
<p>本系统依托 <b>MG 核心智库</b> 构建...</p>
</div>"""
# --- Knowledge-base retrieval (lightweight RAG over local PDF files) ---
def get_knowledge_context(query, max_pages=3, max_chars=6000):
    """Collect text from local PDFs whose file names match words in *query*.

    The query is split on whitespace and every word longer than one
    character becomes a case-insensitive keyword. Each ``*.pdf`` found
    recursively under the ``Treaties`` and ``InvestmentGuide`` folders
    (relative to the working directory) whose file name contains at least
    one keyword contributes the text of its leading pages.

    Args:
        query: Free-text user question; ``None`` is treated as empty.
        max_pages: Number of leading pages read from each matching PDF.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The extracted chunks joined by blank lines and truncated to
        *max_chars* characters, or ``""`` when no keyword or file matches.
    """
    import logging  # local import keeps this block self-contained

    # Lower-case the keywords once instead of once per candidate file.
    keywords = {word.lower() for word in (query or "").split() if len(word) > 1}
    if not keywords:
        # Nothing can match, so skip walking the folders altogether.
        return ""

    context_chunks = []
    for folder in ("Treaties", "InvestmentGuide"):
        root = Path(folder)
        if not root.exists():
            continue
        # Sorted so that the chunks surviving truncation do not depend on
        # filesystem enumeration order.
        for pdf_file in sorted(root.rglob("*.pdf")):
            file_name = pdf_file.name.lower()
            if not any(keyword in file_name for keyword in keywords):
                continue
            try:
                with fitz.open(pdf_file) as doc:
                    # Load pages by index instead of slicing the document:
                    # slice support on Document appears to depend on the
                    # PyMuPDF version, and a failure here used to be
                    # swallowed silently.
                    page_total = min(max_pages, len(doc))
                    text = "".join(
                        doc.load_page(page_no).get_text()
                        for page_no in range(page_total)
                    )
            except Exception:
                # Best effort: one unreadable file must not break the answer,
                # but leave a trace so the failure can be diagnosed.
                logging.getLogger(__name__).warning(
                    "Skipping unreadable PDF %s", pdf_file, exc_info=True
                )
                continue
            context_chunks.append(f"来自文件 [{pdf_file.name}]:\n{text}")

    # Return only after every folder has been scanned.
    return "\n\n".join(context_chunks)[:max_chars]
# --- Chat-completion call against the OpenRouter API ---
# System prompt sent as the first message of every request.
_SYSTEM_INSTRUCTION = """
你是一位资深的 MG Consulting 国际税务专家级 AI。
【核心准则】:
1. 专业性:优先引用参考知识库。若背景不足,基于 2025-2026 最新全球财税准则回答。
2. 避险:在讨论行业趋势时,使用“大型咨询机构”或“核心智库”等统称,**严禁提及具体的国际会计师事务所名称**。
3. 风格:直接进入分析,不进行冗长的自我介绍,使用 Markdown 格式(标题、列表、粗体)。
4. 深度:分析需涵盖税种差异(Income Tax, VAT, Withholding Tax)及双边协定(DTA)影响。
"""


def _history_to_messages(history):
    """Convert Gradio chat history into a list of role/content dicts.

    Accepts both history layouts: ``(user, assistant)`` pairs and
    ``{"role": ..., "content": ...}`` dicts. Entries whose content is not a
    non-empty string (for example a pending ``None`` reply or a file
    attachment) are skipped so they are never sent as message text.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            candidates = [(turn.get("role"), turn.get("content"))]
        else:
            try:
                user_msg, assistant_msg = turn
            except (TypeError, ValueError):
                continue  # not a two-item pair; ignore the malformed entry
            candidates = [("user", user_msg), ("assistant", assistant_msg)]
        for role, content in candidates:
            if role in ("user", "assistant") and isinstance(content, str) and content:
                converted.append({"role": role, "content": content})
    return converted


def ask_ai(message, history):
    """Answer *message* through OpenRouter, using local PDFs as context.

    Args:
        message: The user's current question.
        history: Previous turns as supplied by ``gr.ChatInterface``.

    Returns:
        The model's reply text, or a user-facing error string when the API
        key is missing, the request fails, the API answers with a non-200
        status, or the response body does not have the expected shape.
    """
    if not OPENROUTER_API_KEY:
        return "⚠️ 未检测到 API Key,请在 Space 的 Settings -> Secrets 中添加。"

    local_context = get_knowledge_context(message)

    messages = [{"role": "system", "content": _SYSTEM_INSTRUCTION}]
    messages.extend(_history_to_messages(history))
    current_input = f"【参考知识库】:\n{local_context}\n\n【用户咨询】:\n{message}"
    messages.append({"role": "user", "content": current_input})

    payload = {
        "model": MODEL_ID,
        "messages": messages,
        "temperature": 0.2,
        "top_p": 0.9,
    }

    try:
        # json= lets requests serialise the body and set the Content-Type header.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {OPENROUTER_API_KEY}"},
            json=payload,
            timeout=60,
        )
    except requests.RequestException as exc:
        # Only genuine transport failures are reported as connection problems.
        return f"💥 系统连接超时: {exc}"

    if response.status_code != 200:
        return f"❌ 接口响应异常 ({response.status_code})"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # A 200 reply with an unexpected body is a format problem, not a timeout.
        return f"❌ 接口返回格式异常: {exc}"
# --- UI construction ---
# Components are created inside the Blocks context in display order:
# the HTML banner first, then the chat interface backed by ask_ai.
with gr.Blocks(title="MG TaxAI Lab", fill_height=True) as demo:
    gr.HTML(INFO_HTML)
    # NOTE(review): retry_btn / undo_btn / clear_btn appear to have been
    # removed from gr.ChatInterface in Gradio 5 — confirm the Gradio version
    # pinned for this Space still accepts them.
    gr.ChatInterface(
        fn=ask_ai,
        fill_height=True,
        retry_btn="🔄 重新生成",
        undo_btn="↩️ 撤回",
        clear_btn="🗑️ 清空",
    )
if __name__ == "__main__":
    # Listen on every network interface at port 7860 and surface errors in
    # the UI. `share` is deliberately not passed, so it keeps its default;
    # the original note states that Hugging Face Spaces does not support
    # share=True.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)