# 810proj / app.py
# jscmp4's picture
# Update app.py
# d6cad76 verified
import os
import json
import re
import requests
import traceback
import pandas as pd
from datetime import datetime
from flask import Flask, request, jsonify, render_template_string
from openai import OpenAI
from pymongo import MongoClient
from difflib import SequenceMatcher
# ==========================================
# 1. Configuration and connections
# ==========================================
# The database is provided by the operator; end users never supply its credentials.
MONGO_USER = os.environ.get("MONGO_USER")
MONGO_PASS = os.environ.get("MONGO_PASS")
MONGO_CLUSTER = os.environ.get("MONGO_CLUSTER", "cluster0.mh3esar.mongodb.net")
# Optional system-wide OpenAI key: set it to let users try the app without
# their own key; leave it unset to require a user-supplied key.
SYSTEM_OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

# MongoDB handles are shared module-wide. They are bound to None up front so
# that code running after a failed initialisation sees None, not a NameError.
mongo_client = None
db = None
collection = None
try:
    if not MONGO_USER or not MONGO_PASS:
        # Without this check the URI would silently contain "None:None".
        raise RuntimeError("MONGO_USER / MONGO_PASS environment variables are not set")
    # NOTE(review): the credentials are interpolated verbatim. PyMongo expects
    # them percent-escaped -- confirm the environment values already are.
    uri = f"mongodb+srv://{MONGO_USER}:{MONGO_PASS}@{MONGO_CLUSTER}/?retryWrites=true&w=majority"
    mongo_client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    db = mongo_client["proj810_db"]
    collection = db["rankings"]
    # NOTE(review): MongoClient is documented to connect lazily, so reaching
    # this line presumably only means the client object was built -- confirm.
    print("✅ 数据库连接成功")
except Exception as e:
    # Keep the app importable; database lookups will report their own errors.
    mongo_client = db = collection = None
    print(f"❌ 数据库连接失败: {e}")
# ==========================================
# 2. Tool definitions (stateless; no API key required)
# ==========================================
# Matches a bare DOI ("10.<registrant>/<suffix>") embedded in a resolver URL,
# behind a "doi:" prefix, or inside free text.
_DOI_PATTERN = re.compile(r"10\.\d{4,9}/\S+")


def _pick_source(work):
    """Return ``(journal_name, publisher)`` for an OpenAlex work record.

    The primary location is preferred; if it carries no display name, the
    first entry in ``locations`` that has one is used instead.
    """
    source = (work.get('primary_location') or {}).get('source') or {}
    name = source.get('display_name')
    pub = source.get('host_organization_name') or source.get('publisher')
    if name:
        return name, pub
    for loc in work.get('locations') or []:
        alt = (loc or {}).get('source') or {}
        if alt.get('display_name'):
            return alt.get('display_name'), alt.get('host_organization_name') or alt.get('publisher')
    return name, pub


def fetch_metadata(query):
    """Look up a DOI on OpenAlex and return its venue metadata.

    Args:
        query: A DOI. It may be bare (``10.x/y``), prefixed with ``doi:``, or
            wrapped in a resolver URL; the bare DOI is extracted before the
            request is made.

    Returns:
        A JSON string with ``journal``, ``publisher`` and ``type`` keys (plus
        a ``note`` key when no source name was found), or a plain-text status
        message when the input is not a DOI or the lookup fails. The function
        never raises.
    """
    # Guard the type first: the substring test raises TypeError on None.
    if not isinstance(query, str) or "10." not in query:
        return "Invalid DOI"
    match = _DOI_PATTERN.search(query)
    # Fall back to the trimmed input for unusual DOIs the pattern misses.
    doi = match.group(0) if match else query.strip()
    try:
        r = requests.get(f"https://api.openalex.org/works/doi:{doi}", timeout=10)
        if r.status_code == 404:
            return "OpenAlex: DOI Not Found"
        if r.status_code != 200:
            return f"OpenAlex Error: {r.status_code}"
        d = r.json()
        if not d:
            return "OpenAlex: Empty Response"
        name, pub = _pick_source(d)
        type_ = d.get('type')
        if name:
            return json.dumps({"journal": name, "publisher": pub, "type": type_})
        return json.dumps({"journal": "Unknown Source", "publisher": pub or "Unknown", "type": type_, "note": "Source name not found"})
    except Exception as e:
        # Tool boundary: the agent expects a string result, never an exception.
        return f"Metadata Error: {str(e)}"
# Words too generic to anchor the fuzzy title search.
_RANKING_STOPWORDS = frozenset(
    ["the", "of", "and", "in", "on", "for", "journal", "international", "proceedings"]
)
# A fuzzy candidate must score strictly above this SequenceMatcher ratio.
_FUZZY_MATCH_THRESHOLD = 0.85


def _find_ranking_record(clean):
    """Return the ranking document matching *clean*, or None.

    Tries a case-insensitive exact title match first, then a fuzzy match
    over at most 20 titles that contain the longest significant word.
    """
    exact = collection.find_one({"Title": {"$regex": f"^{re.escape(clean)}$", "$options": "i"}})
    if exact:
        return exact

    target = clean.lower()
    words = [w for w in re.split(r'[^a-zA-Z]+', target) if len(w) > 3 and w not in _RANKING_STOPWORDS]
    if not words:
        return None
    # Purely alphabetic by construction, so safe to use unescaped in a regex.
    longest_word = max(words, key=len)
    candidates = collection.find({"Title": {"$regex": longest_word, "$options": "i"}}).limit(20)

    best_score = _FUZZY_MATCH_THRESHOLD
    best_match = None
    for cand in candidates:
        title = cand.get("Title")
        if not isinstance(title, str):
            continue  # skip malformed records instead of aborting the search
        score = SequenceMatcher(None, target, title.lower()).ratio()
        if score > best_score:
            best_score, best_match = score, cand
    return best_match


def _numeric_or_raw(record, key, default="-"):
    """Return ``record[key]`` as a float when possible.

    Thousands separators are removed from strings first. A value that still
    cannot be converted is returned as-is; a missing key yields *default*.
    """
    val = record.get(key, default)
    if val == default:
        return default
    try:
        if isinstance(val, str):
            val = val.replace(',', '')
        return float(val)
    except (TypeError, ValueError, OverflowError):
        return val


def _global_rank(record):
    """Return ``(rank_text, top_percent)`` derived from ``Global_Percentile``.

    ``top_percent`` is None, and ``rank_text`` is "N/A", when the percentile
    is missing, unparsable or outside ``(0, 100]``.
    """
    try:
        gp_val = float(str(record.get('Global_Percentile', '0')).replace('%', ''))
    except ValueError:
        return "N/A", None
    if not 0 < gp_val <= 100:
        return "N/A", None
    # Format once and reuse the rounded text so the quartile thresholds see
    # exactly the number that is shown to the user.
    top_text = f"{100 - gp_val:.1f}"
    return f"Top {top_text}%", float(top_text)


def _implied_quartile(top_percent):
    """Map a "top X%" figure onto an implied quartile label."""
    if top_percent <= 25:
        return "Q1 (Implied)"
    if top_percent <= 50:
        return "Q2 (Implied)"
    if top_percent <= 75:
        return "Q3 (Implied)"
    return "Q4 (Implied)"


def check_ranking(journal_name):
    """Look up a journal's ranking metrics in the database.

    Args:
        journal_name: Venue title. Quotes and surrounding whitespace are
            stripped before matching.

    Returns:
        A JSON string with ``Title``, ``Quartile``, ``SJR``, ``H_Index``,
        ``Total_Docs``, ``Citations_Per_Doc``, ``Publisher``, ``Global_Rank``
        and ``Categories`` when a record is found; otherwise a plain-text
        message ("Error: Empty Name", "DB: Not Found ..." or "DB Error: ...").
        The function never raises.
    """
    try:
        if not journal_name:
            return "Error: Empty Name"
        clean = journal_name.replace('"', '').replace("'", "").strip()
        if not clean:
            # Only quotes/whitespace: the exact-match regex would become "^$".
            return "Error: Empty Name"

        res = _find_ranking_record(clean)
        if not res:
            return f"DB: Not Found (Cleaned: {clean})"

        # These column names may carry a suffix, so match on the stable prefix.
        docs_col = next((k for k in res if "Total Docs" in k), "Total Docs")
        cit_col = next((k for k in res if "Citations / Doc" in k), "Citations / Doc")

        rank_str, top_percent = _global_rank(res)
        quartile = res.get("SJR Best Quartile", "-")
        if (not quartile or quartile == "-") and top_percent is not None:
            quartile = _implied_quartile(top_percent)

        return json.dumps({
            "Title": res.get("Title"),
            "Quartile": quartile,
            "SJR": _numeric_or_raw(res, "SJR"),
            "H_Index": _numeric_or_raw(res, "H index"),
            "Total_Docs": _numeric_or_raw(res, docs_col),
            "Citations_Per_Doc": _numeric_or_raw(res, cit_col),
            "Publisher": res.get("Publisher", "Unknown"),
            "Global_Rank": rank_str,
            "Categories": res.get("Categories", "")
        })
    except Exception as e:
        # Tool boundary: the agent expects a string result, never an exception.
        return f"DB Error: {str(e)}"
def _function_tool(name, description, param_name):
    """Build a function-tool entry that takes one required string parameter."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {param_name: {"type": "string"}},
                "required": [param_name],
            },
        },
    }


# Tool declarations handed to the chat model; the names must match the
# Python functions the agent loop dispatches to.
tools_schema = [
    _function_tool("fetch_metadata", "Get journal name from DOI.", "query"),
    _function_tool("check_ranking", "Check journal metrics in DB.", "journal_name"),
]
# ==========================================
# 3. Agent core (dynamic client)
# ==========================================
# System prompt for the auditor agent. It includes the fallback rule for
# prestigious venues that are missing from the database (the "MISQ" case).
_AUDITOR_SYSTEM_PROMPT = """
You are an Expert Journal Authority Auditor.
YOUR GOAL: Verify the quality of academic venues using the `check_ranking` tool AND your own domain knowledge.
### 🧠 CRITICAL THINKING PROCESS:
1. **Analyze Input**: Normalize acronyms (e.g., "MISQ" -> "MIS Quarterly").
2. **Check DB**: Call `check_ranking` first.
3. **Evaluate Result**:
- If DB returns Q1/Top Tier -> Report it.
- If DB returns "Not Found" or low rank, BUT you know this is a prestigious venue (e.g., FT50, CCF-A, AIS Senior Scholars' Basket, Nature/Science family) -> **YOU MUST OVERRIDE/SUPPLEMENT**.
### 📝 REPORT FORMAT (Strict Markdown)
**Situation 1: Found in DB (Standard)**
| Metric | Value | Status |
| :--- | :--- | :--- |
| 📖 Venue | [Title] | - |
| 🏆 Quartile | [Q1/Q2...] | [✅/⚠️] |
| 📉 SJR | [Value] | - |
**Situation 2: Not in DB / Low Rank BUT Prestigious (The "MIS Quarterly" Case)**
> ⚠️ **Database Note**: Not found in current SJR index (or name mismatch).
### 🧠 AI Domain Knowledge Supplement
* **Prestige Status**: [e.g., "Undisputed Top Tier", "FT50 Listed", "AIS Basket of 8"]
* **Consensus**: [Explain WHY it is top tier despite missing data.]
* **Verdict**: ✅ **Highly Recommended** (Based on Domain Reputation)
**Situation 3: Not in DB & Not Famous**
> ⚠️ **Notice**: "[Input]" is not ranked and lacks broad recognition. Proceed with caution.
"""


def run_agent_with_logs(user_input, api_key):
    """Run the tool-calling audit loop for one user request.

    Args:
        user_input: The DOI or journal name typed by the user.
        api_key: The user's OpenAI key; when falsy, ``SYSTEM_OPENAI_KEY`` is
            used instead.

    Returns:
        A ``(reply, logs)`` tuple: the Markdown report (or an error/timeout
        message) and the list of progress lines collected along the way.
    """
    # A key typed by the user wins; otherwise try the system-wide key.
    active_key = api_key or SYSTEM_OPENAI_KEY
    if not active_key:
        return "❌ **Error**: No API Key provided. Please enter your OpenAI API Key.", ["❌ Error: Missing API Key"]

    # The client is created per request because the key can differ per user.
    try:
        client = OpenAI(api_key=active_key)
    except Exception as e:
        return f"❌ **Error**: Invalid API Key format. {str(e)}", ["❌ Client Init Failed"]

    # Lambdas keep the tool lookups lazy: a tool is only resolved when called.
    dispatch = {
        "fetch_metadata": lambda a: fetch_metadata(a.get("query")),
        "check_ranking": lambda a: check_ranking(a.get("journal_name")),
    }

    logs = []
    messages = [{"role": "system", "content": _AUDITOR_SYSTEM_PROMPT}, {"role": "user", "content": user_input}]
    logs.append(f"🧠 System: Analyzing '{user_input}'...")

    try:
        # At most four model round-trips before giving up.
        for step in range(1, 5):
            logs.append(f"🤔 Step {step}: Thinking...")
            completion = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools_schema,
                temperature=0
            )
            answer = completion.choices[0].message
            messages.append(answer)

            # No tool requests means the model has produced its final report.
            if not answer.tool_calls:
                logs.append("📝 Report generated.")
                return answer.content, logs

            for call in answer.tool_calls:
                tool_name = call.function.name
                raw_args = call.function.arguments
                try:
                    tool_args = json.loads(raw_args)
                    logs.append(f"🔍 Checking: {tool_args.get('journal_name') or tool_args.get('query')}")
                    handler = dispatch.get(tool_name)
                    if handler is None:
                        outcome = "Error: Unknown Tool"
                    else:
                        outcome = handler(tool_args)
                except Exception as e:
                    # A failing tool is reported back to the model rather
                    # than aborting the whole run.
                    outcome = f"Tool Error: {str(e)}"
                logs.append(f"✅ Result: {str(outcome)[:80]}...")
                messages.append({
                    "tool_call_id": call.id,
                    "role": "tool",
                    "name": tool_name,
                    "content": str(outcome)
                })
        return "⚠️ Timeout: Analysis too complex.", logs
    except Exception as e:
        print(traceback.format_exc())
        return f"**API Error**: {str(e)} (Please check your Key)", logs
# ==========================================
# 4. Flask web server (includes an API key input field)
# ==========================================
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)
# Single-page chat UI, passed to render_template_string by the index route.
# Left pane: optional API-key field, message list and input form.
# Right pane: a "terminal" log filled from the `logs` array of each /chat reply.
# The script POSTs {message, api_key} as JSON to /chat and shows `reply`.
# NOTE(review): bot replies are rendered with marked.parse() and assigned to
# innerHTML with no sanitising step visible here -- confirm whether model or
# database text could inject markup.
# NOTE(review): the Analyze button has onclick="send()" and sits inside a form
# whose onsubmit also calls send(); the second call appears to return early
# because the input has already been cleared -- confirm.
CHAT_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Journal Authority Auditor</title>
<style>
body { font-family: 'Segoe UI', system-ui; background: #2c3e50; display: flex; justify-content: center; height: 100vh; margin: 0; }
.container { display: flex; width: 95%; max-width: 1400px; height: 95vh; gap: 20px; margin-top: 2.5vh; }
.chat-panel { flex: 1.2; background: #f4f7f6; border-radius: 12px; display: flex; flex-direction: column; overflow: hidden; }
.header { background: #34495e; color: white; padding: 15px; font-weight: bold; display: flex; justify-content: space-between; align-items: center;}
.messages-area { flex: 1; padding: 20px; overflow-y: auto; display: flex; flex-direction: column; gap: 15px; }
.message { max-width: 85%; padding: 14px; border-radius: 12px; line-height: 1.6; }
.bot { background: white; align-self: flex-start; border-left: 4px solid #3498db; }
.user { background: #3498db; color: white; align-self: flex-end; }
/* 顶部 Key 输入栏 */
.key-bar { background: #ecf0f1; padding: 10px 20px; border-bottom: 1px solid #ddd; display: flex; gap: 10px; align-items: center; font-size: 0.9rem;}
.key-input { border: 1px solid #bdc3c7; padding: 5px 10px; border-radius: 4px; width: 200px; font-family: monospace; }
.input-form { padding: 20px; background: white; display: flex; gap: 10px; }
input[type="text"] { flex: 1; padding: 10px; border-radius: 8px; border: 1px solid #ccc; }
button { padding: 10px 20px; background: #2c3e50; color: white; border: none; border-radius: 8px; cursor: pointer; }
button:hover { background: #1a252f; }
.brain-panel { flex: 0.8; background: #1e272e; border-radius: 12px; color: #0fb9b1; padding: 15px; font-family: monospace; overflow-y: auto; }
table { width: 100%; border-collapse: collapse; margin: 10px 0; background: white; }
th, td { border: 1px solid #ddd; padding: 8px; }
blockquote { border-left: 4px solid #f1c40f; margin: 5px 0; padding: 10px; background: #fffbf0; }
</style>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
</head>
<body>
<div class="container">
<div class="chat-panel">
<div class="header">
<span>🛡️ Journal Auditor</span>
</div>
<div class="key-bar">
<span>🔑 OpenAI Key (Optional):</span>
<input type="password" id="user-key" class="key-input" placeholder="sk-..." autocomplete="new-password">
<span style="color: #7f8c8d; font-size: 0.8em;">(Leave empty to use System Key if available)</span>
</div>
<div class="messages-area" id="chat-box"><div class="message bot">System Online. Enter a DOI or Journal Name.</div></div>
<form class="input-form" onsubmit="event.preventDefault(); send();">
<input type="text" id="inp" placeholder="Type here..." autocomplete="off">
<button id="btn-send" onclick="send()">Analyze</button>
</form>
</div>
<div class="brain-panel" id="log-box"><div>TERMINAL LOG...</div></div>
</div>
<script>
async function send(){
let i = document.getElementById('inp');
let k = document.getElementById('user-key'); // 获取 Key
let btn = document.getElementById('btn-send');
let txt = i.value.trim();
let apiKey = k.value.trim();
if(!txt) return;
i.disabled = true; btn.disabled = true; btn.innerText = "...";
addMsg(txt, 'user'); i.value = '';
let logsDiv = document.getElementById('log-box');
try {
let r = await fetch('/chat', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({
message: txt,
api_key: apiKey // 发送 Key 给后端
})
});
let d = await r.json();
if(d.logs) {
d.logs.forEach(l => {
let div = document.createElement('div');
div.innerText = l;
div.style.marginBottom = "5px";
div.style.borderLeft = "2px solid #555";
logsDiv.appendChild(div);
});
logsDiv.scrollTop = logsDiv.scrollHeight;
}
addMsg(d.reply || "Error", 'bot', true);
} catch(e) {
addMsg("Server Error: " + e, 'bot');
} finally {
i.disabled = false; btn.disabled = false; btn.innerText = "Analyze"; i.focus();
}
}
function addMsg(txt, cls, html=false){
let d = document.createElement('div');
d.className = 'message ' + cls;
if(html) d.innerHTML = marked.parse(txt); else d.innerText = txt;
let box = document.getElementById('chat-box');
box.appendChild(d);
box.scrollTop = box.scrollHeight;
}
</script></body></html>
"""
@app.route('/')
def index():
    """Serve the single-page chat UI."""
    page = render_template_string(CHAT_HTML)
    return page
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat request from the front end.

    Expects a JSON object with ``message`` (the DOI or journal name) and an
    optional ``api_key``. Always responds with a JSON object holding
    ``reply`` and ``logs``; problems are reported inside ``reply`` so the
    page can display them.
    """
    try:
        # silent=True returns None for a missing or malformed JSON body
        # instead of raising, so one message covers every bad-payload case.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"reply": "Error: Request body must be a JSON object.", "logs": []})

        # `or ''` also covers an explicit JSON null, which .get() would
        # otherwise hand back as None.
        user_key = str(data.get('api_key') or '').strip()
        message = str(data.get('message') or '').strip()
        if not message:
            # Avoid a model call when there is nothing to analyse.
            return jsonify({"reply": "Error: Empty message.", "logs": []})

        reply, logs = run_agent_with_logs(message, user_key)
        return jsonify({"reply": reply, "logs": logs})
    except Exception as e:
        return jsonify({"reply": f"Error: {str(e)}", "logs": []})
if __name__ == "__main__":
    # Script entry point: listen on every interface, port 7860.
    app.run(port=7860, host="0.0.0.0")