"""Journal Authority Auditor.

A small Flask application that lets a user submit a DOI or a journal name and
answers with a Markdown report.  The report is produced by an OpenAI
tool-calling agent that can (a) resolve a DOI to a venue through the OpenAlex
API and (b) look the venue up in a MongoDB collection of journal rankings.
"""
import json
import os
import re
import traceback
from datetime import datetime
from difflib import SequenceMatcher
from urllib.parse import quote_plus

import pandas as pd
import requests
from flask import Flask, jsonify, render_template_string, request
from openai import OpenAI
from pymongo import MongoClient

# ==========================================
# 1. Configuration and connections
# ==========================================
# The database is provided by the operator; end users never supply it.
MONGO_USER = os.environ.get("MONGO_USER")
MONGO_PASS = os.environ.get("MONGO_PASS")
MONGO_CLUSTER = os.environ.get("MONGO_CLUSTER", "cluster0.mh3esar.mongodb.net")

# Optional system-wide OpenAI key (e.g. to offer a free trial).  When unset,
# every request must carry the user's own key.
SYSTEM_OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

# Bind the handles up front so that a failed connection leaves them as None
# instead of undefined names (which would surface later as a NameError).
mongo_client = None
db = None
collection = None

# Initialise MongoDB once; the client is shared by all requests.
try:
    # Credentials must be percent-escaped before being placed in the URI,
    # otherwise characters such as '@', ':' or '/' corrupt it.
    uri = (
        f"mongodb+srv://{quote_plus(MONGO_USER or '')}:{quote_plus(MONGO_PASS or '')}"
        f"@{MONGO_CLUSTER}/?retryWrites=true&w=majority"
    )
    mongo_client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    db = mongo_client["proj810_db"]
    collection = db["rankings"]
    # NOTE(review): this only shows that the client object was created; it
    # does not ping the server, so reachability is not verified here.
    print("✅ 数据库连接成功")
except Exception as e:
    print(f"❌ 数据库连接失败: {e}")


# ==========================================
# 2. Tool definitions (stateless, no API key needed)
# ==========================================
def fetch_metadata(query):
    """Look up the publication venue of a DOI through OpenAlex.

    Args:
        query: The DOI to resolve.  Anything that is not a string containing
            ``"10."`` is rejected without a network call.

    Returns:
        A JSON string with the keys ``journal``, ``publisher`` and ``type``
        (plus ``note`` when no venue name could be found), or a plain
        human-readable error string.  The function never raises.
    """
    # The argument comes from model-generated JSON and may be missing/null.
    if not isinstance(query, str) or "10." not in query:
        return "Invalid DOI"
    try:
        url = f"https://api.openalex.org/works/doi:{query.strip()}"
        r = requests.get(url, timeout=10)
        if r.status_code == 404:
            return "OpenAlex: DOI Not Found"
        if r.status_code != 200:
            return f"OpenAlex Error: {r.status_code}"

        d = r.json()
        if not d:
            return "OpenAlex: Empty Response"

        src = d.get('primary_location') or {}
        source = src.get('source') or {}
        pub = source.get('host_organization_name') or source.get('publisher')
        name = source.get('display_name')

        # Fall back to the first alternative location that names its source.
        if not name and d.get('locations'):
            for loc in d.get('locations'):
                s = (loc or {}).get('source') or {}
                if s.get('display_name'):
                    name = s.get('display_name')
                    pub = s.get('host_organization_name') or s.get('publisher')
                    break

        type_ = d.get('type')
        if name:
            return json.dumps({"journal": name, "publisher": pub, "type": type_})
        return json.dumps({
            "journal": "Unknown Source",
            "publisher": pub or "Unknown",
            "type": type_,
            "note": "Source name not found",
        })
    except Exception as e:
        return f"Metadata Error: {str(e)}"


# Words that carry no signal when picking a keyword for the fuzzy search.
_FUZZY_STOPWORDS = [
    "the", "of", "and", "in", "on", "for",
    "journal", "international", "proceedings",
]


def _find_ranking_doc(clean):
    """Return the ranking document for *clean*, or None when nothing matches.

    An exact, case-insensitive title match is tried first.  If that fails,
    up to 20 titles containing the longest significant word of the name are
    fetched and the one most similar to the full name is returned, provided
    its similarity ratio exceeds 0.85.
    """
    safe = re.escape(clean)
    res = collection.find_one({"Title": {"$regex": f"^{safe}$", "$options": "i"}})
    if res:
        return res

    # Splitting on non-letters leaves purely alphabetic words, so the chosen
    # word can be used as a regex without escaping.
    words = [
        w for w in re.split(r'[^a-zA-Z]+', clean.lower())
        if len(w) > 3 and w not in _FUZZY_STOPWORDS
    ]
    if not words:
        return None

    longest_word = max(words, key=len)
    candidates = collection.find(
        {"Title": {"$regex": longest_word, "$options": "i"}}
    ).limit(20)

    target = clean.lower()
    best_score = 0
    best_match = None
    for cand in candidates:
        title = cand.get('Title')
        # Skip malformed rows instead of aborting the whole lookup.
        if not isinstance(title, str):
            continue
        score = SequenceMatcher(None, target, title.lower()).ratio()
        if score > 0.85 and score > best_score:
            best_score = score
            best_match = cand
    return best_match


def _numeric_field(res, key, default="-"):
    """Return ``res[key]`` as a float when possible.

    Thousands separators are removed from strings before conversion.  A
    missing key yields *default*; a value that cannot be converted is
    returned as it is (strings with their commas already removed).
    """
    val = res.get(key, default)
    if val == default:
        return default
    try:
        if isinstance(val, str):
            val = val.replace(',', '')
        return float(val)
    except (TypeError, ValueError):
        return val


def _global_rank(res):
    """Format ``Global_Percentile`` as ``"Top X.X%"``, or ``"N/A"``."""
    try:
        gp_val = float(str(res.get('Global_Percentile', '0')).replace('%', ''))
    except ValueError:
        return "N/A"
    return f"Top {100 - gp_val:.1f}%" if gp_val > 0 else "N/A"


def _quartile(res, rank_str):
    """Return the stored SJR quartile, or one implied by the global rank."""
    quartile = res.get("SJR Best Quartile", "-")
    if (not quartile or quartile == "-") and rank_str != "N/A":
        try:
            top_percent = float(rank_str.replace("Top ", "").replace("%", ""))
        except ValueError:
            # Best effort only: keep whatever the database had.
            return quartile
        if top_percent <= 25:
            quartile = "Q1 (Implied)"
        elif top_percent <= 50:
            quartile = "Q2 (Implied)"
        elif top_percent <= 75:
            quartile = "Q3 (Implied)"
        else:
            quartile = "Q4 (Implied)"
    return quartile


def check_ranking(journal_name):
    """Look up a journal's ranking metrics in the database.

    Args:
        journal_name: Venue name as supplied by the model.  Quotes and
            surrounding whitespace are stripped before matching.

    Returns:
        A JSON string with ``Title``, ``Quartile``, ``SJR``, ``H_Index``,
        ``Total_Docs``, ``Citations_Per_Doc``, ``Publisher``, ``Global_Rank``
        and ``Categories`` when a record is found; otherwise a plain string
        starting with ``"DB: Not Found"``, ``"DB Error"`` or ``"Error"``.
        The function never raises.
    """
    try:
        if not journal_name:
            return "Error: Empty Name"
        # pymongo collections do not support truth-value testing, hence the
        # explicit identity check.
        if collection is None:
            return "DB Error: database connection unavailable"

        clean = journal_name.replace('"', '').replace("'", "").strip()
        res = _find_ranking_doc(clean)
        if not res:
            return f"DB: Not Found (Cleaned: {clean})"

        # These column names carry a varying suffix, so they are located by
        # substring; the bare name is the fallback.
        docs_col = next((k for k in res if "Total Docs" in k), "Total Docs")
        cit_col = next((k for k in res if "Citations / Doc" in k), "Citations / Doc")

        rank_str = _global_rank(res)
        return json.dumps({
            "Title": res.get("Title"),
            "Quartile": _quartile(res, rank_str),
            "SJR": _numeric_field(res, "SJR"),
            "H_Index": _numeric_field(res, "H index"),
            "Total_Docs": _numeric_field(res, docs_col),
            "Citations_Per_Doc": _numeric_field(res, cit_col),
            "Publisher": res.get("Publisher", "Unknown"),
            "Global_Rank": rank_str,
            "Categories": res.get("Categories", ""),
        })
    except Exception as e:
        return f"DB Error: {str(e)}"


# Tool descriptions handed to the chat-completions API.
tools_schema = [
    {
        "type": "function",
        "function": {
            "name": "fetch_metadata",
            "description": "Get journal name from DOI.",
            "parameters": {
                "type": "object",
                "properties": {"query": {"type": "string"}},
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "check_ranking",
            "description": "Check journal metrics in DB.",
            "parameters": {
                "type": "object",
                "properties": {"journal_name": {"type": "string"}},
                "required": ["journal_name"],
            },
        },
    },
]


# ==========================================
# 3. Agent core (client created per request)
# ==========================================
# System prompt; includes the fallback rules for prestigious venues that are
# missing from the database (the "MIS Quarterly" case).
_SYSTEM_PROMPT = """
You are an Expert Journal Authority Auditor.

YOUR GOAL: Verify the quality of academic venues using the `check_ranking` tool AND your own domain knowledge.

### 🧠 CRITICAL THINKING PROCESS:
1. **Analyze Input**: Normalize acronyms (e.g., "MISQ" -> "MIS Quarterly").
2. **Check DB**: Call `check_ranking` first.
3. **Evaluate Result**:
   - If DB returns Q1/Top Tier -> Report it.
   - If DB returns "Not Found" or low rank, BUT you know this is a prestigious venue (e.g., FT50, CCF-A, AIS Senior Scholars' Basket, Nature/Science family) -> **YOU MUST OVERRIDE/SUPPLEMENT**.

### 📝 REPORT FORMAT (Strict Markdown)

**Situation 1: Found in DB (Standard)**
| Metric | Value | Status |
| :--- | :--- | :--- |
| 📖 Venue | [Title] | - |
| 🏆 Quartile | [Q1/Q2...] | [✅/⚠️] |
| 📉 SJR | [Value] | - |

**Situation 2: Not in DB / Low Rank BUT Prestigious (The "MIS Quarterly" Case)**
> ⚠️ **Database Note**: Not found in current SJR index (or name mismatch).

### 🧠 AI Domain Knowledge Supplement
* **Prestige Status**: [e.g., "Undisputed Top Tier", "FT50 Listed", "AIS Basket of 8"]
* **Consensus**: [Explain WHY it is top tier despite missing data.]
* **Verdict**: ✅ **Highly Recommended** (Based on Domain Reputation)

**Situation 3: Not in DB & Not Famous**
> ⚠️ **Notice**: "[Input]" is not ranked and lacks broad recognition. Proceed with caution.
"""


def run_agent_with_logs(user_input, api_key):
    """Run the auditing agent on one user message.

    Args:
        user_input: DOI or journal name typed by the user.
        api_key: OpenAI key supplied by the user; when empty the system key
            from the environment is used instead.

    Returns:
        A ``(reply, logs)`` tuple: ``reply`` is the Markdown report or an
        error message, ``logs`` is the list of progress lines produced while
        the agent worked.  The function never raises.
    """
    logs = []

    # 1. Pick the key: the user's own key wins, the system key is the fallback.
    active_key = api_key if api_key else SYSTEM_OPENAI_KEY
    if not active_key:
        return (
            "❌ **Error**: No API Key provided. Please enter your OpenAI API Key.",
            ["❌ Error: Missing API Key"],
        )

    # 2. Build a client for this request only, so keys are never shared.
    try:
        client = OpenAI(api_key=active_key)
    except Exception as e:
        return f"❌ **Error**: Invalid API Key format. {str(e)}", ["❌ Client Init Failed"]

    # 3. Conversation loop, capped at four model turns.
    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]
    logs.append(f"🧠 System: Analyzing '{user_input}'...")

    turn_count = 0
    try:
        while turn_count < 4:
            turn_count += 1
            logs.append(f"🤔 Step {turn_count}: Thinking...")

            resp = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools_schema,
                temperature=0,
            )
            msg = resp.choices[0].message
            messages.append(msg)

            # No tool request means the model has written its final report.
            if not msg.tool_calls:
                logs.append("📝 Report generated.")
                return msg.content, logs

            for tc in msg.tool_calls:
                fname = tc.function.name
                args_str = tc.function.arguments
                try:
                    args = json.loads(args_str)
                    logs.append(
                        f"🔍 Checking: {args.get('journal_name') or args.get('query')}"
                    )
                    if fname == "fetch_metadata":
                        res = fetch_metadata(args.get("query"))
                    elif fname == "check_ranking":
                        res = check_ranking(args.get("journal_name"))
                    else:
                        res = "Error: Unknown Tool"
                except Exception as e:
                    # Malformed arguments are reported back to the model
                    # rather than aborting the conversation.
                    res = f"Tool Error: {str(e)}"

                logs.append(f"✅ Result: {str(res)[:80]}...")
                messages.append({
                    "tool_call_id": tc.id,
                    "role": "tool",
                    "name": fname,
                    "content": str(res),
                })

        return "⚠️ Timeout: Analysis too complex.", logs
    except Exception as e:
        print(traceback.format_exc())
        return f"**API Error**: {str(e)} (Please check your Key)", logs


# ==========================================
# 4. Flask web server (page includes an API key input box)
# ==========================================
app = Flask(__name__)

# NOTE(review): the markup of this template was missing from the file, which
# left the page without any inputs.  It has been rebuilt as a minimal page
# around the original visible texts; restyle as needed.  The template is
# rendered by Jinja, so avoid the sequences "{{", "{%" and "{#" inside it.
CHAT_HTML = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Journal Authority Auditor</title>
<style>
body { font-family: sans-serif; max-width: 760px; margin: 2rem auto; padding: 0 1rem; }
#chat { white-space: pre-wrap; border: 1px solid #ccc; padding: 1rem; margin: 1rem 0; }
#logs { background: #111; color: #0f0; padding: 1rem; white-space: pre-wrap; }
#message { width: 70%; }
</style>
</head>
<body>
<h1>🛡️ Journal Auditor</h1>
<div>
<label for="api-key">🔑 OpenAI Key (Optional):</label>
<input id="api-key" type="password" autocomplete="off">
<small>(Leave empty to use System Key if available)</small>
</div>
<div id="chat">System Online. Enter a DOI or Journal Name.</div>
<form id="chat-form">
<input id="message" type="text" autocomplete="off" required>
<button type="submit">Send</button>
</form>
<pre id="logs">TERMINAL LOG...</pre>
<script>
var form = document.getElementById("chat-form");
var chat = document.getElementById("chat");
var logs = document.getElementById("logs");
form.addEventListener("submit", function (event) {
  event.preventDefault();
  var payload = {
    message: document.getElementById("message").value,
    api_key: document.getElementById("api-key").value
  };
  logs.textContent = "TERMINAL LOG...";
  fetch("/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(payload)
  })
    .then(function (response) { return response.json(); })
    .then(function (data) {
      chat.textContent = data.reply;
      logs.textContent = (data.logs || []).join("\\n");
    })
    .catch(function (error) {
      chat.textContent = "Error: " + error;
    });
});
</script>
</body>
</html>
"""


@app.route('/')
def index():
    """Serve the single-page chat UI."""
    return render_template_string(CHAT_HTML)


@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat request.

    Expects a JSON object with ``message`` and an optional ``api_key``.
    Always answers with JSON of the form ``{"reply": str, "logs": list}``,
    including when the request is malformed or the agent fails.
    """
    try:
        # silent=True yields None instead of raising on a missing or invalid
        # JSON body; anything that is not an object is treated as empty.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        # The key is read from the front end; null or non-string values are
        # tolerated.
        user_key = str(data.get('api_key') or '').strip()
        message = str(data.get('message') or '').strip()
        if not message:
            return jsonify({
                "reply": "⚠️ Please enter a DOI or journal name.",
                "logs": [],
            })

        reply, logs = run_agent_with_logs(message, user_key)
        return jsonify({"reply": reply, "logs": logs})
    except Exception as e:
        return jsonify({"reply": f"Error: {str(e)}", "logs": []})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)