| import os |
| import json |
| import re |
| import requests |
| import traceback |
| import pandas as pd |
| from datetime import datetime |
| from flask import Flask, request, jsonify, render_template_string |
| from openai import OpenAI |
| from pymongo import MongoClient |
| from difflib import SequenceMatcher |
|
|
| |
| |
| |
| |
# MongoDB credentials and cluster host come from the environment so that no
# secrets are stored in the source file.
MONGO_USER = os.environ.get("MONGO_USER")
MONGO_PASS = os.environ.get("MONGO_PASS")
MONGO_CLUSTER = os.environ.get("MONGO_CLUSTER", "cluster0.mh3esar.mongodb.net")

# Server-side OpenAI key; run_agent_with_logs falls back to it when the
# request does not carry a key of its own.
SYSTEM_OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

# Pre-bind the handles: if building the client raises, the names must still
# exist so later code can test for None instead of dying with a NameError.
mongo_client = db = collection = None
try:
    # NOTE(review): PyMongo expects user/password in the URI to be
    # percent-escaped; confirm whether the env values are stored raw or
    # already escaped before adding quote_plus() here.
    uri = f"mongodb+srv://{MONGO_USER}:{MONGO_PASS}@{MONGO_CLUSTER}/?retryWrites=true&w=majority"
    mongo_client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    db = mongo_client["proj810_db"]
    collection = db["rankings"]
    # NOTE(review): no query is issued here, so this message presumably only
    # confirms that the client object was built, not that the server answered.
    print("✅ 数据库连接成功")
except Exception as e:
    # Startup must not crash the web app; leave the handles as None.
    mongo_client = db = collection = None
    print(f"❌ 数据库连接失败: {e}")
|
|
| |
| |
| |
def _pick_source(work):
    """Return ``(journal_name, publisher)`` taken from an OpenAlex work record.

    The primary location is preferred. When it has no source name, the
    ``locations`` list is scanned in order and the first named source wins.
    ``journal_name`` is None when no location names its source.
    """
    primary = work.get('primary_location') or {}
    source = primary.get('source') or {}
    name = source.get('display_name')
    publisher = source.get('host_organization_name') or source.get('publisher')
    if name:
        return name, publisher

    for location in work.get('locations') or []:
        candidate = (location or {}).get('source') or {}
        if candidate.get('display_name'):
            return (
                candidate.get('display_name'),
                candidate.get('host_organization_name') or candidate.get('publisher'),
            )
    return None, publisher


def fetch_metadata(query):
    """Fetch journal metadata for a DOI from the OpenAlex API.

    Args:
        query: A DOI. A bare DOI ("10.1000/xyz"), a "doi:" prefixed DOI and a
            doi.org URL are all accepted; everything before the first "10."
            is discarded.

    Returns:
        A JSON string with the keys ``journal``, ``publisher`` and ``type``
        (plus ``note`` when no source name was found), or a plain-text error
        message. The function never raises.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import quote

    if not isinstance(query, str) or "10." not in query:
        return "Invalid DOI"

    # Drop any "doi:" / "https://doi.org/" prefix and surrounding whitespace,
    # then percent-encode so characters such as '#' or '?' inside a DOI cannot
    # be read as URL syntax.
    text = query.strip()
    doi = text[text.index("10."):]

    try:
        url = f"https://api.openalex.org/works/doi:{quote(doi, safe='/')}"
        r = requests.get(url, timeout=10)
        if r.status_code == 404:
            return "OpenAlex: DOI Not Found"
        if r.status_code != 200:
            return f"OpenAlex Error: {r.status_code}"

        work = r.json()
        if not work:
            return "OpenAlex: Empty Response"

        name, publisher = _pick_source(work)
        work_type = work.get('type')
        if name:
            return json.dumps({"journal": name, "publisher": publisher, "type": work_type})
        return json.dumps({"journal": "Unknown Source", "publisher": publisher or "Unknown", "type": work_type, "note": "Source name not found"})
    except Exception as e:
        # Tool boundary: the result is fed back to the model as text, so
        # network and parsing failures are reported rather than raised.
        return f"Metadata Error: {str(e)}"
|
|
# Words ignored when picking the anchor word for the fuzzy title search.
_RANKING_STOPWORDS = frozenset(
    ["the", "of", "and", "in", "on", "for", "journal", "international", "proceedings"]
)

# Upper bound (inclusive, in "top N %") for each implied quartile label.
_IMPLIED_QUARTILES = ((25, "Q1 (Implied)"), (50, "Q2 (Implied)"), (75, "Q3 (Implied)"))


def _is_missing(value):
    """Return True for None, an empty string, or a float NaN."""
    if value is None or value == "":
        return True
    return isinstance(value, float) and value != value  # NaN is the only x != x


def _metric(record, key, default="-"):
    """Return ``record[key]`` as a float when possible, else the raw value.

    Missing, empty and NaN values become ``default`` so the JSON payload never
    contains a bare ``NaN`` token.
    """
    raw = record.get(key, default)
    if _is_missing(raw) or raw == default:
        return default
    if isinstance(raw, str):
        # NOTE(review): commas are treated as thousands separators here; if
        # the stored values use a decimal comma this inflates them - verify
        # against the import script.
        raw = raw.replace(',', '')
    try:
        number = float(raw)
    except (TypeError, ValueError):
        return raw
    return default if number != number else number


def _global_rank(record):
    """Return ``(label, top_percent)`` derived from ``Global_Percentile``.

    ``label`` is "Top X.X%" or "N/A"; ``top_percent`` is the number shown in
    the label, or None when no usable percentile is stored.
    """
    try:
        percentile = float(str(record.get('Global_Percentile', '0')).replace('%', ''))
    except ValueError:
        return "N/A", None
    if not percentile > 0:  # also rejects NaN
        return "N/A", None
    shown = f"{100 - percentile:.1f}"
    return f"Top {shown}%", float(shown)


def _find_journal(coll, clean):
    """Look ``clean`` up by exact title first, then by fuzzy similarity."""
    exact = coll.find_one({"Title": {"$regex": f"^{re.escape(clean)}$", "$options": "i"}})
    if exact:
        return exact

    target = clean.lower()
    words = [
        w for w in re.split(r'[^a-zA-Z]+', target)
        if len(w) > 3 and w not in _RANKING_STOPWORDS
    ]
    if not words:
        return None

    # The longest informative word narrows the candidate set; the final pick
    # is the candidate whose whole title is more than 85 % similar.
    anchor = max(words, key=len)
    best_match, best_score = None, 0.85
    for cand in coll.find({"Title": {"$regex": anchor, "$options": "i"}}).limit(20):
        title = cand.get('Title')
        if not isinstance(title, str):
            continue  # a malformed record must not abort the whole lookup
        score = SequenceMatcher(None, target, title.lower()).ratio()
        if score > best_score:
            best_match, best_score = cand, score
    return best_match


def check_ranking(journal_name):
    """Look up a journal's ranking metrics in the ``rankings`` collection.

    Args:
        journal_name: Journal title; surrounding quotes and whitespace are
            removed before matching.

    Returns:
        A JSON string with Title, Quartile, SJR, H_Index, Total_Docs,
        Citations_Per_Doc, Publisher, Global_Rank and Categories when a record
        matches; otherwise a plain-text "Error: ...", "DB: Not Found ..." or
        "DB Error: ..." message. The function never raises.
    """
    try:
        if not journal_name:
            return "Error: Empty Name"
        clean = str(journal_name).replace('"', '').replace("'", "").strip()
        if not clean:
            # Input such as '""' would otherwise search for the regex ^$.
            return "Error: Empty Name"

        # The module-level handle may be unbound or None when the startup
        # connection failed. Collection objects do not support truth testing,
        # so the comparison has to be "is None".
        coll = globals().get("collection")
        if coll is None:
            return "DB Error: database connection unavailable"

        res = _find_journal(coll, clean)
        if not res:
            return f"DB: Not Found (Cleaned: {clean})"

        # Column names may carry a suffix, so match them by substring.
        docs_col = next((k for k in res if "Total Docs" in k), "Total Docs")
        cit_col = next((k for k in res if "Citations / Doc" in k), "Citations / Doc")

        rank_str, top_percent = _global_rank(res)

        quartile = res.get("SJR Best Quartile", "-")
        if _is_missing(quartile):
            quartile = "-"
        if quartile == "-" and top_percent is not None:
            # No stored quartile: infer one from the global percentile.
            quartile = next(
                (label for limit, label in _IMPLIED_QUARTILES if top_percent <= limit),
                "Q4 (Implied)",
            )

        return json.dumps({
            "Title": res.get("Title"),
            "Quartile": quartile,
            "SJR": _metric(res, "SJR"),
            "H_Index": _metric(res, "H index"),
            "Total_Docs": _metric(res, docs_col),
            "Citations_Per_Doc": _metric(res, cit_col),
            "Publisher": res.get("Publisher", "Unknown"),
            "Global_Rank": rank_str,
            "Categories": res.get("Categories", "")
        })
    except Exception as e:
        # Tool boundary: report database failures as text for the model.
        return f"DB Error: {str(e)}"
|
|
def _string_arg_tool(name, description, arg_name):
    """Build a function-tool entry that takes one required string argument."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {arg_name: {"type": "string"}},
                "required": [arg_name],
            },
        },
    }


# Tool definitions passed to the chat-completions call; the names must match
# the functions dispatched in run_agent_with_logs.
tools_schema = [
    _string_arg_tool("fetch_metadata", "Get journal name from DOI.", "query"),
    _string_arg_tool("check_ranking", "Check journal metrics in DB.", "journal_name"),
]
|
|
| |
| |
| |
# System prompt for the auditing agent. Kept at module level so the function
# body stays readable; the text is sent to the model verbatim.
_AUDITOR_SYSTEM_PROMPT = """
You are an Expert Journal Authority Auditor.
YOUR GOAL: Verify the quality of academic venues using the `check_ranking` tool AND your own domain knowledge.

### 🧠 CRITICAL THINKING PROCESS:
1. **Analyze Input**: Normalize acronyms (e.g., "MISQ" -> "MIS Quarterly").
2. **Check DB**: Call `check_ranking` first.
3. **Evaluate Result**:
- If DB returns Q1/Top Tier -> Report it.
- If DB returns "Not Found" or low rank, BUT you know this is a prestigious venue (e.g., FT50, CCF-A, AIS Senior Scholars' Basket, Nature/Science family) -> **YOU MUST OVERRIDE/SUPPLEMENT**.

### 📝 REPORT FORMAT (Strict Markdown)

**Situation 1: Found in DB (Standard)**
| Metric | Value | Status |
| :--- | :--- | :--- |
| 📖 Venue | [Title] | - |
| 🏆 Quartile | [Q1/Q2...] | [✅/⚠️] |
| 📉 SJR | [Value] | - |

**Situation 2: Not in DB / Low Rank BUT Prestigious (The "MIS Quarterly" Case)**
> ⚠️ **Database Note**: Not found in current SJR index (or name mismatch).

### 🧠 AI Domain Knowledge Supplement
* **Prestige Status**: [e.g., "Undisputed Top Tier", "FT50 Listed", "AIS Basket of 8"]
* **Consensus**: [Explain WHY it is top tier despite missing data.]
* **Verdict**: ✅ **Highly Recommended** (Based on Domain Reputation)

**Situation 3: Not in DB & Not Famous**
> ⚠️ **Notice**: "[Input]" is not ranked and lacks broad recognition. Proceed with caution.
"""


def _execute_tool_call(tool_call, logs):
    """Run one model-requested tool call and return its ``tool`` message.

    Any failure (bad JSON arguments, tool exception) is turned into a
    "Tool Error: ..." result so the conversation can continue.
    """
    fname = tool_call.function.name
    try:
        args = json.loads(tool_call.function.arguments)
        logs.append(f"🔍 Checking: {args.get('journal_name') or args.get('query')}")

        if fname == "fetch_metadata":
            result = fetch_metadata(args.get("query"))
        elif fname == "check_ranking":
            result = check_ranking(args.get("journal_name"))
        else:
            result = "Error: Unknown Tool"
    except Exception as e:
        result = f"Tool Error: {str(e)}"

    logs.append(f"✅ Result: {str(result)[:80]}...")
    return {
        "tool_call_id": tool_call.id,
        "role": "tool",
        "name": fname,
        "content": str(result)
    }


def run_agent_with_logs(user_input, api_key, *, model="gpt-4o", max_turns=4):
    """Run the tool-calling audit agent and collect a step-by-step log.

    Args:
        user_input: The DOI or journal name typed by the user.
        api_key: OpenAI key supplied with the request; when empty the
            module-level SYSTEM_OPENAI_KEY is used instead.
        model: Chat model name (keyword-only).
        max_turns: Maximum number of model round-trips before giving up
            (keyword-only).

    Returns:
        A ``(reply, logs)`` tuple: the Markdown reply (or an error/timeout
        message) and the list of log lines. The function never raises.
    """
    # Reject blank input before spending an API call on it.
    if not isinstance(user_input, str) or not user_input.strip():
        return "⚠️ **Notice**: Please enter a DOI or journal name.", ["⚠️ Empty input"]

    logs = []

    active_key = api_key if api_key else SYSTEM_OPENAI_KEY
    if not active_key:
        return "❌ **Error**: No API Key provided. Please enter your OpenAI API Key.", ["❌ Error: Missing API Key"]

    try:
        client = OpenAI(api_key=active_key)
    except Exception as e:
        return f"❌ **Error**: Invalid API Key format. {str(e)}", ["❌ Client Init Failed"]

    messages = [{"role": "system", "content": _AUDITOR_SYSTEM_PROMPT}, {"role": "user", "content": user_input}]
    logs.append(f"🧠 System: Analyzing '{user_input}'...")

    try:
        for turn in range(1, max_turns + 1):
            logs.append(f"🤔 Step {turn}: Thinking...")

            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                tools=tools_schema,
                temperature=0
            )
            msg = resp.choices[0].message
            # The assistant message must precede the tool results that answer it.
            messages.append(msg)

            if not msg.tool_calls:
                logs.append("📝 Report generated.")
                return msg.content, logs

            for tc in msg.tool_calls:
                messages.append(_execute_tool_call(tc, logs))

        # The model kept requesting tools for every allowed turn.
        return "⚠️ Timeout: Analysis too complex.", logs
    except Exception as e:
        print(traceback.format_exc())
        return f"**API Error**: {str(e)} (Please check your Key)", logs
|
|
| |
| |
| |
# WSGI application object; the route decorators below register on it.
app = Flask(import_name=__name__)
|
|
# Single-page chat UI returned by the index route. The page posts
# {message, api_key} as JSON to /chat, appends the returned log lines to the
# right-hand panel and renders the reply as Markdown with marked.
# NOTE(review): bot replies are assigned to innerHTML straight from
# marked.parse() with no sanitization step visible here - confirm whether
# model output can carry raw HTML and add a sanitizer if so.
# NOTE(review): the string is passed through render_template_string, so any
# future "{{" or "{%" added to the CSS/JS would be read as template syntax.
CHAT_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Journal Authority Auditor</title>
<style>
body { font-family: 'Segoe UI', system-ui; background: #2c3e50; display: flex; justify-content: center; height: 100vh; margin: 0; }
.container { display: flex; width: 95%; max-width: 1400px; height: 95vh; gap: 20px; margin-top: 2.5vh; }
.chat-panel { flex: 1.2; background: #f4f7f6; border-radius: 12px; display: flex; flex-direction: column; overflow: hidden; }
.header { background: #34495e; color: white; padding: 15px; font-weight: bold; display: flex; justify-content: space-between; align-items: center;}
.messages-area { flex: 1; padding: 20px; overflow-y: auto; display: flex; flex-direction: column; gap: 15px; }
.message { max-width: 85%; padding: 14px; border-radius: 12px; line-height: 1.6; }
.bot { background: white; align-self: flex-start; border-left: 4px solid #3498db; }
.user { background: #3498db; color: white; align-self: flex-end; }

/* 顶部 Key 输入栏 */
.key-bar { background: #ecf0f1; padding: 10px 20px; border-bottom: 1px solid #ddd; display: flex; gap: 10px; align-items: center; font-size: 0.9rem;}
.key-input { border: 1px solid #bdc3c7; padding: 5px 10px; border-radius: 4px; width: 200px; font-family: monospace; }

.input-form { padding: 20px; background: white; display: flex; gap: 10px; }
input[type="text"] { flex: 1; padding: 10px; border-radius: 8px; border: 1px solid #ccc; }
button { padding: 10px 20px; background: #2c3e50; color: white; border: none; border-radius: 8px; cursor: pointer; }
button:hover { background: #1a252f; }

.brain-panel { flex: 0.8; background: #1e272e; border-radius: 12px; color: #0fb9b1; padding: 15px; font-family: monospace; overflow-y: auto; }
table { width: 100%; border-collapse: collapse; margin: 10px 0; background: white; }
th, td { border: 1px solid #ddd; padding: 8px; }
blockquote { border-left: 4px solid #f1c40f; margin: 5px 0; padding: 10px; background: #fffbf0; }
</style>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
</head>
<body>
<div class="container">
<div class="chat-panel">
<div class="header">
<span>🛡️ Journal Auditor</span>
</div>
<div class="key-bar">
<span>🔑 OpenAI Key (Optional):</span>
<input type="password" id="user-key" class="key-input" placeholder="sk-..." autocomplete="new-password">
<span style="color: #7f8c8d; font-size: 0.8em;">(Leave empty to use System Key if available)</span>
</div>

<div class="messages-area" id="chat-box"><div class="message bot">System Online. Enter a DOI or Journal Name.</div></div>

<form class="input-form" onsubmit="event.preventDefault(); send();">
<input type="text" id="inp" placeholder="Type here..." autocomplete="off">
<button id="btn-send" onclick="send()">Analyze</button>
</form>
</div>
<div class="brain-panel" id="log-box"><div>TERMINAL LOG...</div></div>
</div>
<script>
async function send(){
let i = document.getElementById('inp');
let k = document.getElementById('user-key'); // 获取 Key
let btn = document.getElementById('btn-send');
let txt = i.value.trim();
let apiKey = k.value.trim();

if(!txt) return;

i.disabled = true; btn.disabled = true; btn.innerText = "...";
addMsg(txt, 'user'); i.value = '';

let logsDiv = document.getElementById('log-box');

try {
let r = await fetch('/chat', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({
message: txt,
api_key: apiKey // 发送 Key 给后端
})
});
let d = await r.json();

if(d.logs) {
d.logs.forEach(l => {
let div = document.createElement('div');
div.innerText = l;
div.style.marginBottom = "5px";
div.style.borderLeft = "2px solid #555";
logsDiv.appendChild(div);
});
logsDiv.scrollTop = logsDiv.scrollHeight;
}
addMsg(d.reply || "Error", 'bot', true);
} catch(e) {
addMsg("Server Error: " + e, 'bot');
} finally {
i.disabled = false; btn.disabled = false; btn.innerText = "Analyze"; i.focus();
}
}
function addMsg(txt, cls, html=false){
let d = document.createElement('div');
d.className = 'message ' + cls;
if(html) d.innerHTML = marked.parse(txt); else d.innerText = txt;
let box = document.getElementById('chat-box');
box.appendChild(d);
box.scrollTop = box.scrollHeight;
}
</script></body></html>
"""
|
|
@app.route('/')
def index():
    """Serve the chat page."""
    page = render_template_string(CHAT_HTML)
    return page
|
|
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat request from the page.

    Expects a JSON object with ``message`` and an optional ``api_key``.
    Always answers with JSON ``{"reply": str, "logs": list}``; problems are
    reported in ``reply`` rather than through the HTTP status, because the
    page only reads those two fields.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so a clear message can be returned.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"reply": "Error: request body must be a JSON object.", "logs": []})

        # "or ''" also covers an explicit null, which .get()'s default does not.
        user_key = str(data.get('api_key') or '').strip()
        message = str(data.get('message') or '')
        if not message.strip():
            return jsonify({"reply": "Error: message is empty.", "logs": []})

        reply, logs = run_agent_with_logs(message, user_key)
        return jsonify({"reply": reply, "logs": logs})
    except Exception as e:
        return jsonify({"reply": f"Error: {str(e)}", "logs": []})
|
|
if __name__ == "__main__":
    # Listen on every interface so the server is reachable from outside the
    # local machine or container.
    server_options = {"host": '0.0.0.0', "port": 7860}
    app.run(**server_options)