# Spaces: jscmp4 / 810proj (status: Sleeping) -- File size: 16,170 Bytes

import os
import json
import re
import requests
import traceback
import pandas as pd
from datetime import datetime
from flask import Flask, request, jsonify, render_template_string
from openai import OpenAI
from pymongo import MongoClient
from difflib import SequenceMatcher

# ==========================================
# 1. Configuration and connections
# ==========================================
# Database credentials come from the deployment's environment variables;
# end users are never asked for them.
MONGO_USER = os.environ.get("MONGO_USER")
MONGO_PASS = os.environ.get("MONGO_PASS")
MONGO_CLUSTER = os.environ.get("MONGO_CLUSTER", "cluster0.mh3esar.mongodb.net")
# Optional system-wide OpenAI key, used when the user does not supply one.
# Leave the variable unset to require every user to bring their own key.
SYSTEM_OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

# Module-level MongoDB handles shared by the whole process. They are bound to
# None up front so that a failed connection attempt leaves the names defined
# (callers then see a clear "NoneType" error instead of a NameError).
mongo_client = None
db = None
collection = None
try:
    # NOTE(review): the credentials are interpolated verbatim. PyMongo expects
    # percent-escaped userinfo, so values containing reserved characters must
    # already be escaped in the environment -- confirm with the deployment.
    uri = f"mongodb+srv://{MONGO_USER}:{MONGO_PASS}@{MONGO_CLUSTER}/?retryWrites=true&w=majority"
    mongo_client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    db = mongo_client["proj810_db"]
    collection = db["rankings"]
    print("✅ 数据库连接成功")
except Exception as e:
    # Drop any partially initialised handle so the three names stay consistent.
    mongo_client = db = collection = None
    print(f"❌ 数据库连接失败: {e}")

# ==========================================
# 2. Tool definitions (stateless, no API key required)
# ==========================================
def fetch_metadata(query):
    """Resolve a DOI to its publication venue through the OpenAlex works API.

    Args:
        query: DOI string. Surrounding whitespace and a leading ``doi:`` or
            ``http(s)://(dx.)doi.org/`` prefix are removed before the lookup.

    Returns:
        Always a string; no exception escapes this function.
        - On success, JSON with ``journal``, ``publisher`` and ``type`` keys
          (plus ``note`` when no source name could be found).
        - ``"Invalid DOI"`` when *query* is not a string containing ``"10."``.
        - ``"OpenAlex: ..."``, ``"OpenAlex Error: <status>"`` or
          ``"Metadata Error: ..."`` for every other failure.
    """
    # Validate before touching the network; None / non-string arguments used
    # to raise TypeError here, outside the try block.
    if not isinstance(query, str) or "10." not in query:
        return "Invalid DOI"

    # Reduce the input to a bare DOI so the "doi:" URL form below is well-formed.
    doi = query.strip()
    lowered = doi.lower()
    for prefix in ("https://doi.org/", "http://doi.org/",
                   "https://dx.doi.org/", "http://dx.doi.org/", "doi:"):
        if lowered.startswith(prefix):
            doi = doi[len(prefix):].strip()
            break

    try:
        r = requests.get(f"https://api.openalex.org/works/doi:{doi}", timeout=10)
        if r.status_code == 404:
            return "OpenAlex: DOI Not Found"
        if r.status_code != 200:
            return f"OpenAlex Error: {r.status_code}"

        work = r.json()
        if not work:
            return "OpenAlex: Empty Response"

        # Prefer the primary location's source.
        source = (work.get('primary_location') or {}).get('source') or {}
        name = source.get('display_name')
        pub = source.get('host_organization_name') or source.get('publisher')

        # Otherwise take the first listed location whose source has a name;
        # its publisher then replaces the primary one.
        if not name:
            for loc in work.get('locations') or []:
                alt = loc.get('source') or {}
                if alt.get('display_name'):
                    name = alt.get('display_name')
                    pub = alt.get('host_organization_name') or alt.get('publisher')
                    break

        type_ = work.get('type')
        if name:
            return json.dumps({"journal": name, "publisher": pub, "type": type_})
        return json.dumps({"journal": "Unknown Source", "publisher": pub or "Unknown", "type": type_, "note": "Source name not found"})
    except Exception as e:
        # Network failures, malformed JSON and unexpected payload shapes all
        # end up here so the agent always receives a plain string.
        return f"Metadata Error: {str(e)}"

def check_ranking(journal_name):
    """Look up a journal's ranking metrics in the ``rankings`` collection.

    The title is first matched exactly (case-insensitive). If that fails, up
    to 20 titles containing the longest significant word of the name are
    fetched and the one with the highest ``SequenceMatcher`` ratio above 0.85
    is used.

    Args:
        journal_name: Journal title. Quote characters and surrounding
            whitespace are ignored.

    Returns:
        Always a string; no exception escapes this function.
        - JSON with ``Title``, ``Quartile``, ``SJR``, ``H_Index``,
          ``Total_Docs``, ``Citations_Per_Doc``, ``Publisher``,
          ``Global_Rank`` and ``Categories`` when a record is found.
        - ``"Error: Empty Name"`` when the name is empty, or empty once quotes
          and whitespace are removed.
        - ``"DB: Not Found (Cleaned: <name>)"`` when nothing matches.
        - ``"DB Error: ..."`` on any database or serialisation failure.
    """
    try:
        if not journal_name:
            return "Error: Empty Name"
        clean = journal_name.replace('"', '').replace("'", "").strip()
        if not clean:
            # A name made only of quotes/whitespace would otherwise be
            # searched with the regex "^$".
            return "Error: Empty Name"

        # 1) Exact, case-insensitive title match.
        safe = re.escape(clean)
        res = collection.find_one({"Title": {"$regex": f"^{safe}$", "$options": "i"}})

        # 2) Fuzzy fallback on the longest significant word.
        if not res:
            stopwords = {"the", "of", "and", "in", "on", "for", "journal", "international", "proceedings"}
            words = [w for w in re.split(r'[^a-zA-Z]+', clean.lower()) if len(w) > 3 and w not in stopwords]
            if words:
                longest_word = max(words, key=len)
                candidates = collection.find({"Title": {"$regex": longest_word, "$options": "i"}}).limit(20)
                target = clean.lower()
                best_score = 0.85  # similarity a candidate must exceed to be accepted
                for cand in candidates:
                    score = SequenceMatcher(None, target, cand['Title'].lower()).ratio()
                    if score > best_score:
                        best_score = score
                        res = cand

        if not res:
            return f"DB: Not Found (Cleaned: {clean})"

        # Some column names carry extra text around these labels, so locate
        # them by substring and fall back to the plain label.
        docs_col = next((k for k in res if "Total Docs" in k), "Total Docs")
        cit_col = next((k for k in res if "Citations / Doc" in k), "Citations / Doc")

        def get_safe_val(key, default="-"):
            """Return the field as a float when possible, else its raw value."""
            val = res.get(key, default)
            if val == default:
                return default
            try:
                if isinstance(val, str):
                    val = val.replace(',', '')  # drop thousands separators
                return float(val)
            except (TypeError, ValueError, OverflowError):
                return val

        # Global percentile -> "Top X%" string; missing, unparsable or
        # non-positive percentiles are reported as "N/A".
        try:
            gp_val = float(str(res.get('Global_Percentile', '0')).replace('%', ''))
        except (TypeError, ValueError):
            gp_val = 0.0
        top_percent = 100 - gp_val if gp_val > 0 else None
        rank_str = f"Top {top_percent:.1f}%" if top_percent is not None else "N/A"

        quartile = res.get("SJR Best Quartile", "-")
        if (not quartile or quartile == "-") and top_percent is not None:
            # Classify on the displayed (rounded) value so the implied
            # quartile never disagrees with the rank string.
            shown = float(f"{top_percent:.1f}")
            if shown <= 25:
                quartile = "Q1 (Implied)"
            elif shown <= 50:
                quartile = "Q2 (Implied)"
            elif shown <= 75:
                quartile = "Q3 (Implied)"
            else:
                quartile = "Q4 (Implied)"

        return json.dumps({
            "Title": res.get("Title"),
            "Quartile": quartile,
            "SJR": get_safe_val("SJR"),
            "H_Index": get_safe_val("H index"),
            "Total_Docs": get_safe_val(docs_col),
            "Citations_Per_Doc": get_safe_val(cit_col),
            "Publisher": res.get("Publisher", "Unknown"),
            "Global_Rank": rank_str,
            "Categories": res.get("Categories", "")
        })
    except Exception as e:
        return f"DB Error: {str(e)}"

# Function-calling schema passed to the chat completion API: one entry per
# tool, each taking a single required string argument.
tools_schema = [
    {
        "type": "function",
        "function": {
            "name": "fetch_metadata",
            "description": "Get journal name from DOI.",
            "parameters": {
                "type": "object",
                "properties": {"query": {"type": "string"}},
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "check_ranking",
            "description": "Check journal metrics in DB.",
            "parameters": {
                "type": "object",
                "properties": {"journal_name": {"type": "string"}},
                "required": ["journal_name"],
            },
        },
    },
]

# ==========================================
# 3. Agent core (client created per request)
# ==========================================
def run_agent_with_logs(user_input, api_key):
    """Run the tool-calling audit agent for one request and collect a trace.

    Args:
        user_input: Text typed by the user (a DOI or a journal name).
        api_key: OpenAI key supplied by the user; when empty the system-wide
            key is used instead.

    Returns:
        A ``(reply, logs)`` tuple. ``reply`` is the model's final message
        content, or an error / timeout notice. ``logs`` is the list of
        progress lines recorded along the way.
    """
    # A key typed by the user wins; otherwise fall back to the system key.
    active_key = api_key or SYSTEM_OPENAI_KEY
    if not active_key:
        return "❌ **Error**: No API Key provided. Please enter your OpenAI API Key.", ["❌ Error: Missing API Key"]

    # The client is built per call because the key can differ between requests.
    try:
        client = OpenAI(api_key=active_key)
    except Exception as init_err:
        return f"❌ **Error**: Invalid API Key format. {init_err}", ["❌ Client Init Failed"]

    # System prompt, including the fallback rules for prestigious venues that
    # are missing from the database (the "MISQ" case).
    system_prompt = """
    You are an Expert Journal Authority Auditor.
    YOUR GOAL: Verify the quality of academic venues using the `check_ranking` tool AND your own domain knowledge.

    ### 🧠 CRITICAL THINKING PROCESS:
    1. **Analyze Input**: Normalize acronyms (e.g., "MISQ" -> "MIS Quarterly").
    2. **Check DB**: Call `check_ranking` first.
    3. **Evaluate Result**:
       - If DB returns Q1/Top Tier -> Report it.
       - If DB returns "Not Found" or low rank, BUT you know this is a prestigious venue (e.g., FT50, CCF-A, AIS Senior Scholars' Basket, Nature/Science family) -> **YOU MUST OVERRIDE/SUPPLEMENT**.
    
    ### 📝 REPORT FORMAT (Strict Markdown)

    **Situation 1: Found in DB (Standard)**
    | Metric | Value | Status |
    | :--- | :--- | :--- |
    | 📖 Venue | [Title] | - |
    | 🏆 Quartile | [Q1/Q2...] | [✅/⚠️] |
    | 📉 SJR | [Value] | - |
    
    **Situation 2: Not in DB / Low Rank BUT Prestigious (The "MIS Quarterly" Case)**
    > ⚠️ **Database Note**: Not found in current SJR index (or name mismatch).
    
    ### 🧠 AI Domain Knowledge Supplement
    * **Prestige Status**: [e.g., "Undisputed Top Tier", "FT50 Listed", "AIS Basket of 8"]
    * **Consensus**: [Explain WHY it is top tier despite missing data.]
    * **Verdict**: ✅ **Highly Recommended** (Based on Domain Reputation)

    **Situation 3: Not in DB & Not Famous**
    > ⚠️ **Notice**: "[Input]" is not ranked and lacks broad recognition. Proceed with caution.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_input},
    ]
    logs = [f"🧠 System: Analyzing '{user_input}'..."]

    try:
        # Tool name -> (implementation, name of its single argument).
        tool_table = {
            "fetch_metadata": (fetch_metadata, "query"),
            "check_ranking": (check_ranking, "journal_name"),
        }

        # At most four model round-trips per request.
        for step in range(1, 5):
            logs.append(f"🤔 Step {step}: Thinking...")

            completion = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools_schema,
                temperature=0
            )
            assistant_msg = completion.choices[0].message
            messages.append(assistant_msg)

            # No tool requests means the model has produced its final report.
            if not assistant_msg.tool_calls:
                logs.append("📝 Report generated.")
                return assistant_msg.content, logs

            for call in assistant_msg.tool_calls:
                tool_name = call.function.name
                raw_args = call.function.arguments
                try:
                    call_args = json.loads(raw_args)
                    logs.append(f"🔍 Checking: {call_args.get('journal_name') or call_args.get('query')}")

                    entry = tool_table.get(tool_name)
                    if entry is None:
                        result = "Error: Unknown Tool"
                    else:
                        tool_fn, arg_key = entry
                        result = tool_fn(call_args.get(arg_key))
                except Exception as tool_err:
                    # Bad JSON or a failing tool is reported back to the model
                    # instead of aborting the conversation.
                    result = f"Tool Error: {tool_err}"

                logs.append(f"✅ Result: {str(result)[:80]}...")
                messages.append({
                    "tool_call_id": call.id,
                    "role": "tool",
                    "name": tool_name,
                    "content": str(result)
                })

        # Still receiving tool calls after the last allowed step.
        return "⚠️ Timeout: Analysis too complex.", logs
    except Exception as api_err:
        print(traceback.format_exc())
        return f"**API Error**: {api_err} (Please check your Key)", logs

# ==========================================
# 4. Flask web server (with an API key input box)
# ==========================================
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)

# Single-page chat UI served at "/". The page posts {message, api_key} as JSON
# to /chat, shows the returned "reply" as a chat bubble and appends the
# returned "logs" to the side panel.
# The submit button relies on the form's onsubmit handler only, so a click and
# the Enter key share one code path into send().
# NOTE(review): bot replies are rendered with marked.parse() and assigned to
# innerHTML without sanitisation; model output containing raw HTML would be
# injected into the page. Fixing this needs an HTML sanitiser on the client.
CHAT_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Journal Authority Auditor</title>
<style>
    body { font-family: 'Segoe UI', system-ui; background: #2c3e50; display: flex; justify-content: center; height: 100vh; margin: 0; }
    .container { display: flex; width: 95%; max-width: 1400px; height: 95vh; gap: 20px; margin-top: 2.5vh; }
    .chat-panel { flex: 1.2; background: #f4f7f6; border-radius: 12px; display: flex; flex-direction: column; overflow: hidden; }
    .header { background: #34495e; color: white; padding: 15px; font-weight: bold; display: flex; justify-content: space-between; align-items: center;}
    .messages-area { flex: 1; padding: 20px; overflow-y: auto; display: flex; flex-direction: column; gap: 15px; }
    .message { max-width: 85%; padding: 14px; border-radius: 12px; line-height: 1.6; }
    .bot { background: white; align-self: flex-start; border-left: 4px solid #3498db; }
    .user { background: #3498db; color: white; align-self: flex-end; }
    
    /* 顶部 Key 输入栏 */
    .key-bar { background: #ecf0f1; padding: 10px 20px; border-bottom: 1px solid #ddd; display: flex; gap: 10px; align-items: center; font-size: 0.9rem;}
    .key-input { border: 1px solid #bdc3c7; padding: 5px 10px; border-radius: 4px; width: 200px; font-family: monospace; }
    
    .input-form { padding: 20px; background: white; display: flex; gap: 10px; }
    input[type="text"] { flex: 1; padding: 10px; border-radius: 8px; border: 1px solid #ccc; }
    button { padding: 10px 20px; background: #2c3e50; color: white; border: none; border-radius: 8px; cursor: pointer; }
    button:hover { background: #1a252f; }
    
    .brain-panel { flex: 0.8; background: #1e272e; border-radius: 12px; color: #0fb9b1; padding: 15px; font-family: monospace; overflow-y: auto; }
    table { width: 100%; border-collapse: collapse; margin: 10px 0; background: white; }
    th, td { border: 1px solid #ddd; padding: 8px; }
    blockquote { border-left: 4px solid #f1c40f; margin: 5px 0; padding: 10px; background: #fffbf0; }
</style>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
</head>
<body>
<div class="container">
    <div class="chat-panel">
        <div class="header">
            <span>🛡️ Journal Auditor</span>
        </div>
        <div class="key-bar">
            <span>🔑 OpenAI Key (Optional):</span>
            <input type="password" id="user-key" class="key-input" placeholder="sk-..." autocomplete="new-password">
            <span style="color: #7f8c8d; font-size: 0.8em;">(Leave empty to use System Key if available)</span>
        </div>

        <div class="messages-area" id="chat-box"><div class="message bot">System Online. Enter a DOI or Journal Name.</div></div>
        
        <form class="input-form" onsubmit="event.preventDefault(); send();">
            <input type="text" id="inp" placeholder="Type here..." autocomplete="off">
            <button id="btn-send" type="submit">Analyze</button>
        </form>
    </div>
    <div class="brain-panel" id="log-box"><div>TERMINAL LOG...</div></div>
</div>
<script>
async function send(){
    let i = document.getElementById('inp');
    let k = document.getElementById('user-key'); // 获取 Key
    let btn = document.getElementById('btn-send');
    let txt = i.value.trim();
    let apiKey = k.value.trim();
    
    if(!txt) return;
    
    i.disabled = true; btn.disabled = true; btn.innerText = "...";
    addMsg(txt, 'user'); i.value = '';
    
    let logsDiv = document.getElementById('log-box');
    
    try {
        let r = await fetch('/chat', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                message: txt,
                api_key: apiKey // 发送 Key 给后端
            })
        });
        let d = await r.json();
        
        if(d.logs) {
            d.logs.forEach(l => {
                let div = document.createElement('div');
                div.innerText = l;
                div.style.marginBottom = "5px";
                div.style.borderLeft = "2px solid #555";
                logsDiv.appendChild(div);
            });
            logsDiv.scrollTop = logsDiv.scrollHeight;
        }
        addMsg(d.reply || "Error", 'bot', true);
    } catch(e) { 
        addMsg("Server Error: " + e, 'bot'); 
    } finally {
        i.disabled = false; btn.disabled = false; btn.innerText = "Analyze"; i.focus();
    }
}
function addMsg(txt, cls, html=false){
    let d = document.createElement('div');
    d.className = 'message ' + cls;
    if(html) d.innerHTML = marked.parse(txt); else d.innerText = txt;
    let box = document.getElementById('chat-box');
    box.appendChild(d);
    box.scrollTop = box.scrollHeight;
}
</script></body></html>
"""

@app.route('/')
def index():
    """Serve the chat page rendered from the CHAT_HTML template string."""
    page = render_template_string(CHAT_HTML)
    return page

@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat request from the front end.

    Expects a JSON object with a ``message`` field and an optional ``api_key``
    field. Always answers with JSON of the form
    ``{"reply": <str>, "logs": <list>}``; failures are reported inside
    ``reply`` rather than raised, because the page displays ``reply`` as-is.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body
        # instead of raising, so all bad payloads share one error path.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"reply": "Error: Request body must be a JSON object.", "logs": []})

        # `or ''` also covers an explicit JSON null, which would otherwise
        # fail on .strip().
        user_key = str(data.get('api_key') or '').strip()
        message = str(data.get('message') or '').strip()
        if not message:
            # Nothing to analyse; skip the model call.
            return jsonify({"reply": "Error: Empty message.", "logs": []})

        reply, logs = run_agent_with_logs(message, user_key)
        return jsonify({"reply": reply, "logs": logs})
    except Exception as e:
        return jsonify({"reply": f"Error: {str(e)}", "logs": []})

if __name__ == '__main__':
    # When run as a script, start the built-in server on all interfaces, port 7860.
    server_options = {"host": '0.0.0.0', "port": 7860}
    app.run(**server_options)