jscmp4 committed on
Commit
2d1764d
·
verified ·
1 Parent(s): 1abeda3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +312 -0
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import math
import os
import re
import traceback
from datetime import datetime
from difflib import SequenceMatcher
from urllib.parse import quote_plus

import pandas as pd
import requests
from flask import Flask, request, jsonify, render_template_string
from openai import OpenAI
from pymongo import MongoClient
12
+
13
+ # ==========================================
14
+ # 1. Configuration and connections (via environment variables)
15
+ # ==========================================
16
+ # Set these under Settings -> Variables and secrets on Hugging Face
17
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
MONGO_USER = os.environ.get("MONGO_USER")
MONGO_PASS = os.environ.get("MONGO_PASS")
MONGO_CLUSTER = os.environ.get("MONGO_CLUSTER", "cluster0.mh3esar.mongodb.net")

# Initialise the OpenAI client.
client = OpenAI(api_key=OPENAI_API_KEY)

# Initialise MongoDB. The handles are pre-bound to None so that a failed
# setup leaves `collection` defined (callers can test it) instead of
# surfacing later as a NameError.
mongo_client = None
db = None
collection = None
try:
    if not (MONGO_USER and MONGO_PASS):
        raise RuntimeError("MONGO_USER / MONGO_PASS are not set")
    # Credentials must be percent-escaped, otherwise characters such as
    # "@", ":" or "/" in the password corrupt the connection string.
    uri = (
        f"mongodb+srv://{quote_plus(MONGO_USER)}:{quote_plus(MONGO_PASS)}"
        f"@{MONGO_CLUSTER}/?retryWrites=true&w=majority"
    )
    mongo_client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    db = mongo_client["proj810_db"]
    collection = db["rankings"]
    # MongoClient connects lazily, so ping the server before claiming
    # success. `collection` is bound before the ping on purpose: if the
    # server is only temporarily unreachable, later queries can still work.
    mongo_client.admin.command("ping")
    print("✅ 数据库连接成功")
except Exception as e:
    # Best-effort start-up: report the failure and keep the web app running.
    print(f"❌ 数据库连接失败: {e}")
34
+
35
+ # ==========================================
36
+ # 2. Tool definitions (logic unchanged)
37
+ # ==========================================
38
# Matches a bare DOI ("10.<registrant>/<suffix>") anywhere inside a string.
_DOI_PATTERN = re.compile(r"10\.\d{4,9}/\S+")


def _extract_doi(query):
    """Return the DOI contained in *query*, or None when there is none."""
    if not isinstance(query, str):
        return None
    text = query.strip()
    match = _DOI_PATTERN.search(text)
    if match:
        # Drops wrappers such as "https://doi.org/" or "doi:" around the DOI.
        return match.group(0)
    # Anything else that contains "10." is still attempted as-is.
    return text if "10." in text else None


def _pick_source(work):
    """Return ``(journal_name, publisher)`` from an OpenAlex work record.

    The primary location is preferred; if it carries no source name, the
    first entry of ``locations`` that has one is used instead.
    """
    primary = (work.get('primary_location') or {}).get('source') or {}
    publisher = primary.get('host_organization_name') or primary.get('publisher')
    name = primary.get('display_name')
    if name:
        return name, publisher
    for location in work.get('locations') or []:
        source = (location or {}).get('source') or {}
        if source.get('display_name'):
            return (
                source.get('display_name'),
                source.get('host_organization_name') or source.get('publisher'),
            )
    return None, publisher


def fetch_metadata(query):
    """Look up venue metadata for a DOI through the OpenAlex API.

    Args:
        query: A DOI, optionally wrapped in a URL or prefixed with "doi:".

    Returns:
        A JSON string with "journal", "publisher" and "type" keys (plus a
        "note" key when no source name could be found), or a plain-text
        message describing why the lookup failed. The function never raises.
    """
    doi = _extract_doi(query)
    if doi is None:
        return "Invalid DOI"
    try:
        url = f"https://api.openalex.org/works/doi:{doi}"
        r = requests.get(url, timeout=10)
        if r.status_code == 404:
            return "OpenAlex: DOI Not Found"
        if r.status_code != 200:
            return f"OpenAlex Error: {r.status_code}"

        work = r.json()
        if not work:
            return "OpenAlex: Empty Response"

        name, pub = _pick_source(work)
        type_ = work.get('type')
        if name:
            return json.dumps({"journal": name, "publisher": pub, "type": type_})
        return json.dumps({
            "journal": "Unknown Source",
            "publisher": pub or "Unknown",
            "type": type_,
            "note": "Source name not found",
        })
    except Exception as e:
        # Tool boundary: the text goes back to the model, so any failure
        # (network, JSON decoding, unexpected payload shape) is reported
        # as a message rather than propagated.
        return f"Metadata Error: {str(e)}"
68
+
69
# Generic title words that are useless as a search keyword.
_TITLE_STOPWORDS = frozenset({
    "the", "of", "and", "in", "on", "for",
    "journal", "international", "proceedings",
})


def _is_blank(value):
    """Return True for falsy values, "-" placeholders and float NaN."""
    if isinstance(value, float) and math.isnan(value):
        return True
    if isinstance(value, str):
        return value.strip() in ("", "-")
    return not value


def _or_default(value, default):
    """Replace a float NaN with *default*; return anything else unchanged."""
    if isinstance(value, float) and math.isnan(value):
        return default
    return value


def _to_number(record, key, default="-"):
    """Read ``record[key]`` as a float.

    Thousands separators in strings are removed first. A missing key or a
    non-finite number yields *default*; a value that cannot be converted
    is returned unchanged.
    """
    value = record.get(key, default)
    if value == default:
        return default
    try:
        number = float(value.replace(',', '') if isinstance(value, str) else value)
    except (TypeError, ValueError, OverflowError):
        return value
    # NaN / Infinity are not valid JSON, so report them as missing.
    return number if math.isfinite(number) else default


def _top_share(record):
    """Return ``100 - Global_Percentile`` as a float, or None if unusable."""
    raw = str(record.get('Global_Percentile', '0')).replace('%', '')
    try:
        percentile = float(raw)
    except ValueError:
        return None
    if not math.isfinite(percentile) or percentile <= 0:
        return None
    return 100 - percentile


def _implied_quartile(share):
    """Map a "top X%" share to an implied quartile label."""
    # Compare the one-decimal figure that is shown in Global_Rank, so the
    # label and the displayed percentage never disagree at a boundary.
    shown = float(f"{share:.1f}")
    if shown <= 25:
        return "Q1 (Implied)"
    if shown <= 50:
        return "Q2 (Implied)"
    if shown <= 75:
        return "Q3 (Implied)"
    return "Q4 (Implied)"


def _fuzzy_lookup(clean):
    """Return the stored record whose Title best resembles *clean*, or None.

    Candidates are the first 20 records whose Title contains the longest
    significant word of the name; only similarity ratios above 0.85 count.
    """
    words = [
        w for w in re.split(r'[^a-zA-Z]+', clean.lower())
        if len(w) > 3 and w not in _TITLE_STOPWORDS
    ]
    if not words:
        return None
    keyword = max(words, key=len)
    candidates = collection.find(
        {"Title": {"$regex": keyword, "$options": "i"}}
    ).limit(20)

    target = clean.lower()
    best_match, best_score = None, 0.85
    for cand in candidates:
        title = cand.get('Title')
        if not isinstance(title, str):
            # A malformed record must not abort the whole lookup.
            continue
        score = SequenceMatcher(None, target, title.lower()).ratio()
        if score > best_score:
            best_match, best_score = cand, score
    return best_match


def _build_report(record):
    """Turn a rankings record into the metrics dict returned to the model."""
    # Column names may carry a suffix, so match them by substring.
    docs_col = next((k for k in record if "Total Docs" in k), "Total Docs")
    cit_col = next((k for k in record if "Citations / Doc" in k), "Citations / Doc")

    share = _top_share(record)
    rank_str = f"Top {share:.1f}%" if share is not None else "N/A"

    quartile = record.get("SJR Best Quartile", "-")
    if _is_blank(quartile):
        quartile = _implied_quartile(share) if share is not None else "-"

    return {
        "Title": record.get("Title"),
        "Quartile": quartile,
        "SJR": _to_number(record, "SJR"),
        "H_Index": _to_number(record, "H index"),
        "Total_Docs": _to_number(record, docs_col),
        "Citations_Per_Doc": _to_number(record, cit_col),
        "Publisher": _or_default(record.get("Publisher", "Unknown"), "Unknown"),
        "Global_Rank": rank_str,
        "Categories": _or_default(record.get("Categories", ""), ""),
    }


def check_ranking(journal_name):
    """Look up a journal's ranking metrics in the rankings collection.

    An exact, case-insensitive Title match is tried first, then a fuzzy
    match on the longest significant word of the name.

    Args:
        journal_name: Venue name; surrounding whitespace and quote
            characters are ignored.

    Returns:
        A JSON string of metrics when a record is found, otherwise a
        plain-text message ("Error: Empty Name", "DB: Not Found (...)" or
        "DB Error: ..."). The function never raises.
    """
    try:
        if not journal_name:
            return "Error: Empty Name"
        clean = str(journal_name).replace('"', '').replace("'", "").strip()
        if not clean:
            # Nothing is left after stripping quotes and whitespace.
            return "Error: Empty Name"
        if collection is None:
            return "DB Error: database connection is not available"

        safe = re.escape(clean)
        res = collection.find_one({"Title": {"$regex": f"^{safe}$", "$options": "i"}})
        if not res:
            res = _fuzzy_lookup(clean)
        if not res:
            return f"DB: Not Found (Cleaned: {clean})"
        return json.dumps(_build_report(res))
    except Exception as e:
        # Tool boundary: report the failure to the model instead of raising.
        return f"DB Error: {str(e)}"
136
+
137
def _string_arg_tool(name, description, arg_name):
    """Build a function-tool entry that takes one required string argument."""
    parameters = {
        "type": "object",
        "properties": {arg_name: {"type": "string"}},
        "required": [arg_name],
    }
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


# Tool declarations passed to the chat-completions call.
tools_schema = [
    _string_arg_tool("fetch_metadata", "Get journal name from DOI.", "query"),
    _string_arg_tool("check_ranking", "Check journal metrics in DB.", "journal_name"),
]
141
+
142
+ # ==========================================
143
+ # 3. Agent core
144
+ # ==========================================
145
# System prompt that frames the model as a journal auditor and fixes the
# Markdown layout of its final report.
_AUDITOR_SYSTEM_PROMPT = """
You are an Expert Journal Authority Auditor.
YOUR GOAL: Verify the quality of academic venues using the `check_ranking` tool.

### CRITICAL THINKING PROCESS:
1. Analyze the Input (DOI, name, or acronym).
2. Normalize: Convert acronyms (e.g., CVPR) to full titles.
3. Action: Call `check_ranking` with the Cleaned Full Name.

### REPORT FORMAT (Strict Markdown)
If Found:
| Metric | Value | Status |
| :--- | :--- | :--- |
| 📖 Venue | [Title] | - |
| 🏆 Quartile | [Q1-Q4] | [✅/⚠️] |
| 📉 SJR | [Value] | - |

If Not Found:
> ⚠️ Notice: "[Input]" is not ranked.
"""


def run_agent_with_logs(user_input, max_turns=4):
    """Run the tool-calling loop for one user request.

    Each turn asks the model for a reply; requested tool calls are executed
    and their results appended to the conversation, until the model answers
    without tool calls or the turn budget runs out.

    Args:
        user_input: DOI, journal name or acronym typed by the user.
        max_turns: Maximum number of model round-trips (default 4).

    Returns:
        A ``(reply, logs)`` tuple: the Markdown reply (or an error/timeout
        message) and the list of progress lines recorded along the way.
        The function never raises.
    """
    logs = []
    if not isinstance(user_input, str) or not user_input.strip():
        # Nothing to audit: answer locally instead of spending an API call.
        logs.append("⚠️ Empty input: nothing to analyze.")
        return "⚠️ Please enter a DOI, journal name, or acronym.", logs

    # Tool name -> callable taking the decoded argument dict. The lambdas
    # resolve the tool functions at call time.
    tool_handlers = {
        "fetch_metadata": lambda args: fetch_metadata(args.get("query")),
        "check_ranking": lambda args: check_ranking(args.get("journal_name")),
    }

    messages = [
        {"role": "system", "content": _AUDITOR_SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]
    logs.append(f"🧠 System: Analyzing '{user_input}'...")

    try:
        for turn_count in range(1, max_turns + 1):
            logs.append(f"🤔 Step {turn_count}: Thinking...")

            resp = client.chat.completions.create(
                model="gpt-4o",
                messages=messages,
                tools=tools_schema,
                temperature=0,
            )
            msg = resp.choices[0].message
            messages.append(msg)

            if not msg.tool_calls:
                logs.append("📝 Report generated.")
                return msg.content, logs

            for tc in msg.tool_calls:
                fname = tc.function.name
                try:
                    args = json.loads(tc.function.arguments)
                    logs.append(f"🔍 Checking: {args.get('journal_name') or args.get('query')}")
                    handler = tool_handlers.get(fname)
                    res = handler(args) if handler else "Error: Unknown Tool"
                except Exception as e:
                    # Malformed arguments or a failing tool are reported back
                    # to the model so it can recover on the next turn.
                    res = f"Tool Error: {str(e)}"

                logs.append(f"✅ Result: {str(res)[:80]}...")
                # Every tool call must be answered with a matching tool message.
                messages.append({
                    "tool_call_id": tc.id,
                    "role": "tool",
                    "name": fname,
                    "content": str(res),
                })
        return "⚠️ Timeout: Analysis too complex.", logs
    except Exception as e:
        print(traceback.format_exc())
        return f"**System Error**: {str(e)}", logs
217
+
218
+ # ==========================================
219
+ # 4. Flask Web Server
220
+ # ==========================================
221
# WSGI application object; the route handlers in this module register on it.
app = Flask(import_name=__name__)
222
+
223
# Single-page chat UI. This is a slightly condensed version of the page; the
# complete version from the original Colab notebook can be dropped in here.
#
# NOTE(security): bot replies are rendered with marked + innerHTML. The
# script below neutralises "<" before parsing so raw HTML tags in a reply
# never reach the DOM, but marked's own output (e.g. link targets) is not
# sanitised - consider adding an HTML sanitiser such as DOMPurify.
CHAT_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Journal Authority Auditor</title>
<style>
body { font-family: 'Segoe UI', system-ui; background: #2c3e50; display: flex; justify-content: center; height: 100vh; margin: 0; }
.container { display: flex; width: 95%; max-width: 1400px; height: 95vh; gap: 20px; margin-top: 2.5vh; }
.chat-panel { flex: 1.2; background: #f4f7f6; border-radius: 12px; display: flex; flex-direction: column; overflow: hidden; }
.header { background: #34495e; color: white; padding: 15px; font-weight: bold; }
.messages-area { flex: 1; padding: 20px; overflow-y: auto; display: flex; flex-direction: column; gap: 15px; }
.message { max-width: 85%; padding: 14px; border-radius: 12px; line-height: 1.6; }
.bot { background: white; align-self: flex-start; border-left: 4px solid #3498db; }
.user { background: #3498db; color: white; align-self: flex-end; }
.input-form { padding: 20px; background: white; display: flex; gap: 10px; }
input { flex: 1; padding: 10px; border-radius: 8px; border: 1px solid #ccc; }
.brain-panel { flex: 0.8; background: #1e272e; border-radius: 12px; color: #0fb9b1; padding: 15px; font-family: monospace; overflow-y: auto; }
table { width: 100%; border-collapse: collapse; margin: 10px 0; background: white; }
th, td { border: 1px solid #ddd; padding: 8px; }
</style>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
</head>
<body>
<div class="container">
  <div class="chat-panel">
    <div class="header">🛡️ Journal Auditor (HF Space Edition)</div>
    <div class="messages-area" id="chat-box"><div class="message bot">System Online.</div></div>
    <form class="input-form" onsubmit="event.preventDefault(); send();">
      <input type="text" id="inp" placeholder="Type DOI or Journal Name..." autocomplete="off">
      <button type="submit">Analyze</button>
    </form>
  </div>
  <div class="brain-panel" id="log-box"><div>TERMINAL LOG...</div></div>
</div>
<script>
async function send(){
  let i = document.getElementById('inp');
  let txt = i.value.trim();
  if(!txt) return;
  addMsg(txt, 'user'); i.value = '';

  let logsDiv = document.getElementById('log-box');

  try {
    let r = await fetch('/chat', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({message: txt})
    });
    let d = await r.json();

    if(d.logs) {
      d.logs.forEach(l => {
        let div = document.createElement('div');
        div.innerText = l;
        div.style.marginBottom = "5px";
        div.style.borderLeft = "2px solid #555";
        logsDiv.appendChild(div);
      });
      logsDiv.scrollTop = logsDiv.scrollHeight;
    }
    addMsg(d.reply || "Error", 'bot', true);
  } catch(e) { addMsg("Server Error", 'bot'); }
}
function addMsg(txt, cls, html=false){
  let d = document.createElement('div');
  d.className = 'message ' + cls;
  if(html) {
    // Neutralise raw HTML tags before the Markdown is turned into markup.
    d.innerHTML = marked.parse(String(txt).replace(/</g, '&lt;'));
  } else {
    d.innerText = txt;
  }
  let box = document.getElementById('chat-box');
  box.appendChild(d);
  box.scrollTop = box.scrollHeight;
}
</script></body></html>
"""
297
+
298
@app.route('/')
def index():
    """Serve the chat page rendered from the CHAT_HTML template string."""
    page = render_template_string(CHAT_HTML)
    return page
300
+
301
@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat request from the front-end.

    Expects a JSON object with an optional string "message" field and
    always answers with a JSON object holding "reply" and "logs". A
    malformed payload yields HTTP 400 and an unexpected failure HTTP 500;
    the body shape is the same in every case.
    """
    # silent=True: a missing or invalid JSON body becomes None instead of
    # raising, so it can be reported in the usual reply format.
    payload = request.get_json(silent=True)
    message = payload.get('message', '') if isinstance(payload, dict) else None
    if not isinstance(message, str):
        return jsonify({
            "reply": "Error: expected a JSON object with a string 'message' field.",
            "logs": [],
        }), 400

    try:
        reply, logs = run_agent_with_logs(message)
        return jsonify({"reply": reply, "logs": logs})
    except Exception as e:
        print(traceback.format_exc())
        return jsonify({"reply": f"Error: {str(e)}", "logs": []}), 500
309
+
310
if __name__ == "__main__":
    # Hugging Face needs the server to listen on 0.0.0.0, port 7860.
    bind_address, listen_port = "0.0.0.0", 7860
    app.run(host=bind_address, port=listen_port)