stardust-coder committed
Commit 0a1e821 · 1 Parent(s): 135b830

[mod] codes

Files changed (2):
  1. requirements.txt +2 -1
  2. src/streamlit_app.py +514 -53
requirements.txt CHANGED
@@ -2,4 +2,5 @@ altair
  pandas
  streamlit
  requests
- beautifulsoup4
+ beautifulsoup4
+ openai
src/streamlit_app.py CHANGED
@@ -1,78 +1,539 @@
- import json
  import requests
  import streamlit as st


- API_URL = "https://api.semanticscholar.org/graph/v1/paper/search"


- def search_papers(query, limit=10):
-     params = {
-         "query": query,
-         "limit": limit,
-         "fields": "title,abstract,authors,year,venue,url"
-     }

-     response = requests.get(API_URL, params=params, timeout=20)
-     response.raise_for_status()

-     data = response.json()
      papers = []

-     for item in data.get("data", []):
-         authors = ", ".join([a.get("name", "") for a in item.get("authors", [])])
-         paper = {
-             "title": item.get("title", "No title"),
-             "publication_info": f"{authors} / {item.get('venue', 'Unknown Venue')} / {item.get('year', 'Unknown Year')}",
-             "snippet": item.get("abstract", "No abstract"),
-             "url": item.get("url", "")
-         }
-         papers.append(paper)

      return papers


- def main():
-     st.set_page_config(page_title="Paper Search App", layout="wide")
-     st.title("Paper Search App")
-     st.write("Search papers using the Semantic Scholar API.")

-     query = st.text_input("Search keyword", value="neuro")
-     limit = st.slider("Number of results", min_value=1, max_value=20, value=10)

-     if st.button("Search"):
-         with st.spinner("Searching..."):
-             try:
-                 papers = search_papers(query, limit)

-                 if not papers:
-                     st.warning("No results found.")
-                     return

-                 st.success(f"Retrieved {len(papers)} results.")

-                 for i, paper in enumerate(papers, start=1):
-                     st.subheader(f"{i}. {paper['title']}")
-                     st.write(f"**Publication Info:** {paper['publication_info']}")
-                     st.write(f"**Snippet:** {paper['snippet']}")
-                     if paper["url"]:
-                         st.markdown(f"[Open paper page]({paper['url']})")
-                     st.divider()

-                 json_data = json.dumps(papers, indent=2, ensure_ascii=False)
-                 st.download_button(
-                     label="Download JSON",
-                     data=json_data,
-                     file_name="papers.json",
-                     mime="application/json"
                  )

-                 st.json(papers)

-             except requests.exceptions.RequestException as e:
-                 st.error(f"An error occurred during the API request: {e}")
-             except Exception as e:
-                 st.error(f"An unexpected error occurred: {e}")

- main()
+ import re
  import requests
  import streamlit as st
+ import xml.etree.ElementTree as ET
+ from openai import OpenAI

+ # =========================
+ # OpenAI Client
+ # =========================

+ def get_openai_client():
+     api_key = st.session_state.get("OPENAI_API_KEY", "")
+     if not api_key:
+         raise ValueError("OpenAI API Key is not set.")
+     return OpenAI(api_key=api_key)


+ def ask_llm(prompt, model="gpt-4.1-mini"):
+     client = get_openai_client()
+     res = client.chat.completions.create(
+         model=model,
+         messages=[{"role": "user", "content": prompt}],
+         temperature=0.2,
+     )
+     return (res.choices[0].message.content or "").strip()
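+ # Note: ask_llm() sends a single user message at temperature 0.2 and returns
+ # the stripped text of the first choice; the default model name above is the
+ # one this commit selects in the sidebar.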

+ # =========================
+ # Utility
+ # =========================

+ def normalize_title(title: str) -> str:
+     return " ".join((title or "").lower().strip().split())


+ def normalize_text(text: str) -> str:
+     return " ".join((text or "").strip().split())


+ def deduplicate_papers(papers):
+     seen = set()
+     unique = []

+     for p in papers:
+         title = normalize_title(p.get("title", ""))
+         if not title:
+             continue

+         authors = p.get("authors", []) or []
+         first_author = authors[0].lower().strip() if authors else ""
+         key = (title, first_author)

+         if key not in seen:
+             seen.add(key)
+             unique.append(p)

+     return unique
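+ # Illustrative example: two hits titled "Attention Is All You Need" with first
+ # author "Ashish Vaswani" collapse into one entry, since the dedup key is the
+ # (normalized title, lowercased first author) pair.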

+ # =========================
+ # arXiv Search
+ # =========================

+ def parse_arxiv_response(xml_text):
+     root = ET.fromstring(xml_text)
      papers = []

+     for entry in root.findall("{http://www.w3.org/2005/Atom}entry"):
+         title_el = entry.find("{http://www.w3.org/2005/Atom}title")
+         abstract_el = entry.find("{http://www.w3.org/2005/Atom}summary")
+         date_el = entry.find("{http://www.w3.org/2005/Atom}published")

+         authors = []
+         for a in entry.findall("{http://www.w3.org/2005/Atom}author"):
+             name_el = a.find("{http://www.w3.org/2005/Atom}name")
+             if name_el is not None and name_el.text:
+                 authors.append(name_el.text.strip())

+         title = title_el.text.strip() if title_el is not None and title_el.text else ""
+         abstract = abstract_el.text.strip() if abstract_el is not None and abstract_el.text else ""
+         date = date_el.text.strip() if date_el is not None and date_el.text else ""

+         if title:
+             papers.append(
+                 {
+                     "title": title,
+                     "authors": authors,
+                     "abstract": abstract,
+                     "date": date,
+                     "source": "arXiv",
+                     "venue": "",
+                     "url": "",
+                 }
+             )

      return papers


+ def search_arxiv_once(search_query, max_results=3):
+     url = "https://export.arxiv.org/api/query"
+     params = {
+         "search_query": search_query,
+         "start": 0,
+         "max_results": max_results,
+         "sortBy": "relevance",
+         "sortOrder": "descending",
+     }

+     res = requests.get(
+         url,
+         params=params,
+         timeout=30,
+         headers={"User-Agent": "paper-finder/0.1"},
+     )
+     res.raise_for_status()
+     return parse_arxiv_response(res.text)


+ def search_arxiv(query, max_results=3, debug=False):
+     query = normalize_text(query)
+     if not query:
+         return []

+     terms = [t for t in re.split(r"\s+", query) if t]
+     strategies = []

+     # Try strategies in order, loosest first
+     strategies.append(f'all:{query}')
+     strategies.append(f'all:"{query}"')
+     strategies.append(f'ti:"{query}"')

+     if terms:
+         strategies.append(" AND ".join([f'all:{t}' for t in terms]))

+     seen = set()
+     all_papers = []

+     for s in strategies:
+         try:
+             if debug:
+                 st.write("arXiv API query:", s)

+             papers = search_arxiv_once(s, max_results=max_results)

+             for p in papers:
+                 key = normalize_title(p["title"])
+                 if key not in seen:
+                     seen.add(key)
+                     all_papers.append(p)

+             if len(all_papers) >= max_results:
+                 return all_papers[:max_results]

+         except Exception as e:
+             if debug:
+                 st.warning(f"arXiv query failed: {s} / {e}")

+     return all_papers[:max_results]
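+ # Illustrative expansion for a query like "graph neural networks":
+ #   all:graph neural networks
+ #   all:"graph neural networks"
+ #   ti:"graph neural networks"
+ #   all:graph AND all:neural AND all:networks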

+ # =========================
+ # OpenAlex Search
+ # =========================

+ def reconstruct_abstract(inv_index):
+     if not inv_index:
+         return ""

+     words = []
+     for word, pos_list in inv_index.items():
+         for pos in pos_list:
+             words.append((pos, word))

+     words.sort(key=lambda x: x[0])
+     return " ".join(w for _, w in words)

+ def extract_openalex_venue(item):
+     primary_location = item.get("primary_location") or {}
+     source = primary_location.get("source") or {}
+     venue = source.get("display_name", "") or ""

+     if not venue:
+         locations = item.get("locations") or []
+         for loc in locations:
+             src = (loc or {}).get("source") or {}
+             venue = src.get("display_name", "") or ""
+             if venue:
+                 break

+     if not venue:
+         host_venue = item.get("host_venue") or {}
+         venue = host_venue.get("display_name", "") or ""

+     return venue


+ def search_openalex(query, venues, max_results=3, debug=False):
+     query = normalize_text(query)
+     if not query or not venues:
+         return []

+     url = "https://api.openalex.org/works"
+     params = {
+         "search": query,
+         "per-page": 50,
+     }

+     try:
+         res = requests.get(
+             url,
+             params=params,
+             timeout=30,
+             headers={"User-Agent": "paper-finder/0.1"},
+         )
+         res.raise_for_status()
+         data = res.json()

+         papers = []

+         for item in data.get("results", []):
+             venue = extract_openalex_venue(item)

+             if not any(v.lower() in venue.lower() for v in venues):
+                 continue

+             authors = []
+             for a in item.get("authorships", []):
+                 author = a.get("author") or {}
+                 name = author.get("display_name")
+                 if name:
+                     authors.append(name)

+             abstract = item.get("abstract_inverted_index")
+             if isinstance(abstract, dict):
+                 abstract = reconstruct_abstract(abstract)
+             elif not isinstance(abstract, str):
+                 abstract = ""

+             papers.append(
+                 {
+                     "title": item.get("title", "") or "",
+                     "authors": authors,
+                     "abstract": abstract,
+                     "date": item.get("publication_date", "") or "",
+                     "source": "OpenAlex",
+                     "venue": venue,
+                     "url": item.get("id", "") or "",
+                 }
+             )

+             if len(papers) >= max_results:
+                 break

+         if debug:
+             st.write("OpenAlex matched papers:", len(papers))

+         return papers

+     except Exception as e:
+         if debug:
+             st.warning(f"OpenAlex search failed: {e}")
+         return []
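+ # Note: the venue filter above is a case-insensitive substring match, so an
+ # illustrative venues list ["ICML"] also matches a display_name such as
+ # "Proceedings of ICML".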

+ # =========================
+ # LLM Utilities
+ # =========================

+ def normalize_keyword_for_search(keyword, model):
+     prompt = f"""
+ You are an academic paper search assistant.
+ Convert the user input below into one short English search query that works well on arXiv and OpenAlex.

+ Rules:
+ - Output exactly one English search query
+ - No extra explanation
+ - If the input is Japanese, convert it into natural English research keywords
+ - If the input is English, keep the meaning and tidy it up concisely
+ - Roughly 2 to 8 words is preferred
+ - Do not include unnecessary symbols

+ input: {keyword}
+ """
+     return normalize_text(ask_llm(prompt, model))


+ def paraphrase_query(keyword, model):
+     prompt = f"""
+ Rephrase the following research topic as an English paper-search query.
+ Output exactly one short English query.
+ No explanation is needed.

+ topic: {keyword}
+ """
+     return normalize_text(ask_llm(prompt, model))


+ def classify_field(keyword, model):
+     prompt = f"""
+ Choose the single field that the following research topic mainly belongs to.

+ Candidates:
+ ML
+ NLP
+ CV
+ OTHER

+ Research topic:
+ {keyword}

+ Decision rules:
+ - General machine learning, optimization, representation learning, reinforcement learning, generative models, etc.: ML
+ - Natural language processing, dialogue, translation, summarization, LLMs, RAG, etc.: NLP
+ - Images, video, object detection, segmentation, 3D vision, etc.: CV
+ - OTHER if none of the above clearly applies

+ Output exactly one label.
+ """
+     return ask_llm(prompt, model).strip().upper()


+ def summarize_paper(title, abstract, model, venue=""):
+     prompt = f"""
+ Explain the following paper concisely in Japanese.

+ Title:
+ {title}

+ Venue:
+ {venue}

+ Abstract:
+ {abstract}

+ Output format:
+ - Summary
+ - What is novel
+ - Who it is recommended for
+ """
+     return ask_llm(prompt, model)


+ def select_best_papers(papers, keyword, model, top_k=3):
+     if not papers:
+         return []

+     if len(papers) <= top_k:
+         return papers[:top_k]

+     text = ""
+     for i, p in enumerate(papers):
+         text += f"""
+ Paper {i}
+ Title: {p.get("title", "")}
+ Venue: {p.get("venue", "")}
+ Abstract: {p.get("abstract", "")}
+ """

+     prompt = f"""
+ From the paper list below, choose the {top_k} papers that are most relevant and most important for the research topic "{keyword}".
+ Always choose distinct papers.

+ {text}

+ Output format:
+ 0,2,5
+ """

+     try:
+         res = ask_llm(prompt, model)
+         ids = []
+         for x in res.split(","):
+             x = x.strip()
+             if x.isdigit():
+                 ids.append(int(x))

+         ids = list(dict.fromkeys(ids))

+         results = []
+         seen_titles = set()

+         for i in ids:
+             if 0 <= i < len(papers):
+                 title_key = normalize_title(papers[i].get("title", ""))
+                 if title_key and title_key not in seen_titles:
+                     results.append(papers[i])
+                     seen_titles.add(title_key)

+             if len(results) >= top_k:
+                 break

+         if results:
+             return results[:top_k]

+     except Exception:
+         pass

+     return papers[:top_k]
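+ # The model is expected to reply with comma-separated indices such as "0,2,5";
+ # if the reply cannot be parsed, the first top_k candidates are returned as a
+ # fallback.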

+ # =========================
+ # Streamlit UI
+ # =========================

+ st.set_page_config(page_title="Paper Finder", layout="wide")
+ st.title("📚 Paper Finder")

+ st.sidebar.header("Settings")

+ openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
+ if openai_api_key:
+     st.session_state["OPENAI_API_KEY"] = openai_api_key

+ model = st.sidebar.selectbox(
+     "Model",
+     ["gpt-4.1-mini", "gpt-4.1", "gpt-4o-mini"],
+     index=0,
+ )

+ debug_mode = st.sidebar.checkbox("Debug mode", value=True)

+ keyword = st.text_input("Research Keyword")

+ if st.button("Search Papers"):
+     if not st.session_state.get("OPENAI_API_KEY"):
+         st.error("Please enter an OpenAI API Key.")
+         st.stop()

+     if not keyword.strip():
+         st.warning("Please enter a Research Keyword.")
+         st.stop()

+     paper_list = []

+     st.write("### Step 0 Query Normalization")
+     try:
+         normalized_keyword = normalize_keyword_for_search(keyword, model)
+     except Exception as e:
+         st.error(f"Search query normalization failed: {e}")
+         st.stop()

+     st.write("**Input keyword:**", keyword)
+     st.write("**Normalized English query:**", normalized_keyword)

+     st.write("### Step 1 arXiv search")
+     papers_step1 = search_arxiv(normalized_keyword, max_results=10, debug=debug_mode)
+     paper_list.extend(papers_step1)
+     st.write(f"found {len(papers_step1)} papers")

+     st.write("### Step 2 Query Paraphrase")
+     try:
+         paraphrased = paraphrase_query(normalized_keyword, model)
+     except Exception as e:
+         paraphrased = normalized_keyword
+         if debug_mode:
+             st.warning(f"Query paraphrase failed: {e}")

+     st.write("**Paraphrased query:**", paraphrased)

+     papers_step2 = search_arxiv(paraphrased, max_results=10, debug=debug_mode)
+     paper_list.extend(papers_step2)
+     st.write(f"found {len(papers_step2)} papers")

+     st.write("### Step 3 Field Classification")
+     try:
+         field = classify_field(keyword, model)
+     except Exception as e:
+         field = "OTHER"
+         if debug_mode:
+             st.warning(f"Field classification failed: {e}")

+     st.write("**field:**", field)

+     if field == "ML":
+         venues = ["ICML", "ICLR", "NeurIPS"]
+     elif field == "NLP":
+         venues = ["ACL", "EMNLP", "NAACL", "AACL"]
+     elif field == "CV":
+         venues = ["CVPR", "ICCV", "ECCV", "SIGGRAPH"]
+     else:
+         venues = []

+     papers_step3 = []
+     if venues:
+         st.write("### Step 4 Top-conference Search")
+         papers_step3 = search_openalex(normalized_keyword, venues, max_results=10, debug=debug_mode)
+         paper_list.extend(papers_step3)
+         st.write(f"found {len(papers_step3)} papers")

+     paper_list = deduplicate_papers(paper_list)

+     st.write("### Total candidate papers:", len(paper_list))

+     if debug_mode and paper_list:
+         with st.expander("Candidate Papers"):
+             for i, p in enumerate(paper_list):
+                 st.write(
+                     f"{i}. {p.get('title', '')} | venue={p.get('venue', '') or '-'} | source={p.get('source', '')}"
                  )

+     if not paper_list:
+         st.error("No papers were found. Try a more general phrasing or a different keyword.")
+         st.stop()

+     st.write("### Selecting best papers")
+     best = select_best_papers(paper_list, keyword, model, top_k=3)

+     if not best:
+         st.warning("Paper selection failed, so the candidate papers are shown as-is.")
+         best = paper_list[:3]

+     st.write("## Recommended Papers")

+     for p in best:
+         abstract = p.get("abstract", "") or ""
+         venue = p.get("venue", "") or "-"

+         try:
+             summary = summarize_paper(
+                 title=p.get("title", ""),
+                 abstract=abstract,
+                 model=model,
+                 venue=venue,
+             ) if abstract else "No abstract was available, so a summary could not be generated."
+         except Exception as e:
+             summary = f"Summary generation failed: {e}"

+         st.markdown("---")
+         st.subheader(p.get("title", "Untitled"))
+         st.write("**Explanation:**")
+         st.write(summary)
+         st.write("**Authors:**", ", ".join(p.get("authors", [])) if p.get("authors") else "-")
+         st.write("**Date:**", p.get("date", "") or "-")
+         st.write("**Source:**", p.get("source", "") or "-")
+         st.write("**Venue:**", venue)
+         st.write("**Abstract:**")
+         st.write(abstract if abstract else "No abstract")
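+ # To try the app locally (a sketch; assumes the dependencies from
+ # requirements.txt are installed): streamlit run src/streamlit_app.py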