openfree committed on
Commit
34656b2
·
verified ·
1 Parent(s): e7ce71c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -473
app.py CHANGED
@@ -893,480 +893,8 @@ def quick_score(text):
893
  hum=analyze_humanizer(text,sents,words,morphs)
894
  fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"])
895
  return fs,v,lv,sc,ppx,hum
896
- def brave_search(query, count=5):
897
- """Brave Search API — 단일 쿼리"""
898
- if not BRAVE_KEY: return []
899
- url = f"https://api.search.brave.com/res/v1/web/search?q={query}&count={count}"
900
- try:
901
- if HAS_HTTPX:
902
- r = httpx.get(url, headers={"X-Subscription-Token": BRAVE_KEY, "Accept": "application/json"}, timeout=10)
903
- if r.status_code == 200:
904
- data = r.json()
905
- results = []
906
- for item in data.get("web", {}).get("results", []):
907
- results.append({"title": item.get("title",""), "url": item.get("url",""), "snippet": item.get("description",""), "source": "Brave"})
908
- return results
909
- except: pass
910
- return []
911
- def search_kci(query):
912
- """KCI(한국학술지인용색인) 검색"""
913
- try:
914
- url = f"https://open.kci.go.kr/po/openapi/openApiSearch.kci?apiCode=articleSearch&title={query}&displayCount=3"
915
- resp = http_get(url, timeout=8)
916
- if resp:
917
- results = []
918
- for m in re.finditer(r'<article-title><!\[CDATA\[(.+?)\]\]></article-title>.*?<url><!\[CDATA\[(.+?)\]\]></url>', resp, re.S):
919
- results.append({"title": m.group(1), "url": m.group(2), "snippet": "", "source": "KCI"})
920
- return results[:3]
921
- except: pass
922
- return []
923
- def search_riss(query):
924
- """RISS(학술연구정보서비스) — 간접 검색"""
925
- results = []
926
- try:
927
- url = f"http://www.riss.kr/search/Search.do?isDetailSearch=N&searchGubun=true&viewYn=OP&queryText=&strQuery={query}&iStartCount=0&iGroupView=5&icate=all"
928
- resp = http_get(url, timeout=8)
929
- if resp:
930
- for m in re.finditer(r'class="title"[^>]*>.*?<a[^>]*href="([^"]+)"[^>]*>(.*?)</a>', resp, re.S):
931
- title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
932
- if title:
933
- results.append({"title": title, "url": "https://www.riss.kr" + m.group(1), "snippet": "", "source": "RISS"})
934
- except: pass
935
- return results[:3]
936
- def search_arxiv(query):
937
- """arXiv API 검색"""
938
- results = []
939
- try:
940
- import urllib.parse
941
- q = urllib.parse.quote(query)
942
- url = f"https://export.arxiv.org/api/query?search_query=all:{q}&start=0&max_results=3&sortBy=relevance"
943
- resp = http_get(url, timeout=12)
944
- if resp:
945
- for m in re.finditer(r'<entry>.*?<title>(.*?)</title>.*?<id>(.*?)</id>.*?<summary>(.*?)</summary>', resp, re.S):
946
- title = re.sub(r'\s+', ' ', m.group(1)).strip()
947
- results.append({"title": title, "url": m.group(2).strip(), "snippet": re.sub(r'\s+', ' ', m.group(3)).strip()[:150], "source": "arXiv"})
948
- except Exception as e:
949
- pass
950
- return results[:3]
951
- def gemini_plagiarism_check(text_chunk):
952
- """Gemini + Google Search Grounding으로 표절 검사"""
953
- if not HAS_GENAI or not GEMINI_KEY: return None
954
- try:
955
- client = genai.Client(api_key=GEMINI_KEY)
956
- tool = gtypes.Tool(googleSearch=gtypes.GoogleSearch())
957
- prompt = f"""다음 텍스트가 인터넷에 존재하는지 Google Search로 확인하세요.
958
- 유사한 문장이 발견되면 출처 URL과 유사도(%)를 보고하세요.
959
- 마지막 줄에 "유사도: XX%" 형식으로 작성.
960
- [텍스트]
961
- {text_chunk[:1000]}"""
962
- resp = client.models.generate_content(
963
- model="gemini-flash-lite-latest",
964
- contents=prompt,
965
- config=gtypes.GenerateContentConfig(tools=[tool], temperature=0.1, max_output_tokens=600)
966
- )
967
- text_resp = resp.text if resp.text else ""
968
- sources = []
969
- if hasattr(resp, 'candidates') and resp.candidates:
970
- gc = resp.candidates[0].grounding_metadata
971
- if gc and hasattr(gc, 'grounding_chunks'):
972
- for chunk in gc.grounding_chunks:
973
- if hasattr(chunk, 'web') and chunk.web:
974
- sources.append({"title": chunk.web.title or "", "url": chunk.web.uri or "", "source": "Google"})
975
- pm = re.search(r'유사도[:\s]*(\d+)', text_resp)
976
- pct = int(pm.group(1)) if pm else 0
977
- return {"pct": pct, "response": text_resp, "sources": sources}
978
- except Exception as e:
979
- return {"pct": 0, "response": str(e)[:100], "sources": []}
980
- def parallel_brave_search(queries, max_workers=10):
981
- """Brave Search 병렬 실행 (최대 20개)"""
982
- all_results = {}
983
- with ThreadPoolExecutor(max_workers=min(max_workers, 20)) as executor:
984
- futures = {executor.submit(brave_search, q, 3): q for q in queries}
985
- for future in as_completed(futures):
986
- q = futures[future]
987
- try:
988
- results = future.result()
989
- all_results[q] = results
990
- except: all_results[q] = []
991
- return all_results
992
- def duckduckgo_search(query, max_results=5):
993
- """DuckDuckGo HTML 스크래핑 — API 키 불필요 폴백"""
994
- results = []
995
- try:
996
- import urllib.parse
997
- q = urllib.parse.quote(query)
998
- url = f"https://html.duckduckgo.com/html/?q={q}"
999
- headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
1000
- resp = http_get(url, headers=headers, timeout=10)
1001
- if resp:
1002
- for m in re.finditer(r'<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>.*?<a[^>]+class="result__snippet"[^>]*>(.*?)</a>', resp, re.S):
1003
- href = m.group(1)
1004
- title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
1005
- snippet = re.sub(r'<[^>]+>', '', m.group(3)).strip()
1006
- real_url = href
1007
- if 'uddg=' in href:
1008
- um = re.search(r'uddg=([^&]+)', href)
1009
- if um: real_url = urllib.parse.unquote(um.group(1))
1010
- if title:
1011
- results.append({"title": title, "url": real_url, "snippet": snippet, "source": "Web"})
1012
- if len(results) >= max_results: break
1013
- except: pass
1014
- return results
1015
- def self_crawl_search(query, max_results=3):
1016
- """httpx 기반 자체 크롤링 (DuckDuckGo + 학술 사이트)"""
1017
- all_results = []
1018
- all_results.extend(duckduckgo_search(query, max_results))
1019
- if '논문' not in query and 'paper' not in query.lower():
1020
- all_results.extend(duckduckgo_search(f"{query} 논문 학술", 2))
1021
- return all_results
1022
- def _extract_key_phrases(text, max_phrases=6):
1023
- """텍스트에서 핵심 검색 구문 추출 (Brave/학술 검색용)"""
1024
- sents = split_sentences(text)
1025
- phrases = []
1026
- # 긴 문장 우선 (정보량 많은 문장)
1027
- ranked = sorted(sents, key=lambda s: len(s), reverse=True)
1028
- for s in ranked:
1029
- # 15~80자 사이 문장만 검색 쿼리로 적합
1030
- if 15 <= len(s) <= 80:
1031
- phrases.append(s)
1032
- elif len(s) > 80:
1033
- phrases.append(s[:80])
1034
- if len(phrases) >= max_phrases:
1035
- break
1036
- # 부족하면 앞부분에서 보충
1037
- if len(phrases) < 2 and sents:
1038
- phrases.append(sents[0][:80])
1039
- return phrases
1040
-
1041
-
1042
- def run_plagiarism(text, progress=gr.Progress()):
1043
- """표절 검사 — 3단계 파이프라인: ① Gemini Google Search ② Brave 웹검색 ③ 학술DB"""
1044
- if not text or len(text.strip()) < 50:
1045
- return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자 이상</div>", ""
1046
-
1047
- text = text.strip()
1048
- now = datetime.now().strftime("%Y-%m-%d %H:%M")
1049
- doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
1050
- log_lines = []
1051
-
1052
- gemini_pct = 0
1053
- gemini_sources = []
1054
- gemini_response = ""
1055
- brave_sources = []
1056
- academic_sources = []
1057
-
1058
- # ═══════════════════════════════════════
1059
- # PHASE 1: Gemini Google Search Grounding
1060
- # ═══════════════════════════════════════
1061
- if HAS_GENAI and GEMINI_KEY:
1062
- progress(0.10, "① Gemini Google Search Grounding...")
1063
- try:
1064
- client = genai.Client(api_key=GEMINI_KEY)
1065
-
1066
- prompt = f"""당신은 표절 검사 전문가입니다. 아래 텍스트가 인터넷에 이미 존재하는 내용인지 Google Search로 철저히 검색하세요.
1067
-
1068
- [검사 대상 텍스트]
1069
- {text[:3000]}
1070
-
1071
- [응답 형식]
1072
- 1. 발견된 유사 콘텐츠를 각각 "제목 | URL | 유사도(높음/중간/낮음)" 형식으로 나열
1073
- 2. 발견 못하면 "유사 콘텐츠 없음"
1074
- 3. 마지막 줄에 반드시 "표절율: XX%" 형식으로 종합 판정"""
1075
-
1076
- contents = [
1077
- gtypes.Content(
1078
- role="user",
1079
- parts=[gtypes.Part.from_text(text=prompt)],
1080
- )
1081
- ]
1082
- tools = [gtypes.Tool(googleSearch=gtypes.GoogleSearch())]
1083
- generate_content_config = gtypes.GenerateContentConfig(
1084
- thinking_config=gtypes.ThinkingConfig(thinking_budget=0),
1085
- tools=tools,
1086
- temperature=0.1,
1087
- max_output_tokens=4000,
1088
- )
1089
-
1090
- progress(0.20, "① Google Search 실행 중...")
1091
-
1092
- # 스트리밍 응답 수집
1093
- full_response = ""
1094
- for chunk in client.models.generate_content_stream(
1095
- model="gemini-flash-lite-latest",
1096
- contents=contents,
1097
- config=generate_content_config,
1098
- ):
1099
- if chunk.text:
1100
- full_response += chunk.text
1101
- gemini_response = full_response
1102
-
1103
- # ✅ 표절율 추출
1104
- pm = re.search(r'표절율[:\s]*(\d+)', full_response)
1105
- if pm:
1106
- gemini_pct = int(pm.group(1))
1107
-
1108
- # ✅ Grounding Metadata에서 출처 추출 (비스트리밍 재호출)
1109
- progress(0.28, "① 출처 메타데이터 추출...")
1110
- try:
1111
- resp_full = client.models.generate_content(
1112
- model="gemini-flash-lite-latest",
1113
- contents=prompt,
1114
- config=gtypes.GenerateContentConfig(
1115
- tools=[gtypes.Tool(googleSearch=gtypes.GoogleSearch())],
1116
- temperature=0.1,
1117
- max_output_tokens=2000,
1118
- )
1119
- )
1120
- # grounding_metadata에서 실제 검색 출처 추출
1121
- if hasattr(resp_full, 'candidates') and resp_full.candidates:
1122
- cand = resp_full.candidates[0]
1123
- gm = getattr(cand, 'grounding_metadata', None)
1124
- if gm:
1125
- chunks = getattr(gm, 'grounding_chunks', None) or []
1126
- for gc in chunks:
1127
- web = getattr(gc, 'web', None)
1128
- if web:
1129
- title = getattr(web, 'title', '') or ''
1130
- uri = getattr(web, 'uri', '') or ''
1131
- if uri:
1132
- gemini_sources.append({"title": title, "url": uri, "source": "Google", "snippet": ""})
1133
- # support_chunks도 확인
1134
- supports = getattr(gm, 'grounding_supports', None) or []
1135
- for sup in supports:
1136
- seg = getattr(sup, 'segment', None)
1137
- snippet_text = getattr(seg, 'text', '') if seg else ''
1138
- idxs = getattr(sup, 'grounding_chunk_indices', []) or []
1139
- # snippet을 해당 source에 매핑
1140
- for idx in idxs:
1141
- if idx < len(gemini_sources) and snippet_text:
1142
- gemini_sources[idx]["snippet"] = snippet_text[:120]
1143
- except Exception as e2:
1144
- print(f"Gemini 메타데이터 추출 오류: {e2}")
1145
-
1146
- # 텍스트 응답에서 추가 URL 추출 (grounding에 없는 것만)
1147
- existing_urls = {s["url"] for s in gemini_sources}
1148
- for m in re.finditer(r'https?://[^\s\)\]\,\"\']{10,}', full_response):
1149
- url = m.group(0).rstrip('.')
1150
- if url not in existing_urls:
1151
- domain = url.split('/')[2] if len(url.split('/')) > 2 else url
1152
- gemini_sources.append({"title": domain, "url": url, "source": "Google", "snippet": ""})
1153
- existing_urls.add(url)
1154
-
1155
- log_lines.append(f"[Gemini] 표절율={gemini_pct}%, 출처={len(gemini_sources)}건")
1156
-
1157
- except Exception as e:
1158
- log_lines.append(f"[Gemini] 오류: {str(e)[:100]}")
1159
- print(f"Gemini 오류: {str(e)}")
1160
- else:
1161
- log_lines.append("[Gemini] API 키 없음 — 건너뜀")
1162
-
1163
- # ═══════════════════════════════════════
1164
- # PHASE 2: Brave Search 병렬 웹 검색
1165
- # ═══════════════════════════════════════
1166
- progress(0.40, "② Brave Search 웹 검색...")
1167
- key_phrases = _extract_key_phrases(text, max_phrases=6)
1168
-
1169
- if BRAVE_KEY and key_phrases:
1170
- try:
1171
- brave_results = parallel_brave_search(key_phrases, max_workers=10)
1172
- seen_urls = {s["url"] for s in gemini_sources}
1173
- for query, results in brave_results.items():
1174
- for r in results:
1175
- url = r.get("url", "")
1176
- if url and url not in seen_urls:
1177
- brave_sources.append({
1178
- "title": r.get("title", "")[:80],
1179
- "url": url,
1180
- "source": "Brave",
1181
- "snippet": r.get("snippet", "")[:120],
1182
- })
1183
- seen_urls.add(url)
1184
- log_lines.append(f"[Brave] 쿼리={len(key_phrases)}개, 출처={len(brave_sources)}건")
1185
- except Exception as e:
1186
- log_lines.append(f"[Brave] 오류: {str(e)[:80]}")
1187
- elif not BRAVE_KEY:
1188
- # Brave 키 없으면 DuckDuckGo 폴백
1189
- try:
1190
- seen_urls = {s["url"] for s in gemini_sources}
1191
- for phrase in key_phrases[:3]:
1192
- for r in duckduckgo_search(phrase, max_results=3):
1193
- url = r.get("url", "")
1194
- if url and url not in seen_urls:
1195
- brave_sources.append({
1196
- "title": r.get("title", "")[:80],
1197
- "url": url,
1198
- "source": "Web",
1199
- "snippet": r.get("snippet", "")[:120],
1200
- })
1201
- seen_urls.add(url)
1202
- log_lines.append(f"[DuckDuckGo] 폴백, 출처={len(brave_sources)}건")
1203
- except Exception as e:
1204
- log_lines.append(f"[DuckDuckGo] 오류: {str(e)[:80]}")
1205
-
1206
- # ═══════════════════════════════════════
1207
- # PHASE 3: 학술 DB 검색 (KCI · RISS · arXiv)
1208
- # ═══════════════════════════════════════
1209
- progress(0.60, "③ 학술 DB 검색 (KCI·RISS·arXiv)...")
1210
-
1211
- # 학술 검색용 키워드: 텍스트에서 핵심 명사구 추출
1212
- academic_query = text[:100].replace('\n', ' ')
1213
- # 한글이 포함되어 있으면 한글 학술DB도 검색
1214
- has_korean = bool(re.search(r'[가-힣]', text))
1215
-
1216
- try:
1217
- with ThreadPoolExecutor(max_workers=5) as executor:
1218
- futures = {}
1219
- futures[executor.submit(search_arxiv, academic_query[:60])] = "arXiv"
1220
- if has_korean:
1221
- futures[executor.submit(search_kci, academic_query[:40])] = "KCI"
1222
- futures[executor.submit(search_riss, academic_query[:40])] = "RISS"
1223
-
1224
- seen_urls = {s["url"] for s in gemini_sources + brave_sources}
1225
- for future in as_completed(futures, timeout=15):
1226
- src_name = futures[future]
1227
- try:
1228
- results = future.result()
1229
- for r in results:
1230
- url = r.get("url", "")
1231
- if url and url not in seen_urls:
1232
- academic_sources.append({
1233
- "title": r.get("title", "")[:80],
1234
- "url": url,
1235
- "source": src_name,
1236
- "snippet": r.get("snippet", "")[:120],
1237
- })
1238
- seen_urls.add(url)
1239
- except Exception:
1240
- pass
1241
- log_lines.append(f"[학술] KCI·RISS·arXiv 출처={len(academic_sources)}건")
1242
- except Exception as e:
1243
- log_lines.append(f"[학술] 오류: {str(e)[:80]}")
1244
-
1245
- # ═══════════════════════════════════════
1246
- # 종합 판정
1247
- # ═══════════════════════════════════════
1248
- progress(0.80, "보고서 생성...")
1249
-
1250
- all_sources = gemini_sources + brave_sources + academic_sources
1251
-
1252
- # 종합 표절율: Gemini 90% + Brave/학술 보조 10%
1253
- web_boost = min(len(brave_sources) * 1.5, 7)
1254
- acad_boost = min(len(academic_sources) * 2, 3)
1255
- plag_pct = min(round(gemini_pct * 0.9 + web_boost + acad_boost), 100)
1256
-
1257
- if plag_pct >= 50:
1258
- grade, gc = "🚨 표절 의심", "#FF4444"
1259
- elif plag_pct >= 30:
1260
- grade, gc = "⚠️ 주의 필요", "#FF8800"
1261
- elif plag_pct >= 15:
1262
- grade, gc = "📌 유사표현", "#DDAA00"
1263
- elif plag_pct >= 5:
1264
- grade, gc = "✓ 양호", "#4ECDC4"
1265
- else:
1266
- grade, gc = "✅ 우수", "#22AA44"
1267
-
1268
- word_count = len(split_words(text))
1269
- char_count = len(text)
1270
-
1271
- # ═══════════════════════════════════════
1272
- # 출처 테이블 HTML 생성
1273
- # ═══════════════════════════════════════
1274
- def _source_badge(src):
1275
- colors = {"Google": "#4285F4", "Brave": "#FB542B", "Web": "#888",
1276
- "KCI": "#2E7D32", "RISS": "#1565C0", "arXiv": "#B71C1C"}
1277
- c = colors.get(src, "#666")
1278
- return f'<span style="display:inline-block;padding:2px 6px;border-radius:3px;background:{c};color:#fff;font-size:9px;font-weight:700;">{src}</span>'
1279
-
1280
- src_rows = ""
1281
- for i, s in enumerate(all_sources[:30]):
1282
- title_display = s['title'][:55] if s['title'] else s['url'].split('/')[2] if len(s['url'].split('/')) > 2 else s['url'][:40]
1283
- snippet_html = f'<div style="font-size:9px;color:#888;margin-top:2px;">{s["snippet"][:100]}</div>' if s.get("snippet") else ""
1284
- src_rows += f"""<tr style="border-bottom:1px solid #E8E8E8;">
1285
- <td style="padding:8px;text-align:center;font-size:11px;color:#666;">{i+1}</td>
1286
- <td style="padding:8px;">{_source_badge(s.get('source',''))}</td>
1287
- <td style="padding:8px;"><a href="{s['url']}" target="_blank" rel="noopener noreferrer" style="color:#2E86C1;text-decoration:none;font-weight:600;font-size:11px;">{title_display}</a>{snippet_html}</td>
1288
- <td style="padding:8px;font-size:9px;color:#999;word-break:break-all;max-width:200px;"><a href="{s['url']}" target="_blank" rel="noopener noreferrer" style="color:#999;text-decoration:none;">{s['url'][:65]}</a></td>
1289
- </tr>"""
1290
-
1291
- if not src_rows:
1292
- src_rows = '<tr><td colspan="4" style="padding:20px;text-align:center;color:#999;">발견된 출처 없음</td></tr>'
1293
-
1294
- # Gemini 분석 요약 (접기)
1295
- gemini_summary = ""
1296
- if gemini_response:
1297
- safe_resp = gemini_response.replace('<', '&lt;').replace('>', '&gt;').replace('\n', '<br>')
1298
- gemini_summary = f"""
1299
- <div style="padding:16px 24px;border-bottom:1px solid #E0E0E0;">
1300
- <details>
1301
- <summary style="cursor:pointer;font-size:13px;font-weight:700;color:#1A3C6E;">🤖 Gemini 분석 상세</summary>
1302
- <div style="margin-top:10px;padding:12px;background:#F8F9FA;border-radius:6px;font-size:11px;line-height:1.7;color:#333;max-height:300px;overflow-y:auto;">{safe_resp}</div>
1303
- </details>
1304
- </div>"""
1305
-
1306
- HDR = '#3B7DD8'
1307
- html = f"""<div style="font-family:'Noto Sans KR',sans-serif;max-width:900px;margin:20px auto;background:#fff;border:1px solid #E0E0E0;border-radius:8px;box-shadow:0 2px 8px rgba(0,0,0,0.06);">
1308
- <div style="background:linear-gradient(135deg,{HDR},#4A8DE0);padding:24px;color:#fff;border-radius:8px 8px 0 0;">
1309
- <div style="display:flex;justify-content:space-between;align-items:center;">
1310
- <div>
1311
- <div style="font-size:24px;font-weight:900;">표절 검사 결과</div>
1312
- <div style="font-size:12px;opacity:0.9;margin-top:4px;">Gemini Google Search + Brave + KCI·RISS·arXiv</div>
1313
- </div>
1314
- <div style="text-align:right;font-size:11px;opacity:0.9;">
1315
- <div>문서: {doc_id}</div>
1316
- <div>{now}</div>
1317
- </div>
1318
- </div>
1319
- </div>
1320
- <div style="padding:24px;background:#FAFBFE;border-bottom:1px solid #E0E0E0;">
1321
- <div style="display:grid;grid-template-columns:1fr 1fr 1fr 1fr;gap:12px;">
1322
- <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1323
- <div style="font-size:42px;font-weight:900;color:{gc};">{plag_pct}%</div>
1324
- <div style="font-size:11px;color:#666;margin-top:6px;">종합 표절율</div>
1325
- </div>
1326
- <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1327
- <div style="font-size:22px;font-weight:900;color:{gc};margin-top:6px;">{grade}</div>
1328
- <div style="font-size:11px;color:#666;margin-top:6px;">판정</div>
1329
- </div>
1330
- <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1331
- <div style="font-size:28px;font-weight:900;color:#555;">{len(all_sources)}</div>
1332
- <div style="font-size:11px;color:#666;margin-top:6px;">발견 출처</div>
1333
- </div>
1334
- <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
1335
- <div style="font-size:14px;font-weight:700;color:#4285F4;margin-top:4px;">{len(gemini_sources)}</div>
1336
- <div style="font-size:14px;font-weight:700;color:#FB542B;">{len(brave_sources)}</div>
1337
- <div style="font-size:14px;font-weight:700;color:#2E7D32;">{len(academic_sources)}</div>
1338
- <div style="font-size:9px;color:#666;margin-top:2px;">Google·Brave·학술</div>
1339
- </div>
1340
- </div>
1341
- </div>
1342
- <div style="padding:16px 24px;border-bottom:1px solid #E0E0E0;">
1343
- <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:8px;">📋 검사 정보</div>
1344
- <div style="display:flex;gap:24px;font-size:12px;">
1345
- <span><span style="color:#888;">글자수</span> <b>{char_count:,}자</b></span>
1346
- <span><span style="color:#888;">단어수</span> <b>{word_count:,}단어</b></span>
1347
- <span><span style="color:#888;">검색엔진</span> <b>Google + Brave + KCI·RISS·arXiv</b></span>
1348
- </div>
1349
- </div>{gemini_summary}
1350
- <div style="padding:24px;">
1351
- <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">🔍 발견된 출처 ({len(all_sources)}건)</div>
1352
- <table style="width:100%;border-collapse:collapse;font-size:11px;">
1353
- <thead>
1354
- <tr style="background:{HDR};color:white;">
1355
- <th style="padding:10px;text-align:center;width:35px;">#</th>
1356
- <th style="padding:10px;text-align:center;width:55px;">소스</th>
1357
- <th style="padding:10px;text-align:left;">출처</th>
1358
- <th style="padding:10px;text-align:left;width:200px;">URL</th>
1359
- </tr>
1360
- </thead>
1361
- <tbody>{src_rows}</tbody>
1362
- </table>
1363
- </div>
1364
- </div>"""
1365
-
1366
- progress(0.95, "완료!")
1367
- log_text = "\n".join(log_lines)
1368
 
1369
- return html, log_text
1370
  def run_detection(text, progress=gr.Progress()):
1371
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자</div>",""
1372
  text=text.strip()
 
893
  hum=analyze_humanizer(text,sents,words,morphs)
894
  fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"])
895
  return fs,v,lv,sc,ppx,hum
896
+ from plagiarism_check import run_plagiarism
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
 
 
898
  def run_detection(text, progress=gr.Progress()):
899
  if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자</div>",""
900
  text=text.strip()