Spaces:

VIDraft
/

TeXray-backup

Sleeping

App Files Files Community

openfree committed on Mar 1

Commit

34656b2

verified ·

1 Parent(s): e7ce71c

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -473

app.py CHANGED Viewed

@@ -893,480 +893,8 @@ def quick_score(text):
     hum=analyze_humanizer(text,sents,words,morphs)
     fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"])
     return fs,v,lv,sc,ppx,hum
-def brave_search(query, count=5):
-    """Run a single query against the Brave Search web API.
-
-    Args:
-        query: Free-text search string; percent-encoded by the HTTP client.
-        count: Number of results requested from the API.
-
-    Returns:
-        A list of ``{"title", "url", "snippet", "source"}`` dicts. The list is
-        empty when no API key is configured, httpx is unavailable, the
-        response status is not 200, or the request/parsing fails.
-    """
-    if not BRAVE_KEY or not HAS_HTTPX:
-        return []
-    try:
-        # Send the query through ``params`` so spaces, '&', '#' and non-ASCII
-        # text are percent-encoded instead of corrupting the query string.
-        r = httpx.get(
-            "https://api.search.brave.com/res/v1/web/search",
-            params={"q": query, "count": count},
-            headers={"X-Subscription-Token": BRAVE_KEY, "Accept": "application/json"},
-            timeout=10,
-        )
-        if r.status_code != 200:
-            return []
-        data = r.json()
-        return [
-            {
-                "title": item.get("title", ""),
-                "url": item.get("url", ""),
-                "snippet": item.get("description", ""),
-                "source": "Brave",
-            }
-            for item in data.get("web", {}).get("results", [])
-        ]
-    except Exception:
-        # Best-effort lookup: any network or payload error means "no results",
-        # but KeyboardInterrupt/SystemExit are no longer swallowed.
-        return []
-def search_kci(query):
-    """Search the KCI (Korea Citation Index) open API by article title.
-
-    Args:
-        query: Title search text; percent-encoded before being placed in the URL.
-
-    Returns:
-        Up to three ``{"title", "url", "snippet", "source"}`` dicts with an
-        empty snippet, or an empty list when nothing matches or the request fails.
-    """
-    try:
-        import urllib.parse
-        # Korean text and spaces must be percent-encoded to form a valid URL.
-        q = urllib.parse.quote(query)
-        url = f"https://open.kci.go.kr/po/openapi/openApiSearch.kci?apiCode=articleSearch&title={q}&displayCount=3"
-        resp = http_get(url, timeout=8)
-        if resp:
-            results = []
-            # Title and URL are pulled from the CDATA sections of the XML response.
-            for m in re.finditer(r'<article-title><!\[CDATA\[(.+?)\]\]></article-title>.*?<url><!\[CDATA\[(.+?)\]\]></url>', resp, re.S):
-                results.append({"title": m.group(1), "url": m.group(2), "snippet": "", "source": "KCI"})
-            return results[:3]
-    except Exception:
-        # Best-effort source: failures simply contribute no results.
-        pass
-    return []
-def search_riss(query):
-    """Search RISS (Research Information Sharing Service) by scraping its result page.
-
-    Args:
-        query: Search text; percent-encoded before being placed in the URL.
-
-    Returns:
-        Up to three ``{"title", "url", "snippet", "source"}`` dicts with an
-        empty snippet, or an empty list when nothing matches or the request fails.
-    """
-    results = []
-    try:
-        import urllib.parse
-        q = urllib.parse.quote(query)
-        url = f"http://www.riss.kr/search/Search.do?isDetailSearch=N&searchGubun=true&viewYn=OP&queryText=&strQuery={q}&iStartCount=0&iGroupView=5&icate=all"
-        resp = http_get(url, timeout=8)
-        if resp:
-            for m in re.finditer(r'class="title"[^>]*>.*?<a[^>]*href="([^"]+)"[^>]*>(.*?)</a>', resp, re.S):
-                # Strip inline markup from the anchor text.
-                title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
-                if title:
-                    # urljoin handles root-relative, relative and absolute hrefs;
-                    # plain concatenation only worked for root-relative ones.
-                    link = urllib.parse.urljoin("https://www.riss.kr", m.group(1))
-                    results.append({"title": title, "url": link, "snippet": "", "source": "RISS"})
-    except Exception:
-        # Best-effort source: failures simply contribute no results.
-        pass
-    return results[:3]
-def search_arxiv(query):
-    """Query the arXiv export API and return at most three matching entries.
-
-    Each entry is a ``{"title", "url", "snippet", "source"}`` dict; the snippet
-    is the whitespace-collapsed summary cut to 150 characters. Any exception
-    during the lookup yields whatever was collected so far.
-    """
-    found = []
-    entry_pattern = r'<entry>.*?<title>(.*?)</title>.*?<id>(.*?)</id>.*?<summary>(.*?)</summary>'
-    try:
-        import urllib.parse
-        encoded = urllib.parse.quote(query)
-        body = http_get(
-            f"https://export.arxiv.org/api/query?search_query=all:{encoded}&start=0&max_results=3&sortBy=relevance",
-            timeout=12,
-        )
-        if body:
-            for entry in re.finditer(entry_pattern, body, re.S):
-                raw_title, raw_id, raw_summary = entry.group(1), entry.group(2), entry.group(3)
-                # Feed text is line-wrapped; collapse whitespace runs to single spaces.
-                summary = re.sub(r'\s+', ' ', raw_summary).strip()
-                found.append({
-                    "title": re.sub(r'\s+', ' ', raw_title).strip(),
-                    "url": raw_id.strip(),
-                    "snippet": summary[:150],
-                    "source": "arXiv",
-                })
-    except Exception:
-        pass
-    return found[:3]
-def gemini_plagiarism_check(text_chunk):
-    """Check a text chunk for plagiarism with Gemini and Google Search grounding.
-
-    Args:
-        text_chunk: Text to check; only the first 1000 characters are sent.
-
-    Returns:
-        ``None`` when the Gemini client or API key is unavailable. Otherwise a
-        dict ``{"pct", "response", "sources"}`` where ``pct`` is the similarity
-        percentage parsed from the model's answer (0 when absent, capped at
-        100), ``response`` is the raw answer text and ``sources`` lists the
-        grounded web results. On any exception ``pct`` is 0, ``response`` holds
-        the first 100 characters of the error and ``sources`` is empty.
-    """
-    if not HAS_GENAI or not GEMINI_KEY:
-        return None
-    try:
-        client = genai.Client(api_key=GEMINI_KEY)
-        tool = gtypes.Tool(googleSearch=gtypes.GoogleSearch())
-        prompt = f"""다음 텍스트가 인터넷에 존재하는지 Google Search로 확인하세요.
-유사한 문장이 발견되면 출처 URL과 유사도(%)를 보고하세요.
-마지막 줄에 "유사도: XX%" 형식으로 작성.
-[텍스트]
-{text_chunk[:1000]}"""
-        resp = client.models.generate_content(
-            model="gemini-flash-lite-latest",
-            contents=prompt,
-            config=gtypes.GenerateContentConfig(tools=[tool], temperature=0.1, max_output_tokens=600)
-        )
-        text_resp = resp.text if resp.text else ""
-        sources = []
-        candidates = getattr(resp, 'candidates', None) or []
-        if candidates:
-            gm = getattr(candidates[0], 'grounding_metadata', None)
-            # grounding_chunks can exist but be None; treat that as "no sources"
-            # rather than raising and discarding the model's answer.
-            for chunk in (getattr(gm, 'grounding_chunks', None) or []):
-                web = getattr(chunk, 'web', None)
-                if web:
-                    sources.append({"title": web.title or "", "url": web.uri or "", "source": "Google"})
-        # The prompt asks for the overall figure on the last line, so use the
-        # final match; earlier matches may be per-source similarity values.
-        matches = re.findall(r'유사도[:\s]*(\d+)', text_resp)
-        pct = min(int(matches[-1]), 100) if matches else 0
-        return {"pct": pct, "response": text_resp, "sources": sources}
-    except Exception as e:
-        return {"pct": 0, "response": str(e)[:100], "sources": []}
-def parallel_brave_search(queries, max_workers=10):
-    """Run several Brave searches concurrently (three results per query).
-
-    Args:
-        queries: Iterable of query strings; duplicates are searched only once.
-        max_workers: Requested thread count, clamped to the range 1..20.
-
-    Returns:
-        A dict mapping each query to its result list; a query whose search
-        raised maps to an empty list.
-    """
-    # Results are keyed by query, so a repeated query would only waste an API call.
-    unique_queries = list(dict.fromkeys(queries))
-    all_results = {}
-    if not unique_queries:
-        return all_results
-    # ThreadPoolExecutor rejects max_workers <= 0, so clamp from below as well.
-    workers = max(1, min(max_workers, 20))
-    with ThreadPoolExecutor(max_workers=workers) as executor:
-        futures = {executor.submit(brave_search, q, 3): q for q in unique_queries}
-        for future in as_completed(futures):
-            q = futures[future]
-            try:
-                all_results[q] = future.result()
-            except Exception:
-                all_results[q] = []
-    return all_results
-def duckduckgo_search(query, max_results=5):
-    """Scrape DuckDuckGo's HTML endpoint; a fallback that needs no API key.
-
-    Args:
-        query: Search text; percent-encoded before being placed in the URL.
-        max_results: Maximum number of results to return; values <= 0 return
-            an empty list.
-
-    Returns:
-        A list of ``{"title", "url", "snippet", "source"}`` dicts with source
-        ``"Web"``. Returns what was collected so far if the request or parsing fails.
-    """
-    results = []
-    if max_results <= 0:
-        # Previously one result leaked through because the limit was only
-        # checked after appending.
-        return results
-    try:
-        import urllib.parse
-        q = urllib.parse.quote(query)
-        url = f"https://html.duckduckgo.com/html/?q={q}"
-        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
-        resp = http_get(url, headers=headers, timeout=10)
-        if resp:
-            for m in re.finditer(r'<a[^>]+class="result__a"[^>]+href="([^"]+)"[^>]*>(.*?)</a>.*?<a[^>]+class="result__snippet"[^>]*>(.*?)</a>', resp, re.S):
-                href = m.group(1)
-                title = re.sub(r'<[^>]+>', '', m.group(2)).strip()
-                snippet = re.sub(r'<[^>]+>', '', m.group(3)).strip()
-                real_url = href
-                # Result links are redirects; the target is carried in ``uddg``.
-                if 'uddg=' in href:
-                    um = re.search(r'uddg=([^&]+)', href)
-                    if um:
-                        real_url = urllib.parse.unquote(um.group(1))
-                if title:
-                    results.append({"title": title, "url": real_url, "snippet": snippet, "source": "Web"})
-                if len(results) >= max_results:
-                    break
-    except Exception:
-        # Best-effort fallback: failures simply contribute no further results.
-        pass
-    return results
-def self_crawl_search(query, max_results=3):
-    """Collect DuckDuckGo results for the query plus an academic-flavoured variant.
-
-    The second search (two results, query suffixed with "논문 학술") runs only
-    when the query mentions neither "논문" nor "paper" (case-insensitive).
-    """
-    collected = list(duckduckgo_search(query, max_results))
-    mentions_paper = '논문' in query or 'paper' in query.lower()
-    if not mentions_paper:
-        collected += duckduckgo_search(f"{query} 논문 학술", 2)
-    return collected
-def _extract_key_phrases(text, max_phrases=6):
-    """Pick search phrases from the text for the web and academic searches.
-
-    Sentences are taken longest first. Those of at least 15 characters are
-    used, cut to 80 characters. If fewer than two phrases result, the start of
-    the first sentence is added as a fallback.
-
-    Args:
-        text: Source text, split with ``split_sentences``.
-        max_phrases: Stop collecting once this many phrases are gathered.
-
-    Returns:
-        A list of distinct phrase strings.
-    """
-    sents = split_sentences(text)
-    phrases = []
-    # Longest sentences first: they carry the most distinctive content.
-    for s in sorted(sents, key=len, reverse=True):
-        if len(s) >= 15:
-            candidate = s[:80]
-            # Skip repeats so the same query is not searched twice.
-            if candidate not in phrases:
-                phrases.append(candidate)
-        if len(phrases) >= max_phrases:
-            break
-    # Too few usable sentences: fall back to the opening of the text.
-    if len(phrases) < 2 and sents:
-        fallback = sents[0][:80]
-        if fallback and fallback not in phrases:
-            phrases.append(fallback)
-    return phrases
-def run_plagiarism(text, progress=gr.Progress()):
-    """Run the three-phase plagiarism check and render an HTML report.
-
-    Phases: (1) Gemini with Google Search grounding, (2) Brave web search, or
-    DuckDuckGo scraping when no Brave key is configured, (3) academic databases
-    (arXiv always; KCI and RISS when the text contains Hangul).
-
-    Args:
-        text: Document text to check; must be at least 50 characters after stripping.
-        progress: Gradio progress callback, called with (fraction, message).
-
-    Returns:
-        A ``(html, log_text)`` tuple: the rendered report and a newline-joined
-        log of per-phase outcomes. For too-short input the HTML is a warning
-        banner and the log is an empty string.
-    """
-    # Imported under an alias because ``html`` is the report variable below.
-    from html import escape as _esc
-    if not text or len(text.strip()) < 50:
-        return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자 이상</div>", ""
-    text = text.strip()
-    now = datetime.now().strftime("%Y-%m-%d %H:%M")
-    # Short display identifier derived from the first 100 characters (not a security hash).
-    doc_id = hashlib.md5(text[:100].encode()).hexdigest()[:8].upper()
-    log_lines = []
-    gemini_pct = 0
-    gemini_sources = []
-    gemini_response = ""
-    brave_sources = []
-    academic_sources = []
-    # ═══════════════════════════════════════
-    # PHASE 1: Gemini Google Search Grounding
-    # ═══════════════════════════════════════
-    if HAS_GENAI and GEMINI_KEY:
-        progress(0.10, "① Gemini Google Search Grounding...")
-        try:
-            client = genai.Client(api_key=GEMINI_KEY)
-            prompt = f"""당신은 표절 검사 전문가입니다. 아래 텍스트가 인터넷에 이미 존재하는 내용인지 Google Search로 철저히 검색하세요.
-[검사 대상 텍스트]
-{text[:3000]}
-[응답 형식]
-1. 발견된 유사 콘텐츠를 각각 "제목 | URL | 유사도(높음/중간/낮음)" 형식으로 나열
-2. 발견 못하면 "유사 콘텐츠 없음"
-3. 마지막 줄에 반드시 "표절율: XX%" 형식으로 종합 판정"""
-            contents = [
-                gtypes.Content(
-                    role="user",
-                    parts=[gtypes.Part.from_text(text=prompt)],
-                )
-            ]
-            tools = [gtypes.Tool(googleSearch=gtypes.GoogleSearch())]
-            generate_content_config = gtypes.GenerateContentConfig(
-                thinking_config=gtypes.ThinkingConfig(thinking_budget=0),
-                tools=tools,
-                temperature=0.1,
-                max_output_tokens=4000,
-            )
-            progress(0.20, "① Google Search 실행 중...")
-            # Collect the streamed response text.
-            full_response = ""
-            for chunk in client.models.generate_content_stream(
-                model="gemini-flash-lite-latest",
-                contents=contents,
-                config=generate_content_config,
-            ):
-                if chunk.text:
-                    full_response += chunk.text
-            gemini_response = full_response
-            # Parse the overall plagiarism rate from the answer.
-            pm = re.search(r'표절율[:\s]*(\d+)', full_response)
-            if pm:
-                gemini_pct = int(pm.group(1))
-            # Pull sources from grounding metadata via a second, non-streaming call.
-            progress(0.28, "① 출처 메타데이터 추출...")
-            try:
-                resp_full = client.models.generate_content(
-                    model="gemini-flash-lite-latest",
-                    contents=prompt,
-                    config=gtypes.GenerateContentConfig(
-                        tools=[gtypes.Tool(googleSearch=gtypes.GoogleSearch())],
-                        temperature=0.1,
-                        max_output_tokens=2000,
-                    )
-                )
-                # Extract the actual search sources from grounding_metadata.
-                if hasattr(resp_full, 'candidates') and resp_full.candidates:
-                    cand = resp_full.candidates[0]
-                    gm = getattr(cand, 'grounding_metadata', None)
-                    if gm:
-                        chunks = getattr(gm, 'grounding_chunks', None) or []
-                        # Map each original chunk index to its source entry. Chunks
-                        # without a web URI are skipped, so positions in
-                        # gemini_sources do not line up with chunk indices.
-                        chunk_to_source = {}
-                        for chunk_idx, gc in enumerate(chunks):
-                            web = getattr(gc, 'web', None)
-                            if web:
-                                title = getattr(web, 'title', '') or ''
-                                uri = getattr(web, 'uri', '') or ''
-                                if uri:
-                                    entry = {"title": title, "url": uri, "source": "Google", "snippet": ""}
-                                    gemini_sources.append(entry)
-                                    chunk_to_source[chunk_idx] = entry
-                        # Grounding supports carry the text segment backed by each chunk.
-                        supports = getattr(gm, 'grounding_supports', None) or []
-                        for sup in supports:
-                            seg = getattr(sup, 'segment', None)
-                            snippet_text = getattr(seg, 'text', '') if seg else ''
-                            idxs = getattr(sup, 'grounding_chunk_indices', []) or []
-                            # Attach the snippet to the source it actually refers to.
-                            for idx in idxs:
-                                entry = chunk_to_source.get(idx)
-                                if entry is not None and snippet_text:
-                                    entry["snippet"] = snippet_text[:120]
-            except Exception as e2:
-                print(f"Gemini 메타데이터 추출 오류: {e2}")
-            # Add URLs mentioned in the answer text that grounding did not report.
-            existing_urls = {s["url"] for s in gemini_sources}
-            for m in re.finditer(r'https?://[^\s\)\]\,\"\']{10,}', full_response):
-                url = m.group(0).rstrip('.')
-                if url not in existing_urls:
-                    domain = url.split('/')[2] if len(url.split('/')) > 2 else url
-                    gemini_sources.append({"title": domain, "url": url, "source": "Google", "snippet": ""})
-                    existing_urls.add(url)
-            log_lines.append(f"[Gemini] 표절율={gemini_pct}%, 출처={len(gemini_sources)}건")
-        except Exception as e:
-            log_lines.append(f"[Gemini] 오류: {str(e)[:100]}")
-            print(f"Gemini 오류: {str(e)}")
-    else:
-        log_lines.append("[Gemini] API 키 없음 — 건너뜀")
-    # ═══════════════════════════════════════
-    # PHASE 2: Brave Search, parallel web search
-    # ═══════════════════════════════════════
-    progress(0.40, "② Brave Search 웹 검색...")
-    key_phrases = _extract_key_phrases(text, max_phrases=6)
-    if BRAVE_KEY and key_phrases:
-        try:
-            brave_results = parallel_brave_search(key_phrases, max_workers=10)
-            seen_urls = {s["url"] for s in gemini_sources}
-            for query, results in brave_results.items():
-                for r in results:
-                    url = r.get("url", "")
-                    if url and url not in seen_urls:
-                        brave_sources.append({
-                            "title": r.get("title", "")[:80],
-                            "url": url,
-                            "source": "Brave",
-                            "snippet": r.get("snippet", "")[:120],
-                        })
-                        seen_urls.add(url)
-            log_lines.append(f"[Brave] 쿼리={len(key_phrases)}개, 출처={len(brave_sources)}건")
-        except Exception as e:
-            log_lines.append(f"[Brave] 오류: {str(e)[:80]}")
-    elif not BRAVE_KEY:
-        # No Brave key: fall back to DuckDuckGo scraping.
-        try:
-            seen_urls = {s["url"] for s in gemini_sources}
-            for phrase in key_phrases[:3]:
-                for r in duckduckgo_search(phrase, max_results=3):
-                    url = r.get("url", "")
-                    if url and url not in seen_urls:
-                        brave_sources.append({
-                            "title": r.get("title", "")[:80],
-                            "url": url,
-                            "source": "Web",
-                            "snippet": r.get("snippet", "")[:120],
-                        })
-                        seen_urls.add(url)
-            log_lines.append(f"[DuckDuckGo] 폴백, 출처={len(brave_sources)}건")
-        except Exception as e:
-            log_lines.append(f"[DuckDuckGo] 오류: {str(e)[:80]}")
-    # ═══════════════════════════════════════
-    # PHASE 3: academic database search (KCI · RISS · arXiv)
-    # ═══════════════════════════════════════
-    progress(0.60, "③ 학술 DB 검색 (KCI·RISS·arXiv)...")
-    # Academic query: the first 100 characters of the text, flattened to one line.
-    academic_query = text[:100].replace('\n', ' ')
-    # Search the Korean academic databases only when the text contains Hangul.
-    has_korean = bool(re.search(r'[가-힣]', text))
-    try:
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = {}
-            futures[executor.submit(search_arxiv, academic_query[:60])] = "arXiv"
-            if has_korean:
-                futures[executor.submit(search_kci, academic_query[:40])] = "KCI"
-                futures[executor.submit(search_riss, academic_query[:40])] = "RISS"
-            seen_urls = {s["url"] for s in gemini_sources + brave_sources}
-            for future in as_completed(futures, timeout=15):
-                src_name = futures[future]
-                try:
-                    results = future.result()
-                    for r in results:
-                        url = r.get("url", "")
-                        if url and url not in seen_urls:
-                            academic_sources.append({
-                                "title": r.get("title", "")[:80],
-                                "url": url,
-                                "source": src_name,
-                                "snippet": r.get("snippet", "")[:120],
-                            })
-                            seen_urls.add(url)
-                except Exception:
-                    pass
-        log_lines.append(f"[학술] KCI·RISS·arXiv 출처={len(academic_sources)}건")
-    except Exception as e:
-        log_lines.append(f"[학술] 오류: {str(e)[:80]}")
-    # ═══════════════════════════════════════
-    # Overall verdict
-    # ═══════════════════════════════════════
-    progress(0.80, "보고서 생성...")
-    all_sources = gemini_sources + brave_sources + academic_sources
-    # Overall rate: 90% of the Gemini figure plus capped boosts for web (max 7)
-    # and academic (max 3) hits.
-    web_boost = min(len(brave_sources) * 1.5, 7)
-    acad_boost = min(len(academic_sources) * 2, 3)
-    plag_pct = min(round(gemini_pct * 0.9 + web_boost + acad_boost), 100)
-    if plag_pct >= 50:
-        grade, gc = "🚨 표절 의심", "#FF4444"
-    elif plag_pct >= 30:
-        grade, gc = "⚠️ 주의 필요", "#FF8800"
-    elif plag_pct >= 15:
-        grade, gc = "📌 유사표현", "#DDAA00"
-    elif plag_pct >= 5:
-        grade, gc = "✓ 양호", "#4ECDC4"
-    else:
-        grade, gc = "✅ 우수", "#22AA44"
-    word_count = len(split_words(text))
-    char_count = len(text)
-    # ═══════════════════════════════════════
-    # Build the source table HTML
-    # ═══════════════════════════════════════
-    def _source_badge(src):
-        colors = {"Google": "#4285F4", "Brave": "#FB542B", "Web": "#888",
-                  "KCI": "#2E7D32", "RISS": "#1565C0", "arXiv": "#B71C1C"}
-        c = colors.get(src, "#666")
-        return f'<span style="display:inline-block;padding:2px 6px;border-radius:3px;background:{c};color:#fff;font-size:9px;font-weight:700;">{src}</span>'
-    src_rows = ""
-    for i, s in enumerate(all_sources[:30]):
-        title_display = s['title'][:55] if s['title'] else s['url'].split('/')[2] if len(s['url'].split('/')) > 2 else s['url'][:40]
-        # Titles, URLs and snippets come from external search results, so escape
-        # them (after truncation) before embedding them in the report markup.
-        title_html = _esc(title_display)
-        url_attr = _esc(s['url'], quote=True)
-        url_text = _esc(s['url'][:65])
-        snippet_html = f'<div style="font-size:9px;color:#888;margin-top:2px;">{_esc(s["snippet"][:100])}</div>' if s.get("snippet") else ""
-        src_rows += f"""<tr style="border-bottom:1px solid #E8E8E8;">
-          <td style="padding:8px;text-align:center;font-size:11px;color:#666;">{i+1}</td>
-          <td style="padding:8px;">{_source_badge(s.get('source',''))}</td>
-          <td style="padding:8px;"><a href="{url_attr}" target="_blank" rel="noopener noreferrer" style="color:#2E86C1;text-decoration:none;font-weight:600;font-size:11px;">{title_html}</a>{snippet_html}</td>
-          <td style="padding:8px;font-size:9px;color:#999;word-break:break-all;max-width:200px;"><a href="{url_attr}" target="_blank" rel="noopener noreferrer" style="color:#999;text-decoration:none;">{url_text}</a></td>
-        </tr>"""
-    if not src_rows:
-        src_rows = '<tr><td colspan="4" style="padding:20px;text-align:center;color:#999;">발견된 출처 없음</td></tr>'
-    # Gemini analysis summary (collapsible)
-    gemini_summary = ""
-    if gemini_response:
-        # Escape '&' as well as '<' and '>' so model output cannot inject markup.
-        safe_resp = _esc(gemini_response, quote=False).replace('\n', '<br>')
-        gemini_summary = f"""
-    <div style="padding:16px 24px;border-bottom:1px solid #E0E0E0;">
-      <details>
-        <summary style="cursor:pointer;font-size:13px;font-weight:700;color:#1A3C6E;">🤖 Gemini 분석 상세</summary>
-        <div style="margin-top:10px;padding:12px;background:#F8F9FA;border-radius:6px;font-size:11px;line-height:1.7;color:#333;max-height:300px;overflow-y:auto;">{safe_resp}</div>
-      </details>
-    </div>"""
-    HDR = '#3B7DD8'
-    html = f"""<div style="font-family:'Noto Sans KR',sans-serif;max-width:900px;margin:20px auto;background:#fff;border:1px solid #E0E0E0;border-radius:8px;box-shadow:0 2px 8px rgba(0,0,0,0.06);">
-    <div style="background:linear-gradient(135deg,{HDR},#4A8DE0);padding:24px;color:#fff;border-radius:8px 8px 0 0;">
-      <div style="display:flex;justify-content:space-between;align-items:center;">
-        <div>
-          <div style="font-size:24px;font-weight:900;">표절 검사 결과</div>
-          <div style="font-size:12px;opacity:0.9;margin-top:4px;">Gemini Google Search + Brave + KCI·RISS·arXiv</div>
-        </div>
-        <div style="text-align:right;font-size:11px;opacity:0.9;">
-          <div>문서: {doc_id}</div>
-          <div>{now}</div>
-        </div>
-      </div>
-    </div>
-    <div style="padding:24px;background:#FAFBFE;border-bottom:1px solid #E0E0E0;">
-      <div style="display:grid;grid-template-columns:1fr 1fr 1fr 1fr;gap:12px;">
-        <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
-          <div style="font-size:42px;font-weight:900;color:{gc};">{plag_pct}%</div>
-          <div style="font-size:11px;color:#666;margin-top:6px;">종합 표절율</div>
-        </div>
-        <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
-          <div style="font-size:22px;font-weight:900;color:{gc};margin-top:6px;">{grade}</div>
-          <div style="font-size:11px;color:#666;margin-top:6px;">판정</div>
-        </div>
-        <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
-          <div style="font-size:28px;font-weight:900;color:#555;">{len(all_sources)}</div>
-          <div style="font-size:11px;color:#666;margin-top:6px;">발견 출처</div>
-        </div>
-        <div style="text-align:center;padding:16px;background:#fff;border-radius:6px;border:1px solid #E0E0E0;">
-          <div style="font-size:14px;font-weight:700;color:#4285F4;margin-top:4px;">{len(gemini_sources)}</div>
-          <div style="font-size:14px;font-weight:700;color:#FB542B;">{len(brave_sources)}</div>
-          <div style="font-size:14px;font-weight:700;color:#2E7D32;">{len(academic_sources)}</div>
-          <div style="font-size:9px;color:#666;margin-top:2px;">Google·Brave·학술</div>
-        </div>
-      </div>
-    </div>
-    <div style="padding:16px 24px;border-bottom:1px solid #E0E0E0;">
-      <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:8px;">📋 검사 정보</div>
-      <div style="display:flex;gap:24px;font-size:12px;">
-        <span><span style="color:#888;">글자수</span> <b>{char_count:,}자</b></span>
-        <span><span style="color:#888;">단어수</span> <b>{word_count:,}단어</b></span>
-        <span><span style="color:#888;">검색엔진</span> <b>Google + Brave + KCI·RISS·arXiv</b></span>
-      </div>
-    </div>{gemini_summary}
-    <div style="padding:24px;">
-      <div style="font-size:13px;font-weight:700;color:#1A3C6E;margin-bottom:12px;">🔍 발견된 출처 ({len(all_sources)}건)</div>
-      <table style="width:100%;border-collapse:collapse;font-size:11px;">
-        <thead>
-          <tr style="background:{HDR};color:white;">
-            <th style="padding:10px;text-align:center;width:35px;">#</th>
-            <th style="padding:10px;text-align:center;width:55px;">소스</th>
-            <th style="padding:10px;text-align:left;">출처</th>
-            <th style="padding:10px;text-align:left;width:200px;">URL</th>
-          </tr>
-        </thead>
-        <tbody>{src_rows}</tbody>
-      </table>
-    </div>
-    </div>"""
-    progress(0.95, "완료!")
-    log_text = "\n".join(log_lines)
-    return html, log_text
 def run_detection(text, progress=gr.Progress()):
     if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자</div>",""
     text=text.strip()

     hum=analyze_humanizer(text,sents,words,morphs)
     fs,v,lv=compute_verdict(sc, sent_avg=sent_avg, ppx_score=ppx["score"], hum_score=hum["score"])
     return fs,v,lv,sc,ppx,hum
+from plagiarism_check import run_plagiarism
 def run_detection(text, progress=gr.Progress()):
     if not text or len(text.strip())<50: return "<div style='padding:20px;text-align:center;color:#888;'>⚠️ 최소 50자</div>",""
     text=text.strip()