| import gradio as gr |
| import requests |
| import os |
| from bs4 import BeautifulSoup |
|
|
|
|
def _first_block_text(soup, selectors, min_length=50):
    """Return the text of the first matching element that is long enough.

    Each entry of *selectors* is an attribute dict passed to ``soup.find``.
    An element qualifies when its text is longer than *min_length*
    characters. Returns ``None`` when nothing qualifies.
    """
    for sel in selectors:
        block = soup.find(attrs=sel)
        if block is not None:
            text = block.get_text(separator="\n", strip=True)
            if len(text) > min_length:
                return text
    return None


def _extract_ai_overview(soup):
    """Return the text of an AI-overview-like block in *soup*, or ``None``.

    Three heuristics are tried in order:
    1. an element with class ``Ww4FFb``;
    2. the first ``<div>`` whose text mentions "KI-Übersicht" or
       "AI Overview" (its next siblings are preferred over its own text);
    3. an element with class ``hgKElc``, ``ILfuVd`` or ``c2xzTb``.

    NOTE(review): the class names are hard-coded and presumably taken from a
    Google result page; verify them against the page actually being fetched.
    """
    text = _first_block_text(soup, [{"class": "Ww4FFb"}])
    if text is not None:
        return text

    for div in soup.find_all("div"):
        text = div.get_text(separator="\n", strip=True)
        if ("KI-Übersicht" in text or "AI Overview" in text) and len(text) > 100:
            # The matching div may only be the heading, so look at up to
            # three following siblings first and fall back to the div itself.
            siblings = div.find_next_siblings()
            if siblings:
                sibling_text = "\n".join(
                    s.get_text(separator="\n", strip=True) for s in siblings[:3]
                )
                if len(sibling_text) > 100:
                    return sibling_text
            return text

    return _first_block_text(
        soup, [{"class": "hgKElc"}, {"class": "ILfuVd"}, {"class": "c2xzTb"}]
    )


def fetch_ai_overview(query: str, *, overview_only: bool = False) -> str:
    """Fetch the page for *query* and return text extracted from it.

    A GET request with German locale headers and parameters is sent to
    ``https://duck.ai/duckchat/v1/chat``.

    NOTE(review): the function name and the user-facing messages speak of
    Google and its AI Overview, but the request targets duck.ai. Confirm
    which endpoint is intended.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        overview_only: When ``False`` (default) the text of the whole page
            body is returned. When ``True`` only an AI-overview block is
            looked for, and a "not found" message is returned if none of
            the heuristics match.

    Returns:
        The extracted text, or a short German message describing the
        failure. Request and parsing errors are reported as such a message
        rather than raised.
    """
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/124.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "de-DE,de;q=0.9",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    }

    params = {
        "q": query,
        "hl": "de",
        "gl": "de",
    }

    try:
        resp = requests.get(
            "https://duck.ai/duckchat/v1/chat",
            headers=headers,
            params=params,
            timeout=10,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        if overview_only:
            overview = _extract_ai_overview(soup)
            if overview is not None:
                return overview
            return "ℹ️ Keine KI-Übersicht gefunden. Google zeigt sie möglicherweise nicht für diese Suchanfrage an, oder der Block wurde dynamisch geladen (JavaScript-Rendering erforderlich)."

        # find() returns None when the response has no <body> element
        # (e.g. a non-HTML payload); use the whole document in that case.
        body = soup.find("body")
        container = body if body is not None else soup
        return container.get_text(separator=" ", strip=True)

    except requests.exceptions.Timeout:
        return "⏱️ Zeitüberschreitung bei der Google-Anfrage."
    except requests.exceptions.HTTPError as e:
        return f"HTTP-Fehler: {e.response.status_code}"
    except Exception as e:
        return f"Fehler: {str(e)}"
|
|
|
|
def google_search(query: str, num_results: int = 5) -> str:
    """Run a Google Custom Search for *query* and return Markdown.

    The output starts with a "KI-Übersicht" section filled with the result
    of ``fetch_ai_overview(query)``. It is followed by one section per hit
    (title, URL, snippet), separated by horizontal rules.

    Args:
        query: Search terms. ``None`` or whitespace-only input counts as
            empty; surrounding whitespace is removed before searching.
        num_results: Desired number of hits, clamped to the range 1..10.

    Returns:
        Markdown text, or a short German message when the
        ``GOOGLE_API_KEY`` environment variable is unset, the query is
        empty, or the API request fails.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    engine_id = "77f1602c0ff764edb"

    if not api_key:
        return "❌ Kein API-Key gefunden."

    # Normalise once so that a None query (e.g. from a programmatic caller)
    # cannot raise here, and both requests below see the same trimmed text.
    query = (query or "").strip()
    if not query:
        return "Bitte gib einen Suchbegriff ein."

    ai_text = fetch_ai_overview(query)
    ai_section = f"## 🤖 KI-Übersicht\n\n{ai_text}\n\n---\n\n"

    try:
        resp = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params={
                "key": api_key,
                "cx": engine_id,
                "q": query,
                # int() is evaluated inside the try so that a non-numeric
                # value is reported like any other failure.
                "num": max(1, min(int(num_results), 10)),
            },
            timeout=8,
        )
        resp.raise_for_status()
        items = resp.json().get("items", [])

        if not items:
            return ai_section + "Keine weiteren Suchergebnisse gefunden."

        snippets = []
        for item in items:
            title = item.get("title", "")
            link = item.get("link", "")
            # Snippets may contain line breaks; keep each one on a single line.
            snippet = item.get("snippet", "").replace("\n", " ")
            snippets.append(f"### {title}\n**URL:** {link}\n\n{snippet}")

        return ai_section + "\n\n---\n\n".join(snippets)

    except requests.exceptions.HTTPError as e:
        return ai_section + f"HTTP-Fehler: {e.response.status_code} – {e.response.text}"
    except requests.exceptions.Timeout:
        return ai_section + "Zeitüberschreitung bei der API-Anfrage."
    except Exception as e:
        return ai_section + f"Google-Suchfehler: {str(e)}"
|
|
|
|
# Gradio UI: a query box and a result-count slider feed google_search,
# whose return value is shown in a Markdown component.
_query_input = gr.Textbox(label="Suchanfrage", placeholder="Wonach suchst du?")
_count_input = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Anzahl Ergebnisse")
_results_output = gr.Markdown(label="Ergebnisse")

demo = gr.Interface(
    fn=google_search,
    inputs=[_query_input, _count_input],
    outputs=_results_output,
)


if __name__ == "__main__":
    # Start the web UI only when executed as a script, not on import.
    demo.launch()
|
|
|
|
|
|
|
|
|
|
| """ |
| |
| import gradio as gr |
| import requests |
| import os |
| from bs4 import BeautifulSoup |
| |
| |
# NOTE: the docstring uses single-quote delimiters because this code is
# archived inside a triple-double-quoted string.
def fetch_body_text(url: str, timeout: int = 6) -> str:
    '''Download *url* and return the text of its HTML body, one line per text node.

    Script, style, noscript and head elements are removed before the text
    is extracted, and blank lines are dropped. If the document has no body
    element, the text of the whole document is used.

    Args:
        url: Address of the page to download.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The extracted text, or a message starting with
        "⚠️ Konnte Seite nicht laden:" when anything goes wrong.
    '''
    request_headers = {"User-Agent": "Mozilla/5.0 (compatible; GoogleSearchBot/1.0)"}

    try:
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()
        document = BeautifulSoup(response.text, "html.parser")

        # Drop elements whose content is not visible page text.
        for unwanted in document.find_all(["script", "style", "noscript", "head"]):
            unwanted.decompose()

        body = document.body
        root = document if body is None else body
        raw_text = root.get_text(separator="\n", strip=True)

        # Keep only lines that still contain something after stripping.
        return "\n".join(filter(str.strip, raw_text.splitlines()))

    except Exception as e:
        return f"⚠️ Konnte Seite nicht laden: {e}"
| |
| |
# NOTE: the docstring uses single-quote delimiters because this code is
# archived inside a triple-double-quoted string.
def google_search(query: str, num_results: int = 5) -> str:
    '''Search via the Google Custom Search API and return page texts as Markdown.

    For every hit the linked page is downloaded with ``fetch_body_text``.
    Its text, capped at 3000 characters, is shown in a fenced block below
    the title and URL.

    Args:
        query: Search terms; whitespace-only input is rejected.
        num_results: Desired number of hits, clamped to the range 1..10.

    Returns:
        Markdown text, or a short German message when the
        ``GOOGLE_API_KEY`` environment variable is unset, the query is
        empty, nothing was found, or the API request fails.
    '''
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    engine_id = "77f1602c0ff764edb"

    if not api_key:
        return "❌ Kein API-Key gefunden. Bitte `GOOGLE_API_KEY` als Umgebungsvariable setzen."
    if not query.strip():
        return "Bitte gib einen Suchbegriff ein."

    def render(hit):
        # One Markdown section per hit; the cap keeps the output manageable.
        title = hit.get("title", "")
        link = hit.get("link", "")
        page_text = fetch_body_text(link)
        return f"### {title}\n**URL:** {link}\n\n```\n{page_text[:3000]}\n```"

    try:
        # Built inside the try so that a non-numeric num_results is reported
        # like any other failure.
        search_params = {
            "key": api_key,
            "cx": engine_id,
            "q": query,
            "num": max(1, min(int(num_results), 10)),
        }
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=search_params,
            timeout=8,
        )
        response.raise_for_status()
        hits = response.json().get("items", [])

        if not hits:
            return "Keine Suchergebnisse gefunden."

        return "\n\n---\n\n".join([render(hit) for hit in hits])

    except requests.exceptions.HTTPError as e:
        return f"HTTP-Fehler: {e.response.status_code} – {e.response.text}"
    except requests.exceptions.Timeout:
        return "Zeitüberschreitung bei der Anfrage. Bitte erneut versuchen."
    except Exception as e:
        return f"Google-Suchfehler: {str(e)}"
| |
| |
# Gradio UI: a query box and a result-count slider feed google_search,
# whose return value is shown in a Markdown component.
_query_input = gr.Textbox(label="Suchanfrage", placeholder="Wonach suchst du?")
_count_input = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Anzahl Ergebnisse")
_results_output = gr.Markdown(label="Ergebnisse")

demo = gr.Interface(
    fn=google_search,
    inputs=[_query_input, _count_input],
    outputs=_results_output,
)

if __name__ == "__main__":
    # Start the web UI only when executed as a script, not on import.
    demo.launch()
| |
| |
| |
| |
| import gradio as gr |
| import requests |
| import os |
| |
# NOTE: the docstring uses single-quote delimiters because this code is
# archived inside a triple-double-quoted string.
def google_search(query: str, num_results: int = 5) -> str:
    '''Search via the Google Custom Search API and return the hits as Markdown.

    Each hit becomes a section with its title, URL and snippet; sections
    are separated by horizontal rules.

    Args:
        query: Search terms. ``None`` or whitespace-only input counts as
            empty; surrounding whitespace is removed before searching.
        num_results: Desired number of hits, clamped to the range 1..10.

    Returns:
        Markdown text, or a short German message when the
        ``GOOGLE_API_KEY`` environment variable is unset, the query is
        empty, nothing was found, or the API request fails.
    '''
    # Function-scope import so this block does not rely on the import list
    # above it.
    import logging

    api_key = os.environ.get("GOOGLE_API_KEY", "")
    engine_id = "77f1602c0ff764edb"

    if not api_key:
        return "❌ Kein API-Key gefunden. Bitte `GOOGLE_API_KEY` als Umgebungsvariable setzen."

    # Normalise once so that a None query cannot raise outside the try block.
    query = (query or "").strip()
    if not query:
        return "Bitte gib einen Suchbegriff ein."

    try:
        resp = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params={
                "key": api_key,
                "cx": engine_id,
                "q": query,
                "num": max(1, min(int(num_results), 10)),
            },
            timeout=8,
        )
        resp.raise_for_status()
        data = resp.json()
        # The raw response used to be print()ed on every search; emit it as
        # a debug log record instead so it stays off stdout by default.
        logging.getLogger(__name__).debug("Custom Search response: %r", data)

        items = data.get("items", [])
        if not items:
            return "Keine Suchergebnisse gefunden."

        snippets = []
        for item in items:
            title = item.get("title", "")
            link = item.get("link", "")
            # Snippets may contain line breaks; keep each one on a single line.
            snippet = item.get("snippet", "").replace("\n", " ")
            snippets.append(f"### {title}\nURL: {link}\n\n{snippet}")

        return "\n\n---\n\n".join(snippets)

    except requests.exceptions.HTTPError as e:
        return f"HTTP-Fehler: {e.response.status_code} – {e.response.text}"
    except requests.exceptions.Timeout:
        return "Zeitüberschreitung bei der Anfrage. Bitte erneut versuchen."
    except Exception as e:
        return f"Google-Suchfehler: {str(e)}"
| |
| |
# Gradio UI: a query box and a result-count slider feed google_search,
# whose return value is shown in a Markdown component.
_query_input = gr.Textbox(label="Suchanfrage", placeholder="Wonach suchst du?")
_count_input = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Anzahl Ergebnisse")
_results_output = gr.Markdown(label="Ergebnisse")

demo = gr.Interface(
    fn=google_search,
    inputs=[_query_input, _count_input],
    outputs=_results_output,
)

if __name__ == "__main__":
    # Start the web UI only when executed as a script, not on import.
    demo.launch()
| """ |