"""Streamlit app that fetches a page through the Bright Data request API.

The user supplies a target URL; the app POSTs it to the Bright Data endpoint
using the zone configured below, converts the returned HTML to plain text with
``html2text``, and offers the text for viewing and download.

The API key is read from the ``APIKEY_BRIGHTDATA`` environment variable.
"""
import os

import html2text
import requests
import streamlit as st

BRIGHTDATA_ENDPOINT = "https://api.brightdata.com/request"
REQUEST_TIMEOUT_SECONDS = 60


def _fetch_html(api_key: str, zone: str, url: str) -> str:
    """Request ``url`` through the Bright Data endpoint and return the body.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        zone: Value sent as the ``zone`` field of the JSON payload.
        url: Target page, sent as the ``url`` field of the JSON payload.

    Returns:
        The response body as text (handled as HTML by the caller).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status (raised by ``raise_for_status``).
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        "Accept-Language": "en-US,en;q=0.9",
    }
    payload = {
        "zone": zone,
        "url": url,
        "format": "raw",
    }
    response = requests.post(
        BRIGHTDATA_ENDPOINT,
        json=payload,
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    return response.text


api_key = os.getenv("APIKEY_BRIGHTDATA")
st.title("ByPass Capcha & Text Extractor")

zone = "web_unlocker1"
url = st.text_input("Target URL", value="https://in.indeed.com/cmp/Ey/reviews")

if st.button("Extract Text"):
    # Surrounding whitespace from the text box would otherwise pass the
    # emptiness check and be sent to the API as part of the URL.
    target_url = url.strip()

    if not api_key:
        # Without this guard the request would carry "Bearer None".
        st.error(
            "API key not found. Set the APIKEY_BRIGHTDATA environment variable."
        )
    elif not target_url:
        st.warning("Please enter a target URL.")
    else:
        html = None
        with st.spinner("Fetching page..."):
            try:
                html = _fetch_html(api_key, zone, target_url)
            except requests.RequestException as e:
                st.error(f"Error: {e}")

        # Only render results when the fetch succeeded.
        if html is not None:
            text = html2text.html2text(html)
            st.subheader("Extracted Text")
            st.text_area("Result", text, height=400)
            st.download_button(
                "Download as .txt", text, file_name="extracted.txt"
            )