| import streamlit as st |
| from streamlit_lottie import st_lottie |
| import requests |
| from io import BytesIO |
| from docx import Document |
| import pdfplumber |
| from gtts import gTTS |
| import os |
| import base64 |
|
|
| |
def load_lottieurl(url, timeout=10):
    """Fetch a Lottie animation definition from ``url``.

    Args:
        url: HTTP(S) address of a Lottie JSON file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with a status other than 200, or the body is not
        valid JSON.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server
        # and the caller never gets control back.
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # An undecodable body is reported the same way as any other failure.
        return None
|
|
# Animation displayed under the page title. load_lottieurl returns None when
# the server does not answer with status 200, so this value may be None.
LOTTIE_ASTRONAUT_URL = "https://lottie.host/b86c724d-556d-4a7a-a9b2-277f8099687b/J5c91vW5qS.json"
lottie_astronaut = load_lottieurl(LOTTIE_ASTRONAUT_URL)
|
|
| |
def read_docx(file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        file: Whatever ``Document`` accepts; the app passes the uploaded
            file object.

    Returns:
        The paragraph texts joined by newlines. Failures are not raised:
        a string beginning with "Error reading DOCX file:" is returned
        instead.
    """
    try:
        paragraphs = Document(file).paragraphs
        return "\n".join(paragraph.text for paragraph in paragraphs)
    except Exception as exc:
        return f"Error reading DOCX file: {exc}"
|
|
def read_pdf(file):
    """Return the text of a PDF document, one page per line group.

    Args:
        file: Whatever ``pdfplumber.open`` accepts; the app passes the
            uploaded file object.

    Returns:
        The extracted text of every page, each followed by a newline.
        Failures are not raised: a string beginning with
        "Error reading PDF file:" is returned instead.
    """
    try:
        with pdfplumber.open(file) as pdf:
            # NOTE(review): extract_text() may return None for a page with
            # no text layer (e.g. a scanned image) on some pdfplumber
            # versions. Treat that as an empty page so one such page does
            # not turn the whole document into an error.
            page_texts = [
                (page.extract_text() or "") + "\n" for page in pdf.pages
            ]
        return "".join(page_texts)
    except Exception as e:
        return f"Error reading PDF file: {e}"
|
|
def analyze_text(text):
    """Compute basic statistics for ``text``.

    Words are whitespace-separated tokens and characters are counted with
    ``len``. A sentence is any non-blank run of text delimited by ".", "!",
    "?" or their CJK / Devanagari counterparts; consecutive terminators
    ("?!", "...") therefore end a single sentence.

    Args:
        text: The string to analyse.

    Returns:
        A three-line string of the form
        "Word Count: W\\nCharacter Count: C\\nSentence Count: S".
    """
    word_count = len(text.split())
    char_count = len(text)

    # Map every other terminator onto "." so a single split handles them all:
    # "!", "?", ideographic full stop, full-width "!" and "?", and the danda.
    terminators = "!?\u3002\uff01\uff1f\u0964"
    unified = text.translate(str.maketrans(terminators, "." * len(terminators)))
    sentence_count = sum(1 for piece in unified.split(".") if piece.strip())

    return f"Word Count: {word_count}\nCharacter Count: {char_count}\nSentence Count: {sentence_count}"
|
|
def text_to_speech(text, language='en'):
    """Synthesise ``text`` to an MP3 file with gTTS.

    Each call writes to its own uniquely named temporary file, so
    simultaneous sessions cannot overwrite or delete one another's audio.
    The caller is responsible for deleting the returned file.

    Args:
        text: The text to speak.
        language: Language code passed to gTTS as ``lang``.

    Returns:
        The path of the generated MP3 file, or ``None`` when generation
        fails (the error is shown with ``st.error``).
    """
    import tempfile  # local import keeps this change self-contained

    audio_file = None
    try:
        tts = gTTS(text=text, lang=language, slow=False)
        # Reserve a unique name and close the handle before gTTS reopens the
        # path; an open NamedTemporaryFile cannot be reopened on Windows.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_file = tmp.name
        tts.save(audio_file)
        return audio_file
    except Exception as e:
        # Do not leave an empty or partial file behind on failure.
        if audio_file is not None and os.path.exists(audio_file):
            os.remove(audio_file)
        st.error(f"Error generating speech: {e}")
        return None
|
|
def play_audio(audio_file):
    """Render an auto-playing HTML audio player for ``audio_file``.

    The file's bytes are base64-encoded and inlined as a ``data:`` URI in an
    ``<audio>`` element, which is emitted through ``st.markdown`` with raw
    HTML enabled.

    Args:
        audio_file: Path of the audio file to embed.
    """
    with open(audio_file, "rb") as stream:
        encoded = base64.b64encode(stream.read()).decode()

    player_html = f"""
<audio controls autoplay="true">
<source src="data:audio/mp3;base64,{encoded}" type="audio/mp3">
</audio>
"""
    st.markdown(player_html, unsafe_allow_html=True)
|
|
| |
# --- Page layout -----------------------------------------------------------
# set_page_config is kept as the first Streamlit call in this script.
st.set_page_config(page_title="AI Document Reader & Analyzer", page_icon=":book:")

# NOTE(review): the original title began with a mis-encoded character; the
# book emoji is a best guess chosen to match page_icon above — confirm.
st.subheader("📖 AI Document Reader & Analyzer")

# The animation is decorative, so skip it when it could not be downloaded
# rather than failing the whole page.
if lottie_astronaut is not None:
    st_lottie(lottie_astronaut, height=150)

uploaded_file = st.file_uploader("Upload a DOCX or PDF file", type=["docx", "pdf"])

if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()
    document_text = ""

    with st.spinner(f"Reading and processing your {file_extension.upper()} file..."):
        if file_extension == "docx":
            document_text = read_docx(uploaded_file)
        elif file_extension == "pdf":
            document_text = read_pdf(uploaded_file)

    # read_docx / read_pdf report failures as strings with these prefixes.
    # Show them as errors instead of analysing and reading them aloud.
    reader_error_prefixes = ("Error reading DOCX file:", "Error reading PDF file:")

    if document_text.startswith(reader_error_prefixes):
        st.error(document_text)
    elif document_text.strip():
        st.subheader("Document Content:")
        st.text_area("Text from the document", document_text, height=300)

        st.subheader("Document Analysis:")
        analysis = analyze_text(document_text)
        # One st.write call per statistic: a single call would render the
        # newline-separated string as Markdown and collapse it to one line.
        for stat_line in analysis.splitlines():
            st.write(stat_line)

        st.subheader("Virtual Voice Reader:")
        language_choice = st.selectbox(
            "Select language for voice:",
            ["en", "hi", "es", "fr", "de", "ja", "ko", "pt", "ru", "zh-cn"],
        )
        if st.button("Read with Virtual Voice"):
            with st.spinner("Generating and playing audio..."):
                audio_file = text_to_speech(document_text, language=language_choice)
                if audio_file:
                    try:
                        play_audio(audio_file)
                    finally:
                        # The audio is inlined into the page, so the file can
                        # be removed whether or not playback succeeded.
                        if os.path.exists(audio_file):
                            os.remove(audio_file)
    else:
        # Empty or whitespace-only result (e.g. a PDF without a text layer).
        st.error("Could not extract text from the uploaded file.")

st.markdown("---")
st.info("This AI Space can read DOCX and PDF files, analyze basic statistics, and read the content using a virtual voice. You can expand the analysis capabilities with more advanced Natural Language Processing (NLP) techniques.")